+2006-01-24 Cluster File Systems, Inc. <info@clusterfs.com>
+ * version 1.4.6
+ * bug fixes
+
+Severity : major
+Frequency : Cray XT3 only
+Bugzilla : 10074
+Description: fix use of portals/lnet pid
+Details : incorrect use of the portals/lnet pid caused them to get out of
+ sync, resulting in silent dropping of RPC messages
+
+Severity : major
+Frequency : Infiniband IB LND only
+Bugzilla : 9776
+Description: iiblnd wasn't mapping all memory
+Details : iiblnd wasn't mapping all memory, resulting in comms errors
+ on some architectures/memory configs
+
2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com>
* Configuration change for the XT3
- The PTLLND is now used to run Lustre over Portals on the XT3.
+ The PTLLND is now used to run Lustre over Portals on the XT3.
The configure option(s) --with-cray-portals are no longer
used. Rather --with-portals=<path-to-portals-includes> is
used to enable building on the XT3. In addition to enable
2005-10-10 Cluster File Systems, Inc. <info@clusterfs.com>
* Portals has been removed, replaced by LNET.
- LNET is new networking infrastructure for Lustre, it includes a
+ LNET is the new networking infrastructure for Lustre; it includes a
reorganized network configuration mode (see the user
documentation for full details) as well as support for routing
between different network fabrics. Lustre Networking Devices
#ifndef __LIBCFS_CURPROC_H__
#define __LIBCFS_CURPROC_H__
+#ifdef __KERNEL__
/*
* Portable API to access common characteristics of "current" UNIX process.
*
*/
cfs_kernel_cap_t cfs_curproc_cap_get(void);
void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
+#endif
/* __LIBCFS_CURPROC_H__ */
#endif
EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \
darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \
- darwin-lock.h darwin-sync.h kp30.h portals_lib.h
+ darwin-lock.h darwin-sync.h darwin-tcpip.h kp30.h portals_lib.h
-#ifndef __LIBCFS_DARWIN_CFS_FS_H__
-#define __LIBCFS_DARWIN_CFS_FS_H__
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Implementation of standard file system interfaces for XNU kernel.
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+#ifndef __LIBCFS_DARWIN_FS_H__
+#define __LIBCFS_DARWIN_FS_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
#include <sys/types.h>
#include <sys/systm.h>
-/*
- * __APPLE_API_PRIVATE is defined before include user.h
- * Doing this way to get the define of uthread, it's not good
- * but I do need to know what's inside uthread.
- */
-#ifndef __APPLE_API_PRIVATE
-#define __APPLE_API_PRIVATE
-#include <sys/vnode.h>
-#undef __APPLE_API_PRIVATE
-#else
-#include <sys/vnode.h>
-#endif
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/filedesc.h>
-#include <sys/stat.h>
#include <sys/mount.h>
+#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <sys/mbuf.h>
#include <stdarg.h>
#include <mach/mach_types.h>
-#include <mach/mach_traps.h>
#include <mach/time_value.h>
#include <kern/clock.h>
#include <sys/param.h>
/*
* File operating APIs in kernel
*/
+#ifdef __DARWIN8__
+/*
+ * Kernel file descriptor
+ */
+typedef struct cfs_kern_file {
+ int f_flags;
+ vnode_t f_vp;
+ vfs_context_t f_ctxt;
+} cfs_file_t;
+
+#else
+
typedef struct file cfs_file_t;
-int filp_node_size(cfs_file_t *fp, off_t *size);
+#endif
+
+int kern_file_size(cfs_file_t *fp, off_t *size);
#define cfs_filp_size(fp) \
({ \
off_t __size; \
- filp_node_size((fp), &__size); \
+ kern_file_size((fp), &__size); \
__size; \
})
#define cfs_filp_poff(fp) (NULL)
-cfs_file_t *filp_open(const char *name, int flags, int mode, int *err);
-int filp_close(cfs_file_t *fp);
-int filp_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
-int filp_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
-int filp_fsync(cfs_file_t *fp);
+cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err);
+int kern_file_close(cfs_file_t *fp);
+int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int kern_file_sync(cfs_file_t *fp);
-#define cfs_filp_open(n, f, m, e) filp_open(n, f, m, e)
-#define cfs_filp_close(f) filp_close(f)
-#define cfs_filp_read(f, b, n, p) filp_read(f, b, n, p)
-#define cfs_filp_write(f, b, n, p) filp_write(f, b, n, p)
-#define cfs_filp_fsync(f) filp_fsync(f)
+#define cfs_filp_open(n, f, m, e) kern_file_open(n, f, m, e)
+#define cfs_filp_close(f) kern_file_close(f)
+#define cfs_filp_read(f, b, n, p) kern_file_read(f, b, n, p)
+#define cfs_filp_write(f, b, n, p) kern_file_write(f, b, n, p)
+#define cfs_filp_fsync(f) kern_file_sync(f)
int ref_file(cfs_file_t *fp);
int rele_file(cfs_file_t *fp);
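
For illustration only, a minimal sketch of driving the renamed kern_file_*
API end to end (the path, flags, and error handling here are hypothetical,
not part of this patch; a negative return is assumed to indicate an error):

	/* Sketch: open, write, sync, and close a file from kernel context. */
	static int kern_file_sketch(const char *path, void *buf, size_t len)
	{
	        cfs_file_t *fp;
	        off_t       pos = 0;
	        int         rc;

	        fp = kern_file_open(path, O_WRONLY | O_CREAT, 0644, &rc);
	        if (fp == NULL)
	                return rc;           /* rc set by kern_file_open() */
	        rc = kern_file_write(fp, buf, len, &pos);
	        if (rc >= 0)
	                rc = kern_file_sync(fp);
	        kern_file_close(fp);
	        return rc;
	}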
#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t)
typedef struct flock cfs_flock_t;
-#define CFS_FLOCK_TYPE(fl) ((fl)->l_type)
-#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->l_type = (type); } while(0)
-#define CFS_FLOCK_PID(fl) ((fl)->l_pid)
-#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->l_pid = (pid); } while(0)
-#define CFS_FLOCK_START(fl) ((fl)->l_start)
-#define CFS_FLOCK_SET_START(fl, start) do { (fl)->l_start = (start); } while(0)
-#define CFS_FLOCK_END(fl) ((fl)->l_len == 0? CFS_OFFSET_MAX: ((fl)->l_start + (fl)->l_en))
-#define CFS_FLOCK_SET_END(fl, end) \
- do { \
- if (end == CFS_OFFSET_MAX) \
- (fl)->l_len = 0; \
- else \
- (fl)->l_len = (end) - (fl)->l_start;\
- } while(0)
-
-typedef struct {
- void *d;
-} cfs_dentry_t;
-typedef unsigned short umode_t;
+#define cfs_flock_type(fl) ((fl)->l_type)
+#define cfs_flock_set_type(fl, type) do { (fl)->l_type = (type); } while(0)
+#define cfs_flock_pid(fl) ((fl)->l_pid)
+#define cfs_flock_set_pid(fl, pid) do { (fl)->l_pid = (pid); } while(0)
+#define cfs_flock_start(fl) ((fl)->l_start)
+#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0)
+
+static inline loff_t cfs_flock_end(cfs_flock_t *fl)
+{
+ return (fl->l_len == 0 ? CFS_OFFSET_MAX : (fl->l_start + fl->l_len));
+}
+
+static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end)
+{
+ if (end == CFS_OFFSET_MAX)
+ fl->l_len = 0;
+ else
+ fl->l_len = end - fl->l_start;
+}
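
As a quick illustration of the BSD (start, len) to Linux [start, end]
mapping these accessors implement (the helper name is made up):

	/* Sketch: a whole-file lock round-trips through the accessors. */
	static void flock_whole_file_sketch(cfs_flock_t *fl)
	{
	        cfs_flock_set_start(fl, 0);
	        cfs_flock_set_end(fl, CFS_OFFSET_MAX); /* stores l_len == 0 */
	        /* cfs_flock_end(fl) now yields CFS_OFFSET_MAX again */
	}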
#define ATTR_MODE 0x0001
#define ATTR_UID 0x0002
#define in_group_p(x) (0)
-#endif
+struct posix_acl_entry {
+ short e_tag;
+ unsigned short e_perm;
+ unsigned int e_id;
+};
+
+struct posix_acl {
+ atomic_t a_refcount;
+ unsigned int a_count;
+ struct posix_acl_entry a_entries[0];
+};
+
+struct posix_acl *posix_acl_alloc(int count, int flags);
+static inline struct posix_acl *posix_acl_from_xattr(const void *value,
+ size_t size)
+{
+ return posix_acl_alloc(0, 0);
+}
+static inline void posix_acl_release(struct posix_acl *acl) {}
+static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; }
+static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl)
+{
+ return acl;
+}
+
+/*
+ * portable UNIX device file identification.
+ */
+
+typedef dev_t cfs_rdev_t;
+
+#else /* !__KERNEL__ */
+
+typedef struct file cfs_file_t;
+#endif /* END __KERNEL__ */
+
+typedef struct {
+ void *d;
+} cfs_dentry_t;
+
+#ifndef O_SYNC
#define O_SYNC 0
+#endif
+#ifndef O_DIRECTORY
#define O_DIRECTORY 0
+#endif
+#ifndef O_LARGEFILE
#define O_LARGEFILE 0
+#endif
#endif
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <mach/semaphore.h>
-#include <mach/mach_traps.h>
-
-/* spin lock types and operations */
-#include <kern/simple_lock.h>
#include <kern/assert.h>
#include <kern/thread.h>
return kspin_trylock(&lock->spin);
}
+static inline void spin_lock_done(spinlock_t *lock)
+{
+ kspin_done(&lock->spin);
+}
+
#define spin_lock_bh(x) spin_lock(x)
#define spin_unlock_bh(x) spin_unlock(x)
#define spin_lock_bh_init(x) spin_lock_init(x)
extern boolean_t ml_set_interrupts_enabled(boolean_t enable);
-#define __disable_irq() (spl_t) ml_set_interrupts_enabled(FALSE)
+#define __disable_irq() ml_set_interrupts_enabled(FALSE)
#define __enable_irq(x) (void) ml_set_interrupts_enabled(x)
#define spin_lock_irqsave(s, f) do{ \
krw_sem_init(&s->s);
}
+static inline void fini_rwsem(struct rw_semaphore *s)
+{
+ krw_sem_done(&s->s);
+}
+
static inline void down_read(struct rw_semaphore *s)
{
krw_sem_down_r(&s->s);
static inline int down_read_trylock(struct rw_semaphore *s)
{
int ret = krw_sem_down_r_try(&s->s);
- return ret == 0? 1: 0;
+ return ret == 0;
}
static inline void down_write(struct rw_semaphore *s)
static inline int down_write_trylock(struct rw_semaphore *s)
{
int ret = krw_sem_down_w_try(&s->s);
- return ret == 0? 1: 0;
+ return ret == 0;
}
static inline void up_read(struct rw_semaphore *s)
/*
* read-write lock : Need to be investigated more!!
- * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore
*
* - DECLARE_RWLOCK(l)
* - rwlock_init(x)
* - write_lock(x)
* - write_unlock(x)
*/
-typedef struct rw_semaphore rwlock_t;
+typedef struct krw_spin rwlock_t;
-#define rwlock_init(pl) init_rwsem(pl)
+#define rwlock_init(pl) krw_spin_init(pl)
-#define read_lock(l) down_read(l)
-#define read_unlock(l) up_read(l)
-#define write_lock(l) down_write(l)
-#define write_unlock(l) up_write(l)
+#define read_lock(l) krw_spin_down_r(l)
+#define read_unlock(l) krw_spin_up_r(l)
+#define write_lock(l) krw_spin_down_w(l)
+#define write_unlock(l) krw_spin_up_w(l)
#define write_lock_irqsave(l, f) do{ \
f = __disable_irq(); \
#define read_unlock_irqrestore(l, f) do{ \
read_unlock(l); \
__enable_irq(f);}while(0)
-
/*
* Funnel:
*
* Safe funnel in/out
*/
+#ifdef __DARWIN8__
+
+#define CFS_DECL_FUNNEL_DATA
+#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA
+#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA
+#define CFS_CONE_IN do {} while(0)
+#define CFS_CONE_EX do {} while(0)
+
+#define CFS_NET_IN do {} while(0)
+#define CFS_NET_EX do {} while(0)
+
+#else
#define CFS_DECL_FUNNEL_DATA \
boolean_t __funnel_state = FALSE; \
#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel)
#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel)
-/* __KERNEL__ */
#endif
+#else
+#include <libcfs/user-lock.h>
+#endif /* __KERNEL__ */
+
/* __XNU_CFS_LOCK_H */
#endif
#include <libcfs/list.h>
/*
- * Page of OSX
- *
- * There is no page in OSX, however, we need page in lustre.
- */
-#define PAGE_MASK (~(PAGE_SIZE-1))
-#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
-#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
-#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
-
-/*
* Basic xnu_page struct, should be binary compatibility with
* all page types in xnu (we have only xnu_raw_page, xll_page now)
*/
/* Variable sized pages are not supported */
+#ifdef PAGE_SHIFT
+#define CFS_PAGE_SHIFT PAGE_SHIFT
+#else
#define CFS_PAGE_SHIFT 12
+#endif
+
+#ifdef PAGE_SIZE
+#define CFS_PAGE_SIZE PAGE_SIZE
+#else
#define CFS_PAGE_SIZE (1 << CFS_PAGE_SHIFT)
+#endif
+
#define PAGE_CACHE_SIZE CFS_PAGE_SIZE
+
+#ifdef PAGE_MASK
+#define CFS_PAGE_MASK PAGE_MASK
+#else
#define CFS_PAGE_MASK (~(CFS_PAGE_SIZE - 1))
+#endif
enum {
XNU_PAGE_RAW,
* raw page, no cache object, just like buffer
*/
struct xnu_raw_page {
- struct xnu_page header;
- vm_address_t virtual;
- upl_t upl;
- int order;
- atomic_t count;
- void *private;
+ struct xnu_page header;
+ void *virtual;
+ atomic_t count;
+ struct list_head link;
};
/*
* Public interface to lustre
*
- * - cfs_alloc_pages(f, o)
* - cfs_alloc_page(f)
- * - cfs_free_pages(p, o)
* - cfs_free_page(p)
* - cfs_kmap(p)
* - cfs_kunmap(p)
* pages only.
*/
-cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order);
cfs_page_t *cfs_alloc_page(u_int32_t flags);
-void cfs_free_pages(cfs_page_t *pages, int order);
void cfs_free_page(cfs_page_t *page);
void cfs_get_page(cfs_page_t *page);
int cfs_put_page_testzero(cfs_page_t *page);
int cfs_page_count(cfs_page_t *page);
void cfs_set_page_count(cfs_page_t *page, int v);
+#define cfs_page_index(pg) (0)
void *cfs_page_address(cfs_page_t *pg);
void *cfs_kmap(cfs_page_t *pg);
* Memory allocator
*/
-extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
-extern void cfs_free(void *addr);
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+void cfs_free(void *addr);
+
+void *cfs_alloc_large(size_t nr_bytes);
+void cfs_free_large(void *addr);
-extern void *cfs_alloc_large(size_t nr_bytes);
-extern void cfs_free_large(void *addr);
+extern int get_preemption_level(void);
+
+#define CFS_ALLOC_ATOMIC_TRY \
+ (get_preemption_level() != 0 ? CFS_ALLOC_ATOMIC : 0)
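
A hedged sketch of how CFS_ALLOC_ATOMIC_TRY is meant to be used (the helper
name is invented; a flags value of 0 is assumed to let the allocator block):

	/* Sketch: request an atomic allocation only when we must not sleep. */
	static void *alloc_maybe_atomic_sketch(size_t size)
	{
	        /* get_preemption_level() != 0 => non-blockable context */
	        return cfs_alloc(size, CFS_ALLOC_ATOMIC_TRY);
	}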
/*
* Slab:
*
- * No slab in OSX, use zone allocator to fake slab
+ * No slab in OSX, use zone allocator to simulate slab
*/
#define SLAB_HWCACHE_ALIGN 0
+#ifdef __DARWIN8__
+/*
+ * In Darwin8 we cannot use zalloc_noblock() (it is not exported by the
+ * kernel); moreover, direct use of the zone allocator is not recommended.
+ */
+#define CFS_INDIVIDUAL_ZONE (0)
+
+#if !CFS_INDIVIDUAL_ZONE
+#include <libkern/OSMalloc.h>
+typedef OSMallocTag mem_cache_t;
+#else
+typedef void* zone_t;
+typedef zone_t mem_cache_t;
+#endif
+
+#else /* !__DARWIN8__ */
+
+#define CFS_INDIVIDUAL_ZONE (1)
+
+typedef zone_t mem_cache_t;
+
+#endif /* !__DARWIN8__ */
+
+#define MC_NAME_MAX_LEN 64
+
typedef struct cfs_mem_cache {
- struct list_head link;
- zone_t zone;
- int size;
- char name [ZONE_NAME_MAX_LEN];
+ int mc_size;
+ mem_cache_t mc_cache;
+ struct list_head mc_link;
+ char mc_name [MC_NAME_MAX_LEN];
} cfs_mem_cache_t;
#define KMEM_CACHE_MAX_COUNT 64
#define KMEM_MAX_ZONE 8192
-extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long,
- void (*)(void *, cfs_mem_cache_t *, unsigned long),
- void (*)(void *, cfs_mem_cache_t *, unsigned long));
-extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
-extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
-extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
+int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
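
A usage sketch for the simplified (constructor/destructor-less) cache API;
the cache name and object type below are illustrative only:

	/* Sketch: create a cache, cycle one object through it, tear down. */
	struct foo_obj { int f_bar; };

	static void mem_cache_sketch(void)
	{
	        cfs_mem_cache_t *cache;
	        struct foo_obj  *obj;

	        cache = cfs_mem_cache_create("foo_cache", sizeof(*obj), 0, 0);
	        if (cache == NULL)
	                return;
	        obj = cfs_mem_cache_alloc(cache, CFS_ALLOC_STD);
	        if (obj != NULL)
	                cfs_mem_cache_free(cache, obj);
	        cfs_mem_cache_destroy(cache);
	}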
/*
* Misc
*/
-/* XXX fix me */
+/* XXX Liang: num_physpages... fix me */
#define num_physpages (64 * 1024)
#define CFS_DECL_MMSPACE
#define CFS_MMSPACE_OPEN do {} while(0)
#define CFS_MMSPACE_CLOSE do {} while(0)
-#define copy_from_user(kaddr, uaddr, size) copyin((caddr_t)uaddr, (caddr_t)kaddr, size)
-#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, (caddr_t)uaddr, size)
+#define copy_from_user(kaddr, uaddr, size) copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)kaddr, size)
+#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, CAST_USER_ADDR_T(uaddr), size)
-#error "need this define"
-#define strncpy_from_user(kaddr, uaddr, size) "something"
+#if 0
+static inline int strncpy_from_user(char *kaddr, char *uaddr, int size)
+{
+ size_t count;
+ return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count);
+}
+#endif
#if defined (__ppc__)
#define mb() __asm__ __volatile__ ("sync" : : : "memory")
#else /* !__KERNEL__ */
-typedef struct cfs_page{
- void *foo;
-} cfs_page_t;
+#define CFS_CACHE_SHIFT 12
+#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT)
+#include <libcfs/user-prim.h>
+
#endif /* __KERNEL__ */
#endif /* __XNU_CFS_MEM_H__ */
#include <sys/types.h>
#include <sys/systm.h>
-#ifndef __APPLE_API_PRIVATE
-#define __APPLE_API_PRIVATE
-#include <sys/user.h>
-#undef __APPLE_API_PRIVATE
-#else
-#include <sys/user.h>
-#endif
+#ifndef __DARWIN8__
+# ifndef __APPLE_API_PRIVATE
+# define __APPLE_API_PRIVATE
+# include <sys/user.h>
+# undef __APPLE_API_PRIVATE
+# else
+# include <sys/user.h>
+# endif
+# include <mach/mach_traps.h>
+# include <mach/thread_switch.h>
+# include <machine/cpu_number.h>
+#endif /* !__DARWIN8__ */
#include <sys/kernel.h>
#include <mach/thread_act.h>
#include <mach/mach_types.h>
-#include <mach/mach_traps.h>
-#include <mach/thread_switch.h>
#include <mach/time_value.h>
#include <kern/sched_prim.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <mach/machine/vm_param.h>
+#include <machine/machine_routines.h>
#include <kern/clock.h>
#include <kern/thread_call.h>
#include <sys/param.h>
* User can register/unregister a list of sysctl_oids
* sysctl_oid is data struct of osx's sysctl-entry
*/
+#define CONFIG_SYSCTL 1
+
typedef struct sysctl_oid * cfs_sysctl_table_t;
typedef cfs_sysctl_table_t cfs_sysctl_table_header_t;
-cfs_sysctl_table_header_t *register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg);
-void unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table);
+cfs_sysctl_table_header_t *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg);
+void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table);
/*
* Proc file system APIs, no /proc fs support in OSX
extern boolean_t assert_wait_possible(void);
extern void *get_bsdtask_info(task_t);
+#ifdef __DARWIN8__
+
+typedef struct {} cfs_task_t;
+#define cfs_current() ((cfs_task_t *)current_thread())
+#else /* !__DARWIN8__ */
+
typedef struct uthread cfs_task_t;
+
#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act()))
#define cfs_current() current_uthread()
+#endif /* !__DARWIN8__ */
+
+#define cfs_task_lock(t) do {;} while (0)
+#define cfs_task_unlock(t) do {;} while (0)
+
#define set_current_state(s) do {;} while (0)
#define reparent_to_init() do {;} while (0)
*
* OSX kernel thread can not be created with args,
* so we have to implement new APIs to create thread with args
- *
- * All requests to create kernel thread will create a new
- * thread instance of cfs_thread_agent, one by one.
- * cfs_thread_agent will call the caller's thread function
- * with argument supplied by caller.
*/
typedef int (*cfs_thread_t)(void *);
extern task_t kernel_task;
-struct kernel_thread_arg
-{
- spinlock_t lock;
- atomic_t inuse;
- cfs_thread_t func;
- void *arg;
-};
-
-extern struct kernel_thread_arg cfs_thread_arg;
-extern void cfs_thread_agent(void);
-
-#define THREAD_ARG_FREE 0
-#define THREAD_ARG_HOLD 1
-#define THREAD_ARG_RECV 2
-
-#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v)
-#define get_targ_stat(a) atomic_read(&(a)->inuse)
-
-/*
- * Hold the thread argument and set the status of thread_status
- * to THREAD_ARG_HOLD, if the thread argument is held by other
- * threads (It's THREAD_ARG_HOLD already), current-thread has to wait.
- */
-#define thread_arg_hold(pta, _func, _arg) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_FREE) { \
- set_targ_stat((pta), THREAD_ARG_HOLD); \
- (pta)->arg = (void *)_arg; \
- (pta)->func = _func; \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- schedule(); \
- } while(1); \
-
-/*
- * Release the thread argument if the thread argument has been
- * received by the child-thread (Status of thread_args is
- * THREAD_ARG_RECV), otherwise current-thread has to wait.
- * After release, the thread_args' status will be set to
- * THREAD_ARG_FREE, and others can re-use the thread_args to
- * create new kernel_thread.
- */
-#define thread_arg_release(pta) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_RECV) { \
- (pta)->arg = NULL; \
- (pta)->func = NULL; \
- set_targ_stat(pta, THREAD_ARG_FREE); \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- schedule(); \
- } while(1)
-
-/*
- * Receive thread argument (Used in child thread), set the status
- * of thread_args to THREAD_ARG_RECV.
- */
-#define __thread_arg_recv_fin(pta, _func, _arg, fin) \
- do { \
- spin_lock(&(pta)->lock); \
- if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \
- if (fin) \
- set_targ_stat(pta, THREAD_ARG_RECV);\
- _arg = (pta)->arg; \
- _func = (pta)->func; \
- spin_unlock(&(pta)->lock); \
- break; \
- } \
- spin_unlock(&(pta)->lock); \
- schedule(); \
- } while (1); \
-
-/*
- * Just set the thread_args' status to THREAD_ARG_RECV
- */
-#define thread_arg_fin(pta) \
- do { \
- spin_lock(&(pta)->lock); \
- assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \
- set_targ_stat(pta, THREAD_ARG_RECV); \
- spin_unlock(&(pta)->lock); \
- } while(0)
-
-#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1)
-#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0)
-
/*
* cloning flags, no use in OSX, just copy them from Linux
*/
struct ksleep_link wl_ksleep_link;
} cfs_waitlink_t;
+typedef int cfs_task_state_t;
+
+#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE
+#define CFS_TASK_UNINT THREAD_UNINT
+
void cfs_waitq_init(struct cfs_waitq *waitq);
void cfs_waitlink_init(struct cfs_waitlink *link);
void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
struct cfs_waitlink *link);
void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
void cfs_waitq_broadcast(struct cfs_waitq *waitq);
-void cfs_waitq_wait(struct cfs_waitlink *link);
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+ cfs_task_state_t state,
cfs_duration_t timeout);
/*
* Thread schedule APIs.
*/
#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12))
+extern void thread_set_timer_deadline(uint64_t deadline);
+extern void thread_cancel_timer(void);
-static inline int schedule_timeout(int64_t timeout)
+static inline int cfs_schedule_timeout(int state, int64_t timeout)
{
int result;
- AbsoluteTime clock_current;
- AbsoluteTime clock_delay;
- result = assert_wait((event_t)current_uthread(), THREAD_UNINT);
- clock_get_uptime(&clock_current);
- nanoseconds_to_absolutetime(timeout, &clock_delay);
- ADD_ABSOLUTETIME(&clock_current, &clock_delay);
- thread_set_timer_deadline(clock_current);
+#ifdef __DARWIN8__
+ result = assert_wait((event_t)current_thread(), state);
+#else
+ result = assert_wait((event_t)current_uthread(), state);
+#endif
+ if (timeout > 0) {
+ uint64_t expire;
+ nanoseconds_to_absolutetime(timeout, &expire);
+ clock_absolutetime_interval_to_deadline(expire, &expire);
+ thread_set_timer_deadline(expire);
+ }
if (result == THREAD_WAITING)
result = thread_block(THREAD_CONTINUE_NULL);
- thread_cancel_timer();
+ if (timeout > 0)
+ thread_cancel_timer();
if (result == THREAD_TIMED_OUT)
result = 0;
else
return result;
}
-#define schedule() \
- do { \
- if (assert_wait_possible()) \
- schedule_timeout(1); \
- else \
- schedule_timeout(0); \
- } while (0)
+#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_JIFFY)
+#define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick)
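
For example (assuming cfs_time_seconds() from darwin-time.h later in this
patch, which returns nanoseconds):

	/* Sketch: uninterruptible ~1s sleep, then a one-jiffy yield. */
	static void pause_sketch(void)
	{
	        cfs_pause(cfs_time_seconds(1)); /* timeout in nanoseconds */
	        cfs_schedule();                 /* blocks for CFS_JIFFY */
	}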
+
+#define __wait_event(wq, condition) \
+do { \
+ struct cfs_waitlink __wait; \
+ \
+ cfs_waitlink_init(&__wait); \
+ for (;;) { \
+ cfs_waitq_add(&wq, &__wait); \
+ if (condition) \
+ break; \
+ cfs_waitq_wait(&__wait, CFS_TASK_UNINT); \
+ cfs_waitq_del(&wq, &__wait); \
+ } \
+ cfs_waitq_del(&wq, &__wait); \
+} while (0)
-#define __wait_event(wq, condition) \
-do { \
- struct cfs_waitlink __wait; \
- \
- cfs_waitlink_init(&__wait); \
- for (;;) { \
- cfs_waitq_add(&wq, &__wait); \
- if (condition) \
- break; \
- cfs_waitq_wait(&__wait); \
- cfs_waitq_del(&wq, &__wait); \
- } \
- cfs_waitq_del(&wq, &__wait); \
+#define wait_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __wait_event(wq, condition); \
} while (0)
-#define wait_event(wq, condition) \
-do { \
- if (condition) \
- break; \
- __wait_event(wq, condition); \
+#define __wait_event_interruptible(wq, condition, ex, ret) \
+do { \
+ struct cfs_waitlink __wait; \
+ \
+ cfs_waitlink_init(&__wait); \
+ for (;;) { \
+ if (ex == 0) \
+ cfs_waitq_add(&wq, &__wait); \
+ else \
+ cfs_waitq_add_exclusive(&wq, &__wait); \
+ if (condition) \
+ break; \
+ if (!cfs_signal_pending()) { \
+ cfs_waitq_wait(&__wait, \
+ CFS_TASK_INTERRUPTIBLE); \
+ cfs_waitq_del(&wq, &__wait); \
+ continue; \
+ } \
+ ret = -ERESTARTSYS; \
+ break; \
+ } \
+ cfs_waitq_del(&wq, &__wait); \
} while (0)
-#define wait_event_interruptible(wq, condition) \
-({ \
- wait_event(wq, condition); \
- 0; \
+#define wait_event_interruptible(wq, condition) \
+({ \
+ int __ret = 0; \
+ if (!condition) \
+ __wait_event_interruptible(wq, condition, \
+ 0, __ret); \
+ __ret; \
})
+#define wait_event_interruptible_exclusive(wq, condition) \
+({ \
+ int __ret = 0; \
+ if (!condition) \
+ __wait_event_interruptible(wq, condition, \
+ 1, __ret); \
+ __ret; \
+})
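
A minimal sketch of the intended pattern (the wait queue, flag, and function
name are hypothetical; the queue is assumed to have been set up with
cfs_waitq_init()):

	/* Sketch: sleep until 'sketch_done' is set, aborting on a signal. */
	static cfs_waitq_t  sketch_waitq;
	static volatile int sketch_done;

	static int wait_for_done_sketch(void)
	{
	        int rc;

	        rc = wait_event_interruptible(sketch_waitq, sketch_done != 0);
	        /* rc == -ERESTARTSYS if a signal arrived first */
	        return rc;
	}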
+
+#ifndef __DARWIN8__
extern void wakeup_one __P((void * chan));
+#endif
/* only used in tests */
-#define wake_up_process(p) \
- do { \
- wakeup_one(p); \
+#define wake_up_process(p) \
+ do { \
+ wakeup_one((caddr_t)p); \
} while (0)
/* used in couple of places */
cfs_waitlink_init(&link);
cfs_waitq_add(waitq, &link);
- cfs_waitq_wait(&link);
+ cfs_waitq_wait(&link, CFS_TASK_UNINT);
cfs_waitq_del(waitq, &link);
}
/*
- * XXX
* Signal
+ * We don't use signal_lock/signal_unlock in cfs_sigmask_lock()
+ * and cfs_sigmask_unlock() because they may be called from within
+ * xnu's signal handling KPIs.
*/
-#define cfs_sigmask_lock(t, f) do { f = 0; } while(0)
-#define cfs_sigmask_unlock(t, f) do { f = 0; } while(0)
-#define cfs_signal_pending(t) (0)
-
-#define cfs_siginitset(pmask, sigs) \
- do { \
- sigset_t __sigs = sigs & (~sigcantmask); \
- *(pmask) = __sigs; \
- } while(0)
-
-#define cfs_siginitsetinv(pmask, sigs) \
- do { \
- sigset_t __sigs = ~(sigs | sigcantmask); \
- *(pmask) = __sigs; \
- } while(0)
-
-#define cfs_recalc_sigpending(ut) \
- do { \
- (ut)->uu_siglist = (ut)->uu_siglist & ~(ut)->uu_sigmask;\
- } while (0)
-#define cfs_sigfillset(s) \
- do { \
- memset((s), -1, sizeof(sigset_t)); \
- } while(0)
-
-#define cfs_set_sig_blocked(ut, b) do {(ut)->uu_sigmask = b;} while(0)
-#define cfs_get_sig_blocked(ut) (&(ut)->uu_sigmask)
+typedef sigset_t cfs_sigset_t;
+#define cfs_sigmask_lock(f) do { f = 0; } while (0)
+#define cfs_sigmask_unlock(f) do { f = 0; } while (0)
+int cfs_signal_pending(void);
+/*
+ * We don't need to recalc_sigpending because xnu always
+ * calls SHOULDissignal to check whether there are pending signals.
+ */
+#define cfs_recalc_sigpending() do {} while (0)
+/*
+ * Clear all pending signals.
+ */
+#define cfs_clear_sigpending() clear_procsiglist(current_proc(), -1)
#define SIGNAL_MASK_ASSERT()
/*
* CPU
*/
-#include <machine/cpu_number.h>
/* Runs on the PowerPC G5, which is PPC64 */
#define SMP_CACHE_BYTES 128
#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
-/* XXX How to get the value of NCPUS from xnu ? */
#define NR_CPUS 2
-#define smp_processor_id() cpu_number()
-#define smp_num_cpus NR_CPUS
+
+extern unsigned int cpu_number(void);
+#define smp_num_cpus cpu_number()
+/*
+ * XXX Liang: patch xnu and export current_processor()?
+ *
+ * #define smp_processor_id() current_processor()
+ */
+#define smp_processor_id() 0
/* XXX smp_call_function is not supported in xnu */
#define smp_call_function(f, a, n, w) do {} while(0)
+int cfs_online_cpus(void);
/*
* Misc
*/
+extern int is_suser(void);
+
#ifndef likely
#define likely(exp) (exp)
#endif
#define inter_module_get(n) cfs_symbol_get(n)
#define inter_module_put(n) cfs_symbol_put(n)
+static inline int request_module(char *name)
+{
+ return (-EINVAL);
+}
+
#ifndef __exit
#define __exit
#endif
#define MODULE_PARM_DESC(a, b)
#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
-#define LINUX_VERSION_CODE (2*200+5*10+0)
+#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
-#define NR_IRQS 512
-#define in_interrupt() (0)
+#define NR_IRQS 512
+#define in_interrupt() ml_at_interrupt_context()
#define KERN_EMERG "<0>" /* system is unusable */
#define KERN_ALERT "<1>" /* action must be taken immediately */
/* XXX */
#define IS_ERR(p) (0)
+#else /* !__KERNEL__ */
+
+typedef struct cfs_proc_dir_entry{
+ void *data;
+}cfs_proc_dir_entry_t;
+
+#include <libcfs/user-prim.h>
+#define __WORDSIZE 32
+
+#endif /* END __KERNEL__ */
/*
* Error number
*/
+#ifndef EPROTO
+#define EPROTO EPROTOTYPE
+#endif
+#ifndef EBADR
#define EBADR EBADRPC
-#define ERESTARTSYS ERESTART
+#endif
+#ifndef ERESTARTSYS
+#define ERESTARTSYS 512
+#endif
+#ifndef EDEADLOCK
#define EDEADLOCK EDEADLK
+#endif
+#ifndef ECOMM
#define ECOMM EINVAL
+#endif
+#ifndef ENODATA
#define ENODATA EINVAL
+#endif
+#ifndef ENOTSUPP
+#define ENOTSUPP EINVAL
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+# define __BIG_ENDIAN
#else
-#define __WORDSIZE 32
-#endif /* __KERNEL__ */
+# define __LITTLE_ENDIAN
+#endif
-#endif /* __LINUX__ */
+#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * Lustre Light Super operations
+ * Implementation of standard libcfs synchronization primitives for XNU
+ * kernel.
*
* Copyright (c) 2004 Cluster File Systems, Inc.
*
#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
#endif
-#define XNU_SYNC_DEBUG (0)
+#define XNU_SYNC_DEBUG (1)
#if XNU_SYNC_DEBUG
#define ON_SYNC_DEBUG(e) e
KCOND_MAGIC = 0xb01dface,
KRW_MAGIC = 0xdabb1edd,
KSPIN_MAGIC = 0xca11ab1e,
+ KRW_SPIN_MAGIC = 0xbabeface,
KSLEEP_CHAN_MAGIC = 0x0debac1e,
KSLEEP_LINK_MAGIC = 0xacc01ade,
KTIMER_MAGIC = 0xbefadd1e
*/
#define SMP (1)
+#include <libcfs/list.h>
+
+#ifdef __DARWIN8__
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <kern/locks.h>
+
+/*
+ * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all),
+ * so we use lck_spin_t. We could hack out lck_spin_t easily:
+ *
+ * typedef struct {
+ * unsigned int opaque[3];
+ * } lck_spin_t;
+ *
+ * But it's not really necessary.
+ */
+typedef lck_spin_t *xnu_spin_t;
+/*
+ * wait_queue is not available in Darwin8 (wait_queue_* are not exported),
+ * so we use assert_wait/wakeup/wakeup_one (via the kernel's internal
+ * wait-queue hash).
+ */
+typedef void * xnu_wait_queue_t;
+
+/* DARWIN8 */
+#else
+
+#include <mach/mach_types.h>
+#include <sys/types.h>
#include <kern/simple_lock.h>
-#include <libcfs/list.h>
+typedef hw_lock_data_t xnu_spin_t;
+typedef struct wait_queue xnu_wait_queue_t;
+
+/* DARWIN8 */
+#endif
struct kspin {
#if SMP
- hw_lock_data_t lock;
+ xnu_spin_t lock;
#endif
#if XNU_SYNC_DEBUG
- unsigned magic;
- thread_t owner;
+ unsigned magic;
+ thread_t owner;
#endif
};
-/*
- * XXX nikita: we cannot use simple_* functions, because bsd/sys/lock.h
- * redefines them to nothing. Use low-level hw_lock_* instead.
- */
-
void kspin_init(struct kspin *spin);
void kspin_done(struct kspin *spin);
void kspin_lock(struct kspin *spin);
#define kspin_isnotlocked(s) (1)
#endif
+/* ------------------------- rw spinlock ----------------------- */
+struct krw_spin {
+ struct kspin guard;
+ int count;
+#if XNU_SYNC_DEBUG
+ unsigned magic;
+#endif
+};
+
+void krw_spin_init(struct krw_spin *sem);
+void krw_spin_done(struct krw_spin *sem);
+void krw_spin_down_r(struct krw_spin *sem);
+void krw_spin_down_w(struct krw_spin *sem);
+void krw_spin_up_r(struct krw_spin *sem);
+void krw_spin_up_w(struct krw_spin *sem);
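
These primitives back the rwlock_t remapping earlier in this patch
(read_lock/write_lock and friends), so reader/writer callers stay
source-compatible; a brief sketch:

	/* Sketch: reader-side critical section over a krw_spin-based
	 * rwlock_t, assumed to have been set up with rwlock_init(). */
	static rwlock_t sketch_rwlock;

	static void reader_sketch(void)
	{
	        read_lock(&sketch_rwlock);
	        /* ... read shared state ... */
	        read_unlock(&sketch_rwlock);
	}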
+
/* ------------------------- semaphore ------------------------- */
struct ksem {
struct kspin guard;
- struct wait_queue q;
+ xnu_wait_queue_t q;
int value;
#if XNU_SYNC_DEBUG
unsigned magic;
void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link);
void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link);
-void ksleep_wait(struct ksleep_chan *chan);
-int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout);
+void ksleep_wait(struct ksleep_chan *chan, int state);
+int64_t ksleep_timedwait(struct ksleep_chan *chan, int state, uint64_t timeout);
void ksleep_wake(struct ksleep_chan *chan);
void ksleep_wake_all(struct ksleep_chan *chan);
void ksleep_wake_nr(struct ksleep_chan *chan, int nr);
-#define KSLEEP_LINK_DECLARE(name) \
-{ \
- .flags = 0, \
- .event = 0, \
- .hits = 0, \
- .linkage = CFS_LIST_HEAD_INIT(name.linkage), \
- .magic = KSLEEP_LINK_MAGIC \
+#define KSLEEP_LINK_DECLARE(name) \
+{ \
+ .flags = 0, \
+ .event = 0, \
+ .hits = 0, \
+ .linkage = CFS_LIST_HEAD(name.linkage), \
+ .magic = KSLEEP_LINK_MAGIC \
}
/* ------------------------- timer ------------------------- */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_TCPIP_H__
+#define __LIBCFS_DARWIN_TCPIP_H__
+
+#ifdef __KERNEL__
+#include <sys/socket.h>
+
+#ifdef __DARWIN8__
+
+struct socket;
+
+typedef void (*so_upcall)(socket_t sock, void* arg, int waitf);
+
+#define CFS_SOCK_UPCALL 0x1
+
+typedef struct cfs_socket {
+ socket_t s_so;
+ int s_flags;
+ so_upcall s_upcall;
+ void *s_upcallarg;
+} cfs_socket_t;
+
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+#endif
+
+/* cfs_socket_t to bsd socket */
+#define C2B_SOCK(s) ((s)->s_so)
+/* bsd socket to cfs_socket_t */
+#define B2C_SOCK(s) container_of((s), cfs_socket_t, s_so)
+
+static inline int get_sock_intopt(socket_t so, int opt)
+{
+ int val, len = sizeof(val);
+ int rc;
+
+ /*
+ * sock_getsockopt() takes a lock (a mutex) on the socket,
+ * so it may block; be careful where you call it.
+ */
+ rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len);
+ assert(rc == 0);
+ return val;
+}
+
+#define SOCK_ERROR(s) get_sock_intopt(C2B_SOCK(s), SO_ERROR)
+/* #define SOCK_WMEM_QUEUED(s) (0) */
+#define SOCK_WMEM_QUEUED(s) get_sock_intopt(C2B_SOCK(s), SO_NWRITE)
+/* XXX Liang: no reliable way to get it in Darwin8.x */
+#define SOCK_TEST_NOSPACE(s) (0)
+
+#else /* !__DARWIN8__ */
+
+#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc)
+#define SOCK_ERROR(so) ((so)->so_error)
+
+#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat)
+
+#endif /* !__DARWIN8__ */
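
For instance, checking a socket for a pending error through the Darwin8
wrapper above (hedged sketch; the function name and the printf diagnostic
are illustrative):

	/* Sketch: report a pending error on a cfs_socket_t. */
	static int sock_error_sketch(cfs_socket_t *cs)
	{
	        int err = SOCK_ERROR(cs); /* SO_ERROR via get_sock_intopt() */

	        if (err != 0)
	                printf("socket error: %d\n", err);
	        return err;
	}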
+
+#endif /* __KERNEL END */
+
+#endif /* __XNU_CFS_TYPES_H__ */
* int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
* int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
*
- * cfs_duration_t cfs_time_minimal_timeout(void)
- *
* CFS_TIME_FORMAT
* CFS_DURATION_FORMAT
*
#include <sys/types.h>
#include <sys/systm.h>
-#ifndef __APPLE_API_PRIVATE
-#define __APPLE_API_PRIVATE
-#include <sys/user.h>
-#undef __APPLE_API_PRIVATE
-#else
-#include <sys/user.h>
-#endif
-
#include <sys/kernel.h>
-#include <mach/thread_act.h>
#include <mach/mach_types.h>
-#include <mach/mach_traps.h>
-#include <mach/thread_switch.h>
#include <mach/time_value.h>
-#include <kern/sched_prim.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <mach/machine/vm_param.h>
#include <kern/clock.h>
-#include <kern/thread_call.h>
#include <sys/param.h>
-#include <sys/vm.h>
#include <libcfs/darwin/darwin-types.h>
#include <libcfs/darwin/darwin-utils.h>
#include <libcfs/darwin/darwin-lock.h>
+/*
+ * There are three ways to measure time in OS X:
+ * 1. nanoseconds
+ * 2. absolute time (one abstime unit equals the length of one bus cycle);
+ *    thread and timer scheduling is done in absolute time, but the length
+ *    of the abstime unit differs between machines, so we don't use it.
+ * 3. clock intervals (1 sec = 100 Hz); but clock intervals are only
+ *    accepted by KPIs such as tsleep().
+ *
+ * We use nanoseconds (uptime, not calendar time):
+ *
+ * clock_get_uptime() : absolute time since bootup
+ * nanouptime()       : nanoseconds since bootup
+ * microuptime()      : microseconds since bootup
+ * nanotime()         : nanoseconds since the epoch
+ * microtime()        : microseconds since the epoch
+ */
typedef u_int64_t cfs_time_t; /* nanoseconds */
typedef int64_t cfs_duration_t;
{
struct timespec instant;
- nanotime(&instant);
- return ((u_int64_t)instant.tv_sec) * ONE_BILLION + instant.tv_nsec;
+ nanouptime(&instant);
+ return ((u_int64_t)instant.tv_sec) * NSEC_PER_SEC + instant.tv_nsec;
}
static inline time_t cfs_time_current_sec(void)
{
struct timespec instant;
- nanotime(&instant);
+ nanouptime(&instant);
return instant.tv_sec;
}
static inline void cfs_fs_time_current(cfs_fs_time_t *t)
{
- *t = time;
+ microtime((struct timeval *)t);
}
static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
return t->tv_sec;
}
-static inline cfs_duration_t cfs_duration_build(int64_t nano)
-{
- return nano;
-}
-
-
static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
{
*v = *t;
static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
{
s->tv_sec = t->tv_sec;
- s->tv_nsec = t->tv_usec * 1000;
+ s->tv_nsec = t->tv_usec * NSEC_PER_USEC;
}
static inline cfs_duration_t cfs_time_seconds(int seconds)
{
- return cfs_duration_build(ONE_BILLION * (int64_t)seconds);
-}
-
-static inline cfs_time_t cfs_time_shift(int seconds)
-{
- return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
+ return (NSEC_PER_SEC * (int64_t)seconds);
}
/*
*/
static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
{
- return ((int64_t)t->tv_sec) * ONE_BILLION + t->tv_usec;
+ return ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC;
}
static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
static inline time_t cfs_duration_sec(cfs_duration_t d)
{
- return d / ONE_BILLION;
+ return d / NSEC_PER_SEC;
}
static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
{
- s->tv_sec = d / ONE_BILLION;
- s->tv_usec = (d - s->tv_sec * ONE_BILLION) / 1000;
+ s->tv_sec = d / NSEC_PER_SEC;
+ s->tv_usec = (d - s->tv_sec * NSEC_PER_SEC) / NSEC_PER_USEC;
}
static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
{
- s->tv_sec = d / ONE_BILLION;
- s->tv_nsec = d - ((int64_t)s->tv_sec) * ONE_BILLION;
+ s->tv_sec = d / NSEC_PER_SEC;
+ s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC;
}
-static inline cfs_duration_t cfs_time_minimal_timeout(void)
-{
- return ONE_BILLION / (u_int64_t)hz;
-}
-
-/* inline function cfs_time_minimal_timeout() can not be used to
- * initiallize static variable */
-#define CFS_MIN_DELAY (ONE_BILLION / (u_int64_t)100)
+/*
+ * One jiffy (in nanoseconds)
+ *
+ * osfmk/kern/sched_prim.c
+ * #define DEFAULT_PREEMPTION_RATE 100
+ */
+#define CFS_JIFFY (NSEC_PER_SEC / (u_int64_t)100)
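
Since cfs_time_t and cfs_duration_t are both plain nanosecond counts in
this port, deadline arithmetic is direct integer math; a small sketch
(the helper is invented and not thread-safe):

	/* Sketch: arm a 30-second deadline, then test for expiry. */
	static int deadline_sketch(void)
	{
	        static cfs_time_t deadline = 0;

	        if (deadline == 0)
	                deadline = cfs_time_current() + cfs_time_seconds(30);
	        return cfs_time_current() >= deadline;
	}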
#define LTIME_S(t) (t)
#include <mach/mach_types.h>
#include <sys/types.h>
+#ifndef _BLKID_TYPES_H
+#define _BLKID_TYPES_H
+#endif
+
typedef u_int8_t __u8;
typedef u_int16_t __u16;
typedef u_int32_t __u32;
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(a) ((a)->counter)
#define atomic_set(a, v) (((a)->counter) = (v))
+#ifdef __DARWIN8__
+/* The OS*Atomic() primitives return the pre-operation value; adjust to
+ * the new value so that atomic_*_and_test() below keep working. */
+#define atomic_add(v, a) (OSAddAtomic(v, (SInt32 *)&((a)->counter)) + (v))
+#define atomic_sub(v, a) (OSAddAtomic(-(v), (SInt32 *)&((a)->counter)) - (v))
+#define atomic_inc(a) (OSIncrementAtomic((SInt32 *)&((a)->counter)) + 1)
+#define atomic_dec(a) (OSDecrementAtomic((SInt32 *)&((a)->counter)) - 1)
+#else /* !__DARWIN8__ */
#define atomic_add(v, a) hw_atomic_add((uint32_t *)&((a)->counter), v)
#define atomic_sub(v, a) hw_atomic_sub((uint32_t *)&((a)->counter), v)
#define atomic_inc(a) atomic_add(1, a)
#define atomic_dec(a) atomic_sub(1, a)
+#endif /* !__DARWIN8__ */
#define atomic_sub_and_test(v, a) ( atomic_sub(v, a) == 0 )
#define atomic_dec_and_test(a) ( atomic_dec(a) == 0 )
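
The usual reference-counting idiom works on both Darwin generations with
these wrappers; a short sketch (object layout is illustrative):

	/* Sketch: drop a reference and free the object on the last put. */
	struct refobj {
	        atomic_t ro_refcount;
	        /* ... payload ... */
	};

	static void refobj_put_sketch(struct refobj *obj)
	{
	        if (atomic_dec_and_test(&obj->ro_refcount))
	                cfs_free(obj);
	}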
#include <libsa/mach/mach.h>
-typedef uint64_t loff_t;
+typedef off_t loff_t;
#else /* !__KERNEL__ */
#include <stdint.h>
-typedef uint64_t loff_t;
+typedef off_t loff_t;
#endif /* __KERNEL END */
-#ifndef __LIBCFS_DARWIN_XNU_UTILS_H__
-#define __LIBCFS_DARWIN_XNU_UTILS_H__
+#ifndef __LIBCFS_DARWIN_UTILS_H__
+#define __LIBCFS_DARWIN_UTILS_H__
#ifndef __LIBCFS_LIBCFS_H__
#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
#define HIPQUAD NIPQUAD
+#ifndef LIST_CIRCLE
+#define LIST_CIRCLE(elm, field) \
+ do { \
+ (elm)->field.le_prev = &(elm)->field.le_next; \
+ } while (0)
+#endif
+
#endif /* __XNU_UTILS_H__ */
#include <libcfs/darwin/darwin-prim.h>
#include <lnet/lnet.h>
-#define our_cond_resched() schedule_timeout(1);
+#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1)
#ifdef CONFIG_SMP
#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
#endif
#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */
-#define LBUG_WITH_LOC(file, func, line) do {libcfs_catastrophe = 1;} while(0)
-
/* --------------------------------------------------------------------- */
#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0)
#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0)
-#define printk(format, args...) printf(format, ## args)
+#define num_online_cpus() cfs_online_cpus()
/******************************************************************************/
-/* Module parameter support */
-#define CFS_MODULE_PARM(name, t, type, perm, desc) \
- this should force a syntax error
+/* XXX Liang: there is no module parameter support in OSX */
+#define CFS_MODULE_PARM(name, t, type, perm, desc)
#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */
/******************************************************************************/
#endif
#include <mach/mach_types.h>
+#include <sys/errno.h>
#include <string.h>
#include <libcfs/darwin/darwin-types.h>
#include <libcfs/darwin/darwin-time.h>
#include <libcfs/darwin/darwin-mem.h>
#include <libcfs/darwin/darwin-lock.h>
#include <libcfs/darwin/darwin-fs.h>
+#include <libcfs/darwin/darwin-tcpip.h>
#ifdef __KERNEL__
# include <sys/types.h>
*
* Implementation is in darwin-curproc.c
*/
-#define CFS_CURPROC_COMM_MAX (sizeof ((struct proc *)0)->p_comm)
+#define CFS_CURPROC_COMM_MAX MAXCOMLEN
/*
* XNU has no capabilities
*/
typedef int cfs_kernel_cap_t;
+#ifdef __KERNEL__
+enum {
+ /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
+ CFS_STACK_TRACE_DEPTH = 16
+};
+
+struct cfs_stack_trace {
+ void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+#define printk(format, args...) printf(format, ## args)
+
+#ifdef WITH_WATCHDOG
+#undef WITH_WATCHDOG
+#endif
+
+#endif /* __KERNEL__ */
+
#endif /* _XNU_LIBCFS_H */
#define LIBCFS_DEBUG
#include <libcfs/libcfs.h>
+#include <lnet/types.h>
#if defined(__linux__)
#include <libcfs/linux/kp30.h>
#elif defined(__APPLE__)
#include <libcfs/darwin/kp30.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/kp30.h>
#else
#error Unsupported operating system
#endif
# else
# define LASSERT(e)
# define LASSERTF(cond, args...) do { } while (0)
-# endif
+# endif /* LIBCFS_DEBUG */
# define LBUG() assert(0)
# define printk(format, args...) printf (format, ## args)
# define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1,size); } while (0);
CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
return 1;
}
- if (libcfs_ioctl_packlen(data) != data->ioc_len ) {
+ if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len) {
CERROR ("LIBCFS ioctl: packlen != ioc_len\n");
return 1;
}
#include <libcfs/linux/libcfs.h>
#elif defined(__APPLE__)
#include <libcfs/darwin/libcfs.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/libcfs.h>
#else
#error Unsupported operating system.
#endif
# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
#endif
+/* cardinality of array */
+#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0])))
+
+#if !defined(container_of)
+/* given a pointer @ptr to the field @member embedded into type (usually
+ * struct) @type, return pointer to the embedding instance of @type. */
+#define container_of(ptr, type, member) \
+ ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+#endif
+
+/*
+ * true iff @i is power-of-2
+ */
+#define IS_PO2(i) \
+({ \
+ typeof(i) __i; \
+ \
+ __i = (i); \
+ !(__i & (__i - 1)); \
+})
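
For example (struct and field names are illustrative):

	/* Sketch: recover the enclosing struct from an embedded list head. */
	struct item {
	        int              it_value;
	        struct list_head it_linkage;
	};

	static struct item *item_from_link_sketch(struct list_head *link)
	{
	        return container_of(link, struct item, it_linkage);
	}

	/* IS_PO2(4096) evaluates to 1, IS_PO2(4095) to 0. */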
+
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
/*
extern unsigned int libcfs_stack;
extern unsigned int libcfs_debug;
extern unsigned int libcfs_printk;
+extern unsigned int libcfs_debug_binary;
/* Has there been an LBUG? */
extern unsigned int libcfs_catastrophe;
} \
} while (0)
-#elif defined(LUSTRE_UTILS)
-
-#define CDEBUG(mask, format, a...) \
-do { \
- if ((mask) & (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)) \
- fprintf(stderr, "(%s:%d:%s()) " format, \
- __FILE__, __LINE__, __FUNCTION__, ## a); \
-} while (0)
-#define CDEBUG_LIMIT CDEBUG
-
-#else /* !__KERNEL__ && !LUSTRE_UTILS*/
+#elif defined(__arch_lib__) && !defined(LUSTRE_UTILS)
#define CDEBUG(mask, format, a...) \
do { \
} while (0)
#define CDEBUG_LIMIT CDEBUG
+#else
+
+#define CDEBUG(mask, format, a...) \
+do { \
+ if ((mask) & (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)) \
+ fprintf(stderr, "(%s:%d:%s()) " format, \
+ __FILE__, __LINE__, __FUNCTION__, ## a); \
+} while (0)
+#define CDEBUG_LIMIT CDEBUG
+
#endif /* !__KERNEL__ */
#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a)
goto label; \
} while (0)
-#define CDEBUG_ENTRY_EXIT 1
+#define CDEBUG_ENTRY_EXIT (1)
#if CDEBUG_ENTRY_EXIT
/*
#endif /* !CDEBUG_ENTRY_EXIT */
+/*
+ * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
+ * the Lustre RETURN(NULL) macro.
+ */
+#if defined(NULL)
+#undef NULL
+#endif
+
+/*
+ * Define lbug_with_loc for your own platform.
+ */
+void lbug_with_loc(char *file,
+ const char *func,
+ const int line) __attribute__((noreturn));
+
+#define LBUG_WITH_LOC(file, func, line) \
+do { \
+ libcfs_catastrophe = 1; \
+ lbug_with_loc(file, func, line); \
+} while (0)
+
+#define NULL ((void *)0)
#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID
#define DECLARE_IOCTL_HANDLER(ident, func) \
struct libcfs_ioctl_handler ident = { \
- .item = CFS_LIST_HEAD_INIT(ident.item), \
- .handle_ioctl = func \
+ /* .item = */ CFS_LIST_HEAD_INIT(ident.item), \
+ /* .handle_ioctl = */ func \
}
int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
* touch it once to enable it. */
struct lc_watchdog *lc_watchdog_add(int time,
void (*cb)(struct lc_watchdog *,
- struct task_struct *,
+ cfs_task_t *,
void *),
void *data);
/* Dump a debug log */
void lc_watchdog_dumplog(struct lc_watchdog *lcw,
- struct task_struct *tsk,
+ cfs_task_t *tsk,
void *data);
/* __KERNEL__ */
cfs_fs_time_t t;
cfs_fs_time_current(&t);
- return cfs_fs_time_sec(&t);
+ return (time_t)cfs_fs_time_sec(&t);
+}
+
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+ return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds));
+}
+
+static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
+ struct timeval *result)
+{
+ long r = (long) (
+ (large->tv_sec - small->tv_sec) * ONE_MILLION +
+ (large->tv_usec - small->tv_usec));
+ if (result != NULL) {
+ result->tv_usec = do_div(r, ONE_MILLION);
+ result->tv_sec = r;
+ }
+ return r;
}
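
cfs_timeval_sub() returns the difference in microseconds and, when given a
result pointer, normalises it back into a timeval; a hedged usage sketch:

	/* Sketch: time an operation bracketed by two timevals. */
	static long elapsed_usec_sketch(struct timeval *start,
	                                struct timeval *end)
	{
	        struct timeval delta;

	        /* total microseconds; delta holds the normalised sec/usec */
	        return cfs_timeval_sub(end, start, &delta);
	}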
#define CFS_RATELIMIT(seconds) \
}
/*
- * Portable memory allocator API (draft)
+ * Universal memory allocator API
*/
enum cfs_alloc_flags {
/* allocation is not allowed to block */
CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO,
};
-#define CFS_SLAB_ATOMIC CFS_ALLOC_ATOMIC
-#define CFS_SLAB_WAIT CFS_ALLOC_WAIT
-#define CFS_SLAB_ZERO CFS_ALLOC_ZERO
-#define CFS_SLAB_FS CFS_ALLOC_FS
-#define CFS_SLAB_IO CFS_ALLOC_IO
-#define CFS_SLAB_STD CFS_ALLOC_STD
-#define CFS_SLAB_USER CFS_ALLOC_USER
-
/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */
-enum cfs_page_alloc_flags {
+enum cfs_alloc_page_flags {
/* allow to return page beyond KVM. It has to be mapped into KVM by
* cfs_page_map(); */
CFS_ALLOC_HIGH = (1 << 5),
CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH,
};
+/*
+ * portable UNIX device file identification. (This is not _very_
+ * portable. Probably makes no sense for Windows.)
+ */
+/*
+ * Platform defines
+ *
+ * cfs_rdev_t
+ */
+
+typedef unsigned int cfs_major_nr_t;
+typedef unsigned int cfs_minor_nr_t;
+
+/*
+ * Defined by platform.
+ */
+cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor);
+cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev);
+cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev);
+
+/*
+ * Generic on-wire rdev format.
+ */
+
+typedef __u32 cfs_wire_rdev_t;
+
+cfs_wire_rdev_t cfs_wire_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor);
+cfs_major_nr_t cfs_wire_rdev_major(cfs_wire_rdev_t rdev);
+cfs_minor_nr_t cfs_wire_rdev_minor(cfs_wire_rdev_t rdev);
+
+/*
+ * Drop into debugger, if possible. Implementation is provided by platform.
+ */
+
+void cfs_enter_debugger(void);
+
+/*
+ * Defined by platform
+ */
+void cfs_daemonize(char *str);
+#ifdef __KERNEL__
+void cfs_block_allsigs(void);
+void cfs_block_sigs(cfs_sigset_t bits);
+cfs_sigset_t cfs_get_blocked_sigs(void);
+#endif
+
+int convert_server_error(__u64 ecode);
+int convert_client_oflag(int cflag, int *result);
+
+/*
+ * Stack-trace filling.
+ */
+
+/*
+ * Platform-dependent data-type to hold stack frames.
+ */
+struct cfs_stack_trace;
+
+/*
+ * Fill @trace with current back-trace.
+ */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace);
+
+/*
+ * Return instruction pointer for frame @frame_no. NULL if @frame_no is
+ * invalid.
+ */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no);
+
+/*
+ * Universal open flags.
+ */
+#define CFS_O_ACCMODE 0003
+#define CFS_O_CREAT 0100
+#define CFS_O_EXCL 0200
+#define CFS_O_NOCTTY 0400
+#define CFS_O_TRUNC 01000
+#define CFS_O_APPEND 02000
+#define CFS_O_NONBLOCK 04000
+#define CFS_O_NDELAY CFS_O_NONBLOCK
+#define CFS_O_SYNC 010000
+#define CFS_O_ASYNC 020000
+#define CFS_O_DIRECT 040000
+#define CFS_O_LARGEFILE 0100000
+#define CFS_O_DIRECTORY 0200000
+#define CFS_O_NOFOLLOW 0400000
+#define CFS_O_NOATIME 01000000
+
+/* convert local open flags to universal open flags */
+int cfs_oflags2univ(int flags);
+/* convert universal open flags to local open flags */
+int cfs_univ2oflags(int flags);
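
The CFS_O_* values mirror the Linux O_* bit layout so that open flags can
be exchanged between platforms; a sketch of round-tripping a local flag set
through the converters declared above:

	/* Sketch: local flags -> universal encoding -> local flags. */
	static int oflags_roundtrip_sketch(int local_flags)
	{
	        int univ = cfs_oflags2univ(local_flags);

	        return cfs_univ2oflags(univ); /* back to the local encoding */
	}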
#define _LIBCFS_H
#endif
#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0)
-#ifdef __arch_um__
-#define LBUG_WITH_LOC(file, func, line) \
-do { \
- CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \
- libcfs_catastrophe = 1; \
- libcfs_debug_dumplog(); \
- libcfs_run_lbug_upcall(file, func, line); \
- panic("LBUG"); \
-} while (0)
-#else
-#define LBUG_WITH_LOC(file, func, line) \
-do { \
- CEMERG("LBUG\n"); \
- libcfs_catastrophe = 1; \
- libcfs_debug_dumpstack(NULL); \
- libcfs_debug_dumplog(); \
- libcfs_run_lbug_upcall(file, func, line); \
- set_task_state(current, TASK_UNINTERRUPTIBLE); \
- schedule(); \
-} while (0)
-#endif /* __arch_um__ */
-
/* ------------------------------------------------------------------- */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
-#define PORTAL_SYMBOL_GET(x) (void *)inter_module_get(#x)
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
#define PORTAL_MODULE_USE MOD_INC_USE_COUNT
#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
#endif
+#include <stdarg.h>
#include <libcfs/linux/linux-mem.h>
#include <libcfs/linux/linux-time.h>
#include <libcfs/linux/linux-prim.h>
/* initial pid */
#define LUSTRE_LNET_PID 12345
-#define ENTRY_NESTING_SUPPORT (0)
+#define ENTRY_NESTING_SUPPORT (1)
#define ENTRY_NESTING do {;} while (0)
#define EXIT_NESTING do {;} while (0)
#define __current_nesting_level() (0)
typedef __u32 cfs_kernel_cap_t;
#endif
+#if defined(__KERNEL__)
+/*
+ * No stack-back-tracing in Linux for now.
+ */
+struct cfs_stack_trace {
+};
+
+#ifndef WITH_WATCHDOG
+#define WITH_WATCHDOG
+#endif
+
+#endif
+
#endif /* _LINUX_LIBCFS_H */
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/mount.h>
-#endif
+#else /* !__KERNEL__ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <mntent.h>
+#endif /* __KERNEL__ */
typedef struct file cfs_file_t;
typedef struct dentry cfs_dentry_t;
#define cfs_put_file(f) fput(f)
#define cfs_file_count(f) file_count(f)
-typedef struct file_lock cfs_flock_t;
-#define CFS_FLOCK_TYPE(fl) ((fl)->fl_type)
-#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->fl_type = (type); } while(0)
-#define CFS_FLOCK_PID(fl) ((fl)->fl_pid)
-#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
-#define CFS_FLOCK_START(fl) ((fl)->fl_start)
-#define CFS_FLOCK_SET_START(fl, start) do { (fl)->fl_start = (start); } while(0)
-#define CFS_FLOCK_END(fl) ((fl)->fl_end)
-#define CFS_FLOCK_SET_END(fl, end) do { (fl)->fl_end = (end); } while(0)
+typedef struct file_lock cfs_flock_t;
+#define cfs_flock_type(fl) ((fl)->fl_type)
+#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0)
+#define cfs_flock_pid(fl) ((fl)->fl_pid)
+#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
+#define cfs_flock_start(fl) ((fl)->fl_start)
+#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0)
+#define cfs_flock_end(fl) ((fl)->fl_end)
+#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0)
ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset);
+
+/*
+ * portable UNIX device file identification.
+ */
+
+typedef dev_t cfs_rdev_t;
+
#endif
#endif
* - wait_for_completion(c)
*/
-/*
- * OSX funnels:
- *
- * No funnels needed in Linux
- */
-#define CFS_DECL_FUNNEL_DATA
-#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA
-#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA
-#define CFS_CONE_IN do {} while(0)
-#define CFS_CONE_EX do {} while(0)
-
-#define CFS_NET_IN do {} while(0)
-#define CFS_NET_EX do {} while(0)
-
/* __KERNEL__ */
#else
-//#include "../user-lock.h"
+#include "../user-lock.h"
/* __KERNEL__ */
#endif
#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT
#define CFS_PAGE_MASK PAGE_CACHE_MASK
-cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
-#define cfs_alloc_page(f) cfs_alloc_pages(f, 0)
-#define cfs_free_pages(p, o) __free_pages(p, o)
-#define cfs_free_page(p) __free_pages(p, 0)
+cfs_page_t *cfs_alloc_page(unsigned int flags);
+#define cfs_free_page(p) __free_pages(p, 0)
static inline void *cfs_page_address(cfs_page_t *page)
{
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
return page_address(page);
}
set_page_count(page, v);
}
+#define cfs_page_index(p) ((p)->index)
+
/*
* Memory allocator
+ * XXX Liang: move these declare to public file
*/
extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
extern void cfs_free(void *addr);
extern void cfs_free_large(void *addr);
/*
+ * In Linux there is no way to determine whether current execution context is
+ * blockable.
+ */
+#define CFS_ALLOC_ATOMIC_TRY CFS_ALLOC_ATOMIC
+
+/*
* SLAB allocator
+ * XXX Liang: move these declare to public file
*/
typedef kmem_cache_t cfs_mem_cache_t;
-extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long,
- void (*)(void *, cfs_mem_cache_t *, unsigned long),
- void (*)(void *, cfs_mem_cache_t *, unsigned long));
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
#define CFS_MMSPACE_CLOSE set_fs(__oldfs)
+#else /* !__KERNEL__ */
+#ifdef HAVE_ASM_PAGE_H
+#include <asm/page.h> /* needed for PAGE_SIZE - rread */
+#endif
+
+#define PAGE_CACHE_SIZE PAGE_SIZE
+#include <libcfs/user-prim.h>
/* __KERNEL__ */
#endif
/*
* Pseudo device register
*/
-typedef struct miscdevice cfs_psdev_t;
-#define cfs_psdev_register(dev) misc_register(dev)
-#define cfs_psdev_deregister(dev) misc_deregister(dev)
+typedef struct miscdevice cfs_psdev_t;
+#define cfs_psdev_register(dev) misc_register(dev)
+#define cfs_psdev_deregister(dev) misc_deregister(dev)
/*
* Sysctl register
*/
-typedef struct ctl_table cfs_sysctl_table_t;
-typedef struct ctl_table_header cfs_sysctl_table_header_t;
+typedef struct ctl_table cfs_sysctl_table_t;
+typedef struct ctl_table_header cfs_sysctl_table_header_t;
-#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a)
-#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a)
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a)
+#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t)
+
+/*
+ * Symbol register
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define cfs_symbol_register(s, p) inter_module_register(s, THIS_MODULE, p)
+#define cfs_symbol_unregister(s) inter_module_unregister(s)
+#define cfs_symbol_get(s) inter_module_get(s)
+#define cfs_symbol_put(s) inter_module_put(s)
+#define cfs_module_get() MOD_INC_USE_COUNT
+#define cfs_module_put() MOD_DEC_USE_COUNT
+#else
+#define cfs_symbol_register(s, p) do {} while(0)
+#define cfs_symbol_unregister(s) do {} while(0)
+#define cfs_symbol_get(s) symbol_get(s)
+#define cfs_symbol_put(s) symbol_put(s)
+#define cfs_module_get() try_module_get(THIS_MODULE)
+#define cfs_module_put() module_put(THIS_MODULE)
+#endif
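/*
 * Usage sketch (hypothetical callers): the wrappers above hide the
 * 2.4/2.6 split, so code that must pin the module while asynchronous
 * work is outstanding can be written once for both kernel families.
 */
static int my_start_request(void)           /* hypothetical */
{
        cfs_module_get();  /* MOD_INC_USE_COUNT or try_module_get() */
        /* ... queue async work that finishes with my_end_request() ... */
        return 0;
}

static void my_end_request(void)            /* hypothetical */
{
        cfs_module_put();  /* drop the reference taken above */
}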
/*
* Proc file system APIs
/*
* Wait Queue
*/
-typedef wait_queue_t cfs_waitlink_t;
-typedef wait_queue_head_t cfs_waitq_t;
+typedef wait_queue_t cfs_waitlink_t;
+typedef wait_queue_head_t cfs_waitq_t;
-#define cfs_waitq_init(w) init_waitqueue_head(w)
-#define cfs_waitlink_init(l) init_waitqueue_entry(l, current)
-#define cfs_waitq_add(w, l) add_wait_queue(w, l)
-#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l)
+typedef long cfs_task_state_t;
+
+#define CFS_TASK_INTERRUPTIBLE TASK_INTERRUPTIBLE
+#define CFS_TASK_UNINT TASK_UNINTERRUPTIBLE
+
+#define cfs_waitq_init(w) init_waitqueue_head(w)
+#define cfs_waitlink_init(l) init_waitqueue_entry(l, current)
+#define cfs_waitq_add(w, l) add_wait_queue(w, l)
+#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l)
#define cfs_waitq_forward(l, w) do {} while(0)
-#define cfs_waitq_del(w, l) remove_wait_queue(w, l)
-#define cfs_waitq_active(w) waitqueue_active(w)
-#define cfs_waitq_signal(w) wake_up(w)
-#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n)
-#define cfs_waitq_broadcast(w) wake_up_all(w)
-#define cfs_waitq_wait(l) schedule()
-#define cfs_waitq_timedwait(l, t) schedule_timeout(t)
+#define cfs_waitq_del(w, l) remove_wait_queue(w, l)
+#define cfs_waitq_active(w) waitqueue_active(w)
+#define cfs_waitq_signal(w) wake_up(w)
+#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n)
+#define cfs_waitq_broadcast(w) wake_up_all(w)
+#define cfs_waitq_wait(l, s) schedule()
+#define cfs_waitq_timedwait(l, s, t) schedule_timeout(t)
+#define cfs_schedule_timeout(s, t) schedule_timeout(t)
+#define cfs_schedule() schedule()
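/*
 * Usage sketch: the new state argument is ignored on Linux, where the
 * caller sets the task state itself before sleeping; other ports (see
 * the winnt variant later in this patch) consume it. `cond` and the
 * helper are hypothetical.
 */
static void wait_for_cond(cfs_waitq_t *waitq, volatile int *cond)
{
        cfs_waitlink_t link;

        cfs_waitlink_init(&link);
        cfs_waitq_add(waitq, &link);
        while (!*cond) {
                set_current_state(CFS_TASK_INTERRUPTIBLE);
                if (*cond)
                        break;
                cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
        }
        set_current_state(TASK_RUNNING);
        cfs_waitq_del(waitq, &link);
}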
/* Kernel thread */
typedef int (*cfs_thread_t)(void *);
*/
typedef struct task_struct cfs_task_t;
#define cfs_current() current
+#define cfs_task_lock(t) task_lock(t)
+#define cfs_task_unlock(t) task_unlock(t)
#define CFS_DECL_JOURNAL_DATA void *journal_info
#define CFS_PUSH_JOURNAL do { \
journal_info = current->journal_info; \
/*
* Signal
*/
+typedef sigset_t cfs_sigset_t;
#define cfs_sigmask_lock(t, f) SIGNAL_MASK_LOCK(t, f)
#define cfs_sigmask_unlock(t, f) SIGNAL_MASK_UNLOCK(t, f)
#define cfs_recalc_sigpending(t) RECALC_SIGPENDING
+#define cfs_clear_sigpending(t) CLEAR_SIGPENDING
#define cfs_signal_pending(t) signal_pending(t)
-#define cfs_sigfillset(s) sigfillset(s)
-
-#define cfs_set_sig_blocked(t, b) do { (t)->blocked = b; } while(0)
-#define cfs_get_sig_blocked(t) (&(t)->blocked)
/*
* Timer
#else /* !__KERNEL__ */
+typedef struct proc_dir_entry cfs_proc_dir_entry_t;
#include "../user-prim.h"
#endif /* __KERNEL__ */
return seconds * HZ;
}
-static inline cfs_time_t cfs_time_shift(int seconds)
-{
- return jiffies + seconds * HZ;
-}
-
static inline time_t cfs_duration_sec(cfs_duration_t d)
{
return d / HZ;
* using the generic single-entry routines.
*/
+#ifndef __WINNT__
#define prefetch(a) ((void)a)
+#else
+#define prefetch(a) ((void *)a)
+#endif
struct list_head {
struct list_head *next, *prev;
#include <libcfs/linux/lltrace.h>
#elif defined(__APPLE__)
#include <libcfs/darwin/lltrace.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/lltrace.h>
#else
#error Unsupported Operating System
#endif
#include <libcfs/linux/portals_lib.h>
#elif defined(__APPLE__)
#include <libcfs/darwin/portals_lib.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/portals_lib.h>
#else
#error Unsupported Operating System
#endif
static inline size_t round_strlen(char *fset)
{
- return size_round(strlen(fset) + 1);
+ return (size_t)size_round((int)strlen(fset) + 1);
}
#define LOGL(var,len,ptr) \
#include <libcfs/linux/portals_utils.h>
#elif defined(__APPLE__)
#include <libcfs/darwin/portals_utils.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/portals_utils.h>
#else
#error Unsupported Operating System
#endif
/*
* liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ *
+ * XXX Liang: several branches share lnet with b_hd_newconfig; if we
+ * define lock APIs here, they will conflict with liblustre in those
+ * branches.
*/
#ifndef __KERNEL__
+#include <stdio.h>
+#include <stdlib.h>
+#if 0
/*
* Optional debugging (magic stamping and checking ownership) can be added.
*/
*
* No-op implementation.
*/
-struct spin_lock {};
+struct spin_lock {int foo;};
typedef struct spin_lock spinlock_t;
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 0 }
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+
void spin_lock_init(spinlock_t *lock);
void spin_lock(spinlock_t *lock);
void spin_unlock(spinlock_t *lock);
void spin_lock_bh_init(spinlock_t *lock);
void spin_lock_bh(spinlock_t *lock);
void spin_unlock_bh(spinlock_t *lock);
+static inline int spin_is_locked(spinlock_t *l) {return 1;}
-static inline void
-spin_lock_irqsave(spinlock_t *l, unsigned long f) { spin_lock(l); }
-static inline void
-spin_unlock_irqrestore(spinlock_t *l, unsigned long f) { spin_unlock(l); }
+static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){}
+static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){}
/*
* Semaphore
* - __down(x)
* - __up(x)
*/
-struct semaphore {};
+typedef struct semaphore {
+ int foo;
+} mutex_t;
void sema_init(struct semaphore *s, int val);
void __down(struct semaphore *s);
* - complete(c)
* - wait_for_completion(c)
*/
+#if 0
struct completion {};
void init_completion(struct completion *c);
void complete(struct completion *c);
void wait_for_completion(struct completion *c);
+#endif
/*
* rw_semaphore:
static inline void
read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); }
+/*
+ * Atomic for user-space
+ * Copied from liblustre
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+#define atomic_read(a) ((a)->counter)
+#define atomic_set(a,b) do {(a)->counter = b; } while (0)
+#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
+#define atomic_inc(a) (((a)->counter)++)
+#define atomic_dec(a) do { (a)->counter--; } while (0)
+#define atomic_add(b,a) do {(a)->counter += b;} while (0)
+#define atomic_sub(b,a) do {(a)->counter -= b;} while (0)
+
+#endif
+
/* !__KERNEL__ */
#endif
#ifndef __KERNEL__
+#include <stdlib.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/mman.h>
#include <libcfs/list.h>
+#include <libcfs/user-time.h>
+typedef sigset_t cfs_sigset_t;
/*
* Wait Queue. No-op implementation.
*/
-typedef struct cfs_waitlink {} cfs_waitlink_t;
-typedef struct cfs_waitq {} cfs_waitq_t;
+typedef struct cfs_waitlink {
+ struct list_head sleeping;
+ void *process;
+} cfs_waitlink_t;
+
+typedef struct cfs_waitq {
+ struct list_head sleepers;
+} cfs_waitq_t;
void cfs_waitq_init(struct cfs_waitq *waitq);
void cfs_waitlink_init(struct cfs_waitlink *link);
int cfs_waitq_active(struct cfs_waitq *waitq);
void cfs_waitq_signal(struct cfs_waitq *waitq);
void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
-void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq, int state);
void cfs_waitq_wait(struct cfs_waitlink *link);
-int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout);
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout);
+#define cfs_schedule_timeout(s, t) \
+ do { \
+ cfs_waitlink_t l; \
+ cfs_waitq_timedwait(&l, s, t); \
+ } while (0)
-/*
- * Allocator
- */
+#define CFS_TASK_INTERRUPTIBLE (0)
+#define CFS_TASK_UNINT (0)
/* 2.4 defines */
#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT
#define CFS_PAGE_MASK PAGE_CACHE_MASK
-cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
-void cfs_free_pages(struct page *pg, int what);
-
cfs_page_t *cfs_alloc_page(unsigned int flags);
-void cfs_free_page(cfs_page_t *pg, int what);
+void cfs_free_page(cfs_page_t *pg);
void *cfs_page_address(cfs_page_t *pg);
void *cfs_kmap(cfs_page_t *pg);
void cfs_kunmap(cfs_page_t *pg);
#define cfs_get_page(p) __I_should_not_be_called__(at_all)
#define cfs_page_count(p) __I_should_not_be_called__(at_all)
#define cfs_set_page_count(p, v) __I_should_not_be_called__(at_all)
+#define cfs_page_index(p) ((p)->index)
/*
* Memory allocator
+ * Inline functions, so utils can use them without linking against libcfs
*/
-void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
-void cfs_free(void *addr);
-void *cfs_alloc_large(size_t nr_bytes);
-void cfs_free_large(void *addr);
+#define __ALLOC_ZERO (1 << 2)
+static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+ void *result;
+ result = malloc(nr_bytes);
+ if (result != NULL && (flags & __ALLOC_ZERO))
+ memset(result, 0, nr_bytes);
+ return result;
+}
+
+#define cfs_free(addr) free(addr)
+#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0)
+#define cfs_free_large(addr) cfs_free(addr)
+
+#define CFS_ALLOC_ATOMIC_TRY (0)
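/*
 * Usage sketch: with the allocator inlined, a utility can request a
 * zeroed allocation directly (struct foo is hypothetical).
 */
struct foo { int refs; char name[32]; };

static struct foo *foo_create(void)
{
        struct foo *f = cfs_alloc(sizeof *f, __ALLOC_ZERO);

        if (f == NULL)
                return NULL;            /* malloc() failed */
        f->refs = 1;                    /* everything else is zeroed */
        return f;
}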
/*
* SLAB allocator
*/
#define SLAB_HWCACHE_ALIGN 0
cfs_mem_cache_t *
-cfs_mem_cache_create(const char *, size_t, size_t, unsigned long,
- void (*)(void *, cfs_mem_cache_t *, unsigned long),
- void (*)(void *, cfs_mem_cache_t *, unsigned long));
+cfs_mem_cache_create(const char *, size_t, size_t, unsigned long);
int cfs_mem_cache_destroy(cfs_mem_cache_t *c);
void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp);
void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr);
/*
* Timer
*/
+#include <sys/time.h>
+
+typedef struct {
+ struct list_head tl_list;
+ void (*function)(unsigned long unused);
+ unsigned long data;
+ long expires;
+} cfs_timer_t;
+
+#define cfs_init_timer(t) do {} while(0)
+#define cfs_jiffies \
+({ \
+ unsigned long _ret = 0; \
+ struct timeval tv; \
+ if (gettimeofday(&tv, NULL) == 0) \
+ _ret = tv.tv_sec; \
+ _ret; \
+})
+
+static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned long), void *arg)
+{
+ CFS_INIT_LIST_HEAD(&l->tl_list);
+ l->function = func;
+ l->data = (unsigned long)arg;
+ return 0;
+}
+
+static inline int cfs_timer_is_armed(cfs_timer_t *l)
+{
+ if (cfs_time_before(cfs_jiffies, l->expires))
+ return 1;
+ else
+ return 0;
+}
-typedef struct cfs_timer {} cfs_timer_t;
+static inline void cfs_timer_arm(cfs_timer_t *l, int thetime)
+{
+ l->expires = thetime;
+}
+
+static inline void cfs_timer_disarm(cfs_timer_t *l)
+{
+}
+
+static inline long cfs_timer_deadline(cfs_timer_t *l)
+{
+ return l->expires;
+}
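/*
 * Usage sketch: these timers never fire on their own. Deadlines are in
 * cfs_jiffies units (seconds here, taken from gettimeofday()), and the
 * owner polls cfs_timer_is_armed() and delivers the callback itself.
 * Names are hypothetical.
 */
static void on_expiry(unsigned long data) { /* ... */ }

static void timer_example(void)
{
        cfs_timer_t t;

        cfs_timer_init(&t, on_expiry, NULL);
        cfs_timer_arm(&t, cfs_jiffies + 5);     /* ~5 seconds from now */

        while (cfs_timer_is_armed(&t))
                ;                               /* event loop runs here */
        t.function(t.data);                     /* hand-deliver expiry */
}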
#if 0
#define cfs_init_timer(t) do {} while(0)
return 0;
}
+/*
+ * portable UNIX device file identification.
+ */
+
+typedef unsigned int cfs_rdev_t;
+// typedef unsigned long long kdev_t;
+#define cfs_lock_kernel() do {} while (0)
+#define cfs_sigfillset(l) do {} while (0)
+#define cfs_recalc_sigpending(l) do {} while (0)
+#define cfs_kernel_thread(l,m,n) LBUG()
+
+// static inline void local_irq_save(unsigned long flag) {return;}
+// static inline void local_irq_restore(unsigned long flag) {return;}
+
+enum {
+ CFS_STACK_TRACE_DEPTH = 16
+};
+
+struct cfs_stack_trace {
+ void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+/*
+ * arithmetic
+ */
+#define do_div(a,b) \
+ ({ \
+ unsigned long remainder;\
+ remainder = (a) % (b); \
+ (a) = (a) / (b); \
+ (remainder); \
+ })
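/*
 * Usage sketch: do_div() keeps the kernel convention of dividing in
 * place and evaluating to the remainder.
 */
static unsigned long long ns_to_sec(unsigned long long ns,
                                    unsigned long *rem)
{
        *rem = do_div(ns, 1000000000ULL);   /* ns becomes the quotient */
        return ns;
}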
+
+
/* !__KERNEL__ */
#endif
static inline cfs_duration_t cfs_duration_build(int64_t nano)
{
- return nano / ONE_BILLION;
+ return (cfs_duration_t) (nano / ONE_BILLION);
}
static inline time_t cfs_duration_sec(cfs_duration_t d)
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_KP30_H__
+#define __LIBCFS_WINNT_KP30_H__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <libcfs/winnt/portals_compat25.h>
+#include <lnet/types.h>
+
+/* Module parameter support */
+#define CFS_MODULE_PARM(name, t, type, perm, desc)
+
+#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */
+
+
+static inline void our_cond_resched(void)
+{
+ schedule_timeout(1i64);
+}
+
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+
+
+#define cfs_work_struct_t WORK_QUEUE_ITEM
+#define cfs_prepare_work(tq, routine, context)
+#define cfs_schedule_work(tq)
+
+/* ------------------------------------------------------------------- */
+
+#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) (cfs_symbol_get(#x))
+#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x)
+
+#define PORTAL_MODULE_USE do{}while(0)
+#define PORTAL_MODULE_UNUSE do{}while(0)
+
+#define printk DbgPrint
+#define printf DbgPrint
+
+#else /* !__KERNEL__ */
+
+# include <stdio.h>
+# include <stdlib.h>
+#ifdef __CYGWIN__
+# include <cygwin-ioctl.h>
+#endif
+# include <time.h>
+
+#endif /* End of !__KERNEL__ */
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT 0
+
+/* kernel hasn't defined this? */
+typedef struct {
+ __s64 lwte_when;
+ char *lwte_where;
+ void *lwte_task;
+ long_ptr lwte_p1;
+ long_ptr lwte_p2;
+ long_ptr lwte_p3;
+ long_ptr lwte_p4;
+# if BITS_PER_LONG > 32
+ long_ptr lwte_pad;
+# endif
+} lwt_event_t;
+
+
+# define LWT_EVENT(p1,p2,p3,p4)
+
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_LIBCFS_TYPE long_ptr
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+# if (~0UL) == 0xffffffffUL
+# define BITS_PER_LONG 32
+# else
+# define BITS_PER_LONG 64
+# endif
+# endif
+#endif
+
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long_ptr)0x5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a)
+#endif
+
+#if defined(__x86_64__)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ "%lu"
+# define LPSSZ "%ld"
+#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ "%u"
+# define LPSSZ "%d"
+#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ "%u"
+# define LPSSZ "%d"
+#endif
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LIBCFS_H__
+#define __LIBCFS_WINNT_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* workaround for the VC compiler */
+#ifndef __FUNCTION__
+#define __FUNCTION__ "generic"
+#endif
+
+#include <libcfs/winnt/winnt-types.h>
+#include <libcfs/portals_utils.h>
+#include <libcfs/winnt/winnt-time.h>
+#include <libcfs/winnt/winnt-lock.h>
+#include <libcfs/winnt/winnt-mem.h>
+#include <libcfs/winnt/winnt-prim.h>
+#include <libcfs/winnt/winnt-fs.h>
+#include <libcfs/winnt/winnt-tcpip.h>
+
+struct ptldebug_header {
+ __u32 ph_len;
+ __u32 ph_flags;
+ __u32 ph_subsys;
+ __u32 ph_mask;
+ __u32 ph_cpu_id;
+ __u32 ph_sec;
+ __u64 ph_usec;
+ __u32 ph_stack;
+ __u32 ph_pid;
+ __u32 ph_extern_pid;
+ __u32 ph_line_num;
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+
+enum {
+ /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
+ CFS_STACK_TRACE_DEPTH = 16
+};
+
+struct cfs_stack_trace {
+ void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+static inline __u32 query_stack_size(void)
+{
+ ULONG LowLimit, HighLimit;
+
+ IoGetStackLimits(&LowLimit, &HighLimit);
+ ASSERT(HighLimit > LowLimit);
+
+ return (__u32) (HighLimit - LowLimit);
+}
+#else
+static inline __u32 query_stack_size(void)
+{
+ return 4096;
+}
+#endif
+
+
+#ifndef THREAD_SIZE
+# define THREAD_SIZE query_stack_size()
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#ifdef __KERNEL__
+# ifdef __ia64__
+# define CDEBUG_STACK (THREAD_SIZE - \
+ ((ulong_ptr)__builtin_dwarf_cfa() & \
+ (THREAD_SIZE - 1)))
+# else
+# define CDEBUG_STACK (IoGetRemainingStackSize())
+# endif /* __ia64__ */
+
+#define CHECK_STACK(stack) \
+ do { \
+ if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
+ __FILE__, NULL, __LINE__, \
+ (stack),"maximum lustre stack %u\n",\
+ portal_stack = (stack)); \
+ } \
+ } while (0)
+#else /* !__KERNEL__ */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+#endif /* __KERNEL__ */
+
+/* initial pid */
+#define LUSTRE_LNET_PID 12345
+
+#define ENTRY_NESTING_SUPPORT (0)
+#define ENTRY_NESTING do {;} while (0)
+#define EXIT_NESTING do {;} while (0)
+#define __current_nesting_level() (0)
+
+
+#define LBUG_WITH_LOC(_FILE, _FUNC, _LINE) \
+do { \
+ CEMERG("LBUG: pid: %u thread: %#x\n", \
+ (unsigned)cfs_curproc_pid(), \
+ (unsigned)PsGetCurrentThread()); \
+} while(0)
+
+#endif /* __LIBCFS_WINNT_LIBCFS_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LLTRACE_H__
+#define __LIBCFS_WINNT_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__
+#define __LIBCFS_WINNT_PORTALS_COMPAT_H__
+
+
+
+#endif /* __LIBCFS_WINNT_PORTALS_COMPAT_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_PORTALS_LIB_H__
+#define __LIBCFS_WINNT_PORTALS_LIB_H__
+
+#ifndef __LIBCFS_PORTALS_LIB_H__
+#error Do not #include this file directly. #include <libcfs/portals_lib.h> instead
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__
+#define __LIBCFS_WINNT_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#ifndef cfs_is_flag_set
+#define cfs_is_flag_set(x,f) (((x)&(f))==(f))
+#endif
+
+#ifndef cfs_set_flag
+#define cfs_set_flag(x,f) ((x) |= (f))
+#endif
+
+#ifndef cfs_clear_flag
+#define cfs_clear_flag(x,f) ((x) &= ~(f))
+#endif
+
+
+static inline __u32 __do_div(__u32 * n, __u32 b)
+{
+ __u32 mod;
+
+ mod = *n % b;
+ *n = *n / b;
+ return mod;
+}
+
+#define do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base))
+
+#ifdef __KERNEL__
+
+#include <stdlib.h>
+#include <libcfs/winnt/winnt-types.h>
+
+char * strsep(char **s, const char *ct);
+static inline size_t strnlen(const char * s, size_t count) {
+    size_t len = 0;
+    while (len < count && s[len] != '\0')   /* do not count the NUL */
+        len++;
+    return len;
+}
+char * ul2dstr(ulong_ptr address, char *buf, int len);
+
+#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3)
+#define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3)
+#define simple_strtoull(a1, a2, a3) strtoull(a1, a2, a3)
+
+unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base);
+
+static inline int test_bit(int nr, void * addr)
+{
+ return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0;
+}
+
+static inline void clear_bit(int nr, void * addr)
+{
+ (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31)));
+}
+
+
+static inline void set_bit(int nr, void * addr)
+{
+ (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31));
+}
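/*
 * Usage sketch: bits are addressed as 32-bit words (nr >> 5 picks the
 * word, nr & 31 the bit within it). Note these helpers are not atomic,
 * unlike their Linux namesakes.
 */
static void flag_example(void)
{
    ULONG flags[2] = { 0, 0 };      /* a 64-bit flag mask */

    set_bit(3, flags);              /* word 0, bit 3 */
    set_bit(33, flags);             /* word 1, bit 1 */
    if (test_bit(33, flags))
        clear_bit(33, flags);
}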
+
+static inline void read_random(char *buf, int len)
+{
+    ULONG Seed = (ULONG)(ulong_ptr) buf;
+    while (len > 0) {
+        /* draw a fresh pseudo-random word for every chunk */
+        Seed = RtlRandom(&Seed);
+        if (len >= (int)sizeof(ULONG)) {
+            memcpy(buf, &Seed, sizeof(ULONG));
+            len -= sizeof(ULONG);
+            buf += sizeof(ULONG);
+        } else {
+            memcpy(buf, &Seed, len);
+            len = 0;
+        }
+    }
+}
+#define get_random_bytes(buf, len) read_random(buf, len)
+
+/* do NOT pass function calls or expressions with side effects as
+   parameters: the macros below evaluate their arguments twice */
+
+#ifndef min_t
+#define min_t(type,x,y) (((type)(x) < (type)(y)) ? (x) : (y))
+#endif
+
+#ifndef max_t
+#define max_t(type,x,y) (((type)(x) < (type)(y)) ? (y) : (x))
+#endif
+
+
+#define NIPQUAD(addr) \
+ ((unsigned char *)&addr)[0], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[3]
+
+#define HIPQUAD(addr) \
+ ((unsigned char *)&addr)[3], \
+ ((unsigned char *)&addr)[2], \
+ ((unsigned char *)&addr)[1], \
+ ((unsigned char *)&addr)[0]
+
+/* no separate user address space here: a plain copy, with 0
+ * returned for success as the Linux callers expect */
+static inline int copy_from_user(void *to, void *from, int c)
+{
+    memcpy(to, from, c);
+    return 0;
+}
+
+static inline int copy_to_user(void *to, void *from, int c)
+{
+    memcpy(to, from, c);
+    return 0;
+}
+
+
+#define put_user(x, ptr)        \
+(                               \
+    *(ptr) = (x),               \
+    0                           \
+)
+
+
+#define get_user(x,ptr)         \
+(                               \
+    (x) = *(ptr),               \
+    0                           \
+)
+
+#define num_physpages (64 * 1024)
+
+#define snprintf _snprintf
+#define vsnprintf _vsnprintf
+
+
+#endif /* __KERNEL__ */
+
+int cfs_error_code(NTSTATUS);
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * File operations & routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_FS_H__
+#define __LIBCFS_WINNT_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+/*
+ * Platform defines
+ *
+ * cfs_rdev_t
+ */
+
+typedef unsigned short cfs_rdev_t;
+
+typedef unsigned int cfs_major_nr_t;
+typedef unsigned int cfs_minor_nr_t;
+
+
+#define MINORBITS 8
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+#define NODEV 0
+#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
+
+
+static inline cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor)
+{
+ return MKDEV(major, minor);
+}
+
+static inline cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev)
+{
+ return MAJOR(rdev);
+}
+
+static inline cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev)
+{
+ return MINOR(rdev);
+}
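/*
 * Usage sketch: with MINORBITS == 8, a device number packs as
 * (major << 8) | minor, and the helpers above round-trip it.
 */
static int rdev_example(void)
{
    cfs_rdev_t rdev = cfs_rdev_build(10, 200);  /* (10 << 8) | 200 */

    return cfs_rdev_major(rdev) == 10 &&
           cfs_rdev_minor(rdev) == 200;         /* both hold */
}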
+
+
+#ifdef __KERNEL__
+
+struct file_operations
+{
+ loff_t (*lseek)(struct file * file, loff_t offset, int origin);
+ ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos);
+ ssize_t (*write)(struct file * file, const char * buffer,
+ size_t count, loff_t *ppos);
+ int (*ioctl) (struct file *, unsigned int, ulong_ptr);
+ int (*open) (struct file *);
+ int (*release) (struct file *);
+};
+
+struct file {
+
+ cfs_handle_t f_handle;
+ unsigned int f_flags;
+ mode_t f_mode;
+ ulong_ptr f_count;
+
+ //struct list_head f_list;
+ //struct dentry * f_dentry;
+
+ cfs_proc_entry_t * proc_dentry;
+ cfs_file_operations_t * f_op;
+
+ size_t f_size;
+ loff_t f_pos;
+ unsigned int f_uid, f_gid;
+ int f_error;
+
+ ulong_ptr f_version;
+
+ void * private_data;
+
+ char f_name[1];
+
+};
+
+#define cfs_filp_size(f) ((f)->f_size)
+#define cfs_filp_poff(f) (&(f)->f_pos)
+
+cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err);
+int cfs_filp_close(cfs_file_t *fp);
+int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
+int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
+int cfs_filp_fsync(cfs_file_t *fp);
+int cfs_get_file(cfs_file_t *fp);
+int cfs_put_file(cfs_file_t *fp);
+int cfs_file_count(cfs_file_t *fp);
+
+
+
+/*
+ * CFS_FLOCK routines
+ */
+
+typedef struct file_lock{
+ int fl_type;
+ pid_t fl_pid;
+ size_t fl_len;
+ off_t fl_start;
+ off_t fl_end;
+} cfs_flock_t;
+
+#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1)))
+#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t)
+
+#define cfs_flock_type(fl) ((fl)->fl_type)
+#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0)
+#define cfs_flock_pid(fl) ((fl)->fl_pid)
+#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0)
+#define cfs_flock_start(fl) ((fl)->fl_start)
+#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0)
+#define cfs_flock_end(fl) ((fl)->fl_end)
+#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0)
+
+#define ATTR_MODE 0x0001
+#define ATTR_UID 0x0002
+#define ATTR_GID 0x0004
+#define ATTR_SIZE 0x0008
+#define ATTR_ATIME 0x0010
+#define ATTR_MTIME 0x0020
+#define ATTR_CTIME 0x0040
+#define ATTR_ATIME_SET 0x0080
+#define ATTR_MTIME_SET 0x0100
+#define ATTR_FORCE 0x0200 /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG 0x0400
+#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */
+//#define ATTR_CTIME_SET 0x2000
+
+#define in_group_p(x) (0)
+
+/*
+ * proc fs routines
+ */
+
+int proc_init_fs(void);
+void proc_destroy_fs(void);
+
+
+/*
+ * misc
+ */
+
+static inline void *ERR_PTR(long_ptr error)
+{
+ return (void *) error;
+}
+
+static inline long_ptr PTR_ERR(const void *ptr)
+{
+ return (long_ptr) ptr;
+}
+
+static inline long_ptr IS_ERR(const void *ptr)
+{
+ return (ulong_ptr)ptr > (ulong_ptr)-1000L;
+}
+
+#else /* !__KERNEL__ */
+
+#define CREATE_NEW 1
+#define CREATE_ALWAYS 2
+#define OPEN_EXISTING 3
+#define OPEN_ALWAYS 4
+#define TRUNCATE_EXISTING 5
+
+#define SECTION_QUERY 0x0001
+#define SECTION_MAP_WRITE 0x0002
+#define SECTION_MAP_READ 0x0004
+#define SECTION_MAP_EXECUTE 0x0008
+#define SECTION_EXTEND_SIZE 0x0010
+
+#define FILE_MAP_COPY SECTION_QUERY
+#define FILE_MAP_WRITE SECTION_MAP_WRITE
+#define FILE_MAP_READ SECTION_MAP_READ
+#define FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS
+
+
+NTSYSAPI
+HANDLE
+NTAPI
+CreateFileA(
+ IN LPCSTR lpFileName,
+ IN DWORD dwDesiredAccess,
+ IN DWORD dwShareMode,
+ IN PVOID lpSecurityAttributes,
+ IN DWORD dwCreationDisposition,
+ IN DWORD dwFlagsAndAttributes,
+ IN HANDLE hTemplateFile
+ );
+
+#define CreateFile CreateFileA
+
+NTSYSAPI
+BOOL
+NTAPI
+CloseHandle(
+ IN OUT HANDLE hObject
+ );
+
+NTSYSAPI
+HANDLE
+NTAPI
+CreateFileMappingA(
+ IN HANDLE hFile,
+ IN PVOID lpFileMappingAttributes,
+ IN DWORD flProtect,
+ IN DWORD dwMaximumSizeHigh,
+ IN DWORD dwMaximumSizeLow,
+ IN LPCSTR lpName
+ );
+#define CreateFileMapping CreateFileMappingA
+
+NTSYSAPI
+DWORD
+NTAPI
+GetFileSize(
+ IN HANDLE hFile,
+ OUT DWORD * lpFileSizeHigh
+ );
+
+NTSYSAPI
+PVOID
+NTAPI
+MapViewOfFile(
+ IN HANDLE hFileMappingObject,
+ IN DWORD dwDesiredAccess,
+ IN DWORD dwFileOffsetHigh,
+ IN DWORD dwFileOffsetLow,
+ IN SIZE_T dwNumberOfBytesToMap
+ );
+
+NTSYSAPI
+BOOL
+NTAPI
+UnmapViewOfFile(
+ IN PVOID lpBaseAddress
+ );
+
+#endif /* __KERNEL__ */
+
+typedef struct {
+ void *d;
+} cfs_dentry_t;
+
+
+#endif /* __LIBCFS_WINNT_CFS_FS_H__*/
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_LOCK_H__
+#define __LIBCFS_WINNT_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+
+/*
+ * nt specific part ...
+ */
+
+
+/* atomic */
+
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { i }
+
+#define atomic_read(v) ((v)->counter)
+#define atomic_set(v,i) (((v)->counter) = (i))
+
+void FASTCALL atomic_add(int i, atomic_t *v);
+void FASTCALL atomic_sub(int i, atomic_t *v);
+
+int FASTCALL atomic_sub_and_test(int i, atomic_t *v);
+
+void FASTCALL atomic_inc(atomic_t *v);
+void FASTCALL atomic_dec(atomic_t *v);
+
+int FASTCALL atomic_dec_and_test(atomic_t *v);
+int FASTCALL atomic_inc_and_test(atomic_t *v);
+
+
+/* event */
+
+typedef KEVENT event_t;
+
+/*
+ * cfs_init_event
+ * To initialize the event object
+ *
+ * Arguments:
+ * event: pointer to the event object
+ * type: Non Zero: SynchronizationEvent
+ * Zero: NotificationEvent
+ * status: the initial state of the event
+ * Non Zero: signaled
+ * Zero: un-signaled
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+static inline void
+cfs_init_event(event_t *event, int type, int status)
+{
+ KeInitializeEvent(
+ event,
+ (type) ? SynchronizationEvent: NotificationEvent,
+ (status) ? TRUE : FALSE
+ );
+}
+
+/*
+ * cfs_wait_event
+ * To wait on an event to synchronize the process
+ *
+ * Arguments:
+ * event: pointer to the event object
+ * timeout: the timeout for waiting, or 0 to wait indefinitely
+ *
+ * Return Value:
+ * Zero: waiting timeouts
+ * Non Zero: event signaled ...
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline int64_t
+cfs_wait_event(event_t * event, int64_t timeout)
+{
+ NTSTATUS Status;
+ LARGE_INTEGER TimeOut;
+
+ TimeOut.QuadPart = -1 * (10000000/HZ) * timeout;
+
+ Status = KeWaitForSingleObject(
+ event,
+ Executive,
+ KernelMode,
+ FALSE,
+ (timeout != 0) ? (&TimeOut) : (NULL)
+ );
+
+ if (Status == STATUS_TIMEOUT) {
+ return 0;
+ }
+
+ return TRUE; // signaled case
+}
+
+/*
+ * cfs_wake_event
+ * To signal the event object
+ *
+ * Arguments:
+ * event: pointer to the event object
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline int
+cfs_wake_event(event_t * event)
+{
+ return (KeSetEvent(event, 0, FALSE) != 0);
+}
+
+/*
+ * cfs_clear_event
+ * To clear/reset the status of the event object
+ *
+ * Arguments:
+ * event: pointer to the event object
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void
+cfs_clear_event(event_t * event)
+{
+ KeResetEvent(event);
+}
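/*
 * Usage sketch: a one-shot handoff between two threads. With type 1
 * (SynchronizationEvent) the event auto-resets, so exactly one waiter
 * wakes per cfs_wake_event(). Names are hypothetical.
 */
static event_t data_ready;

static void handoff_init(void)
{
    cfs_init_event(&data_ready, 1, 0);  /* auto-reset, unsignaled */
}

static void producer(void)
{
    /* ... publish the data ... */
    cfs_wake_event(&data_ready);
}

static void consumer(void)
{
    cfs_wait_event(&data_ready, 0);     /* 0: wait forever */
    /* ... consume; event is already reset for the next round ... */
}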
+
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized, although
+ * some of them are in Linux. Any lock declared through CFS_DECL_*
+ * must be initialized explicitly.
+ */
+
+
+/*
+ * spin lock definitions / routines
+ */
+
+/*
+ * Warning:
+ *
+ * nested acquisition of a spinlock will deadlock on an MP system
+ * and will silently overwrite the saved irql on a UP system. (A UP
+ * system could tolerate nested acquisition because it does not spin
+ * at all; it merely raises the irql.)
+ *
+ */
+
+typedef struct spin_lock {
+
+ KSPIN_LOCK lock;
+ KIRQL irql;
+
+} spinlock_t;
+
+
+#define CFS_DECL_SPIN(name) spinlock_t name;
+#define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name;
+
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+ KeInitializeSpinLock(&(lock->lock));
+}
+
+
+static inline void spin_lock(spinlock_t *lock)
+{
+ KeAcquireSpinLock(&(lock->lock), &(lock->irql));
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+ KeReleaseSpinLock(&(lock->lock), lock->irql);
+}
+
+
+#define spin_lock_irqsave(lock, flags) do {(flags) = 0; spin_lock(lock);} while(0)
+#define spin_unlock_irqrestore(lock, flags) do {spin_unlock(lock);} while(0)
+
+
+/* There is no corresponding routine in the Windows kernel, so we
+   provide a light-weight one of our own. There is also no way to
+   tell at runtime whether the system is an MP or a UP build, so we
+   fall back on the MPSystem flag below as a workaround. */
+
+extern int MPSystem;
+
+static inline int spin_trylock(spinlock_t *lock)
+{
+ KIRQL Irql;
+ int rc = 0;
+
+ ASSERT(lock != NULL);
+
+ KeRaiseIrql(DISPATCH_LEVEL, &Irql);
+
+ if (MPSystem) {
+ if (0 == (ulong_ptr)lock->lock) {
+#if _X86_
+ __asm {
+ mov edx, dword ptr [ebp + 8]
+ lock bts dword ptr[edx], 0
+ jb lock_failed
+ mov rc, TRUE
+ lock_failed:
+ }
+#else
+ KdBreakPoint();
+#endif
+
+ }
+ } else {
+ rc = TRUE;
+ }
+
+ if (rc) {
+ lock->irql = Irql;
+ } else {
+ KeLowerIrql(Irql);
+ }
+
+ return rc;
+}
+
+#define spin_lock_bh(x) spin_lock(x)
+#define spin_unlock_bh(x) spin_unlock(x)
+#define spin_lock_bh_init(x) spin_lock_init(x)
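/*
 * Usage sketch: on success spin_trylock() leaves the lock held exactly
 * as spin_lock() would (the raised irql is saved in the lock), so the
 * same spin_unlock() releases it. Names are hypothetical.
 */
static spinlock_t cache_lock;       /* spin_lock_init()'ed at startup */

static void try_fast_path(void)
{
    if (spin_trylock(&cache_lock)) {
        /* ... fast path under the lock ... */
        spin_unlock(&cache_lock);
    } else {
        /* ... defer the work to the slow path ... */
    }
}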
+
+/*
+ * rw_semaphore (using ERESOURCE)
+ */
+
+
+typedef struct rw_semaphore {
+ ERESOURCE rwsem;
+} rw_semaphore_t;
+
+
+#define CFS_DECL_RWSEM(name) rw_semaphore_t name
+#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name
+
+
+/*
+ * init_rwsem
+ * To initialize the rw_semaphore_t structure
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void init_rwsem(rw_semaphore_t *s)
+{
+ ExInitializeResourceLite(&s->rwsem);
+}
+
+
+/*
+ * fini_rwsem
+ * To finalize/destroy the rw_semaphore_t structure
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * For winnt system, we need this routine to delete the ERESOURCE.
+ * Just define it NULL for other systems.
+ */
+
+static inline void fini_rwsem(rw_semaphore_t *s)
+{
+ ExDeleteResourceLite(&s->rwsem);
+}
+
+/*
+ * down_read
+ * To acquire a read lock on the rw_semaphore
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void down_read(struct rw_semaphore *s)
+{
+ ExAcquireResourceSharedLite(&s->rwsem, TRUE);
+}
+
+
+/*
+ * down_read_trylock
+ * To acquire a read lock on the rw_semaphore without blocking
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * Zero: failed to acquire the read lock
+ * Non-Zero: succeeded in acquiring the read lock
+ *
+ * Notes:
+ * This routine will return immediately without waiting.
+ */
+
+static inline int down_read_trylock(struct rw_semaphore *s)
+{
+ return ExAcquireResourceSharedLite(&s->rwsem, FALSE);
+}
+
+
+/*
+ * down_write
+ * To acquire a write lock on the rw_semaphore
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void down_write(struct rw_semaphore *s)
+{
+ ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE);
+}
+
+
+/*
+ * down_write_trylock
+ * To acquire a write lock on the rw_semaphore without blocking
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * Zero: failed to acquire the write lock
+ * Non-Zero: succeeded in acquiring the write lock
+ *
+ * Notes:
+ * This routine will return immediately without waiting.
+ */
+
+static inline int down_write_trylock(struct rw_semaphore *s)
+{
+ return ExAcquireResourceExclusiveLite(&(s->rwsem), FALSE);
+}
+
+
+/*
+ * up_read
+ * To release a read lock on the rw_semaphore
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void up_read(struct rw_semaphore *s)
+{
+ ExReleaseResourceForThreadLite(
+ &(s->rwsem),
+ ExGetCurrentResourceThread());
+}
+
+
+/*
+ * up_write
+ * To release a write lock on the rw_semaphore
+ *
+ * Arguments:
+ * rwsem: pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void up_write(struct rw_semaphore *s)
+{
+ ExReleaseResourceForThreadLite(
+ &(s->rwsem),
+ ExGetCurrentResourceThread());
+}
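/*
 * Usage sketch: unlike the Linux rw_semaphore, the ERESOURCE-backed
 * one must be torn down with fini_rwsem() before its memory is freed.
 * The table and its accessors are hypothetical.
 */
static rw_semaphore_t table_sem;

static void table_setup(void)    { init_rwsem(&table_sem); }
static void table_teardown(void) { fini_rwsem(&table_sem); }

static void table_lookup(void)
{
    down_read(&table_sem);      /* shared with other readers */
    /* ... read-only access ... */
    up_read(&table_sem);
}

static void table_update(void)
{
    down_write(&table_sem);     /* exclusive */
    /* ... modify the table ... */
    up_write(&table_sem);
}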
+
+/*
+ * rwlock_t (using a spinlock guard and counter)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+
+typedef struct {
+ spinlock_t guard;
+ int count;
+} rwlock_t;
+
+void rwlock_init(rwlock_t * rwlock);
+void rwlock_fini(rwlock_t * rwlock);
+
+void read_lock(rwlock_t * rwlock);
+void read_unlock(rwlock_t * rwlock);
+void write_lock(rwlock_t * rwlock);
+void write_unlock(rwlock_t * rwlock);
+
+#define write_lock_irqsave(l, f) do {f = 0; write_lock(l);} while(0)
+#define write_unlock_irqrestore(l, f) do {write_unlock(l);} while(0)
+#define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0)
+#define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0)
+
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+
+typedef struct semaphore {
+ KSEMAPHORE sem;
+} mutex_t;
+
+static inline void sema_init(struct semaphore *s, int val)
+{
+ KeInitializeSemaphore(&s->sem, val, val);
+}
+
+static inline void __down(struct semaphore *s)
+{
+ KeWaitForSingleObject( &(s->sem), Executive,
+ KernelMode, FALSE, NULL );
+
+}
+
+static inline void __up(struct semaphore *s)
+{
+ KeReleaseSemaphore(&s->sem, 0, 1, FALSE);
+}
+
+/*
+ * mutex_t:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+
+/*
+ * init_mutex
+ * To initialize a mutex_t structure
+ *
+ * Arguments:
+ * mutex: pointer to the mutex_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void init_mutex(mutex_t *mutex)
+{
+ sema_init(mutex, 1);
+}
+
+
+/*
+ * mutex_down
+ * To acquire the mutex lock
+ *
+ * Arguments:
+ * mutex: pointer to the mutex_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void mutex_down(mutex_t *mutex)
+{
+ __down(mutex);
+}
+
+
+/*
+ * mutex_up
+ * To release the mutex lock (acquired already)
+ *
+ * Arguments:
+ * mutex: pointer to the mutex_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void mutex_up(mutex_t *mutex)
+{
+ __up(mutex);
+}
+
+
+/*
+ * init_mutex_locked
+ * To initialize the mutex as acquired state
+ *
+ * Arguments:
+ * mutex: pointer to the mutex_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void init_mutex_locked(mutex_t *mutex)
+{
+ init_mutex(mutex);
+ mutex_down(mutex);
+}
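/*
 * Usage sketch: mutex_t is a KSEMAPHORE with an initial count of one,
 * so init_mutex_locked() is literally init + down; handy when the
 * creator wants the first mutex_up() to come from somewhere else.
 */
static mutex_t conf_mutex;          /* init_mutex()'ed at startup */

static void update_conf(void)       /* hypothetical */
{
    mutex_down(&conf_mutex);
    /* ... critical section ... */
    mutex_up(&conf_mutex);
}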
+
+/*
+ * completion
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+
+struct completion {
+ event_t event;
+};
+
+
+/*
+ * init_completion
+ * To initialize the completion object
+ *
+ * Arguments:
+ * c: pointer to the completion structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void init_completion(struct completion *c)
+{
+ cfs_init_event(&(c->event), 1, FALSE);
+}
+
+
+/*
+ * complete
+ * To complete/signal the completion object
+ *
+ * Arguments:
+ * c: pointer to the completion structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void complete(struct completion *c)
+{
+ cfs_wake_event(&(c->event));
+}
+
+/*
+ * wait_for_completion
+ * To wait on the completion object. If the event is signaled,
+ * this function returns to the caller with the event un-signaled.
+ *
+ * Arguments:
+ * c: pointer to the completion structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+static inline void wait_for_completion(struct completion *c)
+{
+ cfs_wait_event(&(c->event), 0);
+}
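/*
 * Usage sketch: init_completion() builds a SynchronizationEvent, so
 * each complete() releases exactly one waiter and the event resets
 * itself. Names are hypothetical; cfs_kernel_thread() is declared in
 * winnt-prim.h later in this patch.
 */
static struct completion started;

static int helper_thread(void *arg)
{
    /* ... one-time initialization ... */
    complete(&started);             /* wake the launcher */
    /* ... main loop ... */
    return 0;
}

static void launch_helper(void)
{
    init_completion(&started);
    cfs_kernel_thread(helper_thread, NULL, 0);
    wait_for_completion(&started);  /* block until complete() above */
}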
+
+/* __KERNEL__ */
+#else
+
+#include "../user-lock.h"
+
+/* __KERNEL__ */
+#endif
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines of memory manipulation routines .
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_MEM_H__
+#define __LIBCFS_WINNT_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#define CFS_PAGE_SIZE PAGE_SIZE
+#define CFS_PAGE_SHIFT PAGE_SHIFT
+#define CFS_PAGE_MASK (~(PAGE_SIZE - 1))
+
+typedef struct cfs_page {
+ void * addr;
+ atomic_t count;
+} cfs_page_t;
+
+
+cfs_page_t *cfs_alloc_page(int flags);
+void cfs_free_page(cfs_page_t *pg);
+
+static inline void *cfs_page_address(cfs_page_t *page)
+{
+ return page->addr;
+}
+
+static inline void *cfs_kmap(cfs_page_t *page)
+{
+ return page->addr;
+}
+
+static inline void cfs_kunmap(cfs_page_t *page)
+{
+ return;
+}
+
+static inline void cfs_get_page(cfs_page_t *page)
+{
+ atomic_inc(&page->count);
+}
+
+static inline void cfs_put_page(cfs_page_t *page)
+{
+ atomic_dec(&page->count);
+}
+
+static inline int cfs_page_count(cfs_page_t *page)
+{
+ return atomic_read(&page->count);
+}
+
+static inline void cfs_set_page_count(cfs_page_t *page, int v)
+{
+ atomic_set(&page->count, v);
+}
+
+/*
+ * Memory allocator
+ */
+
+#define CFS_ALLOC_ATOMIC_TRY (0)
+
+extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+extern void cfs_free(void *addr);
+
+extern void *cfs_alloc_large(size_t nr_bytes);
+extern void cfs_free_large(void *addr);
+
+/*
+ * SLAB allocator
+ */
+
+#define SLAB_HWCACHE_ALIGN 0
+
+/* The cache name is limited to 20 chars */
+
+typedef struct cfs_mem_cache {
+
+ char name[20];
+ ulong_ptr flags;
+ NPAGED_LOOKASIDE_LIST npll;
+
+} cfs_mem_cache_t;
+
+
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr);
+extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+
+/*
+ * Page allocator slabs
+ */
+
+extern cfs_mem_cache_t *cfs_page_t_slab;
+extern cfs_mem_cache_t *cfs_page_p_slab;
+
+
+#define CFS_DECL_MMSPACE
+#define CFS_MMSPACE_OPEN do {} while(0)
+#define CFS_MMSPACE_CLOSE do {} while(0)
+
+
+#define mb() do {} while(0)
+#define rmb() mb()
+#define wmb() mb()
+
+
+/* __KERNEL__ */
+#endif
+
+#endif /* __WINNT_CFS_MEM_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_PRIM_H__
+#define __LIBCFS_WINNT_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+/*
+ * libcfs proc device object
+ */
+
+
+#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */
+#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */
+
+
+/*
+ * Device IO Control Code Definitions
+ */
+
+#define FILE_DEVICE_LIBCFS ('LC')
+
+#define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs
+#define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs
+
+
+#define IOCTL_LIBCFS_VERSION \
+ CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define IOCTL_LIBCFS_ENTRY \
+ CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+#pragma pack(4)
+
+typedef struct _CFS_PROC_IOCTL {
+
+ ULONG cmd; // ioctl command identifier
+ ULONG len; // length of data
+
+ // UCHAR data[]; // content of the real ioctl
+
+} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL;
+
+#pragma pack()
+
+#ifdef __KERNEL__
+
+#include <libcfs/list.h>
+
+/*
+ * Symbol functions for libcfs
+ *
+ * The NT kernel has no facility for us to register symbols,
+ * so we have to implement one ourselves.
+ */
+#define CFS_SYMBOL_LEN 64
+
+struct cfs_symbol {
+ char name[CFS_SYMBOL_LEN];
+ void *value;
+ int ref;
+ struct list_head sym_list;
+};
+
+extern int cfs_symbol_register(const char *, const void *);
+extern void cfs_symbol_unregister(const char *);
+extern void * cfs_symbol_get(const char *);
+extern void cfs_symbol_put(const char *);
+extern void cfs_symbol_clean();
+
+
+
+typedef struct file_operations cfs_file_operations_t;
+typedef struct file cfs_file_t;
+
+/*
+ * Pseudo device register
+ */
+
+typedef struct
+{
+ int minor;
+ const char * name;
+ cfs_file_operations_t * fops;
+} cfs_psdev_t;
+
+int cfs_psdev_register(cfs_psdev_t * psdev);
+int cfs_psdev_deregister(cfs_psdev_t * psdev);
+
+
+/*
+ * Proc emulator file system APIs
+ */
+
+typedef int cfs_read_proc_t(char *page, char **start, off_t off,
+ int count, int *eof, void *data);
+typedef int cfs_write_proc_t(struct file *file, const char *buffer,
+ ulong_ptr count, void *data);
+
+#define CFS_PROC_ENTRY_MAGIC 'CPEM'
+
+#define CFS_PROC_FLAG_DIRECTORY 0x00000001 // directory node
+#define CFS_PROC_FLAG_ATTACHED 0x00000002 // node is attached to proc
+#define CFS_PROC_FLAG_MISCDEV 0x00000004 // miscellaneous device
+
+typedef struct cfs_proc_entry
+{
+ ULONG magic; // Magic
+ ULONG flags; // Flags
+
+ struct _dir_entry { // proc directory entry
+ PRTL_SPLAY_LINKS root;
+ };
+
+ struct _file_entry { // proc file / leaf entry
+ cfs_read_proc_t * read_proc;
+ cfs_write_proc_t * write_proc;
+ };
+
+ mode_t mode;
+ unsigned short nlink;
+
+
+ struct file_operations * proc_fops;
+ void * data;
+
+ // proc_dir_entry ended.
+
+ RTL_SPLAY_LINKS s_link; // splay link
+
+ //
+ // Maximum length of proc entry name is 0x20
+ //
+
+ char name[0x20];
+
+} cfs_proc_entry_t;
+
+typedef cfs_proc_entry_t cfs_proc_dir_entry_t;
+
+#define PROC_BLOCK_SIZE PAGE_SIZE
+
+/*
+ * Sysctl register
+ */
+
+typedef struct ctl_table cfs_sysctl_table_t;
+typedef struct ctl_table_header cfs_sysctl_table_header_t;
+
+
+typedef int ctl_handler (
+ cfs_sysctl_table_t *table,
+ int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen,
+ void **context );
+
+typedef int proc_handler (
+ cfs_sysctl_table_t *ctl,
+ int write, struct file * filp,
+ void *buffer, size_t *lenp );
+
+
+int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+ void *buffer, size_t *lenp);
+
+int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
+ void *buffer, size_t *lenp);
+
+int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context);
+
+
+/*
+ * System io control definitions
+ */
+
+#define CTL_MAXNAME 10
+
+#define CTL_ANY -1 /* Matches any name */
+#define CTL_NONE 0
+
+enum
+{
+ CTL_KERN=1, /* General kernel info and control */
+ CTL_VM=2, /* VM management */
+ CTL_NET=3, /* Networking */
+ CTL_PROC=4, /* Process info */
+ CTL_FS=5, /* Filesystems */
+ CTL_DEBUG=6, /* Debugging */
+ CTL_DEV=7, /* Devices */
+ CTL_BUS=8, /* Busses */
+ CTL_ABI=9, /* Binary emulation */
+ CTL_CPU=10 /* CPU stuff (speed scaling, etc) */
+};
+
+/* sysctl table definitions */
+struct ctl_table
+{
+ int ctl_name;
+ char *procname;
+ void *data;
+ int maxlen;
+ mode_t mode;
+ cfs_sysctl_table_t *child;
+ proc_handler *proc_handler; /* text formatting callback */
+ ctl_handler *strategy; /* read / write callback functions */
+ cfs_proc_entry_t *de; /* proc entry block */
+ void *extra1;
+ void *extra2;
+};
+
+
+/* the maintainer of the cfs_sysctl_table trees */
+struct ctl_table_header
+{
+ cfs_sysctl_table_t * ctl_table;
+ struct list_head ctl_entry;
+};
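/*
 * Usage sketch: the layout mirrors the Linux ctl_table, so a table is
 * a zero-terminated array whose leaves carry data plus a proc_handler.
 * The "lnet"/"debug_level" knob below is hypothetical.
 */
static int debug_level = 0;

static cfs_sysctl_table_t lnet_leaves[] = {
    { 1, "debug_level", &debug_level, sizeof(int), 0644,
      NULL, &proc_dointvec, NULL, NULL, NULL, NULL },
    { 0 }                           /* terminator */
};

static cfs_sysctl_table_t lnet_root[] = {
    { CTL_NET, "lnet", NULL, 0, 0555, lnet_leaves,
      NULL, NULL, NULL, NULL, NULL },
    { 0 }
};

/* registered with register_cfs_sysctl_table(lnet_root, 0); the macro
 * is defined just below */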
+
+
+cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod,
+ cfs_proc_entry_t *parent);
+void proc_free_entry(cfs_proc_entry_t *de);
+void remove_proc_entry(char *name, cfs_proc_entry_t *entry);
+cfs_proc_entry_t * search_proc_entry(char * name,
+ cfs_proc_entry_t * root );
+
+#define cfs_create_proc_entry create_proc_entry
+#define cfs_free_proc_entry proc_free_entry
+#define cfs_remove_proc_entry remove_proc_entry
+
+#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a)
+#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t)
+
+
+/*
+ * declaration of proc kernel process routines
+ */
+
+cfs_file_t *
+lustre_open_file(char * filename);
+
+int
+lustre_close_file(cfs_file_t * fh);
+
+int
+lustre_do_ioctl( cfs_file_t * fh,
+ unsigned long cmd,
+ ulong_ptr arg );
+
+int
+lustre_ioctl_file( cfs_file_t * fh,
+ PCFS_PROC_IOCTL devctl);
+
+size_t
+lustre_read_file( cfs_file_t * fh,
+ loff_t off,
+ size_t size,
+ char * buf
+ );
+
+size_t
+lustre_write_file( cfs_file_t * fh,
+ loff_t off,
+ size_t size,
+ char * buf
+ );
+
+/*
+ * Wait Queue
+ */
+
+
+typedef int cfs_task_state_t;
+
+#define CFS_TASK_INTERRUPTIBLE 0x00000001
+#define CFS_TASK_UNINT 0x00000002
+
+
+
+#define CFS_WAITQ_MAGIC 'CWQM'
+#define CFS_WAITLINK_MAGIC 'CWLM'
+
+typedef struct cfs_waitq {
+
+ unsigned int magic;
+ unsigned int flags;
+
+ spinlock_t guard;
+ struct list_head waiters;
+
+} cfs_waitq_t;
+
+
+typedef struct cfs_waitlink cfs_waitlink_t;
+
+#define CFS_WAITQ_CHANNELS (2)
+
+#define CFS_WAITQ_CHAN_NORMAL (0)
+#define CFS_WAITQ_CHAN_FORWARD (1)
+
+
+
+typedef struct cfs_waitlink_channel {
+ struct list_head link;
+ cfs_waitq_t * waitq;
+ cfs_waitlink_t * waitl;
+} cfs_waitlink_channel_t;
+
+struct cfs_waitlink {
+
+ unsigned int magic;
+ int flags;
+ event_t * event;
+ atomic_t * hits;
+
+ cfs_waitlink_channel_t waitq[CFS_WAITQ_CHANNELS];
+};
+
+enum {
+ CFS_WAITQ_EXCLUSIVE = 1
+};
+
+#define CFS_DECL_WAITQ(name) cfs_waitq_t name
+
+
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+ struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int cfs_waitq_active(struct cfs_waitq *waitq);
+
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+ cfs_task_state_t state, cfs_duration_t timeout);
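+
+/*
+ * Editor's usage sketch for the wait-queue emulation above (the queue, flag
+ * and function names are hypothetical):
+ */
+#if 0
+static cfs_waitq_t my_waitq;    /* cfs_waitq_init(&my_waitq) must run first */
+static int my_flag = 0;
+
+static void my_waiter(void)
+{
+    cfs_waitlink_t link;
+
+    cfs_waitlink_init(&link);
+    cfs_waitq_add(&my_waitq, &link);    /* enqueue before testing the flag */
+    while (!my_flag)
+        cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
+    cfs_waitq_del(&my_waitq, &link);    /* always dequeue before returning */
+}
+/* a waker sets my_flag and calls cfs_waitq_signal(&my_waitq); */
+#endif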
+
+
+
+/* Kernel thread */
+
+typedef int (*cfs_thread_t) (void *arg);
+
+typedef struct _cfs_thread_context {
+ cfs_thread_t func;
+ void * arg;
+} cfs_thread_context_t;
+
+int cfs_kernel_thread(int (*func)(void *), void *arg, int flag);
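+
+/*
+ * Editor's sketch of spawning a thread with the emulation above; the name
+ * "my_thread_fn" is hypothetical, and `flag` may be a combination of the
+ * CLONE_* values listed below (unused on winnt).
+ */
+#if 0
+static int my_thread_fn(void *arg)
+{
+    return 0;                           /* thread body */
+}
+/* rc = cfs_kernel_thread(my_thread_fn, NULL, 0); */
+#endif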
+
+/*
+ * thread creation flags from Linux, not used in winnt
+ */
+#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
+#define CLONE_VM 0x00000100 /* set if VM shared between processes */
+#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
+#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
+#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PID 0x00001000 /* set if pid shared */
+#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD 0x00010000 /* Same thread group? */
+#define CLONE_NEWNS 0x00020000 /* New namespace group? */
+
+#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD)
+
+
+/*
+ * sigset ...
+ */
+
+typedef sigset_t cfs_sigset_t;
+
+/*
+ * Task struct
+ */
+
+#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12))
+
+
+#define NGROUPS 1
+#define CFS_CURPROC_COMM_MAX (16)
+typedef struct task_struct {
+ mode_t umask;
+
+ pid_t pid;
+ pid_t pgrp;
+
+ uid_t uid,euid,suid,fsuid;
+ gid_t gid,egid,sgid,fsgid;
+
+ int ngroups;
+ gid_t groups[NGROUPS];
+ cfs_kernel_cap_t cap_effective,
+ cap_inheritable,
+ cap_permitted;
+
+ char comm[CFS_CURPROC_COMM_MAX];
+ void * journal_info;
+} cfs_task_t;
+
+
+/*
+ * linux task struct emulator ...
+ */
+
+#define TASKMAN_MAGIC 'TMAN' /* Task Manager */
+#define TASKSLT_MAGIC 'TSLT' /* Task Slot */
+
+typedef struct _TASK_MAN {
+
+ ULONG Magic; /* Magic and Flags */
+ ULONG Flags;
+
+ spinlock_t Lock; /* Protection lock */
+
+ cfs_mem_cache_t * slab; /* Memory slab for task slot */
+
+ ULONG NumOfTasks; /* Total tasks (threads) */
+ LIST_ENTRY TaskList; /* List of task slots */
+
+} TASK_MAN, *PTASK_MAN;
+
+typedef struct _TASK_SLOT {
+
+ ULONG Magic; /* Magic and Flags */
+ ULONG Flags;
+
+ LIST_ENTRY Link; /* To be linked to TaskMan */
+
+ event_t Event; /* Schedule event */
+
+ HANDLE Pid; /* Process id */
+ HANDLE Tid; /* Thread id */
+ PETHREAD Tet; /* Pointer to ethread */
+
+ atomic_t count; /* reference count */
+ atomic_t hits; /* times the wake event was signaled */
+
+ KIRQL irql; /* irql for rwlock ... */
+
+ cfs_task_t task; /* linux task part */
+
+} TASK_SLOT, *PTASK_SLOT;
+
+
+#define current cfs_current()
+#define set_current_state(s) do {;} while (0)
+#define reparent_to_init() do {;} while (0)
+
+#define wait_event(wq, condition) \
+do { \
+ cfs_waitlink_t __wait; \
+ \
+ cfs_waitlink_init(&__wait); \
+ while (TRUE) { \
+ cfs_waitq_add(&wq, &__wait); \
+ if (condition) { \
+ break; \
+ } \
+ cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \
+ cfs_waitq_del(&wq, &__wait); \
+ } \
+ cfs_waitq_del(&wq, &__wait); \
+} while(0)
+
+/* note: signals are not emulated on winnt, so __ret is always left 0 */
+#define wait_event_interruptible(wq, condition, __ret) \
+do { \
+ cfs_waitlink_t __wait; \
+ \
+ __ret = 0; \
+ cfs_waitlink_init(&__wait); \
+ while (TRUE) { \
+ cfs_waitq_add(&wq, &__wait); \
+ if (condition) { \
+ break; \
+ } \
+ cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \
+ cfs_waitq_del(&wq, &__wait); \
+ } \
+ cfs_waitq_del(&wq, &__wait); \
+} while(0)
+
+
+int init_task_manager(void);
+void cleanup_task_manager(void);
+cfs_task_t * cfs_current(void);
+int schedule_timeout(int64_t time);
+int schedule(void);
+int wake_up_process(cfs_task_t * task);
+#define cfs_schedule_timeout(state, time) schedule_timeout(time)
+void sleep_on(cfs_waitq_t *waitq);
+
+#define CFS_DECL_JOURNAL_DATA
+#define CFS_PUSH_JOURNAL do {;} while(0)
+#define CFS_POP_JOURNAL do {;} while(0)
+
+
+/* module related definitions */
+
+#ifndef __exit
+#define __exit
+#endif
+#ifndef __init
+#define __init
+#endif
+
+#define request_module(x) (0)
+
+#define EXPORT_SYMBOL(s)
+#define MODULE_AUTHOR(s)
+#define MODULE_DESCRIPTION(s)
+#define MODULE_LICENSE(s)
+#define MODULE_PARM(a, b)
+#define MODULE_PARM_DESC(a, b)
+
+#define module_init(X) int __init module_##X() {return X();}
+#define module_exit(X) void __exit module_##X() {X();}
+
+#define DECLARE_INIT(X) extern int __init module_##X(void)
+#define DECLARE_EXIT(X) extern void __exit module_##X(void)
+
+#define MODULE_INIT(X) do { int rc = module_##X(); \
+ if (rc) goto errorout; \
+ } while(0)
+
+#define MODULE_EXIT(X) do { module_##X(); } while(0)
+
+
+/* Module interfaces */
+#define cfs_module(name, version, init, fini) \
+module_init(init); \
+module_exit(fini)
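+
+/*
+ * Editor's sketch of the module emulation above (the "mymod*" names are
+ * hypothetical). cfs_module() generates module_mymod_init() and
+ * module_mymod_fini(), which MODULE_INIT()/MODULE_EXIT() then invoke
+ * (the MODULE_INIT caller must provide an errorout label).
+ */
+#if 0
+static int  mymod_init(void) { return 0; }
+static void mymod_fini(void) { }
+cfs_module(mymod, "1.0", mymod_init, mymod_fini);
+#endif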
+
+
+/*
+ * Linux kernel version definition
+ */
+
+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
+#define LINUX_VERSION_CODE (2*100+6*10+7)
+
+
+/*
+ * Signal
+ */
+
+#define cfs_sigmask_lock(t, f) do { f = 0; } while(0)
+#define cfs_sigmask_unlock(t, f) do { f = 0; } while(0)
+#define cfs_signal_pending(t) (0)
+
+#define cfs_recalc_sigpending(t) do { } while(0)
+
+#define cfs_siginitset(m, s) do { } while(0)
+#define cfs_sigfillset(s) do { } while(0)
+#define cfs_siginitsetinv(m,f) do { } while(0)
+
+#define cfs_set_sig_blocked(t, b) do { } while(0)
+#define cfs_get_sig_blocked(t) (0)
+
+#define SIGNAL_MASK_ASSERT()
+
+cfs_sigset_t cfs_get_blocked_sigs(cfs_task_t *t);
+void cfs_block_allsigs(cfs_task_t *t);
+void cfs_block_sigs(cfs_task_t *t, sigset_t bit);
+
+/*
+ * Clear all pending signals.
+ */
+#define cfs_clear_sigpending(ut) do {} while (0)
+
+
+/*
+ * Timer
+ */
+
+#define CFS_TIMER_FLAG_INITED 0x00000001 // Initialized already
+#define CFS_TIMER_FLAG_TIMERED 0x00000002 // KeSetTimer is called
+
+typedef struct cfs_timer {
+
+ KSPIN_LOCK Lock;
+
+ ULONG Flags;
+
+ KDPC Dpc;
+ KTIMER Timer;
+
+ cfs_time_t deadline;
+
+ void (*proc)(ulong_ptr);
+ void * arg;
+
+} cfs_timer_t;
+
+
+typedef void (*timer_func_t)(ulong_ptr);
+
+#define cfs_init_timer(t)
+
+void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg);
+void cfs_timer_done(cfs_timer_t *t);
+void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline);
+void cfs_timer_disarm(cfs_timer_t *t);
+int cfs_timer_is_armed(cfs_timer_t *t);
+cfs_time_t cfs_timer_deadline(cfs_timer_t *t);
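+
+/*
+ * Editor's usage sketch for the timer API above. The callback name is
+ * hypothetical; given the KDPC/KTIMER members it presumably runs at
+ * DISPATCH_LEVEL when the timer fires.
+ */
+#if 0
+static void my_timer_cb(ulong_ptr arg)
+{
+    /* timer expired */
+}
+static cfs_timer_t my_timer;
+/*
+ * cfs_timer_init(&my_timer, my_timer_cb, NULL);
+ * cfs_timer_arm(&my_timer, deadline);          -- fire at `deadline`
+ * if (cfs_timer_is_armed(&my_timer))
+ *         cfs_timer_disarm(&my_timer);
+ * cfs_timer_done(&my_timer);
+ */
+#endif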
+
+
+/* deschedule for a bit... */
+static inline void cfs_pause(cfs_duration_t ticks)
+{
+    cfs_schedule_timeout(CFS_TASK_UNINT, ticks);
+}
+
+
+static inline void cfs_enter_debugger(void)
+{
+#if _X86_
+ __asm int 3;
+#else
+ KdBreakPoint();
+#endif
+}
+
+/*
+ * libcfs globals initialization/cleanup
+ */
+
+int
+libcfs_arch_init(void);
+
+void
+libcfs_arch_cleanup(void);
+
+/*
+ * SMP ...
+ */
+
+#define SMP_CACHE_BYTES 128
+#define __cacheline_aligned
+#define NR_CPUS (2)
+#define smp_processor_id() KeGetCurrentProcessorNumber()
+#define smp_num_cpus NR_CPUS
+#define num_online_cpus() smp_num_cpus
+#define smp_call_function(f, a, n, w) do {} while(0)
+
+/*
+ * Irp related
+ */
+
+#define NR_IRQS 512
+#define in_interrupt() (0)
+
+/*
+ * printk flags
+ */
+
+#define KERN_EMERG "<0>" /* system is unusable */
+#define KERN_ALERT "<1>" /* action must be taken immediately */
+#define KERN_CRIT "<2>" /* critical conditions */
+#define KERN_ERR "<3>" /* error conditions */
+#define KERN_WARNING "<4>" /* warning conditions */
+#define KERN_NOTICE "<5>" /* normal but significant condition */
+#define KERN_INFO "<6>" /* informational */
+#define KERN_DEBUG "<7>" /* debug-level messages */
+
+/*
+ * Misc
+ */
+
+
+#define inter_module_get(n) cfs_symbol_get(n)
+#define inter_module_put(n) cfs_symbol_put(n)
+
+#ifndef likely
+#define likely(exp) (exp)
+#endif
+#ifndef unlikely
+#define unlikely(exp) (exp)
+#endif
+
+#define lock_kernel() do {} while(0)
+#define unlock_kernel() do {} while(0)
+
+#define exit_mm(t) do {} while(0)
+#define exit_files(t) do {} while(0)
+
+#define CAP_SYS_ADMIN 0
+#define CAP_SYS_ROOT 1
+
+#define capable(a) (TRUE)
+
+#define USERMODEHELPER(path, argv, envp) (0)
+
+
+#define local_irq_save(x)
+#define local_irq_restore(x)
+
+#define cfs_assert ASSERT
+
+#define THREAD_NAME
+
+#else /* !__KERNEL__ */
+
+#define PAGE_CACHE_SIZE PAGE_SIZE
+#define PAGE_CACHE_MASK PAGE_MASK
+
+#define getpagesize() (PAGE_SIZE)
+
+
+typedef struct {
+ int foo;
+} pthread_mutex_t;
+
+typedef struct {
+ int foo;
+} pthread_cond_t;
+
+#define pthread_mutex_init(x, y) do {} while(0)
+#define pthread_cond_init(x, y) do {} while(0)
+
+#define pthread_mutex_lock(x) do {} while(0)
+#define pthread_mutex_unlock(x) do {} while(0)
+
+#define pthread_cond_wait(x,y) do {} while(0)
+#define pthread_cond_broadcast(x) do {} while(0)
+
+typedef struct file {
+ int foo;
+} cfs_file_t;
+
+typedef struct cfs_proc_dir_entry {
+    void *data;
+} cfs_proc_dir_entry_t;
+
+
+
+#include "../user-prim.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+/* map case-insensitive compares to the MSVC CRT equivalents */
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define snprintf _snprintf
+#define getpid() (0)
+
+
+#define getpwuid(x) (NULL)
+#define getgrgid(x) (NULL)
+
+int gethostname(char * name, int namelen);
+
+#define setlinebuf(x) do {} while(0)
+
+
+NTSYSAPI VOID NTAPI DebugBreak();
+
+
+static inline void cfs_enter_debugger(void)
+{
+#if _X86_
+ __asm int 3;
+#else
+ DebugBreak();
+#endif
+}
+
+/* Maximum EA Information Length */
+#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15)
+
+
+/*
+ * proc user mode routines
+ */
+
+HANDLE cfs_proc_open (char * filename, int oflag);
+int cfs_proc_close(HANDLE handle);
+int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count);
+int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count);
+int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer);
+
+
+/*
+ * Native API definitions
+ */
+
+//
+// Disk I/O Routines
+//
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtReadFile(HANDLE FileHandle,
+ HANDLE Event OPTIONAL,
+ PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+ PVOID ApcContext OPTIONAL,
+ PIO_STATUS_BLOCK IoStatusBlock,
+ PVOID Buffer,
+ ULONG Length,
+ PLARGE_INTEGER ByteOffset OPTIONAL,
+ PULONG Key OPTIONAL);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtWriteFile(HANDLE FileHandle,
+ HANDLE Event OPTIONAL,
+ PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+ PVOID ApcContext OPTIONAL,
+ PIO_STATUS_BLOCK IoStatusBlock,
+ PVOID Buffer,
+ ULONG Length,
+ PLARGE_INTEGER ByteOffset OPTIONAL,
+ PULONG Key OPTIONAL);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtClose(HANDLE Handle);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtCreateFile(PHANDLE FileHandle,
+ ACCESS_MASK DesiredAccess,
+ POBJECT_ATTRIBUTES ObjectAttributes,
+ PIO_STATUS_BLOCK IoStatusBlock,
+ PLARGE_INTEGER AllocationSize OPTIONAL,
+ ULONG FileAttributes,
+ ULONG ShareAccess,
+ ULONG CreateDisposition,
+ ULONG CreateOptions,
+ PVOID EaBuffer OPTIONAL,
+ ULONG EaLength);
+
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtDeviceIoControlFile(
+ IN HANDLE FileHandle,
+ IN HANDLE Event,
+ IN PIO_APC_ROUTINE ApcRoutine,
+ IN PVOID ApcContext,
+ OUT PIO_STATUS_BLOCK IoStatusBlock,
+ IN ULONG IoControlCode,
+ IN PVOID InputBuffer,
+ IN ULONG InputBufferLength,
+ OUT PVOID OutputBuffer,
+ IN ULONG OutputBufferLength
+ );
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtFsControlFile(
+ IN HANDLE FileHandle,
+ IN HANDLE Event OPTIONAL,
+ IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+ IN PVOID ApcContext OPTIONAL,
+ OUT PIO_STATUS_BLOCK IoStatusBlock,
+ IN ULONG FsControlCode,
+ IN PVOID InputBuffer OPTIONAL,
+ IN ULONG InputBufferLength,
+ OUT PVOID OutputBuffer OPTIONAL,
+ IN ULONG OutputBufferLength
+);
+
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtQueryInformationFile(
+ IN HANDLE FileHandle,
+ OUT PIO_STATUS_BLOCK IoStatusBlock,
+ OUT PVOID FileInformation,
+ IN ULONG Length,
+ IN FILE_INFORMATION_CLASS FileInformationClass
+ );
+
+//
+// Random routines ...
+//
+
+NTSYSAPI
+ULONG
+NTAPI
+RtlRandom(
+ IN OUT PULONG Seed
+ );
+
+#endif /* __KERNEL__ */
+
+
+//
+// Inode mode flags (Linux defines these in octal; hex is used here)
+//
+
+#undef S_IFMT
+#undef S_IFDIR
+#undef S_IFCHR
+#undef S_IFREG
+#undef S_IREAD
+#undef S_IWRITE
+#undef S_IEXEC
+
+#define S_IFMT 0x0F000 /* 017 0000 */
+#define S_IFSOCK 0x0C000 /* 014 0000 */
+#define S_IFLNK 0x0A000 /* 012 0000 */
+#define S_IFREG 0x08000 /* 010 0000 */
+#define S_IFBLK 0x06000 /* 006 0000 */
+#define S_IFDIR 0x04000 /* 004 0000 */
+#define S_IFCHR 0x02000 /* 002 0000 */
+#define S_IFIFO 0x01000 /* 001 0000 */
+#define S_ISUID 0x00800 /* 000 4000 */
+#define S_ISGID 0x00400 /* 000 2000 */
+#define S_ISVTX 0x00200 /* 000 1000 */
+
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
+#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
+
+#define S_IPERMISSION_MASK 0x1FF /* 0 0777 */
+
+#define S_IRWXU 0x1C0 /* 0 0700 */
+#define S_IRUSR 0x100 /* 0 0400 */
+#define S_IWUSR 0x080 /* 0 0200 */
+#define S_IXUSR 0x040 /* 0 0100 */
+
+#define S_IRWXG 0x038 /* 0 0070 */
+#define S_IRGRP 0x020 /* 0 0040 */
+#define S_IWGRP 0x010 /* 0 0020 */
+#define S_IXGRP 0x008 /* 0 0010 */
+
+#define S_IRWXO 0x007 /* 0 0007 */
+#define S_IROTH 0x004 /* 0 0004 */
+#define S_IWOTH 0x002 /* 0 0002 */
+#define S_IXOTH 0x001 /* 0 0001 */
+
+#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
+#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH)
+#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH)
+#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
+
+/*
+ * linux ioctl coding definitions
+ */
+
+#define _IOC_NRBITS 8
+#define _IOC_TYPEBITS 8
+#define _IOC_SIZEBITS 14
+#define _IOC_DIRBITS 2
+
+#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT 0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE 0U
+#define _IOC_WRITE 1U
+#define _IOC_READ 2U
+
+#define _IOC(dir,type,nr,size) \
+ (((dir) << _IOC_DIRSHIFT) | \
+ ((type) << _IOC_TYPESHIFT) | \
+ ((nr) << _IOC_NRSHIFT) | \
+ ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
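+
+/*
+ * Editor's worked example of the encoding above: _IOR('f', 1, int) with
+ * sizeof(int) == 4 packs dir=_IOC_READ(2)<<30, size=4<<16, type=0x66<<8
+ * and nr=1, giving 0x80046601; _IOC_SIZE(0x80046601) recovers 4 and
+ * _IOC_TYPE() recovers 'f'.
+ */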
+
+/*
+ * I/O vector ...
+ */
+
+struct iovec
+{
+ void *iov_base;
+ size_t iov_len;
+};
+
+
+#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF))
+/*
+ * Convert a string to an unsigned long long integer.
+ *
+ * Ignores `locale' stuff. Assumes that the upper and lower case
+ * alphabets and digits are each contiguous.
+ */
+static inline __u64
+strtoull(
+ char *nptr,
+ char **endptr,
+ int base)
+{
+ char *s = nptr;
+ __u64 acc, cutoff;
+ int c, neg = 0, any, cutlim;
+
+ /*
+ * See strtol for comments as to the logic used.
+ */
+ do {
+ c = *s++;
+ } while (isspace(c));
+ if (c == '-') {
+ neg = 1;
+ c = *s++;
+ } else if (c == '+')
+ c = *s++;
+ if ((base == 0 || base == 16) &&
+ c == '0' && (*s == 'x' || *s == 'X')) {
+ c = s[1];
+ s += 2;
+ base = 16;
+ }
+ if (base == 0)
+ base = c == '0' ? 8 : 10;
+ cutoff = (__u64)ULONG_LONG_MAX / (__u64)base;
+ cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base);
+ for (acc = 0, any = 0;; c = *s++) {
+ if (isdigit(c))
+ c -= '0';
+ else if (isalpha(c))
+ c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+ else
+ break;
+ if (c >= base)
+ break;
+ if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+ any = -1;
+ else {
+ any = 1;
+ acc *= base;
+ acc += c;
+ }
+ }
+ if (any < 0) {
+ acc = ULONG_LONG_MAX;
+ } else if (neg)
+ acc = 0 - acc;
+ if (endptr != 0)
+ *endptr = (char *) (any ? s - 1 : nptr);
+ return (acc);
+}
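+
+/*
+ * Editor's usage note: with base 0 the prefix selects the radix, e.g.
+ * strtoull("0x10", NULL, 0) == 16, strtoull("010", NULL, 0) == 8 and
+ * strtoull("10", NULL, 0) == 10.
+ */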
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Definitions for the TCP/IP (TDI) layer on Winnt (kernel-level).
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_TCPIP_H__
+#define __LIBCFS_WINNT_TCPIP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+#ifdef __KERNEL__
+
+//
+// ksocknal definitions
+//
+
+// iovec is defined in libcfs: winnt_prim.h
+// lnetkiov_t is defined in lnet/types.h
+
+typedef struct socket ksock_tconn_t;
+
+// completion notification callback routine
+
+typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr);
+
+/* completion routine to update tx structure for async sending */
+typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
+
+//
+// tdinal definitions
+//
+
+
+#if TDINAL_DBG
+#define KsPrint(X) KsPrintf X
+#else
+#define KsPrint(X)
+#endif
+
+
+//
+// Socket Addresses Related ...
+//
+
+#define INADDR_ANY (ULONG)0x00000000
+#define INADDR_LOOPBACK (ULONG)0x7f000001
+#define INADDR_BROADCAST (ULONG)0xffffffff
+#define INADDR_NONE (ULONG)0xffffffff
+
+/*
+ * TCP / IP options
+ */
+
+#define SOL_TCP 6
+#define SOL_UDP 17
+
+
+#define TL_INSTANCE 0
+
+#define TCP_SOCKET_NODELAY 1 // disabling "Nagle"
+#define TCP_SOCKET_KEEPALIVE 2
+#define TCP_SOCKET_OOBINLINE 3
+#define TCP_SOCKET_BSDURGENT 4
+#define TCP_SOCKET_ATMARK 5
+#define TCP_SOCKET_WINDOW 6
+
+
+/*
+ * Flags we can use with send and recv. Those added for 1003.1g are
+ * included, though not all are supported yet.
+ */
+
+#define MSG_OOB 1
+#define MSG_PEEK 2
+#define MSG_DONTROUTE 4
+#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC 8
+#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC 0x20
+#define MSG_DONTWAIT 0x40 /* Nonblocking io */
+#define MSG_EOR 0x80 /* End of record */
+#define MSG_WAITALL 0x100 /* Wait for a full request */
+#define MSG_FIN 0x200
+#define MSG_SYN 0x400
+#define MSG_CONFIRM 0x800 /* Confirm path validity */
+#define MSG_RST 0x1000
+#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE 0x8000 /* Sender will send more */
+
+#define MSG_EOF MSG_FIN
+
+
+//
+// Maximum TRANSPORT_ADDRESS Length
+//
+// It must be >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
+// + TDI_ADDRESS_LENGTH_IP
+//
+// It is defined a little larger and 16-byte aligned to avoid possible overflow.
+//
+
+#define MAX_ADDRESS_LENGTH (0x30)
+
+
+//
+// Maximum Listener Child Sockets
+//
+
+#define MAX_CHILD_LISTENERS (4)
+
+//
+// Maximum EA Information Length
+//
+
+#define EA_MAX_LENGTH ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \
+ TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \
+ MAX_ADDRESS_LENGTH )
+
+
+#define UDP_DEVICE_NAME L"\\Device\\Udp"
+#define TCP_DEVICE_NAME L"\\Device\\Tcp"
+
+
+/*
+ * TSDU definitions
+ */
+
+#define TDINAL_TSDU_DEFAULT_SIZE (0x10000)
+
+#define KS_TSDU_MAGIC 'KSTD'
+
+#define KS_TSDU_ATTACHED 0x00000001 // Attached to the socket receive tsdu list
+
+typedef struct _KS_TSDU {
+
+ ULONG Magic;
+ ULONG Flags;
+
+ struct list_head Link;
+
+ ULONG TotalLength; // Total size of KS_TSDU
+
+ ULONG StartOffset; // Start offset of the first Tsdu unit
+ ULONG LastOffset; // End offset of the last Tsdu unit
+
+/*
+ union {
+ KS_TSDU_DAT[];
+ KS_TSDU_BUF[];
+ KS_TSDU_MDL[];
+ }
+*/
+
+} KS_TSDU, *PKS_TSDU;
+
+#define TSDU_TYPE_BUF ((USHORT)0x5401)
+#define TSDU_TYPE_DAT ((USHORT)0x5402)
+#define TSDU_TYPE_MDL ((USHORT)0x5403)
+
+#define KS_TSDU_BUF_RECEIVING 0x0001
+typedef struct _KS_TSDU_BUF {
+
+ USHORT TsduType;
+ USHORT TsduFlags;
+
+ ULONG DataLength;
+ ULONG StartOffset;
+
+ PVOID UserBuffer;
+
+} KS_TSDU_BUF, *PKS_TSDU_BUF;
+
+#define KS_TSDU_DAT_RECEIVING 0x0001
+
+typedef struct _KS_TSDU_DAT {
+
+ USHORT TsduType;
+ USHORT TsduFlags;
+
+ ULONG DataLength;
+ ULONG StartOffset;
+
+ ULONG TotalLength;
+
+ UCHAR Data[1];
+
+} KS_TSDU_DAT, *PKS_TSDU_DAT;
+
+#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03)))
+#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data)))
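+
+/*
+ * Editor's worked example: KS_DWORD_ALIGN rounds up to a 4-byte multiple and
+ * FIELD_OFFSET(KS_TSDU_DAT, Data) is 16 (2 USHORTs + 3 ULONGs), so a 5-byte
+ * payload needs KS_TSDU_STRU_SIZE(5) = KS_DWORD_ALIGN(21) = 24 bytes.
+ */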
+
+typedef struct _KS_TSDU_MDL {
+
+ USHORT TsduType;
+ USHORT TsduFlags;
+
+ ULONG DataLength;
+ ULONG StartOffset;
+
+ PMDL Mdl;
+ PVOID Descriptor;
+
+} KS_TSDU_MDL, *PKS_TSDU_MDL;
+
+
+typedef struct _KS_TSDUMGR {
+
+ struct list_head TsduList;
+ ULONG NumOfTsdu;
+ ULONG TotalBytes;
+ KEVENT Event;
+
+} KS_TSDUMGR, *PKS_TSDUMGR;
+
+
+typedef struct _KS_CHAIN {
+
+ KS_TSDUMGR Normal;
+ KS_TSDUMGR Expedited;
+
+} KS_CHAIN, *PKS_CHAIN;
+
+
+#define TDINAL_SCHED_FACTOR (1)
+#define CAN_BE_SCHED(Len, Limit) ((Len) >= ((Limit) >> TDINAL_SCHED_FACTOR))
+
+//
+// Handler Settings Indicator
+//
+
+#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1)
+
+
+typedef struct _KS_EVENT_HANDLERS {
+ BOOLEAN IsActive[TDI_EVENT_MAXIMUM_HANDLER];
+ PVOID Handler [TDI_EVENT_MAXIMUM_HANDLER];
+} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS;
+
+#define SetEventHandler(ha, ht, hr) do { \
+ ha.IsActive[ht] = TRUE; \
+ ha.Handler[ht] = (PVOID) (hr); \
+ } while(0)
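+
+/*
+ * Editor's sketch of installing a TDI event handler via the macro above.
+ * "KsTcpReceiveEventHandler" is a hypothetical callback name; the
+ * TDI_EVENT_RECEIVE constant comes from tdi.h. Each active entry would then
+ * be handed to the provider (e.g. with TdiBuildSetEventHandler).
+ */
+#if 0
+static VOID ks_install_handlers(void)
+{
+    KS_EVENT_HANDLERS handlers;
+
+    RtlZeroMemory(&handlers, sizeof(KS_EVENT_HANDLERS));
+    SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
+}
+#endif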
+
+//
+// KSock Internal Structures
+//
+
+typedef struct _KS_ADDRESS {
+
+ union {
+ TRANSPORT_ADDRESS Tdi;
+ UCHAR Pading[MAX_ADDRESS_LENGTH];
+ };
+
+ HANDLE Handle;
+ PFILE_OBJECT FileObject;
+
+} KS_ADDRESS, *PKS_ADDRESS;
+
+//
+// Structures for Disconnect Workitem
+//
+
+typedef struct _KS_DISCONNECT_WORKITEM {
+
+ WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection
+ ksock_tconn_t * tconn; // tdi connection
+ ULONG Flags; // connection broken / disconnection flags
+ KEVENT Event; // sync event
+
+} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM;
+
+
+typedef struct _KS_CONNECTION {
+
+ HANDLE Handle; // Handle of the tdi connection
+ PFILE_OBJECT FileObject; // FileObject of the conn object
+
+ PTRANSPORT_ADDRESS Remote; // the ConnectionInfo of this connection
+ PTDI_CONNECTION_INFORMATION ConnectionInfo;
+
+ ULONG nagle; // Tcp options
+
+} KS_CONNECTION, *PKS_CONNECTION;
+
+
+//
+// type definitions
+//
+
+typedef MDL ksock_mdl_t;
+typedef UNICODE_STRING ksock_unicode_name_t;
+typedef WORK_QUEUE_ITEM ksock_workitem_t;
+
+
+typedef KS_CHAIN ksock_chain_t;
+typedef KS_ADDRESS ksock_tdi_addr_t;
+typedef KS_CONNECTION ksock_tconn_info_t;
+typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t;
+
+
+//
+// Structures for transmission done Workitem
+//
+
+typedef struct _KS_TCPX_FINILIZE {
+ ksock_workitem_t item;
+ void * tx;
+} ksock_tcpx_fini_t;
+
+
+typedef struct ksock_backlogs {
+
+ struct list_head list; /* list to link the backlog connections */
+ int num; /* number of backlogs in the list */
+
+} ksock_backlogs_t;
+
+
+typedef struct ksock_daemon {
+
+ ksock_tconn_t * tconn; /* the listener connection object */
+ unsigned short nbacklogs; /* number of listening backlog conns */
+ unsigned short port; /* listening port number */
+ int shutdown; /* daemon thread is to exit */
+ struct list_head list; /* to be attached into ksock_nal_data_t*/
+
+} ksock_daemon_t ;
+
+
+typedef enum {
+
+ kstt_sender = 0, // normal sending connection type: an active connection,
+ // while a child tconn is a passive connection.
+
+ kstt_listener, // listener daemon type: acts only as a daemon, with no
+ // real connection. It manages child tconns to accept
+ // or refuse connecting requests from remote peers.
+
+ kstt_child, // accepted child connection type; its parent must be a listener
+ kstt_lasttype
+} ksock_tconn_type;
+
+typedef enum {
+
+ ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet
+
+ ksts_inited, // tconn structure initialized: so it now can be identified as
+ // a sender, listener or a child
+
+ ksts_bind, // tconn is bound: the local address object (ip/port) is created.
+ // after being bound, ksocknal_put_tconn must be used to release
+ // the tconn object; it is not safe to simply free its memory.
+
+ ksts_associated, // the connection object is created and associated with the
+ // address object, so it is ready to connect. only for child and sender.
+
+ ksts_connecting, // only used by child tconn: in the ConnectEvent handler routine,
+ // it indicates the child tconn is busy being connected to the peer.
+
+ ksts_connected, // the connection is built already: for sender and child
+
+ ksts_listening, // listener daemon is working, only for listener tconn
+
+ ksts_disconnected, // disconnected by user
+ ksts_aborted, // unexpectedly broken status
+
+ ksts_last // total number of tconn statuses
+} ksock_tconn_state;
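+
+/*
+ * Editor's summary of the progression implied by the comments above:
+ *
+ *   sender:   uninited -> inited -> bind -> associated -> connected
+ *   listener: uninited -> inited -> bind -> listening
+ *   child:    uninited -> inited -> associated -> connecting -> connected
+ *
+ * any of which may end in disconnected (by the user) or aborted (broken).
+ */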
+
+#define KS_TCONN_MAGIC 'KSTM'
+
+#define KS_TCONN_HANDLERS_SET 0x00000001 // Connection handlers are set.
+#define KS_TCONN_DISCONNECT_BUSY 0x00010000 // Disconnect Workitem is queued ...
+#define KS_TCONN_DESTROY_BUSY 0x00020000 // Destroy Workitem is queued ...
+
+#define KS_TCONN_DAEMON_STARTED 0x00100000 // indicates the daemon is started,
+ // only valid for listener
+
+struct socket {
+
+ ulong_ptr kstc_magic; /* Magic & Flags */
+ ulong_ptr kstc_flags;
+
+ spinlock_t kstc_lock; /* serialise lock*/
+ void * kstc_conn; /* ksock_conn_t */
+
+ ksock_tconn_type kstc_type; /* tdi connection Type */
+ ksock_tconn_state kstc_state; /* tdi connection state flag */
+
+ ksock_unicode_name_t kstc_dev; /* tcp transport device name */
+
+ ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */
+
+ atomic_t kstc_refcount; /* reference count of ksock_tconn */
+
+ struct list_head kstc_list; /* linked to global ksocknal_data */
+
+ union {
+
+ struct {
+ int nbacklog; /* total number of backlog tdi connections */
+ ksock_backlogs_t kstc_listening; /* listening backlog child connections */
+ ksock_backlogs_t kstc_accepted; /* connected backlog child connections */
+ event_t kstc_accept_event; /* Signaled by the accept handler;
+ ksocknal_wait_accpeted_conns waits on */
+ event_t kstc_destroy_event; /* Signaled when accepted child is released */
+ } listener;
+
+ struct {
+ ksock_tconn_info_t kstc_info; /* Connection Info if Connected */
+ ksock_chain_t kstc_recv; /* tsdu engine for data receiving */
+ ksock_chain_t kstc_send; /* tsdu engine for data sending */
+
+ int kstc_queued; /* Attached to Parent->ChildList ... */
+ int kstc_queueno; /* 0: Attached to Listening list
+ 1: Attached to Accepted list */
+
+ int kstc_busy; /* referenced by ConnectEventCallback? */
+ int kstc_accepted; /* is the connection fully established? */
+
+ struct list_head kstc_link; /* linked to parent tdi connection */
+ ksock_tconn_t * kstc_parent; /* pointer to its listener parent */
+ } child;
+
+ struct {
+ ksock_tconn_info_t kstc_info; /* Connection Info if Connected */
+ ksock_chain_t kstc_recv; /* tsdu engine for data receiving */
+ ksock_chain_t kstc_send; /* tsdu engine for data sending */
+ } sender;
+ };
+
+ ulong_ptr kstc_snd_wnd; /* Sending window size */
+ ulong_ptr kstc_rcv_wnd; /* Receiving window size */
+
+ ksock_workitem_t kstc_destroy; /* tconn destruction workitem */
+ ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */
+
+ ksock_schedule_cb kstc_sched_cb; /* completion notification callback routine */
+ ksock_update_tx kstc_update_tx; /* async send callback to update tx */
+};
+
+#define TDINAL_WINDOW_DEFAULT_SIZE (0x100000)
+
+
+struct _KS_UDP_COMPLETION_CONTEXT;
+struct _KS_TCP_COMPLETION_CONTEXT;
+
+
+typedef
+NTSTATUS
+(*PKS_UDP_COMPLETION_ROUTINE) (
+ IN PIRP Irp,
+ IN struct _KS_UDP_COMPLETION_CONTEXT
+ *UdpContext
+ );
+
+
+typedef
+NTSTATUS
+(*PKS_TCP_COMPLETION_ROUTINE) (
+ IN PIRP Irp,
+ IN struct _KS_TCP_COMPLETION_CONTEXT
+ *TcpContext
+ );
+
+//
+// Udp Irp Completion Context
+//
+
+typedef struct _KS_UDP_COMPLETION_CONTEXT {
+
+ PKEVENT Event;
+ union {
+ PFILE_OBJECT AddressObject;
+ ksock_tconn_t * tconn;
+ };
+
+ PKS_UDP_COMPLETION_ROUTINE CompletionRoutine;
+ PVOID CompletionContext;
+
+} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT;
+
+
+//
+// Tcp Irp Completion Context (used by tcp data recv/send)
+//
+
+typedef struct _KS_TCP_COMPLETION_CONTEXT {
+
+ PKEVENT Event; // Event to be waited on by Irp caller ...
+
+ ksock_tconn_t * tconn; // the tdi connection
+
+ PKS_TCP_COMPLETION_ROUTINE CompletionRoutine;
+ PVOID CompletionContext;
+ PVOID CompletionContext2;
+
+ PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager
+
+ //
+ // These two members are for NON_BLOCKING transmission
+ //
+
+ BOOLEAN bCounted; // indicates a refcount is needed to
+ // execute the CompletionRoutine
+ ULONG ReferCount; // Reference count of this structure
+
+} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT;
+
+typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t;
+
+
+/*
+ * tdi extensions
+ */
+
+#define IOCTL_TCP_QUERY_INFORMATION_EX \
+ CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS)
+#define IOCTL_TCP_SET_INFORMATION_EX \
+ CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
+
+
+#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\
+ { \
+ PIO_STACK_LOCATION _IRPSP; \
+ if ( CompRoutine != NULL) { \
+ IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
+ } else { \
+ IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \
+ } \
+ _IRPSP = IoGetNextIrpStackLocation (Irp); \
+ _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \
+ _IRPSP->DeviceObject = DevObj; \
+ _IRPSP->FileObject = FileObj; \
+ _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0; \
+ _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen; \
+ _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX; \
+ Irp->AssociatedIrp.SystemBuffer = Buffer; \
+ }
+
+
+#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\
+ { \
+ PIO_STACK_LOCATION _IRPSP; \
+ if ( CompRoutine != NULL) { \
+ IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
+ } else { \
+ IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \
+ } \
+ _IRPSP = IoGetNextIrpStackLocation (Irp); \
+ _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \
+ _IRPSP->DeviceObject = DevObj; \
+ _IRPSP->FileObject = FileObj; \
+ _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength; \
+ _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength; \
+ _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX; \
+ _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer; \
+ Irp->UserBuffer = OutBuffer; \
+ }
+
+
+typedef struct
+{
+ /*
+ * Tdinal internal definitions
+ */
+
+ int ksnd_init; /* initialisation state */
+
+ TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */
+
+ spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */
+
+ int ksnd_ntconns; /* number of tconns attached in list */
+ struct list_head ksnd_tconns; /* tdi connections list */
+ cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */
+ event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */
+
+ spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */
+
+ int ksnd_ntsdus; /* number of tsdu buffers allocated */
+ ulong_ptr ksnd_tsdu_size; /* the size of a single tsdu buffer */
+ cfs_mem_cache_t * ksnd_tsdu_slab; /* slab cache for tsdu buffer allocation */
+
+ int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */
+ struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. */
+
+ spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */
+ int ksnd_ndaemons; /* number of listening daemons */
+ struct list_head ksnd_daemons; /* listening daemon list */
+ event_t ksnd_daemon_exit; /* the last daemon quitting should signal it */
+
+} ks_data_t;
+
+int
+ksocknal_init_tdi_data(void);
+
+void
+ksocknal_fini_tdi_data(void);
+
+
+#endif /* __KERNEL__ */
+#endif /* __LIBCFS_WINNT_TCPIP_H__ */
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for Winnt (kernel and user-level).
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LINUX_TIME_H__
+#define __LIBCFS_WINNT_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ * cfs_time_t represents point in time. This is internal kernel
+ * time rather than "wall clock". This time bears no
+ * relation to gettimeofday().
+ *
+ * cfs_duration_t represents time interval with resolution of internal
+ * platform clock
+ *
+ * cfs_fs_time_t represents instance in world-visible time. This is
+ * used in file-system time-stamps
+ *
+ * cfs_time_t cfs_time_current(void);
+ * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t);
+ * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t);
+ * int cfs_time_before (cfs_time_t, cfs_time_t);
+ * int cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ * cfs_duration_t cfs_duration_build(int64_t);
+ *
+ * time_t cfs_duration_sec (cfs_duration_t);
+ * void cfs_duration_usec(cfs_duration_t, struct timeval *);
+ * void cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ * void cfs_fs_time_current(cfs_fs_time_t *);
+ * time_t cfs_fs_time_sec (cfs_fs_time_t *);
+ * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *);
+ * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *);
+ * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ * cfs_duration_t cfs_time_minimal_timeout(void)
+ *
+ * CFS_TIME_FORMAT
+ * CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t) 1000000)
+
+#define HZ (100)
+
+struct timeval {
+ time_t tv_sec; /* seconds */
+ suseconds_t tv_usec; /* microseconds */
+};
+
+struct timespec {
+ ulong_ptr tv_sec;
+ ulong_ptr tv_nsec;
+};
+
+#ifdef __KERNEL__
+
+#include <libcfs/winnt/portals_compat25.h>
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef struct timeval cfs_fs_time_t;
+
+typedef u_int64_t cfs_time_t;
+typedef int64_t cfs_duration_t;
+
+static inline void do_gettimeofday(struct timeval *tv)
+{
+ LARGE_INTEGER Time;
+
+ KeQuerySystemTime(&Time);
+
+ tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000);
+ tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
+}
+
+static inline cfs_time_t JIFFIES()
+{
+ LARGE_INTEGER Tick;
+ LARGE_INTEGER Elapse;
+
+ KeQueryTickCount(&Tick);
+
+ Elapse.QuadPart = Tick.QuadPart * KeQueryTimeIncrement();
+ Elapse.QuadPart /= (10000000 / HZ);
+
+ return Elapse.QuadPart;
+}
+
+static inline cfs_time_t cfs_time_current(void)
+{
+ return JIFFIES();
+}
+
+static inline cfs_time_t cfs_time_current_sec(void)
+{
+ return (JIFFIES() / HZ);
+}
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+ return (t + d);
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+ return (t1 - t2);
+}
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+ return ((int64_t)t1 - (int64_t)t2) < 0;
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+ return ((int64_t)t1 - (int64_t)t2) <= 0;
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+ ULONG Linux;
+ LARGE_INTEGER Sys;
+
+ KeQuerySystemTime(&Sys);
+
+ RtlTimeToSecondsSince1970(&Sys, &Linux);
+
+ t->tv_sec = Linux;
+    t->tv_usec = (suseconds_t)(Sys.QuadPart % 10000000) / 10;
+}
+
+static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+ return t->tv_sec;
+}
+
+static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+ return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2));
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+ return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2));
+}
+
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+ return (cfs_duration_t)seconds * HZ;
+}
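+
+/*
+ * Editor's usage sketch (function name hypothetical): has the deadline five
+ * seconds after `start` already passed, using only the primitives above?
+ */
+#if 0
+static int cfs_deadline_passed(cfs_time_t start)
+{
+        cfs_time_t deadline = cfs_time_add(start, cfs_time_seconds(5));
+        return cfs_time_beforeq(deadline, cfs_time_current());
+}
+#endif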
+
+#if 0 // defined in libcfs/libcfs.h
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+ return (JIFFIES() + seconds * HZ);
+}
+#endif
+
+static inline cfs_time_t cfs_duration_sec(cfs_duration_t d)
+{
+ return d / HZ;
+}
+
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        /* seconds must be computed first: the remainder below depends on it */
+        s->tv_sec = (time_t)(d / HZ);
+        s->tv_usec = (suseconds_t)((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION / HZ);
+}
+
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        /* seconds must be computed first: the remainder below depends on it */
+        s->tv_sec = (ulong_ptr)(d / HZ);
+        s->tv_nsec = (ulong_ptr)((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_BILLION / HZ);
+}
+
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+ return 1;
+}
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+ *v = *t;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+ s->tv_sec = t->tv_sec;
+ s->tv_nsec = t->tv_usec * 1000;
+}
+
+
+/* the inline function cfs_time_minimal_timeout() cannot be used
+ * to initialize a static variable */
+#define CFS_MIN_DELAY (1)
+
+#define LTIME_S(t) (t)
+
+#define CFS_TIME_T "%I64u"
+#define CFS_DURATION_T "%I64d"
+
+#else /* !__KERNEL__ */
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+#include <libcfs/user-time.h>
+
+
+//
+// Time routines ...
+//
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtQuerySystemTime(
+ OUT PLARGE_INTEGER CurrentTime
+ );
+
+
+NTSYSAPI
+BOOLEAN
+NTAPI
+RtlTimeToSecondsSince1970(
+ IN PLARGE_INTEGER Time,
+ OUT PULONG ElapsedSeconds
+ );
+
+
+NTSYSAPI
+VOID
+NTAPI
+RtlSecondsSince1970ToTime(
+ IN ULONG ElapsedSeconds,
+ OUT PLARGE_INTEGER Time
+ );
+
+NTSYSAPI
+VOID
+NTAPI
+Sleep(
+ DWORD dwMilliseconds // sleep time in milliseconds
+);
+
+
+static inline void sleep(int time)
+{
+ DWORD Time = 1000 * time;
+ Sleep(Time);
+}
+
+
+static inline void do_gettimeofday(struct timeval *tv)
+{
+ LARGE_INTEGER Time;
+
+ NtQuerySystemTime(&Time);
+
+ tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000);
+ tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
+}
+
+static inline int gettimeofday(struct timeval *tv, void * tz)
+{
+ do_gettimeofday(tv);
+ return 0;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __LIBCFS_WINNT_LINUX_TIME_H__ */
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic types definitions
+ *
+ */
+
+#ifndef _WINNT_TYPE_H
+#define _WINNT_TYPE_H
+
+#ifdef __KERNEL__
+
+#include <ntifs.h>
+#include <windef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include <tdi.h>
+#include <tdikrnl.h>
+#include <tdiinfo.h>
+
+#else
+
+#include <ntddk.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <time.h>
+#include <io.h>
+#include <string.h>
+#include <assert.h>
+
+#endif
+
+
+#define __LITTLE_ENDIAN
+
+#define inline __inline
+#define __inline__ __inline
+
+typedef unsigned __int8 __u8;
+typedef signed __int8 __s8;
+
+typedef signed __int16 __s16;
+typedef unsigned __int16 __u16;
+
+typedef signed __int32 __s32;
+typedef unsigned __int32 __u32;
+
+typedef signed __int64 __s64;
+typedef unsigned __int64 __u64;
+
+typedef unsigned long ULONG;
+
+
+#if defined(_WIN64)
+ #define long_ptr __int64
+ #define ulong_ptr unsigned __int64
+ #define BITS_PER_LONG (64)
+#else
+ #define long_ptr long
+ #define ulong_ptr unsigned long
+ #define BITS_PER_LONG (32)
+
+#endif
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+
+typedef __u64 uint64_t;
+typedef __u64 u_int64_t;
+typedef __s64 int64_t;
+
+typedef long ssize_t;
+
+typedef __u32 suseconds_t;
+
+typedef __u32 pid_t, tid_t;
+
+typedef __u16 uid_t, gid_t;
+
+typedef __u16 mode_t;
+typedef __u16 umode_t;
+
+typedef ulong_ptr sigset_t;
+
+typedef uint64_t loff_t;
+typedef HANDLE cfs_handle_t;
+typedef uint64_t cycles_t;
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE ((HANDLE)-1)
+#endif
+
+
+#ifdef __KERNEL__ /* kernel */
+
+typedef __u32 off_t;
+typedef __u32 time_t;
+
+typedef unsigned short kdev_t;
+
+#else /* !__KERNEL__ */
+
+typedef int BOOL;
+typedef __u8 BYTE;
+typedef __u16 WORD;
+typedef __u32 DWORD;
+
+#endif /* __KERNEL__ */
+
+/*
+ * Constants suffix
+ */
+
+#define ULL i64
+#define ull i64
+
+/*
+ * Winnt kernel has no capabilities.
+ */
+
+typedef __u32 cfs_kernel_cap_t;
+
+#define INT_MAX ((int)(~0U>>1))
+#define INT_MIN (-INT_MAX - 1)
+#define UINT_MAX (~0U)
+
+#endif /* _WINNT_TYPE_H */
+
+
+/*
+ * Bytes order
+ */
+
+//
+// Byte order swapping routines
+//
+
+
+#define ___swab16(x) RtlUshortByteSwap(x)
+#define ___swab32(x) RtlUlongByteSwap(x)
+#define ___swab64(x) RtlUlonglongByteSwap(x)
+
+#define ___constant_swab16(x) \
+ ((__u16)( \
+ (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(x) & (__u16)0xff00U) >> 8) ))
+
+#define ___constant_swab32(x) \
+ ((__u32)( \
+ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
+
+#define ___constant_swab64(x) \
+ ((__u64)( \
+ (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \
+ (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \
+ (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \
+ (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) ))
+
+
+#define __swab16(x) ___constant_swab16(x)
+#define __swab32(x) ___constant_swab32(x)
+#define __swab64(x) ___constant_swab64(x)
+
+#define __swab16s(x) do { *(x) = __swab16((USHORT)(*(x)));} while(0)
+#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0)
+#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0)
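+
+/* editor's worked example: __swab16(0x1234) == 0x3412 and
+ * __swab32(0x12345678) == 0x78563412. */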
+
+#define __constant_htonl(x) ___constant_swab32((x))
+#define __constant_ntohl(x) ___constant_swab32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) ((__u64)(x))
+#define __constant_le64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_le32(x) ((__u32)(x))
+#define __constant_le32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) ((__u64)(x))
+#define __le64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_le32(x) ((__u32)(x))
+#define __le32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) __swab64((x))
+#define __be64_to_cpu(x) __swab64((x))
+#define __cpu_to_be32(x) __swab32((x))
+#define __be32_to_cpu(x) __swab32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) (*(__u64*)(x))
+#define __le64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_le32p(x) (*(__u32*)(x))
+#define __le32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) __swab64p((x))
+#define __be64_to_cpup(x) __swab64p((x))
+#define __cpu_to_be32p(x) __swab32p((x))
+#define __be32_to_cpup(x) __swab32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) do {} while (0)
+#define __le64_to_cpus(x) do {} while (0)
+#define __cpu_to_le32s(x) do {} while (0)
+#define __le32_to_cpus(x) do {} while (0)
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) __swab64s((x))
+#define __be64_to_cpus(x) __swab64s((x))
+#define __cpu_to_be32s(x) __swab32s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#ifndef cpu_to_le64
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#endif
+
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+
+
+//
+// Network to host byte swap functions
+//
+
+#define ntohl(x) ( ( ( ( x ) & 0x000000ff ) << 24 ) | \
+ ( ( ( x ) & 0x0000ff00 ) << 8 ) | \
+ ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \
+ ( ( ( x ) & 0xff000000 ) >> 24 ) )
+
+#define ntohs(x) ( ( ( ( x ) & 0xff00 ) >> 8 ) | \
+ ( ( ( x ) & 0x00ff ) << 8 ) )
+
+
+#define htonl(x) ntohl(x)
+#define htons(x) ntohs(x)
+
+
+
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* I/O error */
+#define ENXIO 6 /* No such device or address */
+#define E2BIG 7 /* Arg list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file number */
+#define ECHILD 10 /* No child processes */
+#define EAGAIN 11 /* Try again */
+#define ENOMEM 12 /* Out of memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#define ENOTBLK 15 /* Block device required */
+#define EBUSY 16 /* Device or resource busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* No such device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* File table overflow */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Not a typewriter */
+#define ETXTBSY 26 /* Text file busy */
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+#define EDOM 33 /* Math argument out of domain of func */
+#define ERANGE 34 /* Math result not representable */
+#undef EDEADLK
+#define EDEADLK 35 /* Resource deadlock would occur */
+#undef ENAMETOOLONG
+#define ENAMETOOLONG 36 /* File name too long */
+#undef ENOLCK
+#define ENOLCK 37 /* No record locks available */
+#undef ENOSYS
+#define ENOSYS 38 /* Function not implemented */
+#undef ENOTEMPTY
+#define ENOTEMPTY 39 /* Directory not empty */
+#define ELOOP 40 /* Too many symbolic links encountered */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define ENOMSG 42 /* No message of desired type */
+#define EIDRM 43 /* Identifier removed */
+#define ECHRNG 44 /* Channel number out of range */
+#define EL2NSYNC 45 /* Level 2 not synchronized */
+#define EL3HLT 46 /* Level 3 halted */
+#define EL3RST 47 /* Level 3 reset */
+#define ELNRNG 48 /* Link number out of range */
+#define EUNATCH 49 /* Protocol driver not attached */
+#define ENOCSI 50 /* No CSI structure available */
+#define EL2HLT 51 /* Level 2 halted */
+#define EBADE 52 /* Invalid exchange */
+#define EBADR 53 /* Invalid request descriptor */
+#define EXFULL 54 /* Exchange full */
+#define ENOANO 55 /* No anode */
+#define EBADRQC 56 /* Invalid request code */
+#define EBADSLT 57 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EMULTIHOP 72 /* Multihop attempted */
+#define EDOTDOT 73 /* RFS specific error */
+#define EBADMSG 74 /* Not a data message */
+#define EOVERFLOW 75 /* Value too large for defined data type */
+#define ENOTUNIQ 76 /* Name not unique on network */
+#define EBADFD 77 /* File descriptor in bad state */
+#define EREMCHG 78 /* Remote address changed */
+#define ELIBACC 79 /* Can not access a needed shared library */
+#define ELIBBAD 80 /* Accessing a corrupted shared library */
+#define ELIBSCN 81 /* .lib section in a.out corrupted */
+#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 83 /* Cannot exec a shared library directly */
+#undef EILSEQ
+#define EILSEQ 84 /* Illegal byte sequence */
+#define ERESTART 85 /* Interrupted system call should be restarted */
+#define ESTRPIPE 86 /* Streams pipe error */
+#define EUSERS 87 /* Too many users */
+#define ENOTSOCK 88 /* Socket operation on non-socket */
+#define EDESTADDRREQ 89 /* Destination address required */
+#define EMSGSIZE 90 /* Message too long */
+#define EPROTOTYPE 91 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 92 /* Protocol not available */
+#define EPROTONOSUPPORT 93 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
+#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 96 /* Protocol family not supported */
+#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
+#define EADDRINUSE 98 /* Address already in use */
+#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define ENETDOWN 100 /* Network is down */
+#define ENETUNREACH 101 /* Network is unreachable */
+#define ENETRESET 102 /* Network dropped connection because of reset */
+#define ECONNABORTED 103 /* Software caused connection abort */
+#define ECONNRESET 104 /* Connection reset by peer */
+#define ENOBUFS 105 /* No buffer space available */
+#define EISCONN 106 /* Transport endpoint is already connected */
+#define ENOTCONN 107 /* Transport endpoint is not connected */
+#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 109 /* Too many references: cannot splice */
+#define ETIMEDOUT 110 /* Connection timed out */
+#define ECONNREFUSED 111 /* Connection refused */
+#define EHOSTDOWN 112 /* Host is down */
+#define EHOSTUNREACH 113 /* No route to host */
+#define EALREADY 114 /* Operation already in progress */
+#define EINPROGRESS 115 /* Operation now in progress */
+#define ESTALE 116 /* Stale NFS file handle */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EDQUOT 122 /* Quota exceeded */
+
+#define ENOMEDIUM 123 /* No medium found */
+#define EMEDIUMTYPE 124 /* Wrong medium type */
+
+/* Should never be seen by user programs */
+#define ERESTARTSYS 512
+#define ERESTARTNOINTR 513
+#define ERESTARTNOHAND 514 /* restart if no handler.. */
+#define ENOIOCTLCMD 515 /* No ioctl command */
+
+/* Defined for the NFSv3 protocol */
+#define EBADHANDLE 521 /* Illegal NFS file handle */
+#define ENOTSYNC 522 /* Update synchronization mismatch */
+#define EBADCOOKIE 523 /* Cookie is stale */
+#define ENOTSUPP 524 /* Operation is not supported */
+#define ETOOSMALL 525 /* Buffer or request is too small */
+#define ESERVERFAULT 526 /* An untranslatable error occurred */
+#define EBADTYPE 527 /* Type not supported by server */
+#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
+
+
+
+/* open/fcntl - O_SYNC is only implemented on block devices and on files
+ located on an ext2 file system */
+#define O_ACCMODE 0003
+#define O_RDONLY 00
+#define O_WRONLY 01
+#define O_RDWR 02
+#define O_CREAT 0100 /* not fcntl */
+#define O_EXCL 0200 /* not fcntl */
+#define O_NOCTTY 0400 /* not fcntl */
+#define O_TRUNC 01000 /* not fcntl */
+#define O_APPEND 02000
+#define O_NONBLOCK 04000
+#define O_NDELAY O_NONBLOCK
+#define O_SYNC 010000
+#define FASYNC 020000 /* fcntl, for BSD compatibility */
+#define O_DIRECT 040000 /* direct disk access hint */
+#define O_LARGEFILE 0100000
+#define O_DIRECTORY 0200000 /* must be a directory */
+#define O_NOFOLLOW 0400000 /* don't follow links */
+
+#define F_DUPFD 0 /* dup */
+#define F_GETFD 1 /* get close_on_exec */
+#define F_SETFD 2 /* set/clear close_on_exec */
+#define F_GETFL 3 /* get file->f_flags */
+#define F_SETFL 4 /* set file->f_flags */
+#define F_GETLK 5
+#define F_SETLK 6
+#define F_SETLKW 7
+
+#define F_SETOWN 8 /* for sockets. */
+#define F_GETOWN 9 /* for sockets. */
+#define F_SETSIG 10 /* for sockets. */
+#define F_GETSIG 11 /* for sockets. */
+
+#define F_GETLK64 12 /* using 'struct flock64' */
+#define F_SETLK64 13
+#define F_SETLKW64 14
+
+/* for F_[GET|SET]FD */
+#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK 0
+#define F_WRLCK 1
+#define F_UNLCK 2
+
+/* for old implementation of bsd flock () */
+#define F_EXLCK 4 /* or 3 */
+#define F_SHLCK 8 /* or 4 */
+
+/* for leases */
+#define F_INPROGRESS 16
+
+/* operations for bsd flock(), also used by the kernel implementation */
+#define LOCK_SH 1 /* shared lock */
+#define LOCK_EX 2 /* exclusive lock */
+#define LOCK_NB 4 /* or'd with one of the above to prevent
+ blocking */
+#define LOCK_UN 8 /* remove lock */
+
+#define LOCK_MAND 32 /* This is a mandatory flock */
+#define LOCK_READ 64 /* ... Which allows concurrent read operations */
+#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */
+#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */
+
+#endif
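
The LOCK_* operations above are bit flags that combine with OR. A minimal
user-space sketch (illustrative only, not part of this patch; assumes a
hosted environment with a POSIX flock()):

    #include <sys/file.h>   /* flock(), LOCK_* */
    #include <errno.h>

    /* Try to take an exclusive lock without blocking. Returns 1 on
     * success, 0 if another holder exists, -errno on real errors. */
    static int try_exclusive_lock(int fd)
    {
            if (flock(fd, LOCK_EX | LOCK_NB) == 0)
                    return 1;
            return (errno == EWOULDBLOCK) ? 0 : -errno;
    }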
+
+
+#ifndef LIBCFS_SIGNAL_H
+#define LIBCFS_SIGNAL_H
+
+/*
+ * signal values ...
+ */
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/*
+#define SIGLOST 29
+*/
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+/* These should not be considered constants from userland. */
+#define SIGRTMIN 32
+#define SIGRTMAX (_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP 0x00000001
+#define SA_NOCLDWAIT 0x00000002 /* not supported yet */
+#define SA_SIGINFO 0x00000004
+#define SA_ONSTACK 0x08000000
+#define SA_RESTART 0x10000000
+#define SA_NODEFER 0x40000000
+#define SA_RESETHAND 0x80000000
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */
+
+#define SA_RESTORER 0x04000000
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK 1
+#define SS_DISABLE 2
+
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+
+
+#define sigmask(sig) ((__u32)1 << ((sig) - 1))
+
+#endif // LIBCFS_SIGNAL_H
\ No newline at end of file
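
The sigmask() macro above maps signal number N to bit N-1 of a 32-bit
mask. A small worked sketch (illustrative only, not part of this patch):

    /* Build a mask covering SIGINT (2) and SIGTERM (15):
     *   sigmask(SIGINT)  == 1 << 1  == 0x0002
     *   sigmask(SIGTERM) == 1 << 14 == 0x4000
     * so blocking both yields the mask 0x4002. */
    __u32 blocked = sigmask(SIGINT) | sigmask(SIGTERM);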
#ifndef __LNET_API_SUPPORT_H__
#define __LNET_API_SUPPORT_H__
-#ifndef __KERNEL__
-# include <stdio.h>
-# include <stdlib.h>
-# include <unistd.h>
-# include <time.h>
+#if defined(__linux__)
+#include <lnet/linux/api-support.h>
+#elif defined(__APPLE__)
+#include <lnet/darwin/api-support.h>
+#elif defined(__WINNT__)
+#include <lnet/winnt/api-support.h>
+#else
+#error Unsupported Operating System
#endif
#include <lnet/types.h>
-EXTRA_DIST := lib-lnet.h lib-types.h lnet.h
+EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h
--- /dev/null
+#ifndef __DARWIN_API_SUPPORT_H__
+#define __DARWIN_API_SUPPORT_H__
+
+#ifndef __LNET_API_SUPPORT_H__
+#error Do not #include this file directly. #include <lnet/api-support.h> instead
+#endif
+
+#ifndef __KERNEL__
+# include <stdio.h>
+# include <stdlib.h>
+# include <unistd.h>
+# include <time.h>
+
+/* Lots of POSIX dependencies to support PtlEQWait_timeout */
+# include <signal.h>
+# include <setjmp.h>
+# include <time.h>
+#endif
+
+
+#endif
#include <string.h>
#include <libcfs/libcfs.h>
+#undef LNET_ROUTER
+
#endif
#include <lnet/linux/lib-lnet.h>
#elif defined(__APPLE__)
#include <lnet/darwin/lib-lnet.h>
+#elif defined(__WINNT__)
+#include <lnet/winnt/lib-lnet.h>
#else
#error Unsupported Operating System
#endif
unsigned int nsiov, struct iovec *siov, unsigned int soffset,
unsigned int nob)
{
- struct iovec diov = {.iov_base = dest, .iov_len = dlen};
+ struct iovec diov = {/*.iov_base = */ dest, /*.iov_len = */ dlen};
lnet_copy_iov2iov(1, &diov, doffset,
nsiov, siov, soffset, nob);
unsigned int nsiov, lnet_kiov_t *skiov, unsigned int soffset,
unsigned int nob)
{
- struct iovec diov = {.iov_base = dest, .iov_len = dlen};
+ struct iovec diov = {/* .iov_base = */ dest, /* .iov_len = */ dlen};
lnet_copy_kiov2iov(1, &diov, doffset,
nsiov, skiov, soffset, nob);
lnet_copy_flat2iov(unsigned int ndiov, struct iovec *diov, unsigned int doffset,
int slen, void *src, unsigned int soffset, unsigned int nob)
{
- struct iovec siov = {.iov_base = src, .iov_len = slen};
+ struct iovec siov = {/*.iov_base = */ src, /*.iov_len = */slen};
lnet_copy_iov2iov(ndiov, diov, doffset,
1, &siov, soffset, nob);
}
lnet_copy_flat2kiov(unsigned int ndiov, lnet_kiov_t *dkiov, unsigned int doffset,
int slen, void *src, unsigned int soffset, unsigned int nob)
{
- struct iovec siov = {.iov_base = src, .iov_len = slen};
+ struct iovec siov = {/* .iov_base = */ src, /* .iov_len = */ slen};
lnet_copy_iov2kiov(ndiov, dkiov, doffset,
1, &siov, soffset, nob);
}
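
These four hunks trade C99 designated initializers for positional ones so
the code also builds with compilers that lack designated-initializer
support (the WinNT port being the likely motivation); the commented-out
field names preserve the intent. The positional form is only correct
because struct iovec declares iov_base before iov_len, e.g.:

    /* Equivalent, assuming the usual declaration order
     *     struct iovec { void *iov_base; size_t iov_len; };          */
    struct iovec a = { .iov_base = dest, .iov_len = dlen };           /* C99 */
    struct iovec b = { /*.iov_base =*/ dest, /*.iov_len =*/ dlen };   /* portable */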
#include <lnet/linux/lib-types.h>
#elif defined(__APPLE__)
#include <lnet/darwin/lib-types.h>
+#elif defined(__WINNT__)
+#include <lnet/winnt/lib-types.h>
#else
#error Unsupported Operating System
#endif
-EXTRA_DIST := lib-lnet.h lib-types.h lnet.h
+EXTRA_DIST := lib-lnet.h lib-types.h lnet.h api-support.h
--- /dev/null
+#ifndef __LINUX_API_SUPPORT_H__
+#define __LINUX_API_SUPPORT_H__
+
+#ifndef __LNET_API_SUPPORT_H__
+#error Do not #include this file directly. #include <lnet/api-support.h> instead
+#endif
+
+#ifndef __KERNEL__
+# include <stdio.h>
+# include <stdlib.h>
+# include <unistd.h>
+# include <time.h>
+
+/* Lots of POSIX dependencies to support PtlEQWait_timeout */
+# include <signal.h>
+# include <setjmp.h>
+# include <time.h>
+#endif
+
+#endif
# endif
#endif
+#define LNET_ROUTER
+
#endif /* __LNET_LINUX_LIB_LNET_H__ */
#include <lnet/linux/lnet.h>
#elif defined(__APPLE__)
#include <lnet/darwin/lnet.h>
+#elif defined(__WINNT__)
+#include <lnet/winnt/lnet.h>
#else
#error Unsupported Operating System
#endif
--- /dev/null
+#ifndef __WINNT_API_SUPPORT_H__
+#define __WINNT_API_SUPPORT_H__
+
+#ifndef __LNET_API_SUPPORT_H__
+#error Do not #include this file directly. #include <lnet/api-support.h> instead
+#endif
+
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LNET_WINNT_LIB_LNET_H__
+#define __LNET_WINNT_LIB_LNET_H__
+
+#ifndef __LNET_LIB_LNET_H__
+#error Do not #include this file directly. #include <lnet/lib-lnet.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <libcfs/libcfs.h>
+# include <libcfs/kp30.h>
+
+static inline __u64
+lnet_page2phys (struct page *p)
+{
+ return 0;
+}
+
+#else /* __KERNEL__ */
+
+#endif
+
+#endif /* __LNET_WINNT_LIB_LNET_H__ */
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LNET_WINNT_LIB_TYPES_H__
+#define __LNET_WINNT_LIB_TYPES_H__
+
+#ifndef __LNET_LIB_TYPES_H__
+#error Do not #include this file directly. #include <lnet/lib-types.h> instead
+#endif
+
+#include <libcfs/libcfs.h>
+
+typedef struct {
+ spinlock_t lock;
+} lib_ni_lock_t;
+
+static inline void lib_ni_lock_init(lib_ni_lock_t *l)
+{
+ spin_lock_init(&l->lock);
+}
+
+static inline void lib_ni_lock_fini(lib_ni_lock_t *l)
+{}
+
+static inline void lib_ni_lock(lib_ni_lock_t *l)
+{
+ unsigned long flags;
+
+ /* flags are not carried to lib_ni_unlock(), which restores 0;
+ * this assumes the WinNT port's irqsave/irqrestore keep no
+ * real interrupt state. */
+ spin_lock_irqsave(&l->lock, flags);
+}
+
+static inline void lib_ni_unlock(lib_ni_lock_t *l)
+{
+ spin_unlock_irqrestore(&l->lock, 0);
+}
+
+#endif
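
A minimal sketch of how these wrappers are intended to pair up (usage
pattern only; the real call sites live in the LNET core):

    static lib_ni_lock_t ni_lock;       /* lib_ni_lock_init() at startup,
                                         * lib_ni_lock_fini() at teardown */

    void ni_state_update(void)
    {
            lib_ni_lock(&ni_lock);
            /* ... modify network-interface state ... */
            lib_ni_unlock(&ni_lock);
    }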
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LNET_WINNT_LNET_H__
+#define __LNET_WINNT_LNET_H__
+
+#ifndef __LNET_H__
+#error Do not #include this file directly. #include <lnet/lnet.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <libcfs/libcfs.h>
+#include <lnet/lib-lnet.h>
+
+/*
+ * tdilnd routines
+ */
+
+//
+// debug.c
+//
+
+
+PUCHAR
+KsNtStatusToString (IN NTSTATUS Status);
+
+
+VOID
+KsPrintf(
+ IN LONG DebugPrintLevel,
+ IN PCHAR DebugMessage,
+ IN ...
+ );
+
+
+//
+// tconn.c
+//
+
+
+ksock_mdl_t *
+ksocknal_lock_iovs(
+ IN struct iovec *iov,
+ IN int niov,
+ IN int recv,
+ IN int * len
+ );
+
+ksock_mdl_t *
+ksocknal_lock_kiovs(
+ IN lnet_kiov_t * kiov,
+ IN int nkiov,
+ IN int recv,
+ IN int * len
+ );
+
+int
+ksocknal_send_mdl(
+ ksock_tconn_t * tconn,
+ void * tx,
+ ksock_mdl_t * mdl,
+ int len,
+ int flags
+ );
+
+int
+ksocknal_query_data(
+ ksock_tconn_t * tconn,
+ size_t * size,
+ int bIsExpedited);
+
+int
+ksocknal_recv_mdl(
+ ksock_tconn_t * tconn,
+ ksock_mdl_t * mdl,
+ int size,
+ int flags
+ );
+
+int
+ksocknal_get_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ PULONG Length
+ );
+
+NTSTATUS
+ksocknal_set_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ ULONG Length
+ );
+
+int
+ksocknal_bind_tconn (
+ ksock_tconn_t * tconn,
+ ksock_tconn_t * parent,
+ ulong_ptr addr,
+ unsigned short port
+ );
+
+int
+ksocknal_build_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr addr,
+ unsigned short port
+ );
+
+int
+ksocknal_disconnect_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr flags
+ );
+
+void
+ksocknal_abort_tconn(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_query_local_ipaddr(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_tconn_write (ksock_tconn_t *tconn, void *buffer, int nob);
+
+int
+ksocknal_tconn_read (ksock_tconn_t * tconn, void *buffer, int nob);
+
+//
+// tcp.c
+//
+
+NTSTATUS
+KsTcpCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+NTSTATUS
+KsDisconectCompletionRoutine (
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+NTSTATUS
+KsTcpReceiveCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ );
+
+NTSTATUS
+KsTcpSendCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ );
+
+NTSTATUS
+KsAcceptCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+
+NTSTATUS
+KsConnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN LONG RemoteAddressLength,
+ IN PVOID RemoteAddress,
+ IN LONG UserDataLength,
+ IN PVOID UserData,
+ IN LONG OptionsLength,
+ IN PVOID Options,
+ OUT CONNECTION_CONTEXT * ConnectionContext,
+ OUT PIRP * AcceptIrp
+ );
+
+NTSTATUS
+KsDisconnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN LONG DisconnectDataLength,
+ IN PVOID DisconnectData,
+ IN LONG DisconnectInformationLength,
+ IN PVOID DisconnectInformation,
+ IN ULONG DisconnectFlags
+ );
+
+NTSTATUS
+KsTcpReceiveEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ );
+
+NTSTATUS
+KsTcpReceiveExpeditedEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ );
+
+NTSTATUS
+KsTcpChainedReceiveEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ );
+
+NTSTATUS
+KsTcpChainedReceiveExpeditedEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ );
+
+
+
+VOID
+KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem);
+
+
+//
+// tdi.c
+//
+
+ULONG
+ksocknal_tdi_send_flags(ULONG SockFlags);
+
+PIRP
+KsBuildTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject
+ );
+
+NTSTATUS
+KsSubmitTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN BOOLEAN bSynchronous,
+ OUT PULONG Information
+ );
+
+NTSTATUS
+KsOpenControl(
+ IN PUNICODE_STRING DeviceName,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseControl(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsOpenAddress(
+ IN PUNICODE_STRING DeviceName,
+ IN PTRANSPORT_ADDRESS pAddress,
+ IN ULONG AddressLength,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseAddress(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsOpenConnection(
+ IN PUNICODE_STRING DeviceName,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseConnection(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsAssociateAddress(
+ IN HANDLE AddressHandle,
+ IN PFILE_OBJECT ConnectionObject
+ );
+
+
+NTSTATUS
+KsDisassociateAddress(
+ IN PFILE_OBJECT ConnectionObject
+ );
+
+
+NTSTATUS
+KsSetEventHandlers(
+ IN PFILE_OBJECT AddressObject,
+ IN PVOID EventContext,
+ IN PKS_EVENT_HANDLERS Handlers
+ );
+
+
+NTSTATUS
+KsQueryProviderInfo(
+ PWSTR TdiDeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ );
+
+NTSTATUS
+KsQueryAddressInfo(
+ IN PFILE_OBJECT FileObject,
+ OUT PTDI_ADDRESS_INFO AddressInfo,
+ OUT PULONG AddressSize
+ );
+
+NTSTATUS
+KsQueryConnectionInfo(
+ IN PFILE_OBJECT ConnectionObject,
+ OUT PTDI_CONNECTION_INFO ConnectionInfo,
+ OUT PULONG ConnectionSize
+ );
+
+ULONG
+KsInitializeTdiAddress(
+ IN OUT PTA_IP_ADDRESS pTransportAddress,
+ IN ULONG IpAddress,
+ IN USHORT IpPort
+ );
+
+ULONG
+KsQueryMdlsSize (IN PMDL Mdl);
+
+
+ULONG
+KsQueryTdiAddressLength(
+ OUT PTRANSPORT_ADDRESS pTransportAddress
+ );
+
+NTSTATUS
+KsQueryIpAddress(
+ IN PFILE_OBJECT FileObject,
+ OUT PVOID TdiAddress,
+ OUT ULONG* AddressLength
+ );
+
+
+NTSTATUS
+KsErrorEventHandler(
+ IN PVOID TdiEventContext,
+ IN NTSTATUS Status
+ );
+
+int
+ksocknal_set_handlers(
+ ksock_tconn_t * tconn
+ );
+
+
+
+//
+// Strusup.c
+//
+
+VOID
+KsPrintProviderInfo(
+ PWSTR DeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ );
+
+ksock_tconn_t *
+ksocknal_create_tconn();
+
+void
+ksocknal_free_tconn(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_listener(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_sender(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_child(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_get_tconn(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_put_tconn(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_reset_handlers(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_destroy_tconn(
+ ksock_tconn_t * tconn
+ );
+
+
+PKS_TSDU
+KsAllocateKsTsdu();
+
+VOID
+KsPutKsTsdu(
+ PKS_TSDU KsTsdu
+ );
+
+VOID
+KsFreeKsTsdu(
+ PKS_TSDU KsTsdu
+ );
+
+VOID
+KsInitializeKsTsdu(
+ PKS_TSDU KsTsdu,
+ ULONG Length
+ );
+
+
+VOID
+KsInitializeKsTsduMgr(
+ PKS_TSDUMGR TsduMgr
+ );
+
+VOID
+KsInitializeKsChain(
+ PKS_CHAIN KsChain
+ );
+
+NTSTATUS
+KsCleanupTsduMgr(
+ PKS_TSDUMGR KsTsduMgr
+ );
+
+NTSTATUS
+KsCleanupKsChain(
+ PKS_CHAIN KsChain
+ );
+
+NTSTATUS
+KsCleanupTsdu(
+ ksock_tconn_t * tconn
+ );
+
+NTSTATUS
+KsCopyMdlChainToMdlChain(
+ IN PMDL SourceMdlChain,
+ IN ULONG SourceOffset,
+ IN PMDL DestinationMdlChain,
+ IN ULONG DestinationOffset,
+ IN ULONG BytesTobecopied,
+ OUT PULONG BytesCopied
+ );
+
+ULONG
+KsQueryMdlsSize (PMDL Mdl);
+
+NTSTATUS
+KsLockUserBuffer (
+ IN PVOID UserBuffer,
+ IN BOOLEAN bPaged,
+ IN ULONG Length,
+ IN LOCK_OPERATION Operation,
+ OUT PMDL * pMdl
+ );
+
+PVOID
+KsMapMdlBuffer (PMDL Mdl);
+
+VOID
+KsReleaseMdl ( IN PMDL Mdl,
+ IN int Paged );
+
+int
+ksocknal_lock_buffer (
+ void * buffer,
+ int paged,
+ int length,
+ LOCK_OPERATION access,
+ ksock_mdl_t ** kmdl
+ );
+
+void *
+ksocknal_map_mdl (ksock_mdl_t * mdl);
+
+void
+ksocknal_release_mdl (ksock_mdl_t *mdl, int paged);
+
+#endif /* __KERNEL__ */
+
+#endif
* chunk starting at 0 */
struct sysinfo si;
__u64 total;
+ __u64 total2;
__u64 roundup = (128<<20); /* round up in big chunks */
IB_MR_PHYS_BUFFER phys;
IB_ACCESS_CONTROL access;
}
si_meminfo(&si);
+
+ CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n",
+ si.totalram, si.mem_unit, num_physpages, PAGE_SIZE);
+
total = ((__u64)si.totalram) * si.mem_unit;
+ total2 = ((__u64)num_physpages) * PAGE_SIZE;
+ if (total < total2)
+ total = total2;
if (total == 0) {
CERROR("Can't determine memory size\n");
return -ENOMEM;
}
-
+
roundup = (128<<20);
total = (total + (roundup - 1)) & ~(roundup - 1);
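
The mask expression above is the standard round-up-to-a-power-of-two
idiom; a worked example (values illustrative):

    /* roundup = 128 MB = 0x08000000, so roundup - 1 = 0x07FFFFFF and
     * total = 0x0A000001 (just over 160 MB) becomes
     *     (0x0A000001 + 0x07FFFFFF) & ~0x07FFFFFF
     *   = 0x12000000 & ~0x07FFFFFF
     *   = 0x10000000 (exactly 256 MB).
     * This only works because roundup is a power of two. */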
return -EIO;
}
- CDEBUG(D_NET, "registered phys mem from "LPX64" for "LPU64"\n",
- phys.PhysAddr, phys.Length);
+ CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n",
+ phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr);
return 0;
}
if (failed &&
tx->tx_status == 0 &&
- conn->ibc_state == IBNAL_CONN_ESTABLISHED)
+ conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
+#if KIBLND_DETAILED_DEBUG
+ int i;
+ IB_WORK_REQ2 *wrq = &tx->tx_wrq[0];
+ IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[0];
+ lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
+#endif
CERROR("tx -> %s type %x cookie "LPX64
- "sending %d waiting %d: failed %d\n",
+ " sending %d waiting %d failed %d nwrk %d\n",
libcfs_nid2str(conn->ibc_peer->ibp_nid),
tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting, wc->Status);
+ tx->tx_sending, tx->tx_waiting, wc->Status,
+ tx->tx_nwrq);
+#if KIBLND_DETAILED_DEBUG
+ for (i = 0; i < tx->tx_nwrq; i++, wrq++, gl++) {
+ switch (wrq->Operation) {
+ default:
+ CDEBUG(D_ERROR, " [%3d] Addr %p Next %p OP %d "
+ "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
+ i, wrq, wrq->Next, wrq->Operation,
+ wrq->DSList, gl, wrq->DSListDepth,
+ gl->Address, gl->Length, gl->Lkey);
+ break;
+ case WROpSend:
+ CDEBUG(D_ERROR, " [%3d] Addr %p Next %p SEND "
+ "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
+ i, wrq, wrq->Next,
+ wrq->DSList, gl, wrq->DSListDepth,
+ gl->Address, gl->Length, gl->Lkey);
+ break;
+ case WROpRdmaWrite:
+ CDEBUG(D_ERROR, " [%3d] Addr %p Next %p DMA "
+ "DSList: %p(%p)/%d "LPX64"/%d K %x -> "
+ LPX64" K %x\n",
+ i, wrq, wrq->Next,
+ wrq->DSList, gl, wrq->DSListDepth,
+ gl->Address, gl->Length, gl->Lkey,
+ wrq->Req.SendRC.RemoteDS.Address,
+ wrq->Req.SendRC.RemoteDS.Rkey);
+ break;
+ }
+ }
+
+ switch (tx->tx_msg->ibm_type) {
+ default:
+ CERROR(" msg type %x %p/%d, No RDMA\n",
+ tx->tx_msg->ibm_type,
+ tx->tx_msg, tx->tx_msg->ibm_nob);
+ break;
+ case IBNAL_MSG_PUT_DONE:
+ case IBNAL_MSG_GET_DONE:
+ CERROR(" msg type %x %p/%d, RDMA key %x frags %d...\n",
+ tx->tx_msg->ibm_type,
+ tx->tx_msg, tx->tx_msg->ibm_nob,
+ tx->tx_rd->rd_key, tx->tx_rd->rd_nfrag);
+ for (i = 0; i < tx->tx_rd->rd_nfrag; i++)
+ CDEBUG(D_ERROR, " [%d] "LPX64"/%d\n", i,
+ tx->tx_rd->rd_frags[i].rf_addr,
+ tx->tx_rd->rd_frags[i].rf_nob);
+ if (lntmsg == NULL) {
+ CERROR(" No lntmsg\n");
+ } else if (lntmsg->msg_iov != NULL) {
+ CERROR(" lntmsg in %d VIRT frags...\n",
+ lntmsg->msg_niov);
+ for (i = 0; i < lntmsg->msg_niov; i++)
+ CDEBUG(D_ERROR, " [%d] %p/%d\n", i,
+ lntmsg->msg_iov[i].iov_base,
+ lntmsg->msg_iov[i].iov_len);
+ } else if (lntmsg->msg_kiov != NULL) {
+ CERROR(" lntmsg in %d PAGE frags...\n",
+ lntmsg->msg_niov);
+ for (i = 0; i < lntmsg->msg_niov; i++)
+ CDEBUG(D_ERROR, " [%d] %p+%d/%d\n", i,
+ lntmsg->msg_kiov[i].kiov_page,
+ lntmsg->msg_kiov[i].kiov_offset,
+ lntmsg->msg_kiov[i].kiov_len);
+ } else {
+ CERROR(" lntmsg in %d frags\n",
+ lntmsg->msg_niov);
+ }
+
+ break;
+ }
+#endif
+ }
+
spin_lock(&conn->ibc_lock);
/* I could be racing with rdma completion. Whoever makes 'tx' idle
/*
* Src nid cannot be LNET_NID_ANY
*/
- if (msg->ptlm_srcnid == PTL_NID_ANY) {
- CERROR("Bad src nid: "LPX64"\n", msg->ptlm_srcnid);
+ if (msg->ptlm_srcnid == LNET_NID_ANY) {
+ CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ptlm_srcnid));
return -EPROTO;
}
int rc = -EINVAL;
kptl_data_t *kptllnd_data = ni->ni_data;
- PJK_UT_MSG(">>> kptllnd_ctl cmd=%u arg=%p\n",cmd,arg);
+ CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n",cmd,arg);
/*
* Validate that the context block is actually
switch(cmd) {
case IOC_LIBCFS_DEL_PEER: {
- rc = kptllnd_peer_del (kptllnd_data,data->ioc_nid);
+ rc = kptllnd_peer_del (kptllnd_data, data->ioc_nid);
break;
}
/*
rc=-EINVAL;
break;
}
- PJK_UT_MSG("<<< kptllnd_ctl rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n",rc);
return rc;
}
ptl_err_t ptl_rc;
- PJK_UT_MSG(">>>\n");
+ CDEBUG(D_NET, ">>>\n");
LASSERT (ni->ni_lnd == &kptllnd_lnd);
#else /* _USING_CRAY_PORTALS_ */
CRAY_KERN_NAL,
#endif
- PTLLND_PID, NULL, NULL,
+ *kptllnd_tunables.kptl_pid, NULL, NULL,
&kptllnd_data->kptl_nih);
/*
8, /* We use callback - no need for max */
kptllnd_eq_callback, /* handler callback */
&kptllnd_data->kptl_eqh); /* output handle */
- if(ptl_rc != 0) {
+ if(ptl_rc != PTL_OK) {
CERROR("PtlEQAlloc failed %d\n",ptl_rc);
rc = -ENOMEM;
goto failed;
/*
* Fetch the lower NID
*/
- if(ptl_rc != PtlGetId(kptllnd_data->kptl_nih,&kptllnd_data->kptl_portals_id)){
+ ptl_rc = PtlGetId(kptllnd_data->kptl_nih, &kptllnd_data->kptl_portals_id);
+ if (ptl_rc != PTL_OK) {
CERROR ("PtlGetID: error %d\n", ptl_rc);
rc = -EINVAL;
goto failed;
}
- PJK_UT_MSG("lnet nid=" LPX64 " (passed in)\n",ni->ni_nid);
+ if (kptllnd_data->kptl_portals_id.pid !=
+ *kptllnd_tunables.kptl_pid) {
+ /* The kernel ptllnd must have the expected PID */
+ CERROR("Unexpected PID: %u (%u expected)\n",
+ kptllnd_data->kptl_portals_id.pid,
+ *kptllnd_tunables.kptl_pid);
+ rc = -EINVAL;
+ goto failed;
+ }
+
+ CDEBUG(D_NET, "lnet nid=" LPX64 " (passed in)\n",ni->ni_nid);
/*
* Create the new NID. Based on the LND network type
* and the lower ni's address data.
*/
- ni->ni_nid = ptl2lnetnid(kptllnd_data,kptllnd_data->kptl_portals_id.nid);
-
- PJK_UT_MSG("ptl nid=" FMT_NID "\n",kptllnd_data->kptl_portals_id.nid);
- PJK_UT_MSG("lnet nid=" LPX64 " (passed back)\n",ni->ni_nid);
+ ni->ni_nid = ptl2lnetnid(kptllnd_data, kptllnd_data->kptl_portals_id.nid);
- CDEBUG(D_INFO,"ptl nid=" FMT_NID "\n",kptllnd_data->kptl_portals_id.nid);
- CDEBUG(D_INFO,"lnet nid=" LPX64 "\n",ni->ni_nid);
+ CDEBUG(D_NET, "ptl nid=" FMT_NID "\n",kptllnd_data->kptl_portals_id.nid);
+ CDEBUG(D_NET, "ptl pid= %d\n", kptllnd_data->kptl_portals_id.pid);
+ CDEBUG(D_NET, "lnet nid=" LPX64 " (passed back)\n",ni->ni_nid);
/*
* Initialized the incarnation
*/
do_gettimeofday(&tv);
kptllnd_data->kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
- PJK_UT_MSG("Incarnation=" LPX64 "\n",kptllnd_data->kptl_incarnation);
- CDEBUG(D_INFO,"Incarnation=" LPX64 "\n",kptllnd_data->kptl_incarnation);
+ CDEBUG(D_NET, "Incarnation=" LPX64 "\n",kptllnd_data->kptl_incarnation);
/*
* Setup the sched locks/lists/waitq
/*
* Allocate and setup the peer hash table
*/
- PJK_UT_MSG("Allocate Peer Hash Table\n");
+ CDEBUG(D_NET, "Allocate Peer Hash Table\n");
rwlock_init(&kptllnd_data->kptl_peer_rw_lock);
kptllnd_data->kptl_peer_hash_size = *kptllnd_tunables.kptl_peer_hash_table_size;
INIT_LIST_HEAD(&kptllnd_data->kptl_canceled_peers);
* this will be automatically cleaned up now that PTLNAT_INIT_DATA
* state has been entered
*/
- PJK_UT_MSG("starting %d scheduler threads\n",PTLLND_N_SCHED);
+ CDEBUG(D_NET, "starting %d scheduler threads\n",PTLLND_N_SCHED);
for (i = 0; i < PTLLND_N_SCHED; i++) {
rc = kptllnd_thread_start (
kptllnd_scheduler,
* because we'll use the pointer being NULL as a sentinel
* to know that we have to clean this up
*/
- PJK_UT_MSG("Allocate TX Descriptor array\n");
+ CDEBUG(D_NET, "Allocate TX Descriptor array\n");
LIBCFS_ALLOC (kptllnd_data->kptl_tx_descs,
(*kptllnd_tunables.kptl_ntx) * sizeof(kptl_tx_t));
if (kptllnd_data->kptl_tx_descs == NULL){
/*****************************************************/
- PJK_UT_MSG("<<< kptllnd_startup SUCCESS\n");
+ CDEBUG(D_NET, "<<< kptllnd_startup SUCCESS\n");
return 0;
failed:
CDEBUG(D_NET, "kptllnd_startup failed rc=%d\n",rc);
kptllnd_shutdown (ni);
- PJK_UT_MSG("<<< kptllnd_startup rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< kptllnd_startup rc=%d\n",rc);
return rc;
}
int i;
kptl_data_t *kptllnd_data = ni->ni_data;
- PJK_UT_MSG(">>> kptllnd_shutdown\n");
+ CDEBUG(D_NET, ">>> kptllnd_shutdown\n");
/*
* Validate that the context block is actually
case PTLLND_INIT_ALL:
case PTLLND_INIT_RXD:
- PJK_UT_MSG("PTLLND_INIT_RXD\n");
+ CDEBUG(D_NET, "PTLLND_INIT_RXD\n");
kptllnd_rx_buffer_pool_fini(
&kptllnd_data->kptl_rx_buffer_pool);
/* fall through */
case PTLLND_INIT_TXD:
- PJK_UT_MSG("PTLLND_INIT_TXD\n");
+ CDEBUG(D_NET, "PTLLND_INIT_TXD\n");
/*
* If there were peers started up then
* clean them up.
*/
if( atomic_read(&kptllnd_data->kptl_npeers) != 0) {
- PJK_UT_MSG("Deleting %d peers\n",atomic_read(&kptllnd_data->kptl_npeers));
+ CDEBUG(D_NET, "Deleting %d peers\n",atomic_read(&kptllnd_data->kptl_npeers));
/* nuke all peers */
- kptllnd_peer_del(kptllnd_data,PTL_NID_ANY);
+ kptllnd_peer_del(kptllnd_data, LNET_NID_ANY);
i = 2;
while (atomic_read (&kptllnd_data->kptl_npeers) != 0) {
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
"Waiting for %d peers to terminate\n",
atomic_read (&kptllnd_data->kptl_npeers));
- PJK_UT_MSG("Waiting for %d peers to terminate\n",
+ CDEBUG(D_NET, "Waiting for %d peers to terminate\n",
atomic_read (&kptllnd_data->kptl_npeers));
cfs_pause(cfs_time_seconds(1));
}
}
LASSERT(list_empty(&kptllnd_data->kptl_canceled_peers));
- PJK_UT_MSG("All peers deleted\n");
+ CDEBUG(D_NET, "All peers deleted\n");
/*
* Set the shutdown flag
* if we are not in the right state.
*/
if(atomic_read (&kptllnd_data->kptl_nthreads) != 0){
- PJK_UT_MSG("Stopping %d threads\n",atomic_read(&kptllnd_data->kptl_nthreads));
+ CDEBUG(D_NET, "Stopping %d threads\n",atomic_read(&kptllnd_data->kptl_nthreads));
/*
* Wake up all the schedulers
*/
CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
"Waiting for %d threads to terminate\n",
atomic_read (&kptllnd_data->kptl_nthreads));
- PJK_UT_MSG("Waiting for %d threads to terminate\n",
+ CDEBUG(D_NET, "Waiting for %d threads to terminate\n",
atomic_read (&kptllnd_data->kptl_nthreads));
cfs_pause(cfs_time_seconds(1));
}
}
- PJK_UT_MSG("All Threads stopped\n");
+ CDEBUG(D_NET, "All Threads stopped\n");
LASSERT(list_empty(&kptllnd_data->kptl_sched_txq));
/* fall through */
case PTLLND_INIT_DATA:
- PJK_UT_MSG("PTLLND_INIT_DATA\n");
+ CDEBUG(D_NET, "PTLLND_INIT_DATA\n");
LASSERT (atomic_read(&kptllnd_data->kptl_npeers) == 0);
LASSERT (kptllnd_data->kptl_peers != NULL);
/* fall through */
case PTLLND_INIT_NOTHING:
- PJK_UT_MSG("PTLLND_INIT_NOTHING\n");
+ CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n");
break;
}
atomic_read (&libcfs_kmemory));
PORTAL_MODULE_UNUSE;
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
int __init
{
int rc;
- PJK_UT_MSG(">>> %s %s\n",__DATE__,__TIME__);
+ CDEBUG(D_NET, ">>> %s %s\n",__DATE__,__TIME__);
/*
* Display the module parameters
kptllnd_proc_init();
lnet_register_lnd(&kptllnd_lnd);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return 0;
}
kptllnd_module_fini (void)
{
- PJK_UT_MSG(">>> %s %s\n",__DATE__,__TIME__);
+ CDEBUG(D_NET, ">>> %s %s\n",__DATE__,__TIME__);
lnet_unregister_lnd(&kptllnd_lnd);
kptllnd_proc_fini();
kptllnd_tunables_fini();
- kpttllnd_get_stats();
- PJK_UT_MSG("<<<\n");
+ // kpttllnd_get_stats();
+ CDEBUG(D_NET, "<<<\n");
}
#define DO_TYPE(x) case x: return #x;
}
}
-#define LOGSTAT(x) PJK_UT_MSG_ALWAYS("%30.30s %d\n",#x,kptllnd_stats.x);
+#define LOGSTAT(x) CDEBUG(D_NET, "%30.30s %d\n",#x,kptllnd_stats.x);
kptl_stats_t* kpttllnd_get_stats(void)
{
int *kptl_cksum; /* checksum kptl_msg_t? */
int *kptl_timeout; /* comms timeout (seconds) */
int *kptl_portal; /* portal number */
+ int *kptl_pid; /* portals PID (self + kernel peers) */
int *kptl_rxb_npages; /* number of pages for rx buffer */
int *kptl_credits; /* number of credits */
int *kptl_peercredits; /* number of credits */
int rxbp_reserved; /* the number currently reserved */
int rxbp_shutdown; /* the shutdown flag for the pool */
int rxbp_posted; /* the number of elements posted */
-}kptl_rx_buffer_pool_t;
+} kptl_rx_buffer_pool_t;
-typedef enum
+enum kptl_rxb_state
{
RXB_STATE_UNINITIALIZED = 0,
RXB_STATE_IDLE = 1,
RXB_STATE_POSTED = 2,
-}kptl_rxb_state_t;
+};
struct kptl_rx_buffer
{
kptl_rx_buffer_pool_t *rxb_pool;
struct list_head rxb_list; /* for the rxb_pool list */
struct list_head rxb_repost_list;/* for the kptl_sched_rxbq list*/
- kptl_rxb_state_t rxb_state; /* the state of this rx buffer*/
+ enum kptl_rxb_state rxb_state; /* the state of this rx buffer*/
atomic_t rxb_refcount; /* outstanding rx */
ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */
void *rxb_buffer; /* the buffer */
};
-typedef enum
+enum kptl_tx_state
{
TX_STATE_UNINITIALIZED = 0,
TX_STATE_ON_IDLE_QUEUE = 1,
TX_STATE_ALLOCATED = 2,
TX_STATE_WAITING_CREDITS = 3,
TX_STATE_WAITING_RESPONSE = 4
-}kptl_tx_state_t;
+};
-typedef enum
+enum kptl_tx_type
{
TX_TYPE_RESERVED = 0,
TX_TYPE_SMALL_MESSAGE = 1,
TX_TYPE_LARGE_GET = 3,
TX_TYPE_LARGE_PUT_RESPONSE = 4,
TX_TYPE_LARGE_GET_RESPONSE = 5,
-}kptl_tx_type_t;
+};
+
+/* Preallocated fragment descriptors for mapping bulk RDMA payloads */
+typedef union {
+#ifdef _USING_LUSTRE_PORTALS_
+ struct iovec iov[PTL_MD_MAX_IOV];
+ ptl_kiov_t kiov[PTL_MD_MAX_IOV];
+#else /* _USING_CRAY_PORTALS_ */
+ ptl_md_iovec_t iov[PTL_MD_MAX_IOV];
+#endif
+} kptl_fragvec_t;
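
This union replaces the on-stack tempiov_t removed further down: each tx
now carries a preallocated kptl_fragvec_t via tx_frags, keeping the
fragment descriptors off the kernel stack. Back-of-envelope sizing (the
numbers are illustrative; PTL_MD_MAX_IOV varies by build):

    /* With PTL_MD_MAX_IOV == 256 and a 16-byte struct iovec on a
     * 64-bit build:
     *     sizeof(kptl_fragvec_t) >= 256 * 16 = 4096 bytes,
     * i.e. at least a full page that previously had to fit on the
     * caller's stack for every PUT/GET MD setup. */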
typedef struct kptl_tx /* transmit message */
{
struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
struct list_head tx_schedlist; /* queue on idle_txs ibc_tx_queue etc. */
atomic_t tx_refcount; /* Posted buffer reference count */
- kptl_tx_state_t tx_state; /* the state of this tx descriptor */
+ enum kptl_tx_state tx_state; /* the state of this tx descriptor */
int tx_seen_send_end; /* if we've seen a SEND_END event */
int tx_seen_reply_end; /* if we've seen a REPLY_END event */
- kptl_tx_type_t tx_type; /* type of transfer */
+ enum kptl_tx_type tx_type; /* type of transfer */
int tx_status; /* the status of this tx descriptor */
ptl_handle_md_t tx_mdh; /* the portals memory descriptor (MD) handle */
ptl_handle_md_t tx_mdh_msg; /* the portals MD handle for the initial message */
kptl_peer_t *tx_peer; /* the peer this is waiting on */
unsigned long tx_deadline; /* deadline */
kptl_rx_t *tx_associated_rx; /* Associated RX for Bulk RDMA */
+ kptl_fragvec_t *tx_frags; /* buffer fragments for bulk RDMA */
unsigned int tx_payload_niov;
struct iovec *tx_payload_iov;
} kptl_tx_t;
-typedef enum
+enum kptllnd_peer_state
{
PEER_STATE_UNINITIALIZED = 0,
PEER_STATE_ALLOCATED = 1, //QQQ
PEER_STATE_WAITING_HELLO = 2,
PEER_STATE_ACTIVE = 3,
PEER_STATE_CANCELED = 4,
-}kptllnd_peer_state_t;
+};
struct kptl_peer
{
struct list_head peer_list;
atomic_t peer_refcount; /* The current reference count */
- kptllnd_peer_state_t peer_state;
+ enum kptllnd_peer_state peer_state;
kptl_data_t *peer_kptllnd_data; /* LND Instance Data */
spinlock_t peer_lock; /* serialize */
struct list_head peer_pending_txs; /* queue of pending txs */
struct list_head peer_active_txs; /* queue of active txs */
int peer_active_txs_change_counter;/* updated when peer_active_txs changes*/
- lnet_nid_t peer_nid; /* who's on the other end(s) */
- int peer_pid; /* the pid on the other end */
+ lnet_nid_t peer_nid; /* Peer's LNET NID */
+ ptl_process_id_t peer_ptlid; /* Peer's portals id */
__u64 peer_incarnation; /* peer's incarnation */
__u64 peer_tx_seqnum; /* next seq# to send with*/
int peer_credits; /* number of send credits */
int kptl_canceled_peers_counter; /* updated when canceled_peers is modified*/
int kptl_peer_hash_size; /* size of kptl_peers */
atomic_t kptl_npeers; /* # peers extant */
-
};
typedef struct kptl_stats
lnet_msg_t *ptlmsg );
kptl_peer_t *
-kptllnd_peer_find (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t target);
+kptllnd_nid2peer (kptl_data_t *kptllnd_data, lnet_nid_t nid);
kptl_peer_t *
-kptllnd_peer_handle_hello (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t initiator,
- kptl_msg_t *msg);
+kptllnd_ptlnid2peer (kptl_data_t *kptllnd_data, ptl_nid_t ptlnid);
+
+kptl_peer_t *
+kptllnd_peer_handle_hello (kptl_data_t *kptllnd_data,
+ ptl_process_id_t initiator,
+ kptl_msg_t *msg);
static inline struct list_head *
-kptllnd_nid2peerlist (kptl_data_t *kptllnd_data,lnet_nid_t nid)
+kptllnd_ptlnid2peerlist (kptl_data_t *kptllnd_data, ptl_nid_t nid)
{
unsigned int hash = ((unsigned int)nid) % kptllnd_data->kptl_peer_hash_size;
kptl_tx_t *
kptllnd_get_idle_tx(
kptl_data_t *kptllnd_data,
- kptl_tx_type_t purpose);
+ enum kptl_tx_type purpose);
void
kptllnd_tx_callback(
* MISC SUPPORT FUNCTIONS
*/
-
-typedef union {
-#ifdef _USING_LUSTRE_PORTALS_
- struct iovec iov[PTL_MD_MAX_IOV];
- ptl_kiov_t kiov[PTL_MD_MAX_IOV];
-#else /* _USING_CRAY_PORTALS_ */
- ptl_md_iovec_t iov[PTL_MD_MAX_IOV];
-#endif
-}tempiov_t;
-
void
kptllnd_setup_md(
kptl_data_t *kptllnd_data,
struct iovec *payload_iov,
lnet_kiov_t *payload_kiov,
unsigned int payload_offset,
- int payload_nob,
- tempiov_t *tempiov);
+ int payload_nob);
static inline lnet_nid_t ptl2lnetnid(kptl_data_t *kptllnd_data,ptl_nid_t portals_nid)
{
}
#ifdef PJK_DEBUGGING
-
-#define PJK_UT_MSG_ALWAYS(fmt, a...) \
-do{ \
- printk("<1>ptllnd:%-30s:%u:",__FUNCTION__,cfs_curproc_pid()); \
- printk(fmt,## a); \
- CDEBUG(D_TRACE,fmt,## a); \
-}while(0)
-
-#define PJK_UT_MSG_SIMULATION(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-
-
-#if 1
-#define PJK_UT_MSG_DATA(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-#else
-#define PJK_UT_MSG_DATA(fmt, a...) do{}while(0)
-#endif
-
-#if 1
-#define PJK_UT_MSG(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-#else
-#define PJK_UT_MSG(fmt, a...) do{}while(0)
-#endif
-
-
#define SIMULATION_FAIL_BLOCKING_TX_PUT_ALLOC 0 /* 0x00000001 */
#define SIMULATION_FAIL_BLOCKING_TX_GET_ALLOC 1 /* 0x00000002 */
#define SIMULATION_FAIL_BLOCKING_TX 2 /* 0x00000004 */
#define IS_SIMULATION_ENABLED(x) \
(((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0)
-
-
#else
-
-
-#define PJK_UT_MSG_ALWAYS(fmt, a...) do{}while(0)
-#define PJK_UT_MSG_SIMULATION(fmt, a...) do{}while(0)
-#define PJK_UT_MSG_DATA(fmt, a...) do{}while(0)
-#define PJK_UT_MSG(fmt, a...) do{}while(0)
-
#define IS_SIMULATION_ENABLED(x) 0
-
#endif
struct iovec *payload_iov,
lnet_kiov_t *payload_kiov,
unsigned int payload_offset,
- int payload_nob,
- tempiov_t *tempiov)
+ int payload_nob)
{
- unsigned int niov = 0;
+ kptl_fragvec_t *frags = tx->tx_frags;
+ unsigned int niov = 0;
- PJK_UT_MSG_DATA("%s nob=%d offset=%d niov=%d\n",
- op == PTL_MD_OP_GET ? "GET" : "PUT",
- payload_nob,payload_offset,payload_niov);
+ CDEBUG(D_NET, "%s nob=%d offset=%d niov=%d\n",
+ op == PTL_MD_OP_GET ? "GET" : "PUT",
+ payload_nob,payload_offset,payload_niov);
/* One but not both of iov or kiov must be NULL (XOR) */
LASSERT( (payload_iov != NULL && payload_kiov == NULL) ||
while(payload_nob){
LASSERT( payload_offset < payload_iov->iov_len);
LASSERT (payload_niov > 0);
- LASSERT (niov < sizeof(tempiov->iov)/sizeof(tempiov->iov[0]));
+ LASSERT (niov < sizeof(frags->iov)/sizeof(frags->iov[0]));
- tempiov->iov[niov].iov_base = payload_iov->iov_base + payload_offset;
- tempiov->iov[niov].iov_len = min((int)(payload_iov->iov_len - payload_offset),
+ frags->iov[niov].iov_base = payload_iov->iov_base + payload_offset;
+ frags->iov[niov].iov_len = min((int)(payload_iov->iov_len - payload_offset),
(int)payload_nob);
- PJK_UT_MSG("iov_base[%d]=%p\n",niov,tempiov->iov[niov].iov_base);
- PJK_UT_MSG("iov_len[%d] =%d\n",niov,tempiov->iov[niov].iov_len);
+ CDEBUG(D_NET, "iov_base[%d]=%p\n",niov,frags->iov[niov].iov_base);
+ CDEBUG(D_NET, "iov_len[%d] =%d\n",niov,(int)frags->iov[niov].iov_len);
payload_offset = 0;
- payload_nob -= tempiov->iov[niov].iov_len;
+ payload_nob -= frags->iov[niov].iov_len;
payload_iov++;
payload_niov--;
niov++;
}
- md->start = tempiov->iov;
+ md->start = frags->iov;
md->options |= PTL_MD_IOVEC;
}else{
while(payload_nob){
LASSERT( payload_offset < payload_kiov->kiov_len);
LASSERT (payload_niov > 0);
- LASSERT (niov < sizeof(tempiov->kiov)/sizeof(tempiov->kiov[0]));
+ LASSERT (niov < sizeof(frags->kiov)/sizeof(frags->kiov[0]));
- tempiov->kiov[niov].kiov_page = payload_kiov->kiov_page;
- tempiov->kiov[niov].kiov_offset = payload_kiov->kiov_offset + payload_offset;
- tempiov->kiov[niov].kiov_len = min((int)(payload_kiov->kiov_len - payload_offset),
+ frags->kiov[niov].kiov_page = payload_kiov->kiov_page;
+ frags->kiov[niov].kiov_offset = payload_kiov->kiov_offset + payload_offset;
+ frags->kiov[niov].kiov_len = min((int)(payload_kiov->kiov_len - payload_offset),
(int)payload_nob);
payload_offset = 0;
- payload_nob -= tempiov->kiov[niov].kiov_len;
+ payload_nob -= frags->kiov[niov].kiov_len;
payload_kiov++;
payload_niov--;
niov++;
}
- md->start = tempiov->kiov;
+ md->start = frags->kiov;
md->options |= PTL_MD_KIOV;
#else /* _USING_CRAY_PORTALS_ */
#error "Conflicting compilation directives"
#endif
- PJK_UT_MSG("payload_offset %d\n",payload_offset);
- PJK_UT_MSG("payload_niov %d\n",payload_niov);
- PJK_UT_MSG("payload_nob %d\n",payload_nob);
+ CDEBUG(D_NET, "payload_offset %d\n",payload_offset);
+ CDEBUG(D_NET, "payload_niov %d\n",payload_niov);
+ CDEBUG(D_NET, "payload_nob %d\n",payload_nob);
while (payload_offset >= payload_kiov->kiov_len) {
payload_offset -= payload_kiov->kiov_len;
LASSERT (payload_offset < payload_kiov->kiov_len);
LASSERT (payload_niov > 0);
- LASSERT (niov < sizeof(tempiov->iov)/sizeof(tempiov->iov[0]));
+ LASSERT (niov < sizeof(frags->iov)/sizeof(frags->iov[0]));
LASSERT (sizeof(void *) > 4 ||
(phys <= 0xffffffffULL &&
phys + (nob - 1) <= 0xffffffffULL));
- PJK_UT_MSG("kiov_page [%d]="LPX64" (phys)\n",niov,phys_page);
- PJK_UT_MSG("kiov_offset[%d]=%d (phys)\n",niov,payload_kiov->kiov_offset);
- PJK_UT_MSG("kiov_len [%d]=%d (phys)\n",niov,payload_kiov->kiov_len);
+ CDEBUG(D_NET, "kiov_page [%d]="LPX64" (phys)\n",niov,phys_page);
+ CDEBUG(D_NET, "kiov_offset[%d]=%d (phys)\n",niov,payload_kiov->kiov_offset);
+ CDEBUG(D_NET, "kiov_len [%d]=%d (phys)\n",niov,payload_kiov->kiov_len);
- tempiov->iov[niov].iov_base = (void *)((unsigned long)phys);
- tempiov->iov[niov].iov_len = nob;
+ frags->iov[niov].iov_base = (void *)((unsigned long)phys);
+ frags->iov[niov].iov_len = nob;
- PJK_UT_MSG("iov_base[%d]=%p\n",niov,tempiov->iov[niov].iov_base);
- PJK_UT_MSG("iov_len [%d]=%d\n",niov,tempiov->iov[niov].iov_len);
+ CDEBUG(D_NET, "iov_base[%d]=%p\n",niov,frags->iov[niov].iov_base);
+ CDEBUG(D_NET, "iov_len [%d]=%d\n",niov,(int)frags->iov[niov].iov_len);
payload_offset = 0;
- payload_nob -= tempiov->iov[niov].iov_len;
+ payload_nob -= frags->iov[niov].iov_len;
payload_kiov++;
payload_niov--;
niov++;
}
- md->start = tempiov->iov;
+ md->start = frags->iov;
md->options |= PTL_MD_IOVEC | PTL_MD_PHYS;
#endif
*/
md->length = niov;
- PJK_UT_MSG("md->options=%x\n",md->options);
- PJK_UT_MSG("md->length=%d\n",md->length);
+ CDEBUG(D_NET, "md->options=%x\n",md->options);
+ CDEBUG(D_NET, "md->length=%u\n",(unsigned)md->length);
}
int
ptl_err_t ptl_rc;
ptl_err_t ptl_rc2;
int rc;
- tempiov_t tempiov;
kptl_msg_t *rxmsg = rx->rx_msg;
kptl_peer_t *peer = rx->rx_peer;
unsigned long flags;
tx->tx_associated_rx = rx;
kptllnd_rx_addref(rx,"tx");
- PJK_UT_MSG_DATA(">>> %s rx=%p associated with tx=%p\n",
+ CDEBUG(D_NET, ">>> %s rx=%p associated with tx=%p\n",
op == PTL_MD_OP_GET ? "GET" : "PUT",
rx,tx);
- PJK_UT_MSG_DATA("matchibts=" LPX64 "\n",
+ CDEBUG(D_NET, "matchibts=" LPX64 "\n",
rxmsg->ptlm_u.req.kptlrm_matchbits);
/*
* Setup the MD
*/
- kptllnd_setup_md(kptllnd_data,&md,op,tx,
- payload_niov,payload_iov,payload_kiov,
- payload_offset,payload_nob,&tempiov);
+ kptllnd_setup_md(kptllnd_data, &md, op, tx,
+ payload_niov, payload_iov, payload_kiov,
+ payload_offset, payload_nob);
/*
* Attach the MD
*/
- ptl_rc = PtlMDBind(
- kptllnd_data->kptl_nih,
- md,
- PTL_UNLINK,
- &mdh);
- if(ptl_rc != PTL_OK){
+ ptl_rc = PtlMDBind(kptllnd_data->kptl_nih, md, PTL_UNLINK, &mdh);
+ if (ptl_rc != PTL_OK) {
CERROR("PtlMDBind failed %d\n",ptl_rc);
rc = -ENOMEM;
goto end;
*/
kptllnd_tx_decref(tx);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
kptl_data_t *kptllnd_data = ni->ni_data;
int nob;
- PJK_UT_MSG_DATA(">>> SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
- PJK_UT_MSG_DATA("nob=%d nov=%d offset=%d to %s\n",
+ CDEBUG(D_NET, ">>> SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
+ CDEBUG(D_NET, "nob=%d nov=%d offset=%d to %s\n",
payload_nob, payload_niov, payload_offset,
libcfs_id2str(target));
- PJK_UT_MSG_DATA("routing=%d target_is_router=%d\n",
- routing,target_is_router);
+ CDEBUG(D_NET, "routing=%d target_is_router=%d\n",
+ routing,target_is_router);
if(routing)
STAT_UPDATE(kps_send_routing);
case LNET_MSG_REPLY:
case LNET_MSG_PUT:
- PJK_UT_MSG_DATA("LNET_MSG_PUT/REPLY\n");
+ CDEBUG(D_NET, "LNET_MSG_PUT/REPLY\n");
/*
* Get an idle tx descriptor
kptllnd_do_put(tx,lntmsg,kptllnd_data);
- PJK_UT_MSG_DATA("<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
+ CDEBUG(D_NET, "<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
return 0;
case LNET_MSG_GET:
- PJK_UT_MSG_DATA("LNET_MSG_GET\n");
+ CDEBUG(D_NET, "LNET_MSG_GET\n");
/*
* Get an idle tx descriptor
if(target_is_router || routing)
break;
- PJK_UT_MSG_DATA("nob=%d\n",lntmsg->msg_md->md_length);
+ CDEBUG(D_NET, "nob=%d\n",lntmsg->msg_md->md_length);
/* Is the payload small enough not to need RDMA? */
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[lntmsg->msg_md->md_length]);
goto launch;
case LNET_MSG_ACK:
- PJK_UT_MSG_DATA("LNET_MSG_ACK\n");
+ CDEBUG(D_NET, "LNET_MSG_ACK\n");
LASSERT (payload_nob == 0);
break;
}
if(tx == NULL){
- PJK_UT_MSG_DATA("PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
/*
* Get an idle tx descriptor
return -ENOMEM;
}
}else{
- PJK_UT_MSG_DATA("Using PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "Using PTLLND_MSG_TYPE_IMMEDIATE\n");
/*
* Repurpose this TX
*/
launch:
kptllnd_tx_launch(tx, target, lntmsg);
- PJK_UT_MSG_DATA("<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
+ CDEBUG(D_NET, "<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\n");
return 0;
}
//kptl_data_t *kptllnd_data = ni->ni_data;
kptl_rx_t *rx = private;
- PJK_UT_MSG_DATA("Eager RX=%p RXB=%p\n",rx,rx->rx_rxb);
+ CDEBUG(D_NET, "Eager RX=%p RXB=%p\n",rx,rx->rx_rxb);
LASSERT(rx->rx_nob < *kptllnd_tunables.kptl_max_msg_size);
int nob;
int rc;
- PJK_UT_MSG_DATA(">>> RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\n");
- PJK_UT_MSG_DATA("niov=%d offset=%d mlen=%d rlen=%d\n",
+ CDEBUG(D_NET, ">>> RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR\n");
+ CDEBUG(D_NET, "niov=%d offset=%d mlen=%d rlen=%d\n",
niov,offset,mlen,rlen);
LASSERT (mlen <= rlen);
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- PJK_UT_MSG_DATA("PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]);
if (nob > *kptllnd_tunables.kptl_max_msg_size) {
break;
case PTLLND_MSG_TYPE_GET:
- PJK_UT_MSG_DATA("PTLLND_MSG_TYPE_GET\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET\n");
if (lntmsg == NULL) {
/* No match for the GET request */
lntmsg->msg_kiov,
lntmsg->msg_offset,
lntmsg->msg_len);
- PJK_UT_MSG_DATA("<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS rc=%d\n",rc);
}
break;
case PTLLND_MSG_TYPE_PUT:
- PJK_UT_MSG_DATA("PTLLND_MSG_TYPE_PUT\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT\n");
if (mlen == 0) { /* No payload */
lnet_finalize(ni, lntmsg, 0);
*/
kptllnd_rx_decref(rx,"lnet_parse",kptllnd_data);
- PJK_UT_MSG_DATA("<<< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR rc=%d\n",rc);
return rc;
}
int timeout;
int i;
- PJK_UT_MSG(">>>\n");
+ CDEBUG(D_NET, ">>>\n");
/*
* Daemonize
}
kptllnd_thread_fini(thread_data);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return (0);
};
kptl_rx_buffer_t *rxb = NULL;
kptl_tx_t *tx = NULL;
- PJK_UT_MSG(">>>\n");
+ CDEBUG(D_NET, ">>>\n");
/*
* Daemonize
if(rxb)
kptllnd_rx_buffer_post_handle_error(rxb);
if(tx){
- PJK_UT_MSG(">>> tx=%p\n",tx);
+ CDEBUG(D_NET, ">>> tx=%p\n",tx);
kptllnd_tx_done(tx);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
/*
}
kptllnd_thread_fini(thread_data);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return (0);
}
if(!list_empty(&kptllnd_data->kptl_canceled_peers)){
- PJK_UT_MSG("Cleaning Canceled Peers\n");
+ CDEBUG(D_NET, "Cleaning Canceled Peers\n");
STAT_UPDATE(kps_cleaning_caneled_peers);
}
CFS_MODULE_PARM(portal, "i", int, 0444,
"portal id");
+static int pid = PTLLND_PID;
+CFS_MODULE_PARM(pid, "i", int, 0444,
+ "portals pid");
+
static int rxb_npages = PTLLND_RXB_NPAGES;
CFS_MODULE_PARM(rxb_npages, "i", int, 0444,
"# of pages for rx buffers");
.kptl_concurrent_peers = &concurrent_peers,
.kptl_cksum = &cksum,
.kptl_portal = &portal,
+ .kptl_pid = &pid,
.kptl_timeout = &timeout,
.kptl_rxb_npages = &rxb_npages,
.kptl_credits = &credits,
sizeof(int), 0644, NULL, &proc_dointvec},
{5, "portal", &portal,
sizeof(int), 0444, NULL, &proc_dointvec},
- {6, "rxb_npages", &rxb_npages,
+ {6, "pid", &pid,
+ sizeof(int), 0444, NULL, &proc_dointvec},
+ {7, "rxb_npages", &rxb_npages,
sizeof(int), 0444, NULL, &proc_dointvec},
- {7, "credits", &credits,
+ {8, "credits", &credits,
sizeof(int), 0444, NULL, &proc_dointvec},
- {8, "peercredits", &peercredits,
+ {9, "peercredits", &peercredits,
sizeof(int), 0444, NULL, &proc_dointvec},
- {9, "max_msg_size", &max_msg_size,
+ {10,"max_msg_size", &max_msg_size,
sizeof(int), 0444, NULL, &proc_dointvec},
- {10, "peer_hash_table_size,", &peer_hash_table_size,
+ {11,"peer_hash_table_size,", &peer_hash_table_size,
sizeof(int), 0444, NULL, &proc_dointvec},
#ifdef PJK_DEBUGGING
- {11, "simulation_bitmap,", &simulation_bitmap,
+ {12, "simulation_bitmap,", &simulation_bitmap,
sizeof(int), 0444, NULL, &proc_dointvec},
#endif
kptllnd_peer_destroy (
kptl_peer_t *peer);
-kptl_peer_t *
-kptllnd_peer_find_holding_list_lock (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t target);
-
-
int
kptllnd_peer_add_to_list_locked (
kptl_data_t *kptllnd_data,
/* And add this to the list */
LASSERT(list_empty(&peer->peer_list));
list_add_tail (&peer->peer_list,
- kptllnd_nid2peerlist (kptllnd_data,peer->peer_nid));
+ kptllnd_ptlnid2peerlist(kptllnd_data,peer->peer_ptlid.nid));
STAT_UPDATE(kps_peers_created);
}
int
-kptllnd_peer_allocate (
- kptl_data_t *kptllnd_data,
- kptl_peer_t **peerp,
- lnet_process_id_t target)
+kptllnd_peer_allocate (kptl_data_t *kptllnd_data,
+ kptl_peer_t **peerp,
+ ptl_process_id_t ptlid)
{
kptl_peer_t *peer;
int rc;
- PJK_UT_MSG(">>> id=%s\n",libcfs_id2str(target));
+ CDEBUG(D_NET, ">>> "FMT_NID"/%d\n", ptlid.nid, ptlid.pid);
- LASSERT (target.nid != PTL_NID_ANY);
+ LASSERT (ptlid.nid != PTL_NID_ANY);
LIBCFS_ALLOC(peer, sizeof (*peer));
if (peer == NULL) {
peer->peer_state = PEER_STATE_ALLOCATED;
peer->peer_kptllnd_data = kptllnd_data;
- peer->peer_nid = target.nid;
- peer->peer_pid = target.pid;
+
+ peer->peer_nid = ptl2lnetnid(kptllnd_data, ptlid.nid);
+ peer->peer_ptlid = ptlid;
+
//peer->peer_incarnation = 0;
//peer->peer_tx_seqnum = 0;
*/
atomic_set (&peer->peer_refcount, 1);
- PJK_UT_MSG("<<< Peer=%p id=%s\n",peer,libcfs_id2str(target));
+ CDEBUG(D_NET, "<<< Peer=%p nid=%s\n",
+ peer, libcfs_nid2str(peer->peer_nid));
*peerp = peer;
return 0;
}
{
kptl_data_t *kptllnd_data = peer->peer_kptllnd_data;
- PJK_UT_MSG("Peer=%p\n",peer);
+ CDEBUG(D_NET, "Peer=%p\n",peer);
LASSERT (atomic_read (&peer->peer_refcount) == 0);
/* Not on the peer list */
const char *owner)
{
atomic_inc(&peer->peer_refcount);
-
- /*
- * The below message could actually be out of sync
- * with the real ref count, and is for informational purposes
- * only
- */
- PJK_UT_MSG("peer=%p owner=%s count=%d\n",peer,owner,
- atomic_read(&peer->peer_refcount));
}
void
unsigned long flags;
kptl_data_t *kptllnd_data = peer->peer_kptllnd_data;
- if( !atomic_dec_and_test(&peer->peer_refcount)){
-
- /*
- * The below message could actually be out of sync
- * with the real ref count, and is for informational purposes
- * only
- */
- PJK_UT_MSG("peer=%p owner=%s count=%d\n",peer,owner,
- atomic_read(&peer->peer_refcount));
+ if( !atomic_dec_and_test(&peer->peer_refcount))
return;
- }
- PJK_UT_MSG("peer=%p owner=%s LAST REF\n",peer,owner);
+ CDEBUG(D_NET, "peer=%p owner=%s LAST REF\n",peer,owner);
write_lock_irqsave(&kptllnd_data->kptl_peer_rw_lock, flags);
list_del_init (&peer->peer_list);
spin_lock_irqsave(&peer->peer_lock, flags);
if(!list_empty(&peer->peer_pending_txs))
- PJK_UT_MSG("Clearing Pending TXs\n");
+ CDEBUG(D_NET, "Clearing Pending TXs\n");
list_for_each_safe (tx_temp, tx_next, &peer->peer_pending_txs) {
tx = list_entry (tx_temp, kptl_tx_t, tx_list);
spin_lock_irqsave(&peer->peer_lock, flags);
if(!list_empty(&peer->peer_active_txs))
- PJK_UT_MSG("Clearing Active TXs\n");
+ CDEBUG(D_NET, "Clearing Active TXs\n");
again:
*/
if(!PtlHandleIsEqual(tx->tx_mdh_msg,PTL_INVALID_HANDLE)){
- PJK_UT_MSG("Unlink mhd_msg\n");
+ CDEBUG(D_NET, "Unlink mhd_msg\n");
LASSERT(atomic_read(&tx->tx_refcount)>1);
ptl_rc = PtlMDUnlink(tx->tx_mdh_msg);
#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS
}
if(!PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE)){
- PJK_UT_MSG("Unlink mdh\n");
+ CDEBUG(D_NET, "Unlink mdh\n");
LASSERT(atomic_read(&tx->tx_refcount)>1);
ptl_rc = PtlMDUnlink(tx->tx_mdh);
#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS
unsigned long flags;
int list_owns_ref=0;
- PJK_UT_MSG(">>> Peer=%p\n",peer);
+ CDEBUG(D_NET, ">>> Peer=%p\n",peer);
write_lock_irqsave(&kptllnd_data->kptl_peer_rw_lock, flags);
if(peer->peer_state != PEER_STATE_CANCELED){
if(list_owns_ref)
kptllnd_peer_decref(peer,"list");
- PJK_UT_MSG("<<< Peer=%p\n",peer);
+ CDEBUG(D_NET, "<<< Peer=%p\n",peer);
}
int
-kptllnd_peer_del (
- kptl_data_t *kptllnd_data,
- lnet_nid_t nid)
+kptllnd_peer_del (kptl_data_t *kptllnd_data, lnet_nid_t nid)
{
struct list_head *ptmp;
struct list_head *pnxt;
unsigned long flags;
int rc = -ENOENT;
-
- PJK_UT_MSG(">>> NID="LPX64"\n",nid);
+ CDEBUG(D_NET, ">>> NID="LPX64"\n",nid);
/*
- * Find the single bucket we are supposed to look at
- * or if nid = PTL_NID_ANY then look at all of the buckets
+ * Find the single bucket we are supposed to look at or if nid is a
+ * wildcard (LNET_NID_ANY) then look at all of the buckets
*/
- if (nid != PTL_NID_ANY)
- lo = hi = kptllnd_nid2peerlist(kptllnd_data,nid) - kptllnd_data->kptl_peers;
- else {
+ if (nid != LNET_NID_ANY) {
+ ptl_nid_t ptlnid = lnet2ptlnid(kptllnd_data, nid);
+ struct list_head *l = kptllnd_ptlnid2peerlist(kptllnd_data, ptlnid);
+
+ lo = hi = l - kptllnd_data->kptl_peers;
+ } else {
lo = 0;
hi = kptllnd_data->kptl_peer_hash_size - 1;
}
/*
* Is this the right one?
*/
- if (!(nid == PTL_NID_ANY || peer->peer_nid == nid))
+ if (!(nid == LNET_NID_ANY || peer->peer_nid == nid))
continue;
kptllnd_peer_addref(peer,"temp"); /* 1 ref for me... */
read_unlock_irqrestore(&kptllnd_data->kptl_peer_rw_lock, flags);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return (rc);
}
kptl_peer_t *peer,
kptl_tx_t *tx)
{
- PJK_UT_MSG("Peer=%p TX=%p\n",peer,tx);
+ CDEBUG(D_NET, "Peer=%p TX=%p\n",peer,tx);
LASSERT(peer->peer_state != PEER_STATE_CANCELED);
LASSERT(tx->tx_state == TX_STATE_ALLOCATED);
kptl_peer_t *peer,
kptl_tx_t *tx)
{
- PJK_UT_MSG("Peer=%p TX=%p\n",peer,tx);
+ CDEBUG(D_NET, "Peer=%p TX=%p\n",peer,tx);
LASSERT(peer->peer_state != PEER_STATE_CANCELED);
LASSERT(tx->tx_state == TX_STATE_ALLOCATED);
void
kptllnd_peer_check_sends (
- kptl_peer_t *peer )
+ kptl_peer_t *peer)
{
kptl_tx_t *tx;
ptl_handle_me_t meh;
ptl_handle_md_t mdh;
ptl_handle_md_t mdh_msg;
- ptl_process_id_t target;
unsigned long flags;
LASSERT(!in_interrupt());
*/
spin_lock_irqsave(&peer->peer_lock, flags);
- PJK_UT_MSG_DATA(">>>Peer=%p Credits=%d Outstanding=%d\n",
+ CDEBUG(D_NET, ">>>Peer=%p Credits=%d Outstanding=%d\n",
peer,peer->peer_credits,peer->peer_outstanding_credits);
if(list_empty(&peer->peer_pending_txs) &&
*/
tx = kptllnd_get_idle_tx(kptllnd_data,TX_TYPE_SMALL_MESSAGE);
if( tx == NULL ) {
- CERROR ("Can't return credits to "LPX64": tx descs exhausted\n",
- peer->peer_nid);
+ CERROR("Can't return credits to %s: tx descs exhausted\n",
+ libcfs_nid2str(peer->peer_nid));
}else{
kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP,0);
kptllnd_peer_queue_tx_locked(peer,tx);
*/
if (peer->peer_credits == 0) {
STAT_UPDATE(kps_no_credits);
- CDEBUG(D_NET, LPX64": no credits\n",peer->peer_nid);
+ CDEBUG(D_NET, "%s: no credits\n",
+ libcfs_nid2str(peer->peer_nid));
break;
}
if (peer->peer_credits == 1 &&
peer->peer_outstanding_credits == 0) {
STAT_UPDATE(kps_saving_last_credit);
- CDEBUG(D_NET, LPX64": not using last credit\n",
- peer->peer_nid);
+ CDEBUG(D_NET, "%s: not using last credit\n",
+ libcfs_nid2str(peer->peer_nid));
break;
}
spin_unlock_irqrestore(&peer->peer_lock, flags);
/* redundant NOOP */
kptllnd_tx_decref(tx);
- CDEBUG(D_NET, LPX64": redundant noop\n",
- peer->peer_nid);
+ CDEBUG(D_NET, "%s: redundant noop\n",
+ libcfs_nid2str(peer->peer_nid));
spin_lock_irqsave(&peer->peer_lock, flags);
continue;
}
- PJK_UT_MSG_DATA("--- TXTXTXTXTXTXTXTXTXTXTXTXTXTX\n");
- PJK_UT_MSG_DATA("Sending TX=%p Size=%d\n",tx,tx->tx_msg->ptlm_nob);
- PJK_UT_MSG_DATA("Target nid="LPX64" pid=%d\n",peer->peer_nid,peer->peer_pid);
+ CDEBUG(D_NET, "--- TXTXTXTXTXTXTXTXTXTXTXTXTXTX\n");
+ CDEBUG(D_NET, "Sending TX=%p Size=%d\n",tx,tx->tx_msg->ptlm_nob);
+ CDEBUG(D_NET, "Target nid=%s ptl "FMT_NID"/%d\n",
+ libcfs_nid2str(peer->peer_nid),
+ peer->peer_ptlid.nid, peer->peer_ptlid.pid);
mdh = PTL_INVALID_HANDLE;
mdh_msg =PTL_INVALID_HANDLE;
/*
* Assign matchbits for a put/get
*/
- if(tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_PUT ||
- tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_GET){
+ if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_PUT ||
+ tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_GET) {
- PJK_UT_MSG_DATA("next matchbits="LPX64" (before)\n",
+ CDEBUG(D_NET, "next matchbits="LPX64" (before)\n",
peer->peer_next_matchbits);
* not use them. Just skip over them. This check protects us
* even in the case of 64-bit rollover.
*/
- if(peer->peer_next_matchbits < PTL_RESERVED_MATCHBITS){
- CDEBUG(D_INFO,"Match Bits Rollover for "LPX64"\n",
- peer->peer_nid);
+ if (peer->peer_next_matchbits < PTL_RESERVED_MATCHBITS) {
+ CDEBUG(D_INFO,"Match Bits Rollover for %s\n",
+ libcfs_nid2str(peer->peer_nid));
peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
-
}
/*
tx->tx_msg->ptlm_u.req.kptlrm_matchbits =
peer->peer_next_matchbits ++;
- PJK_UT_MSG_DATA("next matchbits="LPX64" (after)\n",
+ CDEBUG(D_NET, "next matchbits="LPX64" (after)\n",
peer->peer_next_matchbits);
}
* Complete the message fill in all the rest
* of the header
*/
- kptllnd_msg_pack(
- tx->tx_msg,
- peer->peer_outstanding_credits,
- peer->peer_nid,
- peer->peer_incarnation,
- peer->peer_tx_seqnum,
- kptllnd_data);
-
+ kptllnd_msg_pack(tx->tx_msg,
+ peer->peer_outstanding_credits,
+ peer->peer_nid,
+ peer->peer_incarnation,
+ peer->peer_tx_seqnum,
+ kptllnd_data);
/*
* We just sent a packet
*/
* Construct an address that Portals needs from the NID
*/
- target.nid = lnet2ptlnid(kptllnd_data,peer->peer_nid);
- target.pid = peer->peer_pid;
+ CDEBUG(D_NET, "Msg NOB = %d\n",tx->tx_msg->ptlm_nob);
+ CDEBUG(D_NET, "Giving %d credits back to peer\n",
+ tx->tx_msg->ptlm_credits);
+ CDEBUG(D_NET, "Seq # = "LPX64"\n",tx->tx_msg->ptlm_seq);
- PJK_UT_MSG_DATA("Msg NOB = %d\n",tx->tx_msg->ptlm_nob);
- PJK_UT_MSG_DATA("Giving %d credits back to peer\n",tx->tx_msg->ptlm_credits);
- PJK_UT_MSG_DATA("Seq # = "LPX64"\n",tx->tx_msg->ptlm_seq);
+ CDEBUG(D_NET, "lnet TX %s\n", libcfs_nid2str(peer->peer_nid));
+ CDEBUG(D_NET, "ptl TX "FMT_NID"/%d\n",
+ peer->peer_ptlid.nid, peer->peer_ptlid.pid);
- PJK_UT_MSG("lnet TX nid=" LPX64 " pid=%d\n",peer->peer_nid,peer->peer_pid);
- PJK_UT_MSG("ptl TX nid=" FMT_NID " pid=%d\n",target.nid,target.pid);
-
- if(tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_GET ||
- tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_PUT){
- tempiov_t tempiov;
+ if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_GET ||
+ tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_PUT) {
+ int op;
+
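+                        /* NB: the MD op is the complement of the message
+                         * type - for our PUT the peer GETs the payload from
+                         * this MD, and for our GET the peer PUTs into it. */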
+ if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_PUT)
+ op = PTL_MD_OP_GET;
+ else
+ op = PTL_MD_OP_PUT;
- PJK_UT_MSG_DATA("matchibts=" LPX64 "\n",
+                        CDEBUG(D_NET, "matchbits=" LPX64 "\n",
tx->tx_msg->ptlm_u.req.kptlrm_matchbits);
- /*
- * Attach the ME
- */
- rc = PtlMEAttach(
- kptllnd_data->kptl_nih,
- *kptllnd_tunables.kptl_portal,
- target,
- tx->tx_msg->ptlm_u.req.kptlrm_matchbits,
- 0, /* all matchbits are valid - ignore none*/
- PTL_UNLINK,
- PTL_INS_BEFORE,
- &meh);
- if(rc != 0) {
+ rc = PtlMEAttach(kptllnd_data->kptl_nih,
+ *kptllnd_tunables.kptl_portal,
+ peer->peer_ptlid,
+ tx->tx_msg->ptlm_u.req.kptlrm_matchbits,
+ 0, /* ignore none */
+ PTL_UNLINK,
+ PTL_INS_BEFORE,
+ &meh);
+ if (rc != PTL_OK) {
CERROR("PtlMeAttach failed %d\n",rc);
goto failed_without_lock;
}
/* Setup the MD */
- kptllnd_setup_md(kptllnd_data,&md,
- tx->tx_msg->ptlm_type == LNET_MSG_GET ? PTL_MD_OP_PUT :
- PTL_MD_OP_GET,
- tx,
- tx->tx_payload_niov,
- tx->tx_payload_iov,
- tx->tx_payload_kiov,
- tx->tx_payload_offset,
- tx->tx_payload_nob,
- &tempiov);
+ kptllnd_setup_md(kptllnd_data, &md, op, tx,
+ tx->tx_payload_niov,
+ tx->tx_payload_iov,
+ tx->tx_payload_kiov,
+ tx->tx_payload_offset,
+ tx->tx_payload_nob);
/*
* Add a ref for this MD, because unlink
/*
* Bind the MD
*/
- rc = PtlMDBind (
- kptllnd_data->kptl_nih,
- md,
- PTL_UNLINK,
- &mdh_msg);
- if(rc != 0){
- if(!PtlHandleIsEqual(mdh,PTL_INVALID_HANDLE)){
+ rc = PtlMDBind(kptllnd_data->kptl_nih, md,
+ PTL_UNLINK, &mdh_msg);
+ if (rc != PTL_OK) {
+ if (!PtlHandleIsEqual(mdh,PTL_INVALID_HANDLE)) {
rc2 = PtlMDUnlink(mdh);
/*
* The unlink should succeed
LASSERT(PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE));
LASSERT(PtlHandleIsEqual(tx->tx_mdh_msg,PTL_INVALID_HANDLE));
#ifdef _USING_LUSTRE_PORTALS_
- PJK_UT_MSG("tx_mdh = " LPX64 "\n",mdh.cookie);
- PJK_UT_MSG("tx_mdh_msg = " LPX64 "\n",mdh_msg.cookie);
+ CDEBUG(D_NET, "tx_mdh = " LPX64 "\n",mdh.cookie);
+ CDEBUG(D_NET, "tx_mdh_msg = " LPX64 "\n",mdh_msg.cookie);
#endif
tx->tx_mdh = mdh;
tx->tx_mdh_msg = mdh_msg;
- if(tx->tx_type == TX_TYPE_SMALL_MESSAGE)
- LASSERT(PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE));
+ LASSERT (tx->tx_type != TX_TYPE_SMALL_MESSAGE ||
+ PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE));
list_add_tail(&tx->tx_list, &peer->peer_active_txs);
peer->peer_active_txs_change_counter++;
- LASSERT(tx->tx_peer == peer);
+ LASSERT (tx->tx_peer == peer);
/*
* Grab a ref so the TX doesn't go away
spin_unlock_irqrestore(&peer->peer_lock, flags);
- rc = PtlPut (
- tx->tx_mdh_msg,
- PTL_NOACK_REQ, /* we dont need an ack */
- target, /* peer "address" */
- *kptllnd_tunables.kptl_portal, /* portal */
- 0, /* cookie */
- LNET_MSG_MATCHBITS, /* match bits */
- 0, /* offset */
- 0); /* header data */
- if(rc != 0){
+ rc = PtlPut (tx->tx_mdh_msg,
+                     PTL_NOACK_REQ,        /* we don't need an ack */
+ peer->peer_ptlid, /* peer "address" */
+ *kptllnd_tunables.kptl_portal, /* portal */
+ 0, /* cookie */
+ LNET_MSG_MATCHBITS, /* match bits */
+ 0, /* offset */
+ 0); /* header data */
+ if (rc != PTL_OK) {
CERROR("PtlPut error %d\n",rc);
-
/*
* Do the unlink which should succeed
*/
rc2 = PtlMDUnlink(tx->tx_mdh_msg);
LASSERT( rc2 == 0);
-
#ifndef LUSTRE_PORTALS_UNLINK_SEMANTICS
tx->tx_mdh_msg = PTL_INVALID_HANDLE;
kptllnd_tx_decref(tx);
}
-
spin_unlock_irqrestore(&peer->peer_lock, flags);
- PJK_UT_MSG_DATA("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return;
failed_without_lock:
*/
kptllnd_tx_decref(tx);
- PJK_UT_MSG("<<< FAILED\n");
+ CDEBUG(D_NET, "<<< FAILED\n");
}
int
if(!list_empty(&peer->peer_pending_txs)){
tx = list_entry(peer->peer_pending_txs.next,kptl_tx_t,tx_list);
if(time_after_eq(jiffies,tx->tx_deadline)){
- PJK_UT_MSG("Peer=%p PENDING tx=%p time=%lu sec\n",
+ CDEBUG(D_NET, "Peer=%p PENDING tx=%p time=%lu sec\n",
peer,tx,(jiffies - tx->tx_deadline)/HZ);
rc = 1;
}
if(!list_empty(&peer->peer_active_txs)){
tx = list_entry(peer->peer_active_txs.next,kptl_tx_t,tx_list);
if(time_after_eq(jiffies,tx->tx_deadline)){
- PJK_UT_MSG("Peer=%p ACTIVE tx=%p time=%lu sec\n",
+ CDEBUG(D_NET, "Peer=%p ACTIVE tx=%p time=%lu sec\n",
peer,tx,(jiffies - tx->tx_deadline)/HZ);
rc = 1;
}
unsigned long flags;
- /*PJK_UT_MSG("Bucket=%d\n",idx);*/
+ CDEBUG(D_INFO, "Bucket=%d\n",idx);
again:
/* NB. We expect to have a look at all the peers and not find any
list_for_each (ptmp, peers) {
peer = list_entry (ptmp, kptl_peer_t, peer_list);
- PJK_UT_MSG("Peer=%p Credits=%d Outstanding=%d\n",
- peer,peer->peer_credits,peer->peer_outstanding_credits);
+ CDEBUG(D_NET, "Peer=%p Credits=%d Outstanding=%d\n",
+ peer,peer->peer_credits,peer->peer_outstanding_credits);
/* In case we have enough credits to return via a
* NOOP, but there were no non-blocking tx descs
read_unlock_irqrestore(&kptllnd_data->kptl_peer_rw_lock,
flags);
- CERROR("Timed out RDMA with "LPX64"\n",peer->peer_nid);
+ CERROR("Timed out communications with %s\n",
+ libcfs_nid2str(peer->peer_nid));
kptllnd_peer_cancel(peer);
kptllnd_peer_decref(peer,"temp"); /* ...until here */
}
kptl_peer_t *
-kptllnd_peer_find (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t target)
-{
- kptl_peer_t *peer;
- unsigned long flags;
- read_lock_irqsave(&kptllnd_data->kptl_peer_rw_lock, flags);
- peer = kptllnd_peer_find_holding_list_lock(kptllnd_data,target);
- read_unlock_irqrestore(&kptllnd_data->kptl_peer_rw_lock, flags);
- return peer;
-}
-
-kptl_peer_t *
-kptllnd_peer_find_holding_list_lock (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t target)
+kptllnd_ptlnid2peer_locked (kptl_data_t *kptllnd_data,
+ ptl_nid_t nid)
{
- struct list_head *peer_list = kptllnd_nid2peerlist (kptllnd_data,target.nid);
+ struct list_head *peer_list = kptllnd_ptlnid2peerlist(kptllnd_data, nid);
struct list_head *tmp;
kptl_peer_t *peer;
- PJK_UT_MSG(">>> id=%s\n",libcfs_id2str(target));
+ CDEBUG(D_NET, ">>> id="FMT_NID"\n", nid);
list_for_each (tmp, peer_list) {
LASSERT(peer->peer_state != PEER_STATE_CANCELED);
- PJK_UT_MSG("NID: peer="LPX64" target="LPX64"\n",
- peer->peer_nid,target.nid);
- PJK_UT_MSG("PID: peer=%d target=%d\n",
- peer->peer_pid,target.pid);
-
- if (! (peer->peer_nid == target.nid &&
- peer->peer_pid == target.pid))
+ if (peer->peer_ptlid.nid != nid)
continue;
- CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
- peer, libcfs_id2str(target), atomic_read (&peer->peer_refcount));
-
kptllnd_peer_addref(peer,"find");
- PJK_UT_MSG("<<< Peer=%p\n",peer);
+
+ CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
+ peer, libcfs_nid2str(peer->peer_nid),
+ atomic_read (&peer->peer_refcount));
return peer;
}
- PJK_UT_MSG("<<< NOTFOUND\n");
+ CDEBUG(D_NET, "<<< NOTFOUND\n");
return NULL;
}
kptl_peer_t *
-kptllnd_peer_handle_hello (
- kptl_data_t *kptllnd_data,
- lnet_process_id_t initiator,
- kptl_msg_t *msg)
+kptllnd_ptlnid2peer (kptl_data_t *kptllnd_data, ptl_nid_t nid)
+{
+ kptl_peer_t *peer;
+ unsigned long flags;
+
+ read_lock_irqsave(&kptllnd_data->kptl_peer_rw_lock, flags);
+ peer = kptllnd_ptlnid2peer_locked(kptllnd_data, nid);
+ read_unlock_irqrestore(&kptllnd_data->kptl_peer_rw_lock, flags);
+
+ return peer;
+}
+
+kptl_peer_t *
+kptllnd_nid2peer_locked (kptl_data_t *kptllnd_data,
+ lnet_nid_t nid)
+{
+ return kptllnd_ptlnid2peer_locked(kptllnd_data,
+ lnet2ptlnid(kptllnd_data, nid));
+}
+
+kptl_peer_t *
+kptllnd_nid2peer (kptl_data_t *kptllnd_data, lnet_nid_t nid)
+{
+ return kptllnd_ptlnid2peer(kptllnd_data,
+ lnet2ptlnid(kptllnd_data, nid));
+}
+
+kptl_peer_t *
+kptllnd_peer_handle_hello (kptl_data_t *kptllnd_data,
+ ptl_process_id_t initiator,
+ kptl_msg_t *msg)
{
kptl_peer_t *peer = NULL;
- kptl_peer_t *peer_allocated = NULL;
+ kptl_peer_t *new_peer = NULL;
kptl_peer_t *peer_to_cancel = NULL;
unsigned long flags;
- kptl_tx_t *tx_hello = NULL;
+ kptl_tx_t *hello_tx = NULL;
int rc;
__u64 safe_matchbits_from_peer;
__u64 safe_matchbits_to_peer = 0;
-
- PJK_UT_MSG(">>>\n");
+ CDEBUG(D_NET, ">>> "FMT_NID"/%d\n", initiator.nid, initiator.pid);
safe_matchbits_from_peer = msg->ptlm_u.hello.kptlhm_matchbits +
*kptllnd_tunables.kptl_peercredits;
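+        /* NB: the peer may already have posted up to peercredits sends
+         * with matchbits below this value, so anything smaller is unsafe
+         * to reuse. */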
/*
* Immediate message sizes MUST be equal
*/
- if( msg->ptlm_u.hello.kptlhm_max_msg_size !=
- *kptllnd_tunables.kptl_max_msg_size){
+ if (msg->ptlm_u.hello.kptlhm_max_msg_size !=
+ *kptllnd_tunables.kptl_max_msg_size) {
CERROR("IMMD message size MUST be equal for all peers got %d expected %d\n",
- msg->ptlm_u.hello.kptlhm_max_msg_size,
- *kptllnd_tunables.kptl_max_msg_size);
-
+ msg->ptlm_u.hello.kptlhm_max_msg_size,
+ *kptllnd_tunables.kptl_max_msg_size);
return 0;
}
* Setup a connect HELLO message. We ultimately might not
* use it but likely we will.
*/
- tx_hello = kptllnd_get_idle_tx(kptllnd_data,TX_TYPE_SMALL_MESSAGE);
- if( tx_hello == NULL) {
- CERROR("Unable to allocate connect message for %s\n",libcfs_id2str(initiator));
+ hello_tx = kptllnd_get_idle_tx(kptllnd_data,TX_TYPE_SMALL_MESSAGE);
+ if (hello_tx == NULL) {
+ CERROR("Unable to allocate connect message for "FMT_NID"/%d\n",
+ initiator.nid, initiator.pid);
return 0;
}
- kptllnd_init_msg(
- tx_hello->tx_msg,
- PTLLND_MSG_TYPE_HELLO,
- sizeof(kptl_hello_msg_t));
+ kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
+ sizeof(kptl_hello_msg_t));
/*
- * Allocate a peer, even though we might not ultimatly use it
- * however we want to avoid doing this while holidng
+ * Allocate a peer, even though we might not ultimately use it
+ * however we want to avoid doing this while holding
* the peer_rw_lock and be forced into atomic context
*/
- rc = kptllnd_peer_allocate ( kptllnd_data, &peer_allocated, initiator);
- if(rc != 0){
- kptllnd_tx_decref(tx_hello);
- CERROR("Failed to create peer (id=%s)\n",libcfs_id2str(initiator));
+ rc = kptllnd_peer_allocate(kptllnd_data, &new_peer, initiator);
+        if (rc != 0) {
+ kptllnd_tx_decref(hello_tx);
+ CERROR("Failed to create peer for "FMT_NID"/%d\n",
+ initiator.nid, initiator.pid);
return 0;
}
/*
* Look for peer because it could have been previously here
*/
- peer = kptllnd_peer_find_holding_list_lock(kptllnd_data,initiator);
+ peer = kptllnd_ptlnid2peer_locked(kptllnd_data, initiator.nid);
/*
* If peer is already here
*/
- if(peer != NULL){
-
- if(peer->peer_incarnation == 0) {
+ if (peer != NULL) {
+ if (peer->peer_incarnation == 0) {
/*
* Update the peer state
*/
/*
* Save the match bits
*/
- PJK_UT_MSG_DATA(" **** Updating Matchbits="LPX64" ****\n",
- safe_matchbits_from_peer);
+ CDEBUG(D_NET, " **** Updating Matchbits="LPX64" ****\n",
+ safe_matchbits_from_peer);
peer->peer_next_matchbits = safe_matchbits_from_peer;
- if(peer->peer_next_matchbits < PTL_RESERVED_MATCHBITS)
+ if (peer->peer_next_matchbits < PTL_RESERVED_MATCHBITS)
peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
- }
- /*
- * If the incarnation has changed then we need to
- * resend the hello.
- */
- else if( peer->peer_incarnation != msg->ptlm_srcnid ) {
+ } else if (peer->peer_incarnation != msg->ptlm_srcstamp ||
+ peer->peer_ptlid.pid != initiator.pid) {
+ CDEBUG(D_NET, "Peer %s reconnecting with pid,stamp: "
+                               "%d,"LPX64" (old %d,"LPX64")\n",
+ libcfs_nid2str(peer->peer_nid),
+ initiator.pid, msg->ptlm_srcstamp,
+ peer->peer_ptlid.pid, peer->peer_incarnation);
/*
- * Put the match bits into the hello message
+ * If the incarnation or PID have changed, assume the
+ * peer has rebooted and resend the hello
*/
safe_matchbits_to_peer =
peer->peer_last_matchbits_seen + 1 +
peer_to_cancel = peer;
peer = NULL;
- }else{
+ } else {
CERROR("Receiving HELLO message on already connected peer %s\n",
- libcfs_id2str(initiator));
+ libcfs_nid2str(peer->peer_nid));
}
}
- if( peer == NULL) {
-
+ if (peer == NULL) {
/*
* Put the match bits into the hello message
*/
- tx_hello->tx_msg->ptlm_u.hello.kptlhm_matchbits =
+ hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits =
safe_matchbits_to_peer;
- tx_hello->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
+ hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
*kptllnd_tunables.kptl_max_msg_size;
/*
* Try and attach this peer to the list
*/
- rc = kptllnd_peer_add_to_list_locked ( kptllnd_data, peer_allocated);
- if(rc != 0){
- CERROR("Failed to create peer (id=%s)\n",
- libcfs_id2str(initiator));
+ rc = kptllnd_peer_add_to_list_locked(kptllnd_data, new_peer);
+ if (rc != 0) {
+ CERROR("Failed to create peer for "FMT_NID"/%d\n",
+ initiator.nid, initiator.pid);
goto failed;
}
- peer = peer_allocated;
- peer_allocated = NULL;
-
+ peer = new_peer;
+ new_peer = NULL;
LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO);
peer->peer_state = PEER_STATE_ACTIVE;
/*
* Save the match bits
*/
- PJK_UT_MSG_DATA("**** Setting Matchbits="LPX64" ****\n",
- safe_matchbits_from_peer);
+ CDEBUG(D_NET, "**** Setting Matchbits="LPX64" ****\n",
+ safe_matchbits_from_peer);
peer->peer_next_matchbits = safe_matchbits_from_peer;
if(peer->peer_next_matchbits < PTL_RESERVED_MATCHBITS)
peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
-
/*
* And save them from a previous incarnation
*/
/*
* Queue the message
*/
- kptllnd_peer_queue_tx_locked(peer,tx_hello);
+ kptllnd_peer_queue_tx_locked(peer,hello_tx);
/*
* And don't free it because it's queued
*/
- tx_hello = NULL;
-
+ hello_tx = NULL;
}
failed:
write_unlock_irqrestore(&kptllnd_data->kptl_peer_rw_lock,flags);
- if(tx_hello)
- kptllnd_tx_decref(tx_hello);
+ if (hello_tx != NULL)
+ kptllnd_tx_decref(hello_tx);
- /*
- *
- */
- if(peer){
+ if (peer != NULL)
kptllnd_peer_check_sends(peer);
- }
- if(peer_to_cancel) {
+ if (peer_to_cancel != NULL) {
kptllnd_peer_cancel(peer_to_cancel);
- kptllnd_peer_decref(peer_to_cancel,"find");
+ kptllnd_peer_decref(peer_to_cancel, "find");
}
- if(peer_allocated)
- kptllnd_peer_decref(peer_allocated,"alloc");
-
- PJK_UT_MSG("<<< Peer=%p\n",peer);
+ if (new_peer != NULL)
+ kptllnd_peer_decref(new_peer, "alloc");
+ CDEBUG(D_NET, "<<< Peer=%p\n", peer);
return peer;
}
void
-kptllnd_tx_launch (
- kptl_tx_t *tx,
- lnet_process_id_t target,
- lnet_msg_t *ptlmsg )
+kptllnd_tx_launch (kptl_tx_t *tx,
+ lnet_process_id_t target,
+ lnet_msg_t *ptlmsg)
{
kptl_data_t *kptllnd_data = tx->tx_po.po_kptllnd_data;
kptl_peer_t *peer = NULL;
- kptl_peer_t *peer_allocated = NULL;
+ kptl_peer_t *new_peer = NULL;
unsigned long flags;
rwlock_t *g_lock = &kptllnd_data->kptl_peer_rw_lock;
int rc;
- kptl_tx_t *tx_hello = NULL;
+ ptl_process_id_t ptlid;
+ kptl_tx_t *hello_tx = NULL;
/* If I get here, I've committed to send, so I complete the tx with
* failure on any problems */
- PJK_UT_MSG(">>> TX=%p target=%s\n",tx,libcfs_id2str(target));
+ CDEBUG(D_NET, ">>> TX=%p target=%s\n",tx,libcfs_id2str(target));
LASSERT (tx->tx_ptlmsg == NULL);
tx->tx_ptlmsg = ptlmsg; /* finalize ptlmsg on completion */
* First try to find the peer (this will grab the
* read lock
*/
- peer = kptllnd_peer_find (kptllnd_data,target);
+ peer = kptllnd_nid2peer(kptllnd_data, target.nid);
/*
* If we find the peer
spin_unlock_irqrestore(&peer->peer_lock, flags);
kptllnd_peer_check_sends(peer);
kptllnd_peer_decref(peer,"find");
- PJK_UT_MSG("<<< FOUND\n");
+ CDEBUG(D_NET, "<<< FOUND\n");
return;
}
* (in the case that the peer is racing to connect with us)
* but more than likely we will.
*/
- tx_hello = kptllnd_get_idle_tx(kptllnd_data,TX_TYPE_SMALL_MESSAGE);
- if( tx_hello == NULL) {
- CERROR("Unable to allocate connect message for %s\n",libcfs_id2str(target));
+ hello_tx = kptllnd_get_idle_tx(kptllnd_data,TX_TYPE_SMALL_MESSAGE);
+        if (hello_tx == NULL) {
+ CERROR("Unable to allocate connect message for %s\n",
+ libcfs_id2str(target));
kptllnd_tx_decref (tx);
return;
}
kptllnd_init_msg(
- tx_hello->tx_msg,
+ hello_tx->tx_msg,
PTLLND_MSG_TYPE_HELLO,
sizeof(kptl_hello_msg_t));
* We've never seen this peer before. So setup
* a default message.
*/
- tx_hello->tx_msg->ptlm_u.hello.kptlhm_matchbits = 0;
- tx_hello->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
+ hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = 0;
+ hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
*kptllnd_tunables.kptl_max_msg_size;
/*
* Allocate a new peer
* (it's not active until its on the list)
*/
- PJK_UT_MSG("TX %p creating NEW PEER %s\n",tx,libcfs_id2str(target));
- rc = kptllnd_peer_allocate ( kptllnd_data, &peer_allocated, target);
- if(rc != 0){
- CERROR("Failed to create peer %s\n",libcfs_id2str(target));
- kptllnd_tx_decref (tx);
- kptllnd_tx_decref (tx_hello);
+ CDEBUG(D_NET, "TX %p creating NEW PEER %s\n",
+ tx, libcfs_id2str(target));
+ ptlid.nid = lnet2ptlnid(kptllnd_data, target.nid);
+ ptlid.pid = kptllnd_data->kptl_portals_id.pid;
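+        /* NB: assume the peer is listening at the same well-known portals
+         * PID as this node. */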
+
+ rc = kptllnd_peer_allocate(kptllnd_data, &new_peer, ptlid);
+
+ if (rc != 0) {
+ CERROR("Failed to create peer %s\n", libcfs_id2str(target));
+ kptllnd_tx_decref(tx);
+ kptllnd_tx_decref(hello_tx);
return;
}
*/
write_lock_irqsave(g_lock, flags);
- peer = kptllnd_peer_find_holding_list_lock (kptllnd_data,target);
+ peer = kptllnd_nid2peer_locked(kptllnd_data, target.nid);
/*
* If we find the peer
if (peer != NULL) {
write_unlock_irqrestore(g_lock, flags);
- CDEBUG(D_TRACE,"HELLO message race occurred for %s\n",libcfs_id2str(target));
+ CDEBUG(D_TRACE,"HELLO message race occurred for %s\n",
+ libcfs_id2str(target));
spin_lock_irqsave(&peer->peer_lock, flags);
kptllnd_peer_queue_tx_locked ( peer, tx );
kptllnd_peer_check_sends(peer);
kptllnd_peer_decref(peer,"find");
- kptllnd_peer_decref(peer_allocated,"alloc");
+ kptllnd_peer_decref(new_peer,"alloc");
/* and we don't need the connection tx*/
- kptllnd_tx_decref(tx_hello);
+ kptllnd_tx_decref(hello_tx);
- PJK_UT_MSG("<<< FOUND2\n");
+ CDEBUG(D_NET, "<<< FOUND2\n");
return;
}
- rc = kptllnd_peer_add_to_list_locked ( kptllnd_data, peer_allocated);
+        rc = kptllnd_peer_add_to_list_locked(kptllnd_data, new_peer);
if(rc != 0){
write_unlock_irqrestore(g_lock, flags);
- CERROR("Failed to add peer to list for %s\n",libcfs_id2str(target));
+ CERROR("Failed to add peer to list for %s\n",
+ libcfs_id2str(target));
/* Drop these TXs tx*/
- kptllnd_tx_decref (tx);
- kptllnd_tx_decref (tx_hello);
- kptllnd_peer_decref(peer_allocated,"create");
+ kptllnd_tx_decref(tx);
+ kptllnd_tx_decref(hello_tx);
+ kptllnd_peer_decref(new_peer,"create");
return;
}
- peer = peer_allocated;
- peer_allocated = NULL;
+ peer = new_peer;
+ new_peer = NULL;
write_unlock_irqrestore(g_lock,flags);
* the connection request will go out, and
* the tx will wait for a reply.
*/
- PJK_UT_MSG("TXHello=%p\n",tx_hello);
+ CDEBUG(D_NET, "TXHello=%p\n", hello_tx);
spin_lock_irqsave(&peer->peer_lock, flags);
- kptllnd_peer_queue_tx_locked(peer,tx_hello);
- kptllnd_peer_queue_tx_locked(peer,tx);
+ kptllnd_peer_queue_tx_locked(peer, hello_tx);
+ kptllnd_peer_queue_tx_locked(peer, tx);
spin_unlock_irqrestore(&peer->peer_lock, flags);
kptllnd_peer_check_sends(peer);
kptllnd_peer_decref(peer,"find");
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
kptllnd_rx_buffer_pool_init(
kptl_rx_buffer_pool_t *rxbp)
{
- PJK_UT_MSG("kptllnd_rx_buffer_pool_init\n");
+ CDEBUG(D_NET, "kptllnd_rx_buffer_pool_init\n");
memset(rxbp,0,sizeof(*rxbp));
spin_lock_init (&rxbp->rxbp_lock);
int i;
unsigned long flags;
- PJK_UT_MSG("kptllnd_rx_buffer_pool_fini\n");
+ CDEBUG(D_NET, "kptllnd_rx_buffer_pool_fini\n");
spin_lock_irqsave(&rxbp->rxbp_lock, flags);
spin_lock_irqsave(&rxbp->rxbp_lock, flags);
}else{
- PJK_UT_MSG("PtlMDUnlink(%p) rc=%d\n",rxb,rc);
+ CDEBUG(D_NET, "PtlMDUnlink(%p) rc=%d\n",rxb,rc);
/*
* The unlinked failed so put this back
* on the list for later
*/
if(!list_empty(&rxbp->rxbp_list)){
i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                        CDEBUG(D_NET,
"Waiting for %d Busy RX Buffers\n",
rxbp->rxbp_count);
spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
CDEBUG(D_TRACE,"|rxbp_list|=EMPTY\n");
if(rxbp->rxbp_count != 0){
- PJK_UT_MSG("Waiting for %d RX Buffers to unlink\n",rxbp->rxbp_count);
+ CDEBUG(D_NET, "Waiting for %d RX Buffers to unlink\n",rxbp->rxbp_count);
i = 2;
while (rxbp->rxbp_count != 0) {
i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+                        CDEBUG(D_NET,
"Waiting for %d RX Buffers to unlink\n",
rxbp->rxbp_count);
spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- PJK_UT_MSG("kptllnd_rx_buffer_pool_reserve(%d)\n",count);
+ CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n",count);
/*
* Prevent reservation of anymore while we are shutting down
(PAGE_SIZE * (*kptllnd_tunables.kptl_rxb_npages));
++nbuffers ;
- PJK_UT_MSG("nbuffers=%d rxbp_count=%d\n",nbuffers,rxbp->rxbp_count);
+ CDEBUG(D_NET, "nbuffers=%d rxbp_count=%d\n",nbuffers,rxbp->rxbp_count);
if(rxbp->rxbp_count < nbuffers)
add = nbuffers - rxbp->rxbp_count;
- PJK_UT_MSG("adding=%d\n",add);
+ CDEBUG(D_NET, "adding=%d\n",add);
/*
* Under the same lock assume they are added
{
unsigned long flags;
spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- PJK_UT_MSG("kptllnd_rx_buffer_pool_unreserve(%d)\n",count);
+ CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n",count);
rxbp->rxbp_reserved -= count;
spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
}
kptl_data_t *kptllnd_data = rxb->rxb_po.po_kptllnd_data;
unsigned long flags;
- PJK_UT_MSG("rxb=%p\n",rxb);
+ CDEBUG(D_NET, "rxb=%p\n",rxb);
spin_lock_irqsave(&kptllnd_data->kptl_sched_lock, flags);
LASSERT(list_empty(&rxb->rxb_repost_list));
any.nid = PTL_NID_ANY;
any.pid = PTL_PID_ANY;
- /*PJK_UT_MSG("rxb=%p\n",rxb);*/
+        /* CDEBUG(D_NET, "rxb=%p\n",rxb); */
spin_lock_irqsave(&rxbp->rxbp_lock, flags);
STAT_UPDATE(kps_rx_unlink_event);
if(!rxbp->rxbp_shutdown){
- PJK_UT_MSG("RXB Callback %s(%d) rxb=%p id="FMT_NID" unlink=%d\n",
- get_ev_type_string(ev->type),ev->type,
- rxb,ev->initiator.nid,unlinked);
+ CDEBUG(D_NET, "RXB Callback %s(%d) rxb=%p id="FMT_NID" unlink=%d\n",
+ get_ev_type_string(ev->type),ev->type,
+ rxb,ev->initiator.nid,unlinked);
}
LASSERT( ev->md.start == rxb->rxb_buffer);
LASSERT( ev->type == PTL_EVENT_PUT_END || ev->type == PTL_EVENT_UNLINK);
LASSERT( ev->match_bits == LNET_MSG_MATCHBITS);
- CDEBUG((ev->ni_fail_type == PTL_OK) ? D_NET : D_ERROR,
+ CDEBUG((ev->ni_fail_type == PTL_NI_OK) ? D_NET : D_ERROR,
"event type %d, status %d from "FMT_NID"\n",
ev->type, ev->ni_fail_type,ev->initiator.nid);
return;
}
- PJK_UT_MSG_DATA("New RX=%p\n",rx);
+ CDEBUG(D_NET, "New RX=%p\n",rx);
/*
* If we are unlinked we can just transfer the ref
kptllnd_rx_schedule(rx);
if(!rxbp->rxbp_shutdown){
- PJK_UT_MSG("<<< rx=%p rxb=%p\n",rx,rxb);
+ CDEBUG(D_NET, "<<< rx=%p rxb=%p\n",rx,rxb);
}
}
unsigned long flags;
kptl_data_t *kptllnd_data = rx->rx_rxb->rxb_po.po_kptllnd_data;
- CDEBUG(D_NET, "rx\n");
-
- PJK_UT_MSG("RX Schedule %p\n",rx);
+ CDEBUG(D_NET, "RX Schedule %p\n",rx);
spin_lock_irqsave(&kptllnd_data->kptl_sched_lock, flags);
list_add_tail(&rx->rx_list,&kptllnd_data->kptl_sched_rxq);
kptl_data_t *kptllnd_data = rxb->rxb_po.po_kptllnd_data;
kptl_peer_t *peer = NULL;
int returned_credits = 0;
- int type = msg->ptlm_type;
- lnet_process_id_t lnet_initiator;
unsigned long flags;
+ CDEBUG(D_NET, ">>> RXRXRXRXRX rx=%p nob=%d "FMT_NID"/%d\n",
+ rx, rx->rx_nob, rx->rx_initiator.nid, rx->rx_initiator.pid);
- PJK_UT_MSG_DATA(">>> RXRXRXRXRXRXRXRXRXRXRXRX\n");
- PJK_UT_MSG_DATA("rx=%p nob=%d\n",rx,rx->rx_nob);
-
- /*
- * Setup the intiator for LNET
- */
- lnet_initiator.nid = ptl2lnetnid(kptllnd_data,rx->rx_initiator.nid);
- lnet_initiator.pid = rx->rx_initiator.pid;
-
- /*
- * If the nob==0 then silently discard this message
- */
- if(rx->rx_nob == 0)
- goto exit;
-
+ if (rx->rx_nob == 0) {
+ /* discard silently!!! */
+ goto out;
+ }
+
rc = kptllnd_msg_unpack(msg, rx->rx_nob, kptllnd_data);
if (rc != 0) {
- CERROR ("Error %d unpacking rx from "FMT_NID"\n",
- rc, rx->rx_initiator.nid);
- goto exit;
+ CERROR ("Error %d unpacking rx from "FMT_NID"/%d\n",
+ rc, rx->rx_initiator.nid, rx->rx_initiator.pid);
+ goto out;
}
- PJK_UT_MSG_DATA("RX=%p Type=%s(%d)\n",rx,
- get_msg_type_string(type),type);
- PJK_UT_MSG_DATA("Msg NOB = %d\n",msg->ptlm_nob);
- PJK_UT_MSG_DATA("Credits back from peer=%d\n",msg->ptlm_credits);
- PJK_UT_MSG_DATA("Seq # ="LPX64"\n",msg->ptlm_seq);
- PJK_UT_MSG_DATA("lnet RX nid=" LPX64 "\n",lnet_initiator.nid);
- PJK_UT_MSG("ptl RX nid=" FMT_NID " pid=%d\n",rx->rx_initiator.nid,rx->rx_initiator.pid);
-
- if(type == PTLLND_MSG_TYPE_HELLO)
- {
- peer = kptllnd_peer_handle_hello(
- kptllnd_data,
- lnet_initiator,
- msg);
- if( peer == NULL){
- CERROR ("Failed to create peer for %s\n",
- libcfs_id2str(lnet_initiator));
- goto exit;
+ CDEBUG(D_NET, "RX=%p Type=%s(%d)\n",
+ rx, get_msg_type_string(msg->ptlm_type), msg->ptlm_type);
+ CDEBUG(D_NET, "Msg NOB = %d\n", msg->ptlm_nob);
+ CDEBUG(D_NET, "Credits back from peer=%d\n", msg->ptlm_credits);
+ CDEBUG(D_NET, "Seq # ="LPX64"\n",msg->ptlm_seq);
+ CDEBUG(D_NET, "ptl RX id="FMT_NID"/%d\n",
+ rx->rx_initiator.nid, rx->rx_initiator.pid);
+
+ if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
+ peer = kptllnd_peer_handle_hello(kptllnd_data,
+ rx->rx_initiator,
+ msg);
+ if (peer == NULL) {
+ CERROR ("Failed to create peer for "FMT_NID"/%d\n",
+ rx->rx_initiator.nid, rx->rx_initiator.pid);
+ goto out;
}
- if (!( msg->ptlm_dststamp == kptllnd_data->kptl_incarnation ||
- msg->ptlm_dststamp == 0)) {
- CERROR ("Stale rx from "LPX64" dststamp "LPX64" expected "LPX64"\n",
- peer->peer_nid,
- msg->ptlm_dststamp,
- kptllnd_data->kptl_incarnation );
- goto exit;
+ if (!(msg->ptlm_dststamp == kptllnd_data->kptl_incarnation ||
+ msg->ptlm_dststamp == 0)) {
+ CERROR("Stale rx from %s dststamp "LPX64" expected "LPX64"\n",
+ libcfs_nid2str(peer->peer_nid),
+ msg->ptlm_dststamp,
+ kptllnd_data->kptl_incarnation);
+ goto out;
}
- }
- else
- {
- peer = kptllnd_peer_find(kptllnd_data,lnet_initiator);
+ } else {
+ peer = kptllnd_ptlnid2peer(kptllnd_data, rx->rx_initiator.nid);
if( peer == NULL){
- CERROR ("No connection with %s\n",
- libcfs_id2str(lnet_initiator));
- goto exit;
+ CERROR("No connection with "FMT_NID"/%d\n",
+ rx->rx_initiator.nid, rx->rx_initiator.pid);
+ goto out;
}
if (msg->ptlm_dststamp != kptllnd_data->kptl_incarnation) {
- CERROR ("Stale rx from "LPX64" dststamp "LPX64" expected "LPX64"\n",
- peer->peer_nid,
- msg->ptlm_dststamp,
- kptllnd_data->kptl_incarnation );
- goto exit;
+ CERROR("Stale rx from %s dststamp "LPX64" expected "LPX64"\n",
+ libcfs_nid2str(peer->peer_nid),
+ msg->ptlm_dststamp,
+                       kptllnd_data->kptl_incarnation);
+ goto out;
}
}
- if( msg->ptlm_srcnid != peer->peer_nid){
- CERROR ("Stale rx srcnid "LPX64" expected "LPX64"\n",
- msg->ptlm_srcnid,
- peer->peer_nid );
- goto exit;
+ if (msg->ptlm_srcnid != peer->peer_nid) {
+ CERROR("Bad rx srcnid %s expected %s\n",
+ libcfs_nid2str(msg->ptlm_srcnid),
+ libcfs_nid2str(peer->peer_nid));
+ goto out;
}
- if( msg->ptlm_srcstamp != peer->peer_incarnation){
- CERROR ("Stale rx from "LPX64" srcstamp"LPX64" expected "LPX64"\n",
- peer->peer_nid,
+ if (msg->ptlm_srcstamp != peer->peer_incarnation) {
+ CERROR ("Stale rx from %s srcstamp "LPX64" expected "LPX64"\n",
+ libcfs_nid2str(peer->peer_nid),
msg->ptlm_srcstamp,
- peer->peer_incarnation );
- goto exit;
+ peer->peer_incarnation);
+ goto out;
}
- if( msg->ptlm_dstnid != kptllnd_data->kptl_ni->ni_nid){
- CERROR ("Stale rx from "LPX64" dststamp "LPX64" expected "LPX64"\n",
- peer->peer_nid,
- msg->ptlm_dstnid,
- kptllnd_data->kptl_ni->ni_nid );
- goto exit;
+ if (msg->ptlm_dstnid != kptllnd_data->kptl_ni->ni_nid) {
+ CERROR ("Bad rx from %s dstnid %s expected %s\n",
+ libcfs_nid2str(peer->peer_nid),
+ libcfs_nid2str(msg->ptlm_dstnid),
+ libcfs_nid2str(kptllnd_data->kptl_ni->ni_nid));
+ goto out;
}
/*
*kptllnd_tunables.kptl_peercredits);
spin_unlock_irqrestore(&peer->peer_lock, flags);
- PJK_UT_MSG("Peer=%p Credits=%d Outstanding=%d\n",
+ CDEBUG(D_NET, "Peer=%p Credits=%d Outstanding=%d\n",
peer,peer->peer_credits,peer->peer_outstanding_credits);
- PJK_UT_MSG_DATA("Getting %d credits back rx=%p\n",returned_credits,rx);
+ CDEBUG(D_NET, "Getting %d credits back rx=%p\n",returned_credits,rx);
kptllnd_peer_check_sends(peer);
}
- /*
- * Attach the peer to the RX
- * it now is responsibly for releaseing the refrence
- */
+ /* Attach the peer to the RX (it takes over my reference) */
rx->rx_peer = peer;
- peer = 0;
+ peer = NULL;
+
+ /* NB msg->ptlm_seq is ignored; it's only a debugging aid */
- /*
- * Note: We are explicitly ignore sequence #
- * It is informational only
- */
switch (msg->ptlm_type) {
default:
- CERROR("Bad PTL message type %x from "LPX64"\n",
- msg->ptlm_type, rx->rx_peer->peer_nid);
+ CERROR("Bad PTL message type %x from %s\n",
+ msg->ptlm_type, libcfs_nid2str(rx->rx_peer->peer_nid));
break;
case PTLLND_MSG_TYPE_HELLO:
- PJK_UT_MSG("PTLLND_MSG_TYPE_HELLO\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n");
break;
case PTLLND_MSG_TYPE_NOOP:
- PJK_UT_MSG("PTLLND_MSG_TYPE_NOOP\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n");
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- PJK_UT_MSG("PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
rc = lnet_parse(kptllnd_data->kptl_ni,
- &msg->ptlm_u.immediate.kptlim_hdr,
- msg->ptlm_srcnid,
- rx, 0);
+ &msg->ptlm_u.immediate.kptlim_hdr,
+ msg->ptlm_srcnid,
+ rx, 0);
/* RX Completing asynchronously */
- if( rc >= 0)
- rx = 0;
+                if (rc >= 0)
+ rx = NULL;
break;
case PTLLND_MSG_TYPE_PUT:
case PTLLND_MSG_TYPE_GET:
- PJK_UT_MSG("PTLLND_MSG_TYPE_%s\n",
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
msg->ptlm_type == PTLLND_MSG_TYPE_PUT ?
"PUT" : "GET");
-
/*
* Save the last match bits used
*/
spin_lock_irqsave(&rx->rx_peer->peer_lock, flags);
- if(msg->ptlm_u.req.kptlrm_matchbits > rx->rx_peer->peer_last_matchbits_seen)
- rx->rx_peer->peer_last_matchbits_seen = msg->ptlm_u.req.kptlrm_matchbits;
+ if (msg->ptlm_u.req.kptlrm_matchbits >
+ rx->rx_peer->peer_last_matchbits_seen)
+ rx->rx_peer->peer_last_matchbits_seen =
+ msg->ptlm_u.req.kptlrm_matchbits;
spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags);
rc = lnet_parse(kptllnd_data->kptl_ni,
- &msg->ptlm_u.req.kptlrm_hdr,
- msg->ptlm_srcnid,
- rx, 1);
+ &msg->ptlm_u.req.kptlrm_hdr,
+ msg->ptlm_srcnid,
+ rx, 1);
/* RX Completing asynchronously */
if( rc >= 0)
- rx = 0;
+ rx = NULL;
break;
}
-
- CDEBUG (D_NET, "Received %x[%d] from "LPX64"\n",
- type, returned_credits, peer->peer_nid);
-
-exit:
+out:
/* PEER == NULL if it is not yet assigned or already
* been attached to RX */
- if(peer)
- kptllnd_peer_decref(peer,"lookup");
+ if (peer != NULL)
+ kptllnd_peer_decref(peer, "lookup");
/* RX == NULL if it is completing asynchronously */
- if(rx)
- kptllnd_rx_decref(rx,"sched",kptllnd_data);
+ if (rx != NULL)
+ kptllnd_rx_decref(rx, "sched", kptllnd_data);
- PJK_UT_MSG_DATA("<<< RXRXRXRXRXRXRXRXRXRXRXRX rx=%p\n",rx);
+ CDEBUG(D_NET, "<<< RXRXRXRXRXRXRXRXRXRXRXRX rx=%p\n",rx);
return;
}
* with the real ref count, and is for informational purposes
* only
*/
- PJK_UT_MSG("rxb=%p owner=%s count=%d\n",rxb,owner,
+ CDEBUG(D_NET, "rxb=%p owner=%s count=%d\n",rxb,owner,
atomic_read(&rxb->rxb_refcount));
#endif
}
kptl_rx_buffer_t *rxb,
const char *owner)
{
- if( !atomic_dec_and_test (&rxb->rxb_refcount)){
-
-#if 0
- /*
- * The below message could actually be out of sync
- * with the real ref count, and is for informational purposes
- * only
- */
- PJK_UT_MSG("rxb=%p owner=%s count=%d\n",rxb,owner,
- atomic_read(&rxb->rxb_refcount));
-#endif
+ if (!atomic_dec_and_test (&rxb->rxb_refcount))
return;
- }
-
-#if 0
- PJK_UT_MSG("rxb=%p owner=%s LAST REF reposting\n",rxb,owner);
-#endif
+ CDEBUG(D_NET, "rxb=%p owner=%s LAST REF reposting\n",rxb,owner);
kptllnd_rx_buffer_post_handle_error(rxb);
}
kptl_rx_t* rx;
if(IS_SIMULATION_ENABLED( FAIL_BLOCKING_RX_ALLOC )){
- PJK_UT_MSG_SIMULATION("FAIL_BLOCKING_RX_ALLOC SIMULATION triggered\n");
CERROR ("FAIL_BLOCKING_RX_ALLOC SIMULATION triggered\n");
STAT_UPDATE(kps_rx_allocation_failed);
return 0;
kptl_peer_t *peer = rx->rx_peer;
unsigned long flags;
- PJK_UT_MSG(">>> rx=%p\n",rx);
+ CDEBUG(D_NET, ">>> rx=%p\n",rx);
STAT_UPDATE(kps_rx_released);
LASSERT(atomic_read(&rx->rx_refcount)==0);
if(rx->rx_rxb){
- PJK_UT_MSG("Release rxb=%p\n",rx->rx_rxb);
+ CDEBUG(D_NET, "Release rxb=%p\n",rx->rx_rxb);
kptllnd_rx_buffer_decref(rx->rx_rxb,"rx");
rx->rx_rxb = 0;
}else{
- PJK_UT_MSG("rxb already released\n");
+ CDEBUG(D_NET, "rxb already released\n");
}
if(peer){
*kptllnd_tunables.kptl_peercredits);
spin_unlock_irqrestore(&peer->peer_lock, flags);
- PJK_UT_MSG("Peer=%p Credits=%d Outstanding=%d\n",
- peer,peer->peer_credits,peer->peer_outstanding_credits);
+ CDEBUG(D_NET, "Peer=%p Credits=%d Outstanding=%d\n",
+ peer,peer->peer_credits,peer->peer_outstanding_credits);
/* Have I received credits that will let me send? */
kptllnd_peer_check_sends(peer);
cfs_mem_cache_free(kptllnd_data->kptl_rx_cache,rx);
- PJK_UT_MSG("<<< rx=%p\n",rx);
+ CDEBUG(D_NET, "<<< rx=%p\n",rx);
}
void
* with the real ref count, and is for informational purposes
* only
*/
- PJK_UT_MSG("rx=%p owner=%s count=%d\n",rx,owner,
- atomic_read(&rx->rx_refcount));
+ CDEBUG(D_NET, "rx=%p owner=%s count=%d\n",rx,owner,
+ atomic_read(&rx->rx_refcount));
}
void
kptllnd_rx_decref(kptl_rx_t *rx,const char *owner,kptl_data_t *kptllnd_data)
{
- if( !atomic_dec_and_test (&rx->rx_refcount)){
- /*
- * The below message could actually be out of sync
- * with the real ref count, and is for informational purposes
- * only
- */
- PJK_UT_MSG("rx=%p owner=%s count=%d\n",rx,owner,
- atomic_read(&rx->rx_refcount));
+ if (!atomic_dec_and_test (&rx->rx_refcount))
return;
- }
-
- PJK_UT_MSG("rx=%p owner=%s LAST REF destroying\n",rx,owner);
- kptllnd_rx_destroy(rx,kptllnd_data);
+ CDEBUG(D_NET, "rx=%p owner=%s LAST REF destroying\n",rx,owner);
+ kptllnd_rx_destroy(rx, kptllnd_data);
}
kptl_tx_t *tx;
int i;
- PJK_UT_MSG("\n");
+ CDEBUG(D_NET, "\n");
/*
* First initialize the tx descriptors
*/
tx->tx_state = TX_STATE_ON_IDLE_QUEUE;
- LIBCFS_ALLOC( tx->tx_msg, *kptllnd_tunables.kptl_max_msg_size );
- if(tx->tx_msg == NULL){
+ LIBCFS_ALLOC(tx->tx_msg, *kptllnd_tunables.kptl_max_msg_size);
+ if (tx->tx_msg == NULL) {
CERROR("Failed to allocate TX payload\n");
- kptllnd_cleanup_tx_descs(kptllnd_data);
+ goto failed;
}
-
+ LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags));
+ if (tx->tx_frags == NULL) {
+ CERROR("Failed to allocate TX frags\n");
+ goto failed;
+ }
+
/*
* Add this to the queue
*/
list_add (&tx->tx_list,&kptllnd_data->kptl_idle_txs);
}
- return (0);
+ return 0;
+
+ failed:
+ kptllnd_cleanup_tx_descs(kptllnd_data);
+ return -ENOMEM;
}
void
kptl_tx_t *tx;
int i;
- PJK_UT_MSG("\n");
+ CDEBUG(D_NET, "\n");
for (i = 0; i < (*kptllnd_tunables.kptl_ntx); i++) {
tx = &kptllnd_data->kptl_tx_descs[i];
-
- /*
- * Handle partial initization by stopping
- * when we hit one that is not fully initialized
- */
- if( tx->tx_msg == NULL )
- break;
+ if (tx->tx_msg != NULL)
+ LIBCFS_FREE(tx->tx_msg,
+ *kptllnd_tunables.kptl_max_msg_size);
+
+ if (tx->tx_frags != NULL)
+ LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags));
LASSERT( tx->tx_state == TX_STATE_ON_IDLE_QUEUE );
-
- LIBCFS_FREE(tx->tx_msg,*kptllnd_tunables.kptl_max_msg_size);
}
}
kptl_tx_t *
-kptllnd_get_idle_tx(
- kptl_data_t *kptllnd_data,
- kptl_tx_type_t purpose)
+kptllnd_get_idle_tx(kptl_data_t *kptllnd_data,
+ enum kptl_tx_type purpose)
{
kptl_tx_t *tx = NULL;
- PJK_UT_MSG(">>> purpose=%d\n",purpose);
+ CDEBUG(D_NET, ">>> purpose=%d\n",purpose);
if(IS_SIMULATION_ENABLED( FAIL_BLOCKING_TX_PUT_ALLOC ) && purpose == TX_TYPE_LARGE_PUT){
- PJK_UT_MSG_SIMULATION("FAIL_BLOCKING_TX_PUT_ALLOC SIMULATION triggered\n");
CERROR ("FAIL_BLOCKING_TX_PUT_ALLOC SIMULATION triggered\n");
tx = NULL;
STAT_UPDATE(kps_tx_allocation_failed);
goto exit;
}
if(IS_SIMULATION_ENABLED( FAIL_BLOCKING_TX_GET_ALLOC ) && purpose == TX_TYPE_LARGE_GET){
- PJK_UT_MSG_SIMULATION("FAIL_BLOCKING_TX_GET_ALLOC SIMULATION triggered\n");
CERROR ("FAIL_BLOCKING_TX_GET_ALLOC SIMULATION triggered\n");
tx = NULL;
STAT_UPDATE(kps_tx_allocation_failed);
goto exit;
}
if(IS_SIMULATION_ENABLED( FAIL_BLOCKING_TX )){
- PJK_UT_MSG_SIMULATION("FAIL_BLOCKING_TX SIMULATION triggered\n");
CERROR ("FAIL_BLOCKING_TX SIMULATION triggered\n");
tx = NULL;
STAT_UPDATE(kps_tx_allocation_failed);
exit:
- PJK_UT_MSG("<<< tx=%p\n",tx);
+ CDEBUG(D_NET, "<<< tx=%p\n",tx);
return tx;
}
LASSERT (!in_interrupt());
- PJK_UT_MSG(">>> tx=%p\n",tx);
+ CDEBUG(D_NET, ">>> tx=%p\n",tx);
LASSERT(tx->tx_state != TX_STATE_ON_IDLE_QUEUE);
LASSERT(PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE));
* Release the associated RX if there is one
*/
if(tx->tx_associated_rx){
- PJK_UT_MSG("tx=%p destroy associated rx %p\n",tx,tx->tx_associated_rx);
+ CDEBUG(D_NET, "tx=%p destroy associated rx %p\n",tx,tx->tx_associated_rx);
kptllnd_rx_decref(tx->tx_associated_rx,"tx",kptllnd_data);
tx->tx_associated_rx = NULL;
}
* Cleanup resources associate with the peer
*/
if(tx->tx_peer){
- PJK_UT_MSG("tx=%p detach from peer=%p\n",tx,tx->tx_peer);
+ CDEBUG(D_NET, "tx=%p detach from peer=%p\n",tx,tx->tx_peer);
kptllnd_peer_dequeue_tx(tx->tx_peer,tx);
kptllnd_peer_decref(tx->tx_peer,"tx");
tx->tx_peer = NULL;
if (lnetmsg[1] != NULL)
lnet_finalize(kptllnd_data->kptl_ni, lnetmsg[1], status);
- PJK_UT_MSG("<<< tx=%p\n",tx);
+ CDEBUG(D_NET, "<<< tx=%p\n",tx);
}
void
kptl_data_t *kptllnd_data = tx->tx_po.po_kptllnd_data;
unsigned long flags;
- PJK_UT_MSG("tx=%p\n",tx);
+ CDEBUG(D_NET, "tx=%p\n",tx);
spin_lock_irqsave(&kptllnd_data->kptl_sched_lock, flags);
LASSERT(list_empty(&tx->tx_schedlist));
int do_decref = 0;
unsigned long flags;
- PJK_UT_MSG(">>> %s(%d) tx=%p fail=%d\n",
+ CDEBUG(D_NET, ">>> %s(%d) tx=%p fail=%d\n",
get_ev_type_string(ev->type),ev->type,tx,ev->ni_fail_type);
STAT_UPDATE(kps_tx_event);
#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- PJK_UT_MSG("ev->unlinked=%d\n",ev->unlinked);
+ CDEBUG(D_NET, "ev->unlinked=%d\n",ev->unlinked);
if(ev->unlinked)
STAT_UPDATE(kps_tx_unlink_event);
#endif
* event's and we've already cleaned up in
* those cases.
*/
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return;
#else
/*
tx->tx_status = -EINVAL;
kptllnd_tx_scheduled_decref(tx);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return;
#endif
}
break;
case TX_TYPE_SMALL_MESSAGE:
- PJK_UT_MSG("TX_TYPE_SMALL_MESSAGE\n");
+ CDEBUG(D_NET, "TX_TYPE_SMALL_MESSAGE\n");
LASSERT(PtlHandleIsEqual(tx->tx_mdh,PTL_INVALID_HANDLE));
/*
case TX_TYPE_LARGE_PUT:
case TX_TYPE_LARGE_GET:
- PJK_UT_MSG("TX_TYPE_LARGE_%s\n",
+ CDEBUG(D_NET, "TX_TYPE_LARGE_%s\n",
tx->tx_type == TX_TYPE_LARGE_PUT ?
"PUT" : "GET");
/*
break;
case TX_TYPE_LARGE_PUT_RESPONSE:
- PJK_UT_MSG("TX_TYPE_LARGE_PUT_RESPONSE\n");
+ CDEBUG(D_NET, "TX_TYPE_LARGE_PUT_RESPONSE\n");
LASSERT(PtlHandleIsEqual(tx->tx_mdh_msg,PTL_INVALID_HANDLE));
/*
break;
case TX_TYPE_LARGE_GET_RESPONSE:
- PJK_UT_MSG("TX_TYPE_LARGE_GET_RESPONSE\n");
+ CDEBUG(D_NET, "TX_TYPE_LARGE_GET_RESPONSE\n");
LASSERT(PtlHandleIsEqual(tx->tx_mdh_msg,PTL_INVALID_HANDLE));
/*
if(do_decref)
kptllnd_tx_scheduled_decref(tx);
- PJK_UT_MSG("<<< decref=%d\n",do_decref);
+ CDEBUG(D_NET, "<<< decref=%d\n",do_decref);
}
void
return;
}
- PJK_UT_MSG("tx=%p LAST REF\n",tx);
+ CDEBUG(D_NET, "tx=%p LAST REF\n",tx);
kptllnd_tx_done(tx);
}
* with the real ref count, and is for informational purposes
* only
*/
- PJK_UT_MSG("tx=%p count=%d\n",tx,
+ CDEBUG(D_NET, "tx=%p count=%d\n",tx,
atomic_read(&tx->tx_refcount));
return;
}
- PJK_UT_MSG("tx=%p LAST REF\n",tx);
+ CDEBUG(D_NET, "tx=%p LAST REF\n",tx);
kptllnd_tx_schedule(tx);
}
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleExecutable</key>
- <string>ksocknal</string>
+ <string>ksocklnd</string>
<key>CFBundleIconFile</key>
<string></string>
<key>CFBundleIdentifier</key>
- <string>com.clusterfs.lustre.ksocknal</string>
+ <string>com.clusterfs.lustre.ksocklnd</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundlePackageType</key>
<string>1.0.0b1</string>
<key>com.clusterfs.lustre.libcfs</key>
<string>1.0.0</string>
- <key>com.clusterfs.lustre.portals</key>
+ <key>com.clusterfs.lustre.lnet</key>
<string>1.0.0</string>
</dict>
</dict>
spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
- wake_up(&ksocknal_data.ksnd_connd_waitq);
+ cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, flags);
return 0;
#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */
-#define SOCKNAL_BUFFER_SIZE (8<<20) /* default socket buffer size */
+#define SOCKNAL_BUFFER_SIZE SOCK_BUFFER_SIZE /* default socket buffer size */
#define SOCKNAL_NAGLE 0 /* enable/disable NAGLE? */
#define SOCKNAL_IRQ_AFFINITY 1 /* enable/disable IRQ affinity? */
#define SOCKNAL_KEEPALIVE_IDLE 35 /* # seconds idle before 1st probe */
char name[16];
snprintf (name, sizeof (name),"socknal_sd%02d", id);
- libcfs_daemonize (name);
- libcfs_blockallsigs ();
+ cfs_daemonize (name);
+ cfs_block_allsigs ();
#if (CONFIG_SMP && CPU_AFFINITY)
id = ksocknal_sched2cpu(id);
int did_something;
snprintf (name, sizeof (name), "socknal_cd%02ld", id);
- libcfs_daemonize (name);
- libcfs_blockallsigs ();
+ cfs_daemonize (name);
+ cfs_block_allsigs ();
spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags);
int peer_index = 0;
cfs_time_t deadline = cfs_time_current();
- libcfs_daemonize ("socknal_reaper");
- libcfs_blockallsigs ();
+ cfs_daemonize ("socknal_reaper");
+ cfs_block_allsigs ();
CFS_INIT_LIST_HEAD(&enomem_conns);
cfs_waitlink_init (&wait);
if (!ksocknal_data.ksnd_shuttingdown &&
list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
list_empty (&ksocknal_data.ksnd_zombie_conns))
- cfs_waitq_timedwait (&wait, timeout);
+ cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout);
set_current_state (TASK_RUNNING);
cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait);
#include "socklnd.h"
-#if 0
-#undef SOCKNAL_SINGLE_FRAG_TX
-#define SOCKNAL_SINGLE_FRAG_TX 1
-#undef SOCKNAL_SINGLE_FRAG_RX
-#define SOCKNAL_SINGLE_FRAG_RX 1
-#endif
-
-#if !CFS_SYSFS_MODULE_PARM
-#error "this can't use ksocknal_tunables to get the addresses of the tuning vars"
+# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
-SYSCTL_DECL(_portals);
+SYSCTL_DECL(_lnet);
-SYSCTL_NODE (_portals, OID_AUTO, ksocknal, CTLFLAG_RW,
- 0, "ksocknal_sysctl");
+SYSCTL_NODE (_lnet, OID_AUTO, ksocknal, CTLFLAG_RW,
+ 0, "ksocknal_sysctl");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, timeout,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_timeout,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, timeout,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_timeout,
0, "timeout");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, eager_ack,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_eager_ack,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, credits,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_credits,
+ 0, "credits");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, peer_credits,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_peercredits,
+ 0, "peer_credits");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nconnds,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nconnds,
+ 0, "nconnds");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_reconnectms,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_reconnectms,
+ 0, "min_reconnectms");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, max_reconnectms,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_max_reconnectms,
+ 0, "max_reconnectms");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, eager_ack,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack,
0, "eager_ack");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, typed,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_typed_conns,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, typed,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns,
0, "typed");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, min_bulk,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_min_bulk,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, min_bulk,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk,
0, "min_bulk");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, buffer_size,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_buffer_size,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, buffer_size,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_buffer_size,
0, "buffer_size");
-SYSCTL_INT(_portals_ksocknal, OID_AUTO, nagle,
- CTLTYPE_INT | CTLFLAG_RW , ksocknal_tunables.ksnd_nagle,
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, nagle,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle,
0, "nagle");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_idle,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_idle,
+ 0, "keepalive_idle");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_count,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_count,
+ 0, "keepalive_count");
+SYSCTL_INT(_lnet_ksocknal, OID_AUTO, keepalive_intvl,
+ CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_keepalive_intvl,
+ 0, "keepalive_intvl");
cfs_sysctl_table_t ksocknal_top_ctl_table [] = {
- &sysctl__portals_ksocknal,
- &sysctl__portals_ksocknal_timeout,
- &sysctl__portals_ksocknal_eager_ack,
- &sysctl__portals_ksocknal_typed,
- &sysctl__portals_ksocknal_min_bulk,
- &sysctl__portals_ksocknal_buffer_size,
- &sysctl__portals_ksocknal_nagle,
+ &sysctl__lnet_ksocknal,
+ &sysctl__lnet_ksocknal_timeout,
+ &sysctl__lnet_ksocknal_credits,
+ &sysctl__lnet_ksocknal_peer_credits,
+ &sysctl__lnet_ksocknal_nconnds,
+ &sysctl__lnet_ksocknal_min_reconnectms,
+ &sysctl__lnet_ksocknal_max_reconnectms,
+ &sysctl__lnet_ksocknal_eager_ack,
+ &sysctl__lnet_ksocknal_typed,
+ &sysctl__lnet_ksocknal_min_bulk,
+ &sysctl__lnet_ksocknal_buffer_size,
+ &sysctl__lnet_ksocknal_nagle,
+ &sysctl__lnet_ksocknal_keepalive_idle,
+ &sysctl__lnet_ksocknal_keepalive_count,
+ &sysctl__lnet_ksocknal_keepalive_intvl,
NULL
};
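+/* Once the table is registered these tunables appear under the
+ * "lnet.ksocknal" sysctl namespace, e.g. (illustrative)
+ * "sysctl lnet.ksocknal.timeout" to read a value or
+ * "sysctl -w lnet.ksocknal.nagle=0" to set one. */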
ksocknal_lib_tunables_init ()
{
ksocknal_tunables.ksnd_sysctl =
- register_sysctl_table (ksocknal_top_ctl_table, 0);
+ cfs_register_sysctl_table (ksocknal_top_ctl_table, 0);
if (ksocknal_tunables.ksnd_sysctl == NULL)
return -ENOMEM;
return 0;
}
-int
+void
ksocknal_lib_tunables_fini ()
{
if (ksocknal_tunables.ksnd_sysctl != NULL)
- unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
+ cfs_unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
}
#else
int
return 0;
}
-int
+void
ksocknal_lib_tunables_fini ()
{
}
#endif
-static unsigned long ksocknal_mbuf_size = (u_quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES);
-
-extern struct fileops socketops;
-
-void
-ksocknal_lib_release_sock(struct socket *so)
-{
- CFS_DECL_FUNNEL_DATA;
-
- CFS_NET_IN;
- soshutdown(so, 0);
- CFS_NET_EX;
-}
+/*
+ * To use bigger buffer for socket:
+ * 1. Increase nmbclusters (cannot be increased by sysctl because it's
+ *    read-only, so we must patch the kernel).
+ * 2. Increase net.inet.tcp.reass.maxsegments
+ * 3. Increase net.inet.tcp.sendspace
+ * 4. Increase net.inet.tcp.recvspace
+ * 5. Increase kern.ipc.maxsockbuf
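+ *
+ * An illustrative tuning sequence (values are hypothetical, not tested
+ * defaults) might be:
+ *   sysctl -w net.inet.tcp.reass.maxsegments=8192
+ *   sysctl -w net.inet.tcp.sendspace=1179648
+ *   sysctl -w net.inet.tcp.recvspace=1179648
+ *   sysctl -w kern.ipc.maxsockbuf=2359296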
+ */
+#define KSOCKNAL_MAX_BUF (1152*1024)
void
ksocknal_lib_bind_irq (unsigned int irq)
}
unsigned int
-ksocknal_lib_sock_irq (struct socket *sock)
+ksocknal_lib_sock_irq (cfs_socket_t *sock)
{
return 0;
}
int
ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
{
- struct sockaddr_in *sin;
- struct sockaddr *sa;
- int rc;
- CFS_DECL_NET_DATA;
+ int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
+ &conn->ksnc_ipaddr,
+ &conn->ksnc_port);
- CFS_NET_IN;
- rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_peeraddr(conn->ksnc_sock, &sa);
- LASSERT (!conn->ksnc_closing);
- if (rc != 0) {
- CFS_NET_EX;
- if (sa) FREE(sa, M_SONAME);
- CERROR ("Error %d getting sock peer IP\n", rc);
- return rc;
+ /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
+ LASSERT (!conn->ksnc_closing);
+
+ if (rc != 0) {
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return rc;
+ }
+
+ rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
+ &conn->ksnc_myipaddr, NULL);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+static int
+ksocknal_lib_buffersize (int current_sz, int tunable_sz)
+{
+ /* ensure >= SOCKNAL_MIN_BUFFER */
+ if (current_sz < SOCKNAL_MIN_BUFFER)
+ return MAX(SOCKNAL_MIN_BUFFER, tunable_sz);
+
+ if (tunable_sz > SOCKNAL_MIN_BUFFER)
+ return tunable_sz;
+
+ /* leave alone */
+ return 0;
+}
+
+#ifdef __DARWIN8__
+
+int
+ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ socket_t sock = C2B_SOCK(conn->ksnc_sock);
+ size_t sndlen;
+ int nob;
+ int rc;
+
+#if SOCKNAL_SINGLE_FRAG_TX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ unsigned int niov = tx->tx_niov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+
+ int i;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = tx->tx_iov[i];
+ nob += scratchiov[i].iov_len;
}
- sin = (struct sockaddr_in *)sa;
- conn->ksnc_ipaddr = ntohl (sin->sin_addr.s_addr);
- conn->ksnc_port = ntohs (sin->sin_port);
- if (sa) FREE(sa, M_SONAME);
- rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_sockaddr(conn->ksnc_sock, &sa);
- CFS_NET_EX;
- if (rc != 0) {
- if (sa) FREE(sa, M_SONAME);
- CERROR ("Error %d getting sock local IP\n", rc);
- return rc;
+
+ /*
+ * XXX Liang:
+         * Linux has MSG_MORE; do we have anything to
+         * reduce the number of partial TCP segments sent?
+ */
+ rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
+ if (rc == 0)
+ rc = sndlen;
+ return rc;
+}
+
+int
+ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ socket_t sock = C2B_SOCK(conn->ksnc_sock);
+ lnet_kiov_t *kiov = tx->tx_kiov;
+ int rc;
+ int nob;
+ size_t sndlen;
+
+#if SOCKNAL_SINGLE_FRAG_TX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
+ unsigned int niov = tx->tx_nkiov;
+#endif
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = MSG_DONTWAIT
+ };
+
+ int i;
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+
+ /*
+ * XXX Liang:
+         * Linux has MSG_MORE; do we have anything to
+         * reduce the number of partial TCP segments sent?
+ */
+ rc = -sock_send(sock, &msg, MSG_DONTWAIT, &sndlen);
+ for (i = 0; i < niov; i++)
+ cfs_kunmap(kiov[i].kiov_page);
+ if (rc == 0)
+ rc = sndlen;
+ return rc;
+}
+
+int
+ksocknal_lib_recv_iov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ unsigned int niov = conn->ksnc_rx_niov;
+#endif
+ struct iovec *iov = conn->ksnc_rx_iov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ size_t rcvlen;
+ int nob;
+ int i;
+ int rc;
+
+ LASSERT (niov > 0);
+
+ for (nob = i = 0; i < niov; i++) {
+ scratchiov[i] = iov[i];
+ nob += scratchiov[i].iov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+ rc = -sock_receive (C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
+ if (rc == 0)
+ rc = rcvlen;
+
+ return rc;
+}
+
+int
+ksocknal_lib_recv_kiov (ksock_conn_t *conn)
+{
+#if SOCKNAL_SINGLE_FRAG_RX
+ struct iovec scratch;
+ struct iovec *scratchiov = &scratch;
+ unsigned int niov = 1;
+#else
+ struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
+ unsigned int niov = conn->ksnc_rx_nkiov;
+#endif
+ lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = scratchiov,
+ .msg_iovlen = niov,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0
+ };
+ int nob;
+ int i;
+ size_t rcvlen;
+ int rc;
+
+ /* NB we can't trust socket ops to either consume our iovs
+ * or leave them alone. */
+ for (nob = i = 0; i < niov; i++) {
+                scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) +
+ kiov[i].kiov_offset;
+ nob += scratchiov[i].iov_len = kiov[i].kiov_len;
+ }
+ LASSERT (nob <= conn->ksnc_rx_nob_wanted);
+        rc = -sock_receive(C2B_SOCK(conn->ksnc_sock), &msg, MSG_DONTWAIT, &rcvlen);
+ for (i = 0; i < niov; i++)
+ cfs_kunmap(kiov[i].kiov_page);
+ if (rc == 0)
+ rc = rcvlen;
+ return (rc);
+}
+
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+ /* XXX Liang: */
+}
+
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+ socket_t sock = C2B_SOCK(conn->ksnc_sock);
+ int len;
+ int rc;
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ *txmem = *rxmem = *nagle = 0;
+ return (-ESHUTDOWN);
+ }
+ rc = libcfs_sock_getbuf(B2C_SOCK(sock), txmem, rxmem);
+ if (rc == 0) {
+ len = sizeof(*nagle);
+ rc = -sock_getsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+ nagle, &len);
+ }
+ ksocknal_connsock_decref(conn);
+
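+        /* TCP_NODELAY disables Nagle, so report Nagle as enabled exactly
+         * when NODELAY is off. */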
+ if (rc == 0)
+ *nagle = !*nagle;
+ else
+ *txmem = *rxmem = *nagle = 0;
+
+ return (rc);
+}
+
+int
+ksocknal_lib_setup_sock (cfs_socket_t *sock)
+{
+ int rc;
+ int option;
+ int sndbuf;
+ int rcvbuf;
+ int keep_idle;
+ int keep_intvl;
+ int keep_count;
+ int do_keepalive;
+ socket_t so = C2B_SOCK(sock);
+ struct linger linger;
+
+ /* Ensure this socket aborts active sends immediately when we close
+ * it. */
+ linger.l_onoff = 0;
+ linger.l_linger = 0;
+ rc = -sock_setsockopt(so, SOL_SOCKET, SO_LINGER, &linger, sizeof(linger));
+ if (rc != 0) {
+ CERROR ("Can't set SO_LINGER: %d\n", rc);
+ return (rc);
+ }
+
+ if (!*ksocknal_tunables.ksnd_nagle) {
+ option = 1;
+ rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option));
+ if (rc != 0) {
+ CERROR ("Can't disable nagle: %d\n", rc);
+ return (rc);
+ }
}
- conn->ksnc_myipaddr = ntohl (sin->sin_addr.s_addr);
- return 0;
+ rc = libcfs_sock_getbuf(sock, &sndbuf, &rcvbuf);
+ if (rc != 0) {
+ CERROR("Can't get buffer sizes: %d\n", rc);
+ return (rc);
+ }
+
+ sndbuf = ksocknal_lib_buffersize(sndbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
+ rcvbuf = ksocknal_lib_buffersize(rcvbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
+ rc = libcfs_sock_setbuf(sock, sndbuf, rcvbuf);
+ if (rc != 0) {
+ CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
+ sndbuf, rcvbuf, rc);
+ return (rc);
+ }
+
+ /* snapshot tunables */
+ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
+ keep_count = *ksocknal_tunables.ksnd_keepalive_count;
+ keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
+
+ do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+ option = (do_keepalive ? 1 : 0);
+
+ rc = -sock_setsockopt(so, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option));
+ if (rc != 0) {
+ CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
+ return (rc);
+ }
+
+ if (!do_keepalive)
+ return (rc);
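+
+ /* only the idle time can be applied here; the count/interval
+ * tunables above just gate whether keepalive is enabled at all */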
+ rc = -sock_setsockopt(so, IPPROTO_TCP, TCP_KEEPALIVE,
+ &keep_idle, sizeof(keep_idle));
+
+ return (rc);
+}
+
+void
+ksocknal_lib_push_conn(ksock_conn_t *conn)
+{
+ socket_t sock;
+ int val = 1;
+ int rc;
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) /* being shut down */
+ return;
+ sock = C2B_SOCK(conn->ksnc_sock);
+
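+ /* re-asserting TCP_NODELAY pushes out anything Nagle has buffered */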
+ rc = -sock_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
+ LASSERT(rc == 0);
+
+ ksocknal_connsock_decref(conn);
+ return;
+}
+
+extern void ksocknal_read_callback (ksock_conn_t *conn);
+extern void ksocknal_write_callback (ksock_conn_t *conn);
+
+static void
+ksocknal_upcall(socket_t so, void *arg, int waitf)
+{
+ ksock_conn_t *conn = (ksock_conn_t *)arg;
+ ENTRY;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+ if (conn == NULL)
+ goto out;
+
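+ /* NB the upcall doesn't say whether it fired for rx or tx;
+ * treat it as a receive event */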
+ ksocknal_read_callback (conn);
+out:
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ EXIT;
+}
+
+void
+ksocknal_lib_save_callback(cfs_socket_t *sock, ksock_conn_t *conn)
+{
+ /* No callback need to save in osx */
+ return;
+}
+
+void
+ksocknal_lib_set_callback(cfs_socket_t *sock, ksock_conn_t *conn)
+{
+ sock->s_upcallarg = (void *)conn;
+ sock->s_upcall = ksocknal_upcall;
+ sock->s_flags |= CFS_SOCK_UPCALL;
+ return;
+}
+
+void
+ksocknal_lib_act_callback(cfs_socket_t *sock, ksock_conn_t *conn)
+{
+ ksocknal_upcall (C2B_SOCK(sock), (void *)conn, 0);
}
+void
+ksocknal_lib_reset_callback(cfs_socket_t *sock, ksock_conn_t *conn)
+{
+ sock->s_flags &= ~CFS_SOCK_UPCALL;
+ sock->s_upcall = NULL;
+ sock->s_upcallarg = NULL;
+}
+
+#else /* !__DARWIN8__ */
+
int
ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
{
CFS_NET_IN;
s = splnet();
+ /*
+ * There is no TCP_QUICKACK on BSD/XNU, so force an immediate ACK
+ * by clearing TF_DELACK and setting TF_ACKNOW directly.
+ */
if (tp && tp->t_flags & TF_DELACK){
tp->t_flags &= ~TF_DELACK;
tp->t_flags |= TF_ACKNOW;
}
splx(s);
- /*
- * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo
- * to send immediate ACK. It's not the best resolution because
- * tcp_fasttimo will send out ACK for all delayed-ack tcp socket.
- * Anyway, it's working now.
- * extern void tcp_fasttimo();
- * tcp_fasttimo();
- */
CFS_NET_EX;
return;
}
int
-ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob)
-{
- int rc;
- CFS_DECL_NET_DATA;
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct uio suio = {
- .uio_iov = &iov,
- .uio_iovcnt = 1,
- .uio_offset = 0,
- .uio_resid = nob,
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_WRITE,
- .uio_procp = NULL
- };
-
- CFS_NET_IN;
- rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0);
- CFS_NET_EX;
-
- if (rc != 0) {
- if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
- rc == EWOULDBLOCK))
- rc = 0;
- if ( rc != 0 )
- return -rc;
- rc = nob - suio.uio_resid;
- buffer = ((char *)buffer) + rc;
- nob = suio.uio_resid;
- continue;
- }
- break;
- }
-
- return (0);
-}
-
-int
-ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob)
-{
- int rc;
- CFS_DECL_NET_DATA;
-
- while (nob > 0) {
- struct iovec iov = {
- .iov_base = buffer,
- .iov_len = nob
- };
- struct uio ruio = {
- .uio_iov = &iov,
- .uio_iovcnt = 1,
- .uio_offset = 0,
- .uio_resid = nob,
- .uio_segflg = UIO_SYSSPACE,
- .uio_rw = UIO_READ,
- .uio_procp = NULL
- };
-
- CFS_NET_IN;
- rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
- CFS_NET_EX;
-
- if (rc != 0) {
- if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\
- rc == EWOULDBLOCK))
- rc = 0;
- if (rc != 0)
- return -rc;
- rc = nob - ruio.uio_resid;
- buffer = ((char *)buffer) + rc;
- nob = ruio.uio_resid;
- continue;
- }
- break;
- }
-
- return (0);
-}
-
-int
ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
{
- struct sockopt sopt;
struct socket *sock = conn->ksnc_sock;
- int len;
int rc;
- CFS_DECL_NET_DATA;
rc = ksocknal_connsock_addref(conn);
if (rc != 0) {
LASSERT (conn->ksnc_closing);
*txmem = *rxmem = *nagle = 0;
- rc = -ESHUTDOWN;
- goto out;
- }
- len = sizeof(*txmem);
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_GET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_SNDBUF;
- sopt.sopt_val = txmem;
- sopt.sopt_valsize = len;
-
- CFS_NET_IN;
- rc = sogetopt(sock, &sopt);
- if (rc == 0) {
- len = sizeof(*rxmem);
- sopt.sopt_name = SO_RCVBUF;
- sopt.sopt_val = rxmem;
- rc = sogetopt(sock, &sopt);
+ return -ESHUTDOWN;
}
+ rc = libcfs_sock_getbuf(sock, txmem, rxmem);
if (rc == 0) {
+ struct sockopt sopt;
+ int len;
+ CFS_DECL_NET_DATA;
+
len = sizeof(*nagle);
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_GET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = nagle;
- rc = sogetopt(sock, &sopt);
+ sopt.sopt_valsize = len;
+
+ CFS_NET_IN;
+ rc = -sogetopt(sock, &sopt);
+ CFS_NET_EX;
}
- CFS_NET_EX;
ksocknal_connsock_decref(conn);
if (rc == 0)
*nagle = !*nagle;
else
*txmem = *rxmem = *nagle = 0;
-out:
- return (-rc);
+ return (rc);
}
int
struct sockopt sopt;
int rc;
int option;
+ int sndbuf;
+ int rcvbuf;
int keep_idle;
int keep_intvl;
int keep_count;
struct linger linger;
CFS_DECL_NET_DATA;
+ rc = libcfs_sock_getbuf(so, &sndbuf, &rcvbuf);
+ if (rc != 0) {
+ CERROR("Can't get buffer sizes: %d\n", rc);
+ return rc;
+ }
+
+ sndbuf = ksocknal_lib_buffersize(sndbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
+ rcvbuf = ksocknal_lib_buffersize(rcvbuf,
+ *ksocknal_tunables.ksnd_buffer_size);
+ rc = libcfs_sock_setbuf(so, sndbuf, rcvbuf);
+ if (rc != 0) {
+ CERROR ("Can't set buffer tx %d, rx %d buffers: %d\n",
+ sndbuf, rcvbuf, rc);
+ return (rc);
+ }
+
/* Ensure this socket aborts active sends immediately when we close
* it. */
-
bzero(&sopt, sizeof sopt);
linger.l_onoff = 0;
sopt.sopt_valsize = sizeof(linger);
CFS_NET_IN;
- rc = sosetopt(so, &sopt);
+ rc = -sosetopt(so, &sopt);
if (rc != 0) {
CERROR ("Can't set SO_LINGER: %d\n", rc);
goto out;
}
-
if (!*ksocknal_tunables.ksnd_nagle) {
option = 1;
bzero(&sopt, sizeof sopt);
sopt.sopt_name = TCP_NODELAY;
sopt.sopt_val = &option;
sopt.sopt_valsize = sizeof(option);
- rc = sosetopt(so, &sopt);
+ rc = -sosetopt(so, &sopt);
if (rc != 0) {
CERROR ("Can't disable nagle: %d\n", rc);
goto out;
}
}
- if (*ksocknal_tunables.ksnd_buffer_size > 0) {
- option = *ksocknal_tunables.ksnd_buffer_size;
- if (option > ksocknal_mbuf_size)
- option = ksocknal_mbuf_size;
-
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_SNDBUF;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
- option, rc);
- goto out;
- }
-
- sopt.sopt_name = SO_RCVBUF;
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
- option, rc);
- goto out;
- }
- }
+
/* snapshot tunables */
keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
keep_count = *ksocknal_tunables.ksnd_keepalive_count;
sopt.sopt_name = SO_KEEPALIVE;
sopt.sopt_val = &option;
sopt.sopt_valsize = sizeof(option);
- rc = sosetopt(so, &sopt);
+ rc = -sosetopt(so, &sopt);
if (rc != 0) {
CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
goto out;
sopt.sopt_name = TCP_KEEPALIVE;
sopt.sopt_val = &keep_idle;
sopt.sopt_valsize = sizeof(keep_idle);
- rc = sosetopt(so, &sopt);
+ rc = -sosetopt(so, &sopt);
if (rc != 0) {
CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc);
goto out;
}
out:
CFS_NET_EX;
- return (-rc);
-}
-
-int
-ksocknal_lib_connect_sock (struct socket **sockp, int *fatal,
- ksock_route_t *route, int local_port)
-{
- struct sockaddr_in locaddr;
- struct sockaddr_in srvaddr;
- struct timeval tv;
- int fd;
- struct socket *so;
- struct sockopt sopt;
- int option;
- int rc;
- int s;
- CFS_DECL_FUNNEL_DATA;
-
- ENTRY;
- bzero (&locaddr, sizeof (locaddr));
- locaddr.sin_len = sizeof(struct sockaddr_in);
- locaddr.sin_family = AF_INET;
- locaddr.sin_port = htons (local_port);
- locaddr.sin_addr.s_addr =
- (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr)
- : INADDR_ANY;
- bzero(&srvaddr, sizeof(srvaddr));
- srvaddr.sin_len = sizeof(struct sockaddr_in);
- srvaddr.sin_family = AF_INET;
- srvaddr.sin_port = htons (route->ksnr_port);
- srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
- *fatal = 1;
-
- CFS_NET_IN;
- rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
- CFS_NET_EX;
- *sockp = so;
- if (rc != 0) {
- CERROR ("Can't create autoconnect socket: %d\n", rc);
- return (-rc);
- }
-
- /* Set the socket timeouts, so our connection attempt completes in
- * finite time */
- tv.tv_sec = *ksocknal_tunables.ksnd_timeout;
- tv.tv_usec = 0;
- bzero(&sopt, sizeof sopt);
- sopt.sopt_dir = SOPT_SET;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_SNDTIMEO;
- sopt.sopt_val = &tv;
- sopt.sopt_valsize = sizeof(tv);
-
- CFS_NET_IN;
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't set send timeout %d: %d\n",
- *ksocknal_tunables.ksnd_timeout, rc);
- goto out;
- }
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_RCVTIMEO;
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't set receive timeout %d: %d\n",
- *ksocknal_tunables.ksnd_timeout, rc);
- goto out;
- }
- option = 1;
- sopt.sopt_level = SOL_SOCKET;
- sopt.sopt_name = SO_REUSEADDR;
- sopt.sopt_val = &option;
- sopt.sopt_valsize = sizeof(option);
- rc = sosetopt(so, &sopt);
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't set sock reuse address: %d\n", rc);
- goto out;
- }
- rc = sobind(so, (struct sockaddr *)&locaddr);
- if (rc == EADDRINUSE) {
- CFS_NET_EX;
- CDEBUG(D_NET, "Port %d already in use\n", local_port);
- *fatal = 0;
- goto out;
- }
- if (rc != 0) {
- CFS_NET_EX;
- CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n",
- HIPQUAD(route->ksnr_myipaddr), rc);
- goto out;
- }
- rc = soconnect(so, (struct sockaddr *)&srvaddr);
- *fatal = !(rc == EADDRNOTAVAIL || rc == EADDRINUSE);
- if (rc != 0) {
- CFS_NET_EX;
- if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
- CERROR ("Can't connect to %s"
- " local IP: %u.%u.%u.%u,"
- " remote IP: %u.%u.%u.%u/%d: %d\n",
- libcfs_id2str(route->ksnr_peer->ksnp_id,
- HIPQUAD(route->ksnr_myipaddr),
- HIPQUAD(route->ksnr_ipaddr),
- route->ksnr_port, rc);
- goto out;
- }
-
- s = splnet();
- while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n");
- (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz);
- }
- LASSERT((so->so_state & SS_ISCONNECTED));
- splx(s);
- CFS_NET_EX;
-
- rc = so->so_error;
- if (rc != 0) {
- CERROR ("Error %d waiting for connection to %s"
- " local IP: %u.%u.%u.%u,"
- " remote IP: %u.%u.%u.%u/%d: %d\n", rc,
- libcfs_id2str(route->ksnr_peer->ksnp_id),
- HIPQUAD(route->ksnr_myipaddr),
- HIPQUAD(route->ksnr_ipaddr),
- route->ksnr_port, rc);
- goto out;
- }
- return (-rc);
-
- out:
- ksocknal_lib_release_sock(so);
- return (-rc);
+ return (rc);
}
void
return;
}
+
extern void ksocknal_read_callback (ksock_conn_t *conn);
extern void ksocknal_write_callback (ksock_conn_t *conn);
static void
ksocknal_upcall(struct socket *so, caddr_t arg, int waitf)
{
- ksock_conn_t *conn;
- CFS_DECL_NET_DATA;
+ ksock_conn_t *conn = (ksock_conn_t *)arg;
ENTRY;
read_lock (&ksocknal_data.ksnd_global_lock);
- conn = so->reserved3;
-
- if (conn == NULL){
- /* More processing is needed? */
+ if (conn == NULL)
goto out;
- }
- if ((so->so_rcv.sb_flags & SB_UPCALL) || !arg ) {
+
+ if (so->so_rcv.sb_flags & SB_UPCALL) {
extern int soreadable(struct socket *so);
- CFS_NET_IN;
- if (conn->ksnc_rx_nob_wanted && soreadable(so)){
+ if (conn->ksnc_rx_nob_wanted && soreadable(so))
/* To verify whether the upcall is for receive */
- CFS_NET_EX;
ksocknal_read_callback (conn);
- }else
- CFS_NET_EX;
}
/* go forward? */
- if ((so->so_snd.sb_flags & SB_UPCALL) || !arg){
+ if (so->so_snd.sb_flags & SB_UPCALL){
extern int sowriteable(struct socket *so);
- CFS_NET_IN;
- if (sowriteable(so)){
+ if (sowriteable(so))
/* socket is writable */
- CFS_NET_EX;
ksocknal_write_callback(conn);
- } else
- CFS_NET_EX;
}
out:
read_unlock (&ksocknal_data.ksnd_global_lock);
CFS_DECL_NET_DATA;
CFS_NET_IN;
- sock->so_upcallarg = (void *)sock; /* anything not NULL */
+ sock->so_upcallarg = (void *)conn;
sock->so_upcall = ksocknal_upcall;
sock->so_snd.sb_timeo = 0;
- sock->so_rcv.sb_timeo = 2 * HZ;
+ sock->so_rcv.sb_timeo = cfs_time_seconds(2);
sock->so_rcv.sb_flags |= SB_UPCALL;
sock->so_snd.sb_flags |= SB_UPCALL;
- sock->reserved3 = conn;
CFS_NET_EX;
return;
}
void
-ksocknal_lib_act_callback(struct socket *sock)
+ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
{
- /* upcall will take the network funnel */
- ksocknal_upcall (sock, 0, 0);
+ CFS_DECL_NET_DATA;
+
+ CFS_NET_IN;
+ ksocknal_upcall (sock, (void *)conn, 0);
+ CFS_NET_EX;
}
void
CFS_DECL_NET_DATA;
CFS_NET_IN;
- sock->so_upcall = NULL;
- sock->so_upcallarg = NULL;
sock->so_rcv.sb_flags &= ~SB_UPCALL;
sock->so_snd.sb_flags &= ~SB_UPCALL;
+ sock->so_upcall = NULL;
+ sock->so_upcallarg = NULL;
CFS_NET_EX;
}
-
+#endif /* !__DARWIN8__ */
#include <mach/mach_types.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
-#include <netat/sysglue.h>
#include <stdarg.h>
#include <libcfs/libcfs.h>
+#ifdef __DARWIN8__
+
+#define SOCKNAL_ARCH_EAGER_ACK 0
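+/* eager ack is a no-op on Darwin 8: ksocknal_lib_eager_ack() is a stub */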
+
+#else /* !__DARWIN8__ */
+
#define SOCKNAL_ARCH_EAGER_ACK 1
-#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc)
-#define SOCK_ERROR(so) ((so)->so_error)
+#endif
-#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat)
+#define SOCK_BUFFER_SIZE (1152 * 1024)
static inline
int ksocknal_nsched(void)
{
+ /* XXX Liang: fix it */
return 1;
}
#define SOCKNAL_ARCH_EAGER_ACK 0
+#define SOCK_BUFFER_SIZE (8<<20)
+
#ifndef CONFIG_SMP
static inline
int ksocknal_nsched(void)
#include "socklnd.h"
-static int timeout = SOCKNAL_TIMEOUT;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
+static int sock_timeout = SOCKNAL_TIMEOUT;
+CFS_MODULE_PARM(sock_timeout, "i", int, 0644,
"dead socket timeout (seconds)");
static int credits = SOCKNAL_CREDITS;
#endif
ksock_tunables_t ksocknal_tunables = {
- .ksnd_timeout = &timeout,
+ .ksnd_timeout = &sock_timeout,
.ksnd_credits = &credits,
.ksnd_peercredits = &peer_credits,
.ksnd_nconnds = &nconnds,
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+lnd_t the_ksocklnd = {
+ /* .lnd_list = */ { &the_ksocklnd.lnd_list,
+ &the_ksocklnd.lnd_list },
+ /* .lnd_refcount = */ 0,
+ /* .lnd_type = */ SOCKLND,
+ /* .lnd_startup = */ ksocknal_startup,
+ /* .lnd_shutdown = */ ksocknal_shutdown,
+ /* .lnd_ctl = */ ksocknal_ctl,
+ /* .lnd_send = */ ksocknal_send,
+ /* .lnd_recv = */ ksocknal_recv,
+ /* .lnd_eager_recv = */ NULL,
+ /* .lnd_notify = */ ksocknal_notify,
+ /* .lnd_accept = */ ksocknal_accept
+};
+
+ksock_nal_data_t ksocknal_data;
+
+
+ksock_interface_t *
+ksocknal_ip2iface(lnet_ni_t *ni, __u32 ip)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ ksock_interface_t *iface;
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ LASSERT(i < LNET_MAX_INTERFACES);
+ iface = &net->ksnn_interfaces[i];
+
+ if (iface->ksni_ipaddr == ip)
+ return (iface);
+ }
+
+ return (NULL);
+}
+
+ksock_route_t *
+ksocknal_create_route (__u32 ipaddr, int port)
+{
+ ksock_route_t *route;
+
+ LIBCFS_ALLOC (route, sizeof (*route));
+ if (route == NULL)
+ return (NULL);
+
+ atomic_set (&route->ksnr_refcount, 1);
+ route->ksnr_peer = NULL;
+ route->ksnr_retry_interval = 0; /* OK to connect at any time */
+ route->ksnr_ipaddr = ipaddr;
+ route->ksnr_port = port;
+ route->ksnr_connecting = 0;
+ route->ksnr_connected = 0;
+ route->ksnr_deleted = 0;
+ route->ksnr_conn_count = 0;
+ route->ksnr_share_count = 0;
+
+ return (route);
+}
+
+void
+ksocknal_destroy_route (ksock_route_t *route)
+{
+ LASSERT (atomic_read(&route->ksnr_refcount) == 0);
+
+ if (route->ksnr_peer != NULL)
+ ksocknal_peer_decref(route->ksnr_peer);
+
+ LIBCFS_FREE (route, sizeof (*route));
+}
+
+int
+ksocknal_create_peer (ksock_peer_t **peerp, lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_net_t *net = ni->ni_data;
+ ksock_peer_t *peer;
+ unsigned long flags;
+
+ LASSERT (id.nid != LNET_NID_ANY);
+ LASSERT (id.pid != LNET_PID_ANY);
+ LASSERT (!in_interrupt());
+
+ LIBCFS_ALLOC (peer, sizeof (*peer));
+ if (peer == NULL)
+ return -ENOMEM;
+
+ memset (peer, 0, sizeof (*peer)); /* NULL pointers/clear flags etc */
+
+ peer->ksnp_ni = ni;
+ peer->ksnp_id = id;
+ atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */
+ peer->ksnp_closing = 0;
+ peer->ksnp_accepting = 0;
+ CFS_INIT_LIST_HEAD (&peer->ksnp_conns);
+ CFS_INIT_LIST_HEAD (&peer->ksnp_routes);
+ CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue);
+
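+ /* bump the per-net peer count atomically with the shutdown check */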
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+
+ if (net->ksnn_shutdown) {
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ LIBCFS_FREE(peer, sizeof(*peer));
+ CERROR("Can't create peer: network shutdown\n");
+ return -ESHUTDOWN;
+ }
+
+ net->ksnn_npeers++;
+
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ *peerp = peer;
+ return 0;
+}
+
+void
+ksocknal_destroy_peer (ksock_peer_t *peer)
+{
+ ksock_net_t *net = peer->ksnp_ni->ni_data;
+ unsigned long flags;
+
+ CDEBUG (D_NET, "peer %s %p deleted\n",
+ libcfs_id2str(peer->ksnp_id), peer);
+
+ LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
+ LASSERT (peer->ksnp_accepting == 0);
+ LASSERT (list_empty (&peer->ksnp_conns));
+ LASSERT (list_empty (&peer->ksnp_routes));
+ LASSERT (list_empty (&peer->ksnp_tx_queue));
+
+ LIBCFS_FREE (peer, sizeof (*peer));
+
+ /* NB a peer's connections and routes keep a reference on their peer
+ * until they are destroyed, so we can be assured that _all_ state to
+ * do with this peer has been cleaned up when its refcount drops to
+ * zero. */
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ net->ksnn_npeers--;
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+}
+
+ksock_peer_t *
+ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ struct list_head *peer_list = ksocknal_nid2peerlist(id.nid);
+ struct list_head *tmp;
+ ksock_peer_t *peer;
+
+ list_for_each (tmp, peer_list) {
+
+ peer = list_entry (tmp, ksock_peer_t, ksnp_list);
+
+ LASSERT (!peer->ksnp_closing);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (peer->ksnp_id.nid != id.nid ||
+ peer->ksnp_id.pid != id.pid)
+ continue;
+
+ CDEBUG(D_NET, "got peer [%p] -> %s (%d)\n",
+ peer, libcfs_id2str(id),
+ atomic_read(&peer->ksnp_refcount));
+ return (peer);
+ }
+ return (NULL);
+}
+
+ksock_peer_t *
+ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_peer_t *peer;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+ peer = ksocknal_find_peer_locked (ni, id);
+ if (peer != NULL) /* +1 ref for caller? */
+ ksocknal_peer_addref(peer);
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ return (peer);
+}
+
+void
+ksocknal_unlink_peer_locked (ksock_peer_t *peer)
+{
+ int i;
+ __u32 ip;
+
+ for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
+ LASSERT (i < LNET_MAX_INTERFACES);
+ ip = peer->ksnp_passive_ips[i];
+
+ ksocknal_ip2iface(peer->ksnp_ni, ip)->ksni_npeers--;
+ }
+
+ LASSERT (list_empty(&peer->ksnp_conns));
+ LASSERT (list_empty(&peer->ksnp_routes));
+ LASSERT (!peer->ksnp_closing);
+ peer->ksnp_closing = 1;
+ list_del (&peer->ksnp_list);
+ /* lose peerlist's ref */
+ ksocknal_peer_decref(peer);
+}
+
+int
+ksocknal_get_peer_info (lnet_ni_t *ni, int index,
+ lnet_process_id_t *id, __u32 *myip, __u32 *peer_ip, int *port,
+ int *conn_count, int *share_count)
+{
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ ksock_route_t *route;
+ struct list_head *rtmp;
+ int i;
+ int j;
+ int rc = -ENOENT;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+
+ list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (peer->ksnp_n_passive_ips == 0 &&
+ list_empty(&peer->ksnp_routes)) {
+ if (index-- > 0)
+ continue;
+
+ *id = peer->ksnp_id;
+ *myip = 0;
+ *peer_ip = 0;
+ *port = 0;
+ *conn_count = 0;
+ *share_count = 0;
+ rc = 0;
+ goto out;
+ }
+
+ for (j = 0; j < peer->ksnp_n_passive_ips; j++) {
+ if (index-- > 0)
+ continue;
+
+ *id = peer->ksnp_id;
+ *myip = peer->ksnp_passive_ips[j];
+ *peer_ip = 0;
+ *port = 0;
+ *conn_count = 0;
+ *share_count = 0;
+ rc = 0;
+ goto out;
+ }
+
+ list_for_each (rtmp, &peer->ksnp_routes) {
+ if (index-- > 0)
+ continue;
+
+ route = list_entry(rtmp, ksock_route_t,
+ ksnr_list);
+
+ *id = peer->ksnp_id;
+ *myip = route->ksnr_myipaddr;
+ *peer_ip = route->ksnr_ipaddr;
+ *port = route->ksnr_port;
+ *conn_count = route->ksnr_conn_count;
+ *share_count = route->ksnr_share_count;
+ rc = 0;
+ goto out;
+ }
+ }
+ }
+ out:
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return (rc);
+}
+
+void
+ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
+{
+ ksock_peer_t *peer = route->ksnr_peer;
+ int type = conn->ksnc_type;
+ ksock_interface_t *iface;
+
+ conn->ksnc_route = route;
+ ksocknal_route_addref(route);
+
+ if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
+ if (route->ksnr_myipaddr == 0) {
+ /* route wasn't bound locally yet (the initial route) */
+ CDEBUG(D_NET, "Binding %s %u.%u.%u.%u to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(conn->ksnc_myipaddr));
+ } else {
+ CDEBUG(D_NET, "Rebinding %s %u.%u.%u.%u from "
+ "%u.%u.%u.%u to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(route->ksnr_myipaddr),
+ HIPQUAD(conn->ksnc_myipaddr));
+
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes--;
+ }
+ route->ksnr_myipaddr = conn->ksnc_myipaddr;
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes++;
+ }
+
+ route->ksnr_connected |= (1<<type);
+ route->ksnr_connecting &= ~(1<<type);
+ route->ksnr_conn_count++;
+
+ /* Successful connection => further attempts can
+ * proceed immediately */
+ route->ksnr_retry_interval = 0;
+}
+
+void
+ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
+{
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+ int type;
+ ksock_route_t *route2;
+
+ LASSERT (route->ksnr_peer == NULL);
+ LASSERT (route->ksnr_connecting == 0);
+ LASSERT (route->ksnr_connected == 0);
+
+ /* LASSERT(unique) */
+ list_for_each(tmp, &peer->ksnp_routes) {
+ route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
+ CERROR ("Duplicate route %s %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr));
+ LBUG();
+ }
+ }
+
+ route->ksnr_peer = peer;
+ ksocknal_peer_addref(peer);
+ /* peer's routelist takes over my ref on 'route' */
+ list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+ type = conn->ksnc_type;
+
+ if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
+ continue;
+
+ ksocknal_associate_route_conn_locked(route, conn);
+ /* keep going (typed routes) */
+ }
+}
+
+void
+ksocknal_del_route_locked (ksock_route_t *route)
+{
+ ksock_peer_t *peer = route->ksnr_peer;
+ ksock_interface_t *iface;
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+
+ LASSERT (!route->ksnr_deleted);
+
+ /* Close associated conns */
+ list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+ conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_route != route)
+ continue;
+
+ ksocknal_close_conn_locked (conn, 0);
+ }
+
+ if (route->ksnr_myipaddr != 0) {
+ iface = ksocknal_ip2iface(route->ksnr_peer->ksnp_ni,
+ route->ksnr_myipaddr);
+ if (iface != NULL)
+ iface->ksni_nroutes--;
+ }
+
+ route->ksnr_deleted = 1;
+ list_del (&route->ksnr_list);
+ ksocknal_route_decref(route); /* drop peer's ref */
+
+ if (list_empty (&peer->ksnp_routes) &&
+ list_empty (&peer->ksnp_conns)) {
+ /* I've just removed the last route to a peer with no active
+ * connections */
+ ksocknal_unlink_peer_locked (peer);
+ }
+}
+
+int
+ksocknal_add_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ipaddr, int port)
+{
+ unsigned long flags;
+ struct list_head *tmp;
+ ksock_peer_t *peer;
+ ksock_peer_t *peer2;
+ ksock_route_t *route;
+ ksock_route_t *route2;
+ int rc;
+
+ if (id.nid == LNET_NID_ANY ||
+ id.pid == LNET_PID_ANY)
+ return (-EINVAL);
+
+ /* Have a brand new peer ready... */
+ rc = ksocknal_create_peer(&peer, ni, id);
+ if (rc != 0)
+ return rc;
+
+ route = ksocknal_create_route (ipaddr, port);
+ if (route == NULL) {
+ ksocknal_peer_decref(peer);
+ return (-ENOMEM);
+ }
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ peer2 = ksocknal_find_peer_locked (ni, id);
+ if (peer2 != NULL) {
+ ksocknal_peer_decref(peer);
+ peer = peer2;
+ } else {
+ /* peer table takes my ref on peer */
+ list_add_tail (&peer->ksnp_list,
+ ksocknal_nid2peerlist (id.nid));
+ }
+
+ route2 = NULL;
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route2 = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route2->ksnr_ipaddr == ipaddr)
+ break;
+
+ route2 = NULL;
+ }
+ if (route2 == NULL) {
+ ksocknal_add_route_locked(peer, route);
+ route->ksnr_share_count++;
+ } else {
+ ksocknal_route_decref(route);
+ route2->ksnr_share_count++;
+ }
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ return (0);
+}
+
+void
+ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip)
+{
+ ksock_conn_t *conn;
+ ksock_route_t *route;
+ struct list_head *tmp;
+ struct list_head *nxt;
+ int nshared;
+
+ LASSERT (!peer->ksnp_closing);
+
+ /* Extra ref prevents peer disappearing until I'm done with it */
+ ksocknal_peer_addref(peer);
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ /* no match */
+ if (!(ip == 0 || route->ksnr_ipaddr == ip))
+ continue;
+
+ route->ksnr_share_count = 0;
+ /* This deletes associated conns too */
+ ksocknal_del_route_locked (route);
+ }
+
+ nshared = 0;
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+ nshared += route->ksnr_share_count;
+ }
+
+ if (nshared == 0) {
+ /* remove everything else if there are no explicit entries
+ * left */
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ /* we should only be removing auto-entries */
+ LASSERT(route->ksnr_share_count == 0);
+ ksocknal_del_route_locked (route);
+ }
+
+ list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ ksocknal_close_conn_locked(conn, 0);
+ }
+ }
+
+ ksocknal_peer_decref(peer);
+ /* NB peer unlinks itself when last conn/route is removed */
+}
+
+int
+ksocknal_del_peer (lnet_ni_t *ni, lnet_process_id_t id, __u32 ip)
+{
+ unsigned long flags;
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ ksock_peer_t *peer;
+ int lo;
+ int hi;
+ int i;
+ int rc = -ENOENT;
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
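+ /* a specific NID hashes to exactly one chain; a wildcard scans them all */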
+ if (id.nid != LNET_NID_ANY)
+ lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
+ else {
+ lo = 0;
+ hi = ksocknal_data.ksnd_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ if (!((id.nid == LNET_NID_ANY || peer->ksnp_id.nid == id.nid) &&
+ (id.pid == LNET_PID_ANY || peer->ksnp_id.pid == id.pid)))
+ continue;
+
+ ksocknal_del_peer_locked (peer, ip);
+ rc = 0; /* matched! */
+ }
+ }
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ return (rc);
+}
+
+ksock_conn_t *
+ksocknal_get_conn_by_idx (lnet_ni_t *ni, int index)
+{
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ int i;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ LASSERT (!peer->ksnp_closing);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ list_for_each (ctmp, &peer->ksnp_conns) {
+ if (index-- > 0)
+ continue;
+
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+ ksocknal_conn_addref(conn);
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return (conn);
+ }
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return (NULL);
+}
+
+ksock_sched_t *
+ksocknal_choose_scheduler_locked (unsigned int irq)
+{
+ ksock_sched_t *sched;
+ ksock_irqinfo_t *info;
+ int i;
+
+ LASSERT (irq < NR_IRQS);
+ info = &ksocknal_data.ksnd_irqinfo[irq];
+
+ if (irq != 0 && /* hardware NIC */
+ info->ksni_valid) { /* already set up */
+ return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
+ }
+
+ /* software NIC (irq == 0) || not associated with a scheduler yet.
+ * Choose the CPU with the fewest connections... */
+ sched = &ksocknal_data.ksnd_schedulers[0];
+ for (i = 1; i < ksocknal_data.ksnd_nschedulers; i++)
+ if (sched->kss_nconns >
+ ksocknal_data.ksnd_schedulers[i].kss_nconns)
+ sched = &ksocknal_data.ksnd_schedulers[i];
+
+ if (irq != 0) { /* Hardware NIC */
+ info->ksni_valid = 1;
+ info->ksni_sched = sched - ksocknal_data.ksnd_schedulers;
+
+ /* no overflow... */
+ LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers);
+ }
+
+ return (sched);
+}
+
+int
+ksocknal_local_ipvec (lnet_ni_t *ni, __u32 *ipaddrs)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ int nip;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ nip = net->ksnn_ninterfaces;
+ LASSERT (nip < LNET_MAX_INTERFACES);
+
+ for (i = 0; i < nip; i++) {
+ ipaddrs[i] = net->ksnn_interfaces[i].ksni_ipaddr;
+ LASSERT (ipaddrs[i] != 0);
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return (nip);
+}
+
+int
+ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
+{
+ int best_netmatch = 0;
+ int best_xor = 0;
+ int best = -1;
+ int this_xor;
+ int this_netmatch;
+ int i;
+
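+ /* score each candidate peer IP against this interface: prefer an
+ * address on the same subnet, then the numerically closest one
+ * (smallest XOR distance) */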
+ for (i = 0; i < nips; i++) {
+ if (ips[i] == 0)
+ continue;
+
+ this_xor = (ips[i] ^ iface->ksni_ipaddr);
+ this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best < 0 ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_xor > this_xor)))
+ continue;
+
+ best = i;
+ best_netmatch = this_netmatch;
+ best_xor = this_xor;
+ }
+
+ LASSERT (best >= 0);
+ return (best);
+}
+
+int
+ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
+{
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ ksock_net_t *net = peer->ksnp_ni->ni_data;
+ unsigned long flags;
+ ksock_interface_t *iface;
+ ksock_interface_t *best_iface;
+ int n_ips;
+ int i;
+ int j;
+ int k;
+ __u32 ip;
+ __u32 xor;
+ int this_netmatch;
+ int best_netmatch;
+ int best_npeers;
+
+ /* CAVEAT EMPTOR: We do all our interface matching with an
+ * exclusive hold of global lock at IRQ priority. We're only
+ * expecting to be dealing with small numbers of interfaces, so the
+ * O(n**3)-ness shouldn't matter */
+
+ /* Also note that I'm not going to return more than n_peerips
+ * interfaces, even if I have more myself */
+
+ write_lock_irqsave(global_lock, flags);
+
+ LASSERT (n_peerips <= LNET_MAX_INTERFACES);
+ LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+ n_ips = MIN(n_peerips, net->ksnn_ninterfaces);
+
+ for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
+ /* ^ yes really... */
+
+ /* If we have any new interfaces, first tick off all the
+ * peer IPs that match old interfaces, then choose new
+ * interfaces to match the remaining peer IPS.
+ * We don't forget interfaces we've stopped using; we might
+ * start using them again... */
+
+ if (i < peer->ksnp_n_passive_ips) {
+ /* Old interface. */
+ ip = peer->ksnp_passive_ips[i];
+ best_iface = ksocknal_ip2iface(peer->ksnp_ni, ip);
+
+ /* peer passive ips are kept up to date */
+ LASSERT(best_iface != NULL);
+ } else {
+ /* choose a new interface */
+ LASSERT (i == peer->ksnp_n_passive_ips);
+
+ best_iface = NULL;
+ best_netmatch = 0;
+ best_npeers = 0;
+
+ for (j = 0; j < net->ksnn_ninterfaces; j++) {
+ iface = &net->ksnn_interfaces[j];
+ ip = iface->ksni_ipaddr;
+
+ for (k = 0; k < peer->ksnp_n_passive_ips; k++)
+ if (peer->ksnp_passive_ips[k] == ip)
+ break;
+
+ if (k < peer->ksnp_n_passive_ips) /* using it already */
+ continue;
+
+ k = ksocknal_match_peerip(iface, peerips, n_peerips);
+ xor = (ip ^ peerips[k]);
+ this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best_iface == NULL ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_npeers > iface->ksni_npeers)))
+ continue;
+
+ best_iface = iface;
+ best_netmatch = this_netmatch;
+ best_npeers = iface->ksni_npeers;
+ }
+
+ best_iface->ksni_npeers++;
+ ip = best_iface->ksni_ipaddr;
+ peer->ksnp_passive_ips[i] = ip;
+ peer->ksnp_n_passive_ips = i+1;
+ }
+
+ LASSERT (best_iface != NULL);
+
+ /* mark the best matching peer IP used */
+ j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
+ peerips[j] = 0;
+ }
+
+ /* Overwrite input peer IP addresses */
+ memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
+
+ write_unlock_irqrestore(global_lock, flags);
+
+ return (n_ips);
+}
+
+void
+ksocknal_create_routes(ksock_peer_t *peer, int port,
+ __u32 *peer_ipaddrs, int npeer_ipaddrs)
+{
+ ksock_route_t *newroute = NULL;
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ lnet_ni_t *ni = peer->ksnp_ni;
+ ksock_net_t *net = ni->ni_data;
+ unsigned long flags;
+ struct list_head *rtmp;
+ ksock_route_t *route;
+ ksock_interface_t *iface;
+ ksock_interface_t *best_iface;
+ int best_netmatch;
+ int this_netmatch;
+ int best_nroutes;
+ int i;
+ int j;
+
+ /* CAVEAT EMPTOR: We do all our interface matching with an
+ * exclusive hold of global lock at IRQ priority. We're only
+ * expecting to be dealing with small numbers of interfaces, so the
+ * O(n**3)-ness here shouldn't matter */
+
+ write_lock_irqsave(global_lock, flags);
+
+ LASSERT (npeer_ipaddrs <= LNET_MAX_INTERFACES);
+
+ for (i = 0; i < npeer_ipaddrs; i++) {
+ if (newroute != NULL) {
+ newroute->ksnr_ipaddr = peer_ipaddrs[i];
+ } else {
+ write_unlock_irqrestore(global_lock, flags);
+
+ newroute = ksocknal_create_route(peer_ipaddrs[i], port);
+ if (newroute == NULL)
+ return;
+
+ write_lock_irqsave(global_lock, flags);
+ }
+
+ /* Already got a route? */
+ route = NULL;
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
+ break;
+
+ route = NULL;
+ }
+ if (route != NULL)
+ continue;
+
+ best_iface = NULL;
+ best_nroutes = 0;
+ best_netmatch = 0;
+
+ LASSERT (net->ksnn_ninterfaces <= LNET_MAX_INTERFACES);
+
+ /* Select interface to connect from */
+ for (j = 0; j < net->ksnn_ninterfaces; j++) {
+ iface = &net->ksnn_interfaces[j];
+
+ /* Using this interface already? */
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_myipaddr == iface->ksni_ipaddr)
+ break;
+
+ route = NULL;
+ }
+ if (route != NULL)
+ continue;
+
+ this_netmatch = (((iface->ksni_ipaddr ^
+ newroute->ksnr_ipaddr) &
+ iface->ksni_netmask) == 0) ? 1 : 0;
+
+ if (!(best_iface == NULL ||
+ best_netmatch < this_netmatch ||
+ (best_netmatch == this_netmatch &&
+ best_nroutes > iface->ksni_nroutes)))
+ continue;
+
+ best_iface = iface;
+ best_netmatch = this_netmatch;
+ best_nroutes = iface->ksni_nroutes;
+ }
+
+ if (best_iface == NULL)
+ continue;
+
+ newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
+ best_iface->ksni_nroutes++;
+
+ ksocknal_add_route_locked(peer, newroute);
+ newroute = NULL;
+ }
+
+ write_unlock_irqrestore(global_lock, flags);
+ if (newroute != NULL)
+ ksocknal_route_decref(newroute);
+}
+
+int
+ksocknal_accept (lnet_ni_t *ni, struct socket *sock)
+{
+ ksock_connreq_t *cr;
+ int rc;
+ __u32 peer_ip;
+ int peer_port;
+ unsigned long flags;
+
+ rc = libcfs_sock_getaddr(sock, 1, &peer_ip, &peer_port);
+ LASSERT (rc == 0); /* we succeeded before */
+
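+ /* the handshake can block, so hand the socket to a connection
+ * daemon rather than processing it in this context */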
+ LIBCFS_ALLOC(cr, sizeof(*cr));
+ if (cr == NULL) {
+ LCONSOLE_ERROR("Dropping connection request from "
+ "%u.%u.%u.%u: memory exhausted\n",
+ HIPQUAD(peer_ip));
+ return -ENOMEM;
+ }
+
+ lnet_ni_addref(ni);
+ cr->ksncr_ni = ni;
+ cr->ksncr_sock = sock;
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
+
+ list_add_tail(&cr->ksncr_list, &ksocknal_data.ksnd_connd_connreqs);
+ cfs_waitq_signal(&ksocknal_data.ksnd_connd_waitq);
+
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock, flags);
+ return 0;
+}
+
+int
+ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+ struct socket *sock, int type)
+{
+ rwlock_t *global_lock = &ksocknal_data.ksnd_global_lock;
+ CFS_LIST_HEAD (zombies);
+ __u32 ipaddrs[LNET_MAX_INTERFACES];
+ int nipaddrs;
+ lnet_process_id_t peerid;
+ struct list_head *tmp;
+ __u64 incarnation;
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_conn_t *conn2;
+ ksock_peer_t *peer = NULL;
+ ksock_peer_t *peer2;
+ ksock_sched_t *sched;
+ unsigned int irq;
+ ksock_tx_t *tx;
+ int bits;
+ int rc;
+ char *warn = NULL;
+
+ LASSERT (route == NULL == (type == SOCKLND_CONN_NONE));
+
+ rc = ksocknal_lib_setup_sock (sock);
+ if (rc != 0)
+ return (rc);
+
+ irq = ksocknal_lib_sock_irq (sock);
+
+ rc = -ENOMEM;
+ LIBCFS_ALLOC(conn, sizeof(*conn));
+ if (conn == NULL)
+ goto failed_0;
+
+ memset (conn, 0, sizeof (*conn));
+ conn->ksnc_peer = NULL;
+ conn->ksnc_route = NULL;
+ conn->ksnc_sock = sock;
+ atomic_set (&conn->ksnc_sock_refcount, 1); /* 1 ref for conn */
+ conn->ksnc_type = type;
+ ksocknal_lib_save_callback(sock, conn);
+ atomic_set (&conn->ksnc_conn_refcount, 1); /* 1 ref for me */
+
+ conn->ksnc_rx_ready = 0;
+ conn->ksnc_rx_scheduled = 0;
+ ksocknal_new_packet (conn, 0);
+
+ CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue);
+ conn->ksnc_tx_ready = 0;
+ conn->ksnc_tx_scheduled = 0;
+ atomic_set (&conn->ksnc_tx_nob, 0);
+
+ /* stash conn's local and remote addrs */
+ rc = ksocknal_lib_get_conn_addrs (conn);
+ if (rc != 0)
+ goto failed_1;
+
+ /* Find out/confirm peer's NID and connection type and get the
+ * vector of interfaces she's willing to let me connect to.
+ * Passive connections use the listener timeout since the peer sends
+ * eagerly */
+
+ if (route != NULL) {
+ LASSERT(ni == route->ksnr_peer->ksnp_ni);
+
+ /* Active connection sends HELLO eagerly */
+ nipaddrs = ksocknal_local_ipvec(ni, ipaddrs);
+ peerid = route->ksnr_peer->ksnp_id;
+
+ rc = ksocknal_send_hello (ni, conn, peerid.nid,
+ ipaddrs, nipaddrs);
+ if (rc != 0)
+ goto failed_1;
+ } else {
+ peerid.nid = LNET_NID_ANY;
+ peerid.pid = LNET_PID_ANY;
+ }
+
+ rc = ksocknal_recv_hello (ni, conn, &peerid, &incarnation, ipaddrs);
+ if (rc < 0) {
+ if (rc == -EALREADY) {
+ CDEBUG(D_NET, "Lost connection race with %s\n",
+ libcfs_id2str(peerid));
+ /* Not an actual failure: return +ve RC so active
+ * connector can back off */
+ rc = EALREADY;
+ }
+ goto failed_1;
+ }
+
+ nipaddrs = rc;
+ LASSERT (peerid.nid != LNET_NID_ANY);
+
+ if (route != NULL) {
+ peer = route->ksnr_peer;
+ ksocknal_peer_addref(peer);
+
+ /* additional routes after interface exchange? */
+ ksocknal_create_routes(peer, conn->ksnc_port,
+ ipaddrs, nipaddrs);
+ rc = 0;
+ write_lock_irqsave (global_lock, flags);
+ } else {
+ rc = ksocknal_create_peer(&peer, ni, peerid);
+ if (rc != 0)
+ goto failed_1;
+
+ write_lock_irqsave(global_lock, flags);
+
+ peer2 = ksocknal_find_peer_locked(ni, peerid);
+ if (peer2 == NULL) {
+ /* NB this puts an "empty" peer in the peer
+ * table (which takes my ref) */
+ list_add_tail(&peer->ksnp_list,
+ ksocknal_nid2peerlist(peerid.nid));
+ } else {
+ ksocknal_peer_decref(peer);
+ peer = peer2;
+ }
+
+ /* +1 ref for me */
+ ksocknal_peer_addref(peer);
+ peer->ksnp_accepting++;
+
+ /* Am I already connecting/connected to this guy? Resolve in
+ * favour of higher NID... */
+ rc = 0;
+ if (peerid.nid < ni->ni_nid) {
+ bits = (1 << conn->ksnc_type);
+
+ list_for_each(tmp, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t,
+ ksnr_list);
+
+ if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+ continue;
+
+ if ((route->ksnr_connecting & bits) == 0)
+ continue;
+
+ rc = EALREADY; /* not a failure */
+ warn = "connection race";
+ break;
+ }
+ }
+
+ write_unlock_irqrestore(global_lock, flags);
+
+ if (rc != 0) {
+ /* set CONN_NONE makes returned HELLO acknowledge I
+ * lost a connection race */
+ conn->ksnc_type = SOCKLND_CONN_NONE;
+ ksocknal_send_hello (ni, conn, peerid.nid,
+ ipaddrs, 0);
+ } else {
+ nipaddrs = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
+ rc = ksocknal_send_hello (ni, conn, peerid.nid,
+ ipaddrs, nipaddrs);
+ }
+
+ write_lock_irqsave(global_lock, flags);
+ peer->ksnp_accepting--;
+
+ if (rc != 0)
+ goto failed_2;
+ }
+
+ if (peer->ksnp_closing ||
+ (route != NULL && route->ksnr_deleted)) {
+ /* route/peer got closed under me */
+ rc = -ESTALE;
+ warn = "peer/route removed";
+ goto failed_2;
+ }
+
+ /* Refuse to duplicate an existing connection, unless this is a
+ * loopback connection */
+ if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
+ conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
+ conn2->ksnc_type != conn->ksnc_type ||
+ conn2->ksnc_incarnation != incarnation)
+ continue;
+
+ rc = 0; /* more of a NOOP than a failure */
+ warn = "duplicate";
+ goto failed_2;
+ }
+ }
+
+ /* If the connection created by this route didn't bind to the IP
+ * address the route connected to, the connection/route matching
+ * code below probably isn't going to work. */
+ if (route != NULL &&
+ route->ksnr_ipaddr != conn->ksnc_ipaddr) {
+ CERROR("Route %s %u.%u.%u.%u connected to %u.%u.%u.%u\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(route->ksnr_ipaddr),
+ HIPQUAD(conn->ksnc_ipaddr));
+ }
+
+ /* Search for a route corresponding to the new connection and
+ * create an association. This allows incoming connections created
+ * by routes in my peer to match my own route entries so I don't
+ * continually create duplicate routes. */
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry(tmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
+ continue;
+
+ ksocknal_associate_route_conn_locked(route, conn);
+ break;
+ }
+
+ conn->ksnc_peer = peer; /* conn takes my ref on peer */
+ conn->ksnc_incarnation = incarnation;
+ peer->ksnp_last_alive = cfs_time_current();
+ peer->ksnp_error = 0;
+
+ sched = ksocknal_choose_scheduler_locked (irq);
+ sched->kss_nconns++;
+ conn->ksnc_scheduler = sched;
+
+ /* Set the deadline for the outgoing HELLO to drain */
+ conn->ksnc_tx_bufnob = 0;
+ conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ mb(); /* order with adding to peer's conn list */
+
+ list_add (&conn->ksnc_list, &peer->ksnp_conns);
+ ksocknal_conn_addref(conn);
+
+ /* NB my callbacks block while I hold ksnd_global_lock */
+ ksocknal_lib_set_callback(sock, conn);
+
+ /* Take all the packets blocking for a connection.
+ * NB, it might be nicer to share these blocked packets among any
+ * other connections that are becoming established. */
+ while (!list_empty (&peer->ksnp_tx_queue)) {
+ tx = list_entry (peer->ksnp_tx_queue.next,
+ ksock_tx_t, tx_list);
+
+ list_del (&tx->tx_list);
+ ksocknal_queue_tx_locked (tx, conn);
+ }
+
+ rc = ksocknal_close_stale_conns_locked(peer, incarnation);
+ write_unlock_irqrestore (global_lock, flags);
+
+ if (rc != 0)
+ CDEBUG(D_HA, "Closed %d stale conns to %s ip %d.%d.%d.%d\n",
+ rc, libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr));
+
+ ksocknal_lib_bind_irq (irq);
+
+ /* Call the callbacks right now to get things going. */
+ if (ksocknal_connsock_addref(conn) == 0) {
+ ksocknal_lib_act_callback(sock, conn);
+ ksocknal_connsock_decref(conn);
+ }
+
+ CDEBUG(D_NET, "New conn %s %u.%u.%u.%u -> %u.%u.%u.%u/%d"
+ " incarnation:"LPD64" sched[%d]/%d\n",
+ libcfs_id2str(peerid), HIPQUAD(conn->ksnc_myipaddr),
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
+ (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
+
+ ksocknal_conn_decref(conn);
+ return (0);
+
+ failed_2:
+ if (!peer->ksnp_closing &&
+ list_empty (&peer->ksnp_conns) &&
+ list_empty (&peer->ksnp_routes)) {
+ list_add(&zombies, &peer->ksnp_tx_queue);
+ list_del_init(&peer->ksnp_tx_queue);
+ ksocknal_unlink_peer_locked(peer);
+ }
+
+ write_unlock_irqrestore(global_lock, flags);
+
+ if (warn != NULL) {
+ if (rc < 0)
+ CERROR("Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ else
+ CDEBUG(D_NET, "Not creating conn %s type %d: %s\n",
+ libcfs_id2str(peerid), conn->ksnc_type, warn);
+ }
+
+ ksocknal_txlist_done(ni, &zombies);
+ ksocknal_peer_decref(peer);
+
+ failed_1:
+ LIBCFS_FREE (conn, sizeof(*conn));
+
+ failed_0:
+ libcfs_sock_release(sock);
+ return rc;
+}
+
+void
+ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
+{
+ /* This just does the immediate housekeeping, and queues the
+ * connection for the reaper to terminate.
+ * Caller holds ksnd_global_lock exclusively in irq context */
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_route_t *route;
+ ksock_conn_t *conn2;
+ struct list_head *tmp;
+
+ LASSERT (peer->ksnp_error == 0);
+ LASSERT (!conn->ksnc_closing);
+ conn->ksnc_closing = 1;
+
+ /* ksnd_deathrow_conns takes over peer's ref */
+ list_del (&conn->ksnc_list);
+
+ route = conn->ksnc_route;
+ if (route != NULL) {
+ /* dissociate conn from route... */
+ LASSERT (!route->ksnr_deleted);
+ LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
+
+ conn2 = NULL;
+ list_for_each(tmp, &peer->ksnp_conns) {
+ conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn2->ksnc_route == route &&
+ conn2->ksnc_type == conn->ksnc_type)
+ break;
+
+ conn2 = NULL;
+ }
+ if (conn2 == NULL)
+ route->ksnr_connected &= ~(1 << conn->ksnc_type);
+
+ conn->ksnc_route = NULL;
+
+#if 0 /* irrelevant with only eager routes */
+ list_del (&route->ksnr_list); /* make route least favourite */
+ list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
+#endif
+ ksocknal_route_decref(route); /* drop conn's ref on route */
+ }
+
+ if (list_empty (&peer->ksnp_conns)) {
+ /* No more connections to this peer */
+
+ peer->ksnp_error = error; /* stash last conn close reason */
+
+ if (list_empty (&peer->ksnp_routes)) {
+ /* I've just closed last conn belonging to a
+ * peer with no routes to it */
+ ksocknal_unlink_peer_locked (peer);
+ }
+ }
+
+ spin_lock (&ksocknal_data.ksnd_reaper_lock);
+
+ list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
+ cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock (&ksocknal_data.ksnd_reaper_lock);
+}
+
+void
+ksocknal_peer_failed (ksock_peer_t *peer)
+{
+ time_t last_alive = 0;
+ int notify = 0;
+
+ /* There has been a connection failure or comms error; but I'll only
+ * tell LNET I think the peer is dead if it's to another kernel and
+ * there are no connections or connection attempts in existence. */
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ if ((peer->ksnp_id.pid & LNET_PID_USERFLAG) == 0 &&
+ list_empty(&peer->ksnp_conns) &&
+ peer->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer) == NULL) {
+ notify = 1;
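+ /* convert the cfs tick timestamp of ksnp_last_alive into
+ * wall-clock seconds for lnet_notify() */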
+ last_alive = cfs_time_current_sec() -
+ cfs_duration_sec(cfs_time_current() -
+ peer->ksnp_last_alive);
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ if (notify)
+ lnet_notify (peer->ksnp_ni, peer->ksnp_id.nid, 0,
+ last_alive);
+}
+
+void
+ksocknal_terminate_conn (ksock_conn_t *conn)
+{
+ /* This gets called by the reaper (guaranteed thread context) to
+ * disengage the socket from its callbacks and close it.
+ * ksnc_refcount will eventually hit zero, and then the reaper will
+ * destroy it. */
+ unsigned long flags;
+ ksock_peer_t *peer = conn->ksnc_peer;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+ int failed = 0;
+
+ LASSERT(conn->ksnc_closing);
+
+ /* wake up the scheduler to "send" all remaining packets to /dev/null */
+ spin_lock_irqsave(&sched->kss_lock, flags);
+
+ if (!conn->ksnc_tx_scheduled &&
+ !list_empty(&conn->ksnc_tx_queue)){
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ /* a closing conn is always ready to tx */
+ conn->ksnc_tx_ready = 1;
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ /* serialise with callbacks */
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ ksocknal_lib_reset_callback(conn->ksnc_sock, conn);
+
+ /* OK, so this conn may not be completely disengaged from its
+ * scheduler yet, but it _has_ committed to terminate... */
+ conn->ksnc_scheduler->kss_nconns--;
+
+ if (peer->ksnp_error != 0) {
+ /* peer's last conn closed in error */
+ LASSERT (list_empty (&peer->ksnp_conns));
+ failed = 1;
+ }
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ /* The socket is closed on the final put; either here, or in
+ * ksocknal_{send,recv}msg(). Since we set up the linger2 option
+ * when the connection was established, this will close the socket
+ * immediately, aborting anything buffered in it. Any hung
+ * zero-copy transmits will therefore complete in finite time. */
+ ksocknal_connsock_decref(conn);
+
+ if (failed)
+ ksocknal_peer_failed(peer);
+}
+
+void
+ksocknal_queue_zombie_conn (ksock_conn_t *conn)
+{
+ /* Queue the conn for the reaper to destroy */
+ unsigned long flags;
+
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) == 0);
+ spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags);
+
+ list_add_tail(&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
+ cfs_waitq_signal(&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
+}
+
+void
+ksocknal_destroy_conn (ksock_conn_t *conn)
+{
+ /* Final coup-de-grace of the reaper */
+ CDEBUG (D_NET, "connection %p\n", conn);
+
+ LASSERT (atomic_read (&conn->ksnc_conn_refcount) == 0);
+ LASSERT (atomic_read (&conn->ksnc_sock_refcount) == 0);
+ LASSERT (conn->ksnc_sock == NULL);
+ LASSERT (conn->ksnc_route == NULL);
+ LASSERT (!conn->ksnc_tx_scheduled);
+ LASSERT (!conn->ksnc_rx_scheduled);
+ LASSERT (list_empty(&conn->ksnc_tx_queue));
+
+ /* complete current receive if any */
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_BODY:
+ CERROR("Completing partial receive from %s"
+ ", ip %d.%d.%d.%d:%d, with error\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ lnet_finalize (conn->ksnc_peer->ksnp_ni,
+ conn->ksnc_cookie, -EIO);
+ break;
+ case SOCKNAL_RX_HEADER:
+ case SOCKNAL_RX_SLOP:
+ break;
+ default:
+ LBUG ();
+ break;
+ }
+
+ ksocknal_peer_decref(conn->ksnc_peer);
+
+ LIBCFS_FREE (conn, sizeof (*conn));
+}
+
+int
+ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
+{
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ int count = 0;
+
+ list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+ if (ipaddr == 0 ||
+ conn->ksnc_ipaddr == ipaddr) {
+ count++;
+ ksocknal_close_conn_locked (conn, why);
+ }
+ }
+
+ return (count);
+}
+
+int
+ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
+{
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+ struct list_head *cnxt;
+ int count = 0;
+
+ list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_incarnation == incarnation)
+ continue;
+
+ CDEBUG(D_NET, "Closing stale conn %s ip:%08x/%d "
+ "incarnation:"LPD64"("LPD64")\n",
+ libcfs_id2str(peer->ksnp_id),
+ conn->ksnc_ipaddr, conn->ksnc_port,
+ conn->ksnc_incarnation, incarnation);
+
+ count++;
+ ksocknal_close_conn_locked (conn, -ESTALE);
+ }
+
+ return (count);
+}
+
+int
+ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
+{
+ ksock_peer_t *peer = conn->ksnc_peer;
+ __u32 ipaddr = conn->ksnc_ipaddr;
+ unsigned long flags;
+ int count;
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ count = ksocknal_close_peer_conns_locked (peer, ipaddr, why);
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ return (count);
+}
+
+int
+ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr)
+{
+ unsigned long flags;
+ ksock_peer_t *peer;
+ struct list_head *ptmp;
+ struct list_head *pnxt;
+ int lo;
+ int hi;
+ int i;
+ int count = 0;
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ if (id.nid != LNET_NID_ANY)
+ lo = hi = ksocknal_nid2peerlist(id.nid) - ksocknal_data.ksnd_peers;
+ else {
+ lo = 0;
+ hi = ksocknal_data.ksnd_peer_hash_size - 1;
+ }
+
+ for (i = lo; i <= hi; i++) {
+ list_for_each_safe (ptmp, pnxt, &ksocknal_data.ksnd_peers[i]) {
+
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+
+ if (!((id.nid == LNET_NID_ANY || id.nid == peer->ksnp_id.nid) &&
+ (id.pid == LNET_PID_ANY || id.pid == peer->ksnp_id.pid)))
+ continue;
+
+ count += ksocknal_close_peer_conns_locked (peer, ipaddr, 0);
+ }
+ }
+
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ /* wildcards always succeed */
+ if (id.nid == LNET_NID_ANY || id.pid == LNET_PID_ANY || ipaddr == 0)
+ return (0);
+
+ return (count == 0 ? -ENOENT : 0);
+}
+
+void
+ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive)
+{
+ /* The router is telling me she's been notified of a change in
+ * gateway state.... */
+ lnet_process_id_t id = {/* .nid = */ gw_nid, /* .pid = */ LNET_PID_ANY};
+
+ CDEBUG (D_NET, "gw %s %s\n", libcfs_nid2str(gw_nid),
+ alive ? "up" : "down");
+
+ if (!alive) {
+ /* If the gateway crashed, close all open connections... */
+ ksocknal_close_matching_conns (id, 0);
+ return;
+ }
+
+ /* ...otherwise do nothing. We can only establish new connections
+ * if we have autoroutes, and these connect on demand. */
+}
+
+void
+ksocknal_push_peer (ksock_peer_t *peer)
+{
+ int index;
+ int i;
+ struct list_head *tmp;
+ ksock_conn_t *conn;
+
+ for (index = 0; ; index++) {
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ i = 0;
+ conn = NULL;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ if (i++ == index) {
+ conn = list_entry (tmp, ksock_conn_t, ksnc_list);
+ ksocknal_conn_addref(conn);
+ break;
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ if (conn == NULL)
+ break;
+
+ ksocknal_push_conn (conn);
+ ksocknal_conn_decref(conn);
+ }
+}
+
+int
+ksocknal_push (lnet_ni_t *ni, lnet_process_id_t id)
+{
+ ksock_peer_t *peer;
+ struct list_head *tmp;
+ int index;
+ int i;
+ int j;
+ int rc = -ENOENT;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ for (j = 0; ; j++) {
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ index = 0;
+ peer = NULL;
+
+ list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry(tmp, ksock_peer_t,
+ ksnp_list);
+
+ if (!((id.nid == LNET_NID_ANY ||
+ id.nid == peer->ksnp_id.nid) &&
+ (id.pid == LNET_PID_ANY ||
+ id.pid == peer->ksnp_id.pid))) {
+ peer = NULL;
+ continue;
+ }
+
+ if (index++ == j) {
+ ksocknal_peer_addref(peer);
+ break;
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ if (peer == NULL)
+ break;
+
+ rc = 0;
+ ksocknal_push_peer (peer);
+ ksocknal_peer_decref(peer);
+ }
+ }
+
+ return (rc);
+}
+
+int
+ksocknal_add_interface(lnet_ni_t *ni, __u32 ipaddress, __u32 netmask)
+{
+ ksock_net_t *net = ni->ni_data;
+ unsigned long flags;
+ ksock_interface_t *iface;
+ int rc;
+ int i;
+ int j;
+ struct list_head *ptmp;
+ ksock_peer_t *peer;
+ struct list_head *rtmp;
+ ksock_route_t *route;
+
+ if (ipaddress == 0 ||
+ netmask == 0)
+ return (-EINVAL);
+
+ write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
+
+ iface = ksocknal_ip2iface(ni, ipaddress);
+ if (iface != NULL) {
+ /* silently ignore dups */
+ rc = 0;
+ } else if (net->ksnn_ninterfaces == LNET_MAX_INTERFACES) {
+ rc = -ENOSPC;
+ } else {
+ iface = &net->ksnn_interfaces[net->ksnn_ninterfaces++];
+
+ iface->ksni_ipaddr = ipaddress;
+ iface->ksni_netmask = netmask;
+ iface->ksni_nroutes = 0;
+ iface->ksni_npeers = 0;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
+ peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
+
+                                for (j = 0; j < peer->ksnp_n_passive_ips; j++)
+ if (peer->ksnp_passive_ips[j] == ipaddress)
+ iface->ksni_npeers++;
+
+ list_for_each(rtmp, &peer->ksnp_routes) {
+ route = list_entry(rtmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_myipaddr == ipaddress)
+ iface->ksni_nroutes++;
+ }
+ }
+ }
+
+ rc = 0;
+ /* NB only new connections will pay attention to the new interface! */
+ }
+
+ write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+
+ return (rc);
+}
+
+void
+ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
+{
+ struct list_head *tmp;
+ struct list_head *nxt;
+ ksock_route_t *route;
+ ksock_conn_t *conn;
+ int i;
+ int j;
+
+ for (i = 0; i < peer->ksnp_n_passive_ips; i++)
+ if (peer->ksnp_passive_ips[i] == ipaddr) {
+ for (j = i+1; j < peer->ksnp_n_passive_ips; j++)
+ peer->ksnp_passive_ips[j-1] =
+ peer->ksnp_passive_ips[j];
+ peer->ksnp_n_passive_ips--;
+ break;
+ }
+
+ list_for_each_safe(tmp, nxt, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_myipaddr != ipaddr)
+ continue;
+
+ if (route->ksnr_share_count != 0) {
+ /* Manually created; keep, but unbind */
+ route->ksnr_myipaddr = 0;
+ } else {
+ ksocknal_del_route_locked(route);
+ }
+ }
+
+ list_for_each_safe(tmp, nxt, &peer->ksnp_conns) {
+ conn = list_entry(tmp, ksock_conn_t, ksnc_list);
+
+ if (conn->ksnc_myipaddr == ipaddr)
+ ksocknal_close_conn_locked (conn, 0);
+ }
+}
+
+int
+ksocknal_del_interface(lnet_ni_t *ni, __u32 ipaddress)
+{
+ ksock_net_t *net = ni->ni_data;
+ int rc = -ENOENT;
+ unsigned long flags;
+ struct list_head *tmp;
+ struct list_head *nxt;
+ ksock_peer_t *peer;
+ __u32 this_ip;
+ int i;
+ int j;
+
+ write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ this_ip = net->ksnn_interfaces[i].ksni_ipaddr;
+
+ if (!(ipaddress == 0 ||
+ ipaddress == this_ip))
+ continue;
+
+ rc = 0;
+
+ for (j = i+1; j < net->ksnn_ninterfaces; j++)
+ net->ksnn_interfaces[j-1] =
+ net->ksnn_interfaces[j];
+
+ net->ksnn_ninterfaces--;
+
+ for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
+ list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
+ peer = list_entry(tmp, ksock_peer_t, ksnp_list);
+
+ if (peer->ksnp_ni != ni)
+ continue;
+
+ ksocknal_peer_del_interface_locked(peer, this_ip);
+ }
+ }
+ }
+
+ write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+
+ return (rc);
+}
+
+int
+ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
+{
+ struct libcfs_ioctl_data *data = arg;
+ int rc;
+
+ switch(cmd) {
+ case IOC_LIBCFS_GET_INTERFACE: {
+ ksock_net_t *net = ni->ni_data;
+ ksock_interface_t *iface;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ if (data->ioc_count < 0 ||
+ data->ioc_count >= net->ksnn_ninterfaces) {
+ rc = -ENOENT;
+ } else {
+ rc = 0;
+ iface = &net->ksnn_interfaces[data->ioc_count];
+
+ data->ioc_u32[0] = iface->ksni_ipaddr;
+ data->ioc_u32[1] = iface->ksni_netmask;
+ data->ioc_u32[2] = iface->ksni_npeers;
+ data->ioc_u32[3] = iface->ksni_nroutes;
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+ return rc;
+ }
+
+ case IOC_LIBCFS_ADD_INTERFACE:
+ return ksocknal_add_interface(ni,
+ data->ioc_u32[0], /* IP address */
+ data->ioc_u32[1]); /* net mask */
+
+ case IOC_LIBCFS_DEL_INTERFACE:
+ return ksocknal_del_interface(ni,
+ data->ioc_u32[0]); /* IP address */
+
+ case IOC_LIBCFS_GET_PEER: {
+ lnet_process_id_t id = {0,};
+ __u32 myip = 0;
+ __u32 ip = 0;
+ int port = 0;
+ int conn_count = 0;
+ int share_count = 0;
+
+ rc = ksocknal_get_peer_info(ni, data->ioc_count,
+ &id, &myip, &ip, &port,
+ &conn_count, &share_count);
+ if (rc != 0)
+ return rc;
+
+ data->ioc_nid = id.nid;
+ data->ioc_count = share_count;
+ data->ioc_u32[0] = ip;
+ data->ioc_u32[1] = port;
+ data->ioc_u32[2] = myip;
+ data->ioc_u32[3] = conn_count;
+ data->ioc_u32[4] = id.pid;
+ return 0;
+ }
+
+ case IOC_LIBCFS_ADD_PEER: {
+ lnet_process_id_t id = {/* .nid = */ data->ioc_nid,
+ /* .pid = */ LUSTRE_SRV_LNET_PID};
+ return ksocknal_add_peer (ni, id,
+ data->ioc_u32[0], /* IP */
+ data->ioc_u32[1]); /* port */
+ }
+ case IOC_LIBCFS_DEL_PEER: {
+ lnet_process_id_t id = {/* .nid = */ data->ioc_nid,
+ /* .pid = */ LNET_PID_ANY};
+ return ksocknal_del_peer (ni, id,
+ data->ioc_u32[0]); /* IP */
+ }
+ case IOC_LIBCFS_GET_CONN: {
+ int txmem;
+ int rxmem;
+ int nagle;
+ ksock_conn_t *conn = ksocknal_get_conn_by_idx (ni, data->ioc_count);
+
+ if (conn == NULL)
+ return -ENOENT;
+
+ ksocknal_lib_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
+
+ data->ioc_count = txmem;
+ data->ioc_nid = conn->ksnc_peer->ksnp_id.nid;
+ data->ioc_flags = nagle;
+ data->ioc_u32[0] = conn->ksnc_ipaddr;
+ data->ioc_u32[1] = conn->ksnc_port;
+ data->ioc_u32[2] = conn->ksnc_myipaddr;
+ data->ioc_u32[3] = conn->ksnc_type;
+ data->ioc_u32[4] = conn->ksnc_scheduler -
+ ksocknal_data.ksnd_schedulers;
+ data->ioc_u32[5] = rxmem;
+ data->ioc_u32[6] = conn->ksnc_peer->ksnp_id.pid;
+ ksocknal_conn_decref(conn);
+ return 0;
+ }
+
+ case IOC_LIBCFS_CLOSE_CONNECTION: {
+ lnet_process_id_t id = {/* .nid = */ data->ioc_nid,
+ /* .pid = */ LNET_PID_ANY};
+
+ return ksocknal_close_matching_conns (id,
+ data->ioc_u32[0]);
+ }
+ case IOC_LIBCFS_REGISTER_MYNID:
+ /* Ignore if this is a noop */
+ if (data->ioc_nid == ni->ni_nid)
+ return 0;
+
+ CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
+ libcfs_nid2str(data->ioc_nid),
+ libcfs_nid2str(ni->ni_nid));
+ return -EINVAL;
+
+ case IOC_LIBCFS_PUSH_CONNECTION: {
+ lnet_process_id_t id = {/* .nid = */ data->ioc_nid,
+ /* .pid = */ LNET_PID_ANY};
+
+ return ksocknal_push(ni, id);
+ }
+ default:
+ return -EINVAL;
+ }
+ /* not reached */
+}
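+
+/* Usage sketch (illustrative, not part of the original change): adding a
+ * peer through the dispatcher above just means filling a libcfs_ioctl_data
+ * the way IOC_LIBCFS_ADD_PEER expects it. */
+static inline int
+ksocknal_ctl_add_peer_example (lnet_ni_t *ni, lnet_nid_t nid,
+                               __u32 ip, int port)
+{
+        struct libcfs_ioctl_data data;
+
+        memset(&data, 0, sizeof(data));
+        data.ioc_nid    = nid;
+        data.ioc_u32[0] = ip;                   /* IP address */
+        data.ioc_u32[1] = port;                 /* port */
+
+        return ksocknal_ctl(ni, IOC_LIBCFS_ADD_PEER, &data);
+}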
+
+void
+ksocknal_free_buffers (void)
+{
+ LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_txs) == 0);
+
+ if (ksocknal_data.ksnd_schedulers != NULL)
+ LIBCFS_FREE (ksocknal_data.ksnd_schedulers,
+ sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
+
+ LIBCFS_FREE (ksocknal_data.ksnd_peers,
+ sizeof (struct list_head) *
+ ksocknal_data.ksnd_peer_hash_size);
+}
+
+void
+ksocknal_base_shutdown (void)
+{
+ ksock_sched_t *sched;
+ int i;
+ unsigned long flags;
+
+ CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
+ atomic_read (&libcfs_kmemory));
+ LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+ switch (ksocknal_data.ksnd_init) {
+ default:
+ LASSERT (0);
+
+ case SOCKNAL_INIT_ALL:
+ /* Wait for queued connreqs to clean up */
+ i = 2;
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
+ while (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for connreqs to clean up\n");
+ cfs_pause(cfs_time_seconds(1));
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
+ }
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
+
+ /* fall through */
+
+ case SOCKNAL_INIT_DATA:
+ LASSERT (ksocknal_data.ksnd_peers != NULL);
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
+ LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
+ }
+ LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
+ LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
+ LASSERT (list_empty (&ksocknal_data.ksnd_connd_connreqs));
+ LASSERT (list_empty (&ksocknal_data.ksnd_connd_routes));
+
+ if (ksocknal_data.ksnd_schedulers != NULL)
+ for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
+ ksock_sched_t *kss =
+ &ksocknal_data.ksnd_schedulers[i];
+
+ LASSERT (list_empty (&kss->kss_tx_conns));
+ LASSERT (list_empty (&kss->kss_rx_conns));
+ LASSERT (kss->kss_nconns == 0);
+ }
+
+ /* flag threads to terminate; wake and wait for them to die */
+ ksocknal_data.ksnd_shuttingdown = 1;
+ cfs_waitq_broadcast (&ksocknal_data.ksnd_connd_waitq);
+ cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq);
+
+ if (ksocknal_data.ksnd_schedulers != NULL)
+ for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
+ sched = &ksocknal_data.ksnd_schedulers[i];
+ cfs_waitq_broadcast(&sched->kss_waitq);
+ }
+
+ i = 4;
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ while (ksocknal_data.ksnd_nthreads != 0) {
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d threads to terminate\n",
+ ksocknal_data.ksnd_nthreads);
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+ cfs_pause(cfs_time_seconds(1));
+ read_lock(&ksocknal_data.ksnd_global_lock);
+ }
+ read_unlock(&ksocknal_data.ksnd_global_lock);
+
+ ksocknal_free_buffers();
+
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
+ break;
+ }
+
+ CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
+ atomic_read (&libcfs_kmemory));
+
+ PORTAL_MODULE_UNUSE;
+}
+
+
+__u64
+ksocknal_new_incarnation (void)
+{
+ struct timeval tv;
+
+        /* The incarnation number is the time this module loaded, and it
+         * identifies this particular instance of the socknal.  Hopefully
+         * we won't be able to reboot more frequently than 1MHz for the
+         * foreseeable future :) */
+
+ do_gettimeofday(&tv);
+
+ return (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
+}
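+
+/* Sketch (illustrative, not part of the original change): because each
+ * module load produces a strictly later incarnation, a peer that cached
+ * the incarnation it saw at handshake detects a reboot by simple
+ * comparison. */
+static inline int
+ksocknal_incarnation_changed_example (__u64 cached, __u64 latest)
+{
+        return cached != latest;
+}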
+
+int
+ksocknal_base_startup (void)
+{
+ int rc;
+ int i;
+
+ LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
+ LASSERT (ksocknal_data.ksnd_nnets == 0);
+
+ memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
+
+ ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
+ LIBCFS_ALLOC (ksocknal_data.ksnd_peers,
+ sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
+ if (ksocknal_data.ksnd_peers == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
+
+ rwlock_init(&ksocknal_data.ksnd_global_lock);
+
+ spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
+ cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_lock_init (&ksocknal_data.ksnd_connd_lock);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_connreqs);
+ CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_connd_routes);
+ cfs_waitq_init(&ksocknal_data.ksnd_connd_waitq);
+
+        /* NB the memset above zeros the whole of ksocknal_data, including
+         * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
+
+ /* flag lists/ptrs/locks initialised */
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
+ PORTAL_MODULE_USE;
+
+ ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
+ LIBCFS_ALLOC(ksocknal_data.ksnd_schedulers,
+ sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
+ if (ksocknal_data.ksnd_schedulers == NULL)
+ goto failed;
+
+ for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
+ ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
+
+ spin_lock_init (&kss->kss_lock);
+ CFS_INIT_LIST_HEAD (&kss->kss_rx_conns);
+ CFS_INIT_LIST_HEAD (&kss->kss_tx_conns);
+#if SOCKNAL_ZC
+ CFS_INIT_LIST_HEAD (&kss->kss_zctxdone_list);
+#endif
+ cfs_waitq_init (&kss->kss_waitq);
+ }
+
+ for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
+ rc = ksocknal_thread_start (ksocknal_scheduler,
+ &ksocknal_data.ksnd_schedulers[i]);
+ if (rc != 0) {
+ CERROR("Can't spawn socknal scheduler[%d]: %d\n",
+ i, rc);
+ goto failed;
+ }
+ }
+
+ for (i = 0; i < *ksocknal_tunables.ksnd_nconnds; i++) {
+ rc = ksocknal_thread_start (ksocknal_connd, (void *)((long)i));
+ if (rc != 0) {
+ CERROR("Can't spawn socknal connd: %d\n", rc);
+ goto failed;
+ }
+ }
+
+ rc = ksocknal_thread_start (ksocknal_reaper, NULL);
+ if (rc != 0) {
+ CERROR ("Can't spawn socknal reaper: %d\n", rc);
+ goto failed;
+ }
+
+ /* flag everything initialised */
+ ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
+
+ return 0;
+
+ failed:
+ ksocknal_base_shutdown();
+ return -ENETDOWN;
+}
+
+void
+ksocknal_shutdown (lnet_ni_t *ni)
+{
+ ksock_net_t *net = ni->ni_data;
+ int i;
+ unsigned long flags;
+ lnet_process_id_t anyid = { /* .nid = */ LNET_NID_ANY,
+ /* .pid = */ LNET_PID_ANY};
+
+ LASSERT(ksocknal_data.ksnd_init == SOCKNAL_INIT_ALL);
+ LASSERT(ksocknal_data.ksnd_nnets > 0);
+
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ net->ksnn_shutdown = 1; /* prevent new peers */
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ /* Delete all peers */
+ ksocknal_del_peer(ni, anyid, 0);
+
+ /* Wait for all peer state to clean up */
+ i = 2;
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ while (net->ksnn_npeers != 0) {
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ i++;
+ CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
+ "waiting for %d peers to disconnect\n",
+ net->ksnn_npeers);
+ cfs_pause(cfs_time_seconds(1));
+
+ spin_lock_irqsave(&net->ksnn_lock, flags);
+ }
+ spin_unlock_irqrestore(&net->ksnn_lock, flags);
+
+ for (i = 0; i < net->ksnn_ninterfaces; i++) {
+ LASSERT (net->ksnn_interfaces[i].ksni_npeers == 0);
+ LASSERT (net->ksnn_interfaces[i].ksni_nroutes == 0);
+ }
+
+ LIBCFS_FREE(net, sizeof(*net));
+
+ ksocknal_data.ksnd_nnets--;
+ if (ksocknal_data.ksnd_nnets == 0)
+ ksocknal_base_shutdown();
+}
+
+int
+ksocknal_enumerate_interfaces(ksock_net_t *net)
+{
+ char **names;
+ int i;
+ int j;
+ int rc;
+ int n;
+
+ n = libcfs_ipif_enumerate(&names);
+ if (n <= 0) {
+ CERROR("Can't enumerate interfaces: %d\n", n);
+ return n;
+ }
+
+ for (i = j = 0; i < n; i++) {
+ int up;
+ __u32 ip;
+ __u32 mask;
+
+ if (!strcmp(names[i], "lo")) /* skip the loopback IF */
+ continue;
+
+ rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
+ if (rc != 0) {
+ CWARN("Can't get interface %s info: %d\n",
+ names[i], rc);
+ continue;
+ }
+
+ if (!up) {
+ CWARN("Ignoring interface %s (down)\n",
+ names[i]);
+ continue;
+ }
+
+ if (j == LNET_MAX_INTERFACES) {
+ CWARN("Ignoring interface %s (too many interfaces)\n",
+ names[i]);
+ continue;
+ }
+
+ net->ksnn_interfaces[j].ksni_ipaddr = ip;
+ net->ksnn_interfaces[j].ksni_netmask = mask;
+ j++;
+ }
+
+ libcfs_ipif_free_enumeration(names, n);
+
+ if (j == 0)
+ CERROR("Can't find any usable interfaces\n");
+
+ return j;
+}
+
+int
+ksocknal_startup (lnet_ni_t *ni)
+{
+ ksock_net_t *net;
+ int rc;
+ int i;
+
+ LASSERT (ni->ni_lnd == &the_ksocklnd);
+
+ if (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING) {
+ rc = ksocknal_base_startup();
+ if (rc != 0)
+ return rc;
+ }
+
+ LIBCFS_ALLOC(net, sizeof(*net));
+ if (net == NULL)
+ goto fail_0;
+
+ memset(net, 0, sizeof(*net));
+ spin_lock_init(&net->ksnn_lock);
+ net->ksnn_incarnation = ksocknal_new_incarnation();
+ ni->ni_data = net;
+ ni->ni_maxtxcredits = *ksocknal_tunables.ksnd_credits;
+ ni->ni_peertxcredits = *ksocknal_tunables.ksnd_peercredits;
+
+ if (ni->ni_interfaces[0] == NULL) {
+ rc = ksocknal_enumerate_interfaces(net);
+ if (rc <= 0)
+ goto fail_1;
+
+ net->ksnn_ninterfaces = rc;
+ } else {
+ for (i = 0; i < LNET_MAX_INTERFACES; i++) {
+ int up;
+
+ if (ni->ni_interfaces[i] == NULL)
+ break;
+
+ rc = libcfs_ipif_query(
+ ni->ni_interfaces[i], &up,
+ &net->ksnn_interfaces[i].ksni_ipaddr,
+ &net->ksnn_interfaces[i].ksni_netmask);
+
+ if (rc != 0) {
+ CERROR("Can't get interface %s info: %d\n",
+ ni->ni_interfaces[i], rc);
+ goto fail_1;
+ }
+
+ if (!up) {
+ CERROR("Interface %s is down\n",
+ ni->ni_interfaces[i]);
+ goto fail_1;
+ }
+ }
+ net->ksnn_ninterfaces = i;
+ }
+
+ ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid),
+ net->ksnn_interfaces[0].ksni_ipaddr);
+
+ ksocknal_data.ksnd_nnets++;
+
+ return 0;
+
+ fail_1:
+ LIBCFS_FREE(net, sizeof(*net));
+ fail_0:
+ if (ksocknal_data.ksnd_nnets == 0)
+ ksocknal_base_shutdown();
+
+ return -ENETDOWN;
+}
+
+
+void __exit
+ksocknal_module_fini (void)
+{
+ lnet_unregister_lnd(&the_ksocklnd);
+ ksocknal_lib_tunables_fini();
+}
+
+int __init
+ksocknal_module_init (void)
+{
+ int rc;
+
+        /* check the ksnr_connected/ksnr_connecting bitfields are large enough */
+ CLASSERT(SOCKLND_CONN_NTYPES <= 4);
+
+ rc = ksocknal_lib_tunables_init();
+ if (rc != 0)
+ return rc;
+
+ lnet_register_lnd(&the_ksocklnd);
+
+ return 0;
+}
+
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Kernel TCP Socket LND v1.0.0");
+MODULE_LICENSE("GPL");
+
+cfs_module(ksocknal, "1.0.0", ksocknal_module_init, ksocknal_module_fini);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#define DEBUG_PORTAL_ALLOC
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_LND
+
+#if defined(__linux__)
+#include "socklnd_lib-linux.h"
+#elif defined(__APPLE__)
+#include "socklnd_lib-darwin.h"
+#elif defined(__WINNT__)
+#include "socklnd_lib-winnt.h"
+#else
+#error Unsupported Operating System
+#endif
+
+#include <libcfs/kp30.h>
+#include <lnet/lnet.h>
+#include <lnet/lib-lnet.h>
+#include <lnet/socklnd.h>
+
+/* default vals for tunables/modparams */
+#define SOCKNAL_TIMEOUT 50 /* default comms timeout (seconds) */
+#define SOCKNAL_NCONND 4 /* # socknal connection daemons */
+#define SOCKNAL_MIN_RECONNECTMS 1000 /* first connection retry after (ms)... */
+#define SOCKNAL_MAX_RECONNECTMS 60000 /* ...exponentially increasing to this */
+#define SOCKNAL_EAGER_ACK SOCKNAL_ARCH_EAGER_ACK /* default eager ack (boolean) */
+#define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? */
+#define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */
+#define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */
+#define SOCKNAL_BUFFER_SIZE (8<<20) /* default socket buffer size */
+#define SOCKNAL_NAGLE 0 /* enable/disable NAGLE? */
+#define SOCKNAL_IRQ_AFFINITY 1 /* enable/disable IRQ affinity? */
+#define SOCKNAL_KEEPALIVE_IDLE 35 /* # seconds idle before 1st probe */
+#define SOCKNAL_KEEPALIVE_COUNT 5 /* # unanswered probes to determine peer death */
+#define SOCKNAL_KEEPALIVE_INTVL 5 /* seconds between probes */
+#define SOCKNAL_CREDITS 256 /* # concurrent sends */
+#define SOCKNAL_PEERCREDITS 8 /* # concurrent sends to 1 peer */
+
+#define SOCKNAL_PEER_HASH_SIZE 101 /* # peer lists */
+
+#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */
+#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */
+
+#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */
+#define SOCKNAL_ENOMEM_RETRY CFS_MIN_DELAY /* jiffies between retries */
+
+#define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */
+
+#define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */
+#define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */
+
+/* risk kmap deadlock on multi-frag I/O (backs off to single-frag if disabled).
+ * no risk if we're not running on a CONFIG_HIGHMEM platform. */
+#ifdef CONFIG_HIGHMEM
+# define SOCKNAL_RISK_KMAP_DEADLOCK 0
+#else
+# define SOCKNAL_RISK_KMAP_DEADLOCK 1
+#endif
+
+/* minimum socket buffer required for connection handshake */
+#define SOCKNAL_MIN_BUFFER (2*(sizeof(lnet_hdr_t) + \
+ LNET_MAX_INTERFACES * sizeof(__u32)))
+
+typedef struct /* per scheduler state */
+{
+ spinlock_t kss_lock; /* serialise */
+ struct list_head kss_rx_conns; /* conn waiting to be read */
+ struct list_head kss_tx_conns; /* conn waiting to be written */
+#if SOCKNAL_ZC
+ struct list_head kss_zctxdone_list; /* completed ZC transmits */
+#endif
+ cfs_waitq_t kss_waitq; /* where scheduler sleeps */
+ int kss_nconns; /* # connections assigned to this scheduler */
+} ksock_sched_t;
+
+typedef struct
+{
+ int ksni_valid:1; /* been set yet? */
+ int ksni_bound:1; /* bound to a cpu yet? */
+ int ksni_sched:6; /* which scheduler (assumes < 64) */
+} ksock_irqinfo_t;
+
+typedef struct /* in-use interface */
+{
+ __u32 ksni_ipaddr; /* interface's IP address */
+ __u32 ksni_netmask; /* interface's network mask */
+ int ksni_nroutes; /* # routes using (active) */
+ int ksni_npeers; /* # peers using (passive) */
+ char ksni_name[16]; /* interface name */
+} ksock_interface_t;
+
+typedef struct
+{
+ int *ksnd_timeout; /* "stuck" socket timeout (seconds) */
+ int *ksnd_nconnds; /* # connection daemons */
+ int *ksnd_min_reconnectms; /* first connection retry after (ms)... */
+ int *ksnd_max_reconnectms; /* ...exponentially increasing to this */
+ int *ksnd_eager_ack; /* make TCP ack eagerly? */
+ int *ksnd_typed_conns; /* drive sockets by type? */
+ int *ksnd_min_bulk; /* smallest "large" message */
+ int *ksnd_buffer_size; /* socket buffer size */
+ int *ksnd_nagle; /* enable NAGLE? */
+ int *ksnd_keepalive_idle; /* # idle secs before 1st probe */
+ int *ksnd_keepalive_count; /* # probes */
+ int *ksnd_keepalive_intvl; /* time between probes */
+ int *ksnd_credits; /* # concurrent sends */
+ int *ksnd_peercredits; /* # concurrent sends to 1 peer */
+#if SOCKNAL_ZC
+ unsigned int *ksnd_zc_min_frag; /* minimum zero copy frag size */
+#endif
+#if CPU_AFFINITY
+ int *ksnd_irq_affinity; /* enable IRQ affinity? */
+#endif
+#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
+ cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */
+#endif
+} ksock_tunables_t;
+
+typedef struct
+{
+ __u64 ksnn_incarnation; /* my epoch */
+ spinlock_t ksnn_lock; /* serialise */
+ int ksnn_npeers; /* # peers */
+ int ksnn_shutdown; /* shutting down? */
+ int ksnn_ninterfaces; /* IP interfaces */
+ ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES];
+} ksock_net_t;
+
+typedef struct
+{
+ int ksnd_init; /* initialisation state */
+ int ksnd_nnets; /* # networks set up */
+
+ rwlock_t ksnd_global_lock; /* stabilize peer/conn ops */
+ struct list_head *ksnd_peers; /* hash table of all my known peers */
+ int ksnd_peer_hash_size; /* size of ksnd_peers */
+
+ int ksnd_nthreads; /* # live threads */
+ int ksnd_shuttingdown; /* tell threads to exit */
+ int ksnd_nschedulers; /* # schedulers */
+ ksock_sched_t *ksnd_schedulers; /* their state */
+
+ atomic_t ksnd_nactive_txs; /* #active txs */
+
+ struct list_head ksnd_deathrow_conns; /* conns to close: reaper_lock*/
+ struct list_head ksnd_zombie_conns; /* conns to free: reaper_lock */
+ struct list_head ksnd_enomem_conns; /* conns to retry: reaper_lock*/
+ cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */
+ cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */
+ spinlock_t ksnd_reaper_lock; /* serialise */
+
+ int ksnd_enomem_tx; /* test ENOMEM sender */
+ int ksnd_stall_tx; /* test sluggish sender */
+ int ksnd_stall_rx; /* test sluggish receiver */
+
+ struct list_head ksnd_connd_connreqs; /* incoming connection requests */
+ struct list_head ksnd_connd_routes; /* routes waiting to be connected */
+ cfs_waitq_t ksnd_connd_waitq; /* connds sleep here */
+ spinlock_t ksnd_connd_lock; /* serialise */
+
+ ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */
+
+} ksock_nal_data_t;
+
+#define SOCKNAL_INIT_NOTHING 0
+#define SOCKNAL_INIT_DATA 1
+#define SOCKNAL_INIT_ALL 2
+
+/* A packet just assembled for transmission is represented by 1 or more
+ * struct iovec fragments (the first frag contains the portals header),
+ * followed by 0 or more lnet_kiov_t fragments.
+ *
+ * On the receive side, initially 1 struct iovec fragment is posted for
+ * receive (the header). Once the header has been received, the payload is
+ * received into either struct iovec or lnet_kiov_t fragments, depending on
+ * what the header matched or whether the message needs forwarding. */
+
+struct ksock_conn; /* forward ref */
+struct ksock_peer; /* forward ref */
+struct ksock_route; /* forward ref */
+
+typedef struct /* transmit packet */
+{
+ struct list_head tx_list; /* queue on conn for transmission etc */
+ int tx_nob; /* # packet bytes */
+ int tx_resid; /* residual bytes */
+ int tx_niov; /* # packet iovec frags */
+ struct iovec *tx_iov; /* packet iovec frags */
+ int tx_nkiov; /* # packet page frags */
+ lnet_kiov_t *tx_kiov; /* packet page frags */
+ struct ksock_conn *tx_conn; /* owning conn */
+ lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() */
+#if SOCKNAL_ZC
+ zccd_t tx_zccd; /* zero copy callback descriptor */
+#endif
+ int tx_desc_size; /* size of this descriptor */
+ union {
+ struct {
+ struct iovec iov; /* virt hdr */
+ lnet_kiov_t kiov[0]; /* paged payload */
+ } paged;
+ struct {
+ struct iovec iov[1]; /* virt hdr + payload */
+ } virt;
+ } tx_frags;
+} ksock_tx_t;
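+
+/* Sizing sketch (illustrative, not part of the original change): tx
+ * descriptors are allocated to exactly fit their fragment lists using
+ * offsetof() on the union above, as ksocknal_send() does; mapped payloads
+ * reserve one extra iovec for the header. */
+static inline int
+ksock_tx_desc_size_example (int payload_niov, int paged)
+{
+        return paged ?
+               offsetof(ksock_tx_t, tx_frags.paged.kiov[payload_niov]) :
+               offsetof(ksock_tx_t, tx_frags.virt.iov[1 + payload_niov]);
+}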
+
+#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd)
+/* network zero copy callback descriptor embedded in ksock_tx_t */
+
+/* space for the rx frag descriptors; we either read a single contiguous
+ * header, or up to LNET_MAX_IOV frags of payload of either type. */
+typedef union {
+ struct iovec iov[LNET_MAX_IOV];
+ lnet_kiov_t kiov[LNET_MAX_IOV];
+} ksock_rxiovspace_t;
+
+#define SOCKNAL_RX_HEADER 1 /* reading header */
+#define SOCKNAL_RX_PARSE 2 /* Calling lnet_parse() */
+#define SOCKNAL_RX_PARSE_WAIT 3 /* waiting to be told to read the body */
+#define SOCKNAL_RX_BODY 4 /* reading body (to deliver here) */
+#define SOCKNAL_RX_SLOP 5 /* skipping body */
+
+typedef struct ksock_conn
+{
+ struct ksock_peer *ksnc_peer; /* owning peer */
+ struct ksock_route *ksnc_route; /* owning route */
+ struct list_head ksnc_list; /* stash on peer's conn list */
+ struct socket *ksnc_sock; /* actual socket */
+ void *ksnc_saved_data_ready; /* socket's original data_ready() callback */
+ void *ksnc_saved_write_space; /* socket's original write_space() callback */
+ atomic_t ksnc_conn_refcount; /* conn refcount */
+ atomic_t ksnc_sock_refcount; /* sock refcount */
+ ksock_sched_t *ksnc_scheduler; /* who schedules this connection */
+ __u32 ksnc_myipaddr; /* my IP */
+ __u32 ksnc_ipaddr; /* peer's IP */
+ int ksnc_port; /* peer's port */
+ int ksnc_closing; /* being shut down */
+ int ksnc_type; /* type of connection */
+ __u64 ksnc_incarnation; /* peer's incarnation */
+
+ /* reader */
+ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */
+ cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */
+ int ksnc_rx_started; /* started receiving a message */
+ int ksnc_rx_ready; /* data ready to read */
+ int ksnc_rx_scheduled; /* being progressed */
+ int ksnc_rx_state; /* what is being read */
+ int ksnc_rx_nob_left; /* # bytes to next hdr/body */
+ int ksnc_rx_nob_wanted; /* bytes actually wanted */
+ int ksnc_rx_niov; /* # iovec frags */
+ struct iovec *ksnc_rx_iov; /* the iovec frags */
+ int ksnc_rx_nkiov; /* # page frags */
+ lnet_kiov_t *ksnc_rx_kiov; /* the page frags */
+ ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */
+ void *ksnc_cookie; /* rx lnet_finalize passthru arg */
+ lnet_hdr_t ksnc_hdr; /* where I read headers into */
+
+ /* WRITER */
+ struct list_head ksnc_tx_list; /* where I enq waiting for output space */
+ struct list_head ksnc_tx_queue; /* packets waiting to be sent */
+ cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */
+ int ksnc_tx_bufnob; /* send buffer marker */
+ atomic_t ksnc_tx_nob; /* # bytes queued */
+ int ksnc_tx_ready; /* write space */
+ int ksnc_tx_scheduled; /* being progressed */
+
+#if !SOCKNAL_SINGLE_FRAG_RX
+ struct iovec ksnc_rx_scratch_iov[LNET_MAX_IOV];
+#endif
+#if !SOCKNAL_SINGLE_FRAG_TX
+ struct iovec ksnc_tx_scratch_iov[LNET_MAX_IOV];
+#endif
+} ksock_conn_t;
+
+#define KSNR_TYPED_ROUTES ((1 << SOCKLND_CONN_CONTROL) | \
+ (1 << SOCKLND_CONN_BULK_IN) | \
+ (1 << SOCKLND_CONN_BULK_OUT))
+
+typedef struct ksock_route
+{
+ struct list_head ksnr_list; /* chain on peer route list */
+ struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */
+ struct ksock_peer *ksnr_peer; /* owning peer */
+ atomic_t ksnr_refcount; /* # users */
+ cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */
+ cfs_duration_t ksnr_retry_interval; /* how long between retries */
+ __u32 ksnr_myipaddr; /* my IP */
+ __u32 ksnr_ipaddr; /* IP address to connect to */
+ int ksnr_port; /* port to connect to */
+ unsigned int ksnr_connecting:4; /* autoconnect in progress by type */
+ unsigned int ksnr_connected:4; /* connections established by type */
+ unsigned int ksnr_deleted:1; /* been removed from peer? */
+ unsigned int ksnr_share_count; /* created explicitly? */
+ int ksnr_conn_count; /* # conns established by this route */
+} ksock_route_t;
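+
+/* Sketch (illustrative, not part of the original change): ksnr_connecting
+ * and ksnr_connected are 4-bit masks indexed by connection type (hence
+ * the CLASSERT in ksocknal_module_init() that SOCKLND_CONN_NTYPES <= 4);
+ * e.g. testing whether a bulk-out conn is established or in progress: */
+static inline int
+ksock_route_bulk_out_busy_example (struct ksock_route *route)
+{
+        return ((route->ksnr_connected | route->ksnr_connecting) &
+                (1 << SOCKLND_CONN_BULK_OUT)) != 0;
+}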
+
+typedef struct ksock_peer
+{
+ struct list_head ksnp_list; /* stash on global peer list */
+ lnet_process_id_t ksnp_id; /* who's on the other end(s) */
+ atomic_t ksnp_refcount; /* # users */
+ int ksnp_sharecount; /* lconf usage counter */
+ int ksnp_closing; /* being closed */
+ int ksnp_accepting; /* # passive connections pending */
+ int ksnp_error; /* errno on closing last conn */
+ struct list_head ksnp_conns; /* all active connections */
+ struct list_head ksnp_routes; /* routes */
+ struct list_head ksnp_tx_queue; /* waiting packets */
+ cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */
+ lnet_ni_t *ksnp_ni; /* which network */
+ int ksnp_n_passive_ips; /* # of... */
+ __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; /* preferred local interfaces */
+} ksock_peer_t;
+
+typedef struct ksock_connreq
+{
+ struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */
+ lnet_ni_t *ksncr_ni; /* chosen NI */
+ struct socket *ksncr_sock; /* accepted socket */
+} ksock_connreq_t;
+
+extern ksock_nal_data_t ksocknal_data;
+extern ksock_tunables_t ksocknal_tunables;
+
+static inline struct list_head *
+ksocknal_nid2peerlist (lnet_nid_t nid)
+{
+ unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size;
+
+ return (&ksocknal_data.ksnd_peers [hash]);
+}
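+
+/* Sketch (illustrative, not part of the original change): peers hash on
+ * NID alone, so every PID for a given NID lands in one bucket; chain
+ * walkers disambiguate with the wildcard-aware match used throughout
+ * this LND: */
+static inline int
+ksocknal_id_matches_example (lnet_process_id_t want, lnet_process_id_t got)
+{
+        return (want.nid == LNET_NID_ANY || want.nid == got.nid) &&
+               (want.pid == LNET_PID_ANY || want.pid == got.pid);
+}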
+
+static inline void
+ksocknal_conn_addref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+ atomic_inc(&conn->ksnc_conn_refcount);
+}
+
+extern void ksocknal_queue_zombie_conn (ksock_conn_t *conn);
+
+static inline void
+ksocknal_conn_decref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+ if (atomic_dec_and_test(&conn->ksnc_conn_refcount))
+ ksocknal_queue_zombie_conn(conn);
+}
+
+static inline int
+ksocknal_connsock_addref (ksock_conn_t *conn)
+{
+ int rc = -ESHUTDOWN;
+
+ read_lock (&ksocknal_data.ksnd_global_lock);
+ if (!conn->ksnc_closing) {
+ LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
+ atomic_inc(&conn->ksnc_sock_refcount);
+ rc = 0;
+ }
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ return (rc);
+}
+
+static inline void
+ksocknal_connsock_decref (ksock_conn_t *conn)
+{
+ LASSERT (atomic_read(&conn->ksnc_sock_refcount) > 0);
+ if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) {
+ LASSERT (conn->ksnc_closing);
+ libcfs_sock_release(conn->ksnc_sock);
+ conn->ksnc_sock = NULL;
+ }
+}
+
+static inline void
+ksocknal_route_addref (ksock_route_t *route)
+{
+ LASSERT (atomic_read(&route->ksnr_refcount) > 0);
+ atomic_inc(&route->ksnr_refcount);
+}
+
+extern void ksocknal_destroy_route (ksock_route_t *route);
+
+static inline void
+ksocknal_route_decref (ksock_route_t *route)
+{
+ LASSERT (atomic_read (&route->ksnr_refcount) > 0);
+ if (atomic_dec_and_test(&route->ksnr_refcount))
+ ksocknal_destroy_route (route);
+}
+
+static inline void
+ksocknal_peer_addref (ksock_peer_t *peer)
+{
+ LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+ atomic_inc(&peer->ksnp_refcount);
+}
+
+extern void ksocknal_destroy_peer (ksock_peer_t *peer);
+
+static inline void
+ksocknal_peer_decref (ksock_peer_t *peer)
+{
+ LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
+ if (atomic_dec_and_test(&peer->ksnp_refcount))
+ ksocknal_destroy_peer (peer);
+}
+
+int ksocknal_startup (lnet_ni_t *ni);
+void ksocknal_shutdown (lnet_ni_t *ni);
+int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
+int ksocknal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
+int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
+ int delayed, unsigned int niov,
+ struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen);
+int ksocknal_accept(lnet_ni_t *ni, struct socket *sock);
+
+extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port);
+extern ksock_peer_t *ksocknal_find_peer_locked (lnet_ni_t *ni, lnet_process_id_t id);
+extern ksock_peer_t *ksocknal_find_peer (lnet_ni_t *ni, lnet_process_id_t id);
+extern void ksocknal_peer_failed (ksock_peer_t *peer);
+extern int ksocknal_create_conn (lnet_ni_t *ni, ksock_route_t *route,
+ struct socket *sock, int type);
+extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
+extern void ksocknal_terminate_conn (ksock_conn_t *conn);
+extern void ksocknal_destroy_conn (ksock_conn_t *conn);
+extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
+extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
+extern int ksocknal_close_matching_conns (lnet_process_id_t id, __u32 ipaddr);
+
+extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
+extern void ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx, int asynch);
+extern void ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist);
+extern void ksocknal_notify (lnet_ni_t *ni, lnet_nid_t gw_nid, int alive);
+extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
+extern void ksocknal_thread_fini (void);
+extern ksock_route_t *ksocknal_find_connecting_route_locked (ksock_peer_t *peer);
+extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
+extern int ksocknal_scheduler (void *arg);
+extern int ksocknal_connd (void *arg);
+extern int ksocknal_reaper (void *arg);
+extern int ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ lnet_nid_t peer_nid,
+ __u32 *ipaddrs, int nipaddrs);
+extern int ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ lnet_process_id_t *id,
+ __u64 *incarnation, __u32 *ipaddrs);
+
+extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn);
+extern void ksocknal_lib_push_conn (ksock_conn_t *conn);
+extern void ksocknal_lib_bind_irq (unsigned int irq);
+extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn);
+extern unsigned int ksocknal_lib_sock_irq (struct socket *sock);
+extern int ksocknal_lib_setup_sock (struct socket *so);
+extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx);
+extern void ksocknal_lib_eager_ack (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_iov (ksock_conn_t *conn);
+extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn);
+extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem,
+ int *rxmem, int *nagle);
+
+extern int ksocknal_lib_tunables_init(void);
+extern void ksocknal_lib_tunables_fini(void);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Zach Brown <zab@zabbo.net>
+ * Author: Peter J. Braam <braam@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+void
+ksocknal_free_tx (ksock_tx_t *tx)
+{
+ atomic_dec(&ksocknal_data.ksnd_nactive_txs);
+ LIBCFS_FREE(tx, tx->tx_desc_size);
+}
+
+
+int
+ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+        struct iovec *iov = tx->tx_iov;
+        int fragsize = iov->iov_len;
+        int more = (tx->tx_nkiov > 0) ||
+                   (!list_empty (&conn->ksnc_tx_queue));
+        int rc;
+        int len;
+        ksock_mdl_t *mdl;
+
+        /* NB we can't trust socket ops to either consume our iovs
+         * or leave them alone, so we lock the whole fragment list
+         * into a single mdl chain and send it in one call. */
+ LASSERT (fragsize <= tx->tx_resid);
+ LASSERT (tx->tx_niov > 0);
+
+ /* lock the whole tx iovs into a single mdl chain */
+ mdl = ksocknal_lock_iovs(tx->tx_iov, tx->tx_niov, FALSE, &len);
+
+ if (mdl) {
+ /* send the total mdl chain */
+ rc = ksocknal_send_mdl(
+ conn->ksnc_sock, tx, mdl, len,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ if (rc <= 0) {
+ goto errorout;
+ }
+
+ tx->tx_resid -= rc;
+
+ len = rc;
+
+ while (len > 0) {
+
+ if ((unsigned int)len < iov->iov_len) {
+ /* didn't send whole iov entry... */
+ iov->iov_base = (char *)(iov->iov_base) + len;
+ iov->iov_len -= len;
+ len = 0;
+ } else {
+ len -= iov->iov_len;
+ tx->tx_iov++;
+ tx->tx_niov--;
+ iov = tx->tx_iov;
+ }
+ }
+
+errorout:
+
+ return (rc);
+}
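+
+/* Sketch (illustrative, not part of the original change): the loop above
+ * is the usual "consume n sent bytes from an iovec array" idiom --
+ * partial fragments are trimmed in place, completed ones advance the
+ * cursor and shrink the count: */
+static inline void
+ksock_iov_consume_example (struct iovec **piov, int *pniov, int nob)
+{
+        struct iovec *iov = *piov;
+
+        while (nob > 0) {
+                if ((unsigned int)nob < iov->iov_len) {
+                        iov->iov_base = (char *)(iov->iov_base) + nob;
+                        iov->iov_len -= nob;
+                        nob = 0;
+                } else {
+                        nob -= iov->iov_len;
+                        iov++;
+                        (*pniov)--;
+                }
+        }
+        *piov = iov;
+}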
+
+
+int
+ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ lnet_kiov_t *kiov = tx->tx_kiov;
+ int fragsize = kiov->kiov_len;
+ cfs_page_t *page = kiov->kiov_page;
+ int offset = kiov->kiov_offset;
+ int more = (!list_empty (&conn->ksnc_tx_queue));
+
+        int rc;
+        __u32 len;
+        PMDL mdl;
+
+        /* NB we can't trust socket ops to either consume our kiovs
+         * or leave them alone, so we lock the whole fragment list
+         * into a single mdl chain and send it in one call. */
+ LASSERT (fragsize <= tx->tx_resid);
+ LASSERT (offset + fragsize <= PAGE_SIZE);
+ LASSERT (tx->tx_niov == 0);
+ LASSERT (tx->tx_nkiov > 0);
+
+ /* lock the whole tx kiovs into a single mdl chain */
+ mdl = ksocknal_lock_kiovs(tx->tx_kiov, tx->tx_nkiov, FALSE, &len);
+
+ if (mdl) {
+ /* send the total mdl chain */
+ rc = ksocknal_send_mdl(
+ conn->ksnc_sock, tx, mdl, len,
+ more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT);
+ } else {
+ rc = -ENOMEM;
+ }
+
+ if (rc <= 0) {
+ goto errorout;
+ }
+
+ tx->tx_resid -= rc;
+
+ len = rc;
+
+        while (len > 0) {
+
+ if (len < kiov->kiov_len) {
+ kiov->kiov_offset += len;
+ kiov->kiov_len -= len;
+ len = 0;
+ } else {
+ len -= kiov->kiov_len;
+ tx->tx_kiov++;
+ tx->tx_nkiov--;
+ kiov = tx->tx_kiov;
+ }
+ }
+
+errorout:
+
+ return (rc);
+}
+
+
+int
+ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ int rc;
+ int bufnob;
+
+ if (ksocknal_data.ksnd_stall_tx != 0) {
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (cfs_time_seconds(ksocknal_data.ksnd_stall_tx));
+ }
+
+ LASSERT (tx->tx_resid != 0);
+
+ ksocknal_get_tconn (conn->ksnc_sock);
+
+ do {
+ if (ksocknal_data.ksnd_enomem_tx > 0) {
+ /* testing... */
+ ksocknal_data.ksnd_enomem_tx--;
+ rc = -EAGAIN;
+ } else if (tx->tx_niov != 0) {
+ rc = ksocknal_send_iov (conn, tx);
+ } else {
+ rc = ksocknal_send_kiov (conn, tx);
+ }
+
+                bufnob = 0; /* conn->ksnc_sock->sk->sk_wmem_queued */
+ if (rc > 0) /* sent something? */
+ conn->ksnc_tx_bufnob += rc; /* account it */
+
+ if (bufnob < conn->ksnc_tx_bufnob) {
+ /* allocated send buffer bytes < computed; infer
+ * something got ACKed */
+ conn->ksnc_tx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_tx_bufnob = bufnob;
+ mb();
+ }
+
+ if (rc <= 0) {
+ /* Didn't write anything.
+ *
+ * NB: rc == 0 and rc == -EAGAIN both mean try
+ * again later (linux stack returns -EAGAIN for
+                         * this, but Adaptec TOE returns 0).
+ *
+ * Also, sends never fail with -ENOMEM, just
+ * -EAGAIN, but with the added bonus that we can't
+ * expect write_space() to call us back to tell us
+ * when to try sending again. We use the
+ * SOCK_NOSPACE flag to diagnose... */
+
+ LASSERT(rc != -ENOMEM);
+
+ break;
+ }
+
+ /* socket's wmem_queued now includes 'rc' bytes */
+ atomic_sub (rc, &conn->ksnc_tx_nob);
+ rc = 0;
+
+ } while (tx->tx_resid != 0);
+
+ ksocknal_put_tconn (conn->ksnc_sock);
+ return (rc);
+}
+
+int
+ksocknal_recv_iov (ksock_conn_t *conn)
+{
+ struct iovec *iov = conn->ksnc_rx_iov;
+ int size;
+ int rc;
+
+ ksock_mdl_t * mdl;
+
+        /* NB we can't trust socket ops to either consume our iovs
+         * or leave them alone, so we lock the whole fragment list
+         * into a single mdl chain and receive into it in one call. */
+        LASSERT (conn->ksnc_rx_niov > 0);
+
+        /* lock the whole rx iovs into a single mdl chain */
+ mdl = ksocknal_lock_iovs(iov, conn->ksnc_rx_niov, TRUE, &size);
+
+ if (!mdl) {
+ rc = -ENOMEM;
+ return (rc);
+ }
+
+ LASSERT (size <= conn->ksnc_rx_nob_wanted);
+
+ /* try to request data for the whole mdl chain */
+ rc = ksocknal_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT);
+
+ if (rc <= 0)
+ return (rc);
+
+ /* received something... */
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline = cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+
+ conn->ksnc_rx_started = 1;
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ while (rc > 0) {
+
+ if (rc < (int)iov->iov_len) {
+ iov->iov_base = (char *)(iov->iov_base) + rc;
+ iov->iov_len -= rc;
+ rc = 0;
+ } else {
+ rc -= iov->iov_len;
+ conn->ksnc_rx_iov++;
+ conn->ksnc_rx_niov--;
+ iov = conn->ksnc_rx_iov;
+ }
+ }
+
+ return (1);
+}
+
+
+int
+ksocknal_recv_kiov (ksock_conn_t *conn)
+{
+ lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
+ int size;
+ int rc;
+
+ ksock_mdl_t * mdl;
+
+        /* NB we can't trust socket ops to either consume our kiovs
+         * or leave them alone, so we lock the whole fragment list
+         * into a single mdl chain and receive into it in one call. */
+        LASSERT (conn->ksnc_rx_nkiov > 0);
+
+        /* lock the whole rx kiovs into a single mdl chain */
+ mdl = ksocknal_lock_kiovs(kiov, conn->ksnc_rx_nkiov, TRUE, &size);
+
+ if (!mdl) {
+ rc = -ENOMEM;
+ return (rc);
+ }
+
+ LASSERT (size <= conn->ksnc_rx_nob_wanted);
+
+ /* try to request data for the whole mdl chain */
+ rc = ksocknal_recv_mdl (conn->ksnc_sock, mdl, size, MSG_DONTWAIT);
+
+ if (rc <= 0)
+                return (rc);
+
+        /* received something... */
+ conn->ksnc_peer->ksnp_last_alive = cfs_time_current();
+ conn->ksnc_rx_deadline = cfs_time_shift (*ksocknal_tunables.ksnd_timeout);
+
+ conn->ksnc_rx_started = 1;
+
+ conn->ksnc_rx_nob_wanted -= rc;
+ conn->ksnc_rx_nob_left -= rc;
+
+ while (rc > 0) {
+
+ if (rc < (int) kiov->kiov_len) {
+ kiov->kiov_offset += rc;
+ kiov->kiov_len -= rc;
+ rc = 0;
+ } else {
+ rc -= kiov->kiov_len;
+ kiov->kiov_len = 0;
+ conn->ksnc_rx_kiov++;
+ conn->ksnc_rx_nkiov--;
+ kiov = conn->ksnc_rx_kiov;
+ }
+ }
+
+ return (1);
+}
+
+
+int
+ksocknal_receive (ksock_conn_t *conn)
+{
+ /* Return 1 on success, 0 on EOF, < 0 on error.
+ * Caller checks ksnc_rx_nob_wanted to determine
+ * progress/completion. */
+ int rc;
+ size_t size;
+
+ int count = 0;
+
+ ENTRY;
+
+ if (ksocknal_data.ksnd_stall_rx != 0) {
+ set_current_state (TASK_UNINTERRUPTIBLE);
+ schedule_timeout (cfs_time_seconds (ksocknal_data.ksnd_stall_rx));
+ }
+
+ rc = ksocknal_connsock_addref(conn);
+ if (rc != 0) {
+ LASSERT (conn->ksnc_closing);
+ return (-ESHUTDOWN);
+ }
+
+ rc = ksocknal_query_data(conn->ksnc_sock, &size, FALSE);
+ if (rc != 0) {
+ KsPrint((1, "ksocknal_receive: error querying data length ...\n"));
+ goto errorout;
+ }
+
+ if (!CAN_BE_SCHED(size, (ULONG)conn->ksnc_rx_nob_wanted)) {
+ KsPrint((1, "ksocknal_receive: queried data length = %xh rx_nob_wanted/left = %xh/%xh\n",
+ size, conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left ));
+                rc = 1;
+ goto errorout;
+ }
+
+ for (;;) {
+
+ count++;
+
+ if (conn->ksnc_rx_niov != 0)
+ rc = ksocknal_recv_iov (conn);
+ else
+ rc = ksocknal_recv_kiov (conn);
+
+ if (rc <= 0) {
+ /* error/EOF or partial receive */
+ if (rc == -EAGAIN) {
+ rc = 1;
+ } else if (rc == 0 && conn->ksnc_rx_started) {
+ /* EOF in the middle of a message */
+ rc = -EPROTO;
+ }
+ break;
+ }
+
+ /* Completed a fragment */
+
+ if (conn->ksnc_rx_nob_wanted == 0) {
+ /* Completed a message segment (header or payload) */
+ if ((*ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 &&
+ (conn->ksnc_rx_state == SOCKNAL_RX_BODY)) {
+ /* Remind the socket to ack eagerly... */
+ ksocknal_eager_ack(conn);
+ }
+ rc = 1;
+ break;
+ }
+ }
+
+errorout:
+
+ ksocknal_connsock_decref(conn);
+ RETURN(rc);
+}
+
+
+#if SOCKNAL_ZC
+void
+ksocknal_zc_callback (zccd_t *zcd)
+{
+ ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd);
+ ksock_sched_t *sched = tx->tx_conn->ksnc_scheduler;
+ unsigned long flags;
+ ENTRY;
+
+ /* Schedule tx for cleanup (can't do it now due to lock conflicts) */
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
+ cfs_waitq_signal (&sched->kss_waitq);
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ EXIT;
+}
+#endif
+
+void
+ksocknal_tx_done (lnet_ni_t *ni, ksock_tx_t *tx, int asynch)
+{
+ ENTRY;
+
+ if (tx->tx_conn != NULL) {
+#if SOCKNAL_ZC
+ /* zero copy completion isn't always from
+ * process_transmit() so it needs to keep a ref on
+ * tx_conn... */
+ if (asynch)
+ ksocknal_conn_decref(tx->tx_conn);
+#else
+ LASSERT (!asynch);
+#endif
+ }
+
+ lnet_finalize (ni, tx->tx_lnetmsg, (tx->tx_resid == 0) ? 0 : -EIO);
+ ksocknal_free_tx (tx);
+ EXIT;
+}
+
+void
+ksocknal_txlist_done (lnet_ni_t *ni, struct list_head *txlist)
+{
+ ksock_tx_t *tx;
+
+ while (!list_empty (txlist)) {
+ tx = list_entry (txlist->next, ksock_tx_t, tx_list);
+
+ CERROR ("Deleting packet type %d len %d %s->%s\n",
+ le32_to_cpu (tx->tx_lnetmsg->msg_hdr.type),
+ le32_to_cpu (tx->tx_lnetmsg->msg_hdr.payload_length),
+ libcfs_nid2str(le64_to_cpu(tx->tx_lnetmsg->msg_hdr.src_nid)),
+ libcfs_nid2str(le64_to_cpu (tx->tx_lnetmsg->msg_hdr.dest_nid)));
+
+ list_del (&tx->tx_list);
+ ksocknal_tx_done (ni, tx, 0);
+ }
+}
+
+void
+ksocknal_tx_launched (ksock_tx_t *tx)
+{
+#if SOCKNAL_ZC
+ if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
+ ksock_conn_t *conn = tx->tx_conn;
+
+ /* zccd skbufs are still in-flight. First take a ref on
+ * conn, so it hangs about for ksocknal_tx_done... */
+ ksocknal_conn_addref(conn);
+
+ /* ...then drop the initial ref on zccd, so the zero copy
+ * callback can occur */
+ zccd_put (&tx->tx_zccd);
+ return;
+ }
+#endif
+        /* Any zero-copy sends have completed; I can complete the
+         * transmit now, avoiding an extra schedule */
+ ksocknal_tx_done (tx->tx_conn->ksnc_peer->ksnp_ni, tx, 0);
+}
+
+int
+ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
+{
+ unsigned long flags;
+ int rc;
+
+ rc = ksocknal_transmit (conn, tx);
+
+ CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
+
+ if (tx->tx_resid == 0) {
+ /* Sent everything OK */
+ LASSERT (rc == 0);
+
+ ksocknal_tx_launched (tx);
+ return (0);
+ }
+
+ if (rc == -EAGAIN)
+ return (rc);
+
+ if (rc == -ENOMEM) {
+ static int counter;
+
+ counter++; /* exponential backoff warnings */
+ if ((counter & (-counter)) == counter)
+ CWARN("%d ENOMEM tx %p (%u allocated)\n",
+ counter, conn, atomic_read(&libcfs_kmemory));
+
+ /* Queue on ksnd_enomem_conns for retry after a timeout */
+ spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags);
+
+ /* enomem list takes over scheduler's ref... */
+ LASSERT (conn->ksnc_tx_scheduled);
+ list_add_tail(&conn->ksnc_tx_list,
+ &ksocknal_data.ksnd_enomem_conns);
+ if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(),
+ SOCKNAL_ENOMEM_RETRY),
+ ksocknal_data.ksnd_reaper_waketime))
+ cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq);
+
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
+ return (rc);
+ }
+
+ /* Actual error */
+ LASSERT (rc < 0);
+
+ if (!conn->ksnc_closing) {
+ switch (rc) {
+ case -ECONNRESET:
+ LCONSOLE_WARN("Host %u.%u.%u.%u reset our connection "
+ "while we were sending data; it may have "
+ "rebooted.\n",
+ HIPQUAD(conn->ksnc_ipaddr));
+ break;
+ default:
+ LCONSOLE_WARN("There was an unexpected network error "
+ "while writing to %u.%u.%u.%u: %d.\n",
+ HIPQUAD(conn->ksnc_ipaddr), rc);
+ break;
+ }
+ CDEBUG(D_HA, "[%p] Error %d on write to %s"
+ " ip %d.%d.%d.%d:%d\n", conn, rc,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ }
+
+ ksocknal_close_conn_and_siblings (conn, rc);
+ ksocknal_tx_launched (tx);
+
+ return (rc);
+}
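+
+/* Sketch (illustrative, not part of the original change): the
+ * (x & -x) == x test used for the ENOMEM warning above (and the shutdown
+ * waits elsewhere) is true only for 0 and powers of two, so a message is
+ * emitted on the 1st, 2nd, 4th, 8th ... occurrence -- exponentially
+ * backed-off logging. */
+static inline int
+ksocknal_log_this_time_example (int counter)
+{
+        return (counter & (-counter)) == counter;
+}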
+
+void
+ksocknal_launch_connection_locked (ksock_route_t *route)
+{
+ unsigned long flags;
+ int bits;
+
+ /* called holding write lock on ksnd_global_lock */
+ LASSERT (route->ksnr_connecting == 0);
+
+ bits = *ksocknal_tunables.ksnd_typed_conns ?
+ KSNR_TYPED_ROUTES : (1 << SOCKLND_CONN_ANY);
+ bits &= ~route->ksnr_connected;
+
+ LASSERT (bits != 0);
+
+ route->ksnr_connecting = bits; /* scheduling conn for connd */
+ ksocknal_route_addref(route); /* extra ref for connd */
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags);
+
+ list_add_tail (&route->ksnr_connd_list,
+ &ksocknal_data.ksnd_connd_routes);
+ cfs_waitq_signal (&ksocknal_data.ksnd_connd_waitq);
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags);
+}
+
+ksock_conn_t *
+ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer)
+{
+ struct list_head *tmp;
+ ksock_conn_t *typed = NULL;
+ int tnob = 0;
+ ksock_conn_t *fallback = NULL;
+ int fnob = 0;
+ ksock_conn_t *conn;
+
+ list_for_each (tmp, &peer->ksnp_conns) {
+ ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
+#if SOCKNAL_ROUND_ROBIN
+ const int nob = 0;
+#else
+ int nob = atomic_read(&c->ksnc_tx_nob);
+#endif
+ LASSERT (!c->ksnc_closing);
+
+ if (fallback == NULL || nob < fnob) {
+ fallback = c;
+ fnob = nob;
+ }
+
+ if (!*ksocknal_tunables.ksnd_typed_conns)
+ continue;
+
+ switch (c->ksnc_type) {
+ default:
+ CERROR("ksnc_type bad: %u\n", c->ksnc_type);
+ LBUG();
+ case SOCKLND_CONN_ANY:
+ break;
+ case SOCKLND_CONN_BULK_IN:
+ continue;
+ case SOCKLND_CONN_BULK_OUT:
+ if (tx->tx_nob < *ksocknal_tunables.ksnd_min_bulk)
+ continue;
+ break;
+ case SOCKLND_CONN_CONTROL:
+ if (tx->tx_nob >= *ksocknal_tunables.ksnd_min_bulk)
+ continue;
+ break;
+ }
+
+ if (typed == NULL || nob < tnob) {
+ typed = c;
+ tnob = nob;
+ }
+ }
+
+ /* prefer the typed selection */
+ conn = (typed != NULL) ? typed : fallback;
+
+#if SOCKNAL_ROUND_ROBIN
+ if (conn != NULL) {
+ /* round-robin all else being equal */
+ list_del (&conn->ksnc_list);
+ list_add_tail (&conn->ksnc_list, &peer->ksnp_conns);
+ }
+#endif
+ return conn;
+}
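+
+/* Sketch (illustrative, not part of the original change): with typed
+ * connections enabled, the selection above steers a message by size --
+ * bulk payloads (>= ksnd_min_bulk) prefer BULK_OUT conns, small ones
+ * prefer CONTROL -- then picks the least-loaded candidate by queued
+ * bytes (ksnc_tx_nob). */
+static inline int
+ksocknal_preferred_conn_type_example (int nob)
+{
+        return nob >= *ksocknal_tunables.ksnd_min_bulk ?
+               SOCKLND_CONN_BULK_OUT : SOCKLND_CONN_CONTROL;
+}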
+
+void
+ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
+{
+ unsigned long flags;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+
+ /* called holding global lock (read or irq-write) and caller may
+ * not have dropped this lock between finding conn and calling me,
+ * so we don't need the {get,put}connsock dance to deref
+ * ksnc_sock... */
+ LASSERT(!conn->ksnc_closing);
+ LASSERT(tx->tx_resid == tx->tx_nob);
+
+ CDEBUG (D_NET, "Sending to %s ip %d.%d.%d.%d:%d\n",
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+
+ atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
+ tx->tx_conn = conn;
+
+#if SOCKNAL_ZC
+ zccd_init (&tx->tx_zccd, ksocknal_zc_callback);
+ /* NB this sets 1 ref on zccd, so the callback can only occur after
+ * I've released this ref. */
+#endif
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ if (list_empty(&conn->ksnc_tx_queue)) {
+ /* First packet starts the timeout */
+ conn->ksnc_tx_deadline =
+ cfs_time_shift(*ksocknal_tunables.ksnd_timeout);
+ conn->ksnc_tx_bufnob = 0;
+ mb(); /* order with adding to tx_queue */
+ }
+
+ list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
+
+ if (conn->ksnc_tx_ready && /* able to send */
+ !conn->ksnc_tx_scheduled) { /* not scheduled to send */
+ /* +1 ref for scheduler */
+ ksocknal_conn_addref(conn);
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+}
+
+ksock_route_t *
+ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
+{
+ struct list_head *tmp;
+ ksock_route_t *route;
+ int bits;
+
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+ bits = route->ksnr_connected | route->ksnr_connecting;
+
+ if (*ksocknal_tunables.ksnd_typed_conns) {
+ /* All typed connections (being) established? */
+ if ((bits & KSNR_TYPED_ROUTES) == KSNR_TYPED_ROUTES)
+ continue;
+ } else {
+ /* Untyped connection (being) established? */
+ if ((bits & (1 << SOCKLND_CONN_ANY)) != 0)
+ continue;
+ }
+
+ /* too soon to retry this guy? */
+ if (!(route->ksnr_retry_interval == 0 || /* first attempt */
+ cfs_time_aftereq (cfs_time_current(),
+ route->ksnr_timeout)))
+ continue;
+
+ return (route);
+ }
+
+ return (NULL);
+}
+
+ksock_route_t *
+ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
+{
+ struct list_head *tmp;
+ ksock_route_t *route;
+
+ list_for_each (tmp, &peer->ksnp_routes) {
+ route = list_entry (tmp, ksock_route_t, ksnr_list);
+
+ if (route->ksnr_connecting != 0)
+ return (route);
+ }
+
+ return (NULL);
+}
+
+int
+ksocknal_launch_packet (lnet_ni_t *ni, ksock_tx_t *tx, lnet_process_id_t id)
+{
+ unsigned long flags;
+ ksock_peer_t *peer;
+ ksock_conn_t *conn;
+ ksock_route_t *route;
+ rwlock_t *g_lock;
+ int retry;
+ int rc;
+
+ /* Ensure the frags we've been given EXACTLY match the number of
+ * bytes we want to send. Many TCP/IP stacks disregard any total
+ * size parameters passed to them and just look at the frags.
+ *
+ * We always expect at least 1 mapped fragment containing the
+ * complete portals header. */
+ LASSERT (lnet_iov_nob (tx->tx_niov, tx->tx_iov) +
+ lnet_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
+ LASSERT (tx->tx_niov >= 1);
+ LASSERT (tx->tx_iov[0].iov_len >= sizeof (lnet_hdr_t));
+ LASSERT (tx->tx_conn == NULL);
+ LASSERT (tx->tx_resid == tx->tx_nob);
+
+ CDEBUG (D_NET, "packet %p type %d, nob %d niov %d nkiov %d\n",
+ tx, ((lnet_hdr_t *)tx->tx_iov[0].iov_base)->type,
+ tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
+
+ g_lock = &ksocknal_data.ksnd_global_lock;
+
+ for (retry = 0;; retry = 1) {
+#if !SOCKNAL_ROUND_ROBIN
+ read_lock (g_lock);
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL) {
+ if (ksocknal_find_connectable_route_locked(peer) == NULL) {
+ conn = ksocknal_find_conn_locked (tx, peer);
+ if (conn != NULL) {
+ /* I've got no routes that need to be
+ * connecting and I do have an actual
+ * connection... */
+ ksocknal_queue_tx_locked (tx, conn);
+ read_unlock (g_lock);
+ return (0);
+ }
+ }
+ }
+
+ /* I'll need a write lock... */
+ read_unlock (g_lock);
+#endif
+ write_lock_irqsave(g_lock, flags);
+
+ peer = ksocknal_find_peer_locked(ni, id);
+ if (peer != NULL)
+ break;
+
+ write_unlock_irqrestore(g_lock, flags);
+
+ if ((id.pid & LNET_PID_USERFLAG) != 0) {
+ CERROR("Refusing to create a connection to "
+ "userspace process %s\n", libcfs_id2str(id));
+ return -EHOSTUNREACH;
+ }
+
+ if (retry) {
+ CERROR("Can't find peer %s\n", libcfs_id2str(id));
+ return -EHOSTUNREACH;
+ }
+
+ rc = ksocknal_add_peer(ni, id,
+ LNET_NIDADDR(id.nid),
+ lnet_acceptor_port());
+ if (rc != 0) {
+ CERROR("Can't add peer %s: %d\n",
+ libcfs_id2str(id), rc);
+ return rc;
+ }
+ }
+
+ for (;;) {
+ /* launch any/all connections that need it */
+ route = ksocknal_find_connectable_route_locked (peer);
+ if (route == NULL)
+ break;
+
+ ksocknal_launch_connection_locked (route);
+ }
+
+ conn = ksocknal_find_conn_locked (tx, peer);
+ if (conn != NULL) {
+ /* Connection exists; queue message on it */
+ ksocknal_queue_tx_locked (tx, conn);
+ write_unlock_irqrestore (g_lock, flags);
+ return (0);
+ }
+
+ if (peer->ksnp_accepting > 0 ||
+ ksocknal_find_connecting_route_locked (peer) != NULL) {
+ /* Queue the message until a connection is established */
+ list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
+ write_unlock_irqrestore (g_lock, flags);
+ return 0;
+ }
+
+ write_unlock_irqrestore (g_lock, flags);
+
+ /* NB Routes may be ignored if connections to them failed recently */
+ CERROR("No usable routes to %s\n", libcfs_id2str(id));
+ return (-EHOSTUNREACH);
+}
+
+int
+ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
+{
+ lnet_hdr_t *hdr = &lntmsg->msg_hdr;
+ int type = lntmsg->msg_type;
+ lnet_process_id_t target = lntmsg->msg_target;
+ unsigned int payload_niov = lntmsg->msg_niov;
+ struct iovec *payload_iov = lntmsg->msg_iov;
+ lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
+ unsigned int payload_offset = lntmsg->msg_offset;
+ unsigned int payload_nob = lntmsg->msg_len;
+ ksock_tx_t *tx;
+ int desc_size;
+ int rc;
+
+ /* NB 'private' is different depending on what we're sending.
+ * Just ignore it... */
+
+ CDEBUG(D_NET, "sending %u bytes in %d frags to %s\n",
+ payload_nob, payload_niov, libcfs_id2str(target));
+
+ LASSERT (payload_nob == 0 || payload_niov > 0);
+ LASSERT (payload_niov <= LNET_MAX_IOV);
+ /* payload is either all vaddrs or all pages */
+ LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
+ LASSERT (!in_interrupt ());
+
+ if (payload_iov != NULL)
+ desc_size = offsetof(ksock_tx_t,
+ tx_frags.virt.iov[1 + payload_niov]);
+ else
+ desc_size = offsetof(ksock_tx_t,
+ tx_frags.paged.kiov[payload_niov]);
+
+ LIBCFS_ALLOC(tx, desc_size);
+ if (tx == NULL) {
+ CERROR("Can't allocate tx desc type %d size %d\n",
+ type, desc_size);
+ return (-ENOMEM);
+ }
+
+ atomic_inc(&ksocknal_data.ksnd_nactive_txs);
+
+ tx->tx_conn = NULL; /* set when assigned a conn */
+ tx->tx_desc_size = desc_size;
+ tx->tx_lnetmsg = lntmsg;
+
+ if (payload_iov != NULL) {
+ tx->tx_kiov = NULL;
+ tx->tx_nkiov = 0;
+ tx->tx_iov = tx->tx_frags.virt.iov;
+ tx->tx_niov = 1 +
+ lnet_extract_iov(payload_niov, &tx->tx_iov[1],
+ payload_niov, payload_iov,
+ payload_offset, payload_nob);
+ } else {
+ tx->tx_niov = 1;
+ tx->tx_iov = &tx->tx_frags.paged.iov;
+ tx->tx_kiov = tx->tx_frags.paged.kiov;
+ tx->tx_nkiov = lnet_extract_kiov(payload_niov, tx->tx_kiov,
+ payload_niov, payload_kiov,
+ payload_offset, payload_nob);
+ }
+
+ /* first frag is the header */
+ tx->tx_iov[0].iov_base = (void *)hdr;
+ tx->tx_iov[0].iov_len = sizeof(*hdr);
+ tx->tx_resid = tx->tx_nob = sizeof (*hdr) + payload_nob;
+
+ rc = ksocknal_launch_packet(ni, tx, target);
+ if (rc == 0)
+ return (0);
+
+ ksocknal_free_tx(tx);
+ return (-EIO);
+}
+
+int
+ksocknal_thread_start (int (*fn)(void *arg), void *arg)
+{
+ long pid = cfs_kernel_thread (fn, arg, 0);
+ unsigned long flags;
+
+ if (pid < 0)
+ return ((int)pid);
+
+ write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
+ ksocknal_data.ksnd_nthreads++;
+ write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+ return (0);
+}
+
+void
+ksocknal_thread_fini (void)
+{
+ unsigned long flags;
+
+ write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
+ ksocknal_data.ksnd_nthreads--;
+ write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+}
+
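+/* Prepare 'conn' to receive the next packet.  Returns 1 if set up to read
+ * a new header into ksnc_hdr, or 0 if still discarding 'nob_to_skip' bytes
+ * of slop into a scratch buffer (in which case we get called again). */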
+int
+ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
+{
+ static char ksocknal_slop_buffer[4096];
+
+ int nob;
+ unsigned int niov;
+ int skipped;
+
+ if (nob_to_skip == 0) { /* right at next packet boundary now */
+ conn->ksnc_rx_started = 0;
+ mb (); /* racing with timeout thread */
+
+ conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
+ conn->ksnc_rx_nob_wanted = sizeof (lnet_hdr_t);
+ conn->ksnc_rx_nob_left = sizeof (lnet_hdr_t);
+
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
+ conn->ksnc_rx_iov[0].iov_len = sizeof (lnet_hdr_t);
+ conn->ksnc_rx_niov = 1;
+
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_nkiov = 0;
+ return (1);
+ }
+
+        /* Set up to skip as much as possible now. If there's more left
+ * (ran out of iov entries) we'll get called again */
+
+ conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
+ conn->ksnc_rx_nob_left = nob_to_skip;
+ conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
+ skipped = 0;
+ niov = 0;
+
+ do {
+ nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
+
+ conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
+ conn->ksnc_rx_iov[niov].iov_len = nob;
+ niov++;
+ skipped += nob;
+                nob_to_skip -= nob;
+
+ } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */
+ niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
+
+ conn->ksnc_rx_niov = niov;
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_nkiov = 0;
+ conn->ksnc_rx_nob_wanted = skipped;
+ return (0);
+}
+
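+/* Progress the receive state machine on 'conn':
+ *   RX_HEADER -> parse the header via lnet_parse() (-> RX_PARSE/RX_BODY)
+ *   RX_BODY   -> finalize the message once the payload has arrived
+ *   RX_SLOP   -> discard bytes until the next packet boundary
+ * Returns 0 on success, -EAGAIN on a short read, or a fatal error after
+ * closing the connection. */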
+int
+ksocknal_process_receive (ksock_conn_t *conn)
+{
+ int rc;
+
+ LASSERT (atomic_read(&conn->ksnc_conn_refcount) > 0);
+
+ /* NB: sched lock NOT held */
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
+ conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
+ conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
+ again:
+ if (conn->ksnc_rx_nob_wanted != 0) {
+ rc = ksocknal_receive(conn);
+
+ if (rc <= 0) {
+ LASSERT (rc != -EAGAIN);
+
+ if (rc == 0)
+ CDEBUG (D_NET, "[%p] EOF from %s"
+ " ip %d.%d.%d.%d:%d\n", conn,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ else if (!conn->ksnc_closing)
+ CERROR ("[%p] Error %d on read from %s"
+ " ip %d.%d.%d.%d:%d\n",
+ conn, rc,
+ libcfs_id2str(conn->ksnc_peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+
+ ksocknal_close_conn_and_siblings (conn, rc);
+ return (rc == 0 ? -ESHUTDOWN : rc);
+ }
+
+ if (conn->ksnc_rx_nob_wanted != 0) {
+ /* short read */
+ return (-EAGAIN);
+ }
+ }
+
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_HEADER:
+ if ((conn->ksnc_peer->ksnp_id.pid & LNET_PID_USERFLAG) != 0) {
+ /* Userspace peer */
+ lnet_process_id_t *id = &conn->ksnc_peer->ksnp_id;
+
+ /* Substitute process ID assigned at connection time */
+ conn->ksnc_hdr.src_pid = cpu_to_le32(id->pid);
+ conn->ksnc_hdr.src_nid = cpu_to_le64(id->nid);
+ }
+
+ conn->ksnc_rx_state = SOCKNAL_RX_PARSE;
+ ksocknal_conn_addref(conn); /* ++ref while parsing */
+
+ rc = lnet_parse(conn->ksnc_peer->ksnp_ni, &conn->ksnc_hdr,
+ conn->ksnc_peer->ksnp_id.nid, conn);
+ if (rc < 0) {
+ /* I just received garbage: give up on this conn */
+ ksocknal_new_packet(conn, 0);
+ ksocknal_close_conn_and_siblings (conn, rc);
+ ksocknal_conn_decref(conn);
+ return (-EPROTO);
+ }
+
+ /* I'm racing with ksocknal_recv() */
+ LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_PARSE ||
+ conn->ksnc_rx_state == SOCKNAL_RX_BODY);
+
+ if (conn->ksnc_rx_state != SOCKNAL_RX_BODY)
+ return 0;
+
+ /* ksocknal_recv() got called */
+ goto again;
+
+ case SOCKNAL_RX_BODY:
+ /* payload all received */
+ lnet_finalize(conn->ksnc_peer->ksnp_ni, conn->ksnc_cookie, 0);
+ /* Fall through */
+
+ case SOCKNAL_RX_SLOP:
+ /* starting new packet? */
+ if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
+ return 0; /* come back later */
+ goto again; /* try to finish reading slop now */
+
+ default:
+ break;
+ }
+
+ /* Not Reached */
+ LBUG ();
+ return (-EINVAL); /* keep gcc happy */
+}
+
+int
+ksocknal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg, int delayed,
+ unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
+ unsigned int offset, unsigned int mlen, unsigned int rlen)
+{
+ ksock_conn_t *conn = (ksock_conn_t *)private;
+ ksock_sched_t *sched = conn->ksnc_scheduler;
+ unsigned long flags;
+
+ LASSERT (mlen <= rlen);
+ LASSERT (niov <= LNET_MAX_IOV);
+
+ conn->ksnc_cookie = msg;
+ conn->ksnc_rx_nob_wanted = mlen;
+ conn->ksnc_rx_nob_left = rlen;
+
+ if (mlen == 0 || iov != NULL) {
+ conn->ksnc_rx_nkiov = 0;
+ conn->ksnc_rx_kiov = NULL;
+ conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov;
+ conn->ksnc_rx_niov =
+ lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov,
+ niov, iov, offset, mlen);
+ } else {
+ conn->ksnc_rx_niov = 0;
+ conn->ksnc_rx_iov = NULL;
+ conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
+ conn->ksnc_rx_nkiov =
+ lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov,
+ niov, kiov, offset, mlen);
+ }
+
+ LASSERT (mlen ==
+ lnet_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
+ lnet_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
+
+ LASSERT (conn->ksnc_rx_scheduled);
+
+ spin_lock_irqsave(&sched->kss_lock, flags);
+
+ switch (conn->ksnc_rx_state) {
+ case SOCKNAL_RX_PARSE_WAIT:
+ list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
+ cfs_waitq_signal (&sched->kss_waitq);
+ LASSERT (conn->ksnc_rx_ready);
+ break;
+
+ case SOCKNAL_RX_PARSE:
+ /* scheduler hasn't noticed I'm parsing yet */
+ break;
+ }
+
+ conn->ksnc_rx_state = SOCKNAL_RX_BODY;
+
+ spin_unlock_irqrestore(&sched->kss_lock, flags);
+ ksocknal_conn_decref(conn);
+ return (0);
+}
+
+static inline int
+ksocknal_sched_cansleep(ksock_sched_t *sched)
+{
+ unsigned long flags;
+ int rc;
+
+ spin_lock_irqsave(&sched->kss_lock, flags);
+
+ rc = (!ksocknal_data.ksnd_shuttingdown &&
+#if SOCKNAL_ZC
+ list_empty(&sched->kss_zctxdone_list) &&
+#endif
+ list_empty(&sched->kss_rx_conns) &&
+ list_empty(&sched->kss_tx_conns));
+
+ spin_unlock_irqrestore(&sched->kss_lock, flags);
+ return (rc);
+}
+
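+/* Scheduler thread: alternately services one receiving and one sending
+ * connection per loop so neither direction can starve the other, and
+ * yields the CPU every SOCKNAL_RESCHED iterations or sleeps when idle. */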
+int ksocknal_scheduler (void *arg)
+{
+ ksock_sched_t *sched = (ksock_sched_t *)arg;
+ ksock_conn_t *conn;
+ ksock_tx_t *tx;
+ unsigned long flags;
+ int rc;
+ int nloops = 0;
+ int id = sched - ksocknal_data.ksnd_schedulers;
+ char name[16];
+
+ snprintf (name, sizeof (name),"socknal_sd%02d", id);
+ libcfs_daemonize (name);
+ libcfs_blockallsigs ();
+
+#if (CONFIG_SMP && CPU_AFFINITY)
+ id = ksocknal_sched2cpu(id);
+ if (cpu_online(id)) {
+ cpumask_t m;
+ cpu_set(id, m);
+ set_cpus_allowed(current, m);
+ } else {
+ CERROR ("Can't set CPU affinity for %s to %d\n", name, id);
+ }
+#endif /* CONFIG_SMP && CPU_AFFINITY */
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+ int did_something = 0;
+
+ /* Ensure I progress everything semi-fairly */
+
+ if (!list_empty (&sched->kss_rx_conns)) {
+ conn = list_entry(sched->kss_rx_conns.next,
+ ksock_conn_t, ksnc_rx_list);
+ list_del(&conn->ksnc_rx_list);
+
+ LASSERT(conn->ksnc_rx_scheduled);
+ LASSERT(conn->ksnc_rx_ready);
+
+ /* clear rx_ready in case receive isn't complete.
+ * Do it BEFORE we call process_recv, since
+ * data_ready can set it any time after we release
+ * kss_lock. */
+ conn->ksnc_rx_ready = 0;
+ spin_unlock_irqrestore(&sched->kss_lock, flags);
+
+ rc = ksocknal_process_receive(conn);
+
+ spin_lock_irqsave(&sched->kss_lock, flags);
+
+ /* I'm the only one that can clear this flag */
+ LASSERT(conn->ksnc_rx_scheduled);
+
+ /* Did process_receive get everything it wanted? */
+ if (rc == 0)
+ conn->ksnc_rx_ready = 1;
+
+ if (conn->ksnc_rx_state == SOCKNAL_RX_PARSE) {
+ /* Conn blocked waiting for ksocknal_recv()
+ * I change its state (under lock) to signal
+ * it can be rescheduled */
+ conn->ksnc_rx_state = SOCKNAL_RX_PARSE_WAIT;
+ } else if (conn->ksnc_rx_ready) {
+ /* reschedule for rx */
+ list_add_tail (&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ } else {
+ conn->ksnc_rx_scheduled = 0;
+ /* drop my ref */
+ ksocknal_conn_decref(conn);
+ }
+
+ did_something = 1;
+ }
+
+ if (!list_empty (&sched->kss_tx_conns)) {
+ conn = list_entry(sched->kss_tx_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ LASSERT(conn->ksnc_tx_scheduled);
+ LASSERT(conn->ksnc_tx_ready);
+ LASSERT(!list_empty(&conn->ksnc_tx_queue));
+
+ tx = list_entry(conn->ksnc_tx_queue.next,
+ ksock_tx_t, tx_list);
+ /* dequeue now so empty list => more to send */
+ list_del(&tx->tx_list);
+
+ /* Clear tx_ready in case send isn't complete. Do
+ * it BEFORE we call process_transmit, since
+ * write_space can set it any time after we release
+ * kss_lock. */
+ conn->ksnc_tx_ready = 0;
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ rc = ksocknal_process_transmit(conn, tx);
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ if (rc == -ENOMEM || rc == -EAGAIN) {
+ /* Incomplete send: replace tx on HEAD of tx_queue */
+ list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+ } else {
+ /* Complete send; assume space for more */
+ conn->ksnc_tx_ready = 1;
+ }
+
+ if (rc == -ENOMEM) {
+ /* Do nothing; after a short timeout, this
+ * conn will be reposted on kss_tx_conns. */
+ } else if (conn->ksnc_tx_ready &&
+ !list_empty (&conn->ksnc_tx_queue)) {
+ /* reschedule for tx */
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ } else {
+ conn->ksnc_tx_scheduled = 0;
+ /* drop my ref */
+ ksocknal_conn_decref(conn);
+ }
+
+ did_something = 1;
+ }
+#if SOCKNAL_ZC
+ if (!list_empty (&sched->kss_zctxdone_list)) {
+ ksock_tx_t *tx =
+ list_entry(sched->kss_zctxdone_list.next,
+ ksock_tx_t, tx_list);
+ did_something = 1;
+
+ list_del (&tx->tx_list);
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ ksocknal_tx_done (tx->tx_conn->ksnc_peer->ksnp_ni,
+ tx, 1);
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+#endif
+ if (!did_something || /* nothing to do */
+ ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ nloops = 0;
+
+ if (!did_something) { /* wait for something to do */
+ wait_event_interruptible_exclusive(
+ sched->kss_waitq,
+ !ksocknal_sched_cansleep(sched), rc);
+ LASSERT (rc == 0);
+ } else
+ our_cond_resched();
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+ }
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ ksocknal_thread_fini ();
+ return (0);
+}
+
+/*
+ * Add connection to kss_rx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_read_callback (ksock_conn_t *conn)
+{
+ ksock_sched_t *sched;
+ unsigned long flags;
+ ENTRY;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ conn->ksnc_rx_ready = 1;
+
+ if (!conn->ksnc_rx_scheduled) { /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ EXIT;
+}
+
+/*
+ * Add connection to kss_tx_conns of scheduler
+ * and wakeup the scheduler.
+ */
+void ksocknal_write_callback (ksock_conn_t *conn)
+{
+ ksock_sched_t *sched;
+ unsigned long flags;
+ ENTRY;
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ conn->ksnc_tx_ready = 1;
+
+        if (!conn->ksnc_tx_scheduled &&           /* not being progressed */
+            !list_empty(&conn->ksnc_tx_queue)) {  /* packets to send */
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ ksocknal_conn_addref(conn);
+
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+
+ EXIT;
+}
+
+int
+ksocknal_send_hello (lnet_ni_t *ni, ksock_conn_t *conn, lnet_nid_t peer_nid,
+ __u32 *ipaddrs, int nipaddrs)
+{
+ /* CAVEAT EMPTOR: this byte flips 'ipaddrs' */
+ ksock_net_t *net = (ksock_net_t *)ni->ni_data;
+ struct socket *sock = conn->ksnc_sock;
+ lnet_hdr_t hdr;
+ lnet_magicversion_t *hmv = (lnet_magicversion_t *)&hdr.dest_nid;
+ int i;
+ int rc;
+ lnet_nid_t srcnid;
+
+ LASSERT (0 <= nipaddrs && nipaddrs <= LNET_MAX_INTERFACES);
+
+ /* No need for getconnsock/putconnsock */
+ LASSERT (!conn->ksnc_closing);
+ LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+ hmv->magic = cpu_to_le32 (LNET_PROTO_TCP_MAGIC);
+ hmv->version_major = cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR);
+ hmv->version_minor = cpu_to_le16 (LNET_PROTO_TCP_VERSION_MINOR);
+
+ srcnid = lnet_ptlcompat_srcnid(ni->ni_nid, peer_nid);
+
+ hdr.src_nid = cpu_to_le64 (srcnid);
+ hdr.src_pid = cpu_to_le64 (the_lnet.ln_pid);
+ hdr.type = cpu_to_le32 (LNET_MSG_HELLO);
+ hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
+
+ hdr.msg.hello.type = cpu_to_le32 (conn->ksnc_type);
+ hdr.msg.hello.incarnation = cpu_to_le64 (net->ksnn_incarnation);
+
+ for (i = 0; i < nipaddrs; i++) {
+ ipaddrs[i] = __cpu_to_le32 (ipaddrs[i]);
+ }
+
+ /* socket buffer should have been set large enough not to block
+ * (timeout == 0) */
+ rc = libcfs_sock_write(sock, &hdr, sizeof(hdr), 0);
+ if (rc != 0) {
+ CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ return (rc);
+ }
+
+ if (nipaddrs == 0)
+ return (0);
+
+ rc = libcfs_sock_write(sock, ipaddrs, nipaddrs * sizeof(*ipaddrs), 0);
+ if (rc != 0)
+ CERROR ("Error %d sending HELLO payload (%d)"
+ " to %u.%u.%u.%u/%d\n", rc, nipaddrs,
+ HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
+ return (rc);
+}
+
+int
+ksocknal_invert_type(int type)
+{
+ switch (type)
+ {
+ case SOCKLND_CONN_ANY:
+ case SOCKLND_CONN_CONTROL:
+ return (type);
+ case SOCKLND_CONN_BULK_IN:
+ return SOCKLND_CONN_BULK_OUT;
+ case SOCKLND_CONN_BULK_OUT:
+ return SOCKLND_CONN_BULK_IN;
+ default:
+ return (SOCKLND_CONN_NONE);
+ }
+}
+
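+/* Read and validate a HELLO handshake on 'sock'.  On an active connect
+ * (peerid->nid already known) the peer's reply must match; on a passive
+ * accept the peer's identity is filled in from the handshake.  Returns
+ * the number of interface IPs the peer advertised, or -ve error. */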
+int
+ksocknal_recv_hello (lnet_ni_t *ni, ksock_conn_t *conn,
+ lnet_process_id_t *peerid,
+ __u64 *incarnation, __u32 *ipaddrs)
+{
+ struct socket *sock = conn->ksnc_sock;
+ int active;
+ int timeout;
+ int rc;
+ int nips;
+ int i;
+ int type;
+ lnet_hdr_t hdr;
+ lnet_process_id_t recv_id;
+ lnet_magicversion_t *hmv;
+
+ active = (peerid->nid != LNET_NID_ANY);
+ timeout = active ? *ksocknal_tunables.ksnd_timeout :
+ lnet_acceptor_timeout();
+
+ hmv = (lnet_magicversion_t *)&hdr.dest_nid;
+ LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
+
+ rc = libcfs_sock_read(sock, &hmv->magic, sizeof (hmv->magic), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return (rc);
+ }
+
+ if (!active &&
+ hmv->magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
+ /* Is this a generic acceptor connection request? */
+ rc = lnet_accept(ni, sock, hmv->magic);
+ if (rc != 0)
+ return -EPROTO;
+
+                /* Yes it is!  Start over again now that I've skipped the
+                 * generic request */
+ rc = libcfs_sock_read(sock, &hmv->magic,
+ sizeof (hmv->magic), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return (rc);
+ }
+ }
+
+ if (hmv->magic != le32_to_cpu (LNET_PROTO_TCP_MAGIC)) {
+ CERROR ("Bad magic %#08x (%#08x expected) from %u.%u.%u.%u\n",
+ __cpu_to_le32 (hmv->magic), LNET_PROTO_TCP_MAGIC,
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+
+ rc = libcfs_sock_read(sock, &hmv->magic + 1,
+ sizeof(*hmv) - sizeof(hmv->magic), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return (rc);
+ }
+
+ if (hmv->version_major != cpu_to_le16 (LNET_PROTO_TCP_VERSION_MAJOR) ||
+ hmv->version_minor != cpu_to_le16 (LNET_PROTO_TCP_VERSION_MINOR)) {
+ CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
+ " from %u.%u.%u.%u\n",
+ le16_to_cpu (hmv->version_major),
+ le16_to_cpu (hmv->version_minor),
+ LNET_PROTO_TCP_VERSION_MAJOR,
+ LNET_PROTO_TCP_VERSION_MINOR,
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+
+#if (LNET_PROTO_TCP_VERSION_MAJOR != 1)
+# error "This code only understands protocol version 1.x"
+#endif
+ /* version 1 sends magic/version as the dest_nid of a 'hello'
+ * header, followed by payload full of interface IP addresses.
+ * Read the rest of it in now... */
+
+ rc = libcfs_sock_read(sock, hmv + 1, sizeof (hdr) - sizeof (*hmv),
+ timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
+ rc, HIPQUAD(conn->ksnc_ipaddr));
+ LASSERT (rc < 0 && rc != -EALREADY);
+ return (rc);
+ }
+
+ /* ...and check we got what we expected */
+ if (hdr.type != cpu_to_le32 (LNET_MSG_HELLO)) {
+ CERROR ("Expecting a HELLO hdr,"
+ " but got type %d from %u.%u.%u.%u\n",
+ le32_to_cpu (hdr.type),
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+
+ if (le64_to_cpu(hdr.src_nid) == LNET_NID_ANY) {
+ CERROR("Expecting a HELLO hdr with a NID, but got LNET_NID_ANY"
+                       " from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+
+ if (conn->ksnc_port > LNET_ACCEPTOR_MAX_RESERVED_PORT) {
+ /* Userspace NAL assigns peer process ID from socket */
+ recv_id.pid = conn->ksnc_port | LNET_PID_USERFLAG;
+ recv_id.nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), conn->ksnc_ipaddr);
+ } else {
+ recv_id.nid = le64_to_cpu(hdr.src_nid);
+
+ if (the_lnet.ln_ptlcompat > 1 && /* portals peers may exist */
+ LNET_NIDNET(recv_id.nid) == 0) /* this is one */
+ recv_id.pid = the_lnet.ln_pid; /* give it a sensible pid */
+ else
+ recv_id.pid = le32_to_cpu(hdr.src_pid);
+
+ }
+
+ if (!active) { /* don't know peer's nid yet */
+ *peerid = recv_id;
+ } else if (peerid->pid != recv_id.pid ||
+ !lnet_ptlcompat_matchnid(peerid->nid, recv_id.nid)) {
+ LCONSOLE_ERROR("Connected successfully to %s on host "
+ "%u.%u.%u.%u, but they claimed they were "
+ "%s; please check your Lustre "
+ "configuration.\n",
+ libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr),
+ libcfs_id2str(recv_id));
+ return (-EPROTO);
+ }
+
+ type = __le32_to_cpu(hdr.msg.hello.type);
+
+ if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+ /* I've accepted this connection; peer determines type */
+ conn->ksnc_type = ksocknal_invert_type(type);
+ if (conn->ksnc_type == SOCKLND_CONN_NONE) {
+ CERROR ("Unexpected type %d from %s ip %u.%u.%u.%u\n",
+ type, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+ } else if (type == SOCKLND_CONN_NONE) {
+ /* lost a connection race */
+ return -EALREADY;
+ } else if (ksocknal_invert_type(type) != conn->ksnc_type) {
+ CERROR ("Mismatched types: me %d, %s ip %u.%u.%u.%u %d\n",
+ conn->ksnc_type, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr),
+ le32_to_cpu(hdr.msg.hello.type));
+ return (-EPROTO);
+ }
+
+ *incarnation = le64_to_cpu(hdr.msg.hello.incarnation);
+
+ nips = __le32_to_cpu (hdr.payload_length) / sizeof (__u32);
+
+        if (nips > LNET_MAX_INTERFACES ||
+            nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) {
+                CERROR("Bad payload length %d from %s ip %u.%u.%u.%u\n",
+                       __le32_to_cpu (hdr.payload_length),
+                       libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
+                return (-EPROTO);
+        }
+
+ if (nips == 0)
+ return (0);
+
+ rc = libcfs_sock_read(sock, ipaddrs, nips * sizeof(*ipaddrs), timeout);
+ if (rc != 0) {
+ CERROR ("Error %d reading IPs from %s ip %u.%u.%u.%u\n",
+ rc, libcfs_id2str(*peerid), HIPQUAD(conn->ksnc_ipaddr));
+ return (rc);
+ }
+
+ for (i = 0; i < nips; i++) {
+ ipaddrs[i] = __le32_to_cpu(ipaddrs[i]);
+
+ if (ipaddrs[i] == 0) {
+ CERROR("Zero IP[%d] from %s ip %u.%u.%u.%u\n",
+ i, libcfs_id2str(*peerid),
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (-EPROTO);
+ }
+ }
+
+ return (nips);
+}
+
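+/* Establish every connection type this route still needs (just CONN_ANY,
+ * or CONTROL/BULK_IN/BULK_OUT when typed connections are enabled).  On
+ * failure the retry interval backs off exponentially, clamped between the
+ * min/max_reconnectms tunables, and any queued packets are completed with
+ * an error. */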
+void
+ksocknal_connect (ksock_route_t *route)
+{
+ CFS_LIST_HEAD (zombies);
+ ksock_peer_t *peer = route->ksnr_peer;
+ unsigned long flags;
+ int type;
+ struct socket *sock;
+ cfs_time_t deadline;
+ int rc = 0;
+
+ deadline = cfs_time_add(cfs_time_current(),
+ cfs_time_seconds(*ksocknal_tunables.ksnd_timeout));
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ for (;;) {
+ if (!*ksocknal_tunables.ksnd_typed_conns) {
+ if ((route->ksnr_connected & (1<<SOCKLND_CONN_ANY)) == 0)
+ type = SOCKLND_CONN_ANY;
+ else
+ break; /* got connected while route queued */
+ } else {
+ if ((route->ksnr_connected & (1<<SOCKLND_CONN_CONTROL)) == 0)
+ type = SOCKLND_CONN_CONTROL;
+ else if ((route->ksnr_connected & (1<<SOCKLND_CONN_BULK_IN)) == 0)
+ type = SOCKLND_CONN_BULK_IN;
+ else if ((route->ksnr_connected & (1<<SOCKLND_CONN_BULK_OUT)) == 0)
+ type = SOCKLND_CONN_BULK_OUT;
+ else
+ break; /* got connected while route queued */
+ }
+
+ write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
+
+ if (cfs_time_aftereq(cfs_time_current(), deadline)) {
+ lnet_connect_console_error(-ETIMEDOUT, peer->ksnp_id.nid,
+ route->ksnr_ipaddr,
+ route->ksnr_port);
+ goto failed;
+ }
+
+ rc = lnet_connect(&sock, peer->ksnp_id.nid,
+ route->ksnr_myipaddr,
+ route->ksnr_ipaddr, route->ksnr_port);
+ if (rc != 0)
+ goto failed;
+
+ rc = ksocknal_create_conn(peer->ksnp_ni, route, sock, type);
+ if (rc < 0) {
+ lnet_connect_console_error(rc, peer->ksnp_id.nid,
+ route->ksnr_ipaddr,
+ route->ksnr_port);
+ goto failed;
+ }
+
+ if (rc != 0) {
+ /* lost connection race; peer is connecting to me, so
+ * give her some time... */
+ cfs_pause(cfs_time_seconds(1));
+ }
+
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+ }
+
+ LASSERT (route->ksnr_connecting == 0);
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+ return;
+
+ failed:
+ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
+
+ route->ksnr_connecting = 0;
+
+ /* This is a retry rather than a new connection */
+ route->ksnr_retry_interval *= 2;
+ route->ksnr_retry_interval =
+ MAX(route->ksnr_retry_interval,
+ cfs_time_seconds(*ksocknal_tunables.ksnd_min_reconnectms)/1000);
+ route->ksnr_retry_interval =
+ MIN(route->ksnr_retry_interval,
+ cfs_time_seconds(*ksocknal_tunables.ksnd_max_reconnectms)/1000);
+
+ LASSERT (route->ksnr_retry_interval != 0);
+ route->ksnr_timeout = cfs_time_add(cfs_time_current(),
+ route->ksnr_retry_interval);
+
+ if (!list_empty(&peer->ksnp_tx_queue) &&
+ peer->ksnp_accepting == 0 &&
+ ksocknal_find_connecting_route_locked(peer) == NULL) {
+ /* ksnp_tx_queue is queued on a conn on successful
+ * connection */
+ LASSERT (list_empty (&peer->ksnp_conns));
+
+ /* take all the blocked packets while I've got the lock and
+ * complete below... */
+ list_add(&zombies, &peer->ksnp_tx_queue);
+ list_del_init(&peer->ksnp_tx_queue);
+ }
+
+#if 0 /* irrelevant with only eager routes */
+ if (!route->ksnr_deleted) {
+ /* make this route least-favourite for re-selection */
+ list_del(&route->ksnr_list);
+ list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
+ }
+#endif
+ write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
+
+ ksocknal_peer_failed(peer);
+ ksocknal_txlist_done(peer->ksnp_ni, &zombies);
+}
+
+int
+ksocknal_connd (void *arg)
+{
+ long id = (long)arg;
+ char name[16];
+ unsigned long flags;
+ ksock_connreq_t *cr;
+ ksock_route_t *route;
+ int rc;
+ int did_something;
+
+ snprintf (name, sizeof (name), "socknal_cd%02ld", id);
+ libcfs_daemonize (name);
+ libcfs_blockallsigs ();
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_connd_lock, flags);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+
+ did_something = 0;
+
+ if (!list_empty(&ksocknal_data.ksnd_connd_connreqs)) {
+ /* Connection accepted by the listener */
+ cr = list_entry(ksocknal_data.ksnd_connd_connreqs.next,
+ ksock_connreq_t, ksncr_list);
+
+ list_del(&cr->ksncr_list);
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
+
+ ksocknal_create_conn(cr->ksncr_ni, NULL,
+ cr->ksncr_sock, SOCKLND_CONN_NONE);
+ lnet_ni_decref(cr->ksncr_ni);
+ LIBCFS_FREE(cr, sizeof(*cr));
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock,
+ flags);
+ did_something = 1;
+ }
+
+ if (!list_empty (&ksocknal_data.ksnd_connd_routes)) {
+ /* Connection request */
+ route = list_entry (ksocknal_data.ksnd_connd_routes.next,
+ ksock_route_t, ksnr_connd_list);
+
+ list_del (&route->ksnr_connd_list);
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags);
+
+ ksocknal_connect (route);
+ ksocknal_route_decref(route);
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock,
+ flags);
+ did_something = 1;
+ }
+
+ if (did_something)
+ continue;
+
+ spin_unlock_irqrestore(&ksocknal_data.ksnd_connd_lock,
+ flags);
+
+ wait_event_interruptible(ksocknal_data.ksnd_connd_waitq,
+ ksocknal_data.ksnd_shuttingdown ||
+ !list_empty(&ksocknal_data.ksnd_connd_connreqs) ||
+ !list_empty(&ksocknal_data.ksnd_connd_routes), rc);
+
+ spin_lock_irqsave(&ksocknal_data.ksnd_connd_lock, flags);
+ }
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_connd_lock, flags);
+
+ ksocknal_thread_fini ();
+ return (0);
+}
+
+ksock_conn_t *
+ksocknal_find_timed_out_conn (ksock_peer_t *peer)
+{
+ /* We're called with a shared lock on ksnd_global_lock */
+ ksock_conn_t *conn;
+ struct list_head *ctmp;
+
+ list_for_each (ctmp, &peer->ksnp_conns) {
+ conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
+
+ /* Don't need the {get,put}connsock dance to deref ksnc_sock... */
+ LASSERT (!conn->ksnc_closing);
+
+ if (conn->ksnc_rx_started &&
+ cfs_time_aftereq (cfs_time_current(), conn->ksnc_rx_deadline)) {
+ /* Timed out incomplete incoming message */
+ ksocknal_conn_addref(conn);
+ LCONSOLE_ERROR("A timeout occurred receiving data from "
+ "%u.%u.%u.%u; the network or that node "
+ "may be down.\n",
+ HIPQUAD(conn->ksnc_ipaddr));
+ CERROR ("Timed out RX from %s %p %d.%d.%d.%d\n",
+ libcfs_id2str(peer->ksnp_id),
+ conn, HIPQUAD(conn->ksnc_ipaddr));
+ return (conn);
+ }
+
+ if ((!list_empty (&conn->ksnc_tx_queue)) &&
+ cfs_time_aftereq (cfs_time_current(), conn->ksnc_tx_deadline)) {
+ /* Timed out messages queued for sending or
+ * buffered in the socket's send buffer */
+ ksocknal_conn_addref(conn);
+ LCONSOLE_ERROR("A timeout occurred sending data to "
+ "%u.%u.%u.%u; the network or that node "
+ "may be down.\n",
+ HIPQUAD(conn->ksnc_ipaddr));
+ return (conn);
+ }
+ }
+
+ return (NULL);
+}
+
+void
+ksocknal_check_peer_timeouts (int idx)
+{
+ struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
+ struct list_head *ptmp;
+ ksock_peer_t *peer;
+ ksock_conn_t *conn;
+
+ again:
+ /* NB. We expect to have a look at all the peers and not find any
+ * connections to time out, so we just use a shared lock while we
+ * take a look... */
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ list_for_each (ptmp, peers) {
+ peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
+ conn = ksocknal_find_timed_out_conn (peer);
+
+ if (conn != NULL) {
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+                        CERROR ("Timed out conn %s, ip %d.%d.%d.%d:%d\n",
+ libcfs_id2str(peer->ksnp_id),
+ HIPQUAD(conn->ksnc_ipaddr),
+ conn->ksnc_port);
+ ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
+
+ /* NB we won't find this one again, but we can't
+ * just proceed with the next peer, since we dropped
+ * ksnd_global_lock and it might be dead already! */
+ ksocknal_conn_decref(conn);
+ goto again;
+ }
+ }
+
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+}
+
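+/* Reaper thread: terminates connections on death row, destroys zombie
+ * connections, reschedules connections that stalled with ENOMEM, and
+ * periodically sweeps the peer table for timed-out connections. */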
+int
+ksocknal_reaper (void *arg)
+{
+ cfs_waitlink_t wait;
+ unsigned long flags;
+ ksock_conn_t *conn;
+ ksock_sched_t *sched;
+ struct list_head enomem_conns;
+ int nenomem_conns;
+ cfs_duration_t timeout;
+ int i;
+ int peer_index = 0;
+ cfs_time_t deadline = cfs_time_current();
+
+ libcfs_daemonize ("socknal_reaper");
+ libcfs_blockallsigs ();
+
+ CFS_INIT_LIST_HEAD(&enomem_conns);
+ cfs_waitlink_init (&wait);
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ while (!ksocknal_data.ksnd_shuttingdown) {
+
+ if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
+ conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next,
+ ksock_conn_t, ksnc_list);
+ list_del (&conn->ksnc_list);
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ ksocknal_terminate_conn (conn);
+ ksocknal_conn_decref(conn);
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+ continue;
+ }
+
+ if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
+ conn = list_entry (ksocknal_data.ksnd_zombie_conns.next,
+ ksock_conn_t, ksnc_list);
+ list_del (&conn->ksnc_list);
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ ksocknal_destroy_conn (conn);
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+ continue;
+ }
+
+ if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
+ list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns);
+ list_del_init(&ksocknal_data.ksnd_enomem_conns);
+ }
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ /* reschedule all the connections that stalled with ENOMEM... */
+ nenomem_conns = 0;
+ while (!list_empty (&enomem_conns)) {
+ conn = list_entry (enomem_conns.next,
+ ksock_conn_t, ksnc_tx_list);
+ list_del (&conn->ksnc_tx_list);
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ LASSERT (conn->ksnc_tx_scheduled);
+ conn->ksnc_tx_ready = 1;
+ list_add_tail(&conn->ksnc_tx_list,&sched->kss_tx_conns);
+ cfs_waitq_signal (&sched->kss_waitq);
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ nenomem_conns++;
+ }
+
+ /* careful with the jiffy wrap... */
+ while ((timeout = cfs_time_sub(deadline,
+ cfs_time_current())) <= 0) {
+ const int n = 4;
+ const int p = 1;
+ int chunk = ksocknal_data.ksnd_peer_hash_size;
+
+ /* Time to check for timeouts on a few more peers: I do
+ * checks every 'p' seconds on a proportion of the peer
+ * table and I need to check every connection 'n' times
+ * within a timeout interval, to ensure I detect a
+ * timeout on any connection within (n+1)/n times the
+ * timeout interval. */
+
+ if (*ksocknal_tunables.ksnd_timeout > n * p)
+ chunk = (chunk * n * p) /
+ *ksocknal_tunables.ksnd_timeout;
+ if (chunk == 0)
+ chunk = 1;
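+                        /* e.g. with a 50 second timeout tunable, n*p == 4
+                         * means we scan hash_size*4/50 buckets per second,
+                         * covering the whole table 4 times per timeout
+                         * interval. */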
+
+ for (i = 0; i < chunk; i++) {
+ ksocknal_check_peer_timeouts (peer_index);
+ peer_index = (peer_index + 1) %
+ ksocknal_data.ksnd_peer_hash_size;
+ }
+
+ deadline = cfs_time_add(deadline, cfs_time_seconds(p));
+ }
+
+ if (nenomem_conns != 0) {
+ /* Reduce my timeout if I rescheduled ENOMEM conns.
+ * This also prevents me getting woken immediately
+ * if any go back on my enomem list. */
+ timeout = SOCKNAL_ENOMEM_RETRY;
+ }
+ ksocknal_data.ksnd_reaper_waketime =
+ cfs_time_add(cfs_time_current(), timeout);
+
+ set_current_state (TASK_INTERRUPTIBLE);
+ cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+ if (!ksocknal_data.ksnd_shuttingdown &&
+ list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
+ list_empty (&ksocknal_data.ksnd_zombie_conns))
+ cfs_waitq_timedwait (&wait, CFS_TASK_INTERRUPTIBLE, timeout);
+
+ set_current_state (TASK_RUNNING);
+ cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait);
+
+ spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
+ }
+
+ spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
+
+ ksocknal_thread_fini ();
+ return (0);
+}
--- /dev/null
+#include "socklnd.h"
+
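+/* Export the socknal tunables under /proc/sys/socknal.  The ctl_table is
+ * filled in at runtime so conditionally-compiled tunables (zero copy, IRQ
+ * affinity) can be skipped without leaving holes in the table. */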
+# if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
+static ctl_table ksocknal_ctl_table[18];
+
+ctl_table ksocknal_top_ctl_table[] = {
+ {200, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
+ { 0 }
+};
+
+int
+ksocknal_lib_tunables_init ()
+{
+ int i = 0;
+ int j = 1;
+
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "timeout", ksocknal_tunables.ksnd_timeout,
+ sizeof (int), 0644, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "credits", ksocknal_tunables.ksnd_credits,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "peer_credits", ksocknal_tunables.ksnd_peercredits,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "nconnds", ksocknal_tunables.ksnd_nconnds,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "min_reconnectms", ksocknal_tunables.ksnd_min_reconnectms,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "max_reconnectms", ksocknal_tunables.ksnd_max_reconnectms,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "eager_ack", ksocknal_tunables.ksnd_eager_ack,
+ sizeof (int), 0644, NULL, &proc_dointvec};
+#if SOCKNAL_ZC
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "zero_copy", ksocknal_tunables.ksnd_zc_min_frag,
+ sizeof (int), 0644, NULL, &proc_dointvec};
+#endif
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "typed", ksocknal_tunables.ksnd_typed_conns,
+ sizeof (int), 0444, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "min_bulk", ksocknal_tunables.ksnd_min_bulk,
+ sizeof (int), 0644, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "buffer_size", ksocknal_tunables.ksnd_buffer_size,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "nagle", ksocknal_tunables.ksnd_nagle,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+#if CPU_AFFINITY
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "irq_affinity", ksocknal_tunables.ksnd_irq_affinity,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+#endif
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "keepalive_idle", ksocknal_tunables.ksnd_keepalive_idle,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "keepalive_count", ksocknal_tunables.ksnd_keepalive_count,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+ ksocknal_ctl_table[i++] = (ctl_table)
+ {j++, "keepalive_intvl", ksocknal_tunables.ksnd_keepalive_intvl,
+ sizeof(int), 0644, NULL, &proc_dointvec};
+
+ LASSERT (j == i+1);
+ LASSERT (i < sizeof(ksocknal_ctl_table)/sizeof(ksocknal_ctl_table[0]));
+
+ ksocknal_tunables.ksnd_sysctl =
+ register_sysctl_table(ksocknal_top_ctl_table, 0);
+
+ if (ksocknal_tunables.ksnd_sysctl == NULL)
+ CWARN("Can't setup /proc tunables\n");
+
+ return 0;
+}
+
+void
+ksocknal_lib_tunables_fini ()
+{
+ if (ksocknal_tunables.ksnd_sysctl != NULL)
+ unregister_sysctl_table(ksocknal_tunables.ksnd_sysctl);
+}
+#else
+int
+ksocknal_lib_tunables_init ()
+{
+ return 0;
+}
+
+void
+ksocknal_lib_tunables_fini ()
+{
+}
+#endif
+
+void
+ksocknal_lib_bind_irq (unsigned int irq)
+{
+}
+
+int
+ksocknal_lib_get_conn_addrs (ksock_conn_t *conn)
+{
+ int rc = libcfs_sock_getaddr(conn->ksnc_sock, 1,
+ &conn->ksnc_ipaddr,
+ &conn->ksnc_port);
+
+ /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
+ LASSERT (!conn->ksnc_closing);
+
+ if (rc != 0) {
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return rc;
+ }
+
+ rc = libcfs_sock_getaddr(conn->ksnc_sock, 0,
+ &conn->ksnc_myipaddr, NULL);
+ if (rc != 0) {
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+unsigned int
+ksocknal_lib_sock_irq (struct socket *sock)
+{
+ int irq = 0;
+ return irq;
+}
+
+#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
+static struct page *
+ksocknal_kvaddr_to_page (unsigned long vaddr)
+{
+ struct page *page;
+
+ if (vaddr >= VMALLOC_START &&
+ vaddr < VMALLOC_END)
+ page = vmalloc_to_page ((void *)vaddr);
+#if CONFIG_HIGHMEM
+ else if (vaddr >= PKMAP_BASE &&
+ vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
+ page = vmalloc_to_page ((void *)vaddr);
+ /* in 2.4 ^ just walks the page tables */
+#endif
+ else
+ page = virt_to_page (vaddr);
+
+ if (page == NULL ||
+ !VALID_PAGE (page))
+ return (NULL);
+
+ return (page);
+}
+#endif
+
+
+void
+ksocknal_lib_eager_ack (ksock_conn_t *conn)
+{
+}
+
+int
+ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
+{
+ ksock_tconn_t * tconn = conn->ksnc_sock;
+ int len;
+ int rc;
+
+ ksocknal_get_tconn (tconn);
+
+ *txmem = *rxmem = 0;
+
+ len = sizeof(*nagle);
+
+ rc = ksocknal_get_tcp_option(
+ tconn, TCP_SOCKET_NODELAY,
+ (__u32 *)nagle, &len);
+
+ ksocknal_put_tconn (tconn);
+
+        CDEBUG(D_NET, "ksocknal_lib_get_conn_tunables: nodelay = %d rc = %d\n",
+               *nagle, rc);
+
+ if (rc == 0)
+ *nagle = !*nagle;
+ else
+ *txmem = *rxmem = *nagle = 0;
+
+ return (rc);
+}
+
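+/* Pick a socket buffer size: enforce at least SOCKNAL_MIN_BUFFER, prefer
+ * the tunable when it exceeds the minimum, and return 0 to mean "leave
+ * the current size alone". */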
+int
+ksocknal_lib_buffersize (int current_sz, int tunable_sz)
+{
+ /* ensure >= SOCKNAL_MIN_BUFFER */
+ if (current_sz < SOCKNAL_MIN_BUFFER)
+ return MAX(SOCKNAL_MIN_BUFFER, tunable_sz);
+
+ if (tunable_sz > SOCKNAL_MIN_BUFFER)
+ return tunable_sz;
+
+ /* leave alone */
+ return 0;
+}
+
+int
+ksocknal_lib_setup_sock (struct socket *sock)
+{
+ int rc;
+
+ int keep_idle;
+ int keep_count;
+ int keep_intvl;
+ int keep_alive;
+
+ __u32 option;
+
+ /* set the window size */
+
+#if 0
+ tconn->kstc_snd_wnd = ksocknal_tunables.ksnd_buffer_size;
+ tconn->kstc_rcv_wnd = ksocknal_tunables.ksnd_buffer_size;
+#endif
+
+ /* disable nagle */
+ if (!ksocknal_tunables.ksnd_nagle) {
+ option = 1;
+
+ rc = ksocknal_set_tcp_option(
+ sock, TCP_SOCKET_NODELAY,
+ &option, sizeof (option));
+ if (rc != 0) {
+                        CERROR ("Can't disable nagle: %d\n", rc);
+ return (rc);
+ }
+ }
+
+ /* snapshot tunables */
+ keep_idle = *ksocknal_tunables.ksnd_keepalive_idle;
+ keep_count = *ksocknal_tunables.ksnd_keepalive_count;
+ keep_intvl = *ksocknal_tunables.ksnd_keepalive_intvl;
+
+ keep_alive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
+
+ option = (__u32)(keep_alive ? 1 : 0);
+
+ rc = ksocknal_set_tcp_option(
+ sock, TCP_SOCKET_KEEPALIVE,
+ &option, sizeof (option));
+ if (rc != 0) {
+                CERROR ("Can't set keepalive: %d\n", rc);
+ return (rc);
+ }
+
+ return (0);
+}
+
+void
+ksocknal_push_conn (ksock_conn_t *conn)
+{
+ ksock_tconn_t * tconn;
+ __u32 nagle;
+ __u32 val = 1;
+ int rc;
+
+ tconn = conn->ksnc_sock;
+
+ ksocknal_get_tconn(tconn);
+
+ spin_lock(&tconn->kstc_lock);
+ if (tconn->kstc_type == kstt_sender) {
+ nagle = tconn->sender.kstc_info.nagle;
+ tconn->sender.kstc_info.nagle = 0;
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ nagle = tconn->child.kstc_info.nagle;
+ tconn->child.kstc_info.nagle = 0;
+ }
+
+ spin_unlock(&tconn->kstc_lock);
+
+ val = 1;
+ rc = ksocknal_set_tcp_option(
+ tconn,
+ TCP_SOCKET_NODELAY,
+ &(val),
+ sizeof(__u32)
+ );
+
+ LASSERT (rc == 0);
+ spin_lock(&tconn->kstc_lock);
+
+ if (tconn->kstc_type == kstt_sender) {
+ tconn->sender.kstc_info.nagle = nagle;
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ tconn->child.kstc_info.nagle = nagle;
+ }
+ spin_unlock(&tconn->kstc_lock);
+
+ ksocknal_put_tconn(tconn);
+}
+
+/* @mode: 0: receiving mode / 1: sending mode */
+void
+ksocknal_sched_conn (ksock_conn_t *conn, int mode, ksock_tx_t *tx)
+{
+ int flags;
+ ksock_sched_t * sched;
+ ENTRY;
+
+ /* interleave correctly with closing sockets... */
+ read_lock (&ksocknal_data.ksnd_global_lock);
+
+ sched = conn->ksnc_scheduler;
+
+ spin_lock_irqsave (&sched->kss_lock, flags);
+
+ if (mode) { /* transmission can continue ... */
+
+ conn->ksnc_tx_ready = 1;
+
+ if (tx) {
+ /* Incomplete send: place tx on HEAD of tx_queue */
+ list_add (&tx->tx_list, &conn->ksnc_tx_queue);
+ }
+
+                if (!conn->ksnc_tx_scheduled &&
+                    !list_empty(&conn->ksnc_tx_queue)) {  /* packets to send */
+ list_add_tail (&conn->ksnc_tx_list,
+ &sched->kss_tx_conns);
+ conn->ksnc_tx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_conn_refcount);
+
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+ } else { /* receiving can continue ... */
+
+ conn->ksnc_rx_ready = 1;
+
+                if (!conn->ksnc_rx_scheduled) {  /* not being progressed */
+ list_add_tail(&conn->ksnc_rx_list,
+ &sched->kss_rx_conns);
+ conn->ksnc_rx_scheduled = 1;
+ /* extra ref for scheduler */
+ atomic_inc (&conn->ksnc_conn_refcount);
+
+ cfs_waitq_signal (&sched->kss_waitq);
+ }
+ }
+
+ spin_unlock_irqrestore (&sched->kss_lock, flags);
+ read_unlock (&ksocknal_data.ksnd_global_lock);
+
+ EXIT;
+}
+
+void ksocknal_schedule_callback(struct socket *sock, int mode, void *tx, ulong_ptr bytes)
+{
+ ksock_conn_t * conn = (ksock_conn_t *) sock->kstc_conn;
+
+ if (mode) {
+ ksocknal_sched_conn(conn, mode, tx);
+ } else {
+                if (CAN_BE_SCHED(bytes, (ulong_ptr)conn->ksnc_rx_nob_wanted)) {
+ ksocknal_sched_conn(conn, mode, tx);
+ }
+ }
+}
+
+
+void
+ksocknal_fini_sending(ksock_tcpx_fini_t *tcpx)
+{
+ ksocknal_tx_launched(tcpx->tx);
+ cfs_free(tcpx);
+}
+
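+/*
+ * Called when a send completes (possibly partially): consume 'rc' bytes
+ * from the tx's iov/kiov cursors.  Returns the tx if there is more left
+ * to send, or NULL once the tx has been finalized.
+ */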
+PVOID
+ksocknal_update_tx(
+ struct socket* tconn,
+ PVOID txp,
+ ulong_ptr rc
+ )
+{
+ ksock_tx_t * tx = (ksock_tx_t *)txp;
+
+        /*
+         * the transmission completed (possibly partially); update
+         * the tx to account for the bytes that went out
+         */
+
+ LASSERT(tx->tx_resid >= (int)rc);
+ tx->tx_resid -= (int)rc;
+
+        /*
+         * only part of the tx was sent: advance the iov/kiov
+         * cursors and schedule the remainder for later transmission
+         */
+
+ if (tx->tx_resid) {
+
+ if (tx->tx_niov > 0) {
+
+                        /* if there are iov entries, consume them first */
+                        while (rc > 0) {
+ if (rc < tx->tx_iov->iov_len) {
+ /* didn't send whole iov entry... */
+ tx->tx_iov->iov_base =
+ (char *)(tx->tx_iov->iov_base) + rc;
+ tx->tx_iov->iov_len -= rc;
+ rc = 0;
+ } else {
+ /* the whole of iov was sent out */
+ rc -= tx->tx_iov->iov_len;
+ tx->tx_iov++;
+ tx->tx_niov--;
+ }
+ }
+
+ } else {
+
+                        /* otherwise consume the kiov entries */
+
+                        while (rc > 0) {
+
+ if (rc < tx->tx_kiov->kiov_len) {
+ /* didn't send whole kiov entry... */
+ tx->tx_kiov->kiov_offset += rc;
+ tx->tx_kiov->kiov_len -= rc;
+ rc = 0;
+ } else {
+ /* whole kiov was sent out */
+ rc -= tx->tx_kiov->kiov_len;
+ tx->tx_kiov++;
+ tx->tx_nkiov--;
+ }
+ }
+ }
+
+ } else {
+
+ ksock_tcpx_fini_t * tcpx =
+ cfs_alloc(sizeof(ksock_tcpx_fini_t), CFS_ALLOC_ZERO);
+
+ ASSERT(tx->tx_resid == 0);
+
+ if (!tcpx) {
+
+ ksocknal_tx_launched (tx);
+
+ } else {
+
+ tcpx->tx = tx;
+ ExInitializeWorkItem(
+ &(tcpx->item),
+ ksocknal_fini_sending,
+ tcpx
+ );
+ ExQueueWorkItem(
+ &(tcpx->item),
+ CriticalWorkQueue
+ );
+ }
+
+ tx = NULL;
+ }
+
+ return (PVOID)tx;
+}
+
+void
+ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn)
+{
+}
+
+void
+ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ sock->kstc_sched_cb = ksocknal_schedule_callback;
+ sock->kstc_update_tx = ksocknal_update_tx;
+
+ return;
+}
+
+void
+ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ sock->kstc_sched_cb(sock, TRUE, NULL, 0);
+ sock->kstc_sched_cb(sock, FALSE, NULL, 0);
+
+ return;
+}
+
+void
+ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn)
+{
+ return ;
+}
+
+/*
+ * ksocknal_lock_kiovs
+ * Lock the kiov pages into MDL structure
+ *
+ * Arguments:
+ * kiov: the array of kiov pages
+ *   nkiov: number of kiov entries to be locked
+ *   len:   the total length of the kiov array
+ *
+ * Return Value:
+ * PMDL: the Mdl of the locked buffers or NULL
+ * pointer in failure case
+ *
+ * Notes:
+ * N/A
+ */
+ksock_mdl_t *
+ksocknal_lock_kiovs(
+ IN lnet_kiov_t * kiov,
+ IN int nkiov,
+ IN int recving,
+ IN int * len )
+{
+ int rc = 0;
+ int i = 0;
+ int total = 0;
+ ksock_mdl_t * mdl = NULL;
+ ksock_mdl_t * tail = NULL;
+
+ LASSERT(kiov != NULL);
+ LASSERT(nkiov > 0);
+ LASSERT(len != NULL);
+
+ for (i=0; i < nkiov; i++) {
+
+ ksock_mdl_t * Iovec = NULL;
+
+
+                //
+                // Lock the kiov page into Iovec
+                //
+
+ rc = ksocknal_lock_buffer(
+ (PUCHAR)kiov[i].kiov_page->addr +
+ kiov[i].kiov_offset,
+ FALSE,
+ kiov[i].kiov_len,
+ recving ? IoWriteAccess : IoReadAccess,
+ &Iovec
+ );
+
+ if (rc < 0) {
+ break;
+ }
+
+ //
+ // Attach the Iovec to the mdl chain
+ //
+
+ if (tail) {
+ tail->Next = Iovec;
+ } else {
+ mdl = Iovec;
+ }
+
+ tail = Iovec;
+
+ total += kiov[i].kiov_len;
+
+ }
+
+ if (rc >= 0) {
+ *len = total;
+ } else {
+ if (mdl) {
+ ksocknal_release_mdl(mdl, FALSE);
+ mdl = NULL;
+ }
+ }
+
+ return mdl;
+}
+
+void
+ksocknal_eager_ack (ksock_conn_t *conn)
+{
+ return;
+}
\ No newline at end of file
--- /dev/null
+#define DEBUG_PORTAL_ALLOC
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef __WINNT_SOCKNAL_LIB_H__
+#define __WINNT_SOCKNAL_LIB_H__
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define SOCKNAL_ARCH_EAGER_ACK 0
+
+#ifndef CONFIG_SMP
+
+static inline
+int ksocknal_nsched(void)
+{
+ return 1;
+}
+
+#else
+
+static inline int
+ksocknal_nsched(void)
+{
+ return num_online_cpus();
+}
+
+static inline int
+ksocknal_sched2cpu(int i)
+{
+ return i;
+}
+
+static inline int
+ksocknal_irqsched2cpu(int i)
+{
+ return i;
+}
+
+#endif
+
+#endif
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ * Author: Eric Barton <eric@bartonsoftware.com>
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Portals; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "socklnd.h"
+
+static int timeout = SOCKNAL_TIMEOUT;
+CFS_MODULE_PARM(timeout, "i", int, 0644,
+ "dead socket timeout (seconds)");
+
+static int credits = SOCKNAL_CREDITS;
+CFS_MODULE_PARM(credits, "i", int, 0444,
+ "# concurrent sends");
+
+static int peer_credits = SOCKNAL_PEERCREDITS;
+CFS_MODULE_PARM(peer_credits, "i", int, 0444,
+ "# concurrent sends to 1 peer");
+
+static int nconnds = SOCKNAL_NCONND;
+CFS_MODULE_PARM(nconnds, "i", int, 0444,
+ "# connection daemons");
+
+static int min_reconnectms = SOCKNAL_MIN_RECONNECTMS;
+CFS_MODULE_PARM(min_reconnectms, "i", int, 0644,
+ "min connection retry interval (mS)");
+
+static int max_reconnectms = SOCKNAL_MAX_RECONNECTMS;
+CFS_MODULE_PARM(max_reconnectms, "i", int, 0644,
+ "max connection retry interval (mS)");
+
+static int eager_ack = SOCKNAL_EAGER_ACK;
+CFS_MODULE_PARM(eager_ack, "i", int, 0644,
+ "send tcp ack packets eagerly");
+
+static int typed_conns = SOCKNAL_TYPED_CONNS;
+CFS_MODULE_PARM(typed_conns, "i", int, 0444,
+ "use different sockets for bulk");
+
+static int min_bulk = SOCKNAL_MIN_BULK;
+CFS_MODULE_PARM(min_bulk, "i", int, 0644,
+ "smallest 'large' message");
+
+static int buffer_size = SOCKNAL_BUFFER_SIZE;
+CFS_MODULE_PARM(buffer_size, "i", int, 0644,
+ "socket buffer size");
+
+static int nagle = SOCKNAL_NAGLE;
+CFS_MODULE_PARM(nagle, "i", int, 0644,
+ "enable NAGLE?");
+
+static int keepalive_idle = SOCKNAL_KEEPALIVE_IDLE;
+CFS_MODULE_PARM(keepalive_idle, "i", int, 0644,
+ "# idle seconds before probe");
+
+static int keepalive_count = SOCKNAL_KEEPALIVE_COUNT;
+CFS_MODULE_PARM(keepalive_count, "i", int, 0644,
+ "# missed probes == dead");
+
+static int keepalive_intvl = SOCKNAL_KEEPALIVE_INTVL;
+CFS_MODULE_PARM(keepalive_intvl, "i", int, 0644,
+ "seconds between probes");
+
+#if CPU_AFFINITY
+static int enable_irq_affinity = SOCKNAL_IRQ_AFFINITY;
+CFS_MODULE_PARM(enable_irq_affinity, "i", int, 0644,
+ "enable IRQ affinity");
+#endif
+
+#if SOCKNAL_ZC
+static unsigned int zc_min_frag = SOCKNAL_ZC_MIN_FRAG;
+CFS_MODULE_PARM(zc_min_frag, "i", int, 0644,
+ "minimum fragment to zero copy");
+#endif
+
+ksock_tunables_t ksocknal_tunables = {
+ /* .ksnd_timeout = */ &timeout,
+ /* .ksnd_credits = */ &credits,
+ /* .ksnd_peercredits = */ &peer_credits,
+ /* .ksnd_nconnds = */ &nconnds,
+ /* .ksnd_min_reconnectms = */ &min_reconnectms,
+ /* .ksnd_max_reconnectms = */ &max_reconnectms,
+ /* .ksnd_eager_ack = */ &eager_ack,
+ /* .ksnd_typed_conns = */ &typed_conns,
+ /* .ksnd_min_bulk = */ &min_bulk,
+ /* .ksnd_buffer_size = */ &buffer_size,
+ /* .ksnd_nagle = */ &nagle,
+ /* .ksnd_keepalive_idle = */ &keepalive_idle,
+ /* .ksnd_keepalive_count = */ &keepalive_count,
+ /* .ksnd_keepalive_intvl = */ &keepalive_intvl,
+#if SOCKNAL_ZC
+ /* .ksnd_zc_min_frag = */ &zc_min_frag,
+#endif
+#if CPU_AFFINITY
+ /* .ksnd_irq_affinity = */ &enable_irq_affinity,
+#endif
+};
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _TDINAL_H_
+#define _TDINAL_H_
+
+/*
+ * Included Headers
+ */
+
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include <lnet/lnet.h>
+#include <lnet/lib-lnet.h>
+#include <lnet/socklnd.h>
+
+
+/*
+ * tdinal routines
+ */
+
+//
+// daemon.c
+//
+
+struct ksock_daemon *
+ksocknal_alloc_daemon(
+ unsigned short port,
+ int backlog
+ );
+
+void
+ksocknal_free_daemon(
+ struct ksock_daemon * daemon
+ );
+
+int
+ksocknal_daemon(
+ void * context
+ );
+
+void
+ksocknal_shut_daemon(
+ struct ksock_daemon *daemon
+ );
+
+int
+ksocknal_start_daemon(
+ unsigned short port,
+ int backlog
+ );
+
+void
+ksocknal_stop_daemon(
+ unsigned short port
+ );
+
+void
+ksocknal_stop_all_daemons();
+
+ksock_tconn_t *
+ksocknal_create_child_tconn(
+ ksock_tconn_t * parent
+ );
+
+void
+ksocknal_replenish_backlogs(
+ ksock_daemon_t * daemon
+ );
+
+int
+ksocknal_start_listen(
+ struct ksock_daemon * daemon
+ );
+
+int
+ksocknal_wait_child_tconn(
+ struct ksock_daemon * daemon,
+ ksock_tconn_t ** child
+ );
+
+ksock_tconn_t *
+ksocknal_get_vacancy_backlog(
+ ksock_tconn_t * parent
+ );
+
+
+//
+// debug.c
+//
+
+
+PUCHAR
+KsNtStatusToString (IN NTSTATUS Status);
+
+
+VOID
+KsPrintf(
+ IN LONG DebugPrintLevel,
+ IN PCHAR DebugMessage,
+ IN ...
+ );
+
+
+//
+// tconn.c
+//
+
+
+ksock_mdl_t *
+ksocknal_lock_iovs(
+ IN struct iovec *iov,
+ IN int niov,
+ IN int recv,
+ IN int * len
+ );
+
+ksock_mdl_t *
+ksocknal_lock_kiovs(
+ IN lnet_kiov_t * kiov,
+ IN int nkiov,
+ IN int recv,
+ IN int * len
+ );
+
+int
+ksocknal_send_mdl(
+ ksock_tconn_t * tconn,
+ ksock_tx_t * tx,
+ ksock_mdl_t * mdl,
+ int len,
+ int flags
+ );
+
+int
+ksocknal_query_data(
+ ksock_tconn_t * tconn,
+ size_t * size,
+ int bIsExpedited);
+
+int
+ksocknal_recv_mdl(
+ ksock_tconn_t * tconn,
+ ksock_mdl_t * mdl,
+ int size,
+ int flags
+ );
+
+int
+ksocknal_get_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ PULONG Length
+ );
+
+NTSTATUS
+ksocknal_set_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ ULONG Length
+ );
+
+int
+ksocknal_bind_tconn (
+ ksock_tconn_t * tconn,
+ ksock_tconn_t * parent,
+ ulong_ptr addr,
+ unsigned short port
+ );
+
+int
+ksocknal_build_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr addr,
+ unsigned short port
+ );
+
+int
+ksocknal_disconnect_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr flags
+ );
+
+void
+ksocknal_abort_tconn(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_query_local_ipaddr(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_init_tdi_data();
+
+void
+ksocknal_fini_tdi_data();
+
+int
+ksocknal_tconn_write (ksock_tconn_t *tconn, void *buffer, int nob);
+
+int
+ksocknal_tconn_read (ksock_tconn_t * tconn, void *buffer, int nob);
+
+int
+ksocknal_test_nagle(void * context);
+
+//
+// tcp.c
+//
+
+NTSTATUS
+KsTcpCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+NTSTATUS
+KsDisconectCompletionRoutine (
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+NTSTATUS
+KsTcpReceiveCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ );
+
+NTSTATUS
+KsTcpSendCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ );
+
+NTSTATUS
+KsAcceptCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ );
+
+
+NTSTATUS
+KsConnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN LONG RemoteAddressLength,
+ IN PVOID RemoteAddress,
+ IN LONG UserDataLength,
+ IN PVOID UserData,
+ IN LONG OptionsLength,
+ IN PVOID Options,
+ OUT CONNECTION_CONTEXT * ConnectionContext,
+ OUT PIRP * AcceptIrp
+ );
+
+NTSTATUS
+KsDisconnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN LONG DisconnectDataLength,
+ IN PVOID DisconnectData,
+ IN LONG DisconnectInformationLength,
+ IN PVOID DisconnectInformation,
+ IN ULONG DisconnectFlags
+ );
+
+NTSTATUS
+KsTcpReceiveEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ );
+
+NTSTATUS
+KsTcpReceiveExpeditedEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ );
+
+NTSTATUS
+KsTcpChainedReceiveEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ );
+
+NTSTATUS
+KsTcpChainedReceiveExpeditedEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ );
+
+
+
+VOID
+KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem);
+
+
+//
+// tdi.c
+//
+
+ULONG
+ksocknal_tdi_send_flags(ULONG SockFlags);
+
+PIRP
+KsBuildTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject
+ );
+
+NTSTATUS
+KsSubmitTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN BOOLEAN bSynchronous,
+ OUT PULONG Information
+ );
+
+NTSTATUS
+KsOpenControl(
+ IN PUNICODE_STRING DeviceName,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseControl(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsOpenAddress(
+ IN PUNICODE_STRING DeviceName,
+ IN PTRANSPORT_ADDRESS pAddress,
+ IN ULONG AddressLength,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseAddress(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsOpenConnection(
+ IN PUNICODE_STRING DeviceName,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ );
+
+NTSTATUS
+KsCloseConnection(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ );
+
+NTSTATUS
+KsAssociateAddress(
+ IN HANDLE AddressHandle,
+ IN PFILE_OBJECT ConnectionObject
+ );
+
+
+NTSTATUS
+KsDisassociateAddress(
+ IN PFILE_OBJECT ConnectionObject
+ );
+
+
+NTSTATUS
+KsSetEventHandlers(
+ IN PFILE_OBJECT AddressObject,
+ IN PVOID EventContext,
+ IN PKS_EVENT_HANDLERS Handlers
+ );
+
+
+NTSTATUS
+KsQueryProviderInfo(
+ PWSTR TdiDeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ );
+
+NTSTATUS
+KsQueryAddressInfo(
+ IN PFILE_OBJECT FileObject,
+ OUT PTDI_ADDRESS_INFO AddressInfo,
+ OUT PULONG AddressSize
+ );
+
+NTSTATUS
+KsQueryConnectionInfo(
+ IN PFILE_OBJECT ConnectionObject,
+ OUT PTDI_CONNECTION_INFO ConnectionInfo,
+ OUT PULONG ConnectionSize
+ );
+
+ULONG
+KsInitializeTdiAddress(
+ IN OUT PTA_IP_ADDRESS pTransportAddress,
+ IN ULONG IpAddress,
+ IN USHORT IpPort
+ );
+
+ULONG
+KsQueryMdlsSize (IN PMDL Mdl);
+
+
+ULONG
+KsQueryTdiAddressLength(
+ OUT PTRANSPORT_ADDRESS pTransportAddress
+ );
+
+NTSTATUS
+KsQueryIpAddress(
+ IN PFILE_OBJECT FileObject,
+ OUT PVOID TdiAddress,
+ OUT ULONG* AddressLength
+ );
+
+
+NTSTATUS
+KsErrorEventHandler(
+ IN PVOID TdiEventContext,
+ IN NTSTATUS Status
+ );
+
+int
+ksocknal_set_handlers(
+ ksock_tconn_t * tconn
+ );
+
+
+
+//
+// Strusup.c
+//
+
+VOID
+KsPrintProviderInfo(
+ PWSTR DeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ );
+
+VOID
+KsInitialize(VOID);
+
+VOID
+KsUninitialize(VOID);
+
+
+ksock_tconn_t *
+ksocknal_create_tconn();
+
+void
+ksocknal_free_tconn(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_listener(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_sender(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_init_child(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_get_tconn(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_put_tconn(
+ ksock_tconn_t * tconn
+ );
+
+int
+ksocknal_reset_handlers(
+ ksock_tconn_t * tconn
+ );
+
+void
+ksocknal_destroy_tconn(
+ ksock_tconn_t * tconn
+ );
+
+
+PKS_TSDU
+KsAllocateKsTsdu();
+
+VOID
+KsPutKsTsdu(
+ PKS_TSDU KsTsdu
+ );
+
+VOID
+KsFreeKsTsdu(
+ PKS_TSDU KsTsdu
+ );
+
+VOID
+KsInitializeKsTsdu(
+ PKS_TSDU KsTsdu,
+ ULONG Length
+ );
+
+
+VOID
+KsInitializeKsTsduMgr(
+ PKS_TSDUMGR TsduMgr
+ );
+
+VOID
+KsInitializeKsChain(
+ PKS_CHAIN KsChain
+ );
+
+NTSTATUS
+KsCleanupTsduMgr(
+ PKS_TSDUMGR KsTsduMgr
+ );
+
+NTSTATUS
+KsCleanupKsChain(
+ PKS_CHAIN KsChain
+ );
+
+NTSTATUS
+KsCleanupTsdu(
+ ksock_tconn_t * tconn
+ );
+
+NTSTATUS
+KsCopyMdlChainToMdlChain(
+ IN PMDL SourceMdlChain,
+ IN ULONG SourceOffset,
+ IN PMDL DestinationMdlChain,
+ IN ULONG DestinationOffset,
+ IN ULONG BytesTobecopied,
+ OUT PULONG BytesCopied
+ );
+
+ULONG
+KsQueryMdlsSize (PMDL Mdl);
+
+NTSTATUS
+KsLockUserBuffer (
+ IN PVOID UserBuffer,
+ IN BOOLEAN bPaged,
+ IN ULONG Length,
+ IN LOCK_OPERATION Operation,
+ OUT PMDL * pMdl
+ );
+
+PVOID
+KsMapMdlBuffer (PMDL Mdl);
+
+VOID
+KsReleaseMdl ( IN PMDL Mdl,
+ IN int Paged );
+
+int
+ksocknal_lock_buffer (
+ void * buffer,
+ int paged,
+ int length,
+ LOCK_OPERATION access,
+ ksock_mdl_t ** kmdl
+ );
+
+void *
+ksocknal_map_mdl (ksock_mdl_t * mdl);
+
+void
+ksocknal_release_mdl (ksock_mdl_t *mdl, int paged);
+
+
+#endif //_TDINAL_H_
\ No newline at end of file
if LIBLUSTRE
noinst_LIBRARIES= libcfs.a
-libcfs_a_SOURCES= debug.c
+libcfs_a_SOURCES= debug.c user-prim.c user-lock.c
libcfs_a_CPPFLAGS = $(LLCPPFLAGS)
libcfs_a_CFLAGS = $(LLCFLAGS)
endif
if DARWIN
macos_PROGRAMS := libcfs
-nodist_libcfs_SOURCES := debug.c module.c tracefile.c nidstrings.c \
- darwin/darwin-debug.c darwin/darwin-fs.c darwin/darwin-mem.c \
- darwin/darwin-module.c darwin/darwin-prim.c \
- darwin/darwin-proc.c darwin/darwin-tracefile.c \
- darwin/darwin-utils.c darwin/darwin-sync.c \
- darwin/darwin-curproc.c user-prim.c user-lock.c
+nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \
+ darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \
+ darwin/darwin-tcpip.c darwin/darwin-utils.c \
+ darwin/darwin-debug.c darwin/darwin-proc.c \
+ darwin/darwin-tracefile.c darwin/darwin-module.c \
+ debug.c module.c tracefile.c nidstrings.c
libcfs_CFLAGS := $(EXTRA_KCFLAGS)
libcfs_LDFLAGS := $(EXTRA_KLDFLAGS)
EXTRA_DIST := Info.plist
MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs
-DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h
+DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c user-lock.c
darwin-fs.c \
darwin-prim.c \
darwin-tracefile.c \
- darwin-curproc.c
+ darwin-curproc.c \
+ darwin-tcpip.c
#include <libcfs/kp30.h>
/*
- * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * Implementation of cfs_curproc API (see lnet/include/libcfs/curproc.h)
* for XNU kernel.
*/
static inline struct ucred *curproc_ucred(void)
{
+#ifdef __DARWIN8__
+ return proc_ucred(current_proc());
+#else
return current_proc()->p_cred->pc_ucred;
+#endif
}
uid_t cfs_curproc_uid(void)
uid_t cfs_curproc_fsuid(void)
{
+#ifdef __DARWIN8__
+ return curproc_ucred()->cr_ruid;
+#else
return current_proc()->p_cred->p_ruid;
+#endif
}
gid_t cfs_curproc_fsgid(void)
{
+#ifdef __DARWIN8__
+ return curproc_ucred()->cr_rgid;
+#else
return current_proc()->p_cred->p_rgid;
+#endif
}
pid_t cfs_curproc_pid(void)
{
+#ifdef __DARWIN8__
+ return proc_pid(current_proc());
+#else
return current_proc()->p_pid;
+#endif
}
int cfs_curproc_groups_nr(void)
mode_t cfs_curproc_umask(void)
{
+#ifdef __DARWIN8__
+ /*
+ * XXX Liang:
+ *
+ * fd_cmask is not available in kexts, so we just assume
+         * everything is permitted.
+ */
+ return -1;
+#else
return current_proc()->p_fd->fd_cmask;
+#endif
}
char *cfs_curproc_comm(void)
{
+#ifdef __DARWIN8__
+ /*
+         * Writing to proc->p_comm is not permitted in Darwin8,
+         * because proc_selfname() only returns a copy of proc->p_comm,
+         * so this function does not really work.
+ */
+ static char pcomm[MAXCOMLEN+1];
+
+ proc_selfname(pcomm, MAXCOMLEN+1);
+ return pcomm;
+#else
return current_proc()->p_comm;
+#endif
}
cfs_kernel_cap_t cfs_curproc_cap_get(void)
{
- return 0;
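+        /* XXX: capabilities are not tracked on XNU; -1 reports every capability bit set */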
+ return -1;
}
void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
return;
}
-cfs_task_t *libcfs_current(void)
-{
- return cfs_current();
+void libcfs_run_lbug_upcall(char *file, const char *fn, const int line)
+{
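+        /* XXX: no-op -- user-mode upcall support is not implemented on Darwin */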
+}
+
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+ CEMERG("LBUG: pid: %u thread: %#x\n",
+ (unsigned)cfs_curproc_pid(), (unsigned)current_thread());
+ libcfs_debug_dumplog();
+ libcfs_run_lbug_upcall(file, func, line);
+ while (1)
+ cfs_schedule();
+
+ /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */
}
-int portals_arch_debug_init(unsigned long bufsize)
+#if ENTRY_NESTING_SUPPORT
+
+static inline struct cfs_debug_data *__current_cdd(void)
{
- return 0;
+ struct cfs_debug_data *cdd;
+
+ cdd = (struct cfs_debug_data *)current_uthread()->uu_nlminfo;
+ if (cdd != NULL &&
+ cdd->magic1 == CDD_MAGIC1 && cdd->magic2 == CDD_MAGIC2 &&
+ cdd->nesting_level < 1000)
+ return cdd;
+ else
+ return NULL;
}
-int portals_arch_debug_cleanup(void)
+static inline void __current_cdd_set(struct cfs_debug_data *cdd)
{
- return 0;
+ current_uthread()->uu_nlminfo = (void *)cdd;
+}
+
+void __entry_nesting(struct cfs_debug_data *child)
+{
+ struct cfs_debug_data *parent;
+
+ parent = __current_cdd();
+ if (parent != NULL) {
+ child->parent = parent;
+ child->nesting_level = parent->nesting_level + 1;
+ }
+ __current_cdd_set(child);
+}
+
+void __exit_nesting(struct cfs_debug_data *child)
+{
+ __current_cdd_set(child->parent);
+}
+
+unsigned int __current_nesting_level(void)
+{
+ struct cfs_debug_data *cdd;
+
+ cdd = __current_cdd();
+ if (cdd != NULL)
+ return cdd->nesting_level;
+ else
+ return 0;
}
+/* ENTRY_NESTING_SUPPORT */
+#endif
#include <sys/file.h>
#include <sys/malloc.h>
#include <sys/conf.h>
-#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/uio.h>
#include <sys/filedesc.h>
*
* Public functions
*/
+
+#ifdef __DARWIN8__
+#include <sys/vnode.h>
+
+extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t);
+
+/* vnode_size() is not exported */
+static errno_t
+vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
+{
+ struct vnode_attr va;
+ int error;
+
+ VATTR_INIT(&va);
+ VATTR_WANTED(&va, va_data_size);
+ error = vnode_getattr(vp, &va, ctx);
+ if (!error)
+ *sizep = va.va_data_size;
+ return(error);
+}
+
+/*
+ * XXX Liang:
+ *
+ * kern_file_*() are not thread-safe for now; however,
+ * they are needed only by tracefiled, so MT-safety is
+ * not so important to implement yet.
+ */
+int
+kern_file_size(struct cfs_kern_file *fp, off_t *psize)
+{
+ int error;
+ off_t size;
+
+ error = vnode_size(fp->f_vp, &size, fp->f_ctxt);
+ if (error)
+ return error;
+
+ if (psize)
+ *psize = size;
+ return 0;
+}
+
+struct cfs_kern_file *
+kern_file_open(const char * filename, int uflags, int mode, int *err)
+{
+ struct cfs_kern_file *fp;
+ vnode_t vp;
+ int error;
+
+ fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK);
+ if (fp == NULL) {
+ if (err != NULL)
+ *err = -ENOMEM;
+ return NULL;
+ }
+ fp->f_flags = FFLAGS(uflags);
+ fp->f_ctxt = vfs_context_create(NULL);
+
+ if ((error = vnode_open(filename, fp->f_flags,
+ mode, 0, &vp, fp->f_ctxt))){
+ if (err != NULL)
+ *err = -error;
+ _FREE(fp, M_TEMP);
+ } else {
+ if (err != NULL)
+ *err = 0;
+ fp->f_vp = vp;
+ }
+
+ return fp;
+}
+
+int
+kern_file_close(struct cfs_kern_file *fp)
+{
+ vnode_close(fp->f_vp, fp->f_flags, fp->f_ctxt);
+ vfs_context_rele(fp->f_ctxt);
+ _FREE(fp, M_TEMP);
+
+ return 0;
+}
+
+int
+kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+ struct proc *p = current_proc();
+ int resid;
+ int error;
+
+ assert(buf != NULL);
+ assert(fp != NULL && fp->f_vp != NULL);
+
+ error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos,
+ UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
+ if ((error) || (nbytes == resid)) {
+ if (!error)
+ error = -EINVAL;
+ return error;
+ }
+ *pos += nbytes - resid;
+
+ return (int)(nbytes - resid);
+}
+
int
-filp_node_size(struct file *fp, off_t *size)
+kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+ struct proc *p = current_proc();
+ int resid;
+ int error;
+
+ assert(buf != NULL);
+ assert(fp != NULL && fp->f_vp != NULL);
+
+ error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos,
+ UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
+ if ((error) || (nbytes == resid)) {
+ if (!error)
+ error = -EINVAL;
+ return error;
+ }
+ *pos += nbytes - resid;
+
+ return (int)(nbytes - resid);
+
+}
+
+int
+kern_file_sync (struct cfs_kern_file *fp)
+{
+ return VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt);
+}
+
+#else /* !__DARWIN8__ */
+
+int
+kern_file_size(struct file *fp, off_t *size)
{
struct vnode *vp = (struct vnode *)fp->f_data;
struct stat sb;
}
cfs_file_t *
-filp_open(const char * filename, int flags, int mode, int *err)
+kern_file_open(const char * filename, int flags, int mode, int *err)
{
struct nameidata nd;
- register cfs_file_t *fp;
+ cfs_file_t *fp;
register struct vnode *vp;
- cfs_file_t *nfp;
int rc;
extern struct fileops vnops;
extern int nfiles;
CFS_CONE_IN;
nfiles++;
- MALLOC_ZONE(nfp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO);
- bzero(nfp, sizeof(cfs_file_t));
- nfp->f_count = 1;
- fp = nfp;
+ MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO);
+ bzero(fp, sizeof(cfs_file_t));
+ fp->f_count = 1;
+ LIST_CIRCLE(fp, f_list);
NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc());
if ((rc = vn_open(&nd, flags, mode)) != 0){
printf("filp_open failed at (%d)\n", rc);
if (err != NULL)
*err = rc;
- ffree(fp);
+ FREE_ZONE(fp, sizeof *fp, M_FILE);
CFS_CONE_EX;
return NULL;
}
}
int
-filp_close (cfs_file_t *fp)
+kern_file_close (cfs_file_t *fp)
{
struct vnode *vp;
CFS_DECL_CONE_DATA;
* Write buffer to filp inside kernel
*/
int
-filp_write (cfs_file_t *fp, void *buf, size_t nbyte, off_t *pos)
+kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
{
struct uio auio;
struct iovec aiov;
struct proc *p = current_proc();
long cnt, error = 0;
+ int flags = 0;
CFS_DECL_CONE_DATA;
aiov.iov_base = (void *)(uintptr_t)buf;
aiov.iov_len = nbyte;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
- if (pos != NULL)
+ if (pos != NULL) {
auio.uio_offset = *pos;
- else
+ /*
+                * Liang: if we don't set FOF_OFFSET, vn_write()
+                * will use fp->f_offset as the real offset.
+                * The same applies to vn_read().
+ */
+ flags |= FOF_OFFSET;
+ } else
auio.uio_offset = (off_t)-1;
if (nbyte > INT_MAX)
return (EINVAL);
CFS_CONE_IN;
if (fp->f_type == DTYPE_VNODE)
bwillwrite(); /* empty stuff now */
- if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) {
+ if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
if (auio.uio_resid != cnt && (error == ERESTART ||\
error == EINTR || error == EWOULDBLOCK))
error = 0;
else
cnt -= auio.uio_resid;
if (pos != NULL)
- *pos = auio.uio_offset;
+ *pos += cnt;
return cnt;
}
* Read from filp inside kernel
*/
int
-filp_read (cfs_file_t *fp, void *buf, size_t nbyte, off_t *pos)
+kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
{
struct uio auio;
struct iovec aiov;
struct proc *p = current_proc();
long cnt, error = 0;
+ int flags = 0;
CFS_DECL_CONE_DATA;
aiov.iov_base = (caddr_t)buf;
aiov.iov_len = nbyte;
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
- if (pos != NULL)
+ if (pos != NULL) {
auio.uio_offset = *pos;
- else
+ flags |= FOF_OFFSET;
+ } else
auio.uio_offset = (off_t)-1;
if (nbyte > INT_MAX)
return (EINVAL);
cnt = nbyte;
CFS_CONE_IN;
- if ((error = fo_read(fp, &auio, fp->f_cred, 0, p)) != 0) {
+ if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) {
if (auio.uio_resid != cnt && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
error = 0;
else
cnt -= auio.uio_resid;
if (pos != NULL)
- *pos = auio.uio_offset;
+ *pos += cnt;
return cnt;
}
int
-filp_fsync (cfs_file_t *fp)
+kern_file_sync (cfs_file_t *fp)
{
struct vnode *vp = (struct vnode *)fp->f_data;
struct proc *p = current_proc();
return error;
}
-int
-ref_file(cfs_file_t *fp)
+#endif /* !__DARWIN8__ */
+
+cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor)
{
- CFS_DECL_CONE_DATA;
+ return makedev(major, minor);
+}
- CFS_CONE_IN;
- fref(fp);
- CFS_CONE_EX;
- return 0;
+cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev)
+{
+ return major(rdev);
}
-int
-rele_file(cfs_file_t *fp)
+cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev)
{
- CFS_DECL_CONE_DATA;
+ return minor(rdev);
+}
- CFS_CONE_IN;
- frele(fp);
- CFS_CONE_EX;
- return 0;
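+/* XXX: minimal stub -- ACLs are not supported here, so every caller shares one static object */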
+struct posix_acl *posix_acl_alloc(int count, int flags)
+{
+ static struct posix_acl acl;
+ return &acl;
}
/*
- * Private functions
+ * XXX Liang: I haven't converted all of them;
+ * is more needed?
*/
-void vrele_safe(struct vnode *nd)
-{
- CFS_DECL_CONE_DATA;
-
- CFS_CONE_IN;
- vrele(nd);
- CFS_CONE_EX;
-}
-
-int
-path_lookup(const char *path, unsigned int flags, struct nameidata *nd)
+int cfs_oflags2univ(int flags)
{
- int ret = 0;
- CFS_DECL_CONE_DATA;
+ int f;
- CFS_CONE_IN;
- NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)path, current_proc());
- if ((ret = namei(nd)) != 0){
- CERROR("path_lookup fail!\n");
- }
- CFS_CONE_EX;
-
- return ret;
+ f = flags & O_ACCMODE;
+ f |= (flags & O_CREAT) ? CFS_O_CREAT: 0;
+ f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0;
+ f |= (flags & O_EXCL) ? CFS_O_EXCL: 0;
+ f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0;
+ f |= (flags & O_APPEND) ? CFS_O_APPEND: 0;
+ f |= (flags & O_NOFOLLOW) ? CFS_O_NOFOLLOW: 0;
+ f |= (flags & O_SYNC)? CFS_O_SYNC: 0;
+ return f;
}
-int
-file_count(struct file *fp)
+/*
+ * XXX Liang: we don't need it in OSX.
+ * But it should be implemented anyway.
+ */
+int cfs_univ2oflags(int flags)
{
- return fcount(fp);
+ return flags;
}
-
-
--- /dev/null
+#ifndef __LIBCFS_DARWIN_INTERNAL_H__
+#define __LIBCFS_DARWIN_INTERNAL_H__
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+int cfs_sysctl_isvalid(void);
+struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, int (*handler) SYSCTL_HANDLER_ARGS);
+struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access,
+                                        const char *name, int *ptr, int val);
+struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, int *ptr, int val);
+struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, char *ptr, int len);
+struct sysctl_oid * cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, void *ptr, int size);
+
+#endif
* vim:expandtab:shiftwidth=8:tabstop=8:
*
* Copyright (C) 2002 Cluster File Systems, Inc.
- * Author: Phil Schwan <phil@clusterfs.com>
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ * Nikita Danilov <nikita@clusterfs.com>
*
* This file is part of Lustre, http://www.lustre.org.
*
#include <mach/mach_types.h>
#include <string.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <sys/file.h>
-#include <sys/conf.h>
-#include <sys/vnode.h>
-#include <sys/uio.h>
-#include <sys/filedesc.h>
-#include <sys/namei.h>
-#include <miscfs/devfs/devfs.h>
-#include <kern/kalloc.h>
-#include <kern/zalloc.h>
-#include <kern/thread.h>
+#include <sys/malloc.h>
#include <libcfs/libcfs.h>
#include <libcfs/kp30.h>
+#include "darwin-internal.h"
-/*
- * Definition of struct zone, copied from osfmk/kern/zalloc.h.
- */
-struct zone_hack {
- int count; /* Number of elements used now */
- vm_offset_t free_elements;
- vm_size_t cur_size; /* current memory utilization */
- vm_size_t max_size; /* how large can this zone grow */
- vm_size_t elem_size; /* size of an element */
- vm_size_t alloc_size; /* size used for more memory */
- char *zone_name; /* a name for the zone */
- unsigned int
- /* boolean_t */ exhaustible :1, /* (F) merely return if empty? */
- /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */
- /* boolean_t */ expandable :1, /* (T) expand zone (with message)? */
- /* boolean_t */ allows_foreign :1,/* (F) allow non-zalloc space */
- /* boolean_t */ doing_alloc :1, /* is zone expanding now? */
- /* boolean_t */ waiting :1, /* is thread waiting for expansion? */
- /* boolean_t */ async_pending :1; /* asynchronous allocation pending? */
- struct zone_hack * next_zone; /* Link for all-zones list */
- /*
- * more fields follow, but we don't need them. We only need
- * offset from the beginning of struct zone to ->next_zone
- * field: it allows us to scan the list of all zones.
- */
+#if CFS_INDIVIDUAL_ZONE
+extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *);
+extern void * zalloc(zone_t zone);
+extern void *zalloc_noblock(zone_t zone);
+extern void zfree(zone_t zone, void *addr);
+
+struct cfs_zone_nob {
+ struct list_head *z_nob; /* Pointer to z_link */
+ struct list_head z_link; /* Do NOT access it directly */
};
-decl_simple_lock_data(extern, all_zones_lock)
+static struct cfs_zone_nob cfs_zone_nob;
+static spinlock_t cfs_zone_guard;
-/*
- * returns true iff zone with name @name already exists.
- *
- * XXX nikita: this function is defined in this file only because there is no
- * better place to put it in.
- */
-zone_t cfs_find_zone(const char *name)
+cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize)
{
- struct zone_hack *scan;
+ cfs_mem_cache_t *walker = NULL;
- /* from osfmk/kern/zalloc.c */
- extern zone_t first_zone;
+ LASSERT(cfs_zone_nob.z_nob != NULL);
- LASSERT(name != NULL);
+ spin_lock(&cfs_zone_guard);
+ list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) {
+                if (!strcmp(walker->mc_name, name) &&
+ walker->mc_size == objsize)
+ break;
+ }
+ spin_unlock(&cfs_zone_guard);
- simple_lock(&all_zones_lock);
- for (scan = (struct zone_hack *)first_zone;
- scan != NULL; scan = scan->next_zone) {
- if (!strcmp(scan->zone_name, name))
- break;
- }
- simple_unlock(&all_zones_lock);
- return((zone_t)scan);
+ return walker;
}
/*
* survives kext unloading, so that @name cannot be just static string
* embedded into kext image.
*/
-zone_t cfs_zinit(vm_size_t size, vm_size_t max, int alloc, const char *name)
+cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name)
{
+ cfs_mem_cache_t *mc = NULL;
char *cname;
+ MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
+ if (mc == NULL){
+ CERROR("cfs_mem_cache created fail!\n");
+ return NULL;
+ }
+
cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK);
LASSERT(cname != NULL);
- return zinit(size, max, alloc, strcpy(cname, name));
+ mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name));
+ mc->mc_size = objsize;
+ CFS_INIT_LIST_HEAD(&mc->mc_link);
+ strncpy(mc->mc_name, name, 1 + strlen(name));
+ return mc;
+}
+
+void mem_cache_destroy(cfs_mem_cache_t *mc)
+{
+ /*
+         * a zone can NOT be destroyed once created,
+         * so just keep it in the list.
+         *
+         * We will not lose the zone after libcfs is
+         * unloaded; it can be found again via libcfs.zone
+ */
+ return;
}
+#define mem_cache_alloc(mc) zalloc((mc)->mc_cache)
+#ifdef __DARWIN8__
+/* XXX Liang: Tiger doesn't export zalloc_noblock(), fall back to zalloc() */
+# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache)
+#else
+# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache)
+#endif
+#define mem_cache_free(mc, p) zfree((mc)->mc_cache, p)
+
+#else /* !CFS_INDIVIDUAL_ZONE */
+
cfs_mem_cache_t *
-cfs_mem_cache_create (const char *name, size_t objsize, size_t off, unsigned long arg1,
- void (*arg2)(void *, cfs_mem_cache_t *, unsigned long),
- void (*arg3)(void *, cfs_mem_cache_t *, unsigned long))
+mem_cache_find(const char *name, size_t objsize)
{
- cfs_mem_cache_t *new = NULL;
+ return NULL;
+}
- MALLOC(new, cfs_mem_cache_t *, objsize, M_TEMP, M_WAITOK|M_ZERO);
- if (new == NULL){
+cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name)
+{
+ cfs_mem_cache_t *mc = NULL;
+
+ MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
+ if (mc == NULL){
CERROR("cfs_mem_cache created fail!\n");
return NULL;
}
- new->size = objsize;
- CFS_INIT_LIST_HEAD(&new->link);
- strncpy(new->name, name, 1 + strlen(name));
- new->zone = cfs_find_zone(name);
- if (new->zone == NULL) {
- new->zone = cfs_zinit (objsize, KMEM_MAX_ZONE * objsize, 0, name);
- if (new->zone == NULL) {
- CERROR("zone create fault!\n");
- FREE (new, M_TEMP);
- return NULL;
- }
- }
- return new;
+ mc->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT);
+ mc->mc_size = size;
+ return mc;
}
-int
-cfs_mem_cache_destroy (cfs_mem_cache_t *cachep)
+void mem_cache_destroy(cfs_mem_cache_t *mc)
{
- FREE (cachep, M_TEMP);
- return 0;
+ OSMalloc_Tagfree(mc->mc_cache);
+ FREE(mc, M_TEMP);
}
-void *
-cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags)
+#define mem_cache_alloc(mc) OSMalloc((mc)->mc_size, (mc)->mc_cache)
+#define mem_cache_alloc_nb(mc) OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache)
+#define mem_cache_free(mc, p) OSFree(p, (mc)->mc_size, (mc)->mc_cache)
+
+#endif /* !CFS_INDIVIDUAL_ZONE */
+
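+/* reuse a zone left over from a previous load (zones cannot be destroyed) when name and size match */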
+cfs_mem_cache_t *
+cfs_mem_cache_create (const char *name,
+ size_t objsize, size_t off, unsigned long arg1)
+{
+ cfs_mem_cache_t *mc;
+
+ mc = mem_cache_find(name, objsize);
+ if (mc)
+ return mc;
+ mc = mem_cache_create(objsize, name);
+ return mc;
+}
+
+int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep)
{
- return (void *)zalloc(cachep->zone);
+ mem_cache_destroy(cachep);
+ return 0;
+}
+
+void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags)
+{
+ void *result;
+
+ /* zalloc_canblock() is not exported... Emulate it. */
+ if (flags & CFS_ALLOC_ATOMIC) {
+ result = (void *)mem_cache_alloc_nb(cachep);
+ } else {
+ LASSERT(get_preemption_level() == 0);
+ result = (void *)mem_cache_alloc(cachep);
+ }
+ if (result != NULL && (flags & CFS_ALLOC_ZERO))
+ memset(result, 0, cachep->mc_size);
+
+ return result;
}
-void
-cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp)
+void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp)
{
- zfree (cachep->zone, (vm_address_t)objp);
+ mem_cache_free(cachep, objp);
}
/* ---------------------------------------------------------------------------
* "Raw" pages
*/
-extern vm_map_t zone_map;
-static inline vm_map_t page_map(struct xnu_raw_page *pg)
-{
- LASSERT(pg != NULL);
-
- return pg->order == 0 ? zone_map : kernel_map;
-}
-
-static int raw_page_init(struct xnu_raw_page *pg)
-{
- vm_size_t size = (1UL << pg->order) * PAGE_SIZE;
- int upl_flags = UPL_SET_INTERNAL |
- UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_COPYOUT_FROM;
- int kr = 0;
-
- /* XXX is it necessary? */
- kr = vm_map_get_upl(page_map(pg),
- pg->virtual, &size, &pg->upl, 0, 0, &upl_flags, 0);
- return kr;
-}
-
-static void raw_page_done(struct xnu_raw_page *pg)
-{
- ubc_upl_abort(pg->upl, UPL_ABORT_FREE_ON_EMPTY);
- return;
-}
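+/* raw pages are carved from a dedicated CFS_PAGE_SIZE cache; raw_pages counts the live ones */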
+static unsigned int raw_pages = 0;
+static cfs_mem_cache_t *raw_page_cache = NULL;
static struct xnu_page_ops raw_page_ops;
static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = {
[XNU_PAGE_RAW] = &raw_page_ops
};
+#if defined(LIBCFS_DEBUG)
static int page_type_is_valid(cfs_page_t *page)
{
LASSERT(page != NULL);
{
return page->type == XNU_PAGE_RAW;
}
+#endif
static struct xnu_raw_page *as_raw(cfs_page_t *page)
{
.page_address = raw_page_address
};
+extern int get_preemption_level(void);
-extern vm_size_t kalloc_max;
-extern vm_size_t kalloc_max_prerounded;
-extern int first_k_zone;
-extern struct zone *k_zone[16];
-extern vm_offset_t zalloc_canblock( register zone_t, boolean_t );
-extern vm_map_t zone_map;
-
-static inline vm_address_t
-page_zone_alloc(int flags, int order)
-{
- register int zindex;
- register vm_size_t allocsize;
- vm_size_t size = (1UL << order) * PAGE_SIZE;
- vm_address_t addr;
- kern_return_t kr;
-
- assert(order >= 0);
- if (size > PAGE_SIZE){
- /* XXX Liang:
- * zalloc_canblock() call kernel_memory_allocate to allocate
- * pages, kernel_memory_allocate cannot guarantee contig pages!
- * So any request bigger then PAGE_SIZE should not call zalloc()
- *
- * NB. kmem_alloc_contig could be very slow!!!! Anyway, I dont
- * know what will happen if order >= 1 :-(
- * */
- CDEBUG(D_MALLOC, "Allocate contig pages!\n");
- kr = kmem_alloc_contig(kernel_map, &addr, size, 0, 0);
- if (kr)
- return 0;
- return addr;
- }
- allocsize = KALLOC_MINSIZE;
- zindex = first_k_zone;
- while (allocsize < size) {
- allocsize <<= 1;
- zindex++;
- }
- assert(allocsize < kalloc_max);
- if (flags & M_NOWAIT != 0)
- addr = zalloc_canblock(k_zone[zindex], FALSE);
- else
- addr = zalloc_canblock(k_zone[zindex], TRUE);
- return addr;
-}
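+/* pages freed from a non-blockable context are parked here until raw_page_death_row_clean() runs */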
+struct list_head page_death_row;
+spinlock_t page_death_row_phylax;
-/* Allocate a "page", actually upl of darwin */
-struct xnu_raw_page *alloc_raw_pages(u_int32_t flags, u_int32_t order)
+static void raw_page_finish(struct xnu_raw_page *pg)
{
- kern_return_t kr;
- vm_size_t size = (1UL << order) * PAGE_SIZE;
- u_int32_t mflags = 0;
- struct xnu_raw_page *pg;
-
- if (flags & CFS_ALLOC_ATOMIC != 0)
- mflags |= M_NOWAIT;
- else
- mflags |= M_WAITOK;
- if (flags & CFS_ALLOC_ZERO != 0)
- mflags |= M_ZERO;
+        --raw_pages;
+        if (pg->virtual != NULL)
+                cfs_mem_cache_free(raw_page_cache, pg->virtual);
+ cfs_free(pg);
+}
- MALLOC (pg, struct xnu_raw_page *, sizeof *pg, M_TEMP, mflags);
- if (pg == NULL)
- return NULL;
- pg->header.type = XNU_PAGE_RAW;
- pg->order = order;
- cfs_set_page_count(&pg->header, 1);
- pg->virtual = page_zone_alloc(flags, order);
- if (!pg->virtual)
- /*
- * XXX nikita: Liang, shouldn't pg be freed here?
- */
- return NULL;
+void raw_page_death_row_clean(void)
+{
+ struct xnu_raw_page *pg;
- kr = raw_page_init(pg);
- if (kr != 0) {
- size = (1UL << order) * PAGE_SIZE;
- kmem_free(page_map(pg), pg->virtual, size);
- return NULL;
- }
- return pg;
+ spin_lock(&page_death_row_phylax);
+ while (!list_empty(&page_death_row)) {
+ pg = container_of(page_death_row.next,
+ struct xnu_raw_page, link);
+ list_del(&pg->link);
+ spin_unlock(&page_death_row_phylax);
+ raw_page_finish(pg);
+ spin_lock(&page_death_row_phylax);
+ }
+ spin_unlock(&page_death_row_phylax);
}
/* Free a "page" */
-void free_raw_pages(struct xnu_raw_page *pg, u_int32_t order)
+void free_raw_page(struct xnu_raw_page *pg)
{
- vm_size_t size = (1UL << order) * PAGE_SIZE;
-
if (!atomic_dec_and_test(&pg->count))
return;
- raw_page_done(pg);
- kmem_free(page_map(pg), pg->virtual, size);
- FREE(pg, M_TEMP);
-}
-
-cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order)
-{
- return &alloc_raw_pages(flags, order)->header;
+ /*
+ * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may
+ * block. (raw_page_done()->upl_abort() can block too) On the other
+ * hand, cfs_free_page() may be called in non-blockable context. To
+ * work around this, park pages on global list when cannot block.
+ */
+ if (get_preemption_level() > 0) {
+ spin_lock(&page_death_row_phylax);
+ list_add(&pg->link, &page_death_row);
+ spin_unlock(&page_death_row_phylax);
+ } else {
+ raw_page_finish(pg);
+ raw_page_death_row_clean();
+ }
}
cfs_page_t *cfs_alloc_page(u_int32_t flags)
{
- return cfs_alloc_pages(flags, 0);
-}
-
-void cfs_free_pages(cfs_page_t *pages, int order)
-{
- free_raw_pages(as_raw(pages), order);
+ struct xnu_raw_page *page;
+
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ page = cfs_alloc(sizeof *page, flags);
+ if (page != NULL) {
+ page->virtual = cfs_mem_cache_alloc(raw_page_cache, flags);
+ if (page->virtual != NULL) {
+                        ++raw_pages;
+ page->header.type = XNU_PAGE_RAW;
+ atomic_set(&page->count, 1);
+ } else
+ cfs_free(page);
+ }
+ return page != NULL ? &page->header : NULL;
}
-void cfs_free_page(cfs_page_t *page)
+void cfs_free_page(cfs_page_t *pages)
{
- cfs_free_pages(page, 0);
+ free_raw_page(as_raw(pages));
}
void cfs_get_page(cfs_page_t *p)
void *cfs_page_address(cfs_page_t *pg)
{
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
LASSERT(page_type_is_valid(pg));
return page_ops[pg->type]->page_address(pg);
}
int mflags;
mflags = 0;
- if (flags & CFS_ALLOC_ATOMIC != 0) {
- mflags |= 0 /* M_NOWAIT */;
+ if (flags & CFS_ALLOC_ATOMIC) {
+ mflags |= M_NOWAIT;
} else {
LASSERT(get_preemption_level() == 0);
mflags |= M_WAITOK;
}
- if (flags & CFS_ALLOC_ZERO != 0)
+ if (flags & CFS_ALLOC_ZERO)
mflags |= M_ZERO;
return _MALLOC(nr_bytes, M_TEMP, mflags);
void cfs_free_large(void *addr)
{
+ LASSERT(get_preemption_level() == 0);
return _FREE(addr, M_TEMP);
}
+
+/*
+ * Look up cfs_zone_nob via sysctl libcfs.zone; if it cannot be
+ * found (first load of libcfs since boot), allocate and register
+ * sysctl libcfs.zone.
+ */
+int cfs_mem_cache_init(void)
+{
+#if CFS_INDIVIDUAL_ZONE
+ int rc;
+ size_t len;
+
+ len = sizeof(struct cfs_zone_nob);
+ rc = sysctlbyname("libcfs.zone",
+ (void *)&cfs_zone_nob, &len, NULL, 0);
+ if (rc == ENOENT) {
+                /* zone_nob is not registered in libcfs sysctl yet */
+ struct cfs_zone_nob *nob;
+ struct sysctl_oid *oid;
+
+ assert(cfs_sysctl_isvalid());
+
+ nob = _MALLOC(sizeof(struct cfs_zone_nob),
+ M_TEMP, M_WAITOK | M_ZERO);
+ CFS_INIT_LIST_HEAD(&nob->z_link);
+ nob->z_nob = &nob->z_link;
+ oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
+ "zone", nob, sizeof(struct cfs_zone_nob));
+ if (oid == NULL) {
+ _FREE(nob, M_TEMP);
+ return -ENOMEM;
+ }
+ sysctl_register_oid(oid);
+
+ cfs_zone_nob.z_nob = nob->z_nob;
+ }
+ spin_lock_init(&cfs_zone_guard);
+#endif
+ raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0);
+ return 0;
+}
+
+void cfs_mem_cache_fini(void)
+{
+ cfs_mem_cache_destroy(raw_page_cache);
+
+#if CFS_INDIVIDUAL_ZONE
+ cfs_zone_nob.z_nob = NULL;
+ spin_lock_done(&cfs_zone_guard);
+#endif
+}
int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
{
- struct libcfs_ioctl_hdr *hdr;
- struct libcfs_ioctl_data *data;
+ struct libcfs_ioctl_hdr *hdr;
+ struct libcfs_ioctl_data *data;
int err = 0;
ENTRY;
- hdr = (struct libcfs_ioctl_hdr *)buf;
+ hdr = (struct libcfs_ioctl_hdr *)buf;
data = (struct libcfs_ioctl_data *)buf;
- /* portals_ioctl_data has been copied in by ioctl of osx */
+ /* libcfs_ioctl_data has been copied in by ioctl of osx */
memcpy(buf, arg, sizeof(struct libcfs_ioctl_data));
if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
- CERROR("PORTALS: version mismatch kernel vs application\n");
+ CERROR("LIBCFS: version mismatch kernel vs application\n");
RETURN(-EINVAL);
}
if (hdr->ioc_len + buf >= end) {
- CERROR("PORTALS: user buffer exceeds kernel buffer\n");
+ CERROR("LIBCFS: user buffer exceeds kernel buffer\n");
RETURN(-EINVAL);
}
if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
- CERROR("PORTALS: user buffer too small for ioctl\n");
+ CERROR("LIBCFS: user buffer too small for ioctl\n");
RETURN(-EINVAL);
}
buf += size_round(sizeof(*data));
- if (data->ioc_inllen1) {
- err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1));
+ if (data->ioc_inllen1) {
+ err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1));
if (err)
RETURN(err);
- data->ioc_inlbuf1 = buf;
- buf += size_round(data->ioc_inllen1);
- }
-
- if (data->ioc_inllen2) {
- copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2));
+ data->ioc_inlbuf1 = buf;
+ buf += size_round(data->ioc_inllen1);
+ }
+
+ if (data->ioc_inllen2) {
+                err = copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2));
if (err)
RETURN(err);
- data->ioc_inlbuf2 = buf;
- }
+ data->ioc_inlbuf2 = buf;
+ }
RETURN(err);
}
extern struct cfs_psdev_ops libcfs_psdev_ops;
-struct libcfs_device_userstate *mdev_state[16];
+struct libcfs_device_userstate *mdev_state[16];
-static int
+static int
libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p)
-{
+{
struct libcfs_device_userstate *mstat = NULL;
int rc = 0;
- int devid;
- devid = minor(dev);
+ int devid;
+ devid = minor(dev);
        if (devid >= 16) return (-ENXIO);
rc = libcfs_psdev_ops.p_open(0, &mstat);
else
rc = -EPERM;
- if (!rc)
- return rc;
- mdev_state[devid] = mstat;
+ if (rc == 0)
+ mdev_state[devid] = mstat;
return rc;
}
-static int
+static int
libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p)
{
- int devid;
- devid = minor(dev);
+ int devid;
+ devid = minor(dev);
int rc = 0;
        if (devid >= 16) return (-ENXIO);
rc = libcfs_psdev_ops.p_close(0, mdev_state[devid]);
else
rc = -EPERM;
- if (rc)
- return rc;
- mdev_state[devid] = NULL;
+ if (rc == 0)
+ mdev_state[devid] = NULL;
return rc;
}
-static int
+static int
libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
-{
- int rc = 0;
- struct cfs_psdev_file pfile;
- int devid;
- devid = minor(dev);
+{
+ int rc = 0;
+ struct cfs_psdev_file pfile;
+ int devid;
+ devid = minor(dev);
        if (devid >= 16) return (-ENXIO);
- if (suser(p->p_ucred, &p->p_acflag))
- return (-EPERM);
+ if (!is_suser())
+ return (-EPERM);
pfile.off = 0;
pfile.private_data = mdev_state[devid];
- if (libcfs_psdev_ops.p_ioctl != NULL)
+ if (libcfs_psdev_ops.p_ioctl != NULL)
rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
- else
+ else
rc = -EPERM;
return rc;
}
static struct cdevsw libcfs_devsw =
-{
- libcfs_psdev_open, /* open */
- libcfs_psdev_close, /* close */
- NULL, /* read */
- NULL, /* write */
- libcfs_ioctl, /* ioctl */
- NULL, /* stop */
- NULL, /* reset */
- NULL, /* tty's */
- NULL, /* select */
- NULL, /* mmap */
- NULL, /* strategy */
- NULL, /* getc */
- NULL, /* putc */
- 0 /* type */
+{
+ .d_open = libcfs_psdev_open,
+ .d_close = libcfs_psdev_close,
+ .d_read = eno_rdwrt,
+ .d_write = eno_rdwrt,
+ .d_ioctl = libcfs_ioctl,
+ .d_stop = eno_stop,
+ .d_reset = eno_reset,
+ .d_ttys = NULL,
+ .d_select = eno_select,
+ .d_mmap = eno_mmap,
+ .d_strategy = eno_strat,
+ .d_getc = eno_getc,
+ .d_putc = eno_putc,
+ .d_type = 0
};
-cfs_psdev_t libcfs_dev = {
- -1,
- NULL,
- "portals",
- &libcfs_devsw,
+cfs_psdev_t libcfs_dev = {
+ -1,
+ NULL,
+ "lnet",
+ &libcfs_devsw,
NULL
};
-void
-libcfs_daemonize (char *str)
+extern void cfs_sync_init(void);
+extern void cfs_sync_fini(void);
+extern int cfs_sysctl_init(void);
+extern void cfs_sysctl_fini(void);
+extern int cfs_mem_cache_init(void);
+extern void cfs_mem_cache_fini(void);
+extern spinlock_t trace_cpu_serializer;
+extern struct list_head page_death_row;
+extern spinlock_t page_death_row_phylax;
+extern void raw_page_death_row_clean(void);
+extern void cfs_thread_agent_init(void);
+extern void cfs_thread_agent_fini(void);
+extern void cfs_symbol_clean(void);
+extern struct rw_semaphore cfs_symbol_lock;
+extern struct list_head cfs_symbol_list;
+
+int libcfs_arch_init(void)
{
- printf("Daemonize request: %s.\n", str);
- return;
+ cfs_sync_init();
+
+ cfs_sysctl_init();
+ cfs_mem_cache_init();
+
+ init_rwsem(&cfs_symbol_lock);
+ CFS_INIT_LIST_HEAD(&cfs_symbol_list);
+
+ cfs_thread_agent_init();
+
+ spin_lock_init(&trace_cpu_serializer);
+
+ CFS_INIT_LIST_HEAD(&page_death_row);
+ spin_lock_init(&page_death_row_phylax);
+ return 0;
}
-void
-libcfs_blockallsigs(void)
+void libcfs_arch_cleanup(void)
{
- return;
+ cfs_symbol_clean();
+
+ spin_lock_done(&trace_cpu_serializer);
+
+ cfs_thread_agent_fini();
+
+ raw_page_death_row_clean();
+ spin_lock_done(&page_death_row_phylax);
+
+ fini_rwsem(&cfs_symbol_lock);
+
+ cfs_mem_cache_fini();
+ cfs_sysctl_fini();
+
+ cfs_sync_fini();
}
+
#include <mach/mach_types.h>
#include <string.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
#include <sys/file.h>
#include <sys/conf.h>
-#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/filedesc.h>
#include <sys/namei.h>
#include <miscfs/devfs/devfs.h>
-#include <kern/kalloc.h>
-#include <kern/zalloc.h>
#include <kern/thread.h>
#include <libcfs/libcfs.h>
#include <libcfs/kp30.h>
-void *darwin_current_journal_info = NULL;
-int darwin_current_cap_effective = -1;
-
-/*
- * cfs pseudo device, actually pseudo char device in darwin
+/*
+ * cfs pseudo device, actually pseudo char device in darwin
*/
-#define KPORTAL_MAJOR -1
+#define KLNET_MAJOR -1
kern_return_t cfs_psdev_register(cfs_psdev_t *dev) {
- dev->index = cdevsw_add(KPORTAL_MAJOR, dev->devsw);
+ dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw);
if (dev->index < 0) {
- printf("portal_init: failed to allocate a major number!\n");
+ printf("libcfs_init: failed to allocate a major number!\n");
return KERN_FAILURE;
}
- dev->handle = devfs_make_node(makedev (dev->index, 0),
- DEVFS_CHAR, UID_ROOT,
+ dev->handle = devfs_make_node(makedev (dev->index, 0),
+ DEVFS_CHAR, UID_ROOT,
GID_WHEEL, 0666, (char *)dev->name, 0);
return KERN_SUCCESS;
}
return KERN_SUCCESS;
}
-/*
- * KPortal symbol register / unregister support
+/*
+ * KPortal symbol register / unregister support
*/
-static struct rw_semaphore cfs_symbol_lock;
-struct list_head cfs_symbol_list;
+struct rw_semaphore cfs_symbol_lock;
+struct list_head cfs_symbol_list;
void *
cfs_symbol_get(const char *name)
sym->ref ++;
break;
}
- }
+ }
up_read(&cfs_symbol_lock);
- if (sym != NULL)
+ if (sym != NULL)
return sym->value;
return NULL;
}
LASSERT(sym->ref >= 0);
break;
}
- }
+ }
up_read(&cfs_symbol_lock);
LASSERT(sym != NULL);
return;
}
-/*
- * Register sysctl table
- */
-cfs_sysctl_table_header_t *
-register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg)
+struct kernel_thread_arg
{
- cfs_sysctl_table_t item;
- int i = 0;
+ spinlock_t lock;
+ atomic_t inuse;
+ cfs_thread_t func;
+ void *arg;
+};
- while ((item = table[i++]) != NULL) {
- sysctl_register_oid(item);
- }
- return table;
-}
+struct kernel_thread_arg cfs_thread_arg;
+
+#define THREAD_ARG_FREE 0
+#define THREAD_ARG_HOLD 1
+#define THREAD_ARG_RECV 2
+
+#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v)
+#define get_targ_stat(a) atomic_read(&(a)->inuse)
/*
- * Unregister sysctl table
+ * Hold the thread argument and set the status of thread_status
+ * to THREAD_ARG_HOLD, if the thread argument is held by other
+ * threads (It's THREAD_ARG_HOLD already), current-thread has to wait.
*/
-void
-unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table) {
- int i = 0;
- cfs_sysctl_table_t item;
+#define thread_arg_hold(pta, _func, _arg) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_FREE) { \
+ set_targ_stat((pta), THREAD_ARG_HOLD); \
+ (pta)->arg = (void *)_arg; \
+ (pta)->func = _func; \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ cfs_schedule(); \
+        } while(1);
- while ((item = table[i++]) != NULL) {
- sysctl_unregister_oid(item);
- }
- return;
-}
+/*
+ * Release the thread argument if the thread argument has been
+ * received by the child-thread (Status of thread_args is
+ * THREAD_ARG_RECV), otherwise current-thread has to wait.
+ * After release, the thread_args' status will be set to
+ * THREAD_ARG_FREE, and others can re-use the thread_args to
+ * create new kernel_thread.
+ */
+#define thread_arg_release(pta) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_RECV) { \
+ (pta)->arg = NULL; \
+ (pta)->func = NULL; \
+ set_targ_stat(pta, THREAD_ARG_FREE); \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ cfs_schedule(); \
+ } while(1)
-struct kernel_thread_arg cfs_thread_arg;
+/*
+ * Receive thread argument (Used in child thread), set the status
+ * of thread_args to THREAD_ARG_RECV.
+ */
+#define __thread_arg_recv_fin(pta, _func, _arg, fin) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \
+ if (fin) \
+ set_targ_stat(pta, THREAD_ARG_RECV);\
+ _arg = (pta)->arg; \
+ _func = (pta)->func; \
+ spin_unlock(&(pta)->lock); \
+ break; \
+ } \
+ spin_unlock(&(pta)->lock); \
+ cfs_schedule(); \
+        } while (1);
+
+/*
+ * Just set the thread_args' status to THREAD_ARG_RECV
+ */
+#define thread_arg_fin(pta) \
+ do { \
+ spin_lock(&(pta)->lock); \
+ assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \
+ set_targ_stat(pta, THREAD_ARG_RECV); \
+ spin_unlock(&(pta)->lock); \
+ } while(0)
+
+#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1)
+#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0)
void
-cfs_thread_agent_init()
-{
- set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE);
- spin_lock_init(&cfs_thread_arg.lock);
- cfs_thread_arg.arg = NULL;
- cfs_thread_arg.func = NULL;
+cfs_thread_agent_init(void)
+{
+ set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE);
+ spin_lock_init(&cfs_thread_arg.lock);
+ cfs_thread_arg.arg = NULL;
+ cfs_thread_arg.func = NULL;
}
void
-cfs_thread_agent (void)
+cfs_thread_agent_fini(void)
+{
+ assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE);
+
+ spin_lock_done(&cfs_thread_arg.lock);
+}
+
+/*
+ *
+ * All requests to create kernel thread will create a new
+ * thread instance of cfs_thread_agent, one by one.
+ * cfs_thread_agent will call the caller's thread function
+ * with argument supplied by caller.
+ */
+void
+cfs_thread_agent (void)
{
cfs_thread_t func = NULL;
void *arg = NULL;
thread_arg_recv(&cfs_thread_arg, func, arg);
- printf("entry of thread agent (func: %08lx).\n", (void *)func);
+ /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */
assert(func != NULL);
func(arg);
- printf("thread agent exit. (func: %08lx)\n", (void *)func);
- (void) thread_terminate(current_act());
+ /* printf("thread agent exit. (func: %08lx)\n", (void *)func); */
+ (void) thread_terminate(current_thread());
}
+extern thread_t kernel_thread(task_t task, void (*start)(void));
+
int
cfs_kernel_thread(cfs_thread_t func, void *arg, int flag)
-{
- int ret = 0;
- thread_t th = NULL;
-
- thread_arg_hold(&cfs_thread_arg, func, arg);
- th = kernel_thread(kernel_task, cfs_thread_agent);
- thread_arg_release(&cfs_thread_arg);
- if (th == THREAD_NULL)
- ret = -1;
+{
+ int ret = 0;
+ thread_t th = NULL;
+
+ thread_arg_hold(&cfs_thread_arg, func, arg);
+ th = kernel_thread(kernel_task, cfs_thread_agent);
+ thread_arg_release(&cfs_thread_arg);
+ if (th == THREAD_NULL)
+ ret = -1;
return ret;
}
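+/* XXX: there is no Linux-style daemonize() on Darwin; just tag the current process with the name */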
+void cfs_daemonize(char *str)
+{
+ snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str);
+ return;
+}
+
+int cfs_signal_pending(void)
+{
+#ifdef __DARWIN8__
+ extern int thread_issignal(proc_t, thread_t, sigset_t);
+ return thread_issignal(current_proc(), current_thread(), (sigset_t)-1);
+#else
+        return SHOULDissignal(current_proc(), current_uthread());
+#endif
+}
+
+/*
+ * XXX Liang: kexts cannot access sigmask in Darwin8.
+ * it's almost impossible for us to get/set signal mask
+ * without patching kernel.
+ * Should we provide these functions in xnu?
+ *
+ * There are several functions/MACRO which are very
+ * confusing for me:
+ *
+ * proc_pendingsignals()
+ * thread_issignal()
+ * SHOULDissignal()
+ */
+extern int block_procsigmask(struct proc *p, int bit);
+
+void cfs_block_allsigs()
+{
+#ifdef __DARWIN8__
+#else
+ block_procsigmask(current_proc(), -1);
+#endif
+}
+
+void cfs_block_sigs(sigset_t bit)
+{
+#ifdef __DARWIN8__
+#else
+ block_procsigmask(current_proc(), bit);
+#endif
+}
+
+#ifdef __DARWIN8__
+
+#else /* !__DARWIN8__ */
+
void lustre_cone_in(boolean_t *state, funnel_t **cone)
{
*cone = thread_funnel_get();
else if (cone == NULL)
(void) thread_funnel_set(network_flock, state);
}
-
+#endif /* !__DARWIN8__ */
void cfs_waitq_init(struct cfs_waitq *waitq)
{
}
void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
-{
+{
link->wl_waitq = waitq;
ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
}
void cfs_waitq_signal(struct cfs_waitq *waitq)
{
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
ksleep_wake(&waitq->wq_ksleep_chan);
}
ksleep_wake_all(&waitq->wq_ksleep_chan);
}
-void cfs_waitq_wait(struct cfs_waitlink *link)
-{
- ksleep_wait(&link->wl_waitq->wq_ksleep_chan);
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state)
+{
+ ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state);
}
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+ cfs_task_state_t state,
cfs_duration_t timeout)
-{
- CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout);
- return ksleep_timedwait(&link->chan->c, timeout);
+{
+ CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout);
+ return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan,
+ state, timeout);
}
typedef void (*ktimer_func_t)(void *);
void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg)
-{
+{
ktimer_init(&t->t, (ktimer_func_t)func, arg);
}
void cfs_timer_done(struct cfs_timer *t)
-{
+{
ktimer_done(&t->t);
}
void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline)
-{
+{
ktimer_arm(&t->t, deadline);
}
void cfs_timer_disarm(struct cfs_timer *t)
-{
+{
ktimer_disarm(&t->t);
}
int cfs_timer_is_armed(struct cfs_timer *t)
-{
+{
return ktimer_is_armed(&t->t);
}
cfs_time_t cfs_timer_deadline(struct cfs_timer *t)
-{
+{
return ktimer_deadline(&t->t);
}
-int
-libcfs_arch_init(void)
+void cfs_enter_debugger(void)
{
- init_rwsem(&cfs_symbol_lock);
- CFS_INIT_LIST_HEAD(&cfs_symbol_list);
- cfs_thread_agent_init();
- return 0;
+#ifdef __DARWIN8__
+ extern void Debugger(const char * reason);
+ Debugger("CFS");
+#else
+ extern void PE_enter_debugger(char *cause);
+ PE_enter_debugger("CFS");
+#endif
}
-void
-libcfs_arch_cleanup(void)
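+/* Darwin8 exposes the CPU count via sysctl hw.activecpu; older kernels go through host_info() */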
+int cfs_online_cpus(void)
{
- cfs_symbol_clean();
-}
+ int activecpu;
+ size_t size;
+
+#ifdef __DARWIN8__
+ size = sizeof(int);
+ sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0);
+ return activecpu;
+#else
+ host_basic_info_data_t hinfo;
+ kern_return_t kret;
+ int count = HOST_BASIC_INFO_COUNT;
+#define BSD_HOST 1
+ kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count);
+ if (kret == KERN_SUCCESS)
+ return (hinfo.avail_cpus);
+ return(-EINVAL);
+#endif
+}
+
+int cfs_ncpus(void)
+{
+ int ncpu;
+ size_t size;
+ size = sizeof(int);
+
+ sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0);
+ return ncpu;
+}
#include <mach/mach_types.h>
#define DEBUG_SUBSYSTEM S_LNET
+
#include <libcfs/libcfs.h>
-static cfs_sysctl_table_header_t *portals_table_header = NULL;
+#define LIBCFS_SYSCTL "libcfs"
+#define LIBCFS_SYSCTL_SPRITE "sprite"
+#define LIBCFS_SYSCTL_MAGIC 0xbabeface
+
+static struct libcfs_sysctl_sprite {
+ int ss_magic;
+ struct sysctl_oid_list *ss_link;
+} libcfs_sysctl_sprite = { 0, NULL };
+
+static cfs_sysctl_table_header_t *libcfs_table_header = NULL;
extern unsigned int libcfs_debug;
extern char debug_file_path[1024];
extern unsigned int libcfs_subsystem_debug;
extern int cfs_trace_daemon SYSCTL_HANDLER_ARGS;
extern int cfs_debug_mb SYSCTL_HANDLER_ARGS;
/*
- * sysctl table for portals
+ * sysctl table for lnet
*/
-SYSCTL_NODE (, OID_AUTO, portals, CTLFLAG_RW,
- 0, "portals sysctl top");
-SYSCTL_INT(_portals, OID_AUTO, debug,
+SYSCTL_NODE (, OID_AUTO, lnet, CTLFLAG_RW,
+ 0, "lnet sysctl top");
+
+SYSCTL_INT(_lnet, OID_AUTO, debug,
CTLTYPE_INT | CTLFLAG_RW , &libcfs_debug,
0, "debug");
-SYSCTL_INT(_portals, OID_AUTO, subsystem_debug,
+SYSCTL_INT(_lnet, OID_AUTO, subsystem_debug,
CTLTYPE_INT | CTLFLAG_RW, &libcfs_subsystem_debug,
0, "subsystem debug");
-SYSCTL_INT(_portals, OID_AUTO, printk,
+SYSCTL_INT(_lnet, OID_AUTO, printk,
CTLTYPE_INT | CTLFLAG_RW, &libcfs_printk,
0, "printk");
-SYSCTL_STRING(_portals, OID_AUTO, debug_path,
+SYSCTL_STRING(_lnet, OID_AUTO, debug_path,
CTLTYPE_STRING | CTLFLAG_RW, debug_file_path,
1024, "debug path");
-SYSCTL_INT(_portals, OID_AUTO, memused,
+SYSCTL_INT(_lnet, OID_AUTO, memused,
CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_kmemory.counter,
0, "memused");
-SYSCTL_PROC(_portals, OID_AUTO, trace_daemon,
+SYSCTL_INT(_lnet, OID_AUTO, catastrophe,
+ CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_catastrophe,
+ 0, "catastrophe");
+SYSCTL_PROC(_lnet, OID_AUTO, trace_daemon,
CTLTYPE_STRING | CTLFLAG_RW, 0,
0, &cfs_trace_daemon, "A", "trace daemon");
-SYSCTL_PROC(_portals, OID_AUTO, debug_mb,
+SYSCTL_PROC(_lnet, OID_AUTO, debug_mb,
CTLTYPE_INT | CTLFLAG_RW, &max_debug_mb,
0, &cfs_debug_mb, "L", "max debug size");
static cfs_sysctl_table_t top_table[] = {
- &sysctl__portals,
- &sysctl__portals_debug,
- &sysctl__portals_subsystem_debug,
- &sysctl__portals_printk,
- &sysctl__portals_debug_path,
- &sysctl__portals_memused,
- &sysctl__portals_trace_daemon,
- &sysctl__portals_debug_mb,
+ &sysctl__lnet,
+ &sysctl__lnet_debug,
+ &sysctl__lnet_subsystem_debug,
+ &sysctl__lnet_printk,
+ &sysctl__lnet_debug_path,
+ &sysctl__lnet_memused,
+ &sysctl__lnet_catastrophe,
+ &sysctl__lnet_trace_daemon,
+ &sysctl__lnet_debug_mb,
NULL
};
+/*
+ * Register sysctl table
+ */
+cfs_sysctl_table_header_t *
+cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg)
+{
+ cfs_sysctl_table_t item;
+ int i = 0;
+
+ while ((item = table[i++]) != NULL)
+ sysctl_register_oid(item);
+ return table;
+}
+
+/*
+ * Unregister sysctl table
+ */
+void
+cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) {
+ int i = 0;
+ cfs_sysctl_table_t item;
+
+ while ((item = table[i++]) != NULL)
+ sysctl_unregister_oid(item);
+ return;
+}
+
+/*
+ * Allocate a sysctl oid.
+ */
+static struct sysctl_oid *
+cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, void *arg1, int arg2, const char *fmt,
+ int (*handler) SYSCTL_HANDLER_ARGS)
+{
+ struct sysctl_oid *oid;
+ char *sname = NULL;
+ char *sfmt = NULL;
+
+ if (strlen(name) + 1 > CTL_MAXNAME) {
+ printf("libcfs: sysctl name: %s is too long.\n", name);
+ return NULL;
+ }
+ oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid),
+ M_TEMP, M_WAITOK | M_ZERO);
+ if (oid == NULL)
+ return NULL;
+
+        sname = (char *)_MALLOC(CTL_MAXNAME,
+ M_TEMP, M_WAITOK | M_ZERO);
+ if (sname == NULL)
+ goto error;
+ strcpy(sname, name);
+
+ sfmt = (char *)_MALLOC(4, M_TEMP, M_WAITOK | M_ZERO);
+ if (sfmt == NULL)
+ goto error;
+ strcpy(sfmt, fmt);
+
+ if (parent == NULL)
+ oid->oid_parent = &sysctl__children;
+ else
+ oid->oid_parent = parent;
+ oid->oid_number = nbr;
+ oid->oid_kind = access;
+ oid->oid_name = sname;
+ oid->oid_handler = handler;
+ oid->oid_fmt = sfmt;
+
+        if ((access & CTLTYPE_NODE) != 0) {
+ /* It's a sysctl node */
+ struct sysctl_oid_list *link;
+
+ link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list),
+ M_TEMP, M_WAITOK | M_ZERO);
+ if (link == NULL)
+ goto error;
+ oid->oid_arg1 = link;
+ oid->oid_arg2 = 0;
+ } else {
+ oid->oid_arg1 = arg1;
+ oid->oid_arg2 = arg2;
+ }
+
+ return oid;
+error:
+ if (sfmt != NULL)
+ _FREE(sfmt, M_TEMP);
+ if (sname != NULL)
+ _FREE(sname, M_TEMP);
+ if (oid != NULL)
+ _FREE(oid, M_TEMP);
+ return NULL;
+}
+
+void cfs_free_sysctl(struct sysctl_oid *oid)
+{
+ if (oid->oid_name != NULL)
+ _FREE((void *)oid->oid_name, M_TEMP);
+ if (oid->oid_fmt != NULL)
+ _FREE((void *)oid->oid_fmt, M_TEMP);
+        if (((oid->oid_kind & CTLTYPE_NODE) != 0) && oid->oid_arg1)
+ /* XXX Liang: need to assert the list is empty */
+ _FREE(oid->oid_arg1, M_TEMP);
+ _FREE(oid, M_TEMP);
+}
+
+#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \
+ (libcfs_sysctl_sprite.ss_link != NULL))
+
+int
+cfs_sysctl_isvalid(void)
+{
+ return CFS_SYSCTL_ISVALID;
+}
+
+struct sysctl_oid *
+cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, int (*handler) SYSCTL_HANDLER_ARGS)
+{
+ if (parent == NULL && CFS_SYSCTL_ISVALID)
+ parent = libcfs_sysctl_sprite.ss_link;
+ return cfs_alloc_sysctl(parent, nbr, CTLTYPE_NODE | access, name,
+ NULL, 0, "N", handler);
+}
+
+struct sysctl_oid *
+cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, int *ptr, int val)
+{
+ if (parent == NULL && CFS_SYSCTL_ISVALID)
+ parent = libcfs_sysctl_sprite.ss_link;
+ return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name,
+ ptr, val, "I", sysctl_handle_int);
+}
+
+struct sysctl_oid *
+cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, int *ptr, int val)
+{
+ if (parent == NULL && CFS_SYSCTL_ISVALID)
+ parent = libcfs_sysctl_sprite.ss_link;
+ return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name,
+ ptr, val, "L", sysctl_handle_long);
+}
+
+struct sysctl_oid *
+cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, char *ptr, int len)
+{
+ if (parent == NULL && CFS_SYSCTL_ISVALID)
+ parent = libcfs_sysctl_sprite.ss_link;
+ return cfs_alloc_sysctl(parent, nbr, CTLTYPE_STRING | access, name,
+ ptr, len, "A", sysctl_handle_string);
+}
+
+struct sysctl_oid *
+cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
+ const char *name, void *ptr, int size)
+{
+ if (parent == NULL && CFS_SYSCTL_ISVALID)
+ parent = libcfs_sysctl_sprite.ss_link;
+ return cfs_alloc_sysctl(parent, nbr, CTLTYPE_OPAQUE | access, name,
+ ptr, size, "S", sysctl_handle_opaque);
+}
+
/* no proc in osx */
cfs_proc_dir_entry_t *
cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent)
insert_proc(void)
{
#if 1
- if (!portals_table_header)
- portals_table_header = register_cfs_sysctl_table(top_table, 0);
+ if (!libcfs_table_header)
+ libcfs_table_header = cfs_register_sysctl_table(top_table, 0);
#endif
return 0;
}
remove_proc(void)
{
#if 1
- if (portals_table_header != NULL)
- unregister_cfs_sysctl_table(portals_table_header);
- portals_table_header = NULL;
+ if (libcfs_table_header != NULL)
+ cfs_unregister_sysctl_table(libcfs_table_header);
+ libcfs_table_header = NULL;
#endif
return;
}
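+/*
+ * "Sprite" bootstrap: the first load of libcfs registers a LIBCFS_SYSCTL
+ * node plus a LIBCFS_SYSCTL_SPRITE leaf holding a magic number and a
+ * pointer to the node's child list.  Subsequent loads find the existing
+ * node through sysctlbyname("libcfs.sprite") instead of registering a
+ * duplicate hierarchy.
+ */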
+int
+cfs_sysctl_init(void)
+{
+ struct sysctl_oid *oid_root;
+ struct sysctl_oid *oid_sprite;
+ struct libcfs_sysctl_sprite *sprite;
+ size_t len;
+ int rc;
+
+ len = sizeof(struct libcfs_sysctl_sprite);
+ rc = sysctlbyname("libcfs.sprite",
+ (void *)&libcfs_sysctl_sprite, &len, NULL, 0);
+ if (rc == 0) {
+ /*
+ * XXX Liang: assert (rc == 0 || rc == ENOENT)
+ *
+ * libcfs.sprite was registered by a previous
+ * load of libcfs
+ */
+ if (libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) {
+ printf("libcfs: magic number of libcfs.sprite "
+ "is not right (%lx, %lx)\n",
+ libcfs_sysctl_sprite.ss_magic,
+ LIBCFS_SYSCTL_MAGIC);
+ return -1;
+ }
+ printf("libcfs: registered libcfs.sprite found.\n");
+ return 0;
+ }
+ oid_root = cfs_alloc_sysctl_node(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
+ LIBCFS_SYSCTL, 0);
+ if (oid_root == NULL)
+ return -1;
+ sysctl_register_oid(oid_root);
+
+ sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite),
+ M_TEMP, M_WAITOK | M_ZERO);
+ if (sprite == NULL) {
+ sysctl_unregister_oid(oid_root);
+ cfs_free_sysctl(oid_root);
+ return -1;
+ }
+ sprite->ss_magic = LIBCFS_SYSCTL_MAGIC;
+ sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1;
+ oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1,
+ OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
+ LIBCFS_SYSCTL_SPRITE, sprite,
+ sizeof(struct libcfs_sysctl_sprite));
+ if (oid_sprite == NULL) {
+ _FREE(sprite, M_TEMP);
+ sysctl_unregister_oid(oid_root);
+ cfs_free_sysctl(oid_root);
+ return -1;
+ }
+ sysctl_register_oid(oid_sprite);
+
+ libcfs_sysctl_sprite.ss_magic = sprite->ss_magic;
+ libcfs_sysctl_sprite.ss_link = sprite->ss_link;
+
+ return 0;
+}
+
+void
+cfs_sysctl_fini(void)
+{
+ libcfs_sysctl_sprite.ss_magic = 0;
+ libcfs_sysctl_sprite.ss_link = NULL;
+}
*
* Created by nikita on Sun Jul 18 2004.
*
- * Prototypes of XNU synchronization primitives.
+ * XNU synchronization primitives.
*/
/*
* A lot can be optimized here.
*/
-#include <mach/mach_types.h>
-#include <sys/types.h>
-#include <kern/simple_lock.h>
-
#define DEBUG_SUBSYSTEM S_LNET
+#ifdef __DARWIN8__
+# include <kern/locks.h>
+#else
+# include <mach/mach_types.h>
+# include <sys/types.h>
+# include <kern/simple_lock.h>
+#endif
+
#include <libcfs/libcfs.h>
#include <libcfs/kp30.h>
#define get_preemption_level() (0)
#endif
-/*
- * Warning: low level portals debugging code (portals_debug_msg(), for
- * example), uses spin-locks, so debugging output here may lead to nasty
- * surprises.
- */
-
#if SMP
+#ifdef __DARWIN8__
+
+static lck_grp_t *cfs_lock_grp = NULL;
+
+/* hw_lock_* are not exported by Darwin8 */
+static inline void xnu_spin_init(xnu_spin_t *s)
+{
+ SLASSERT(cfs_lock_grp != NULL);
+ *s = lck_spin_alloc_init(cfs_lock_grp, LCK_ATTR_NULL);
+}
+
+static inline void xnu_spin_done(xnu_spin_t *s)
+{
+ SLASSERT(cfs_lock_grp != NULL);
+ lck_spin_free(*s, cfs_lock_grp);
+ *s = NULL;
+}
+
+#define xnu_spin_lock(s) lck_spin_lock(*(s))
+#define xnu_spin_unlock(s) lck_spin_unlock(*(s))
+
+#warning "Darwin8 does not export lck_spin_try_lock"
+#define xnu_spin_try(s) (1)
+#else /* DARWIN8 */
extern void hw_lock_init(hw_lock_t);
extern void hw_lock_lock(hw_lock_t);
extern void hw_lock_unlock(hw_lock_t);
extern unsigned int hw_lock_try(hw_lock_t);
extern unsigned int hw_lock_held(hw_lock_t);
+#define xnu_spin_init(s) hw_lock_init(s)
+#define xnu_spin_done(s) do {} while (0)
+#define xnu_spin_lock(s) hw_lock_lock(s)
+#define xnu_spin_unlock(s) hw_lock_unlock(s)
+#define xnu_spin_try(s) hw_lock_try(s)
+#endif /* DARWIN8 */
+
+#else /* SMP */
+#define xnu_spin_init(s) do {} while (0)
+#define xnu_spin_done(s) do {} while (0)
+#define xnu_spin_lock(s) do {} while (0)
+#define xnu_spin_unlock(s) do {} while (0)
+#define xnu_spin_try(s) (1)
+#endif /* SMP */
+
+/*
+ * Warning: low level libcfs debugging code (libcfs_debug_msg(), for
+ * example), uses spin-locks, so debugging output here may lead to nasty
+ * surprises.
+ *
+ * In the uniprocessor build the spin-lock itself compiles away; only
+ * the sanity checks remain.
+ */
+
void kspin_init(struct kspin *spin)
{
SLASSERT(spin != NULL);
- hw_lock_init(&spin->lock);
+ xnu_spin_init(&spin->lock);
ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
ON_SYNC_DEBUG(spin->owner = NULL);
}
SLASSERT(spin != NULL);
SLASSERT(spin->magic == KSPIN_MAGIC);
SLASSERT(spin->owner == NULL);
+ xnu_spin_done(&spin->lock);
}
void kspin_lock(struct kspin *spin)
{
SLASSERT(spin != NULL);
SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner != current_thread);
+ SLASSERT(spin->owner != current_thread());
+
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
- hw_lock_lock(&spin->lock);
+ xnu_spin_lock(&spin->lock);
SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread);
+ ON_SYNC_DEBUG(spin->owner = current_thread());
}
void kspin_unlock(struct kspin *spin)
{
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
SLASSERT(spin != NULL);
SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == current_thread);
+ SLASSERT(spin->owner == current_thread());
ON_SYNC_DEBUG(spin->owner = NULL);
- hw_lock_unlock(&spin->lock);
+ xnu_spin_unlock(&spin->lock);
}
int kspin_trylock(struct kspin *spin)
SLASSERT(spin != NULL);
SLASSERT(spin->magic == KSPIN_MAGIC);
- if (hw_lock_try(&spin->lock)) {
+ if (xnu_spin_try(&spin->lock)) {
SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread);
+ ON_SYNC_DEBUG(spin->owner = current_thread());
return 1;
} else
return 0;
}
-/* SMP */
-#else
-
-/*
- * uniprocessor version of spin-lock. Only checks.
- */
-
-void kspin_init(struct kspin *spin)
+#if XNU_SYNC_DEBUG
+int kspin_islocked(struct kspin *spin)
{
SLASSERT(spin != NULL);
- ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
- ON_SYNC_DEBUG(spin->owner = NULL);
+ SLASSERT(spin->magic == KSPIN_MAGIC);
+ return spin->owner == current_thread();
}
-void kspin_done(struct kspin *spin)
+int kspin_isnotlocked(struct kspin *spin)
{
SLASSERT(spin != NULL);
SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == NULL);
+ return spin->owner != current_thread();
}
+#endif
-void kspin_lock(struct kspin *spin)
+/*
+ * read/write spin-lock
+ */
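+/*
+ * Convention: rwspin->count > 0 is the number of readers holding the
+ * lock, count == -1 means a single writer holds it, and count == 0
+ * means it is free.  All transitions of count are made under the
+ * "guard" kspin.
+ */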
+void krw_spin_init(struct krw_spin *rwspin)
{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread);
+ SLASSERT(rwspin != NULL);
+
+ kspin_init(&rwspin->guard);
+ rwspin->count = 0;
+ ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC);
}
-void kspin_unlock(struct kspin *spin)
+void krw_spin_done(struct krw_spin *rwspin)
{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == current_thread);
- ON_SYNC_DEBUG(spin->owner = NULL);
+ SLASSERT(rwspin != NULL);
+ SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+ SLASSERT(rwspin->count == 0);
+ kspin_done(&rwspin->guard);
}
-int kspin_trylock(struct kspin *spin)
+void krw_spin_down_r(struct krw_spin *rwspin)
{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- SLASSERT(spin->owner == NULL);
- ON_SYNC_DEBUG(spin->owner = current_thread);
- return 1;
+ SLASSERT(rwspin != NULL);
+ SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+
+ while (1) {
+ kspin_lock(&rwspin->guard);
+ if (rwspin->count >= 0)
+ break;
+ kspin_unlock(&rwspin->guard);
+ }
+ ++ rwspin->count;
+ kspin_unlock(&rwspin->guard);
}
-/* SMP */
-#endif
+void krw_spin_down_w(struct krw_spin *rwspin)
+{
+ SLASSERT(rwspin != NULL);
+ SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
-#if XNU_SYNC_DEBUG
-int kspin_islocked(struct kspin *spin)
+ while (1) {
+ kspin_lock(&rwspin->guard);
+ if (rwspin->count == 0)
+ break;
+ kspin_unlock(&rwspin->guard);
+ }
+ rwspin->count = -1;
+ kspin_unlock(&rwspin->guard);
+}
+
+void krw_spin_up_r(struct krw_spin *rwspin)
{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- return spin->owner == current_thread;
+ SLASSERT(rwspin != NULL);
+ SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+ SLASSERT(rwspin->count > 0);
+
+ kspin_lock(&rwspin->guard);
+ -- rwspin->count;
+ kspin_unlock(&rwspin->guard);
}
-int kspin_isnotlocked(struct kspin *spin)
+void krw_spin_up_w(struct krw_spin *rwspin)
{
- SLASSERT(spin != NULL);
- SLASSERT(spin->magic == KSPIN_MAGIC);
- return spin->owner != current_thread;
+ SLASSERT(rwspin != NULL);
+ SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+ SLASSERT(rwspin->count == -1);
+
+ kspin_lock(&rwspin->guard);
+ rwspin->count = 0;
+ kspin_unlock(&rwspin->guard);
}
-#endif
+/*
+ * semaphore
+ */
+#ifdef __DARWIN8__
+
+#define xnu_waitq_init(q, a) do {} while (0)
+#define xnu_waitq_done(q) do {} while (0)
+#define xnu_waitq_wakeup_one(q, e, s) ({wakeup_one((void *)(e)); KERN_SUCCESS;})
+#define xnu_waitq_wakeup_all(q, e, s) ({wakeup((void *)(e)); KERN_SUCCESS;})
+#define xnu_waitq_assert_wait(q, e, s) assert_wait((e), s)
+
+#else /* DARWIN8 */
+
+#define xnu_waitq_init(q, a) wait_queue_init((q), a)
+#define xnu_waitq_done(q) do {} while (0)
+#define xnu_waitq_wakeup_one(q, e, s) wait_queue_wakeup_one((q), (event_t)(e), s)
+#define xnu_waitq_wakeup_all(q, e, s) wait_queue_wakeup_all((q), (event_t)(e), s)
+#define xnu_waitq_assert_wait(q, e, s) wait_queue_assert_wait((q), (event_t)(e), s)
+
+#endif /* DARWIN8 */
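+
+/*
+ * Counting semaphore built from a kspin-guarded counter plus a wait
+ * queue: ksem_down() blocks until "value" is large enough, and
+ * ksem_up() wakes one or all waiters through the xnu_waitq_* wrappers
+ * above.
+ */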
void ksem_init(struct ksem *sem, int value)
{
SLASSERT(sem != NULL);
kspin_init(&sem->guard);
- wait_queue_init(&sem->q, SYNC_POLICY_FIFO);
+ xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO);
sem->value = value;
ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC);
}
kspin_lock(&sem->guard);
sem->value += value;
if (sem->value == 0)
- result = wait_queue_wakeup_one(&sem->q, (event_t)sem,
- THREAD_AWAKENED);
+ result = xnu_waitq_wakeup_one(&sem->q, sem,
+ THREAD_AWAKENED);
else
- result = wait_queue_wakeup_all(&sem->q, (event_t)sem,
- THREAD_AWAKENED);
+ result = xnu_waitq_wakeup_all(&sem->q, sem,
+ THREAD_AWAKENED);
kspin_unlock(&sem->guard);
SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
return (result == KERN_SUCCESS) ? 0 : 1;
kspin_lock(&sem->guard);
while (sem->value < value) {
- result = wait_queue_assert_wait(&sem->q, (event_t)sem,
- THREAD_UNINT);
+ result = xnu_waitq_assert_wait(&sem->q, sem,
+ THREAD_UNINT);
SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
kspin_unlock(&sem->guard);
if (result == THREAD_WAITING)
{
SLASSERT(mut != NULL);
SLASSERT(mut->magic == KMUT_MAGIC);
- SLASSERT(mut->owner != current_thread);
+ SLASSERT(mut->owner != current_thread());
SLASSERT(get_preemption_level() == 0);
ksem_down(&mut->s, 1);
- ON_SYNC_DEBUG(mut->owner = current_thread);
+ ON_SYNC_DEBUG(mut->owner = current_thread());
}
void kmut_unlock(struct kmut *mut)
{
SLASSERT(mut != NULL);
SLASSERT(mut->magic == KMUT_MAGIC);
- SLASSERT(mut->owner == current_thread);
+ SLASSERT(mut->owner == current_thread());
ON_SYNC_DEBUG(mut->owner = NULL);
ksem_up(&mut->s, 1);
{
SLASSERT(mut != NULL);
SLASSERT(mut->magic == KMUT_MAGIC);
- return mut->owner == current_thread;
+ return mut->owner == current_thread();
}
int kmut_isnotlocked(struct kmut *mut)
{
SLASSERT(mut != NULL);
SLASSERT(mut->magic == KMUT_MAGIC);
- return mut->owner != current_thread;
+ return mut->owner != current_thread();
}
#endif
CFS_INIT_LIST_HEAD(&link->linkage);
link->flags = 0;
- link->event = current_thread;
+ link->event = current_thread();
link->hits = 0;
link->forward = NULL;
ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC);
{
struct ksleep_link *scan;
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
SLASSERT(kspin_islocked(&chan->guard));
list_for_each_entry(scan, &chan->waiters, linkage) {
if (scan->event == event) {
}
}
-void ksleep_wait(struct ksleep_chan *chan)
+void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state)
{
event_t event;
int result;
SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
SLASSERT(get_preemption_level() == 0);
- event = current_thread;
+ event = current_thread();
kspin_lock(&chan->guard);
if (!has_hits(chan, event)) {
- result = assert_wait(event, THREAD_UNINT);
+ result = assert_wait(event, state);
kspin_unlock(&chan->guard);
SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
if (result == THREAD_WAITING)
EXIT;
}
-int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout)
+/*
+ * Sleep on @chan for no longer than @timeout nanoseconds. Returns the
+ * remaining sleep time; it is non-zero only if the thread was woken by
+ * a signal (not currently implemented) or if the waitq was already in
+ * the "signalled" state.
+ */
+int64_t ksleep_timedwait(struct ksleep_chan *chan,
+ cfs_task_state_t state,
+ uint64_t timeout)
{
event_t event;
- int64_t result;
- AbsoluteTime clock_current;
- AbsoluteTime clock_delay;
ENTRY;
CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout);
- event = current_thread;
- result = 0;
+ event = current_thread();
kspin_lock(&chan->guard);
if (!has_hits(chan, event)) {
- result = assert_wait(event, THREAD_UNINT);
+ int result;
+ uint64_t expire;
+ result = assert_wait(event, state);
if (timeout > 0) {
/*
* arm a timer. thread_set_timer()'s first argument is
* uint32_t, so we have to cook deadline ourselves.
*/
- clock_get_uptime(&clock_current);
- nanoseconds_to_absolutetime(timeout, &clock_delay);
- ADD_ABSOLUTETIME(&clock_current, &clock_delay);
- thread_set_timer_deadline(clock_current);
+ nanoseconds_to_absolutetime(timeout, &expire);
+ clock_absolutetime_interval_to_deadline(expire, &expire);
+ thread_set_timer_deadline(expire);
}
kspin_unlock(&chan->guard);
SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
result = thread_block(THREAD_CONTINUE_NULL);
thread_cancel_timer();
- clock_get_uptime(&clock_delay);
- SUB_ABSOLUTETIME(&clock_delay, &clock_current);
- if (result == THREAD_TIMED_OUT)
- result = 0;
- else {
- absolutetime_to_nanoseconds(clock_delay, &result);
- if (result < 0)
- result = 0;
- }
- } else
+ if (result == THREAD_TIMED_OUT)
+ timeout = 0;
+ else {
+ uint64_t now;
+ clock_get_uptime(&now);
+ if (expire > now)
+ absolutetime_to_nanoseconds(expire - now, &timeout);
+ else
+ timeout = 0;
+ }
+ } else {
+ timeout = 0;
kspin_unlock(&chan->guard);
+ }
- RETURN(result);
+ RETURN(timeout);
}
/*
*/
void ksleep_wake(struct ksleep_chan *chan)
{
- ENTRY;
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
ksleep_wake_nr(chan, 1);
- EXIT;
}
/*
struct ksleep_link *scan;
int result;
- ENTRY;
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
SLASSERT(chan != NULL);
SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
if (forward != NULL)
kspin_lock(&forward->guard);
result = thread_wakeup(scan->event);
- CDEBUG(D_INFO, "waking 0x%x: %d\n",
- (unsigned int)scan->event, result);
SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
if (result == KERN_NOT_WAITING) {
++ scan->hits;
break;
}
kspin_unlock(&chan->guard);
- EXIT;
}
void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg)
t->func(t->arg);
}
+extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t);
+extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, uint64_t);
+
static void ktimer_disarm_locked(struct ktimer *t)
{
SLASSERT(t != NULL);
thread_call_func_cancel(ktimer_actor, t, FALSE);
}
+/*
+ * The deadline we receive is in nanoseconds, but thread_call checks
+ * time in abstime units (one abstime unit equals one bus cycle, so its
+ * duration depends on the bus speed of the machine).  Convert the
+ * nanosecond value to abstime with nanoseconds_to_absolutetime().
+ *
+ * Refer to _delayed_call_timer(...).
+ *
+ * If thread_call_func_delayed stops being exported in the future, we
+ * can replace it with timeout() or bsd_timeout().
+ */
void ktimer_arm(struct ktimer *t, u_int64_t deadline)
{
+ u_int64_t abstime;
SLASSERT(t != NULL);
SLASSERT(t->magic == KTIMER_MAGIC);
kspin_lock(&t->guard);
ktimer_disarm_locked(t);
t->armed = 1;
- thread_call_func_delayed(ktimer_actor, t, *(AbsoluteTime *)&deadline);
+ nanoseconds_to_absolutetime(deadline, &abstime);
+ thread_call_func_delayed(ktimer_actor, t, abstime);
kspin_unlock(&t->guard);
}
return t->deadline;
}
+void cfs_sync_init(void)
+{
+#ifdef __DARWIN8__
+ /* Initialize lock group */
+ cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL);
+#endif
+}
+
+void cfs_sync_fini(void)
+{
+#ifdef __DARWIN8__
+ /* destroy lock group */
+ lck_grp_free(cfs_lock_grp);
+ /* XXX Liang: check reference count of lock group */
+ cfs_lock_grp = NULL;
+#endif
+}
/*
* Local variables:
* c-indentation-style: "K&R"
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+
+#include <mach/mach_types.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/protosw.h>
+#include <net/if.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
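+/*
+ * Returns a zeroed sockaddr_in with sin_len and sin_family pre-set; the
+ * initializer relies on those being the first two fields of the BSD
+ * struct sockaddr_in.
+ */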
+static __inline__ struct sockaddr_in
+blank_sin()
+{
+ struct sockaddr_in blank = { sizeof(struct sockaddr_in), AF_INET };
+ return (blank);
+}
+
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+ int i;
+
+ LASSERT (n > 0);
+
+ for (i = 0; i < n && names[i] != NULL; i++)
+ LIBCFS_FREE(names[i], IFNAMSIZ);
+
+ LIBCFS_FREE(names, n * sizeof(*names));
+}
+
+#ifdef __DARWIN8__
+/*
+ * Darwin 8.x
+ *
+ * No poking at kernel structures; everything goes through the KPI.
+ */
+
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+ struct ifreq ifr;
+ socket_t so;
+ __u32 val;
+ int nob;
+ int rc;
+
+ rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
+ NULL, NULL, &so);
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return rc;
+ }
+
+ nob = strnlen(name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ CERROR("Interface name %s too long\n", name);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+ bzero(&ifr, sizeof(ifr));
+ strcpy(ifr.ifr_name, name);
+ rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get flags for interface %s\n", name);
+ goto out;
+ }
+
+ if ((ifr.ifr_flags & IFF_UP) == 0) {
+ CDEBUG(D_NET, "Interface %s down\n", name);
+ *up = 0;
+ *ip = *mask = 0;
+ goto out;
+ }
+
+ *up = 1;
+
+ bzero(&ifr, sizeof(ifr));
+ strcpy(ifr.ifr_name, name);
+ *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+ rc = -sock_ioctl(so, SIOCGIFADDR, &ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get IP address for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *ip = ntohl(val);
+
+ bzero(&ifr, sizeof(ifr));
+ strcpy(ifr.ifr_name, name);
+ *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+ rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr);
+
+ if (rc != 0) {
+ CERROR("Can't get netmask for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *mask = ntohl(val);
+out:
+ sock_close(so);
+ return rc;
+}
+
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+ /* Allocate and fill in 'names', returning # interfaces/error */
+ char **names;
+ int toobig;
+ int nalloc;
+ int nfound;
+ socket_t so;
+ struct ifreq *ifr;
+ struct ifconf ifc;
+ int rc;
+ int nob;
+ int i;
+
+ rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
+ NULL, NULL, &so);
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (rc);
+ }
+
+ nalloc = 16; /* first guess at max interfaces */
+ toobig = 0;
+ for (;;) {
+ if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
+ toobig = 1;
+ nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
+ CWARN("Too many interfaces: only enumerating first %d\n",
+ nalloc);
+ }
+
+ LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+ if (ifr == NULL) {
+ CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+ rc = -ENOMEM;
+ goto out0;
+ }
+
+ ifc.ifc_buf = (char *)ifr;
+ ifc.ifc_len = nalloc * sizeof(*ifr);
+
+#if 1
+ /*
+ * XXX Liang:
+ * sock_ioctl(..., SIOCGIFCONF, ...) is not usable from the kernel:
+ * it always uses copyout(...) to copy the ifreqs to userspace, so
+ * we cannot obtain interface names via SIOCGIFCONF.  Probe the
+ * "en0".."en15" names directly instead.
+ */
+ nfound = 0;
+ for (i = 0; i < 16; i++) {
+ struct ifreq en;
+ bzero(&en, sizeof(en));
+ snprintf(en.ifr_name, IFNAMSIZ, "en%d", i);
+ rc = -sock_ioctl (so, SIOCGIFFLAGS, &en);
+ if (rc != 0)
+ continue;
+ strcpy(ifr[nfound++].ifr_name, en.ifr_name);
+ }
+
+#else /* not in use now */
+ rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc);
+
+ if (rc < 0) {
+ CERROR ("Error %d enumerating interfaces\n", rc);
+ goto out1;
+ }
+
+ nfound = ifc.ifc_len/sizeof(*ifr);
+ LASSERT (nfound <= nalloc);
+#endif
+
+ if (nfound < nalloc || toobig)
+ break;
+
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ nalloc *= 2;
+ }
+ if (nfound == 0)
+ goto out1;
+
+ LIBCFS_ALLOC(names, nfound * sizeof(*names));
+ if (names == NULL) {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ /* NULL out all names[i] */
+ memset (names, 0, nfound * sizeof(*names));
+
+ for (i = 0; i < nfound; i++) {
+
+ nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ /* no space for terminating NULL */
+ CERROR("interface name %.*s too long (%d max)\n",
+ nob, ifr[i].ifr_name, IFNAMSIZ);
+ rc = -ENAMETOOLONG;
+ goto out2;
+ }
+
+ LIBCFS_ALLOC(names[i], IFNAMSIZ);
+ if (names[i] == NULL) {
+ rc = -ENOMEM;
+ goto out2;
+ }
+
+ memcpy(names[i], ifr[i].ifr_name, nob);
+ names[i][nob] = 0;
+ }
+
+ *namesp = names;
+ rc = nfound;
+
+out2:
+ if (rc < 0)
+ libcfs_ipif_free_enumeration(names, nfound);
+out1:
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+out0:
+ sock_close(so);
+ return rc;
+
+}
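+
+/*
+ * Usage sketch (hypothetical caller): enumerate interfaces, query each
+ * one, and free the name table.
+ *
+ *	char **names;
+ *	int i, up, n;
+ *	__u32 ip, mask;
+ *
+ *	n = libcfs_ipif_enumerate(&names);
+ *	for (i = 0; i < n; i++)
+ *		if (libcfs_ipif_query(names[i], &up, &ip, &mask) == 0 && up)
+ *			CDEBUG(D_NET, "%s: %u.%u.%u.%u\n",
+ *			       names[i], HIPQUAD(ip));
+ *	if (n > 0)
+ *		libcfs_ipif_free_enumeration(names, n);
+ */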
+
+/*
+ * Public entry point for socket upcalls.
+ *
+ * In Darwin 8.0 an so_upcall can only be installed when a socket is
+ * created or accepted, so we install libcfs_sock_upcall() as the upcall
+ * for every socket at create/accept time; it dispatches to the
+ * user-provided upcall, which can be set at any point afterwards.
+ */
+static void libcfs_sock_upcall(socket_t so, void* arg, int waitf)
+{
+ cfs_socket_t *sock;
+
+ sock = B2C_SOCK(so);
+ if ((sock->s_flags & CFS_SOCK_UPCALL) != 0 && sock->s_upcall != NULL)
+ sock->s_upcall((struct socket *)so, sock->s_upcallarg, waitf);
+ return;
+}
+
+void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg)
+{
+ sock->s_upcall = callback;
+ sock->s_upcallarg = arg;
+ sock->s_flags |= CFS_SOCK_UPCALL;
+ return;
+}
+
+void libcfs_sock_reset_cb(cfs_socket_t *sock)
+{
+ sock->s_flags &= ~CFS_SOCK_UPCALL;
+ sock->s_upcall = NULL;
+ sock->s_upcallarg = NULL;
+ return;
+}
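+
+/*
+ * Usage sketch (hypothetical LND code): install a private upcall once
+ * the socket exists, and remove it before release.
+ *
+ *	static void my_upcall(struct socket *so, void *arg, int waitf)
+ *	{
+ *		... wake the scheduler thread ...
+ *	}
+ *
+ *	libcfs_sock_set_cb(sock, my_upcall, my_arg);
+ *	...
+ *	libcfs_sock_reset_cb(sock);
+ */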
+
+static int
+libcfs_sock_create (cfs_socket_t **sockp, int *fatal,
+ __u32 local_ip, int local_port)
+{
+ struct sockaddr_in locaddr;
+ cfs_socket_t *sock;
+ int option;
+ int optlen;
+ int rc;
+
+ /* All errors are fatal except bind failure if the port is in use */
+ *fatal = 1;
+
+ sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
+ if (!sock) {
+ CERROR("Can't allocate cfs_socket.\n");
+ return -ENOMEM;
+ }
+ *sockp = sock;
+
+ rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
+ libcfs_sock_upcall, NULL, &C2B_SOCK(sock));
+ if (rc != 0)
+ goto out;
+ option = 1;
+ optlen = sizeof(option);
+ rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
+ SO_REUSEADDR, &option, optlen);
+ if (rc != 0)
+ goto out;
+
+ /* can't specify a local port without a local IP */
+ LASSERT (local_ip == 0 || local_port != 0);
+
+ if (local_ip != 0 || local_port != 0) {
+ bzero (&locaddr, sizeof (locaddr));
+ locaddr.sin_len = sizeof(struct sockaddr_in);
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons (local_port);
+ locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : INADDR_ANY;
+ rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr);
+ if (rc == -EADDRINUSE) {
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *fatal = 0;
+ goto out;
+ }
+ if (rc != 0) {
+ CERROR("Error trying to bind to port %d: %d\n",
+ local_port, rc);
+ goto out;
+ }
+ }
+ return 0;
+out:
+ if (C2B_SOCK(sock) != NULL)
+ sock_close(C2B_SOCK(sock));
+ FREE(sock, M_TEMP);
+ return rc;
+}
+
+int
+libcfs_sock_listen (cfs_socket_t **sockp,
+ __u32 local_ip, int local_port, int backlog)
+{
+ cfs_socket_t *sock;
+ int fatal;
+ int rc;
+
+ rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port);
+ if (rc != 0) {
+ if (!fatal)
+ CERROR("Can't create socket: port %d already in use\n",
+ local_port);
+ return rc;
+
+ }
+ rc = -sock_listen(C2B_SOCK(sock), backlog);
+ if (rc == 0) {
+ *sockp = sock;
+ return 0;
+ }
+
+ if (C2B_SOCK(sock) != NULL)
+ sock_close(C2B_SOCK(sock));
+ FREE(sock, M_TEMP);
+ return rc;
+}
+
+int
+libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock)
+{
+ cfs_socket_t *newsock;
+ int rc;
+
+ newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
+ if (!newsock) {
+ CERROR("Can't allocate cfs_socket.\n");
+ return -ENOMEM;
+ }
+ /*
+ * The thread sleeps in sock_accept() via msleep(); it can be
+ * interrupted because msleep() is called with PCATCH.
+ */
+ rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0,
+ libcfs_sock_upcall, NULL, &C2B_SOCK(newsock));
+ if (rc) {
+ if (C2B_SOCK(newsock) != NULL) sock_close(C2B_SOCK(newsock));
+ FREE(newsock, M_TEMP);
+ return rc;
+ }
+ *newsockp = newsock;
+ return 0;
+}
+
+void
+libcfs_sock_abort_accept (cfs_socket_t *sock)
+{
+ /*
+ * XXX Liang:
+ *
+ * We want to wake up the thread blocked in sock_accept(), but we
+ * do not know the address it is sleeping on, so we cannot wake it
+ * directly.  A thread sleeping in sock_accept() is woken when:
+ * 1. it is interrupted by a signal;
+ * 2. a new connection arrives (sonewconn);
+ * 3. the socket is disconnected (soisdisconnected).
+ *
+ * Since there is no KPI for sending a signal to a thread directly,
+ * the only thing we can do here is disconnect the socket, e.g. via
+ * sock_shutdown().
+ *
+ * A shutdown request with SHUT_WR or SHUT_RDWR is passed down to
+ * the protocol: sock_shutdown()->tcp_usr_shutdown()->
+ * tcp_usrclosed()->tcp_close()->soisdisconnected(), which wakes the
+ * thread via wakeup((caddr_t)&so->so_timeo).
+ */
+ sock_shutdown(C2B_SOCK(sock), SHUT_RDWR);
+}
+
+int
+libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
+{
+ size_t rcvlen;
+ int rc;
+ cfs_duration_t to = cfs_time_seconds(timeout);
+ cfs_time_t then;
+ struct timeval tv;
+
+ LASSERT(nob > 0);
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0,
+ };
+ cfs_duration_usec(to, &tv);
+ rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
+ &tv, sizeof(tv));
+ if (rc != 0) {
+ CERROR("Can't set socket recv timeout "
+ "%ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+
+ then = cfs_time_current();
+ rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
+ to -= cfs_time_current() - then;
+
+ if (rc != 0) {
+ if (rcvlen != nob &&
+ (rc == -ERESTART || rc == -EINTR || rc == -EWOULDBLOCK))
+ rc = 0;
+ if (rc != 0)
+ return rc;
+ }
+
+ if (rcvlen == nob)
+ return 0;
+
+ if (to <= 0)
+ return -EAGAIN;
+
+ buffer = ((char *)buffer) + rcvlen;
+ nob -= rcvlen;
+ }
+ return 0;
+}
+
+int
+libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
+{
+ size_t sndlen;
+ int rc;
+ cfs_duration_t to = cfs_time_seconds(timeout);
+ cfs_time_t then;
+ struct timeval tv;
+
+ LASSERT(nob > 0);
+
+ for (;;) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = NULL,
+ .msg_controllen = 0,
+ .msg_flags = 0,
+ };
+ cfs_duration_usec(to, &tv);
+ rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO,
+ &tv, sizeof(tv));
+ if (rc != 0) {
+ CERROR("Can't set socket send timeout "
+ "%ld.%06d: %d\n",
+ (long)tv.tv_sec, (int)tv.tv_usec, rc);
+ return rc;
+ }
+
+ then = cfs_time_current();
+ rc = -sock_send(C2B_SOCK(sock), &msg, 0, &sndlen);
+ to -= cfs_time_current() - then;
+
+ if (rc != 0) {
+ if (sndlen != nob &&
+ (rc == -ERESTART || rc == -EINTR || rc == -EWOULDBLOCK))
+ rc = 0;
+ if (rc != 0)
+ return rc;
+ }
+
+ if (sndlen == nob)
+ return 0;
+ if (to <= 0)
+ return -EAGAIN;
+ buffer = ((char *)buffer) + sndlen;
+ nob -= sndlen;
+ }
+ return 0;
+
+}
+
+int
+libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
+{
+ struct sockaddr_in sin;
+ int rc;
+
+ if (remote != 0)
+ /* Get remote address */
+ rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
+ else
+ /* Get local address */
+ rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
+ if (rc != 0) {
+ CERROR ("Error %d getting sock %s IP/port\n",
+ rc, remote ? "peer" : "local");
+ return rc;
+ }
+
+ if (ip != NULL)
+ *ip = ntohl (sin.sin_addr.s_addr);
+
+ if (port != NULL)
+ *port = ntohs (sin.sin_port);
+ return 0;
+}
+
+int
+libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
+{
+ int option;
+ int rc;
+
+ if (txbufsize != 0) {
+ option = txbufsize;
+ rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, sizeof (option));
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+
+ if (rxbufsize != 0) {
+ option = rxbufsize;
+ rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, sizeof (option));
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ return (rc);
+ }
+ }
+ return 0;
+}
+
+int
+libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize)
+{
+ int option;
+ int optlen;
+ int rc;
+
+ if (txbufsize != NULL) {
+ optlen = sizeof(option);
+ rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
+ (char *)&option, &optlen);
+ if (rc != 0) {
+ CERROR ("Can't get send buffer size: %d\n", rc);
+ return (rc);
+ }
+ *txbufsize = option;
+ }
+
+ if (rxbufsize != NULL) {
+ optlen = sizeof(option);
+ rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
+ (char *)&option, &optlen);
+ if (rc != 0) {
+ CERROR ("Can't get receive buffer size: %d\n", rc);
+ return (rc);
+ }
+ *rxbufsize = option;
+ }
+ return 0;
+}
+
+void
+libcfs_sock_release (cfs_socket_t *sock)
+{
+ if (C2B_SOCK(sock) != NULL) {
+ sock_shutdown(C2B_SOCK(sock), SHUT_RDWR);
+ sock_close(C2B_SOCK(sock));
+ }
+ FREE(sock, M_TEMP);
+}
+
+int
+libcfs_sock_connect (cfs_socket_t **sockp, int *fatal,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port)
+{
+ cfs_socket_t *sock;
+ struct sockaddr_in srvaddr;
+ int rc;
+
+ rc = libcfs_sock_create(&sock, fatal, local_ip, local_port);
+ if (rc != 0)
+ return rc;
+
+ bzero(&srvaddr, sizeof(srvaddr));
+ srvaddr.sin_len = sizeof(struct sockaddr_in);
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons(peer_port);
+ srvaddr.sin_addr.s_addr = htonl(peer_ip);
+
+ rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0);
+ if (rc == 0) {
+ *sockp = sock;
+ return 0;
+ }
+
+ *fatal = !(rc == -EADDRNOTAVAIL);
+ CDEBUG(*fatal ? D_ERROR : D_NET,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+
+ libcfs_sock_release(sock);
+ return rc;
+}
+
+#else /* !__DARWIN8__ */
+
+/*
+ * To use bigger socket buffers:
+ * 1. Increase nmbclusters (it cannot be changed by sysctl because it is
+ * read-only, so the kernel must be patched).
+ * 2. Increase net.inet.tcp.reass.maxsegments
+ * 3. Increase net.inet.tcp.sendspace
+ * 4. Increase net.inet.tcp.recvspace
+ * 5. Increase kern.ipc.maxsockbuf
+ */
+#define KSOCK_MAX_BUF (1152*1024)
+
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+ struct socket *so;
+ struct ifreq ifr;
+ int nob;
+ int rc;
+ __u32 val;
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_NET_IN;
+ rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (-rc);
+ }
+ nob = strnlen(name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ CERROR("Interface name %s too long\n", name);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+ strcpy(ifr.ifr_name, name);
+ CFS_NET_IN;
+ rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc());
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ CERROR("Can't get flags for interface %s\n", name);
+ goto out;
+ }
+ if ((ifr.ifr_flags & IFF_UP) == 0) {
+ CDEBUG(D_NET, "Interface %s down\n", name);
+ *up = 0;
+ *ip = *mask = 0;
+ goto out;
+ }
+
+ *up = 1;
+ strcpy(ifr.ifr_name, name);
+ *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+ CFS_NET_IN;
+ rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc());
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ CERROR("Can't get IP address for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *ip = ntohl(val);
+
+ strcpy(ifr.ifr_name, name);
+ *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+ CFS_NET_IN;
+ rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc());
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ CERROR("Can't get netmask for interface %s\n", name);
+ goto out;
+ }
+
+ val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+ *mask = ntohl(val);
+out:
+ CFS_NET_IN;
+ soclose(so);
+ CFS_NET_EX;
+ return -rc;
+}
+
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+ /* Allocate and fill in 'names', returning # interfaces/error */
+ char **names;
+ int toobig;
+ int nalloc;
+ int nfound;
+ struct socket *so;
+ struct ifreq *ifr;
+ struct ifconf ifc;
+ int rc;
+ int nob;
+ int i;
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_NET_IN;
+ rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (-rc);
+ }
+
+ nalloc = 16; /* first guess at max interfaces */
+ toobig = 0;
+ for (;;) {
+ if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
+ toobig = 1;
+ nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
+ CWARN("Too many interfaces: only enumerating first %d\n",
+ nalloc);
+ }
+
+ LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+ if (ifr == NULL) {
+ CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+ rc = -ENOMEM;
+ goto out0;
+ }
+
+ ifc.ifc_buf = (char *)ifr;
+ ifc.ifc_len = nalloc * sizeof(*ifr);
+
+ CFS_NET_IN;
+ rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc());
+ CFS_NET_EX;
+
+ if (rc < 0) {
+ CERROR ("Error %d enumerating interfaces\n", rc);
+ goto out1;
+ }
+
+ nfound = ifc.ifc_len/sizeof(*ifr);
+ LASSERT (nfound <= nalloc);
+
+ if (nfound < nalloc || toobig)
+ break;
+
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ nalloc *= 2;
+ }
+ if (nfound == 0)
+ goto out1;
+
+ LIBCFS_ALLOC(names, nfound * sizeof(*names));
+ if (names == NULL) {
+ rc = -ENOMEM;
+ goto out1;
+ }
+ /* NULL out all names[i] */
+ memset (names, 0, nfound * sizeof(*names));
+
+ for (i = 0; i < nfound; i++) {
+
+ nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+ if (nob == IFNAMSIZ) {
+ /* no space for terminating NULL */
+ CERROR("interface name %.*s too long (%d max)\n",
+ nob, ifr[i].ifr_name, IFNAMSIZ);
+ rc = -ENAMETOOLONG;
+ goto out2;
+ }
+
+ LIBCFS_ALLOC(names[i], IFNAMSIZ);
+ if (names[i] == NULL) {
+ rc = -ENOMEM;
+ goto out2;
+ }
+
+ memcpy(names[i], ifr[i].ifr_name, nob);
+ names[i][nob] = 0;
+ }
+
+ *namesp = names;
+ rc = nfound;
+
+out2:
+ if (rc < 0)
+ libcfs_ipif_free_enumeration(names, nfound);
+out1:
+ LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+out0:
+ CFS_NET_IN;
+ soclose(so);
+ CFS_NET_EX;
+ return rc;
+}
+
+static int
+libcfs_sock_create (struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port)
+{
+ struct sockaddr_in locaddr;
+ struct socket *so;
+ struct sockopt sopt;
+ int option;
+ int rc;
+ CFS_DECL_FUNNEL_DATA;
+
+ *fatal = 1;
+ CFS_NET_IN;
+ rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't create socket: %d\n", rc);
+ return (-rc);
+ }
+
+ bzero(&sopt, sizeof sopt);
+ option = 1;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_name = SO_REUSEADDR;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+ CFS_NET_IN;
+ rc = sosetopt(so, &sopt);
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't set sock reuse address: %d\n", rc);
+ goto out;
+ }
+ /* can't specify a local port without a local IP */
+ LASSERT (local_ip == 0 || local_port != 0);
+
+ if (local_ip != 0 || local_port != 0) {
+ bzero (&locaddr, sizeof (locaddr));
+ locaddr.sin_len = sizeof(struct sockaddr_in);
+ locaddr.sin_family = AF_INET;
+ locaddr.sin_port = htons (local_port);
+ locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) :
+ INADDR_ANY;
+
+ rc = sobind(so, (struct sockaddr *)&locaddr);
+ if (rc == EADDRINUSE) {
+ CFS_NET_EX;
+ CDEBUG(D_NET, "Port %d already in use\n", local_port);
+ *fatal = 0;
+ goto out;
+ }
+ if (rc != 0) {
+ CFS_NET_EX;
+ CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n",
+ HIPQUAD(local_ip), rc);
+ goto out;
+ }
+ }
+ CFS_NET_EX;
+ *sockp = so;
+ return 0;
+out:
+ CFS_NET_IN;
+ soclose(so);
+ CFS_NET_EX;
+ return -rc;
+}
+
+int
+libcfs_sock_listen (struct socket **sockp,
+ __u32 local_ip, int local_port, int backlog)
+{
+ int fatal;
+ int rc;
+ CFS_DECL_FUNNEL_DATA;
+
+ rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
+ if (rc != 0) {
+ if (!fatal)
+ CERROR("Can't create socket: port %d already in use\n",
+ local_port);
+ return rc;
+ }
+ CFS_NET_IN;
+ rc = solisten(*sockp, backlog);
+ CFS_NET_EX;
+ if (rc == 0)
+ return 0;
+ CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
+ CFS_NET_IN;
+ soclose(*sockp);
+ CFS_NET_EX;
+ return -rc;
+}
+
+int
+libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
+{
+ struct socket *so;
+ struct sockaddr *sa;
+ int error, s;
+ CFS_DECL_FUNNEL_DATA;
+
+ CFS_NET_IN;
+ s = splnet();
+ if ((sock->so_options & SO_ACCEPTCONN) == 0) {
+ splx(s);
+ CFS_NET_EX;
+ return (-EINVAL);
+ }
+
+ if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) {
+ splx(s);
+ CFS_NET_EX;
+ return (-EWOULDBLOCK);
+ }
+
+ error = 0;
+ while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
+ if (sock->so_state & SS_CANTRCVMORE) {
+ sock->so_error = ECONNABORTED;
+ break;
+ }
+ error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH,
+ "accept", 0);
+ if (error) {
+ splx(s);
+ CFS_NET_EX;
+ return (-error);
+ }
+ }
+ if (sock->so_error) {
+ error = sock->so_error;
+ sock->so_error = 0;
+ splx(s);
+ CFS_NET_EX;
+ return (-error);
+ }
+
+ /*
+ * At this point we know that there is at least one connection
+ * ready to be accepted. Remove it from the queue right away so
+ * that nobody else can accept it first (this is the in-kernel
+ * accept() logic minus the falloc() file-descriptor step).
+ */
+ so = TAILQ_FIRST(&sock->so_comp);
+ TAILQ_REMOVE(&sock->so_comp, so, so_list);
+ sock->so_qlen--;
+
+ so->so_state &= ~SS_COMP;
+ so->so_head = NULL;
+ sa = 0;
+ (void) soaccept(so, &sa);
+
+ *newsockp = so;
+ FREE(sa, M_SONAME);
+ splx(s);
+ CFS_NET_EX;
+ return (-error);
+}
+
+void
+libcfs_sock_abort_accept (struct socket *sock)
+{
+ wakeup(&sock->so_timeo);
+}
+
+/*
+ * XXX Liang: timeout for write is not supported yet.
+ */
+int
+libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct uio suio = {
+ .uio_iov = &iov,
+ .uio_iovcnt = 1,
+ .uio_offset = 0,
+ .uio_resid = nob,
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_WRITE,
+ .uio_procp = NULL
+ };
+
+ CFS_NET_IN;
+ rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0);
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ if (suio.uio_resid != nob &&
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
+ rc = 0;
+ if (rc != 0)
+ return -rc;
+ rc = nob - suio.uio_resid;
+ buffer = ((char *)buffer) + rc;
+ nob = suio.uio_resid;
+ continue;
+ }
+ break;
+ }
+ return (0);
+}
+
+/*
+ * XXX Liang: timeout for read is not supported yet.
+ */
+int
+libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ while (nob > 0) {
+ struct iovec iov = {
+ .iov_base = buffer,
+ .iov_len = nob
+ };
+ struct uio ruio = {
+ .uio_iov = &iov,
+ .uio_iovcnt = 1,
+ .uio_offset = 0,
+ .uio_resid = nob,
+ .uio_segflg = UIO_SYSSPACE,
+ .uio_rw = UIO_READ,
+ .uio_procp = NULL
+ };
+
+ CFS_NET_IN;
+ rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ if (ruio.uio_resid != nob &&
+ (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK))
+ rc = 0;
+ if (rc != 0)
+ return -rc;
+ rc = nob - ruio.uio_resid;
+ buffer = ((char *)buffer) + rc;
+ nob = ruio.uio_resid;
+ continue;
+ }
+ break;
+ }
+ return (0);
+}
+
+int
+libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
+{
+ struct sockopt sopt;
+ int rc = 0;
+ int option;
+ CFS_DECL_NET_DATA;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = SOL_SOCKET;
+ sopt.sopt_val = &option;
+ sopt.sopt_valsize = sizeof(option);
+
+ if (txbufsize != 0) {
+ option = txbufsize;
+ if (option > KSOCK_MAX_BUF)
+ option = KSOCK_MAX_BUF;
+
+ sopt.sopt_name = SO_SNDBUF;
+ CFS_NET_IN;
+ rc = sosetopt(sock, &sopt);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't set send buffer %d: %d\n",
+ option, rc);
+
+ return -rc;
+ }
+ }
+
+ if (rxbufsize != 0) {
+ option = rxbufsize;
+ sopt.sopt_name = SO_RCVBUF;
+ CFS_NET_IN;
+ rc = sosetopt(sock, &sopt);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't set receive buffer %d: %d\n",
+ option, rc);
+ return -rc;
+ }
+ }
+ return 0;
+}
+
+int
+libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
+{
+ struct sockaddr_in *sin;
+ struct sockaddr *sa = NULL;
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ if (remote != 0) {
+ CFS_NET_IN;
+ rc = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa);
+ CFS_NET_EX;
+
+ if (rc != 0) {
+ if (sa) FREE(sa, M_SONAME);
+ CERROR ("Error %d getting sock peer IP\n", rc);
+ return -rc;
+ }
+ } else {
+ CFS_NET_IN;
+ rc = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa);
+ CFS_NET_EX;
+ if (rc != 0) {
+ if (sa) FREE(sa, M_SONAME);
+ CERROR ("Error %d getting sock local IP\n", rc);
+ return -rc;
+ }
+ }
+ if (sa != NULL) {
+ sin = (struct sockaddr_in *)sa;
+ if (ip != NULL)
+ *ip = ntohl (sin->sin_addr.s_addr);
+ if (port != NULL)
+ *port = ntohs (sin->sin_port);
+ if (sa)
+ FREE(sa, M_SONAME);
+ }
+ return 0;
+}
+
+int
+libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
+{
+ struct sockopt sopt;
+ int rc;
+ CFS_DECL_NET_DATA;
+
+ bzero(&sopt, sizeof sopt);
+ sopt.sopt_dir = SOPT_GET;
+ sopt.sopt_level = SOL_SOCKET;
+
+ if (txbufsize != NULL) {
+ sopt.sopt_val = txbufsize;
+ sopt.sopt_valsize = sizeof(*txbufsize);
+ sopt.sopt_name = SO_SNDBUF;
+ CFS_NET_IN;
+ rc = sogetopt(sock, &sopt);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't get send buffer size: %d\n", rc);
+ return -rc;
+ }
+ }
+
+ if (rxbufsize != NULL) {
+ sopt.sopt_val = rxbufsize;
+ sopt.sopt_valsize = sizeof(*rxbufsize);
+ sopt.sopt_name = SO_RCVBUF;
+ CFS_NET_IN;
+ rc = sogetopt(sock, &sopt);
+ CFS_NET_EX;
+ if (rc != 0) {
+ CERROR ("Can't get receive buffer size: %d\n", rc);
+ return -rc;
+ }
+ }
+ return 0;
+}
+
+int
+libcfs_sock_connect (struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port)
+{
+ struct sockaddr_in srvaddr;
+ struct socket *so;
+ int s;
+ int rc;
+ CFS_DECL_FUNNEL_DATA;
+
+ rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
+ if (rc != 0)
+ return rc;
+ so = *sockp;
+ bzero(&srvaddr, sizeof(srvaddr));
+ srvaddr.sin_len = sizeof(struct sockaddr_in);
+ srvaddr.sin_family = AF_INET;
+ srvaddr.sin_port = htons (peer_port);
+ srvaddr.sin_addr.s_addr = htonl (peer_ip);
+
+ CFS_NET_IN;
+ rc = soconnect(so, (struct sockaddr *)&srvaddr);
+ if (rc != 0) {
+ CFS_NET_EX;
+ if (rc != EADDRNOTAVAIL && rc != EADDRINUSE)
+ CDEBUG(*fatal ? D_ERROR : D_NET,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+ goto out;
+ }
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n");
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz);
+ }
+ if ((rc = so->so_error) != 0) {
+ so->so_error = 0;
+ splx(s);
+ CFS_NET_EX;
+ CDEBUG(*fatal ? D_ERROR : D_NET,
+ "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+ HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+ goto out;
+ }
+ LASSERT(so->so_state & SS_ISCONNECTED);
+ splx(s);
+ CFS_NET_EX;
+ if (sockp)
+ *sockp = so;
+ return (0);
+out:
+ CFS_NET_IN;
+ soshutdown(so, 2);
+ soclose(so);
+ CFS_NET_EX;
+ return (-rc);
+}
+
+void
+libcfs_sock_release (struct socket *sock)
+{
+ CFS_DECL_FUNNEL_DATA;
+ CFS_NET_IN;
+ soshutdown(sock, 0);
+ CFS_NET_EX;
+}
+
+#endif
extern union trace_data_union trace_data[NR_CPUS];
extern char *tracefile;
extern long long tracefile_size;
-extern struct rw_semaphore tracefile_sem;
extern int trace_start_thread(void);
extern void trace_stop_thread(void);
long max_debug_mb = M_TCD_MAX_PAGES;
static long max_permit_mb = (64 * 1024);
-inline struct trace_cpu_data *
-__trace_get_tcd (unsigned long *flags)
+spinlock_t trace_cpu_serializer;
+
+/*
+ * thread currently executing tracefile code or NULL if none does. Used to
+ * detect recursive calls to libcfs_debug_msg().
+ */
+static thread_t trace_owner = NULL;
+
+extern int get_preemption_level(void);
+extern atomic_t tage_allocated;
+
+struct rw_semaphore tracefile_sem;
+
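+/*
+ * tracefile_sem is taken through the wrappers below; they keep the XNU
+ * definition of struct rw_semaphore out of the interface used by the
+ * tracefile code.
+ */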
+void tracefile_lock_init(void)
+{
+ init_rwsem(&tracefile_sem);
+}
+
+void tracefile_read_lock(void)
+{
+ down_read(&tracefile_sem);
+}
+
+void tracefile_read_unlock(void)
+{
+ up_read(&tracefile_sem);
+}
+
+void tracefile_write_lock(void)
+{
+ down_write(&tracefile_sem);
+}
+
+void tracefile_write_unlock(void)
+{
+ up_write(&tracefile_sem);
+}
+
+inline struct trace_cpu_data *__trace_get_tcd(unsigned long *flags)
{
- return &trace_data[0].tcd;
+ struct trace_cpu_data *tcd;
+ int nr_pages;
+ struct list_head pages;
+
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ /*
+ * debugging check for recursive call to libcfs_debug_msg()
+ */
+ if (trace_owner == current_thread()) {
+ /*
+ * Cannot assert here.
+ */
+ printk(KERN_EMERG "recursive call to %s", __FUNCTION__);
+ /*
+ * "The death of God left the angels in a strange position."
+ */
+ cfs_enter_debugger();
+ }
+ tcd = &trace_data[0].tcd;
+ CFS_INIT_LIST_HEAD(&pages);
+ if (get_preemption_level() == 0)
+ nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages);
+ else
+ nr_pages = 0;
+ spin_lock(&trace_cpu_serializer);
+ trace_owner = current_thread();
+ tcd->tcd_cur_stock_pages += nr_pages;
+ list_splice(&pages, &tcd->tcd_stock_pages);
+ return tcd;
+}
+
+extern void raw_page_death_row_clean(void);
+
+inline void __trace_put_tcd(struct trace_cpu_data *tcd, unsigned long flags)
+{
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ LASSERT(trace_owner == current_thread());
+ trace_owner = NULL;
+ spin_unlock(&trace_cpu_serializer);
+ if (get_preemption_level() == 0)
+ /* purge all pending pages */
+ raw_page_death_row_clean();
}
-inline void
-__trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
{
- return;
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ /* XNU has global tcd, and all pages are owned by it */
+ return 1;
}
void
-set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
const int line, unsigned long stack)
-{
- struct timeval tv;
+{
+ struct timeval tv;
- do_gettimeofday(&tv);
- header->ph_subsys = subsys;
- header->ph_mask = mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_sec = (__u32)tv.tv_sec;
- header->ph_usec = tv.tv_usec;
- header->ph_stack = stack;
- header->ph_pid = 0;
- header->ph_line_num = line;
- header->ph_extern_pid = 0;
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ do_gettimeofday(&tv);
+ header->ph_subsys = subsys;
+ header->ph_mask = mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_sec = (__u32)tv.tv_sec;
+ header->ph_usec = tv.tv_usec;
+ header->ph_stack = stack;
+ header->ph_pid = cfs_curproc_pid();
+ header->ph_line_num = line;
+ header->ph_extern_pid = (__u32)current_thread();
}
-void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
- int len, char *file, const char *fn)
-{
- char *prefix = NULL, *ptype = NULL;
-
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
+void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
+ int len, char *file, const char *fn)
+{
+ char *prefix = "Lustre", *ptype = KERN_INFO;
+
+ /*
+ * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ if ((mask & D_EMERG) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_EMERG;
+ } else if ((mask & D_ERROR) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_ERR;
+ } else if ((mask & D_WARNING) != 0) {
+ prefix = "Lustre";
+ ptype = KERN_WARNING;
} else if (libcfs_printk != 0 || (mask & D_CONSOLE)) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
+ prefix = "Lustre";
+ ptype = KERN_INFO;
+ }
if ((mask & D_CONSOLE) != 0) {
printk("%s%s: %.*s", ptype, prefix, len, buf);
} else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %*s", ptype, prefix, hdr->ph_pid,
- hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+ printk("%s%s: %d:%d:(%s:%d:%s()) %*s",
+ ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid,
+ file, hdr->ph_line_num, fn, len, buf);
}
}
MALLOC(name, char *, req->newlen + 1, M_TEMP, M_WAITOK | M_ZERO);
if (name == NULL)
return -ENOMEM;
- down_write(&tracefile_sem);
+ tracefile_write_lock();
error = sysctl_handle_string(oidp, name, req->newlen + 1, req);
- if (!error || req->newptr != NULL) {
+ if (!error || !req->newptr) {
/* write */
if (strcmp(name, "stop") == 0) {
/* stop tracefile daemon */
tracefile = NULL;
trace_stop_thread();
- goto out;
- }else if (strncmp(name, "size=", 5) == 0) {
- tracefile_size = simple_strtoul(name + 5, NULL, 0);
- if (tracefile_size < 10 || tracefile_size > 20480)
- tracefile_size = TRACEFILE_SIZE;
- else
- tracefile_size <<= 20;
+ goto out;
+ } else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
goto out;
}
- if (name[0] != '/') {
- error = -EINVAL;
- goto out;
- }
- if (tracefile != NULL)
+ if (name[0] != '/') {
+ error = -EINVAL;
+ goto out;
+ }
+ if (tracefile != NULL)
cfs_free(tracefile);
- tracefile = name;
- name = NULL;
+ tracefile = name;
+ name = NULL;
trace_start_thread();
- } else if (req->newptr != NULL) {
+ } else if (!req->newptr) {
/* Something was wrong with the write request */
printf("sysctl debug daemon failed: %d.\n", error);
goto out;
SYSCTL_OUT(req, tracefile, sizeof(tracefile));
}
out:
- if (name != NULL)
+ if (name != NULL)
FREE(name, M_TEMP);
- up_write(&tracefile_sem);
+ tracefile_write_unlock();
return error;
}
int error = 0;
error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
- if (!error && req->newptr != NULL) {
+ if (!error && !req->newptr) {
/* We have a new value stored in the standard location */
if (max_debug_mb <= 0)
return -EINVAL;
if (max_debug_mb > max_permit_mb) {
printf("sysctl debug_mb is too big: %d.\n", max_debug_mb);
return 0;
- }
- for (i = 0; i < NR_CPUS; i++) {
- struct trace_cpu_data *tcd;
- tcd = &trace_data[i].tcd;
+ }
+ for (i = 0; i < NR_CPUS; i++) {
+ struct trace_cpu_data *tcd;
+ tcd = &trace_data[i].tcd;
tcd->tcd_max_pages = max_debug_mb;
}
- } else if (req->newptr != NULL) {
+ } else if (!req->newptr) {
/* Something was wrong with the write request */
printf ("sysctl debug_mb fault: %d.\n", error);
} else {
#include <sys/fcntl.h>
#include <lnet/types.h>
+#include <libcfs/kp30.h>
+
#ifndef isspace
inline int
isspace(char c)
-{
+{
return (c == ' ' || c == '\t' || c == '\n' || c == '\12');
}
#endif
char *
strrchr(const char *p, int ch)
-{
- const char *end = p + strlen(p);
- do {
- if (*end == (char)ch)
- return (char *)end;
- } while (--end >= p);
+{
+ const char *end = p + strlen(p);
+ do {
+ if (*end == (char)ch)
+ return (char *)end;
+ } while (--end >= p);
return NULL;
}
int sign;
int code;
- static int errno_xlate[] = {
+ static int errno_xlate[] = {
/* success is always success */
[0] = 0,
[LINUX_EPERM] = EPERM,
[LINUX_ELIBMAX] = EINVAL /* ELIBMAX */,
[LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */,
[LINUX_EILSEQ] = EILSEQ,
- [LINUX_ERESTART] = ERESTART,
+ [LINUX_ERESTART] = EINVAL /* because ERESTART is
+ * negative in XNU */,
[LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */,
[LINUX_EUSERS] = EUSERS,
[LINUX_ENOTSOCK] = ENOTSOCK,
[LINUX_EDQUOT] = EDQUOT,
[LINUX_ENOMEDIUM] = EINVAL /* ENOMEDIUM */,
[LINUX_EMEDIUMTYPE] = EINVAL /* EMEDIUMTYPE */,
- };
+ };
code = (int)ecode;
- if (code >= 0) {
+ if (code >= 0) {
sign = +1;
} else {
sign = -1;
code = -code;
}
- if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0]))
+ if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) {
code = errno_xlate[code];
- else
- /*
- * Unknown error. Reserved for the future.
- */
- code = EINVAL;
- return sign * code;
+ LASSERT(code >= 0);
+ }
+ return sign * code;
}
enum {
*/
int convert_client_oflag(int cflag, int *result)
{
- int sflag;
+ int sflag = 0;
cflag = 0;
obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY);
} else
return -EINVAL;
}
+
+#ifdef __DARWIN8__
+#else /* !__DARWIN8__ */
+extern int unix_syscall();
+extern int unix_syscall_return();
+
+extern int ktrsysret();
+extern int ktrace();
+
+extern int ast_taken();
+extern int ast_check();
+
+extern int trap();
+extern int syscall_trace();
+
+static int is_addr_in_range(void *addr, void *start, void *end)
+{
+ return start <= addr && addr <= end;
+}
+
+extern void cfs_thread_agent (void);
+
+static int is_last_frame(void *addr)
+{
+ if (addr == NULL)
+ return 1;
+ else if (is_addr_in_range(addr, unix_syscall, unix_syscall_return))
+ return 1;
+ else if (is_addr_in_range(addr, ktrsysret, ktrace))
+ return 1;
+ else if (is_addr_in_range(addr, ast_taken, ast_check))
+ return 1;
+ else if (is_addr_in_range(addr, trap, syscall_trace))
+ return 1;
+ else if (is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread))
+ return 1;
+ else
+ return 0;
+}
+
+static void *get_frame(int i)
+{
+ void *result;
+
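+ /*
+ * __builtin_return_address() only accepts a compile-time constant
+ * argument, hence the switch over every supported frame number
+ * rather than passing "i" directly.
+ */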
+#define CASE(i) case (i): result = __builtin_return_address(i); break
+ switch (i + 1) {
+ CASE(1);
+ CASE(2);
+ CASE(3);
+ CASE(4);
+ CASE(5);
+ CASE(6);
+ CASE(7);
+ CASE(8);
+ CASE(9);
+ CASE(10);
+ CASE(11);
+ CASE(12);
+ CASE(13);
+ CASE(14);
+ CASE(15);
+ CASE(16);
+ CASE(17);
+ CASE(18);
+ CASE(19);
+ CASE(20);
+ default:
+ panic("impossible frame number: %d\n", i);
+ result = NULL;
+ }
+ return result;
+}
+
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+ int i;
+
+ memset(trace, 0, sizeof *trace);
+ for (i = 0; i < sizeof_array(trace->frame); ++ i) {
+ void *addr;
+
+ addr = get_frame(i);
+ trace->frame[i] = addr;
+ if (is_last_frame(addr))
+ break;
+ }
+}
+
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+ if (0 <= frame_no && frame_no < sizeof_array(trace->frame))
+ return trace->frame[frame_no];
+ else
+ return NULL;
+}
+#endif /* !__DARWIN8__ */
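/*
 * Usage sketch (illustrative only; example_print_stack is a
 * hypothetical caller): cfs_stack_trace_fill() zeroes the trace and
 * stops at the first terminal frame, so unused slots stay NULL and
 * iteration can end at the first NULL frame.
 */
static void example_print_stack(void)
{
        struct cfs_stack_trace trace;
        void *frame;
        int i;

        cfs_stack_trace_fill(&trace);
        for (i = 0; (frame = cfs_stack_trace_frame(&trace, i)) != NULL; i++)
                printf("frame %d: %p\n", i, frame);
}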
# define DEBUG_SUBSYSTEM S_LNET
-#ifdef __KERNEL__
#include <libcfs/kp30.h>
#include <libcfs/libcfs.h>
#include "tracefile.h"
-#else
-#include <stdio.h>
-#include <stdlib.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <sys/time.h>
-#include <libcfs/libcfs.h>
-#endif
#ifdef __KERNEL__
unsigned int libcfs_subsystem_debug = ~0 - (S_LNET | S_LND);
unsigned int libcfs_printk;
EXPORT_SYMBOL(libcfs_printk);
+unsigned int libcfs_debug_binary = 1;
+EXPORT_SYMBOL(libcfs_debug_binary);
+
unsigned int libcfs_stack;
EXPORT_SYMBOL(libcfs_stack);
+unsigned int portal_enter_debugger = 0;
+EXPORT_SYMBOL(portal_enter_debugger);
+
unsigned int libcfs_catastrophe;
EXPORT_SYMBOL(libcfs_catastrophe);
int libcfs_debug_dumplog_thread(void *arg)
{
- libcfs_daemonize("");
+ cfs_daemonize("");
reparent_to_init();
libcfs_debug_dumplog_internal(arg);
cfs_waitq_signal(&debug_ctlwq);
rc = cfs_kernel_thread(libcfs_debug_dumplog_thread,
(void *)(long)cfs_curproc_pid(),
CLONE_VM | CLONE_FS | CLONE_FILES);
- if (rc < 0)
+ if (rc < 0)
printk(KERN_ERR "LustreError: cannot start log dump thread: "
"%d\n", rc);
else
- schedule();
+ cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
/* be sure to teardown if kernel_thread() failed */
cfs_waitq_del(&debug_ctlwq, &wait);
libcfs_run_upcall (argv);
}
+#ifdef __arch_um__
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+ CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n");
+ libcfs_debug_dumplog();
+ libcfs_run_lbug_upcall(file, func, line);
+ panic("LBUG");
+}
+#else
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+ CEMERG("LBUG\n");
+ libcfs_debug_dumpstack(NULL);
+ libcfs_debug_dumplog();
+ libcfs_run_lbug_upcall(file, func, line);
+ set_task_state(current, TASK_UNINTERRUPTIBLE);
+ while (1)
+ schedule();
+}
+#endif /* __arch_um__ */
+
#ifdef __KERNEL__
void libcfs_debug_dumpstack(struct task_struct *tsk)
EXPORT_SYMBOL(libcfs_run_upcall);
EXPORT_SYMBOL(libcfs_run_lbug_upcall);
+EXPORT_SYMBOL(lbug_with_loc);
# define DEBUG_SUBSYSTEM S_LNET
#include <linux/fs.h>
+#include <linux/kdev_t.h>
#include <linux/ctype.h>
#include <asm/uaccess.h>
*/
cfs_file_t *filp = NULL;
- filp = filp_open(name, flags, mode);
- if (IS_ERR(filp)) {
+ filp = filp_open(name, flags, mode);
+ if (IS_ERR(filp)) {
int rc;
- rc = PTR_ERR(filp);
- printk(KERN_ERR "LustreError: can't open %s file: err %d\n",
+ rc = PTR_ERR(filp);
+ printk(KERN_ERR "LustreError: can't open %s file: err %d\n",
name, rc);
if (err)
*err = rc;
mm_segment_t fs;
ssize_t size = 0;
- fs = get_fs();
+ fs = get_fs();
set_fs(KERNEL_DS);
while (count > 0) {
size = filp->f_op->write(filp, (char *)buf, count, offset);
size = 0;
}
set_fs(fs);
-
+
return size;
}
+cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor)
+{
+ return MKDEV(major, minor);
+}
+
+cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev)
+{
+ return MAJOR(rdev);
+}
+
+cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev)
+{
+ return MINOR(rdev);
+}
+
+#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL && \
+ CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\
+ CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\
+ CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\
+ CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\
+ CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW)
+
+int cfs_oflags2univ(int flags)
+{
+ int f;
+
+ f = flags & O_ACCMODE;
+        f |= (flags & O_CREAT)     ? CFS_O_CREAT     : 0;
+        f |= (flags & O_EXCL)      ? CFS_O_EXCL      : 0;
+        f |= (flags & O_NOCTTY)    ? CFS_O_NOCTTY    : 0;
+        f |= (flags & O_TRUNC)     ? CFS_O_TRUNC     : 0;
+        f |= (flags & O_APPEND)    ? CFS_O_APPEND    : 0;
+        f |= (flags & O_NONBLOCK)  ? CFS_O_NONBLOCK  : 0;
+        f |= (flags & O_SYNC)      ? CFS_O_SYNC      : 0;
+        f |= (flags & FASYNC)      ? CFS_O_ASYNC     : 0;
+        f |= (flags & O_DIRECTORY) ? CFS_O_DIRECTORY : 0;
+        f |= (flags & O_DIRECT)    ? CFS_O_DIRECT    : 0;
+        f |= (flags & O_LARGEFILE) ? CFS_O_LARGEFILE : 0;
+        f |= (flags & O_NOFOLLOW)  ? CFS_O_NOFOLLOW  : 0;
+        f |= (flags & O_NOATIME)   ? CFS_O_NOATIME   : 0;
+ return f;
+}
+#else
+
+int cfs_oflags2univ(int flags)
+{
+ return (flags);
+}
+#endif
+
+/*
+ * XXX Liang: we don't need cfs_univ2oflags() now.
+ */
+int cfs_univ2oflags(int flags)
+{
+ return (flags);
+}
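/*
 * Usage sketch (illustrative; example_wire_oflags is hypothetical):
 * translate native open flags to the universal CFS_O_* encoding before
 * putting them on the wire.  When the two encodings already agree
 * (the #if above), cfs_oflags2univ() degenerates to the identity.
 */
static int example_wire_oflags(void)
{
        return cfs_oflags2univ(O_CREAT | O_TRUNC | O_LARGEFILE);
}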
+
EXPORT_SYMBOL(cfs_filp_open);
EXPORT_SYMBOL(cfs_user_write);
+EXPORT_SYMBOL(cfs_oflags2univ);
+EXPORT_SYMBOL(cfs_univ2oflags);
void *
cfs_alloc(size_t nr_bytes, u_int32_t flags)
{
- void *ptr = NULL;
- unsigned int mflags = 0;
+ void *ptr = NULL;
+ unsigned int mflags = 0;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- if (flags & CFS_ALLOC_ATOMIC)
- mflags |= __GFP_HIGH;
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
else if (flags & CFS_ALLOC_WAIT)
mflags |= __GFP_WAIT;
- else
- mflags |= (__GFP_HIGH | __GFP_WAIT);
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
- if (flags & CFS_ALLOC_FS)
- mflags |= __GFP_FS;
- if (flags & CFS_ALLOC_IO)
- mflags |= __GFP_IO | __GFP_HIGHIO;
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
#else
if (flags & CFS_ALLOC_ATOMIC)
mflags |= __GFP_HIGH;
mflags |= __GFP_IO;
#endif
- ptr = kmalloc(nr_bytes, mflags);
- if (ptr != NULL && (flags & CFS_ALLOC_ZERO))
- memset(ptr, 0, nr_bytes);
- return ptr;
+ ptr = kmalloc(nr_bytes, mflags);
+ if (ptr != NULL && (flags & CFS_ALLOC_ZERO))
+ memset(ptr, 0, nr_bytes);
+ return ptr;
}
void
cfs_free(void *addr)
{
- kfree(addr);
+ kfree(addr);
}
void *
cfs_alloc_large(size_t nr_bytes)
{
- return vmalloc(nr_bytes);
+ return vmalloc(nr_bytes);
}
void
cfs_free_large(void *addr)
{
- vfree(addr);
+ vfree(addr);
}
-cfs_page_t *
-cfs_alloc_pages(unsigned int flags, unsigned int order)
+cfs_page_t *cfs_alloc_page(unsigned int flags)
{
unsigned int mflags = 0;
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- if (flags & CFS_ALLOC_ATOMIC)
- mflags |= __GFP_HIGH;
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
else if (flags & CFS_ALLOC_WAIT)
mflags |= __GFP_WAIT;
- else
- mflags |= (__GFP_HIGH | __GFP_WAIT);
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
- if (flags & CFS_ALLOC_FS)
- mflags |= __GFP_FS;
- if (flags & CFS_ALLOC_IO)
- mflags |= __GFP_IO | __GFP_HIGHIO;
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
if (flags & CFS_ALLOC_HIGH)
mflags |= __GFP_HIGHMEM;
#else
mflags |= __GFP_HIGHMEM;
#endif
- return alloc_pages(mflags, order);
+ return alloc_pages(mflags, 0);
}
cfs_mem_cache_t *
cfs_mem_cache_create (const char *name, size_t size, size_t offset,
- unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long),
- void (*dtor)(void*, cfs_mem_cache_t *, unsigned long))
+ unsigned long flags)
{
- return kmem_cache_create(name, size, offset, flags, ctor, dtor);
+ return kmem_cache_create(name, size, offset, flags, NULL, NULL);
}
int
{
unsigned int mflags = 0;
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- if (flags & CFS_SLAB_ATOMIC)
- mflags |= __GFP_HIGH;
+ if (flags & CFS_ALLOC_ATOMIC)
+ mflags |= __GFP_HIGH;
else if (flags & CFS_ALLOC_WAIT)
mflags |= __GFP_WAIT;
- else
- mflags |= (__GFP_HIGH | __GFP_WAIT);
+ else
+ mflags |= (__GFP_HIGH | __GFP_WAIT);
- if (flags & CFS_SLAB_FS)
- mflags |= __GFP_FS;
- if (flags & CFS_SLAB_IO)
- mflags |= __GFP_IO | __GFP_HIGHIO;
+ if (flags & CFS_ALLOC_FS)
+ mflags |= __GFP_FS;
+ if (flags & CFS_ALLOC_IO)
+ mflags |= __GFP_IO | __GFP_HIGHIO;
#else
- if (flags & CFS_SLAB_ATOMIC)
+
+ if (flags & CFS_ALLOC_ATOMIC)
mflags |= __GFP_HIGH;
else
mflags |= __GFP_WAIT;
- if (flags & CFS_SLAB_FS)
+ if (flags & CFS_ALLOC_FS)
mflags |= __GFP_FS;
- if (flags & CFS_SLAB_IO)
+ if (flags & CFS_ALLOC_IO)
mflags |= __GFP_IO;
#endif
EXPORT_SYMBOL(cfs_free);
EXPORT_SYMBOL(cfs_alloc_large);
EXPORT_SYMBOL(cfs_free_large);
-EXPORT_SYMBOL(cfs_alloc_pages);
+EXPORT_SYMBOL(cfs_alloc_page);
EXPORT_SYMBOL(cfs_mem_cache_create);
EXPORT_SYMBOL(cfs_mem_cache_destroy);
EXPORT_SYMBOL(cfs_mem_cache_alloc);
#define LNET_MINOR 240
-
void
libcfs_daemonize (char *str)
{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
daemonize(str);
-#else
- daemonize();
+#else
+ daemonize();
snprintf (current->comm, sizeof (current->comm), "%s", str);
#endif
}
void
libcfs_blockallsigs ()
-{
- unsigned long flags;
-
- SIGNAL_MASK_LOCK(current, flags);
- sigfillset(¤t->blocked);
- RECALC_SIGPENDING;
+{
+ unsigned long flags;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ sigfillset(¤t->blocked);
+ RECALC_SIGPENDING;
SIGNAL_MASK_UNLOCK(current, flags);
}
extern struct cfs_psdev_ops libcfs_psdev_ops;
-static int
+static int
libcfs_psdev_open(struct inode * inode, struct file * file)
-{
+{
struct libcfs_device_userstate **pdu = NULL;
int rc = 0;
- if (!inode)
+ if (!inode)
return (-EINVAL);
pdu = (struct libcfs_device_userstate **)&file->private_data;
if (libcfs_psdev_ops.p_open != NULL)
}
/* called when closing /dev/device */
-static int
+static int
libcfs_psdev_release(struct inode * inode, struct file * file)
{
struct libcfs_device_userstate *pdu;
int rc = 0;
- if (!inode)
+ if (!inode)
return (-EINVAL);
pdu = file->private_data;
if (libcfs_psdev_ops.p_close != NULL)
return rc;
}
-static int
-libcfs_ioctl(struct inode *inode, struct file *file,
+static int
+libcfs_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg)
-{
+{
struct cfs_psdev_file pfile;
int rc = 0;
- if (current->fsuid != 0)
- return -EACCES;
-
- if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
- _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
- _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
- CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
- _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
- return (-EINVAL);
- }
-
+ if (current->fsuid != 0)
+ return -EACCES;
+
+ if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+ _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
+ _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
+ CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+ _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+ return (-EINVAL);
+ }
+
/* Handle platform-dependent IOC requests */
- switch (cmd) {
- case IOC_LIBCFS_PANIC:
- if (!capable (CAP_SYS_BOOT))
- return (-EPERM);
- panic("debugctl-invoked panic");
+ switch (cmd) {
+ case IOC_LIBCFS_PANIC:
+ if (!capable (CAP_SYS_BOOT))
+ return (-EPERM);
+ panic("debugctl-invoked panic");
return (0);
- case IOC_LIBCFS_MEMHOG:
- if (!capable (CAP_SYS_ADMIN))
+ case IOC_LIBCFS_MEMHOG:
+ if (!capable (CAP_SYS_ADMIN))
return -EPERM;
		/* fall through */
}
pfile.off = 0;
pfile.private_data = file->private_data;
- if (libcfs_psdev_ops.p_ioctl != NULL)
- rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+ if (libcfs_psdev_ops.p_ioctl != NULL)
+ rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
else
rc = -EPERM;
return (rc);
}
-static struct file_operations libcfs_fops = {
- ioctl: libcfs_ioctl,
- open: libcfs_psdev_open,
+static struct file_operations libcfs_fops = {
+ ioctl: libcfs_ioctl,
+ open: libcfs_psdev_open,
release: libcfs_psdev_release
};
-cfs_psdev_t libcfs_dev = {
- LNET_MINOR,
- "lnet",
+cfs_psdev_t libcfs_dev = {
+ LNET_MINOR,
+ "lnet",
&libcfs_fops
};
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
#define DEBUG_SUBSYSTEM S_LNET
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
+#include <libcfs/libcfs.h>
+
+void cfs_enter_debugger(void)
+{
+#if defined(CONFIG_KGDB)
+ extern void breakpoint(void);
+ breakpoint();
+#elif defined(__arch_um__)
+ //asm("int $3");
+#else
+ /* nothing */
+#endif
+}
+
+void cfs_daemonize(char *str) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
+ daemonize(str);
+#else
+ daemonize();
+ snprintf (current->comm, sizeof (current->comm), "%s", str);
+#endif
+}
+
+sigset_t cfs_get_blocked_sigs(void)
+{
+ unsigned long flags;
+ sigset_t old;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ old = current->blocked;
+ SIGNAL_MASK_UNLOCK(current, flags);
+ return old;
+}
+
+void cfs_block_allsigs(void)
+{
+ unsigned long flags;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ sigfillset(¤t->blocked);
+ RECALC_SIGPENDING;
+ SIGNAL_MASK_UNLOCK(current, flags);
+}
+
+void cfs_block_sigs(sigset_t bits)
+{
+ unsigned long flags;
+
+ SIGNAL_MASK_LOCK(current, flags);
+ current->blocked = bits;
+ RECALC_SIGPENDING;
+ SIGNAL_MASK_UNLOCK(current, flags);
+}
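/*
 * Usage sketch (illustrative; example_blocked_section is
 * hypothetical): the canonical save/block/restore pattern built from
 * the helpers above.
 */
static void example_blocked_section(void)
{
        sigset_t saved;

        saved = cfs_get_blocked_sigs();   /* remember the current mask */
        cfs_block_allsigs();              /* block everything */
        /* ... signal-sensitive work ... */
        cfs_block_sigs(saved);            /* restore the previous mask */
}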
int
libcfs_arch_init(void)
-{
- return 0;
+{
+ return 0;
}
void
libcfs_arch_cleanup(void)
{
- return;
+ return;
}
EXPORT_SYMBOL(libcfs_arch_init);
EXPORT_SYMBOL(libcfs_arch_cleanup);
+EXPORT_SYMBOL(cfs_daemonize);
+EXPORT_SYMBOL(cfs_block_allsigs);
+EXPORT_SYMBOL(cfs_block_sigs);
+EXPORT_SYMBOL(cfs_get_blocked_sigs);
CERROR ("Can't create socket: %d\n", rc);
return rc;
}
-
+
nob = strnlen(name, IFNAMSIZ);
if (nob == IFNAMSIZ) {
CERROR("Interface name %s too long\n", name);
rc = -EINVAL;
goto out;
}
-
+
CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
strcpy(ifr.ifr_name, name);
CERROR("Can't get IP address for interface %s\n", name);
goto out;
}
-
+
val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
*ip = ntohl(val);
set_fs(KERNEL_DS);
rc = sock->ops->ioctl(sock, SIOCGIFNETMASK, (unsigned long)&ifr);
set_fs(oldmm);
-
+
if (rc != 0) {
CERROR("Can't get netmask for interface %s\n", name);
goto out;
}
-
+
val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr;
*mask = ntohl(val);
rc = -ENOMEM;
goto out0;
}
-
+
ifc.ifc_buf = (char *)ifr;
ifc.ifc_len = nalloc * sizeof(*ifr);
-
+
set_fs(KERNEL_DS);
rc = sock->ops->ioctl(sock, SIOCGIFCONF, (unsigned long)&ifc);
set_fs(oldmm);
CERROR ("Error %d enumerating interfaces\n", rc);
goto out1;
}
-
+
LASSERT (rc == 0);
nfound = ifc.ifc_len/sizeof(*ifr);
}
/* NULL out all names[i] */
memset (names, 0, nfound * sizeof(*names));
-
+
for (i = 0; i < nfound; i++) {
nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
if (nob == IFNAMSIZ) {
/* no space for terminating NULL */
- CERROR("interface name %.*s too long (%d max)\n",
+ CERROR("interface name %.*s too long (%d max)\n",
nob, ifr[i].ifr_name, IFNAMSIZ);
rc = -ENAMETOOLONG;
goto out2;
*namesp = names;
rc = nfound;
-
+
out2:
if (rc < 0)
libcfs_ipif_free_enumeration(names, nfound);
long ticks = timeout * HZ;
unsigned long then;
struct timeval tv;
-
+
LASSERT (nob > 0);
/* Caller may pass a zero timeout if she thinks the socket buffer is
* empty enough to take the whole message immediately */
set_fs(oldmm);
if (rc != 0) {
CERROR("Can't set socket send timeout "
- "%ld.%06d: %d\n",
+ "%ld.%06d: %d\n",
(long)tv.tv_sec, (int)tv.tv_usec, rc);
return rc;
}
set_fs (KERNEL_DS);
then = jiffies;
rc = sock_sendmsg (sock, &msg, iov.iov_len);
- ticks -= then - jiffies;
+ ticks -= jiffies - then;
set_fs (oldmm);
if (rc == nob)
if (ticks <= 0)
return -EAGAIN;
-
+
buffer = ((char *)buffer) + rc;
nob -= rc;
}
return (0);
}
-
EXPORT_SYMBOL(libcfs_sock_write);
int
EXPORT_SYMBOL(libcfs_sock_read);
static int
-libcfs_sock_create (struct socket **sockp, int *fatal,
+libcfs_sock_create (struct socket **sockp, int *fatal,
__u32 local_ip, int local_port)
{
struct sockaddr_in locaddr;
memset(&locaddr, 0, sizeof(locaddr));
locaddr.sin_family = AF_INET;
locaddr.sin_port = htons(local_port);
- locaddr.sin_addr.s_addr = (local_ip == 0) ?
+ locaddr.sin_addr.s_addr = (local_ip == 0) ?
INADDR_ANY : htonl(local_ip);
-
- rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
+
+ rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
sizeof(locaddr));
if (rc == -EADDRINUSE) {
CDEBUG(D_NET, "Port %d already in use\n", local_port);
goto failed;
}
}
-
+
return 0;
failed:
(char *)&option, sizeof (option));
set_fs (oldmm);
if (rc != 0) {
- CERROR ("Can't set send buffer %d: %d\n",
+ CERROR ("Can't set send buffer %d: %d\n",
option, rc);
return (rc);
}
}
-
+
if (rxbufsize != 0) {
option = rxbufsize;
set_fs (KERNEL_DS);
(char *)&option, sizeof (option));
set_fs (oldmm);
if (rc != 0) {
- CERROR ("Can't set receive buffer %d: %d\n",
+ CERROR ("Can't set receive buffer %d: %d\n",
option, rc);
return (rc);
}
}
-
+
return 0;
}
}
*txbufsize = option;
}
-
+
if (rxbufsize != NULL) {
optlen = sizeof(option);
set_fs (KERNEL_DS);
}
*rxbufsize = option;
}
-
+
return 0;
}
EXPORT_SYMBOL(libcfs_sock_getbuf);
int
-libcfs_sock_listen (struct socket **sockp,
+libcfs_sock_listen (struct socket **sockp,
__u32 local_ip, int local_port, int backlog)
{
int fatal;
local_port);
return rc;
}
-
+
rc = (*sockp)->ops->listen(*sockp, backlog);
if (rc == 0)
return 0;
-
+
CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
sock_release(*sockp);
return rc;
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(sock->sk->sk_sleep, &wait);
-
+
rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
if (rc == -EAGAIN) {
/* Nothing ready, so wait for activity */
schedule();
rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
}
-
+
remove_wait_queue(sock->sk->sk_sleep, &wait);
set_current_state(TASK_RUNNING);
extern union trace_data_union trace_data[NR_CPUS];
extern char *tracefile;
extern long long tracefile_size;
-extern struct rw_semaphore tracefile_sem;
+
+struct rw_semaphore tracefile_sem;
+
+void tracefile_lock_init()
+{
+ init_rwsem(&tracefile_sem);
+}
+
+void tracefile_read_lock()
+{
+ down_read(&tracefile_sem);
+}
+
+void tracefile_read_unlock()
+{
+ up_read(&tracefile_sem);
+}
+
+void tracefile_write_lock()
+{
+ down_write(&tracefile_sem);
+}
+
+void tracefile_write_unlock()
+{
+ up_write(&tracefile_sem);
+}
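/*
 * Usage sketch (illustrative; example_tracefile_name_len is
 * hypothetical): the wrappers above keep the rw_semaphore private to
 * this file, so platform-independent callers need no Linux locking
 * primitives.  Readers nest freely; writers are exclusive.
 */
static int example_tracefile_name_len(void)
{
        int len;

        tracefile_read_lock();
        len = (tracefile != NULL) ? strlen(tracefile) : 0;
        tracefile_read_unlock();
        return len;
}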
inline struct trace_cpu_data *
-__trace_get_tcd(unsigned long *flags)
+__trace_get_tcd(unsigned long *flags)
{
- struct trace_cpu_data *ret;
+ struct trace_cpu_data *ret;
- int cpu = get_cpu();
- local_irq_save(*flags);
- ret = &trace_data[cpu].tcd;
+ int cpu = get_cpu();
+ local_irq_save(*flags);
+ ret = &trace_data[cpu].tcd;
- return ret;
+ return ret;
}
-inline void
+inline void
trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
{
- local_irq_restore(flags);
- put_cpu();
+ local_irq_restore(flags);
+ put_cpu();
+}
+
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ return tcd->tcd_cpu == tage->cpu;
}
void
-set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
const int line, unsigned long stack)
-{
- struct timeval tv;
-
- do_gettimeofday(&tv);
-
- header->ph_subsys = subsys;
- header->ph_mask = mask;
- header->ph_cpu_id = smp_processor_id();
- header->ph_sec = (__u32)tv.tv_sec;
- header->ph_usec = tv.tv_usec;
- header->ph_stack = stack;
- header->ph_pid = current->pid;
- header->ph_line_num = line;
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+
+ header->ph_subsys = subsys;
+ header->ph_mask = mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_sec = (__u32)tv.tv_sec;
+ header->ph_usec = tv.tv_usec;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = line;
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
header->ph_extern_pid = current->thread.extern_pid;
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
header->ph_extern_pid = current->thread.mode.tt.extern_pid;
-#else
+#else
header->ph_extern_pid = 0;
#endif
return;
}
-void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
+void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
int len, char *file, const char *fn)
-{
- char *prefix = NULL, *ptype = NULL;
-
- if ((mask & D_EMERG) != 0) {
- prefix = "LustreError";
- ptype = KERN_EMERG;
- } else if ((mask & D_ERROR) != 0) {
- prefix = "LustreError";
- ptype = KERN_ERR;
- } else if ((mask & D_WARNING) != 0) {
- prefix = "Lustre";
- ptype = KERN_WARNING;
+{
+ char *prefix = "Lustre", *ptype = NULL;
+
+ if ((mask & D_EMERG) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_EMERG;
+ } else if ((mask & D_ERROR) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_ERR;
+ } else if ((mask & D_WARNING) != 0) {
+ prefix = "Lustre";
+ ptype = KERN_WARNING;
} else if (libcfs_printk != 0 || (mask & D_CONSOLE)) {
- prefix = "Lustre";
- ptype = KERN_INFO;
- }
+ prefix = "Lustre";
+ ptype = KERN_INFO;
+ }
if ((mask & D_CONSOLE) != 0) {
printk("%s%s: %.*s", ptype, prefix, len, buf);
} else {
- printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
+ printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
}
return;
}
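/*
 * For reference, the mask-to-console mapping implemented above:
 *
 *   D_EMERG       -> KERN_EMERG,   prefix "LustreError"
 *   D_ERROR       -> KERN_ERR,     prefix "LustreError"
 *   D_WARNING     -> KERN_WARNING, prefix "Lustre"
 *   anything else -> KERN_INFO,    prefix "Lustre", emitted only when
 *                    libcfs_printk is set or D_CONSOLE was requested
 */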
-int trace_write_daemon_file(struct file *file, const char *buffer,
+int trace_write_daemon_file(struct file *file, const char *buffer,
unsigned long count, void *data)
-{
- char *name;
- unsigned long off;
- int rc;
-
- name = kmalloc(count + 1, GFP_KERNEL);
- if (name == NULL)
- return -ENOMEM;
-
- if (copy_from_user(name, buffer, count)) {
- rc = -EFAULT;
- goto out;
- }
-
- /* be nice and strip out trailing '\n' */
- for (off = count ; off > 2 && isspace(name[off - 1]); off--)
- ;
-
- name[off] = '\0';
-
- down_write(&tracefile_sem);
- if (strcmp(name, "stop") == 0) {
- tracefile = NULL;
- trace_stop_thread();
- goto out_sem;
- } else if (strncmp(name, "size=", 5) == 0) {
- tracefile_size = simple_strtoul(name + 5, NULL, 0);
- if (tracefile_size < 10 || tracefile_size > 20480)
- tracefile_size = TRACEFILE_SIZE;
- else
- tracefile_size <<= 20;
- goto out_sem;
- }
-
- if (name[0] != '/') {
- rc = -EINVAL;
- goto out_sem;
- }
-
- if (tracefile != NULL)
- kfree(tracefile);
-
- tracefile = name;
- name = NULL;
- printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
- "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
-
- trace_start_thread();
-out_sem:
- up_write(&tracefile_sem);
-out:
+{
+ char *name;
+ unsigned long off;
+ int rc;
+
+ name = kmalloc(count + 1, GFP_KERNEL);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(name, buffer, count)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ /* be nice and strip out trailing '\n' */
+ for (off = count ; off > 2 && isspace(name[off - 1]); off--)
+ ;
+
+ name[off] = '\0';
+
+ tracefile_write_lock();
+ if (strcmp(name, "stop") == 0) {
+ tracefile = NULL;
+ trace_stop_thread();
+ goto out_sem;
+ } else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
+ goto out_sem;
+ }
+
+ if (name[0] != '/') {
+ rc = -EINVAL;
+ goto out_sem;
+ }
+
+ if (tracefile != NULL)
+ kfree(tracefile);
+
+ tracefile = name;
+ name = NULL;
+ printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
+ "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
+
+ trace_start_thread();
+out_sem:
+ tracefile_write_unlock();
+out:
kfree(name);
return count;
}
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
+int trace_read_daemon_file(char *page, char **start, off_t off, int count,
int *eof, void *data)
-{
- int rc;
-
- down_read(&tracefile_sem);
- rc = snprintf(page, count, "%s", tracefile);
- up_read(&tracefile_sem);
+{
+ int rc;
+
+ tracefile_read_lock();
+ rc = snprintf(page, count, "%s", tracefile);
+ tracefile_read_unlock();
return rc;
}
-int trace_write_debug_mb(struct file *file, const char *buffer,
+int trace_write_debug_mb(struct file *file, const char *buffer,
unsigned long count, void *data)
-{
- char string[32];
- int i;
- unsigned max;
-
- if (count >= sizeof(string)) {
- printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
- count);
- return -EOVERFLOW;
- }
-
- if (copy_from_user(string, buffer, count))
- return -EFAULT;
-
- max = simple_strtoul(string, NULL, 0);
- if (max == 0)
+{
+ char string[32];
+ int i;
+ unsigned max;
+
+ if (count >= sizeof(string)) {
+ printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
+ count);
+ return -EOVERFLOW;
+ }
+
+ if (copy_from_user(string, buffer, count))
+ return -EFAULT;
+
+ max = simple_strtoul(string, NULL, 0);
+ if (max == 0)
+ return -EINVAL;
+
+ if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
+ printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
+ "%dMB, which is more than 80%% of available RAM (%lu)\n",
+ max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
return -EINVAL;
+ }
+
+ max /= smp_num_cpus;
- if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
- printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
- "%dMB, which is more than 80%% of available RAM (%lu)\n",
- max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
- return -EINVAL;
- }
-
- max /= smp_num_cpus;
-
- for (i = 0; i < NR_CPUS; i++) {
- struct trace_cpu_data *tcd;
- tcd = &trace_data[i].tcd;
- tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
- }
+ for (i = 0; i < NR_CPUS; i++) {
+ struct trace_cpu_data *tcd;
+ tcd = &trace_data[i].tcd;
+ tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
+ }
return count;
}
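/*
 * Worked example (editorial; assumes 4kB pages and 4 CPUs): writing
 * "256" sets max = 256MB total, which the code above splits into 64MB
 * per CPU, i.e. tcd_max_pages = 64 << (20 - PAGE_SHIFT) = 16384 pages
 * for each CPU's trace buffer.
 */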
int trace_read_debug_mb(char *page, char **start, off_t off, int count,
int *eof, void *data)
-{
- struct trace_cpu_data *tcd;
- unsigned long flags;
+{
+ struct trace_cpu_data *tcd;
+ unsigned long flags;
int rc;
-
- tcd = trace_get_tcd(flags);
- rc = snprintf(page, count, "%lu\n",
- (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
- trace_put_tcd(tcd, flags);
+
+ tcd = trace_get_tcd(flags);
+ rc = snprintf(page, count, "%lu\n",
+ (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
+ trace_put_tcd(tcd, flags);
return rc;
}
{
return ecode;
}
+EXPORT_SYMBOL(convert_server_error);
/*
* convert <fcntl.h> flag from client to server.
*/
-int convert_client_oflag(int cflag)
+int convert_client_oflag(int cflag, int *result)
{
- return cflag;
+ *result = cflag;
+ return 0;
}
+EXPORT_SYMBOL(convert_client_oflag);
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{}
+EXPORT_SYMBOL(cfs_stack_trace_fill);
+
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+ return NULL;
+}
+EXPORT_SYMBOL(cfs_stack_trace_frame);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+/*
+ * On-wire format is native kdev_t format of Linux kernel 2.6
+ */
+enum {
+ WIRE_RDEV_MINORBITS = 20,
+ WIRE_RDEV_MINORMASK = ((1U << WIRE_RDEV_MINORBITS) - 1)
+};
+
+cfs_wire_rdev_t cfs_wire_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor)
+{
+ return (major << WIRE_RDEV_MINORBITS) | minor;
+}
+
+cfs_major_nr_t cfs_wire_rdev_major(cfs_wire_rdev_t rdev)
+{
+ return rdev >> WIRE_RDEV_MINORBITS;
+}
+
+cfs_minor_nr_t cfs_wire_rdev_minor(cfs_wire_rdev_t rdev)
+{
+ return rdev & WIRE_RDEV_MINORMASK;
+}
+
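/*
 * Round-trip sketch (illustrative; example_wire_rdev is hypothetical):
 * pack a device number into the kdev_t-style layout above -- major in
 * the high bits, 20-bit minor in the low bits -- and unpack it again.
 */
static void example_wire_rdev(void)
{
        cfs_wire_rdev_t wire = cfs_wire_rdev_build(8, 1);

        LASSERT(cfs_wire_rdev_major(wire) == 8);
        LASSERT(cfs_wire_rdev_minor(wire) == 1);
}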
#include <lnet/lib-lnet.h>
#include <lnet/lnet.h>
#include <libcfs/kp30.h>
+#include "tracefile.h"
void
kportal_memhog_free (struct libcfs_device_userstate *ldu)
};
static struct netstrfns libcfs_netstrfns[] = {
- {.nf_type = LOLND,
- .nf_name = "lo",
- .nf_modname = "klolnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_lo_str2addr},
- {.nf_type = SOCKLND,
- .nf_name = "tcp",
- .nf_modname = "ksocklnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr},
- {.nf_type = OPENIBLND,
- .nf_name = "openib",
- .nf_modname = "kopeniblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr},
- {.nf_type = IIBLND,
- .nf_name = "iib",
- .nf_modname = "kiiblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr},
- {.nf_type = VIBLND,
- .nf_name = "vib",
- .nf_modname = "kviblnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr},
- {.nf_type = RALND,
- .nf_name = "ra",
- .nf_modname = "kralnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr},
- {.nf_type = QSWLND,
- .nf_name = "elan",
- .nf_modname = "kqswlnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr},
- {.nf_type = GMLND,
- .nf_name = "gm",
- .nf_modname = "kgmlnd",
- .nf_addr2str = libcfs_hexnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr},
- {.nf_type = PTLLND,
- .nf_name = "ptl",
- .nf_modname = "kptllnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr},
+ {/* .nf_type */ LOLND,
+ /* .nf_name */ "lo",
+ /* .nf_modname */ "klolnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_lo_str2addr},
+ {/* .nf_type */ SOCKLND,
+ /* .nf_name */ "tcp",
+ /* .nf_modname */ "ksocklnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr},
+ {/* .nf_type */ OPENIBLND,
+ /* .nf_name */ "openib",
+ /* .nf_modname */ "kopeniblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr},
+ {/* .nf_type */ IIBLND,
+ /* .nf_name */ "iib",
+ /* .nf_modname */ "kiiblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr},
+ {/* .nf_type */ VIBLND,
+ /* .nf_name */ "vib",
+ /* .nf_modname */ "kviblnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr},
+ {/* .nf_type */ RALND,
+ /* .nf_name */ "ra",
+ /* .nf_modname */ "kralnd",
+ /* .nf_addr2str */ libcfs_ip_addr2str,
+ /* .nf_str2addr */ libcfs_ip_str2addr},
+ {/* .nf_type */ QSWLND,
+ /* .nf_name */ "elan",
+ /* .nf_modname */ "kqswlnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr},
+ {/* .nf_type */ GMLND,
+ /* .nf_name */ "gm",
+ /* .nf_modname */ "kgmlnd",
+ /* .nf_addr2str */ libcfs_hexnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr},
+ {/* .nf_type */ PTLLND,
+ /* .nf_name */ "ptl",
+ /* .nf_modname */ "kptllnd",
+ /* .nf_addr2str */ libcfs_decnum_addr2str,
+ /* .nf_str2addr */ libcfs_num_str2addr},
/* placeholder for net0 alias. It MUST BE THE LAST ENTRY */
- {.nf_type = -1},
+ {/* .nf_type */ -1},
};
const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]);
struct netstrfns *nf;
int nob;
int netnum;
- unsigned int i;
+ int i;
for (i = 0; i < libcfs_nnetstrfns; i++) {
nf = &libcfs_netstrfns[i];
nob = strlen(nf->nf_name);
- if (strlen(str) == nob) {
+ if (strlen(str) == (unsigned int)nob) {
netnum = 0;
} else {
if (nf->nf_type == LOLND) /* net number not allowed */
str += nob;
i = strlen(str);
if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
- i != strlen(str))
+ i != (int)strlen(str))
return NULL;
}
/* XXX move things up to the top, comment */
union trace_data_union trace_data[NR_CPUS] __cacheline_aligned;
-struct rw_semaphore tracefile_sem;
char *tracefile = NULL;
-long long tracefile_size = TRACEFILE_SIZE;
+int64_t tracefile_size = TRACEFILE_SIZE;
static struct tracefiled_ctl trace_tctl;
struct semaphore trace_thread_sem;
static int thread_running = 0;
-static void put_pages_on_daemon_list_on_cpu(void *info);
+atomic_t tage_allocated = ATOMIC_INIT(0);
+
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+ struct trace_cpu_data *tcd);
static inline struct trace_page *tage_from_list(struct list_head *list)
{
}
tage->page = page;
+ atomic_inc(&tage_allocated);
return tage;
}
cfs_free_page(tage->page);
cfs_free(tage);
+ atomic_dec(&tage_allocated);
}
static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
cfs_page_count(tage->page));
}
-/* return a page that has 'len' bytes left at the end */
-static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
- unsigned long len)
+int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
+ struct list_head *stock)
{
- struct trace_page *tage;
+ int i;
- if (len > CFS_PAGE_SIZE) {
- printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
- "page\n", len);
- return NULL;
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+
+ for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) {
+ struct trace_page *tage;
+
+ tage = tage_alloc(gfp);
+ if (tage == NULL)
+ break;
+ list_add_tail(&tage->linkage, stock);
}
+ return i;
+}
+
+/* return a page that has 'len' bytes left at the end */
+static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd,
+ unsigned long len)
+{
+ struct trace_page *tage;
- if (!list_empty(&tcd->tcd_pages)) {
+ if (tcd->tcd_cur_pages > 0) {
+ LASSERT(!list_empty(&tcd->tcd_pages));
tage = tage_from_list(tcd->tcd_pages.prev);
if (tage->used + len <= CFS_PAGE_SIZE)
return tage;
}
if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
- tage = tage_alloc(CFS_ALLOC_ATOMIC);
- if (tage == NULL) {
- /* the kernel should print a message for us. fall back
- * to using the last page in the ring buffer. */
- goto ring_buffer;
+ if (tcd->tcd_cur_stock_pages > 0) {
+ tage = tage_from_list(tcd->tcd_stock_pages.prev);
+ -- tcd->tcd_cur_stock_pages;
+ list_del_init(&tage->linkage);
+ } else {
+ tage = tage_alloc(CFS_ALLOC_ATOMIC);
+ if (tage == NULL) {
+ printk(KERN_WARNING
+ "failure to allocate a tage (%ld)\n",
+ tcd->tcd_cur_pages);
+ return NULL;
+ }
}
tage->used = 0;
if (tcd->tcd_cur_pages > 8 && thread_running) {
struct tracefiled_ctl *tctl = &trace_tctl;
+ /*
+ * wake up tracefiled to process some pages.
+ */
cfs_waitq_signal(&tctl->tctl_waitq);
}
return tage;
}
+ return NULL;
+}
- ring_buffer:
- if (thread_running) {
- int pgcount = tcd->tcd_cur_pages / 10;
- struct page_collection pc;
- struct trace_page *tage;
- struct trace_page *tmp;
+static void tcd_shrink(struct trace_cpu_data *tcd)
+{
+ int pgcount = tcd->tcd_cur_pages / 10;
+ struct page_collection pc;
+ struct trace_page *tage;
+ struct trace_page *tmp;
- printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
- " 10%% of pages (%d)\n", pgcount + 1);
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
- CFS_INIT_LIST_HEAD(&pc.pc_pages);
- spin_lock_init(&pc.pc_lock);
+ printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
+ " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages);
- list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
- if (pgcount-- == 0)
- break;
+ CFS_INIT_LIST_HEAD(&pc.pc_pages);
+ spin_lock_init(&pc.pc_lock);
- list_move_tail(&tage->linkage, &pc.pc_pages);
- tcd->tcd_cur_pages--;
- }
- put_pages_on_daemon_list_on_cpu(&pc);
+ list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+ if (pgcount-- == 0)
+ break;
- LASSERT(!list_empty(&tcd->tcd_pages));
+ list_move_tail(&tage->linkage, &pc.pc_pages);
+ tcd->tcd_cur_pages--;
}
+ put_pages_on_tcd_daemon_list(&pc, tcd);
+}
- if (list_empty(&tcd->tcd_pages))
- return NULL;
+/* return a page that has 'len' bytes left at the end */
+static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
+ unsigned long len)
+{
+ struct trace_page *tage;
- tage = tage_from_list(tcd->tcd_pages.next);
- tage->used = 0;
- tage_to_tail(tage, &tcd->tcd_pages);
+ /*
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+ if (len > CFS_PAGE_SIZE) {
+ printk(KERN_ERR
+ "cowardly refusing to write %lu bytes in a page\n", len);
+ return NULL;
+ }
+
+ tage = trace_get_tage_try(tcd, len);
+ if (tage != NULL)
+ return tage;
+ if (thread_running)
+ tcd_shrink(tcd);
+ if (tcd->tcd_cur_pages > 0) {
+ tage = tage_from_list(tcd->tcd_pages.next);
+ tage->used = 0;
+ tage_to_tail(tage, &tcd->tcd_pages);
+ }
return tage;
}
struct trace_cpu_data *tcd;
struct ptldebug_header header;
struct trace_page *tage;
- char *debug_buf = format;
+ char *string_buf = format;
+ char *debug_buf;
int known_size, needed = 85 /* average message length */, max_nob;
va_list ap;
unsigned long flags;
+ int depth;
if (strchr(file, '/'))
file = strrchr(file, '/') + 1;
goto out;
set_ptldebug_header(&header, subsys, mask, line, stack);
- known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
+
+ depth = __current_nesting_level();
+ known_size = strlen(file) + 1 + depth;
+ if (fn)
+ known_size += strlen(fn) + 1;
+
+ if (libcfs_debug_binary)
+ known_size += sizeof(header);
retry:
tage = trace_get_tage(tcd, needed + known_size);
if (tage == NULL) {
- debug_buf = format;
if (needed + known_size > CFS_PAGE_SIZE)
mask |= D_ERROR;
- needed = strlen(format);
goto out;
}
- debug_buf = cfs_page_address(tage->page) + tage->used + known_size;
+ string_buf = (char *)cfs_page_address(tage->page) + tage->used + known_size;
max_nob = CFS_PAGE_SIZE - tage->used - known_size;
if (max_nob <= 0) {
goto out;
}
va_start(ap, format);
- needed = vsnprintf(debug_buf, max_nob, format, ap);
+ needed = vsnprintf(string_buf, max_nob, format, ap);
va_end(ap);
if (needed > max_nob) /* overflow. oh poop. */
goto retry;
header.ph_len = known_size + needed;
- debug_buf = cfs_page_address(tage->page) + tage->used;
+ debug_buf = (char *)cfs_page_address(tage->page) + tage->used;
- memcpy(debug_buf, &header, sizeof(header));
- tage->used += sizeof(header);
- debug_buf += sizeof(header);
+ if (libcfs_debug_binary) {
+ memcpy(debug_buf, &header, sizeof(header));
+ tage->used += sizeof(header);
+ debug_buf += sizeof(header);
+ }
+
+ /* indent message according to the nesting level */
+ while (depth-- > 0) {
+ *(debug_buf++) = '.';
+ ++ tage->used;
+ }
strcpy(debug_buf, file);
tage->used += strlen(file) + 1;
debug_buf += strlen(file) + 1;
- strcpy(debug_buf, fn);
- tage->used += strlen(fn) + 1;
- debug_buf += strlen(fn) + 1;
+ if (fn) {
+ strcpy(debug_buf, fn);
+ tage->used += strlen(fn) + 1;
+ debug_buf += strlen(fn) + 1;
+ }
+
+ LASSERT(debug_buf == string_buf);
tage->used += needed;
if (tage->used > CFS_PAGE_SIZE)
out:
if ((mask & (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE)) || libcfs_printk)
- print_to_console(&header, mask, debug_buf, needed, file, fn);
+ print_to_console(&header, mask, string_buf, needed, file, fn);
trace_put_tcd(tcd, flags);
}
* we have a good amount of data at all times for dumping during an LBUG, even
* if we have been steadily writing (and otherwise discarding) pages via the
* debug daemon. */
-static void put_pages_on_daemon_list_on_cpu(void *info)
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+ struct trace_cpu_data *tcd)
{
- struct page_collection *pc = info;
- struct trace_cpu_data *tcd;
struct trace_page *tage;
struct trace_page *tmp;
- unsigned long flags;
-
- tcd = trace_get_tcd(flags);
spin_lock(&pc->pc_lock);
list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
}
}
spin_unlock(&pc->pc_lock);
+}
+
+static void put_pages_on_daemon_list_on_cpu(void *info)
+{
+ struct trace_cpu_data *tcd;
+ unsigned long flags;
+ tcd = trace_get_tcd(flags);
+ put_pages_on_tcd_daemon_list(info, tcd);
trace_put_tcd(tcd, flags);
}
spin_lock_init(&pc.pc_lock);
+ pc.pc_want_daemon_pages = 1;
collect_pages(&pc);
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
char *p, *file, *fn;
cfs_file_t *filp;
struct trace_page *tage;
struct trace_page *tmp;
- CFS_DECL_MMSPACE;
int rc;
- down_write(&tracefile_sem);
+ CFS_DECL_MMSPACE;
+
+ tracefile_write_lock();
filp = cfs_filp_open(filename,
O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
rc = cfs_filp_write(filp, cfs_page_address(tage->page),
tage->used, cfs_filp_poff(filp));
- if (rc != tage->used) {
+ if (rc != (int)tage->used) {
printk(KERN_WARNING "wanted to write %u but wrote "
"%d\n", tage->used, rc);
put_pages_back(&pc);
+ LASSERT(list_empty(&pc.pc_pages));
break;
}
list_del(&tage->linkage);
close:
cfs_filp_close(filp);
out:
- up_write(&tracefile_sem);
+ tracefile_write_unlock();
return rc;
}
spin_lock_init(&pc.pc_lock);
+ pc.pc_want_daemon_pages = 1;
collect_pages(&pc);
list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
if (name == NULL)
return -ENOMEM;
- if (copy_from_user(name, buffer, count)) {
+ if (copy_from_user((void *)name, (void *)buffer, count)) {
rc = -EFAULT;
goto out;
}
+#if !defined(__WINNT__)
if (name[0] != '/') {
rc = -EINVAL;
goto out;
}
+#endif
/* be nice and strip out trailing '\n' */
for (off = count ; off > 2 && isspace(name[off - 1]); off--)
struct trace_page *tmp;
struct ptldebug_header *hdr;
cfs_file_t *filp;
- CFS_DECL_MMSPACE;
int rc;
+ CFS_DECL_MMSPACE;
+
/* we're started late enough that we pick up init's fs context */
/* this is so broken in uml? what on earth is going on? */
- libcfs_daemonize("ktracefiled");
+ cfs_daemonize("ktracefiled");
reparent_to_init();
spin_lock_init(&pc.pc_lock);
cfs_waitlink_init(&__wait);
cfs_waitq_add(&tctl->tctl_waitq, &__wait);
set_current_state(TASK_INTERRUPTIBLE);
- cfs_waitq_timedwait(&__wait, cfs_time_seconds(1));
+ cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
+ cfs_time_seconds(1));
cfs_waitq_del(&tctl->tctl_waitq, &__wait);
if (atomic_read(&tctl->tctl_shutdown))
continue;
filp = NULL;
- down_read(&tracefile_sem);
+ tracefile_read_lock();
if (tracefile != NULL) {
filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
0600, &rc);
if (!(filp))
printk("couldn't open %s: %d\n", tracefile, rc);
}
- up_read(&tracefile_sem);
+ tracefile_read_unlock();
if (filp == NULL) {
put_pages_on_daemon_list(&pc);
+ LASSERT(list_empty(&pc.pc_pages));
continue;
}
LASSERT_TAGE_INVARIANT(tage);
- if (f_pos >= tracefile_size)
+ if (f_pos >= (off_t)tracefile_size)
f_pos = 0;
else if (f_pos > cfs_filp_size(filp))
f_pos = cfs_filp_size(filp);
rc = cfs_filp_write(filp, cfs_page_address(tage->page),
tage->used, &f_pos);
- if (rc != tage->used) {
+ if (rc != (int)tage->used) {
printk(KERN_WARNING "wanted to write %u but "
"wrote %d\n", tage->used, rc);
put_pages_back(&pc);
+ LASSERT(list_empty(&pc.pc_pages));
}
}
CFS_MMSPACE_CLOSE;
cfs_filp_close(filp);
put_pages_on_daemon_list(&pc);
+ LASSERT(list_empty(&pc.pc_pages));
}
complete(&tctl->tctl_stop);
return 0;
struct trace_cpu_data *tcd;
int i;
+ tracefile_lock_init();
for (i = 0; i < NR_CPUS; i++) {
tcd = &trace_data[i].tcd;
CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
+ CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
tcd->tcd_cur_pages = 0;
+ tcd->tcd_cur_stock_pages = 0;
tcd->tcd_cur_daemon_pages = 0;
tcd->tcd_max_pages = TCD_MAX_PAGES;
tcd->tcd_shutting_down = 0;
+ tcd->tcd_cpu = i;
}
return 0;
}
#include <libcfs/libcfs.h>
+/* trace file lock routines */
+
+void tracefile_lock_init(void);
+void tracefile_read_lock(void);
+void tracefile_read_unlock(void);
+void tracefile_write_lock(void);
+void tracefile_write_unlock(void);
+
int tracefile_dump_all_pages(char *filename);
void trace_debug_print(void);
void trace_flush_pages(void);
/*
* Private declare for tracefile
*/
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
+#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
#define TRACEFILE_SIZE (500 << 20)
union trace_data_union {
struct trace_cpu_data {
+ /*
+ * pages with trace records not yet processed by tracefiled.
+ */
struct list_head tcd_pages;
+ /* number of pages on ->tcd_pages */
unsigned long tcd_cur_pages;
+ /*
+ * pages with trace records already processed by
+ * tracefiled. These pages are kept in memory, so that some
+ * portion of log can be written in the event of LBUG. This
+ * list is maintained in LRU order.
+ *
+ * Pages are moved to ->tcd_daemon_pages by tracefiled()
+ * (put_pages_on_daemon_list()). LRU pages from this list are
+ * discarded when list grows too large.
+ */
struct list_head tcd_daemon_pages;
+ /* number of pages on ->tcd_cur_daemon_pages */
unsigned long tcd_cur_daemon_pages;
+ /*
+ * Maximal number of pages allowed on ->tcd_pages and
+ * ->tcd_daemon_pages each. Always TCD_MAX_PAGES in current
+ * implementation.
+ */
unsigned long tcd_max_pages;
+
+ /*
+ * preallocated pages to write trace records into. Pages from
+ * ->tcd_stock_pages are moved to ->tcd_pages by
+ * portals_debug_msg().
+ *
+ * This list is necessary, because on some platforms it's
+ * impossible to perform efficient atomic page allocation in a
+ * non-blockable context.
+ *
+ * Such platforms fill ->tcd_stock_pages "on occasion", when
+ * tracing code is entered in blockable context.
+ *
+ * trace_get_tage_try() tries to get a page from
+ * ->tcd_stock_pages first and resorts to atomic page
+ * allocation only if this queue is empty. ->tcd_stock_pages
+ * is replenished when tracing code is entered in blocking
+ * context (darwin-tracefile.c:__trace_get_tcd()). We try to
+ * maintain TCD_STOCK_PAGES (40 by default) pages in this
+ * queue. Atomic allocation is only required if more than
+ * TCD_STOCK_PAGES pagesful are consumed by trace records all
+ * emitted in non-blocking contexts. Which is quite unlikely.
+ */
+ struct list_head tcd_stock_pages;
+ /* number of pages on ->tcd_stock_pages */
+ unsigned long tcd_cur_stock_pages;
+
int tcd_shutting_down;
+ int tcd_cpu;
} tcd;
char __pad[SMP_CACHE_BYTES];
};
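/*
 * Editorial sketch of the stock-page flow described above; only
 * trace_refill_stock() and the tcd_* fields are real, the helper name
 * and the use of list_splice() are assumptions.  Refill the per-CPU
 * stock while blocking is allowed, so that later non-blockable trace
 * calls can take pages without atomic allocation.
 */
static inline void example_refill_stock(struct trace_cpu_data *tcd)
{
        struct list_head stock;
        int added;

        CFS_INIT_LIST_HEAD(&stock);
        /* may block: call only from a blockable context */
        added = trace_refill_stock(tcd, CFS_ALLOC_WAIT, &stock);
        list_splice(&stock, &tcd->tcd_stock_pages);
        tcd->tcd_cur_stock_pages += added;
}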
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
struct page_collection {
struct list_head pc_pages;
+ /*
+ * spin-lock protecting ->pc_pages. It is taken by smp_call_function()
+ * call-back functions. XXX nikita: Which is horrible: all processors
+ * receive NMI at the same time only to be serialized by this
+ * lock. Probably ->pc_pages should be replaced with an array of
+ * NR_CPUS elements accessed locklessly.
+ */
spinlock_t pc_lock;
+ /*
+ * if this flag is set, collect_pages() will spill both
+ * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
+ * only ->tcd_pages are spilled.
+ */
int pc_want_daemon_pages;
};
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
struct tracefiled_ctl {
struct completion tctl_start;
struct completion tctl_stop;
- cfs_waitq_t tctl_waitq;
+ cfs_waitq_t tctl_waitq;
pid_t tctl_pid;
atomic_t tctl_shutdown;
};
/*
* small data-structure for each page owned by tracefiled.
*/
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
struct trace_page {
/*
* page itself
char *buf, int len, char *file, const char *fn);
extern struct trace_cpu_data * __trace_get_tcd (unsigned long *flags);
extern void __trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags);
+int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
+ struct list_head *stock);
+
#define trace_get_tcd(f) __trace_get_tcd(&(f))
#define trace_put_tcd(t, f) __trace_put_tcd(t, f)
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage);
+
#endif /* LUSTRE_TRACEFILE_PRIVATE */
#endif /* __PORTALS_TRACEFILE_H */
/*
* liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ *
+ * XXX Liang: several branches share lnet with b_hd_newconfig; if we
+ * define lock APIs here, they will conflict with liblustre in those
+ * branches.
*/
#ifndef __KERNEL__
+#include <stdlib.h>
+#include <libcfs/libcfs.h>
/*
* Optional debugging (magic stamping and checking ownership) can be added.
*/
+#if 0
/*
* spin_lock
*
(void)lock;
}
-void spin_lock_irqsave(spinlock_t *lock, unsigned long flags)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
-{
- LASSERT(lock != NULL);
- (void)lock;
-}
-
-
/*
* Semaphore
*
LASSERT(s != NULL);
(void)s;
}
+#endif
/* !__KERNEL__ */
#endif
#include <libcfs/libcfs.h>
+#define LASSERT(a) do {} while (0)
/*
* Sleep channel. No-op implementation.
*/
{
LASSERT(waitq != NULL);
(void)waitq;
+ return 0;
}
void cfs_waitq_signal(struct cfs_waitq *waitq)
(void)waitq;
}
-void cfs_waitq_broadcast(struct cfs_waitq *waitq)
+void cfs_waitq_broadcast(struct cfs_waitq *waitq, int state)
{
LASSERT(waitq != NULL);
(void)waitq;
(void)link;
}
-int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout)
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout)
{
LASSERT(link != NULL);
(void)link;
+ return 0;
}
/*
* Allocator
*/
-cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order)
+cfs_page_t *cfs_alloc_page(unsigned int flags)
{
cfs_page_t *pg = malloc(sizeof(*pg));
if (!pg)
return NULL;
-#if 0 //#ifdef MAP_ANONYMOUS
- pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
-#else
- pg->addr = malloc(PAGE_SIZE << order);
-#endif
+ pg->addr = malloc(PAGE_SIZE);
if (!pg->addr) {
free(pg);
		return NULL;
}
-void cfs_free_pages(struct page *pg, int what)
+void cfs_free_page(cfs_page_t *pg)
{
-#if 0 //#ifdef MAP_ANONYMOUS
- munmap(pg->addr, PAGE_SIZE);
-#else
free(pg->addr);
-#endif
free(pg);
}
-cfs_page_t *cfs_alloc_page(unsigned int flags)
-{
- return cfs_alloc_pages(flags, 0);
-}
-
-void cfs_free_page(cfs_page_t *pg, int what)
-{
- cfs_free_page(pg, what);
-}
-
void *cfs_page_address(cfs_page_t *pg)
{
return pg->addr;
}
/*
- * Memory allocator
- */
-void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
-{
- void *result;
-
- result = malloc(nr_bytes);
- if (result != NULL && (flags & CFS_ALLOC_ZERO))
- memset(result, 0, nr_bytes);
-}
-
-void cfs_free(void *addr)
-{
- free(addr);
-}
-
-void *cfs_alloc_large(size_t nr_bytes)
-{
- return cfs_alloc(nr_bytes, 0);
-}
-
-void cfs_free_large(void *addr)
-{
- return cfs_free(addr);
-}
-
-/*
* SLAB allocator
*/
cfs_mem_cache_t *
-cfs_mem_cache_create(const char *, size_t, size_t, unsigned long,
- void (*)(void *, cfs_mem_cache_t *, unsigned long),
- void (*)(void *, cfs_mem_cache_t *, unsigned long))
+cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags)
{
cfs_mem_cache_t *c;
void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp)
{
- return cfs_alloc(c, gfp);
+ return cfs_alloc(c->size, gfp);
}
void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr)
cfs_free(addr);
}
+/*
+ * This uses user-visible declarations from <linux/kdev_t.h>
+ */
+#ifdef __LINUX__
+#include <linux/kdev_t.h>
+#endif
+
+#ifndef MKDEV
+
+#define MAJOR(dev) ((dev)>>8)
+#define MINOR(dev) ((dev) & 0xff)
+#define MKDEV(ma,mi) ((ma)<<8 | (mi))
+
+#endif
+
+cfs_rdev_t cfs_rdev_build(cfs_major_nr_t major, cfs_minor_nr_t minor)
+{
+ return MKDEV(major, minor);
+}
+
+cfs_major_nr_t cfs_rdev_major(cfs_rdev_t rdev)
+{
+ return MAJOR(rdev);
+}
+
+cfs_minor_nr_t cfs_rdev_minor(cfs_rdev_t rdev)
+{
+ return MINOR(rdev);
+}
+
+void cfs_enter_debugger(void)
+{
+ /*
+ * nothing for now.
+ */
+}
+
+void cfs_daemonize(char *str)
+{
+ return;
+}
+
+void cfs_block_allsigs()
+{
+}
+
+cfs_sigset_t cfs_get_blocked_sigs()
+{
+ cfs_sigset_t s;
+ memset(&s, 0, sizeof(s));
+ return s;
+}
+
+void cfs_block_sigs(cfs_sigset_t blocks)
+{
+}
+
+#ifdef __LINUX__
+
+/*
+ * backtrace() comes from glibc (NOT from Linux itself, so the check
+ * above is not quite right); use it to implement stack back-tracing.
+ */
+#include <execinfo.h>
+
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+ backtrace(trace->frame, sizeof_array(trace->frame));
+}
+
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+ if (0 <= frame_no && frame_no < sizeof_array(trace->frame))
+ return trace->frame[frame_no];
+ else
+ return NULL;
+}
+
+#else
+
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{}
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+ return NULL;
+}
+
+/* __LINUX__ */
+#endif
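/*
 * Usage sketch (illustrative; example_show_trace is hypothetical and
 * assumes <stdio.h> and <string.h>): print whatever frames the
 * platform captured.  backtrace() does not NULL-fill unused slots, so
 * zero the trace first.
 */
static void example_show_trace(void)
{
        struct cfs_stack_trace trace;
        void *frame;
        int i;

        memset(&trace, 0, sizeof trace);
        cfs_stack_trace_fill(&trace);
        for (i = 0; (frame = cfs_stack_trace_frame(&trace, i)) != NULL; i++)
                fprintf(stderr, "#%d %p\n", i, frame);
}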
+
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+ CEMERG("LBUG\n");
+ abort();
+}
+
/* !__KERNEL__ */
#endif
#include <libcfs/kp30.h>
#include <libcfs/libcfs.h>
-#include <libcfs/linux/portals_compat25.h>
-
-
struct lc_watchdog {
- struct timer_list lcw_timer; /* kernel timer */
+ cfs_timer_t lcw_timer; /* kernel timer */
struct list_head lcw_list;
struct timeval lcw_last_touched;
- struct task_struct *lcw_task;
+ cfs_task_t *lcw_task;
void (*lcw_callback)(struct lc_watchdog *,
- struct task_struct *,
+ cfs_task_t *,
void *data);
void *lcw_data;
} lcw_state;
};
+#ifdef WITH_WATCHDOG
/*
* The dispatcher will complete lcw_start_completion when it starts,
* and lcw_stop_completion when it exits.
static struct list_head lcw_pending_timers = \
LIST_HEAD_INIT(lcw_pending_timers);
-static struct task_struct *lcw_lookup_task(struct lc_watchdog *lcw)
+static cfs_task_t *lcw_lookup_task(struct lc_watchdog *lcw)
{
- struct task_struct *tsk;
+ cfs_task_t *tsk;
unsigned long flags;
ENTRY;
lcw->lcw_state = LC_WATCHDOG_EXPIRED;
- CWARN("Watchdog triggered for pid %d: it was inactive for %dms\n",
- lcw->lcw_pid, (lcw->lcw_time * 1000) / HZ);
+ CWARN("Watchdog triggered for pid %d: it was inactive for %ldms\n",
+ lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time) * 1000);
+
tsk = lcw_lookup_task(lcw);
if (tsk != NULL)
ENTRY;
- libcfs_daemonize("lc_watchdogd");
+ cfs_daemonize("lc_watchdogd");
SIGNAL_MASK_LOCK(current, flags);
sigfillset(¤t->blocked);
lcw->lcw_task = cfs_current();
lcw->lcw_pid = cfs_curproc_pid();
- lcw->lcw_time = (timeout_ms * HZ) / 1000;
+ lcw->lcw_time = cfs_time_seconds(timeout_ms / 1000);
lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog;
lcw->lcw_data = data;
lcw->lcw_state = LC_WATCHDOG_DISABLED;
}
EXPORT_SYMBOL(lc_watchdog_add);
-static long
-timeval_sub(struct timeval *large, struct timeval *small)
-{
- return (large->tv_sec - small->tv_sec) * 1000000 +
- (large->tv_usec - small->tv_usec);
-}
-
static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
{
struct timeval newtime;
- unsigned long timediff;
+ struct timeval timediff;
do_gettimeofday(&newtime);
if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
- timediff = timeval_sub(&newtime, &lcw->lcw_last_touched);
+ cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff);
CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n",
lcw->lcw_pid,
message,
- timediff / 1000000,
- (timediff % 1000000) / 100);
+ timediff.tv_sec,
+ timediff.tv_usec / 100);
}
lcw->lcw_last_touched = newtime;
}
libcfs_debug_dumplog_internal((void *)(long)tsk->pid);
}
EXPORT_SYMBOL(lc_watchdog_dumplog);
+
+#else /* !defined(WITH_WATCHDOG) */
+
+struct lc_watchdog *lc_watchdog_add(int timeout_ms,
+ void (*callback)(struct lc_watchdog *,
+ cfs_task_t *,
+ void *),
+ void *data)
+{
+ static struct lc_watchdog watchdog;
+ return &watchdog;
+}
+EXPORT_SYMBOL(lc_watchdog_add);
+
+void lc_watchdog_touch(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_touch);
+
+void lc_watchdog_disable(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_disable);
+
+void lc_watchdog_delete(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_delete);
+
+#endif
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ * Implementation of winnt curproc routines.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for Linux kernel.
+ */
+
+cfs_task_t this_task =
+ { 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 0, 0, 0, 0,
+      "system\0" };
+
+
+uid_t cfs_curproc_uid(void)
+{
+ return this_task.uid;
+}
+
+gid_t cfs_curproc_gid(void)
+{
+ return this_task.gid;
+}
+
+uid_t cfs_curproc_fsuid(void)
+{
+ return this_task.fsuid;
+}
+
+gid_t cfs_curproc_fsgid(void)
+{
+ return this_task.fsgid;
+}
+
+pid_t cfs_curproc_pid(void)
+{
+ return cfs_current()->pid;
+}
+
+int cfs_curproc_groups_nr(void)
+{
+ return this_task.ngroups;
+}
+
+void cfs_curproc_groups_dump(gid_t *array, int size)
+{
+ LASSERT(size <= NGROUPS);
+ size = min_t(int, size, this_task.ngroups);
+ memcpy(array, this_task.groups, size * sizeof(__u32));
+}
+
+int cfs_curproc_is_in_groups(gid_t gid)
+{
+ return in_group_p(gid);
+}
+
+mode_t cfs_curproc_umask(void)
+{
+ return this_task.umask;
+}
+
+char *cfs_curproc_comm(void)
+{
+ return this_task.comm;
+}
+
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+ return this_task.cap_effective;
+}
+
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+ this_task.cap_effective = cap;
+}
+
+
+/*
+ * Implementation of linux task management routines
+ */
+
+
+/* the global task manager structure */
+
+TASK_MAN TaskMan;
+
+
+/*
+ * task slot routines
+ */
+
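+/* allocate a slot from the slab cache when it has been created,
+ * otherwise fall back to the generic allocator; cleanup_task_slot()
+ * below frees through the matching path */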
+PTASK_SLOT
+alloc_task_slot()
+{
+ PTASK_SLOT task = NULL;
+
+ if (TaskMan.slab) {
+ task = cfs_mem_cache_alloc(TaskMan.slab, 0);
+ } else {
+ task = cfs_alloc(sizeof(TASK_SLOT), 0);
+ }
+
+ return task;
+}
+
+void
+init_task_slot(PTASK_SLOT task)
+{
+ memset(task, 0, sizeof(TASK_SLOT));
+ task->Magic = TASKSLT_MAGIC;
+ task->task = this_task;
+ task->task.pid = (pid_t)PsGetCurrentThreadId();
+ cfs_init_event(&task->Event, TRUE, FALSE);
+}
+
+
+void
+cleanup_task_slot(PTASK_SLOT task)
+{
+ if (TaskMan.slab) {
+ cfs_mem_cache_free(TaskMan.slab, task);
+ } else {
+ cfs_free(task);
+ }
+}
+
+/*
+ * task manager related routines
+ */
+
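+/*
+ * task_manager_notify
+ *   Thread create/exit callback registered through
+ *   PsSetCreateThreadNotifyRoutine. On thread exit the matching task
+ *   slot is unlinked and freed so stale entries do not accumulate;
+ *   on create an existing slot is simply left in place.
+ */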
+VOID
+task_manager_notify(
+ IN HANDLE ProcessId,
+ IN HANDLE ThreadId,
+ IN BOOLEAN Create
+ )
+{
+ PLIST_ENTRY ListEntry = NULL;
+ PTASK_SLOT TaskSlot = NULL;
+
+ spin_lock(&(TaskMan.Lock));
+
+ ListEntry = TaskMan.TaskList.Flink;
+
+ while (ListEntry != (&(TaskMan.TaskList))) {
+
+ TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+ if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) {
+
+ if (Create) {
+/*
+ DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n",
+ ProcessId, ThreadId, TaskSlot->Tet);
+*/
+ } else {
+ /* remove the taskslot */
+ RemoveEntryList(&(TaskSlot->Link));
+ TaskMan.NumOfTasks--;
+
+ /* now free the task slot */
+ cleanup_task_slot(TaskSlot);
+ }
+ }
+
+ ListEntry = ListEntry->Flink;
+ }
+
+ spin_unlock(&(TaskMan.Lock));
+}
+
+int
+init_task_manager()
+{
+ NTSTATUS status;
+
+ /* initialize the content and magic */
+ memset(&TaskMan, 0, sizeof(TASK_MAN));
+ TaskMan.Magic = TASKMAN_MAGIC;
+
+ /* initialize the spinlock protection */
+ spin_lock_init(&TaskMan.Lock);
+
+ /* create slab memory cache */
+ TaskMan.slab = cfs_mem_cache_create(
+ "TSLT", sizeof(TASK_SLOT), 0, 0);
+
+ /* initialize the list header */
+ InitializeListHead(&(TaskMan.TaskList));
+
+ /* set the thread creation/destruction notify routine */
+ status = PsSetCreateThreadNotifyRoutine(task_manager_notify);
+
+ if (!NT_SUCCESS(status)) {
+ cfs_enter_debugger();
+ }
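+ /* a registration failure is not fatal here: slot lookups still
+ * work, but the slots of exited threads will no longer be
+ * reclaimed */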
+
+ return 0;
+}
+
+void
+cleanup_task_manager()
+{
+ PLIST_ENTRY ListEntry = NULL;
+ PTASK_SLOT TaskSlot = NULL;
+
+ /* we must remain resident in the system once we have successfully
+ registered the CreateThreadNotifyRoutine: task_manager_notify */
+ cfs_enter_debugger();
+
+
+ /* cleanup all the taskslots attached to the list */
+ spin_lock(&(TaskMan.Lock));
+
+ while (!IsListEmpty(&(TaskMan.TaskList))) {
+
+ ListEntry = TaskMan.TaskList.Flink;
+ TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+ RemoveEntryList(ListEntry);
+ cleanup_task_slot(TaskSlot);
+ }
+
+ spin_unlock(&TaskMan.Lock);
+
+ /* destroy the taskslot cache slab */
+ cfs_mem_cache_destroy(TaskMan.slab);
+ memset(&TaskMan, 0, sizeof(TASK_MAN));
+}
+
+
+/*
+ * schedule routines (task slot list)
+ */
+
+
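+/*
+ * cfs_current
+ *   Map the running NT thread onto an emulated cfs_task_t. The slot
+ *   list is kept sorted by (pid, tid): the walk either finds the
+ *   existing slot (refreshing its ETHREAD pointer when a tid has
+ *   been recycled) or stops at the insertion point, where a fresh
+ *   slot is allocated on the thread's first call.
+ */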
+cfs_task_t *
+cfs_current()
+{
+ HANDLE Pid = PsGetCurrentProcessId();
+ HANDLE Tid = PsGetCurrentThreadId();
+ PETHREAD Tet = PsGetCurrentThread();
+
+ PLIST_ENTRY ListEntry = NULL;
+ PTASK_SLOT TaskSlot = NULL;
+
+ spin_lock(&(TaskMan.Lock));
+
+ ListEntry = TaskMan.TaskList.Flink;
+
+ while (ListEntry != (&(TaskMan.TaskList))) {
+
+ TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+ if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) {
+ if (TaskSlot->Tet != Tet) {
+
+/*
+ DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n",
+ Pid, Tid, Tet, TaskSlot->Tet);
+*/
+ //
+ // The old thread has already exited. This must be a new
+ // thread that was assigned the same Tid as the previous one.
+ //
+
+ TaskSlot->Tet = Tet;
+ }
+ break;
+
+ } else {
+
+ if ((ULONG)TaskSlot->Pid > (ULONG)Pid) {
+ TaskSlot = NULL;
+ break;
+ } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) {
+ if ((ULONG)TaskSlot->Tid > (ULONG)Tid) {
+ TaskSlot = NULL;
+ break;
+ }
+ }
+
+ TaskSlot = NULL;
+ }
+
+ ListEntry = ListEntry->Flink;
+ }
+
+ if (!TaskSlot) {
+
+ TaskSlot = alloc_task_slot();
+
+ if (!TaskSlot) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ init_task_slot(TaskSlot);
+
+ TaskSlot->Pid = Pid;
+ TaskSlot->Tid = Tid;
+ TaskSlot->Tet = Tet;
+
+ if (ListEntry == (&(TaskMan.TaskList))) {
+ //
+ // Empty list, or this entry is the largest: append to the tail.
+ //
+ InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link));
+ } else {
+ //
+ // Found the first slot with a larger (pid, tid); insert just before it.
+ //
+ InsertHeadList(ListEntry->Blink, &(TaskSlot->Link));
+ }
+
+ TaskMan.NumOfTasks++;
+ }
+
+ //
+ // Check whether the task slots are arranged in the expected order.
+ //
+
+ {
+ PTASK_SLOT Prev = NULL, Curr = NULL;
+
+ ListEntry = TaskMan.TaskList.Flink;
+
+ while (ListEntry != (&(TaskMan.TaskList))) {
+
+ Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+ ListEntry = ListEntry->Flink;
+
+ if (Prev) {
+ if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) {
+ cfs_enter_debugger();
+ } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) {
+ if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) {
+ cfs_enter_debugger();
+ }
+ }
+ }
+
+ Prev = Curr;
+ }
+ }
+
+errorout:
+
+ spin_unlock(&(TaskMan.Lock));
+
+ if (!TaskSlot) {
+ cfs_enter_debugger();
+ return NULL;
+ }
+
+ return (&(TaskSlot->task));
+}
+
+int
+schedule_timeout(int64_t time)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ cfs_enter_debugger();
+ return 0;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ cfs_assert(slot->Magic == TASKSLT_MAGIC);
+
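+ /* MAX_SCHEDULE_TIMEOUT is mapped to 0, presumably the wait-forever
+ * value understood by cfs_wait_event() */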
+ if (time == MAX_SCHEDULE_TIMEOUT) {
+ time = 0;
+ }
+
+ return (cfs_wait_event(&(slot->Event), time) != 0);
+}
+
+int
+schedule()
+{
+ return schedule_timeout(0);
+}
+
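+/* signal the slot event of the given task, waking a thread blocked
+ * in schedule_timeout()/schedule() above */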
+int
+wake_up_process(
+ cfs_task_t * task
+ )
+{
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ cfs_enter_debugger();
+ return 0;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ cfs_assert(slot->Magic == TASKSLT_MAGIC);
+
+ cfs_wake_event(&(slot->Event));
+
+ return TRUE;
+}
+
+void
+sleep_on(
+ cfs_waitq_t *waitq
+ )
+{
+ cfs_waitlink_t link;
+
+ cfs_waitlink_init(&link);
+ cfs_waitq_add(waitq, &link);
+ cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
+ cfs_waitq_del(waitq, &link);
+}
+
+EXPORT_SYMBOL(cfs_curproc_uid);
+EXPORT_SYMBOL(cfs_curproc_pid);
+EXPORT_SYMBOL(cfs_curproc_gid);
+EXPORT_SYMBOL(cfs_curproc_fsuid);
+EXPORT_SYMBOL(cfs_curproc_fsgid);
+EXPORT_SYMBOL(cfs_curproc_umask);
+EXPORT_SYMBOL(cfs_curproc_comm);
+EXPORT_SYMBOL(cfs_curproc_groups_nr);
+EXPORT_SYMBOL(cfs_curproc_groups_dump);
+EXPORT_SYMBOL(cfs_curproc_is_in_groups);
+EXPORT_SYMBOL(cfs_curproc_cap_get);
+EXPORT_SYMBOL(cfs_curproc_cap_set);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+
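+/* the routines below are stubs: stack dumping and the LBUG upcall
+ * are not implemented on this platform, they exist only to satisfy
+ * the portable debug API */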
+void portals_debug_dumpstack(cfs_task_t *tsk)
+{
+ return;
+}
+
+cfs_task_t *portals_current(void)
+{
+ return cfs_current();
+}
+
+int portals_arch_debug_init(unsigned long bufsize)
+{
+ return 0;
+}
+
+int portals_arch_debug_cleanup(void)
+{
+ return 0;
+}
+
+void portals_run_lbug_upcall(char *file, const char *fn, const int line)
+{
+}
+
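+/* LBUG on this platform only logs the pid and thread pointer; the
+ * debug-log dump and the user-mode upcall are currently disabled */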
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+ CEMERG("LBUG: pid: %u thread: %#x\n",
+ (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread());
+ // portals_debug_dumplog();
+ // portals_run_lbug_upcall(file, func, line);
+}
+
+#if KS_DEBUG
+
+/*
+ * Definitions
+ */
+
+LONG KsDebugLevel = 0x5;
+
+
+/*
+ * Routines
+ */
+
+
+/*
+ * KsNtStatusToString
+ * Get the error message for a specified nt status
+ *
+ * Arguments:
+ * Status - nt status code
+ *
+ * Return Value:
+ * PUCHAR - message string for the status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+PUCHAR
+KsNtStatusToString (IN NTSTATUS Status)
+{
+ switch (Status) {
+
+ case 0x00000000: return "STATUS_SUCCESS";
+ case 0x00000001: return "STATUS_WAIT_1";
+ case 0x00000002: return "STATUS_WAIT_2";
+ case 0x00000003: return "STATUS_WAIT_3";
+ case 0x0000003F: return "STATUS_WAIT_63";
+ case 0x00000080: return "STATUS_ABANDONED_WAIT_0";
+ case 0x000000BF: return "STATUS_ABANDONED_WAIT_63";
+ case 0x000000C0: return "STATUS_USER_APC";
+ case 0x00000100: return "STATUS_KERNEL_APC";
+ case 0x00000101: return "STATUS_ALERTED";
+ case 0x00000102: return "STATUS_TIMEOUT";
+ case 0x00000103: return "STATUS_PENDING";
+ case 0x00000104: return "STATUS_REPARSE";
+ case 0x00000105: return "STATUS_MORE_ENTRIES";
+ case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED";
+ case 0x00000107: return "STATUS_SOME_NOT_MAPPED";
+ case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS";
+ case 0x00000109: return "STATUS_VOLUME_MOUNTED";
+ case 0x0000010A: return "STATUS_RXACT_COMMITTED";
+ case 0x0000010B: return "STATUS_NOTIFY_CLEANUP";
+ case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR";
+ case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT";
+ case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED";
+ case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION";
+ case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO";
+ case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE";
+ case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE";
+ case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE";
+ case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED";
+ case 0x00000116: return "STATUS_CRASH_DUMP";
+ case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS";
+ case 0x00000118: return "STATUS_REPARSE_OBJECT";
+ case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED";
+ case 0x00000120: return "STATUS_TRANSLATION_COMPLETE";
+ case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY";
+ case 0x00010001: return "DBG_EXCEPTION_HANDLED";
+ case 0x00010002: return "DBG_CONTINUE";
+ case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS";
+ case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED";
+ case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE";
+ case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE";
+ case 0x40000004: return "STATUS_RXACT_STATE_CREATED";
+ case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION";
+ case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY";
+ case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY";
+ case 0x40000008: return "STATUS_SERIAL_MORE_WRITES";
+ case 0x40000009: return "STATUS_REGISTRY_RECOVERED";
+ case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP";
+ case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY";
+ case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT";
+ case 0x4000000D: return "STATUS_NULL_LM_PASSWORD";
+ case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH";
+ case 0x4000000F: return "STATUS_RECEIVE_PARTIAL";
+ case 0x40000010: return "STATUS_RECEIVE_EXPEDITED";
+ case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED";
+ case 0x40000012: return "STATUS_EVENT_DONE";
+ case 0x40000013: return "STATUS_EVENT_PENDING";
+ case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM";
+ case 0x40000015: return "STATUS_FATAL_APP_EXIT";
+ case 0x40000016: return "STATUS_PREDEFINED_HANDLE";
+ case 0x40000017: return "STATUS_WAS_UNLOCKED";
+ case 0x40000018: return "STATUS_SERVICE_NOTIFICATION";
+ case 0x40000019: return "STATUS_WAS_LOCKED";
+ case 0x4000001A: return "STATUS_LOG_HARD_ERROR";
+ case 0x4000001B: return "STATUS_ALREADY_WIN32";
+ case 0x4000001C: return "STATUS_WX86_UNSIMULATE";
+ case 0x4000001D: return "STATUS_WX86_CONTINUE";
+ case 0x4000001E: return "STATUS_WX86_SINGLE_STEP";
+ case 0x4000001F: return "STATUS_WX86_BREAKPOINT";
+ case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE";
+ case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE";
+ case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN";
+ case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE";
+ case 0x40000024: return "STATUS_NO_YIELD_PERFORMED";
+ case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED";
+ case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED";
+ case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED";
+ case 0x40000028: return "STATUS_WX86_CREATEWX86TIB";
+ case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH";
+ case 0x40010001: return "DBG_REPLY_LATER";
+ case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE";
+ case 0x40010003: return "DBG_TERMINATE_THREAD";
+ case 0x40010004: return "DBG_TERMINATE_PROCESS";
+ case 0x40010005: return "DBG_CONTROL_C";
+ case 0x40010006: return "DBG_PRINTEXCEPTION_C";
+ case 0x40010007: return "DBG_RIPEXCEPTION";
+ case 0x40010008: return "DBG_CONTROL_BREAK";
+ case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION";
+ case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT";
+ case 0x80000003: return "STATUS_BREAKPOINT";
+ case 0x80000004: return "STATUS_SINGLE_STEP";
+ case 0x80000005: return "STATUS_BUFFER_OVERFLOW";
+ case 0x80000006: return "STATUS_NO_MORE_FILES";
+ case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER";
+ case 0x8000000A: return "STATUS_HANDLES_CLOSED";
+ case 0x8000000B: return "STATUS_NO_INHERITANCE";
+ case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE";
+ case 0x8000000D: return "STATUS_PARTIAL_COPY";
+ case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY";
+ case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF";
+ case 0x80000010: return "STATUS_DEVICE_OFF_LINE";
+ case 0x80000011: return "STATUS_DEVICE_BUSY";
+ case 0x80000012: return "STATUS_NO_MORE_EAS";
+ case 0x80000013: return "STATUS_INVALID_EA_NAME";
+ case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT";
+ case 0x80000015: return "STATUS_INVALID_EA_FLAG";
+ case 0x80000016: return "STATUS_VERIFY_REQUIRED";
+ case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION";
+ case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY";
+ case 0x8000001A: return "STATUS_NO_MORE_ENTRIES";
+ case 0x8000001B: return "STATUS_FILEMARK_DETECTED";
+ case 0x8000001C: return "STATUS_MEDIA_CHANGED";
+ case 0x8000001D: return "STATUS_BUS_RESET";
+ case 0x8000001E: return "STATUS_END_OF_MEDIA";
+ case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA";
+ case 0x80000020: return "STATUS_MEDIA_CHECK";
+ case 0x80000021: return "STATUS_SETMARK_DETECTED";
+ case 0x80000022: return "STATUS_NO_DATA_DETECTED";
+ case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES";
+ case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES";
+ case 0x80000025: return "STATUS_ALREADY_DISCONNECTED";
+ case 0x80000026: return "STATUS_LONGJUMP";
+ case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED";
+ case 0xC0000001: return "STATUS_UNSUCCESSFUL";
+ case 0xC0000002: return "STATUS_NOT_IMPLEMENTED";
+ case 0xC0000003: return "STATUS_INVALID_INFO_CLASS";
+ case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH";
+ case 0xC0000005: return "STATUS_ACCESS_VIOLATION";
+ case 0xC0000006: return "STATUS_IN_PAGE_ERROR";
+ case 0xC0000007: return "STATUS_PAGEFILE_QUOTA";
+ case 0xC0000008: return "STATUS_INVALID_HANDLE";
+ case 0xC0000009: return "STATUS_BAD_INITIAL_STACK";
+ case 0xC000000A: return "STATUS_BAD_INITIAL_PC";
+ case 0xC000000B: return "STATUS_INVALID_CID";
+ case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED";
+ case 0xC000000D: return "STATUS_INVALID_PARAMETER";
+ case 0xC000000E: return "STATUS_NO_SUCH_DEVICE";
+ case 0xC000000F: return "STATUS_NO_SUCH_FILE";
+ case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST";
+ case 0xC0000011: return "STATUS_END_OF_FILE";
+ case 0xC0000012: return "STATUS_WRONG_VOLUME";
+ case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE";
+ case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA";
+ case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR";
+ case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED";
+ case 0xC0000017: return "STATUS_NO_MEMORY";
+ case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES";
+ case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW";
+ case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM";
+ case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION";
+ case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE";
+ case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION";
+ case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE";
+ case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE";
+ case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION";
+ case 0xC0000021: return "STATUS_ALREADY_COMMITTED";
+ case 0xC0000022: return "STATUS_ACCESS_DENIED";
+ case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL";
+ case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH";
+ case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION";
+ case 0xC0000026: return "STATUS_INVALID_DISPOSITION";
+ case 0xC0000027: return "STATUS_UNWIND";
+ case 0xC0000028: return "STATUS_BAD_STACK";
+ case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET";
+ case 0xC000002A: return "STATUS_NOT_LOCKED";
+ case 0xC000002B: return "STATUS_PARITY_ERROR";
+ case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM";
+ case 0xC000002D: return "STATUS_NOT_COMMITTED";
+ case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES";
+ case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG";
+ case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX";
+ case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER";
+ case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR";
+ case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID";
+ case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND";
+ case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION";
+ case 0xC0000037: return "STATUS_PORT_DISCONNECTED";
+ case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED";
+ case 0xC0000039: return "STATUS_OBJECT_PATH_INVALID";
+ case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND";
+ case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD";
+ case 0xC000003C: return "STATUS_DATA_OVERRUN";
+ case 0xC000003D: return "STATUS_DATA_LATE_ERROR";
+ case 0xC000003E: return "STATUS_DATA_ERROR";
+ case 0xC000003F: return "STATUS_CRC_ERROR";
+ case 0xC0000040: return "STATUS_SECTION_TOO_BIG";
+ case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED";
+ case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE";
+ case 0xC0000043: return "STATUS_SHARING_VIOLATION";
+ case 0xC0000044: return "STATUS_QUOTA_EXCEEDED";
+ case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION";
+ case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED";
+ case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED";
+ case 0xC0000048: return "STATUS_PORT_ALREADY_SET";
+ case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE";
+ case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED";
+ case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING";
+ case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT";
+ case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP";
+ case 0xC000004E: return "STATUS_SECTION_PROTECTION";
+ case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED";
+ case 0xC0000050: return "STATUS_EA_TOO_LARGE";
+ case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY";
+ case 0xC0000052: return "STATUS_NO_EAS_ON_FILE";
+ case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR";
+ case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT";
+ case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED";
+ case 0xC0000056: return "STATUS_DELETE_PENDING";
+ case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED";
+ case 0xC0000058: return "STATUS_UNKNOWN_REVISION";
+ case 0xC0000059: return "STATUS_REVISION_MISMATCH";
+ case 0xC000005A: return "STATUS_INVALID_OWNER";
+ case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP";
+ case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN";
+ case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY";
+ case 0xC000005E: return "STATUS_NO_LOGON_SERVERS";
+ case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION";
+ case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE";
+ case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD";
+ case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME";
+ case 0xC0000063: return "STATUS_USER_EXISTS";
+ case 0xC0000064: return "STATUS_NO_SUCH_USER";
+ case 0xC0000065: return "STATUS_GROUP_EXISTS";
+ case 0xC0000066: return "STATUS_NO_SUCH_GROUP";
+ case 0xC0000067: return "STATUS_MEMBER_IN_GROUP";
+ case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP";
+ case 0xC0000069: return "STATUS_LAST_ADMIN";
+ case 0xC000006A: return "STATUS_WRONG_PASSWORD";
+ case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD";
+ case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION";
+ case 0xC000006D: return "STATUS_LOGON_FAILURE";
+ case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION";
+ case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS";
+ case 0xC0000070: return "STATUS_INVALID_WORKSTATION";
+ case 0xC0000071: return "STATUS_PASSWORD_EXPIRED";
+ case 0xC0000072: return "STATUS_ACCOUNT_DISABLED";
+ case 0xC0000073: return "STATUS_NONE_MAPPED";
+ case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED";
+ case 0xC0000075: return "STATUS_LUIDS_EXHAUSTED";
+ case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY";
+ case 0xC0000077: return "STATUS_INVALID_ACL";
+ case 0xC0000078: return "STATUS_INVALID_SID";
+ case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR";
+ case 0xC000007A: return "STATUS_PROCEDURE_NOT_FOUND";
+ case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT";
+ case 0xC000007C: return "STATUS_NO_TOKEN";
+ case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL";
+ case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED";
+ case 0xC000007F: return "STATUS_DISK_FULL";
+ case 0xC0000080: return "STATUS_SERVER_DISABLED";
+ case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED";
+ case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED";
+ case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED";
+ case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY";
+ case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED";
+ case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL";
+ case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED";
+ case 0xC0000088: return "STATUS_NOT_MAPPED_DATA";
+ case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND";
+ case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND";
+ case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND";
+ case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED";
+ case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND";
+ case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO";
+ case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT";
+ case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION";
+ case 0xC0000091: return "STATUS_FLOAT_OVERFLOW";
+ case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK";
+ case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW";
+ case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO";
+ case 0xC0000095: return "STATUS_INTEGER_OVERFLOW";
+ case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION";
+ case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES";
+ case 0xC0000098: return "STATUS_FILE_INVALID";
+ case 0xC0000099: return "STATUS_ALLOTTED_SPACE_EXCEEDED";
+ case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES";
+ case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND";
+ case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR";
+ case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED";
+ case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE";
+ case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE";
+ case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED";
+ case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA";
+ case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED";
+ case 0xC00000A3: return "STATUS_DEVICE_NOT_READY";
+ case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES";
+ case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL";
+ case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS";
+ case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS";
+ case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE";
+ case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD";
+ case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT";
+ case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE";
+ case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE";
+ case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE";
+ case 0xC00000AE: return "STATUS_PIPE_BUSY";
+ case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION";
+ case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED";
+ case 0xC00000B1: return "STATUS_PIPE_CLOSING";
+ case 0xC00000B2: return "STATUS_PIPE_CONNECTED";
+ case 0xC00000B3: return "STATUS_PIPE_LISTENING";
+ case 0xC00000B4: return "STATUS_INVALID_READ_MODE";
+ case 0xC00000B5: return "STATUS_IO_TIMEOUT";
+ case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED";
+ case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED";
+ case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED";
+ case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET";
+ case 0xC00000BA: return "STATUS_FILE_IS_A_DIRECTORY";
+ case 0xC00000BB: return "STATUS_NOT_SUPPORTED";
+ case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING";
+ case 0xC00000BD: return "STATUS_DUPLICATE_NAME";
+ case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH";
+ case 0xC00000BF: return "STATUS_NETWORK_BUSY";
+ case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST";
+ case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS";
+ case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR";
+ case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE";
+ case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR";
+ case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER";
+ case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL";
+ case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE";
+ case 0xC00000C8: return "STATUS_PRINT_CANCELLED";
+ case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED";
+ case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED";
+ case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE";
+ case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME";
+ case 0xC00000CD: return "STATUS_TOO_MANY_NAMES";
+ case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS";
+ case 0xC00000CF: return "STATUS_SHARING_PAUSED";
+ case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED";
+ case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED";
+ case 0xC00000D2: return "STATUS_NET_WRITE_FAULT";
+ case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT";
+ case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE";
+ case 0xC00000D5: return "STATUS_FILE_RENAMED";
+ case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED";
+ case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT";
+ case 0xC00000D8: return "STATUS_CANT_WAIT";
+ case 0xC00000D9: return "STATUS_PIPE_EMPTY";
+ case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO";
+ case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF";
+ case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE";
+ case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE";
+ case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE";
+ case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN";
+ case 0xC00000E0: return "STATUS_DOMAIN_EXISTS";
+ case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED";
+ case 0xC00000E2: return "STATUS_OPLOCK_NOT_GRANTED";
+ case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL";
+ case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION";
+ case 0xC00000E5: return "STATUS_INTERNAL_ERROR";
+ case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED";
+ case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT";
+ case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER";
+ case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR";
+ case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR";
+ case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR";
+ case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR";
+ case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS";
+ case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS";
+ case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1";
+ case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2";
+ case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3";
+ case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4";
+ case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5";
+ case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6";
+ case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7";
+ case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8";
+ case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9";
+ case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10";
+ case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11";
+ case 0xC00000FA: return "STATUS_INVALID_PARAMETER_12";
+ case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED";
+ case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED";
+ case 0xC00000FD: return "STATUS_STACK_OVERFLOW";
+ case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE";
+ case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE";
+ case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND";
+ case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY";
+ case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR";
+ case 0xC0000103: return "STATUS_NOT_A_DIRECTORY";
+ case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE";
+ case 0xC0000105: return "STATUS_LOGON_SESSION_COLLISION";
+ case 0xC0000106: return "STATUS_NAME_TOO_LONG";
+ case 0xC0000107: return "STATUS_FILES_OPEN";
+ case 0xC0000108: return "STATUS_CONNECTION_IN_USE";
+ case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND";
+ case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING";
+ case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE";
+ case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION";
+ case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE";
+ case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED";
+ case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT";
+ case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST";
+ case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED";
+ case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER";
+ case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND";
+ case 0xC0000114: return "STATUS_ABIOS_INVALID_LID";
+ case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE";
+ case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR";
+ case 0xC0000117: return "STATUS_NO_LDT";
+ case 0xC0000118: return "STATUS_INVALID_LDT_SIZE";
+ case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET";
+ case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR";
+ case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT";
+ case 0xC000011C: return "STATUS_RXACT_INVALID_STATE";
+ case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE";
+ case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO";
+ case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES";
+ case 0xC0000120: return "STATUS_CANCELLED";
+ case 0xC0000121: return "STATUS_CANNOT_DELETE";
+ case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME";
+ case 0xC0000123: return "STATUS_FILE_DELETED";
+ case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT";
+ case 0xC0000125: return "STATUS_SPECIAL_GROUP";
+ case 0xC0000126: return "STATUS_SPECIAL_USER";
+ case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP";
+ case 0xC0000128: return "STATUS_FILE_CLOSED";
+ case 0xC0000129: return "STATUS_TOO_MANY_THREADS";
+ case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS";
+ case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE";
+ case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED";
+ case 0xC000012D: return "STATUS_COMMITMENT_LIMIT";
+ case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT";
+ case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ";
+ case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT";
+ case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16";
+ case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT";
+ case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC";
+ case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED";
+ case 0xC0000135: return "STATUS_DLL_NOT_FOUND";
+ case 0xC0000136: return "STATUS_OPEN_FAILED";
+ case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED";
+ case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND";
+ case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND";
+ case 0xC000013A: return "STATUS_CONTROL_C_EXIT";
+ case 0xC000013B: return "STATUS_LOCAL_DISCONNECT";
+ case 0xC000013C: return "STATUS_REMOTE_DISCONNECT";
+ case 0xC000013D: return "STATUS_REMOTE_RESOURCES";
+ case 0xC000013E: return "STATUS_LINK_FAILED";
+ case 0xC000013F: return "STATUS_LINK_TIMEOUT";
+ case 0xC0000140: return "STATUS_INVALID_CONNECTION";
+ case 0xC0000141: return "STATUS_INVALID_ADDRESS";
+ case 0xC0000142: return "STATUS_DLL_INIT_FAILED";
+ case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE";
+ case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION";
+ case 0xC0000145: return "STATUS_APP_INIT_FAILURE";
+ case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED";
+ case 0xC0000147: return "STATUS_NO_PAGEFILE";
+ case 0xC0000148: return "STATUS_INVALID_LEVEL";
+ case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE";
+ case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT";
+ case 0xC000014B: return "STATUS_PIPE_BROKEN";
+ case 0xC000014C: return "STATUS_REGISTRY_CORRUPT";
+ case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED";
+ case 0xC000014E: return "STATUS_NO_EVENT_PAIR";
+ case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME";
+ case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED";
+ case 0xC0000151: return "STATUS_NO_SUCH_ALIAS";
+ case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS";
+ case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS";
+ case 0xC0000154: return "STATUS_ALIAS_EXISTS";
+ case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED";
+ case 0xC0000156: return "STATUS_TOO_MANY_SECRETS";
+ case 0xC0000157: return "STATUS_SECRET_TOO_LONG";
+ case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR";
+ case 0xC0000159: return "STATUS_FULLSCREEN_MODE";
+ case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS";
+ case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED";
+ case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE";
+ case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED";
+ case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR";
+ case 0xC000015F: return "STATUS_FT_MISSING_MEMBER";
+ case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY";
+ case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER";
+ case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER";
+ case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER";
+ case 0xC0000164: return "STATUS_FLOPPY_VOLUME";
+ case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND";
+ case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER";
+ case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR";
+ case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS";
+ case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED";
+ case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED";
+ case 0xC000016B: return "STATUS_DISK_RESET_FAILED";
+ case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY";
+ case 0xC000016D: return "STATUS_FT_ORPHANING";
+ case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT";
+ case 0xC0000172: return "STATUS_PARTITION_FAILURE";
+ case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH";
+ case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED";
+ case 0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA";
+ case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA";
+ case 0xC0000177: return "STATUS_EOM_OVERFLOW";
+ case 0xC0000178: return "STATUS_NO_MEDIA";
+ case 0xC000017A: return "STATUS_NO_SUCH_MEMBER";
+ case 0xC000017B: return "STATUS_INVALID_MEMBER";
+ case 0xC000017C: return "STATUS_KEY_DELETED";
+ case 0xC000017D: return "STATUS_NO_LOG_SPACE";
+ case 0xC000017E: return "STATUS_TOO_MANY_SIDS";
+ case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED";
+ case 0xC0000180: return "STATUS_KEY_HAS_CHILDREN";
+ case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE";
+ case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR";
+ case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR";
+ case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE";
+ case 0xC0000185: return "STATUS_IO_DEVICE_ERROR";
+ case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR";
+ case 0xC0000187: return "STATUS_BACKUP_CONTROLLER";
+ case 0xC0000188: return "STATUS_LOG_FILE_FULL";
+ case 0xC0000189: return "STATUS_TOO_LATE";
+ case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET";
+ case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT";
+ case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE";
+ case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE";
+ case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT";
+ case 0xC000018F: return "STATUS_EVENTLOG_CANT_START";
+ case 0xC0000190: return "STATUS_TRUST_FAILURE";
+ case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED";
+ case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED";
+ case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED";
+ case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK";
+ case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT";
+ case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT";
+ case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED";
+ case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT";
+ case 0xC0000199: return "STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT";
+ case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT";
+ case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT";
+ case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED";
+ case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY";
+ case 0xC0000203: return "STATUS_USER_SESSION_DELETED";
+ case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND";
+ case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES";
+ case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE";
+ case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT";
+ case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD";
+ case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES";
+ case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS";
+ case 0xC000020B: return "STATUS_ADDRESS_CLOSED";
+ case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED";
+ case 0xC000020D: return "STATUS_CONNECTION_RESET";
+ case 0xC000020E: return "STATUS_TOO_MANY_NODES";
+ case 0xC000020F: return "STATUS_TRANSACTION_ABORTED";
+ case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT";
+ case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE";
+ case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH";
+ case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED";
+ case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID";
+ case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE";
+ case 0xC0000216: return "STATUS_NOT_SERVER_SESSION";
+ case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION";
+ case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE";
+ case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED";
+ case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED";
+ case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED";
+ case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND";
+ case 0xC000021D: return "STATUS_VDM_HARD_ERROR";
+ case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT";
+ case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH";
+ case 0xC0000220: return "STATUS_MAPPED_ALIGNMENT";
+ case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH";
+ case 0xC0000222: return "STATUS_LOST_WRITEBEHIND_DATA";
+ case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID";
+ case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE";
+ case 0xC0000225: return "STATUS_NOT_FOUND";
+ case 0xC0000226: return "STATUS_NOT_TINY_STREAM";
+ case 0xC0000227: return "STATUS_RECOVERY_FAILURE";
+ case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ";
+ case 0xC0000229: return "STATUS_FAIL_CHECK";
+ case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID";
+ case 0xC000022B: return "STATUS_OBJECTID_EXISTS";
+ case 0xC000022C: return "STATUS_CONVERT_TO_LARGE";
+ case 0xC000022D: return "STATUS_RETRY";
+ case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE";
+ case 0xC000022F: return "STATUS_ALLOCATE_BUCKET";
+ case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND";
+ case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW";
+ case 0xC0000232: return "STATUS_INVALID_VARIANT";
+ case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND";
+ case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT";
+ case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE";
+ case 0xC0000236: return "STATUS_CONNECTION_REFUSED";
+ case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT";
+ case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED";
+ case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED";
+ case 0xC000023A: return "STATUS_CONNECTION_INVALID";
+ case 0xC000023B: return "STATUS_CONNECTION_ACTIVE";
+ case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE";
+ case 0xC000023D: return "STATUS_HOST_UNREACHABLE";
+ case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE";
+ case 0xC000023F: return "STATUS_PORT_UNREACHABLE";
+ case 0xC0000240: return "STATUS_REQUEST_ABORTED";
+ case 0xC0000241: return "STATUS_CONNECTION_ABORTED";
+ case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER";
+ case 0xC0000243: return "STATUS_USER_MAPPED_FILE";
+ case 0xC0000244: return "STATUS_AUDIT_FAILED";
+ case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET";
+ case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT";
+ case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION";
+ case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION";
+ case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH";
+ case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO";
+ case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT";
+ case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT";
+ case 0xC0000253: return "STATUS_LPC_REPLY_LOST";
+ case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1";
+ case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2";
+ case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT";
+ case 0xC0000257: return "STATUS_PATH_NOT_COVERED";
+ case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE";
+ case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED";
+ case 0xC000025A: return "STATUS_PWD_TOO_SHORT";
+ case 0xC000025B: return "STATUS_PWD_TOO_RECENT";
+ case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT";
+ case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE";
+ case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION";
+ case 0xC0000260: return "STATUS_INVALID_HW_PROFILE";
+ case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH";
+ case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND";
+ case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND";
+ case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED";
+ case 0xC0000265: return "STATUS_TOO_MANY_LINKS";
+ case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT";
+ case 0xC0000267: return "STATUS_FILE_IS_OFFLINE";
+ case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION";
+ case 0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION";
+ case 0xC000026A: return "STATUS_LICENSE_VIOLATION";
+ case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF";
+ case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD";
+ case 0xC000026D: return "STATUS_DFS_UNAVAILABLE";
+ case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED";
+ case 0xC000026F: return "STATUS_WX86_INTERNAL_ERROR";
+ case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK";
+ case 0xC0000271: return "STATUS_VALIDATE_CONTINUE";
+ case 0xC0000272: return "STATUS_NO_MATCH";
+ case 0xC0000273: return "STATUS_NO_MORE_MATCHES";
+ case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT";
+ case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID";
+ case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH";
+ case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID";
+ case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED";
+ case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED";
+ case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT";
+ case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT";
+ case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY";
+ case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL";
+ case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS";
+ case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT";
+ case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED";
+ case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING";
+ case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN";
+ case 0xC000028A: return "STATUS_ENCRYPTION_FAILED";
+ case 0xC000028B: return "STATUS_DECRYPTION_FAILED";
+ case 0xC000028C: return "STATUS_RANGE_NOT_FOUND";
+ case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY";
+ case 0xC000028E: return "STATUS_NO_EFS";
+ case 0xC000028F: return "STATUS_WRONG_EFS";
+ case 0xC0000290: return "STATUS_NO_USER_KEYS";
+ case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED";
+ case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT";
+ case 0xC0000293: return "STATUS_FILE_ENCRYPTED";
+ case 0x40000294: return "STATUS_WAKE_SYSTEM";
+ case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND";
+ case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND";
+ case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND";
+ case 0xC0000298: return "STATUS_WMI_TRY_AGAIN";
+ case 0xC0000299: return "STATUS_SHARED_POLICY";
+ case 0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND";
+ case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS";
+ case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED";
+ case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE";
+ case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR";
+ case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE";
+ case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH";
+ case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE";
+ case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX";
+ case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED";
+ case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS";
+ case 0xC00002A5: return "STATUS_DS_BUSY";
+ case 0xC00002A6: return "STATUS_DS_UNAVAILABLE";
+ case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED";
+ case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS";
+ case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER";
+ case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR";
+ case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION";
+ case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF";
+ case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN";
+ case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS";
+ case 0xC00002AF: return "STATUS_DS_CROSS_DOM_MOVE_FAILED";
+ case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE";
+ case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED";
+ case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT";
+ case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY";
+ case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS";
+ case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS";
+ case 0xC00002B6: return "STATUS_DEVICE_REMOVED";
+ case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS";
+ case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE";
+ case 0xC00002B9: return "STATUS_NOINTERFACE";
+ case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED";
+ case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP";
+ case 0xC00002C3: return "STATUS_MUTUAL_AUTHENTICATION_FAILED";
+ case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE";
+ case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR";
+ case 0xC00002C6: return "STATUS_WMI_READ_ONLY";
+ case 0xC00002C7: return "STATUS_WMI_SET_FAILURE";
+ case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM";
+ case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION";
+ case 0xC00002CA: return "STATUS_TRANSPORT_FULL";
+ case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE";
+ case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED";
+ case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION";
+ case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION";
+ case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED";
+ case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID";
+ case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE";
+ case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED";
+ case 0xC00002D3: return "STATUS_POWER_STATE_INVALID";
+ case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE";
+ case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN";
+ case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN";
+ case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER";
+ case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER";
+ case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER";
+ case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER";
+ case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER";
+ case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS";
+ case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED";
+ case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER";
+ case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD";
+ case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY";
+ case 0xC00002E1: return "STATUS_DS_CANT_START";
+ case 0xC00002E2: return "STATUS_DS_INIT_FAILURE";
+ case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE";
+ case 0xC00002E4: return "STATUS_DS_GC_REQUIRED";
+ case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY";
+ case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS";
+ case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED";
+ case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION";
+ case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS";
+ case 0xC0009898: return "STATUS_WOW_ASSERTION";
+ case 0xC0010001: return "DBG_NO_STATE_CHANGE";
+ case 0xC0010002: return "DBG_APP_NOT_IDLE";
+ case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING";
+ case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING";
+ case 0xC0020003: return "RPC_NT_INVALID_BINDING";
+ case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED";
+ case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ";
+ case 0xC0020006: return "RPC_NT_INVALID_STRING_UUID";
+ case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT";
+ case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR";
+ case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND";
+ case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT";
+ case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND";
+ case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED";
+ case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED";
+ case 0xC002000E: return "RPC_NT_ALREADY_LISTENING";
+ case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED";
+ case 0xC0020010: return "RPC_NT_NOT_LISTENING";
+ case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE";
+ case 0xC0020012: return "RPC_NT_UNKNOWN_IF";
+ case 0xC0020013: return "RPC_NT_NO_BINDINGS";
+ case 0xC0020014: return "RPC_NT_NO_PROTSEQS";
+ case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT";
+ case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES";
+ case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE";
+ case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY";
+ case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS";
+ case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE";
+ case 0xC002001B: return "RPC_NT_CALL_FAILED";
+ case 0xC002001C: return "RPC_NT_CALL_FAILED_DNE";
+ case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR";
+ case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN";
+ case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE";
+ case 0xC0020022: return "RPC_NT_INVALID_TAG";
+ case 0xC0020023: return "RPC_NT_INVALID_BOUND";
+ case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME";
+ case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX";
+ case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX";
+ case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS";
+ case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT";
+ case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE";
+ case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL";
+ case 0xC002002C: return "RPC_NT_STRING_TOO_LONG";
+ case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND";
+ case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE";
+ case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH";
+ case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE";
+ case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL";
+ case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY";
+ case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE";
+ case 0xC0020034: return "EPT_NT_INVALID_ENTRY";
+ case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP";
+ case 0xC0020036: return "EPT_NT_NOT_REGISTERED";
+ case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT";
+ case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME";
+ case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION";
+ case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS";
+ case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED";
+ case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND";
+ case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS";
+ case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND";
+ case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE";
+ case 0xC0020040: return "RPC_NT_INVALID_NAF_ID";
+ case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT";
+ case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE";
+ case 0xC0020043: return "RPC_NT_INTERNAL_ERROR";
+ case 0xC0020044: return "RPC_NT_ZERO_DIVIDE";
+ case 0xC0020045: return "RPC_NT_ADDRESS_ERROR";
+ case 0xC0020046: return "RPC_NT_FP_DIV_ZERO";
+ case 0xC0020047: return "RPC_NT_FP_UNDERFLOW";
+ case 0xC0020048: return "RPC_NT_FP_OVERFLOW";
+ case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES";
+ case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL";
+ case 0xC0030003: return "RPC_NT_SS_CHAR_TRANS_SHORT_FILE";
+ case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT";
+ case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH";
+ case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED";
+ case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH";
+ case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE";
+ case 0xC0030009: return "RPC_NT_NULL_REF_POINTER";
+ case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE";
+ case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL";
+ case 0xC003000C: return "RPC_NT_BAD_STUB_DATA";
+ case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS";
+ case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS";
+ case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND";
+ case 0xC002004C: return "EPT_NT_CANT_CREATE";
+ case 0xC002004D: return "RPC_NT_INVALID_OBJECT";
+ case 0xC002004F: return "RPC_NT_NO_INTERFACES";
+ case 0xC0020050: return "RPC_NT_CALL_CANCELLED";
+ case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE";
+ case 0xC0020052: return "RPC_NT_COMM_FAILURE";
+ case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL";
+ case 0xC0020054: return "RPC_NT_NO_PRINC_NAME";
+ case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR";
+ case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY";
+ case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR";
+ case 0xC0020058: return "RPC_NT_NOT_CANCELLED";
+ case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION";
+ case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION";
+ case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION";
+ case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT";
+ case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION";
+ case 0xC003005E: return "RPC_NT_WRONG_PIPE_VERSION";
+ case 0xC003005F: return "RPC_NT_PIPE_CLOSED";
+ case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR";
+ case 0xC0030061: return "RPC_NT_PIPE_EMPTY";
+ case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE";
+ case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL";
+ case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE";
+ case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE";
+ case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW";
+ case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED";
+ case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX";
+ case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT";
+ case 0xC0140006: return "STATUS_ACPI_FATAL";
+ case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME";
+ case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE";
+ case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE";
+ case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE";
+ case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT";
+ case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED";
+ case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE";
+ case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION";
+ case 0xC014000F: return "STATUS_ACPI_INVALID_DATA";
+ case 0xC0140010: return "STATUS_ACPI_INVALID_REGION";
+ case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE";
+ case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK";
+ case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED";
+ case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED";
+ case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL";
+ case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED";
+ case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER";
+ case 0xC0140018: return "STATUS_ACPI_RS_ACCESS";
+ case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE";
+ case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED";
+ case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED";
+ case 0xC00A0001: return "STATUS_CTX_WINSTATION_NAME_INVALID";
+ case 0xC00A0002: return "STATUS_CTX_INVALID_PD";
+ case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND";
+ case 0x400A0004: return "STATUS_CTX_CDM_CONNECT";
+ case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT";
+ case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING";
+ case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF";
+ case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND";
+ case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME";
+ case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR";
+ case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT";
+ case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER";
+ case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE";
+ case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY";
+ case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE";
+ case 0xC00A0010: return "STATUS_CTX_TD_ERROR";
+ case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID";
+ case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE";
+ case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED";
+ case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND";
+ case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION";
+ case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY";
+ case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE";
+ case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID";
+ case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE";
+ case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT";
+ case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT";
+ case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT";
+ case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED";
+ case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED";
+ case 0xC00A002E: return "STATUS_CTX_INVALID_WD";
+ case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND";
+ case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID";
+ case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED";
+ case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR";
+ case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET";
+ case 0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE";
+ case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE";
+ case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED";
+ case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED";
+ default: return "STATUS_UNKNOWN";
+ }
+}
+
+
+/*
+ * KsPrintf
+ * This function is a variable-argument, level-sensitive debug print routine.
+ * If the debug level specified for the print statement is lower than or
+ * equal to the current debug level, the message will be printed.
+ *
+ * Arguments:
+ * DebugPrintLevel - Specifies at which debugging level the string should
+ * be printed
+ * DebugMessage - printf-style format string for the variable arguments
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+VOID
+KsPrintf(
+ LONG DebugPrintLevel,
+ PCHAR DebugMessage,
+ ...
+ )
+{
+ va_list ap;
+
+ va_start(ap, DebugMessage);
+
+ if (DebugPrintLevel <= KsDebugLevel)
+ {
+ CHAR buffer[0x200];
+
+ /* use the bounded variant to avoid overrunning the stack buffer */
+ _vsnprintf(buffer, sizeof(buffer) - 1, DebugMessage, ap);
+ buffer[sizeof(buffer) - 1] = 0;
+
+ KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer));
+ }
+
+ va_end(ap);
+
+} // KsPrintf()
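+
+/*
+ * Usage sketch (illustrative only): with KsDebugLevel set to 2 or higher,
+ * a call such as
+ *
+ *     KsPrintf(2, "conn %p: send status %x\n", conn, status);
+ *
+ * shows up in the kernel debugger prefixed with the current thread id;
+ * with a lower KsDebugLevel the call is filtered out. The 'conn' and
+ * 'status' names here are hypothetical.
+ */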
+
+#endif
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+
+const CHAR *dos_file_prefix = "\\??\\";
+
+/*
+ * cfs_filp_open
+ * To open or create a file in kernel mode
+ *
+ * Arguments:
+ * name: name of the file to be opened or created, no dos path prefix
+ * flags: open/creation attribute options
+ * mode: access mode/permission to open or create
+ * err: error code
+ *
+ * Return Value:
+ * the pointer to the cfs_file_t or NULL if it fails
+ *
+ * Notes:
+ * N/A
+ */
+
+cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
+{
+ cfs_file_t * fp = NULL;
+
+ NTSTATUS Status;
+
+ OBJECT_ATTRIBUTES ObjectAttributes;
+ HANDLE FileHandle;
+ IO_STATUS_BLOCK IoStatus;
+ ACCESS_MASK DesiredAccess;
+ ULONG CreateDisposition;
+ ULONG ShareAccess;
+ ULONG CreateOptions;
+
+ USHORT NameLength = 0;
+ USHORT PrefixLength = 0;
+
+ UNICODE_STRING UnicodeName;
+ PWCHAR UnicodeString = NULL;
+
+ ANSI_STRING AnsiName;
+ PUCHAR AnsiString = NULL;
+
+ /* Analyze the flags settings */
+
+ if (cfs_is_flag_set(flags, O_WRONLY)) {
+ DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
+ ShareAccess = 0;
+ } else if (cfs_is_flag_set(flags, O_RDWR)) {
+ DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
+ ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ } else {
+ DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
+ ShareAccess = FILE_SHARE_READ;
+ }
+
+ if (cfs_is_flag_set(flags, O_CREAT)) {
+ if (cfs_is_flag_set(flags, O_EXCL)) {
+ CreateDisposition = FILE_CREATE;
+ } else {
+ CreateDisposition = FILE_OPEN_IF;
+ }
+ } else {
+ CreateDisposition = FILE_OPEN;
+ }
+
+ if (cfs_is_flag_set(flags, O_TRUNC)) {
+ if (cfs_is_flag_set(flags, O_EXCL)) {
+ CreateDisposition = FILE_OVERWRITE;
+ } else {
+ CreateDisposition = FILE_OVERWRITE_IF;
+ }
+ }
+
+ CreateOptions = 0;
+
+ if (cfs_is_flag_set(flags, O_DIRECTORY)) {
+ cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE);
+ }
+
+ if (cfs_is_flag_set(flags, O_SYNC)) {
+ cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
+ }
+
+ if (cfs_is_flag_set(flags, O_DIRECT)) {
+ cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
+ }
+
+ /* Initialize the unicode path name for the specified file */
+
+ NameLength = (USHORT)strlen(name);
+
+ if (name[0] != '\\') {
+ PrefixLength = (USHORT)strlen(dos_file_prefix);
+ }
+
+ AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1),
+ CFS_ALLOC_ZERO);
+ if (NULL == AnsiString) {
+ if (err) *err = -ENOMEM;
+ return NULL;
+ }
+
+ UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1),
+ CFS_ALLOC_ZERO);
+
+ if (NULL == UnicodeString) {
+ if (err) *err = -ENOMEM;
+ cfs_free(AnsiString);
+ return NULL;
+ }
+
+ if (PrefixLength) {
+ RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength);
+ }
+
+ RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength);
+ NameLength += PrefixLength;
+
+ AnsiName.MaximumLength = NameLength + 1;
+ AnsiName.Length = NameLength;
+ AnsiName.Buffer = AnsiString;
+
+ UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR);
+ UnicodeName.Length = 0;
+ UnicodeName.Buffer = (PWSTR)UnicodeString;
+
+ RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE);
+
+ /* Setup the object attributes structure for the file. */
+
+ InitializeObjectAttributes(
+ &ObjectAttributes,
+ &UnicodeName,
+ OBJ_CASE_INSENSITIVE |
+ OBJ_KERNEL_HANDLE,
+ NULL,
+ NULL );
+
+ /* Now open or create the file */
+
+ Status = ZwCreateFile(
+ &FileHandle,
+ DesiredAccess,
+ &ObjectAttributes,
+ &IoStatus,
+ 0,
+ FILE_ATTRIBUTE_NORMAL,
+ ShareAccess,
+ CreateDisposition,
+ CreateOptions,
+ NULL,
+ 0 );
+
+ /* Check the returned status of IoStatus... */
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ if (err) *err = cfs_error_code(IoStatus.Status);
+ cfs_free(UnicodeString);
+ cfs_free(AnsiString);
+ return NULL;
+ }
+
+ /* Allocate the cfs_file_t: libcfs file object */
+
+ fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO);
+
+ if (NULL == fp) {
+ Status = ZwClose(FileHandle);
+ ASSERT(NT_SUCCESS(Status));
+ if (err) *err = -ENOMEM;
+ cfs_free(UnicodeString);
+ cfs_free(AnsiString);
+ return NULL;
+ }
+
+ fp->f_handle = FileHandle;
+ strcpy(fp->f_name, name);
+ fp->f_flags = flags;
+ fp->f_mode = (mode_t)mode;
+ fp->f_count = 1;
+ if (err) *err = 0;
+
+ /* free the memory of temporary name strings */
+ cfs_free(UnicodeString);
+ cfs_free(AnsiString);
+
+ return fp;
+}
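+
+/*
+ * Usage sketch (illustrative only, excluded from the build): a typical
+ * open/write/fsync/close cycle with this API. The path and the helper
+ * name are hypothetical; note that a path without a leading backslash
+ * automatically gets the "\??\" DOS prefix prepended above.
+ */
+#if 0
+static int write_config_blob(const char *data, size_t len)
+{
+    cfs_file_t *fp;
+    loff_t      pos = 0;
+    int         rc  = 0;
+
+    fp = cfs_filp_open("C:\\temp\\lustre.cfg",
+                       O_CREAT | O_WRONLY, 0644, &rc);
+    if (fp == NULL)
+        return rc;
+
+    rc = cfs_filp_write(fp, (void *)data, len, &pos);
+    if (rc >= 0)
+        rc = cfs_filp_fsync(fp);
+
+    cfs_filp_close(fp);
+    return rc < 0 ? rc : 0;
+}
+#endif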
+
+
+/*
+ * cfs_filp_close
+ * To close the opened file and release the filp structure
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ *
+ * Return Value:
+ * ZERO: on success
+ * Non-Zero: on failure
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_filp_close(cfs_file_t *fp)
+{
+ NTSTATUS Status;
+
+ ASSERT(fp != NULL);
+ ASSERT(fp->f_handle != NULL);
+
+ /* release the file handle */
+ Status = ZwClose(fp->f_handle);
+ ASSERT(NT_SUCCESS(Status));
+
+ /* free the cfs_file_t structure */
+ cfs_free(fp);
+ return 0;
+}
+
+
+/*
+ * cfs_filp_read
+ * To read data from the opened file
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ * buf: pointer to the buffer to contain the data
+ * nbytes: size in bytes to be read from the file
+ * pos: offset in file where reading starts; if pos is
+ * NULL, read from the current file offset
+ *
+ * Return Value:
+ * Actual size read into the buffer in success case
+ * Error code in failure case
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+ LARGE_INTEGER address;
+ NTSTATUS Status;
+ IO_STATUS_BLOCK IoStatus;
+
+ int rc = 0;
+
+ /* Read data from the file into the specified buffer */
+
+ if (pos != NULL) {
+ address.QuadPart = *pos;
+ } else {
+ address.QuadPart = fp->f_pos;
+ }
+
+ Status = ZwReadFile( fp->f_handle,
+ 0,
+ NULL,
+ NULL,
+ &IoStatus,
+ buf,
+ nbytes,
+ &address,
+ NULL );
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ rc = cfs_error_code(IoStatus.Status);
+ } else {
+ rc = (int)IoStatus.Information;
+ fp->f_pos = address.QuadPart + rc;
+
+ if (pos != NULL) {
+ *pos = fp->f_pos;
+ }
+ }
+
+ return rc;
+}
+
+
+/*
+ * cfs_filp_write
+ * To write specified data to the opened file
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ * buf: pointer to the buffer containing the data
+ * nbytes: size in bytes to be written to the file
+ * pos: offset in file where writing starts; if pos is
+ * NULL, write at the current file offset
+ *
+ * Return Value:
+ * Actual size written to the file in success case
+ * Error code in failure case
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+ LARGE_INTEGER address;
+ NTSTATUS Status;
+ IO_STATUS_BLOCK IoStatus;
+ int rc = 0;
+
+ /* Write user specified data into the file */
+
+ if (pos != NULL) {
+ address.QuadPart = *pos;
+ } else {
+ address.QuadPart = fp->f_pos;
+ }
+
+ Status = ZwWriteFile( fp->f_handle,
+ 0,
+ NULL,
+ NULL,
+ &IoStatus,
+ buf,
+ nbytes,
+ &address,
+ NULL );
+
+ if (!NT_SUCCESS(Status)) {
+ rc = cfs_error_code(Status);
+ } else {
+ rc = (int)IoStatus.Information;
+ fp->f_pos = address.QuadPart + rc;
+
+ if (pos != NULL) {
+ *pos = fp->f_pos;
+ }
+ }
+
+ return rc;
+}
+
+
+NTSTATUS
+CompletionRoutine(
+ PDEVICE_OBJECT DeviceObject,
+ PIRP Irp,
+ PVOID Context)
+{
+ /* copy the IoStatus result */
+ *Irp->UserIosb = Irp->IoStatus;
+
+ /* signal the event we set */
+ KeSetEvent(Irp->UserEvent, 0, FALSE);
+
+ /* free the Irp we allocated */
+ IoFreeIrp(Irp);
+
+ return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * cfs_filp_fsync
+ * To sync the dirty data of the file to disk
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ *
+ * Return Value:
+ * Zero: in success case
+ * Error code: in failure case
+ *
+ * Notes:
+ * The NT kernel doesn't export a routine to flush a file,
+ * so we must allocate our own Irp and issue it to the file
+ * system driver.
+ */
+
+int cfs_filp_fsync(cfs_file_t *fp)
+{
+
+ PFILE_OBJECT FileObject;
+ PDEVICE_OBJECT DeviceObject;
+
+ NTSTATUS Status;
+ PIRP Irp;
+ KEVENT Event;
+ IO_STATUS_BLOCK IoSb;
+ PIO_STACK_LOCATION IrpSp;
+
+ /* get the FileObject and the DeviceObject */
+
+ Status = ObReferenceObjectByHandle(
+ fp->f_handle,
+ FILE_WRITE_DATA,
+ NULL,
+ KernelMode,
+ (PVOID*)&FileObject,
+ NULL );
+
+ if (!NT_SUCCESS(Status)) {
+ return cfs_error_code(Status);
+ }
+
+ DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+ /* allocate a new Irp */
+
+ Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
+
+ if (!Irp) {
+
+ ObDereferenceObject(FileObject);
+ return -ENOMEM;
+ }
+
+ /* initialize the event */
+ KeInitializeEvent(&Event, SynchronizationEvent, FALSE);
+
+ /* setup the Irp */
+ Irp->UserEvent = &Event;
+ Irp->UserIosb = &IoSb;
+ Irp->RequestorMode = KernelMode;
+
+ Irp->Tail.Overlay.Thread = PsGetCurrentThread();
+ Irp->Tail.Overlay.OriginalFileObject = FileObject;
+
+ /* setup the Irp stack location */
+ IrpSp = IoGetNextIrpStackLocation(Irp);
+
+ IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS;
+ IrpSp->DeviceObject = DeviceObject;
+ IrpSp->FileObject = FileObject;
+
+ IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE);
+
+
+ /* issue the Irp to the underlying file system driver */
+ IoCallDriver(DeviceObject, Irp);
+
+ /* wait until it is finished; non-alertable, so the stack-based
+ event and IoSb stay valid until the completion routine runs */
+ KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL);
+
+ /* cleanup our reference on it */
+ ObDereferenceObject(FileObject);
+
+ Status = IoSb.Status;
+
+ return cfs_error_code(Status);
+}
+
+/*
+ * cfs_get_file
+ * To increase the reference of the file object
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ *
+ * Return Value:
+ * Zero: in success case
+ * Non-Zero: in failure case
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_get_file(cfs_file_t *fp)
+{
+ InterlockedIncrement(&(fp->f_count));
+ return 0;
+}
+
+
+/*
+ * cfs_put_file
+ * To decrease the reference of the file object
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ *
+ * Return Value:
+ * Zero: in success case
+ * Non-Zero: in failure case
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_put_file(cfs_file_t *fp)
+{
+ if (InterlockedDecrement(&(fp->f_count)) == 0) {
+ cfs_filp_close(fp);
+ }
+
+ return 0;
+}
+
+
+/*
+ * cfs_file_count
+ * To query the reference count of the file object
+ *
+ * Arguments:
+ * fp: pointer to the cfs_file_t structure
+ *
+ * Return Value:
+ * the reference count of the file object
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_file_count(cfs_file_t *fp)
+{
+ return (int)(fp->f_count);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+
+
+#if _X86_
+
+void __declspec (naked) FASTCALL
+atomic_add(
+ int i,
+ atomic_t *v
+ )
+{
+ // ECX = i
+ // EDX = v ; [EDX][0] = v->counter
+
+ __asm {
+ lock add dword ptr [edx][0], ecx
+ ret
+ }
+}
+
+void __declspec (naked) FASTCALL
+atomic_sub(
+ int i,
+ atomic_t *v
+ )
+{
+ // ECX = i
+ // EDX = v ; [EDX][0] = v->counter
+
+ __asm {
+ lock sub dword ptr [edx][0], ecx
+ ret
+ }
+}
+
+void __declspec (naked) FASTCALL
+atomic_inc(
+ atomic_t *v
+ )
+{
+ //InterlockedIncrement((PULONG)(&((v)->counter)));
+
+ // ECX = v ; [ECX][0] = v->counter
+
+ __asm {
+ lock inc dword ptr [ecx][0]
+ ret
+ }
+}
+
+void __declspec (naked) FASTCALL
+atomic_dec(
+ atomic_t *v
+ )
+{
+ // ECX = v ; [ECX][0] = v->counter
+
+ __asm {
+ lock dec dword ptr [ecx][0]
+ ret
+ }
+}
+
+int __declspec (naked) FASTCALL
+atomic_sub_and_test(
+ int i,
+ atomic_t *v
+ )
+{
+
+ // ECX = i
+ // EDX = v ; [EDX][0] = v->counter
+
+ __asm {
+ xor eax, eax
+ lock sub dword ptr [edx][0], ecx
+ sete al
+ ret
+ }
+}
+
+int __declspec (naked) FASTCALL
+atomic_inc_and_test(
+ atomic_t *v
+ )
+{
+ // ECX = v ; [ECX][0] = v->counter
+
+ __asm {
+ xor eax, eax
+ lock inc dword ptr [ecx][0]
+ sete al
+ ret
+ }
+}
+
+int __declspec (naked) FASTCALL
+atomic_dec_and_test(
+ atomic_t *v
+ )
+{
+ // ECX = v ; [ECX][0] = v->counter
+
+ __asm {
+ xor eax, eax
+ lock dec dword ptr [ecx][0]
+ sete al
+ ret
+ }
+}
+
+#else
+
+void FASTCALL
+atomic_add(
+ int i,
+ atomic_t *v
+ )
+{
+ InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (i));
+}
+
+void FASTCALL
+atomic_sub(
+ int i,
+ atomic_t *v
+ )
+{
+ InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (-i));
+}
+
+void FASTCALL
+atomic_inc(
+ atomic_t *v
+ )
+{
+ InterlockedIncrement((PULONG)(&((v)->counter)));
+}
+
+void FASTCALL
+atomic_dec(
+ atomic_t *v
+ )
+{
+ InterlockedDecrement((PULONG)(&((v)->counter)));
+}
+
+int FASTCALL
+atomic_sub_and_test(
+ int i,
+ atomic_t *v
+ )
+{
+ int counter, result;
+
+ do {
+
+ counter = v->counter;
+ result = counter - i;
+
+ } while ( InterlockedCompareExchange(
+ &(v->counter),
+ result,
+ counter) != counter);
+
+ return (result == 0);
+}
+
+int FASTCALL
+atomic_inc_and_test(
+ atomic_t *v
+ )
+{
+ int counter, result;
+
+ do {
+
+ counter = v->counter;
+ result = counter + 1;
+
+ } while ( InterlockedCompareExchange(
+ &(v->counter),
+ result,
+ counter) != counter);
+
+ return (result == 0);
+}
+
+int FASTCALL
+atomic_dec_and_test(
+ atomic_t *v
+ )
+{
+ int counter, result;
+
+ do {
+
+ counter = v->counter;
+ result = counter - 1;
+
+ } while ( InterlockedCompareExchange(
+ &(v->counter),
+ result,
+ counter) != counter);
+
+ return (result == 0);
+}
+
+#endif
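+
+/*
+ * Usage sketch (illustrative only, excluded from the build): the classic
+ * reference-count pattern these primitives are meant for. 'struct my_obj'
+ * is a hypothetical object type.
+ */
+#if 0
+struct my_obj {
+    atomic_t refcount;
+    /* ... payload ... */
+};
+
+static void my_obj_get(struct my_obj *obj)
+{
+    atomic_inc(&obj->refcount);
+}
+
+static void my_obj_put(struct my_obj *obj)
+{
+    /* free the object when the last reference is dropped */
+    if (atomic_dec_and_test(&obj->refcount))
+        cfs_free(obj);
+}
+#endif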
+
+
+/*
+ * rw spinlock
+ */
+
+
+void
+rwlock_init(rwlock_t * rwlock)
+{
+ spin_lock_init(&rwlock->guard);
+ rwlock->count = 0;
+}
+
+void
+rwlock_fini(rwlock_t * rwlock)
+{
+}
+
+void
+read_lock(rwlock_t * rwlock)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+ slot->irql = KeRaiseIrqlToDpcLevel();
+
+ while (TRUE) {
+ spin_lock(&rwlock->guard);
+ if (rwlock->count >= 0)
+ break;
+ spin_unlock(&rwlock->guard);
+ }
+
+ rwlock->count++;
+ spin_unlock(&rwlock->guard);
+}
+
+void
+read_unlock(rwlock_t * rwlock)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+ spin_lock(&rwlock->guard);
+ ASSERT(rwlock->count > 0);
+ rwlock->count--;
+ if (rwlock->count < 0) {
+ cfs_enter_debugger();
+ }
+ spin_unlock(&rwlock->guard);
+
+ KeLowerIrql(slot->irql);
+}
+
+void
+write_lock(rwlock_t * rwlock)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+ slot->irql = KeRaiseIrqlToDpcLevel();
+
+ while (TRUE) {
+ spin_lock(&rwlock->guard);
+ if (rwlock->count == 0)
+ break;
+ spin_unlock(&rwlock->guard);
+ }
+
+ rwlock->count = -1;
+ spin_unlock(&rwlock->guard);
+}
+
+void
+write_unlock(rwlock_t * rwlock)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+ spin_lock(&rwlock->guard);
+ ASSERT(rwlock->count == -1);
+ rwlock->count = 0;
+ spin_unlock(&rwlock->guard);
+
+ KeLowerIrql(slot->irql);
+}
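+
+/*
+ * Usage sketch (illustrative only, excluded from the build): many readers
+ * may hold the lock concurrently, a writer excludes everyone. Since
+ * read_lock()/write_lock() raise the IRQL to DISPATCH_LEVEL, the critical
+ * sections must not block. 'table_lock' and 'table_head' are hypothetical.
+ */
+#if 0
+static rwlock_t         table_lock;
+static struct list_head table_head;
+
+static void table_scan(void)
+{
+    struct list_head *pos;
+
+    read_lock(&table_lock);
+    list_for_each(pos, &table_head) {
+        /* inspect entries; no blocking calls at DISPATCH_LEVEL */
+    }
+    read_unlock(&table_lock);
+}
+#endif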
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+
+
+cfs_mem_cache_t *cfs_page_t_slab = NULL;
+cfs_mem_cache_t *cfs_page_p_slab = NULL;
+
+/*
+ * cfs_alloc_page
+ * To allocate the cfs_page_t and also 1 page of memory
+ *
+ * Arguments:
+ * flags: the allocation options
+ *
+ * Return Value:
+ * pointer to the cfs_page_t structure in success or
+ * NULL in failure case
+ *
+ * Notes:
+ * N/A
+ */
+
+cfs_page_t * cfs_alloc_page(int flags)
+{
+ cfs_page_t *pg;
+ pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0);
+
+ if (NULL == pg) {
+ cfs_enter_debugger();
+ return NULL;
+ }
+
+ memset(pg, 0, sizeof(cfs_page_t));
+ pg->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0);
+ atomic_set(&pg->count, 1);
+
+ if (pg->addr) {
+ if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) {
+ memset(pg->addr, 0, CFS_PAGE_SIZE);
+ }
+ } else {
+ cfs_enter_debugger();
+ cfs_mem_cache_free(cfs_page_t_slab, pg);
+ pg = NULL;
+ }
+
+ return pg;
+}
+
+/*
+ * cfs_free_page
+ * To free the cfs_page_t including the page
+ *
+ * Arguments:
+ * pg: pointer to the cfs_page_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+void cfs_free_page(cfs_page_t *pg)
+{
+ ASSERT(pg != NULL);
+ ASSERT(pg->addr != NULL);
+ ASSERT(atomic_read(&pg->count) <= 1);
+
+ cfs_mem_cache_free(cfs_page_p_slab, pg->addr);
+ cfs_mem_cache_free(cfs_page_t_slab, pg);
+}
+
+
+/*
+ * cfs_alloc
+ * To allocate memory from system pool
+ *
+ * Arguments:
+ * nr_bytes: length in bytes of the requested buffer
+ * flags: allocation flags
+ *
+ * Return Value:
+ * NULL: if there is not enough memory in the system
+ * the address of the allocated memory in success.
+ *
+ * Notes:
+ * This operation can be treated as atomic.
+ */
+
+void *
+cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+ void *ptr;
+
+ /* Ignore the flags: always allocate from NonPagedPool */
+
+ ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs');
+
+ if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) {
+ memset(ptr, 0, nr_bytes);
+ }
+
+ if (!ptr) {
+ cfs_enter_debugger();
+ }
+
+ return ptr;
+}
+
+/*
+ * cfs_free
+ * To free the specified memory back to the system pool
+ *
+ * Arguments:
+ * addr: pointer to the buffer to be freed
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * This operation can be treated as atomic.
+ */
+
+void
+cfs_free(void *addr)
+{
+ ExFreePool(addr);
+}
+
+/*
+ * cfs_alloc_large
+ * To allocate large block of memory from system pool
+ *
+ * Arguments:
+ * nr_bytes: length in bytes of the requested buffer
+ *
+ * Return Value:
+ * NULL: if there is not enough memory in the system
+ * the address of the allocated memory in success.
+ *
+ * Notes:
+ * N/A
+ */
+
+void *
+cfs_alloc_large(size_t nr_bytes)
+{
+ return cfs_alloc(nr_bytes, 0);
+}
+
+/*
+ * cfs_free_large
+ * To free the specified memory back to the system pool
+ *
+ * Arguments:
+ * addr: pointer to the buffer to be freed
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+cfs_free_large(void *addr)
+{
+ cfs_free(addr);
+}
+
+
+/*
+ * cfs_mem_cache_create
+ * To create a SLAB cache
+ *
+ * Arguments:
+ * name: name string of the SLAB cache to be created
+ * size: size in bytes of SLAB entry buffer
+ * offset: offset in the page
+ * flags: SLAB creation flags
+ *
+ * Return Value:
+ * The pointer to the cfs_mem_cache_t structure in success.
+ * NULL pointer in failure case.
+ *
+ * Notes:
+ * 1. offset won't be used here.
+ * 2. it could be better to introduce a lock to protect access to the
+ * SLAB structure on SMP if there's no outside lock protection.
+ * 3. parameters C/D are removed.
+ */
+
+cfs_mem_cache_t *
+cfs_mem_cache_create(
+ const char * name,
+ size_t size,
+ size_t offset,
+ unsigned long flags
+ )
+{
+ cfs_mem_cache_t * kmc = NULL;
+
+ /* The name of the SLAB must not exceed 20 chars */
+
+ if (name && strlen(name) >= 20) {
+ goto errorout;
+ }
+
+ /* Allocate and initialize the SLAB structure */
+
+ kmc = cfs_alloc (sizeof(cfs_mem_cache_t), 0);
+
+ if (NULL == kmc) {
+ goto errorout;
+ }
+
+ memset(kmc, 0, sizeof(cfs_mem_cache_t));
+
+ kmc->flags = flags;
+
+ if (name) {
+ strcpy(&kmc->name[0], name);
+ }
+
+ /* Initialize the corresponding LookAside list */
+
+ ExInitializeNPagedLookasideList(
+ &(kmc->npll),
+ NULL,
+ NULL,
+ 0,
+ size,
+ 'pnmk',
+ 0);
+
+errorout:
+
+ return kmc;
+}
+
+/*
+ * cfs_mem_cache_destroy
+ * To destroy the unused SLAB cache
+ *
+ * Arguments:
+ * kmc: the SLAB cache to be destroyed.
+ *
+ * Return Value:
+ * 0: in success case.
+ * 1: in failure case.
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc)
+{
+ ASSERT(kmc != NULL);
+
+ ExDeleteNPagedLookasideList(&(kmc->npll));
+
+ cfs_free(kmc);
+
+ return 0;
+}
+
+/*
+ * cfs_mem_cache_alloc
+ * To allocate an object (LookAside entry) from the SLAB
+ *
+ * Arguments:
+ * kmc: the SLAB cache to be allocated from.
+ * flags: flags for allocation options
+ *
+ * Return Value:
+ * object buffer address: in success case.
+ * NULL: in failure case.
+ *
+ * Notes:
+ * N/A
+ */
+
+void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags)
+{
+ void *buf = NULL;
+
+ buf = ExAllocateFromNPagedLookasideList(&(kmc->npll));
+
+ return buf;
+}
+
+/*
+ * cfs_mem_cache_free
+ * To free an object (LookAside entry) to the SLAB cache
+ *
+ * Arguments:
+ * kmc: the SLAB cache to be freed to.
+ * buf: the pointer to the object to be freed.
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf)
+{
+ ExFreeToNPagedLookasideList(&(kmc->npll), buf);
+}
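+
+/*
+ * Usage sketch (illustrative only, excluded from the build): the full
+ * lifetime of a lookaside-backed cache. 'struct my_conn' and the names
+ * below are hypothetical.
+ */
+#if 0
+static cfs_mem_cache_t *conn_slab;
+
+static int conn_cache_demo(void)
+{
+    struct my_conn *conn;
+
+    conn_slab = cfs_mem_cache_create("conn", sizeof(struct my_conn), 0, 0);
+    if (conn_slab == NULL)
+        return -ENOMEM;
+
+    conn = cfs_mem_cache_alloc(conn_slab, 0);
+    if (conn != NULL) {
+        /* ... use the object ... */
+        cfs_mem_cache_free(conn_slab, conn);
+    }
+
+    cfs_mem_cache_destroy(conn_slab);
+    return 0;
+}
+#endif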
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define LIBCFS_MINOR 240
+
+int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
+{
+ struct libcfs_ioctl_hdr *hdr;
+ struct libcfs_ioctl_data *data;
+ int err;
+ ENTRY;
+
+ hdr = (struct libcfs_ioctl_hdr *)buf;
+ data = (struct libcfs_ioctl_data *)buf;
+
+ err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
+ if (err)
+ RETURN(err);
+
+ if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
+ CERROR(("LIBCFS: version mismatch kernel vs application\n"));
+ RETURN(-EINVAL);
+ }
+
+ if (hdr->ioc_len + buf >= end) {
+ CERROR(("LIBCFS: user buffer exceeds kernel buffer\n"));
+ RETURN(-EINVAL);
+ }
+
+ if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
+ CERROR(("LIBCFS: user buffer too small for ioctl\n"));
+ RETURN(-EINVAL);
+ }
+
+ err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
+ if (err)
+ RETURN(err);
+
+ if (libcfs_ioctl_is_invalid(data)) {
+ CERROR(("LIBCFS: ioctl not correctly formatted\n"));
+ RETURN(-EINVAL);
+ }
+
+ if (data->ioc_inllen1)
+ data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+ if (data->ioc_inllen2)
+ data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+ size_round(data->ioc_inllen1);
+
+ RETURN(0);
+}
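+
+/*
+ * Usage sketch (illustrative only, excluded from the build): how an ioctl
+ * handler might pull the user data into a kernel buffer with the routine
+ * above. 'my_ioctl_handler' and the buffer size are hypothetical.
+ */
+#if 0
+static int my_ioctl_handler(ulong_ptr arg)
+{
+    char buf[1024];
+    struct libcfs_ioctl_data *data;
+    int rc;
+
+    rc = libcfs_ioctl_getdata(buf, buf + sizeof(buf), (void *)arg);
+    if (rc != 0)
+        return rc;
+
+    data = (struct libcfs_ioctl_data *)buf;
+    /* ... dispatch on data->ioc_* fields ... */
+    return 0;
+}
+#endif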
+
+extern struct cfs_psdev_ops libcfs_psdev_ops;
+
+static int
+libcfs_psdev_open(cfs_file_t * file)
+{
+ struct libcfs_device_userstate **pdu = NULL;
+ int rc = 0;
+
+ pdu = (struct libcfs_device_userstate **)&file->private_data;
+ if (libcfs_psdev_ops.p_open != NULL)
+ rc = libcfs_psdev_ops.p_open(0, (void *)pdu);
+ else
+ return (-EPERM);
+ return rc;
+}
+
+/* called when closing /dev/device */
+static int
+libcfs_psdev_release(cfs_file_t * file)
+{
+ struct libcfs_device_userstate *pdu;
+ int rc = 0;
+
+ pdu = file->private_data;
+ if (libcfs_psdev_ops.p_close != NULL)
+ rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
+ else
+ rc = -EPERM;
+ return rc;
+}
+
+static int
+libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
+{
+ struct cfs_psdev_file pfile;
+ int rc = 0;
+
+ if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+ _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
+ _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
+ CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n",
+ _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)));
+ return (-EINVAL);
+ }
+
+ /* Handle platform-dependent IOC requests */
+ switch (cmd) {
+ case IOC_LIBCFS_PANIC:
+ if (!capable (CAP_SYS_BOOT))
+ return (-EPERM);
+ CERROR(("debugctl-invoked panic"));
+ KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL);
+
+ return (0);
+ case IOC_LIBCFS_MEMHOG:
+
+ if (!capable (CAP_SYS_ADMIN))
+ return -EPERM;
+ break;
+ }
+
+ pfile.off = 0;
+ pfile.private_data = file->private_data;
+ if (libcfs_psdev_ops.p_ioctl != NULL)
+ rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+ else
+ rc = -EPERM;
+ return (rc);
+}
+
+static struct file_operations libcfs_fops = {
+ /* lseek: */ NULL,
+ /* read: */ NULL,
+ /* write: */ NULL,
+ /* ioctl: */ libcfs_ioctl,
+ /* open: */ libcfs_psdev_open,
+ /* release:*/ libcfs_psdev_release
+};
+
+cfs_psdev_t libcfs_dev = {
+ LIBCFS_MINOR,
+ "libcfs",
+ &libcfs_fops
+};
+
+void
+libcfs_daemonize (char *str)
+{
+ printk("Daemonize request: %s.\n", str);
+ return;
+}
+
+void
+libcfs_blockallsigs(void)
+{
+ return;
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ * Thread routines
+ */
+
+/*
+ * cfs_thread_proc
+ * Lustre thread procedure wrapper routine (It's an internal routine)
+ *
+ * Arguments:
+ * context: a structure of cfs_thread_context_t, containing
+ * all the necessary parameters
+ *
+ * Return Value:
+ * void: N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+cfs_thread_proc(
+ void * context
+ )
+{
+ cfs_thread_context_t * thread_context =
+ (cfs_thread_context_t *) context;
+
+ /* Execute the specified function ... */
+
+ if (thread_context->func) {
+ (thread_context->func)(thread_context->arg);
+ }
+
+ /* Free the context memory */
+
+ cfs_free(context);
+
+ /* Terminate this system thread */
+
+ PsTerminateSystemThread(STATUS_SUCCESS);
+}
+
+/*
+ * cfs_kernel_thread
+ * Create a system thread to execute the routine specified
+ *
+ * Arguments:
+ * func: function to be executed in the thread
+ * arg: argument transferred to func function
+ * flag: thread creation flags.
+ *
+ * Return Value:
+ * int: 0 on success or error codes
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_kernel_thread(int (*func)(void *), void *arg, int flag)
+{
+ cfs_handle_t thread = NULL;
+ NTSTATUS status;
+ cfs_thread_context_t * context = NULL;
+
+ /* Allocate the context to be transferred to system thread */
+
+ context = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO);
+
+ if (!context) {
+ return -ENOMEM;
+ }
+
+ context->func = func;
+ context->arg = arg;
+
+ /* Create system thread with the cfs_thread_proc wrapper */
+
+ status = PsCreateSystemThread(
+ &thread,
+ (ACCESS_MASK)0L,
+ 0, 0, 0,
+ cfs_thread_proc,
+ context);
+
+ if (!NT_SUCCESS(status)) {
+
+
+ cfs_free(context);
+
+ /* We need translate the nt status to linux error code */
+
+ return cfs_error_code(status);
+ }
+
+ /* close the handle: the new thread keeps running detached */
+
+ ZwClose(thread);
+
+ return 0;
+}
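+
+/*
+ * Usage sketch (illustrative only, excluded from the build): spawning a
+ * detached service thread. 'my_service' is a hypothetical worker routine.
+ */
+#if 0
+static int my_service(void *arg)
+{
+    /* ... long-running work; returns when asked to stop ... */
+    return 0;
+}
+
+static int start_service(void *ctx)
+{
+    int rc = cfs_kernel_thread(my_service, ctx, 0);
+    if (rc != 0)
+        CERROR(("failed to spawn service thread: %d\n", rc));
+    return rc;
+}
+#endif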
+
+
+/*
+ * Symbols routines
+ */
+
+
+static CFS_DECL_RWSEM(cfs_symbol_lock);
+CFS_LIST_HEAD(cfs_symbol_list);
+
+int MPSystem = FALSE;
+
+/*
+ * cfs_symbol_get
+ * To query the specified symbol from the symbol table
+ *
+ * Arguments:
+ * name: the symbol name to be queried
+ *
+ * Return Value:
+ * If the symbol is in the table, return the address of it.
+ * If not, return NULL.
+ *
+ * Notes:
+ * N/A
+ */
+
+void *
+cfs_symbol_get(const char *name)
+{
+ struct list_head *walker;
+ struct cfs_symbol *sym = NULL;
+
+ down_read(&cfs_symbol_lock);
+ list_for_each(walker, &cfs_symbol_list) {
+ sym = list_entry (walker, struct cfs_symbol, sym_list);
+ if (!strcmp(sym->name, name)) {
+ sym->ref ++;
+ break;
+ }
+ }
+ up_read(&cfs_symbol_lock);
+
+ if (sym != NULL)
+ return sym->value;
+
+ return NULL;
+}
+
+/*
+ * cfs_symbol_put
+ * To decrease the reference of the specified symbol
+ *
+ * Arguments:
+ * name: the symbol name to be dereferenced
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+cfs_symbol_put(const char *name)
+{
+ struct list_head *walker;
+ struct cfs_symbol *sym = NULL;
+
+ down_read(&cfs_symbol_lock);
+ list_for_each(walker, &cfs_symbol_list) {
+ sym = list_entry (walker, struct cfs_symbol, sym_list);
+ if (!strcmp(sym->name, name)) {
+ LASSERT(sym->ref > 0);
+ sym->ref--;
+ break;
+ }
+ }
+ up_read(&cfs_symbol_lock);
+
+ LASSERT(sym != NULL);
+}
+
+
+/*
+ * cfs_symbol_register
+ * To register the specified symbol information
+ *
+ * Arguments:
+ * name: the symbol name to be registered
+ * value: the value that the symbol stands for
+ *
+ * Return Value:
+ * Zero: symbol registered successfully
+ * Non-Zero: failed to register the symbol
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+cfs_symbol_register(const char *name, const void *value)
+{
+ struct list_head *walker;
+ struct cfs_symbol *sym = NULL;
+ struct cfs_symbol *new = NULL;
+
+ new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO);
+ if (!new) {
+ return (-ENOMEM);
+ }
+ strncpy(new->name, name, CFS_SYMBOL_LEN);
+ new->value = (void *)value;
+ new->ref = 0;
+ CFS_INIT_LIST_HEAD(&new->sym_list);
+
+ down_write(&cfs_symbol_lock);
+ list_for_each(walker, &cfs_symbol_list) {
+ sym = list_entry (walker, struct cfs_symbol, sym_list);
+ if (!strcmp(sym->name, name)) {
+ up_write(&cfs_symbol_lock);
+ cfs_free(new);
+ return 0; // already registered
+ }
+ }
+ list_add_tail(&new->sym_list, &cfs_symbol_list);
+ up_write(&cfs_symbol_lock);
+
+ return 0;
+}
+
+/*
+ * cfs_symbol_unregister
+ * To unregister/remove the specified symbol
+ *
+ * Arguments:
+ * name: the symbol name to be unregistered
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+cfs_symbol_unregister(const char *name)
+{
+ struct list_head *walker;
+ struct list_head *nxt;
+ struct cfs_symbol *sym = NULL;
+
+ down_write(&cfs_symbol_lock);
+ list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+ sym = list_entry (walker, struct cfs_symbol, sym_list);
+ if (!strcmp(sym->name, name)) {
+ LASSERT(sym->ref == 0);
+ list_del (&sym->sym_list);
+ cfs_free(sym);
+ break;
+ }
+ }
+ up_write(&cfs_symbol_lock);
+}
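+
+/*
+ * Usage sketch (illustrative only, excluded from the build): publishing a
+ * function through the symbol table and consuming it by name. The names
+ * are hypothetical; note every successful cfs_symbol_get() must be paired
+ * with a cfs_symbol_put().
+ */
+#if 0
+extern int my_api_function(int x);
+
+static void symbol_demo(void)
+{
+    int (*fn)(int);
+
+    cfs_symbol_register("my_api_function", my_api_function);
+
+    fn = cfs_symbol_get("my_api_function");
+    if (fn != NULL) {
+        (void) fn(42);
+        cfs_symbol_put("my_api_function");
+    }
+
+    cfs_symbol_unregister("my_api_function");
+}
+#endif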
+
+/*
+ * cfs_symbol_clean
+ * To clean all the symbols
+ *
+ * Arguments:
+ * N/A
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+cfs_symbol_clean()
+{
+ struct list_head *walker;
+ struct list_head *nxt;
+ struct cfs_symbol *sym = NULL;
+
+ down_write(&cfs_symbol_lock);
+ /* use the _safe variant: entries are freed while walking */
+ list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+ sym = list_entry (walker, struct cfs_symbol, sym_list);
+ LASSERT(sym->ref == 0);
+ list_del (&sym->sym_list);
+ cfs_free(sym);
+ }
+ up_write(&cfs_symbol_lock);
+ return;
+}
+
+
+
+/*
+ * Timer routines
+ */
+
+
+/* Timer dpc procedure */
+
+static void
+cfs_timer_dpc_proc (
+ IN PKDPC Dpc,
+ IN PVOID DeferredContext,
+ IN PVOID SystemArgument1,
+ IN PVOID SystemArgument2)
+{
+ cfs_timer_t * timer;
+ KIRQL Irql;
+
+ timer = (cfs_timer_t *) DeferredContext;
+
+ /* clear the flag */
+ KeAcquireSpinLock(&(timer->Lock), &Irql);
+ cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
+ KeReleaseSpinLock(&(timer->Lock), Irql);
+
+ /* call the user specified timer procedure */
+ timer->proc((unsigned long)(timer->arg));
+}
+
+/*
+ * cfs_timer_init
+ * To initialize the cfs_timer_t
+ *
+ * Arguments:
+ * timer: the cfs_timer to be initialized
+ * func: the timer callback procedure
+ * arg: argument for the callback proc
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg)
+{
+ memset(timer, 0, sizeof(cfs_timer_t));
+
+ timer->proc = func;
+ timer->arg = arg;
+
+ KeInitializeSpinLock(&(timer->Lock));
+ KeInitializeTimer(&timer->Timer);
+ KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer);
+
+ cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED);
+}
+
+/*
+ * cfs_timer_done
+ * To finalize the cfs_timer_t (unused)
+ *
+ * Arguments:
+ * timer: the cfs_timer to be cleaned up
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_timer_done(cfs_timer_t *timer)
+{
+ return;
+}
+
+/*
+ * cfs_timer_arm
+ * To arm the timer to fire when @deadline is reached
+ *
+ * Arguments:
+ * timer: the cfs_timer to be armed
+ * deadline: the time at which the timer is to fire
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline)
+{
+ LARGE_INTEGER timeout;
+ KIRQL Irql;
+
+ KeAcquireSpinLock(&(timer->Lock), &Irql);
+ if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){
+
+ timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline;
+
+ if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) {
+ cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
+ }
+
+ timer->deadline = deadline;
+ }
+
+ KeReleaseSpinLock(&(timer->Lock), Irql);
+}
+
+/*
+ * cfs_timer_disarm
+ * To cancel the scheduled timer
+ *
+ * Arguments:
+ * timer: the cfs_timer to be discarded
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_timer_disarm(cfs_timer_t *timer)
+{
+ KIRQL Irql;
+
+ KeAcquireSpinLock(&(timer->Lock), &Irql);
+ KeCancelTimer(&(timer->Timer));
+ cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
+ KeReleaseSpinLock(&(timer->Lock), Irql);
+}
+
+
+/*
+ * cfs_timer_is_armed
+ * To check whether the timer is scheduled
+ *
+ * Arguments:
+ * timer: the cfs_timer to be checked
+ *
+ * Return Value:
+ * 1: if it's armed.
+ * 0: if it's not.
+ *
+ * Notes:
+ * N/A
+ */
+
+int cfs_timer_is_armed(cfs_timer_t *timer)
+{
+ int rc = 0;
+ KIRQL Irql;
+
+ KeAcquireSpinLock(&(timer->Lock), &Irql);
+ if (cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)) {
+ rc = 1;
+ }
+ KeReleaseSpinLock(&(timer->Lock), Irql);
+
+ return rc;
+}
+
+/*
+ * cfs_timer_deadline
+ * To query the deadline of the timer
+ *
+ * Arguments:
+ * timer: the cfs_timer to be queried
+ *
+ * Return Value:
+ * the deadline value
+ *
+ * Notes:
+ * N/A
+ */
+
+cfs_time_t cfs_timer_deadline(cfs_timer_t * timer)
+{
+ return timer->deadline;
+}
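+
+/*
+ * Usage sketch (illustrative only, excluded from the build): a one-shot
+ * timeout. The callback runs from the timer DPC at DISPATCH_LEVEL, so it
+ * must be short and non-blocking. 'on_timeout'/'arm_timeout' are
+ * hypothetical, and cfs_time_current() is assumed to return the current
+ * time in the same jiffies units cfs_timer_arm() expects.
+ */
+#if 0
+static void on_timeout(unsigned long arg)
+{
+    /* runs at DISPATCH_LEVEL from the timer DPC; keep it short */
+}
+
+static void arm_timeout(cfs_timer_t *timer, void *ctx)
+{
+    cfs_timer_init(timer, on_timeout, ctx);
+    cfs_timer_arm(timer, cfs_time_current() + 5 * HZ);  /* ~5 seconds */
+}
+#endif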
+
+/*
+ * daemonize routine stub
+ */
+
+void cfs_daemonize(char *str)
+{
+ return;
+}
+
+/*
+ * routines related to signals
+ */
+
+cfs_sigset_t cfs_get_blocked_sigs(cfs_task_t *t)
+{
+ return 0;
+}
+
+void cfs_block_allsigs(cfs_task_t *t)
+{
+ return;
+}
+
+void cfs_block_sigs(cfs_task_t *t, sigset_t bit)
+{
+ return;
+}
+
+/**
+ ** Initialize routines
+ **/
+
+int
+libcfs_arch_init(void)
+{
+ int rc;
+
+ spinlock_t lock;
+ /* Workaround to check whether the system is an MP or UP build */
+ spin_lock_init(&lock);
+ spin_lock(&lock);
+ MPSystem = (int)lock.lock;
+ /* On an MP build the lock is a real spin; on a UP build it
+ only raises the IRQL to DISPATCH_LEVEL */
+ spin_unlock(&lock);
+
+ /* create slab memory caches for the page allocators */
+ cfs_page_t_slab = cfs_mem_cache_create(
+ "CPGT", sizeof(cfs_page_t), 0, 0 );
+
+ cfs_page_p_slab = cfs_mem_cache_create(
+ "CPGP", CFS_PAGE_SIZE, 0, 0 );
+
+ if ( cfs_page_t_slab == NULL ||
+ cfs_page_p_slab == NULL ){
+ rc = -ENOMEM;
+ goto errorout;
+ }
+
+ rc = init_task_manager();
+
+ if (rc != 0) {
+ cfs_enter_debugger();
+ KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n"));
+ goto errorout;
+ }
+
+ /* initialize the proc file system */
+ rc = proc_init_fs();
+
+ if (rc != 0) {
+ cfs_enter_debugger();
+ KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n"));
+ cleanup_task_manager();
+ goto errorout;
+ }
+
+ /* initialize the tdi data */
+ rc = ksocknal_init_tdi_data();
+
+ if (rc != 0) {
+ cfs_enter_debugger();
+ KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n"));
+ proc_destroy_fs();
+ cleanup_task_manager();
+ goto errorout;
+ }
+
+errorout:
+
+ if (rc != 0) {
+ /* destroy the page cache slabs */
+ if (cfs_page_t_slab) {
+ cfs_mem_cache_destroy(cfs_page_t_slab);
+ }
+ if (cfs_page_p_slab) {
+ cfs_mem_cache_destroy(cfs_page_p_slab);
+ }
+ }
+
+ return rc;
+}
+
+void
+libcfs_arch_cleanup(void)
+{
+ /* finalize the tdi data */
+ ksocknal_fini_tdi_data();
+
+ /* destroy the whole proc fs tree and nodes */
+ proc_destroy_fs();
+
+ /* destroy the page cache slabs */
+ if (cfs_page_t_slab) {
+ cfs_mem_cache_destroy(cfs_page_t_slab);
+ }
+
+ if (cfs_page_p_slab) {
+ cfs_mem_cache_destroy(cfs_page_p_slab);
+ }
+
+ return;
+}
+
+EXPORT_SYMBOL(libcfs_arch_init);
+EXPORT_SYMBOL(libcfs_arch_cleanup);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+# define DEBUG_SUBSYSTEM S_PORTALS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifdef __KERNEL__
+
+
+/*
+ * /proc emulator routines ...
+ */
+
+/* The root node of the proc fs emulation: /proc */
+cfs_proc_entry_t * proc_fs_root = NULL;
+
+
+/* The sys root: /proc/sys */
+cfs_proc_entry_t * proc_sys_root = NULL;
+
+
+/* The dev root: /proc/dev, used to implement misc devices */
+
+cfs_proc_entry_t * proc_dev_root = NULL;
+
+
+/* SLAB object for cfs_proc_entry_t allocation */
+
+cfs_mem_cache_t * proc_entry_cache = NULL;
+
+/* root node for sysctl table */
+
+cfs_sysctl_table_header_t root_table_header;
+
+/* The global lock to protect all the access */
+
+#if LIBCFS_PROCFS_SPINLOCK
+spinlock_t proc_fs_lock;
+
+#define INIT_PROCFS_LOCK() spin_lock_init(&proc_fs_lock)
+#define LOCK_PROCFS() spin_lock(&proc_fs_lock)
+#define UNLOCK_PROCFS() spin_unlock(&proc_fs_lock)
+
+#else
+
+mutex_t proc_fs_lock;
+
+#define INIT_PROCFS_LOCK() init_mutex(&proc_fs_lock)
+#define LOCK_PROCFS() mutex_down(&proc_fs_lock)
+#define UNLOCK_PROCFS() mutex_up(&proc_fs_lock)
+
+#endif
+
+static ssize_t
+proc_file_read(struct file * file, char * buf, size_t nbytes, loff_t *ppos)
+{
+ char *page;
+ ssize_t retval=0;
+ int eof=0;
+ ssize_t n, count;
+ char *start;
+ cfs_proc_entry_t * dp;
+
+ dp = (cfs_proc_entry_t *) file->private_data;
+ if (!(page = (char*) cfs_alloc(PAGE_SIZE, 0)))
+ return -ENOMEM;
+
+ while ((nbytes > 0) && !eof) {
+
+ count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
+
+ start = NULL;
+ if (dp->read_proc) {
+ n = dp->read_proc( page, &start, (long)*ppos,
+ count, &eof, dp->data);
+ } else
+ break;
+
+ if (!start) {
+ /*
+ * For proc files that are less than 4k
+ */
+ start = page + *ppos;
+ n -= (ssize_t)(*ppos);
+ if (n <= 0)
+ break;
+ if (n > count)
+ n = count;
+ }
+ if (n == 0)
+ break; /* End of file */
+ if (n < 0) {
+ if (retval == 0)
+ retval = n;
+ break;
+ }
+
+ n -= copy_to_user((void *)buf, start, n);
+ if (n == 0) {
+ if (retval == 0)
+ retval = -EFAULT;
+ break;
+ }
+
+ *ppos += n;
+ nbytes -= n;
+ buf += n;
+ retval += n;
+ }
+ cfs_free(page);
+
+ return retval;
+}
+
+static ssize_t
+proc_file_write(struct file * file, const char * buffer,
+ size_t count, loff_t *ppos)
+{
+ cfs_proc_entry_t * dp;
+
+ dp = (cfs_proc_entry_t *) file->private_data;
+
+ if (!dp->write_proc)
+ return -EIO;
+
+ /* FIXME: does this routine need ppos? probably... */
+ return dp->write_proc(file, buffer, count, dp->data);
+}
+
+struct file_operations proc_file_operations = {
+ /*lseek:*/ NULL, //proc_file_lseek,
+ /*read:*/ proc_file_read,
+ /*write:*/ proc_file_write,
+ /*ioctl:*/ NULL,
+ /*open:*/ NULL,
+ /*release:*/ NULL
+};
+
+/* allocate proc entry block */
+
+cfs_proc_entry_t *
+proc_alloc_entry()
+{
+ cfs_proc_entry_t * entry = NULL;
+
+ entry = cfs_mem_cache_alloc(proc_entry_cache, 0);
+ if (!entry) {
+ return NULL;
+ }
+
+ memset(entry, 0, sizeof(cfs_proc_entry_t));
+
+ entry->magic = CFS_PROC_ENTRY_MAGIC;
+ RtlInitializeSplayLinks(&(entry->s_link));
+ entry->proc_fops = &proc_file_operations;
+
+ return entry;
+}
+
+/* free the proc entry block */
+
+void
+proc_free_entry(cfs_proc_entry_t * entry)
+
+{
+ ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC);
+
+ cfs_mem_cache_free(proc_entry_cache, entry);
+}
+
+/* dissect the path string for a given full proc path */
+
+void
+proc_dissect_name(
+ char *path,
+ char **first,
+ int *first_len,
+ char **remain
+ )
+{
+ int i = 0, j = 0, len = 0;
+
+ *first = *remain = NULL;
+ *first_len = 0;
+
+ len = strlen(path);
+
+ while (i < len && (path[i] == '/')) i++;
+
+ if (i < len) {
+
+ *first = path + i;
+ while (i < len && (path[i] != '/')) i++;
+ *first_len = (path + i - *first);
+
+ if (i + 1 < len) {
+ *remain = path + i + 1;
+ }
+ }
+}
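+
+/*
+ * For example, dissecting "sys/portals/debug" yields *first pointing at
+ * "sys..." within the original string, *first_len == 3 and *remain ==
+ * "portals/debug"; dissecting the final component "debug" leaves *remain
+ * NULL, which is how the callers below detect the end of the path.
+ */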
+
+/* search the children entries of the parent entry */
+
+cfs_proc_entry_t *
+proc_search_splay (
+ cfs_proc_entry_t * parent,
+ char * name
+ )
+{
+ cfs_proc_entry_t * node;
+ PRTL_SPLAY_LINKS link;
+
+ ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+ ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+
+ link = parent->root;
+
+ while (link) {
+
+ ANSI_STRING ename,nname;
+ long result;
+
+ node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link);
+
+ ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC);
+
+ /* Compare the prefix in the tree with the full name */
+
+ RtlInitAnsiString(&ename, name);
+ RtlInitAnsiString(&nname, node->name);
+
+ result = RtlCompareString(&nname, &ename,TRUE);
+
+ if (result > 0) {
+
+ /* The node name is greater than the target name,
+ so we go down the left child */
+
+ link = RtlLeftChild(link);
+
+ } else if (result < 0) {
+
+ /* The node name is less than the target name,
+ so we go down the right child */
+
+ link = RtlRightChild(link);
+
+ } else {
+
+ /* We found the entry; splay it to the root
+ to speed up subsequent lookups */
+
+ parent->root = RtlSplay(link);
+
+ return node;
+ }
+
+ /* continue searching down the tree ... */
+ }
+
+ /* The expected entry is not in the splay tree */
+
+ return NULL;
+}
+
+int
+proc_insert_splay (
+ cfs_proc_entry_t * parent,
+ cfs_proc_entry_t * child
+ )
+{
+ cfs_proc_entry_t * entry;
+
+ ASSERT(parent != NULL && child != NULL);
+ ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+ ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
+ ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+
+ if (!parent->root) {
+ parent->root = &(child->s_link);
+ } else {
+ entry = CONTAINING_RECORD(parent->root, cfs_proc_entry_t, s_link);
+ while (TRUE) {
+ long result;
+ ANSI_STRING ename, cname;
+
+ ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC);
+
+ RtlInitAnsiString(&ename, entry->name);
+ RtlInitAnsiString(&cname, child->name);
+
+ result = RtlCompareString(&ename, &cname,TRUE);
+
+ if (result == 0) {
+ cfs_enter_debugger();
+ if (entry == child) {
+ break;
+ }
+ return FALSE;
+ }
+
+ if (result > 0) {
+ if (RtlLeftChild(&entry->s_link) == NULL) {
+ RtlInsertAsLeftChild(&entry->s_link, &child->s_link);
+ break;
+ } else {
+ entry = CONTAINING_RECORD( RtlLeftChild(&entry->s_link),
+ cfs_proc_entry_t, s_link);
+ }
+ } else {
+ if (RtlRightChild(&entry->s_link) == NULL) {
+ RtlInsertAsRightChild(&entry->s_link, &child->s_link);
+ break;
+ } else {
+ entry = CONTAINING_RECORD( RtlRightChild(&entry->s_link),
+ cfs_proc_entry_t, s_link );
+ }
+ }
+ }
+ }
+
+ cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED);
+ parent->nlink++;
+
+ return TRUE;
+}
+
+
+/* remove a child entry from the splay tree */
+int
+proc_remove_splay (
+ cfs_proc_entry_t * parent,
+ cfs_proc_entry_t * child
+ )
+{
+ cfs_proc_entry_t * entry = NULL;
+
+ ASSERT(parent != NULL && child != NULL);
+ ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+ ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
+ ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+ ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED));
+
+ entry = proc_search_splay(parent, child->name);
+
+ if (entry) {
+ ASSERT(entry == child);
+ parent->root = RtlDelete(&(entry->s_link));
+ parent->nlink--;
+ } else {
+ cfs_enter_debugger();
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+
+/* search a node inside the proc fs tree */
+
+cfs_proc_entry_t *
+proc_search_entry(
+ char * name,
+ cfs_proc_entry_t * root
+ )
+{
+ cfs_proc_entry_t * entry;
+ cfs_proc_entry_t * parent;
+ char *first, *remain;
+ int flen;
+ char *ename = NULL;
+
+ parent = root;
+ entry = NULL;
+
+ ename = cfs_alloc(0x21, CFS_ALLOC_ZERO);
+
+ if (ename == NULL) {
+ goto errorout;
+ }
+
+again:
+
+ /* dissect the file name string */
+ proc_dissect_name(name, &first, &flen, &remain);
+
+ if (first) {
+
+ if (flen >= 0x20) {
+ cfs_enter_debugger();
+ entry = NULL;
+ goto errorout;
+ }
+
+ memset(ename, 0, 0x20);
+ memcpy(ename, first, flen);
+
+ entry = proc_search_splay(parent, ename);
+
+ if (!entry) {
+ goto errorout;
+ }
+
+ if (remain) {
+ name = remain;
+ parent = entry;
+
+ goto again;
+ }
+ }
+
+errorout:
+
+ if (ename) {
+ cfs_free(ename);
+ }
+
+ return entry;
+}
+
+/* insert the path nodes to the proc fs tree */
+
+cfs_proc_entry_t *
+proc_insert_entry(
+ char * name,
+ cfs_proc_entry_t * root
+ )
+{
+ cfs_proc_entry_t *entry;
+ cfs_proc_entry_t *parent;
+ char *first, *remain;
+ int flen;
+ char ename[0x20];
+
+ parent = root;
+ entry = NULL;
+
+again:
+
+ proc_dissect_name(name, &first, &flen, &remain);
+
+ if (first) {
+
+ if (flen >= 0x20) {
+ return NULL;
+ }
+
+ memset(ename, 0, 0x20);
+ memcpy(ename, first, flen);
+
+ entry = proc_search_splay(parent, ename);
+
+ if (!entry) {
+ entry = proc_alloc_entry();
+
+ if (entry) {
+ /* copy the component name only after the
+ allocation is known to have succeeded */
+ memcpy(entry->name, ename, flen);
+ if(!proc_insert_splay(parent, entry)) {
+ proc_free_entry(entry);
+ entry = NULL;
+ }
+ }
+ }
+
+ if (!entry) {
+ return NULL;
+ }
+
+ if (remain) {
+ entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO;
+ cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
+ name = remain;
+ parent = entry;
+ goto again;
+ }
+ }
+
+ return entry;
+}
+
+/* remove the path nodes from the proc fs tree */
+
+void
+proc_remove_entry(
+ char * name,
+ cfs_proc_entry_t * root
+ )
+{
+ cfs_proc_entry_t *entry;
+ char *first, *remain;
+ int flen;
+ char ename[0x20];
+
+ entry = NULL;
+
+ proc_dissect_name(name, &first, &flen, &remain);
+
+ if (first) {
+
+ memset(ename, 0, 0x20);
+ memcpy(ename, first, flen);
+
+ entry = proc_search_splay(root, ename);
+
+ if (entry) {
+
+ if (remain) {
+ ASSERT(S_ISDIR(entry->mode));
+ proc_remove_entry(remain, entry);
+ }
+
+ if (!entry->nlink) {
+ proc_remove_splay(root, entry);
+ proc_free_entry(entry);
+ }
+ }
+ } else {
+ cfs_enter_debugger();
+ }
+}
+
+/* create proc entry and insert it into the proc fs */
+
+cfs_proc_entry_t *
+create_proc_entry (
+ char * name,
+ mode_t mode,
+ cfs_proc_entry_t * root
+ )
+{
+ cfs_proc_entry_t *parent = root;
+ cfs_proc_entry_t *entry = NULL;
+
+ if (S_ISDIR(mode)) {
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO | S_IXUGO;
+ } else {
+ if ((mode & S_IFMT) == 0)
+ mode |= S_IFREG;
+ if ((mode & S_IALLUGO) == 0)
+ mode |= S_IRUGO;
+ }
+
+ LOCK_PROCFS();
+
+ ASSERT(NULL != proc_fs_root);
+
+ if (!parent) {
+ parent = proc_fs_root;
+ }
+
+ entry = proc_search_entry(name, parent);
+
+ if (!entry) {
+ entry = proc_insert_entry(name, parent);
+ if (!entry) {
+ /* Failed to create/insert the splay node ... */
+ cfs_enter_debugger();
+ goto errorout;
+ }
+ /* Initializing entry ... */
+ entry->mode = mode;
+
+ if (S_ISDIR(mode)) {
+ cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
+ }
+ }
+
+errorout:
+
+ UNLOCK_PROCFS();
+
+ return entry;
+}
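+
+/*
+ * Usage sketch (illustrative only, excluded from the build): exposing a
+ * read-only counter through the emulated proc fs. The entry name and the
+ * helpers are hypothetical; the read_proc prototype follows the Linux
+ * convention used by proc_file_read() above.
+ */
+#if 0
+static int stats_read(char *page, char **start, off_t off,
+                      int count, int *eof, void *data)
+{
+    *eof = 1;
+    return sprintf(page, "%d\n", *(int *)data);
+}
+
+static void publish_stats(int *counter)
+{
+    cfs_proc_entry_t *pe;
+
+    pe = create_proc_entry("sys/portals/my_stats", 0, NULL);
+    if (pe != NULL) {
+        pe->read_proc = stats_read;
+        pe->data      = counter;
+    }
+}
+#endif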
+
+
+/* search the specified entry from the proc fs */
+
+cfs_proc_entry_t *
+search_proc_entry(
+ char * name,
+ cfs_proc_entry_t * root
+ )
+{
+ cfs_proc_entry_t * entry;
+
+ LOCK_PROCFS();
+ if (root == NULL) {
+ root = proc_fs_root;
+ }
+ entry = proc_search_entry(name, root);
+ UNLOCK_PROCFS();
+
+ return entry;
+}
+
+/* remove the entry from the proc fs */
+
+void
+remove_proc_entry(
+ char * name,
+ cfs_proc_entry_t * parent
+ )
+{
+ LOCK_PROCFS();
+ if (parent == NULL) {
+ parent = proc_fs_root;
+ }
+ proc_remove_entry(name, parent);
+ UNLOCK_PROCFS();
+}
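+
+/*
+ * Usage sketch (illustrative only; "example", my_data and my_fops are
+ * hypothetical names, not part of this file): create a regular entry
+ * under the proc fs root, look it up later, then remove it.  A NULL
+ * root defaults to proc_fs_root in all three calls.
+ *
+ *     cfs_proc_entry_t *e;
+ *
+ *     e = create_proc_entry("sys/example", S_IFREG | S_IRUGO, NULL);
+ *     if (e) {
+ *         e->data      = my_data;      hypothetical payload
+ *         e->proc_fops = &my_fops;     hypothetical file operations
+ *     }
+ *     ...
+ *     e = search_proc_entry("sys/example", NULL);
+ *     ...
+ *     remove_proc_entry("sys/example", NULL);
+ */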
+
+
+void proc_destroy_splay(cfs_proc_entry_t * entry)
+{
+ cfs_proc_entry_t * node;
+
+ if (S_ISDIR(entry->mode)) {
+
+ while (entry->root) {
+ node = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link);
+ entry->root = RtlDelete(&(node->s_link));
+ proc_destroy_splay(node);
+ }
+ }
+
+ proc_free_entry(entry);
+}
+
+
+/* destroy the whole proc fs tree */
+
+void proc_destroy_fs()
+{
+ LOCK_PROCFS();
+
+ if (proc_fs_root) {
+ proc_destroy_splay(proc_fs_root);
+ }
+
+ if (proc_entry_cache) {
+ cfs_mem_cache_destroy(proc_entry_cache);
+ }
+
+ UNLOCK_PROCFS();
+}
+
+/* initialize / build the proc fs tree */
+
+int proc_init_fs()
+{
+ cfs_proc_entry_t * root = NULL;
+
+ memset(&(root_table_header), 0, sizeof(struct ctl_table_header));
+ INIT_LIST_HEAD(&(root_table_header.ctl_entry));
+
+ INIT_PROCFS_LOCK();
+ proc_entry_cache = cfs_mem_cache_create(
+ NULL,
+ sizeof(cfs_proc_entry_t),
+ 0,
+ 0
+ );
+
+ if (!proc_entry_cache) {
+ return (-ENOMEM);
+ }
+
+ root = proc_alloc_entry();
+
+ if (!root) {
+ proc_destroy_fs();
+ return (-ENOMEM);
+ }
+
+ root->magic = CFS_PROC_ENTRY_MAGIC;
+ root->flags = CFS_PROC_FLAG_DIRECTORY;
+ root->mode = S_IFDIR | S_IRUGO | S_IXUGO;
+ root->nlink = 3; // root should never be deleted.
+
+ root->name[0]='p';
+ root->name[1]='r';
+ root->name[2]='o';
+ root->name[3]='c';
+
+ proc_fs_root = root;
+
+ proc_sys_root = create_proc_entry("sys", S_IFDIR, root);
+
+ if (!proc_sys_root) {
+ proc_free_entry(root);
+ proc_fs_root = NULL;
+ proc_destroy_fs();
+ return (-ENOMEM);
+ }
+
+ proc_sys_root->nlink = 1;
+
+ proc_dev_root = create_proc_entry("dev", S_IFDIR, root);
+
+ if (!proc_dev_root) {
+ proc_free_entry(proc_sys_root);
+ proc_sys_root = NULL;
+ proc_free_entry(proc_fs_root);
+ proc_fs_root = NULL;
+ proc_destroy_fs();
+ return (-ENOMEM);
+ }
+
+ proc_dev_root->nlink = 1;
+
+ return 0;
+}
+
+
+static ssize_t do_rw_proc(int write, struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ int op;
+ cfs_proc_entry_t *de;
+ struct ctl_table *table;
+ size_t res;
+ ssize_t error;
+
+ de = (cfs_proc_entry_t *) file->proc_dentry;
+
+ if (!de || !de->data)
+ return -ENOTDIR;
+ table = (struct ctl_table *) de->data;
+ if (!table || !table->proc_handler)
+ return -ENOTDIR;
+ op = (write ? 002 : 004);
+
+// if (ctl_perm(table, op))
+// return -EPERM;
+
+ res = count;
+
+ /*
+ * FIXME: we need to pass on ppos to the handler.
+ */
+
+ error = (*table->proc_handler) (table, write, file, buf, &res);
+ if (error)
+ return error;
+ return res;
+}
+
+static ssize_t proc_readsys(struct file * file, char * buf,
+ size_t count, loff_t *ppos)
+{
+ return do_rw_proc(0, file, buf, count, ppos);
+}
+
+static ssize_t proc_writesys(struct file * file, const char * buf,
+ size_t count, loff_t *ppos)
+{
+ return do_rw_proc(1, file, (char *) buf, count, ppos);
+}
+
+
+struct file_operations proc_sys_file_operations = {
+ /*lseek:*/ NULL,
+ /*read:*/ proc_readsys,
+ /*write:*/ proc_writesys,
+ /*ioctl:*/ NULL,
+ /*open:*/ NULL,
+ /*release:*/ NULL
+};
+
+
+/* Scan the sysctl entries in table and add them all into /proc */
+void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root)
+{
+ cfs_proc_entry_t * de;
+ int len;
+ mode_t mode;
+
+ for (; table->ctl_name; table++) {
+ /* Can't do anything without a proc name. */
+ if (!table->procname)
+ continue;
+ /* Maybe we can't do anything with it... */
+ if (!table->proc_handler && !table->child) {
+ printk(KERN_WARNING "SYSCTL: Can't register %s\n",
+ table->procname);
+ continue;
+ }
+
+ len = strlen(table->procname);
+ mode = table->mode;
+
+ de = NULL;
+ if (table->proc_handler)
+ mode |= S_IFREG;
+ else {
+ /* If the subdir exists already, de is non-NULL and the
+ * existing entry is reused below; breaking out here would
+ * skip the remaining siblings in this table. */
+ de = search_proc_entry(table->procname, root);
+ }
+
+ if (!de) {
+
+ de = create_proc_entry((char *)table->procname, mode, root);
+ if (!de)
+ continue;
+ de->data = (void *) table;
+ if (table->proc_handler) {
+ de->proc_fops = &proc_sys_file_operations;
+ }
+ }
+ table->de = de;
+ if (de->mode & S_IFDIR)
+ register_proc_table(table->child, de);
+ }
+}
+
+
+/*
+ * Unregister a /proc sysctl table and any subdirectories.
+ */
+void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root)
+{
+ cfs_proc_entry_t *de;
+ for (; table->ctl_name; table++) {
+ if (!(de = table->de))
+ continue;
+ if (de->mode & S_IFDIR) {
+ if (!table->child) {
+ printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
+ continue;
+ }
+ unregister_proc_table(table->child, de);
+
+ /* Don't unregister directories which still have entries.. */
+ if (de->nlink)
+ continue;
+ }
+
+ /* Don't unregister proc entries that are still being used.. */
+ if (de->nlink)
+ continue;
+
+ table->de = NULL;
+ remove_proc_entry((char *)table->procname, root);
+ }
+}
+
+/* The generic string strategy routine: */
+int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int l, len;
+
+ if (!table->data || !table->maxlen)
+ return -ENOTDIR;
+
+ if (oldval && oldlenp) {
+ if(get_user(len, oldlenp))
+ return -EFAULT;
+ if (len) {
+ l = strlen(table->data);
+ if (len > l) len = l;
+ if (len >= table->maxlen)
+ len = table->maxlen;
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(0, ((char *) oldval) + len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
+ if (len == table->maxlen)
+ len--;
+ ((char *) table->data)[len] = 0;
+ }
+ return 0;
+}
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+ unsigned long result = 0, value;
+
+ if (!base) {
+ base = 10;
+ if (*cp == '0') {
+ base = 8;
+ cp++;
+ if ((*cp == 'x') && isxdigit(cp[1])) {
+ cp++;
+ base = 16;
+ }
+ }
+ }
+ while (isxdigit(*cp) &&
+ (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+ result = result*base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+ return result;
+}
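+
+/*
+ * A few worked examples of the conversion above (base 0 means the
+ * base is auto-detected from the prefix):
+ *
+ *     simple_strtoul("123",  NULL, 0)  returns 123  (decimal)
+ *     simple_strtoul("0x1A", NULL, 0)  returns 26   (hex via "0x")
+ *     simple_strtoul("017",  NULL, 0)  returns 15   (octal via "0")
+ */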
+
+#define OP_SET 0
+#define OP_AND 1
+#define OP_OR 2
+#define OP_MAX 3
+#define OP_MIN 4
+
+
+static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+ void *buffer, size_t *lenp, int conv, int op)
+{
+ int *i, vleft, first=1, neg, val;
+ size_t left, len;
+
+ #define TMPBUFLEN 20
+ char buf[TMPBUFLEN], *p;
+
+ if (!table->data || !table->maxlen || !*lenp)
+ {
+ *lenp = 0;
+ return 0;
+ }
+
+ i = (int *) table->data;
+ vleft = table->maxlen / sizeof(int);
+ left = *lenp;
+
+ for (; left && vleft--; i++, first=0) {
+ if (write) {
+ while (left) {
+ char c;
+ if(get_user(c,(char *) buffer))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ buffer = (char *) buffer + 1;
+ }
+ if (!left)
+ break;
+ neg = 0;
+ len = left;
+ if (len > TMPBUFLEN-1)
+ len = TMPBUFLEN-1;
+ if(copy_from_user(buf, buffer, len))
+ return -EFAULT;
+ buf[len] = 0;
+ p = buf;
+ if (*p == '-' && left > 1) {
+ neg = 1;
+ left--, p++;
+ }
+ if (*p < '0' || *p > '9')
+ break;
+ val = simple_strtoul(p, &p, 0) * conv;
+ len = p-buf;
+ if ((len < left) && *p && !isspace(*p))
+ break;
+ if (neg)
+ val = -val;
+ buffer = (char *) buffer + len;
+ left -= len;
+ switch(op) {
+ case OP_SET: *i = val; break;
+ case OP_AND: *i &= val; break;
+ case OP_OR: *i |= val; break;
+ case OP_MAX: if(*i < val)
+ *i = val;
+ break;
+ case OP_MIN: if(*i > val)
+ *i = val;
+ break;
+ }
+ } else {
+ p = buf;
+ if (!first)
+ *p++ = '\t';
+ sprintf(p, "%d", (*i) / conv);
+ len = strlen(buf);
+ if (len > left)
+ len = left;
+ if(copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ left -= len;
+ buffer = (char *) buffer + len;
+ }
+ }
+
+ if (!write && !first && left) {
+ if(put_user('\n', (char *) buffer))
+ return -EFAULT;
+ left--, buffer = (char *) buffer + 1;
+ }
+ if (write) {
+ p = (char *) buffer;
+ while (left) {
+ char c;
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (!isspace(c))
+ break;
+ left--;
+ }
+ }
+ if (write && first)
+ return -EINVAL;
+ *lenp -= left;
+ filp->f_pos = (loff_t)(*lenp);
+ return 0;
+}
+
+/**
+ * proc_dointvec - read a vector of integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET);
+}
+
+
+/**
+ * proc_dostring - read a string sysctl
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * Reads/writes a string from/to the user buffer. If the kernel
+ * buffer provided is not large enough to hold the string, the
+ * string is truncated. The copied string is %NULL-terminated.
+ * If the string is being read by the user process, it is copied
+ * and a newline '\n' is added. It is truncated if the buffer is
+ * not large enough.
+ *
+ * Returns 0 on success.
+ */
+int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ size_t len;
+ char *p, c;
+
+ if (!table->data || !table->maxlen || !*lenp ||
+ (filp->f_pos && !write)) {
+ *lenp = 0;
+ return 0;
+ }
+
+ if (write) {
+ len = 0;
+ p = buffer;
+ while (len < *lenp) {
+ if(get_user(c, p++))
+ return -EFAULT;
+ if (c == 0 || c == '\n')
+ break;
+ len++;
+ }
+ if (len >= (size_t)table->maxlen)
+ len = (size_t)table->maxlen-1;
+ if(copy_from_user(table->data, buffer, len))
+ return -EFAULT;
+ ((char *) table->data)[len] = 0;
+ filp->f_pos += *lenp;
+ } else {
+ len = (size_t)strlen(table->data);
+ if (len > (size_t)table->maxlen)
+ len = (size_t)table->maxlen;
+ if (len > *lenp)
+ len = *lenp;
+ if (len)
+ if(copy_to_user(buffer, table->data, len))
+ return -EFAULT;
+ if (len < *lenp) {
+ if(put_user('\n', ((char *) buffer) + len))
+ return -EFAULT;
+ len++;
+ }
+ *lenp = len;
+ filp->f_pos += len;
+ }
+ return 0;
+}
+
+/* Perform the actual read/write of a sysctl table entry. */
+int do_sysctl_strategy (cfs_sysctl_table_t *table,
+ int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen, void **context)
+{
+ int op = 0, rc;
+ size_t len;
+
+ if (oldval)
+ op |= 004;
+ if (newval)
+ op |= 002;
+
+ if (table->strategy) {
+ rc = table->strategy(table, name, nlen, oldval, oldlenp,
+ newval, newlen, context);
+ if (rc < 0)
+ return rc;
+ if (rc > 0)
+ return 0;
+ }
+
+ /* If there is no strategy routine, or if the strategy returns
+ * zero, proceed with automatic r/w */
+ if (table->data && table->maxlen) {
+ if (oldval && oldlenp) {
+ get_user(len, oldlenp);
+ if (len) {
+ if (len > (size_t)table->maxlen)
+ len = (size_t)table->maxlen;
+ if(copy_to_user(oldval, table->data, len))
+ return -EFAULT;
+ if(put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+ if (newval && newlen) {
+ len = newlen;
+ if (len > (size_t)table->maxlen)
+ len = (size_t)table->maxlen;
+ if(copy_from_user(table->data, newval, len))
+ return -EFAULT;
+ }
+ }
+ return 0;
+}
+
+static int parse_table(int *name, int nlen,
+ void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen,
+ cfs_sysctl_table_t *table, void **context)
+{
+ int n;
+
+repeat:
+
+ if (!nlen)
+ return -ENOTDIR;
+ if (get_user(n, name))
+ return -EFAULT;
+ for ( ; table->ctl_name; table++) {
+ if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
+ int error;
+ if (table->child) {
+/*
+ if (ctl_perm(table, 001))
+ return -EPERM;
+*/
+ if (table->strategy) {
+ error = table->strategy(
+ table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ if (error)
+ return error;
+ }
+ name++;
+ nlen--;
+ table = table->child;
+ goto repeat;
+ }
+ error = do_sysctl_strategy(table, name, nlen,
+ oldval, oldlenp,
+ newval, newlen, context);
+ return error;
+ }
+ }
+ return -ENOTDIR;
+}
+
+int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
+ void *newval, size_t newlen)
+{
+ struct list_head *tmp;
+
+ if (nlen <= 0 || nlen >= CTL_MAXNAME)
+ return -ENOTDIR;
+ if (oldval) {
+ int old_len;
+ if (!oldlenp || get_user(old_len, oldlenp))
+ return -EFAULT;
+ }
+ tmp = &root_table_header.ctl_entry;
+ do {
+ struct ctl_table_header *head =
+ list_entry(tmp, struct ctl_table_header, ctl_entry);
+ void *context = NULL;
+ int error = parse_table(name, nlen, oldval, oldlenp,
+ newval, newlen, head->ctl_table,
+ &context);
+ if (context)
+ cfs_free(context);
+ if (error != -ENOTDIR)
+ return error;
+ tmp = tmp->next;
+ } while (tmp != &root_table_header.ctl_entry);
+ return -ENOTDIR;
+}
+
+/**
+ * register_sysctl_table - register a sysctl hierarchy
+ * @table: the top-level table structure
+ * @insert_at_head: whether the entry should be inserted in front or at the end
+ *
+ * Register a sysctl table hierarchy. @table should be a filled-in ctl_table
+ * array. An entry with a ctl_name of 0 terminates the table.
+ *
+ * The members of the &ctl_table structure are used as follows:
+ *
+ * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
+ * must be unique within that level of sysctl
+ *
+ * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
+ * enter a sysctl file
+ *
+ * data - a pointer to data for use by proc_handler
+ *
+ * maxlen - the maximum size in bytes of the data
+ *
+ * mode - the file permissions for the /proc/sys file, and for sysctl(2)
+ *
+ * child - a pointer to the child sysctl table if this entry is a directory, or
+ * %NULL.
+ *
+ * proc_handler - the text handler routine (described below)
+ *
+ * strategy - the strategy routine (described below)
+ *
+ * de - for internal use by the sysctl routines
+ *
+ * extra1, extra2 - extra pointers usable by the proc handler routines
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories.
+ *
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table. The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ *
+ * More sophisticated management can be enabled by the provision of a
+ * strategy routine with the table entry. This will be called before
+ * any automatic read or write of the data is performed.
+ *
+ * The strategy routine may return
+ *
+ * < 0 - Error occurred (error is passed to user process)
+ *
+ * 0 - OK - proceed with automatic read or write.
+ *
+ * > 0 - OK - read or write has been done by the strategy routine, so
+ * return immediately.
+ *
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler). Several default handlers are available to
+ * cover common cases -
+ *
+ * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
+ * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(),
+ * proc_doulongvec_minmax()
+ *
+ * It is the handler's job to read the input buffer from user memory
+ * and process it. The handler should return 0 on success.
+ *
+ * This routine returns %NULL on a failure to register, and a pointer
+ * to the table header on success.
+ */
+struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table,
+ int insert_at_head)
+{
+ struct ctl_table_header *tmp;
+ tmp = cfs_alloc(sizeof(struct ctl_table_header), 0);
+ if (!tmp)
+ return NULL;
+ tmp->ctl_table = table;
+
+ INIT_LIST_HEAD(&tmp->ctl_entry);
+ if (insert_at_head)
+ list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+ else
+ list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+#ifdef CONFIG_PROC_FS
+ register_proc_table(table, proc_sys_root);
+#endif
+ return tmp;
+}
+
+/**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+ *
+ * Unregisters the sysctl table and all children. proc entries may not
+ * actually be removed until they are no longer used by anyone.
+ */
+void unregister_sysctl_table(struct ctl_table_header * header)
+{
+ list_del(&header->ctl_entry);
+#ifdef CONFIG_PROC_FS
+ unregister_proc_table(header->ctl_table, proc_sys_root);
+#endif
+ cfs_free(header);
+}
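+
+/*
+ * Usage sketch (illustrative; my_val, my_table, my_root and the
+ * ctl_name values are made-up names): expose a single integer as
+ * /proc/sys/example/my_val and tear it down again.  The initializer
+ * order follows the libcfs_table/top_table definitions below:
+ * {ctl_name, procname, data, maxlen, mode, child, proc_handler}.
+ *
+ *     static int my_val = 0;
+ *
+ *     static cfs_sysctl_table_t my_table[] = {
+ *         {1, "my_val", &my_val, sizeof(int), 0644, NULL, &proc_dointvec},
+ *         {0}
+ *     };
+ *
+ *     static cfs_sysctl_table_t my_root[] = {
+ *         {0x200, "example", NULL, 0, 0555, my_table},
+ *         {0}
+ *     };
+ *
+ *     struct ctl_table_header *h = register_sysctl_table(my_root, 0);
+ *     ...
+ *     if (h)
+ *         unregister_sysctl_table(h);
+ */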
+
+
+int cfs_psdev_register(cfs_psdev_t * psdev)
+{
+ cfs_proc_entry_t * entry;
+
+ entry = create_proc_entry (
+ (char *)psdev->name,
+ S_IFREG,
+ proc_dev_root
+ );
+
+ if (!entry) {
+ return -ENOMEM;
+ }
+
+ entry->flags |= CFS_PROC_FLAG_MISCDEV;
+
+ entry->proc_fops = psdev->fops;
+ entry->data = (void *)psdev;
+
+ return 0;
+}
+
+int cfs_psdev_deregister(cfs_psdev_t * psdev)
+{
+ cfs_proc_entry_t * entry;
+
+ entry = search_proc_entry (
+ (char *)psdev->name,
+ proc_dev_root
+ );
+
+ if (entry) {
+
+ ASSERT(entry->data == (void *)psdev);
+ ASSERT(entry->flags & CFS_PROC_FLAG_MISCDEV);
+
+ remove_proc_entry(
+ (char *)psdev->name,
+ proc_dev_root
+ );
+ }
+
+ return 0;
+}
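+
+/*
+ * Usage sketch (illustrative; my_psdev and my_fops are invented
+ * names): a pseudo-device descriptor carries a name and a fops table,
+ * which is all the two routines above consume; registering it creates
+ * an entry under the proc fs dev root.
+ *
+ *     static cfs_psdev_t my_psdev = { ... name = "my_dev",
+ *                                     ... fops = &my_fops ... };
+ *
+ *     rc = cfs_psdev_register(&my_psdev);
+ *     ...
+ *     cfs_psdev_deregister(&my_psdev);
+ */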
+
+
+
+extern char debug_file_path[1024];
+
+#define PSDEV_LIBCFS (0x100)
+enum {
+ PSDEV_DEBUG = 1, /* control debugging */
+ PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
+ PSDEV_PRINTK, /* force all errors to console */
+ PSDEV_CONSOLE, /* allow _any_ messages to console */
+ PSDEV_DEBUG_PATH, /* crashdump log location */
+ PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
+ PSDEV_LIBCFS_MEMUSED, /* bytes currently PORTAL_ALLOCated */
+};
+
+static struct ctl_table libcfs_table[] = {
+ {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
+ sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
+/*
+ {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
+ sizeof(portals_upcall), 0644, NULL, &proc_dostring,
+ &sysctl_string},
+*/
+ {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter,
+ sizeof(int), 0644, NULL, &proc_dointvec},
+ {0}
+};
+
+static struct ctl_table top_table[2] = {
+ {PSDEV_LIBCFS, "libcfs", NULL, 0, 0555, libcfs_table},
+ {0}
+};
+
+
+#ifdef PORTALS_PROFILING
+/*
+ * profiling stuff. we do this statically for now 'cause its simple,
+ * but we could do some tricks with elf sections to have this array
+ * automatically built.
+ */
+#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, }
+
+struct prof_ent prof_ents[] = {
+ def_prof(our_recvmsg),
+ def_prof(our_sendmsg),
+ def_prof(socknal_recv),
+ def_prof(lib_parse),
+ def_prof(conn_list_walk),
+ def_prof(memcpy),
+ def_prof(lib_finalize),
+ def_prof(pingcli_time),
+ def_prof(gmnal_send),
+ def_prof(gmnal_recv),
+};
+
+EXPORT_SYMBOL(prof_ents);
+
+/*
+ * this function is as crazy as the proc filling api
+ * requires.
+ *
+ * buffer: page allocated for us to scribble in. the
+ * data returned to the user will be taken from here.
+ * *start: address of the pointer that will tell the
+ * caller where in buffer the data the user wants is.
+ * ppos: offset in the entire /proc file that the user
+ * currently wants.
+ * wanted: the amount of data the user wants.
+ *
+ * while going, 'curpos' is the offset in the entire
+ * file where we currently are. We only actually
+ * start filling buffer when we get to a place in
+ * the file that the user cares about.
+ *
+ * we take care to only sprintf when the user cares because
+ * we're holding a lock while we do this.
+ *
+ * we're smart and know that we generate fixed size lines.
+ * we only start writing to the buffer when the user cares.
+ * This is unpredictable because we don't snapshot the
+ * list between calls that are filling in a file from
+ * the list. The list could change mid read and the
+ * output will look very weird indeed. oh well.
+ */
+
+static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted,
+ int *eof, void *data)
+{
+ int len = 0, i;
+ int curpos;
+ char *header = "Interval Cycles_per (Starts Finishes Total)\n";
+ int header_len = strlen(header);
+ char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)";
+ int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1);
+
+ *start = buffer;
+
+ if (ppos < header_len) {
+ int diff = MIN(header_len, wanted);
+ memcpy(buffer, header + ppos, diff);
+ len += diff;
+ ppos += diff;
+ }
+
+ if (len >= wanted)
+ goto out;
+
+ curpos = header_len;
+
+ for ( i = 0; i < MAX_PROFS ; i++) {
+ int copied;
+ struct prof_ent *pe = &prof_ents[i];
+ long long cycles_per;
+ /*
+ * find the part of the array that the buffer wants
+ */
+ if (ppos >= (curpos + line_len)) {
+ curpos += line_len;
+ continue;
+ }
+ /* the clever caller split a line */
+ if (ppos > curpos) {
+ *start = buffer + (ppos - curpos);
+ }
+
+ if (pe->finishes == 0)
+ cycles_per = 0;
+ else
+ {
+ cycles_per = pe->total_cycles;
+ do_div (cycles_per, pe->finishes);
+ }
+
+ copied = sprintf(buffer + len, format, pe->str, cycles_per,
+ pe->starts, pe->finishes, pe->total_cycles);
+
+ len += copied;
+
+ /* pad to line len, -1 for \n */
+ if ((copied < line_len-1)) {
+ int diff = (line_len-1) - copied;
+ memset(buffer + len, ' ', diff);
+ len += diff;
+ copied += diff;
+ }
+
+ buffer[len++]= '\n';
+
+ /* bail if we have enough */
+ if (((buffer + len) - *start) >= wanted)
+ break;
+
+ curpos += line_len;
+ }
+
+ /* lameness */
+ if (i == MAX_PROFS)
+ *eof = 1;
+ out:
+
+ return MIN(((buffer + len) - *start), wanted);
+}
+
+/*
+ * all kids love /proc :/
+ */
+static unsigned char basedir[]="net/portals";
+#endif /* PORTALS_PROFILING */
+
+
+int insert_proc(void)
+{
+ cfs_proc_entry_t *ent;
+#if PORTALS_PROFILING
+ unsigned char dir[128];
+
+ if (ARRAY_SIZE(prof_ents) != MAX_PROFS) {
+ CERROR("profiling enum and array are out of sync.\n");
+ return -1;
+ }
+
+ /*
+ * This is pretty lame. assuming that failure just
+ * means that they already existed.
+ */
+ dir[0] = '\0'; /* dir is an uninitialized stack buffer */
+ strcat(dir, basedir);
+ create_proc_entry(dir, S_IFDIR, 0);
+
+ strcat(dir, "/cycles");
+ ent = create_proc_entry(dir, 0, 0);
+ if (!ent) {
+ CERROR("couldn't register %s?\n", dir);
+ return -1;
+ }
+
+ ent->data = NULL;
+ ent->read_proc = prof_read_proc;
+#endif /* PORTALS_PROFILING */
+
+#ifdef CONFIG_SYSCTL
+ if (!portals_table_header)
+ portals_table_header = register_sysctl_table(top_table, 0);
+#endif
+
+ ent = create_proc_entry("sys/portals/dump_kernel", 0, NULL);
+ if (ent == NULL) {
+ CERROR("couldn't register dump_kernel\n");
+ return -1;
+ }
+ ent->write_proc = trace_dk;
+
+ ent = create_proc_entry("sys/portals/daemon_file", 0, NULL);
+ if (ent == NULL) {
+ CERROR("couldn't register daemon_file\n");
+ return -1;
+ }
+ ent->write_proc = trace_write_daemon_file;
+ ent->read_proc = trace_read_daemon_file;
+
+ ent = create_proc_entry("sys/portals/debug_mb", 0, NULL);
+ if (ent == NULL) {
+ CERROR("couldn't register debug_mb\n");
+ return -1;
+ }
+ ent->write_proc = trace_write_debug_mb;
+ ent->read_proc = trace_read_debug_mb;
+
+ return 0;
+}
+
+void remove_proc(void)
+{
+#if PORTALS_PROFILING
+ unsigned char dir[128];
+ int end;
+
+ dir[0]='\0';
+ strcat(dir, basedir);
+
+ end = strlen(dir);
+
+ strcat(dir, "/cycles");
+ remove_proc_entry(dir, 0);
+
+ dir[end] = '\0';
+ remove_proc_entry(dir, 0);
+#endif /* PORTALS_PROFILING */
+
+ remove_proc_entry("sys/portals/dump_kernel", NULL);
+ remove_proc_entry("sys/portals/daemon_file", NULL);
+ remove_proc_entry("sys/portals/debug_mb", NULL);
+
+#ifdef CONFIG_SYSCTL
+ if (portals_table_header)
+ unregister_sysctl_table(portals_table_header);
+ portals_table_header = NULL;
+#endif
+}
+
+
+/*
+ * proc process routines of kernel space
+ */
+
+cfs_file_t *
+lustre_open_file(char * filename)
+{
+ int rc = 0;
+ cfs_file_t * fh = NULL;
+ cfs_proc_entry_t * fp = NULL;
+
+ fp = search_proc_entry(filename, proc_fs_root);
+
+ if (!fp) {
+ rc = -ENOENT;
+ return NULL;
+ }
+
+ fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO);
+
+ if (!fh) {
+ rc = -ENOMEM;
+ return NULL;
+ }
+
+ fh->private_data = (void *)fp;
+ fh->f_op = fp->proc_fops;
+
+ if (fh->f_op->open) {
+ rc = (fh->f_op->open)(fh);
+ } else {
+ fp->nlink++;
+ }
+
+ if (0 != rc) {
+ cfs_free(fh);
+ return NULL;
+ }
+
+ return fh;
+}
+
+int
+lustre_close_file(cfs_file_t * fh)
+{
+ int rc = 0;
+ cfs_proc_entry_t * fp = NULL;
+
+ fp = (cfs_proc_entry_t *) fh->private_data;
+
+ if (fh->f_op->release) {
+ rc = (fh->f_op->release)(fh);
+ } else {
+ fp->nlink--;
+ }
+
+ cfs_free(fh);
+
+ return rc;
+}
+
+int
+lustre_do_ioctl( cfs_file_t * fh,
+ unsigned long cmd,
+ ulong_ptr arg )
+{
+ int rc = 0;
+
+ if (fh->f_op->ioctl) {
+ rc = (fh->f_op->ioctl)(fh, cmd, arg);
+ }
+
+ if (rc != 0) {
+ printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n",
+ cmd, arg, rc);
+ }
+
+ return rc;
+}
+
+int
+lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl)
+{
+ int rc = 0;
+ ulong_ptr data;
+
+ data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL);
+
+ /* obd ioctl code */
+ if (_IOC_TYPE(devctl->cmd) == 'f') {
+#if 0
+ struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data;
+
+ if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE &&
+ devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+ unsigned long off = obd->ioc_len;
+
+ if (obd->ioc_pbuf1) {
+ obd->ioc_pbuf1 = (char *)(data + off);
+ off += size_round(obd->ioc_plen1);
+ }
+
+ if (obd->ioc_pbuf2) {
+ obd->ioc_pbuf2 = (char *)(data + off);
+ }
+ }
+ #endif
+ }
+
+ rc = lustre_do_ioctl(fh, devctl->cmd, data);
+
+ return rc;
+}
+
+
+size_t
+lustre_read_file(
+ cfs_file_t * fh,
+ loff_t off,
+ size_t size,
+ char * buf
+ )
+{
+ size_t rc = 0;
+
+ if (fh->f_op->read) {
+ rc = (fh->f_op->read) (fh, buf, size, &off);
+ }
+
+ return rc;
+}
+
+
+size_t
+lustre_write_file(
+ cfs_file_t * fh,
+ loff_t off,
+ size_t size,
+ char * buf
+ )
+{
+ size_t rc = 0;
+
+ if (fh->f_op->write) {
+ rc = (fh->f_op->write)(fh, buf, size, &off);
+ }
+
+ return rc;
+}
+
+#else /* !__KERNEL__ */
+
+#include <lnet/api-support.h>
+#include <liblustre.h>
+#include <lustre_lib.h>
+
+/*
+ * proc process routines of user space
+ */
+
+HANDLE cfs_proc_open (char * filename, int oflag)
+{
+ NTSTATUS status;
+ IO_STATUS_BLOCK iosb;
+ int rc;
+
+ HANDLE FileHandle = INVALID_HANDLE_VALUE;
+ OBJECT_ATTRIBUTES ObjectAttributes;
+ ACCESS_MASK DesiredAccess;
+ ULONG CreateDisposition;
+ ULONG ShareAccess;
+ ULONG CreateOptions;
+ UNICODE_STRING UnicodeName;
+ USHORT NameLength;
+
+ PFILE_FULL_EA_INFORMATION Ea = NULL;
+ ULONG EaLength;
+ UCHAR EaBuffer[EA_MAX_LENGTH];
+
+ /* Check the filename: should start with "/proc" or "/dev" */
+ NameLength = (USHORT)strlen(filename);
+ if (NameLength > 0x05) {
+ if (_strnicmp(filename, "/proc/", 6) == 0) {
+ filename += 6;
+ NameLength -=6;
+ if (NameLength <= 0) {
+ rc = -EINVAL;
+ goto errorout;
+ }
+ } else if (_strnicmp(filename, "/dev/", 5) == 0) {
+ } else {
+ rc = -EINVAL;
+ goto errorout;
+ }
+ } else {
+ rc = -EINVAL;
+ goto errorout;
+ }
+
+ /* Analyze the flags settings */
+
+ if (cfs_is_flag_set(oflag, O_WRONLY)) {
+ DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
+ ShareAccess = 0;
+ } else if (cfs_is_flag_set(oflag, O_RDWR)) {
+ DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
+ ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ } else {
+ DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
+ ShareAccess = FILE_SHARE_READ;
+ }
+
+ if (cfs_is_flag_set(oflag, O_CREAT)) {
+ if (cfs_is_flag_set(oflag, O_EXCL)) {
+ CreateDisposition = FILE_CREATE;
+ rc = -EINVAL;
+ goto errorout;
+ } else {
+ CreateDisposition = FILE_OPEN_IF;
+ }
+ } else {
+ CreateDisposition = FILE_OPEN;
+ }
+
+ if (cfs_is_flag_set(oflag, O_TRUNC)) {
+ if (cfs_is_flag_set(oflag, O_EXCL)) {
+ CreateDisposition = FILE_OVERWRITE;
+ } else {
+ CreateDisposition = FILE_OVERWRITE_IF;
+ }
+ }
+
+ CreateOptions = 0;
+
+ if (cfs_is_flag_set(oflag, O_DIRECTORY)) {
+ cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE);
+ }
+
+ if (cfs_is_flag_set(oflag, O_SYNC)) {
+ cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
+ }
+
+ if (cfs_is_flag_set(oflag, O_DIRECT)) {
+ cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
+ }
+
+ /* Initialize the unicode path name for the specified file */
+ RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK);
+
+ /* Setup the object attributes structure for the file. */
+ InitializeObjectAttributes(
+ &ObjectAttributes,
+ &UnicodeName,
+ OBJ_CASE_INSENSITIVE,
+ NULL,
+ NULL );
+
+ /* building EA for the proc entry ... */
+ Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+ Ea->NextEntryOffset = 0;
+ Ea->Flags = 0;
+ Ea->EaNameLength = (UCHAR)NameLength;
+ Ea->EaValueLength = 0;
+ RtlCopyMemory(
+ &(Ea->EaName),
+ filename,
+ NameLength + 1
+ );
+ EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+ Ea->EaNameLength + 1;
+
+ /* Now to open or create the file now */
+ status = ZwCreateFile(
+ &FileHandle,
+ DesiredAccess,
+ &ObjectAttributes,
+ &iosb,
+ 0,
+ FILE_ATTRIBUTE_NORMAL,
+ ShareAccess,
+ CreateDisposition,
+ CreateOptions,
+ Ea,
+ EaLength );
+
+ /* Check the returned status of Iosb ... */
+
+ if (!NT_SUCCESS(status)) {
+ rc = cfs_error_code(status);
+ goto errorout;
+ }
+
+errorout:
+
+ return FileHandle;
+}
+
+int cfs_proc_close(HANDLE handle)
+{
+ if (handle) {
+ NtClose((HANDLE)handle);
+ }
+
+ return 0;
+}
+
+int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count)
+{
+ NTSTATUS status;
+ IO_STATUS_BLOCK iosb;
+ LARGE_INTEGER offset;
+
+
+ offset.QuadPart = 0;
+
+ /* read file data */
+ status = NtReadFile(
+ (HANDLE)handle,
+ 0,
+ NULL,
+ NULL,
+ &iosb,
+ buffer,
+ count,
+ &offset,
+ NULL);
+
+ /* check the return status */
+ if (!NT_SUCCESS(status)) {
+ printf("NtReadFile request failed 0x%0x\n", status);
+ goto errorout;
+ }
+
+errorout:
+
+ if (NT_SUCCESS(status)) {
+ return iosb.Information;
+ }
+
+ return cfs_error_code(status);
+}
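+
+/*
+ * Usage sketch (illustrative): read a proc value from user space.
+ * The path must start with "/proc/" or "/dev/" as checked in
+ * cfs_proc_open(); "sys/portals/debug_mb" is one of the readable
+ * entries registered on the kernel side above.
+ *
+ *     char   buf[64];
+ *     HANDLE h;
+ *     int    n;
+ *
+ *     h = cfs_proc_open("/proc/sys/portals/debug_mb", O_RDONLY);
+ *     if (h != INVALID_HANDLE_VALUE) {
+ *         n = cfs_proc_read(h, buf, sizeof(buf) - 1);
+ *         if (n > 0)
+ *             buf[n] = 0;
+ *         cfs_proc_close(h);
+ *     }
+ */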
+
+
+int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count)
+{
+ NTSTATUS status;
+ IO_STATUS_BLOCK iosb;
+ LARGE_INTEGER offset;
+
+ offset.QuadPart = -1;
+
+ /* write buffer to the opened file */
+ status = NtWriteFile(
+ (HANDLE)handle,
+ 0,
+ NULL,
+ NULL,
+ &iosb,
+ buffer,
+ count,
+ &offset,
+ NULL);
+
+ /* check the return status */
+ if (!NT_SUCCESS(status)) {
+ printf("NtWriteFile request failed 0x%0x\n", status);
+ goto errorout;
+ }
+
+errorout:
+
+ if (NT_SUCCESS(status)) {
+ return iosb.Information;
+ }
+
+ return cfs_error_code(status);
+}
+
+int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer)
+{
+ PUCHAR procdat = NULL;
+ CFS_PROC_IOCTL procctl;
+ ULONG length = 0;
+ ULONG extra = 0;
+
+ NTSTATUS status;
+ IO_STATUS_BLOCK iosb;
+
+ procctl.cmd = cmd;
+
+ if(_IOC_TYPE(cmd) == IOC_PORTAL_TYPE) {
+ struct portal_ioctl_data * portal;
+ portal = (struct portal_ioctl_data *) buffer;
+ length = portal->ioc_len;
+ } else if (_IOC_TYPE(cmd) == 'f') {
+ struct obd_ioctl_data * obd;
+ obd = (struct obd_ioctl_data *) buffer;
+ length = obd->ioc_len;
+ extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2);
+ } else if(_IOC_TYPE(cmd) == 'u') {
+ length = 4;
+ extra = 0;
+ } else {
+ printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n");
+ cfs_enter_debugger();
+ status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ procctl.len = length + extra;
+ procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL));
+
+ if (NULL == procdat) {
+ printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n");
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ cfs_enter_debugger();
+ goto errorout;
+ }
+ memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL));
+ memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL));
+ memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length);
+ length += sizeof(CFS_PROC_IOCTL);
+
+ if (_IOC_TYPE(cmd) == 'f') {
+
+ char *ptr;
+ struct obd_ioctl_data * data;
+ struct obd_ioctl_data * obd;
+
+ data = (struct obd_ioctl_data *) buffer;
+ obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL));
+ ptr = obd->ioc_bulk;
+
+ if (data->ioc_inlbuf1) {
+ obd->ioc_inlbuf1 = ptr;
+ LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+ }
+
+ if (data->ioc_inlbuf2) {
+ obd->ioc_inlbuf2 = ptr;
+ LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+ }
+ if (data->ioc_inlbuf3) {
+ obd->ioc_inlbuf3 = ptr;
+ LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+ }
+ if (data->ioc_inlbuf4) {
+ obd->ioc_inlbuf4 = ptr;
+ LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+ }
+
+ if ( cmd != (ULONG)OBD_IOC_BRW_WRITE &&
+ cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+ if (data->ioc_pbuf1 && data->ioc_plen1) {
+ obd->ioc_pbuf1 = &procdat[length];
+ memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1);
+ length += size_round(data->ioc_plen1);
+ }
+
+ if (data->ioc_pbuf2 && data->ioc_plen2) {
+ obd->ioc_pbuf2 = &procdat[length];
+ memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2);
+ length += size_round(data->ioc_plen2);
+ }
+ }
+
+ if (obd_ioctl_is_invalid(obd)) {
+ cfs_enter_debugger();
+ }
+ }
+
+ status = NtDeviceIoControlFile(
+ (HANDLE)handle,
+ NULL, NULL, NULL, &iosb,
+ IOCTL_LIBCFS_ENTRY,
+ procdat, length,
+ procdat, length );
+
+
+ if (NT_SUCCESS(status)) {
+ memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len);
+ }
+
+errorout:
+
+ if (procdat) {
+ free(procdat);
+ }
+
+ return cfs_error_code(status);
+}
+
+#endif /* __KERNEL__ */
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ * Wait queue routines
+ */
+
+/*
+ * cfs_waitq_init
+ * To initialize the wait queue
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_init(cfs_waitq_t *waitq)
+{
+ waitq->magic = CFS_WAITQ_MAGIC;
+ waitq->flags = 0;
+ INIT_LIST_HEAD(&(waitq->waiters));
+ spin_lock_init(&(waitq->guard));
+}
+
+/*
+ * cfs_waitlink_init
+ * To initialize the wait link node
+ *
+ * Arguments:
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitlink_init(cfs_waitlink_t *link)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ cfs_assert(slot->Magic == TASKSLT_MAGIC);
+
+ memset(link, 0, sizeof(cfs_waitlink_t));
+
+ link->magic = CFS_WAITLINK_MAGIC;
+ link->flags = 0;
+
+ link->event = &(slot->Event);
+ link->hits = &(slot->hits);
+
+ atomic_inc(&slot->count);
+
+ INIT_LIST_HEAD(&(link->waitq[0].link));
+ INIT_LIST_HEAD(&(link->waitq[1].link));
+
+ link->waitq[0].waitl = link->waitq[1].waitl = link;
+}
+
+
+/*
+ * cfs_waitlink_fini
+ * To finalize the wait link node
+ *
+ * Arguments:
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitlink_fini(cfs_waitlink_t *link)
+{
+ cfs_task_t * task = cfs_current();
+ PTASK_SLOT slot = NULL;
+
+ if (!task) {
+ /* should bugchk here */
+ cfs_enter_debugger();
+ return;
+ }
+
+ slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+ cfs_assert(slot->Magic == TASKSLT_MAGIC);
+ cfs_assert(link->magic == CFS_WAITLINK_MAGIC);
+ cfs_assert(link->waitq[0].waitq == NULL);
+ cfs_assert(link->waitq[1].waitq == NULL);
+
+ atomic_dec(&slot->count);
+}
+
+
+/*
+ * cfs_waitq_add_internal
+ * To queue the wait link node to the wait queue
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * link: pointer to the cfs_waitlink_t structure
+ * waitqid: queue id (Normal or Forward waitq)
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_add_internal(cfs_waitq_t *waitq,
+ cfs_waitlink_t *link,
+ __u32 waitqid )
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+ LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+ LASSERT(waitqid < CFS_WAITQ_CHANNELS);
+
+ spin_lock(&(waitq->guard));
+ LASSERT(link->waitq[waitqid].waitq == NULL);
+ link->waitq[waitqid].waitq = waitq;
+ if (link->flags & CFS_WAITQ_EXCLUSIVE) {
+ list_add_tail(&link->waitq[waitqid].link, &waitq->waiters);
+ } else {
+ list_add(&link->waitq[waitqid].link, &waitq->waiters);
+ }
+ spin_unlock(&(waitq->guard));
+}
+/*
+ * cfs_waitq_add
+ * To queue the wait link node to the wait queue
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_add(cfs_waitq_t *waitq,
+ cfs_waitlink_t *link)
+{
+ cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL);
+}
+
+/*
+ * cfs_waitq_add_exclusive
+ * To set the wait link node to exclusive mode
+ * and queue it to the wait queue
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_add_exclusive( cfs_waitq_t *waitq,
+ cfs_waitlink_t *link)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+ LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+ link->flags |= CFS_WAITQ_EXCLUSIVE;
+ cfs_waitq_add(waitq, link);
+}
+
+/*
+ * cfs_waitq_forward
+ * To be determined.
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_forward( cfs_waitlink_t *link,
+ cfs_waitq_t *waitq)
+{
+ cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD);
+}
+
+/*
+ * cfs_waitq_del
+ * To remove the wait link node from the waitq
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_del( cfs_waitq_t *waitq,
+ cfs_waitlink_t *link)
+{
+ int i = 0;
+
+ LASSERT(waitq != NULL);
+ LASSERT(link != NULL);
+
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+ LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+ spin_lock(&(waitq->guard));
+
+ for (i=0; i < CFS_WAITQ_CHANNELS; i++) {
+ if (link->waitq[i].waitq == waitq)
+ break;
+ }
+
+ if (i < CFS_WAITQ_CHANNELS) {
+ link->waitq[i].waitq = NULL;
+ list_del_init(&link->waitq[i].link);
+ } else {
+ cfs_enter_debugger();
+ }
+
+ spin_unlock(&(waitq->guard));
+}
+
+/*
+ * cfs_waitq_active
+ * Is the waitq active (not empty) ?
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ * Zero: the waitq is empty
+ * Non-Zero: the waitq is active
+ *
+ * Notes:
+ * We always return TRUE here, the same as the Darwin port.
+ */
+
+int cfs_waitq_active(cfs_waitq_t *waitq)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+ return (1);
+}
+
+/*
+ * cfs_waitq_signal_nr
+ * To wake up all the non-exclusive tasks plus nr exclusive
+ * ones in the waitq
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ * nr: number of exclusive tasks to be woken up
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+
+void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr)
+{
+ int result;
+ cfs_waitlink_channel_t * scan;
+
+ LASSERT(waitq != NULL);
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+ spin_lock(&waitq->guard);
+
+ list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) {
+
+ cfs_waitlink_t *waitl = scan->waitl;
+
+ result = cfs_wake_event(waitl->event);
+ LASSERT( result == FALSE || result == TRUE );
+
+ if (result) {
+ atomic_inc(waitl->hits);
+ }
+
+ if ((waitl->flags & CFS_WAITQ_EXCLUSIVE) && --nr == 0)
+ break;
+ }
+
+ spin_unlock(&waitq->guard);
+ return;
+}
+
+/*
+ * cfs_waitq_signal
+ * To wake up all the non-exclusive tasks and 1 exclusive
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_signal(cfs_waitq_t *waitq)
+{
+ cfs_waitq_signal_nr(waitq, 1);
+}
+
+
+/*
+ * cfs_waitq_broadcast
+ * To wake up all the tasks in the waitq
+ *
+ * Arguments:
+ * waitq: pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_broadcast(cfs_waitq_t *waitq)
+{
+ LASSERT(waitq != NULL);
+ LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+ cfs_waitq_signal_nr(waitq, 0);
+}
+
+/*
+ * cfs_waitq_wait
+ * To wait on the link node until it is signaled.
+ *
+ * Arguments:
+ * link: pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state)
+{
+ LASSERT(link != NULL);
+ LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+ if (atomic_read(link->hits) > 0) {
+ atomic_dec(link->hits);
+ LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
+ } else {
+ cfs_wait_event(link->event, 0);
+ }
+}
+
+/*
+ * cfs_waitq_timedwait
+ * To wait the link node to be signaled with a timeout limit
+ *
+ * Arguments:
+ * link: pointer to the cfs_waitlink_t structure
+ * timeout: the timeout limitation
+ *
+ * Return Value:
+ * Woken up: return the difference of the current time and
+ * the timeout
+ * Timeout: return 0
+ *
+ * Notes:
+ * What if the task happens to be woken up exactly at the timeout?
+ */
+
+cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link,
+ cfs_task_state_t state,
+ cfs_duration_t timeout)
+{
+
+ if (atomic_read(link->hits) > 0) {
+ atomic_dec(link->hits);
+ LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
+ return TRUE;
+ }
+
+ return (cfs_duration_t)cfs_wait_event(link->event, timeout);
+}
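+
+/*
+ * Putting the pieces together, a waiter would typically look like
+ * this (illustrative sketch; "state" stands for whatever task state
+ * constant the caller uses elsewhere in libcfs):
+ *
+ *     cfs_waitq_t    wq;       initialized once, shared
+ *     cfs_waitlink_t wl;       per-waiter, on the stack
+ *
+ *     cfs_waitq_init(&wq);
+ *     ...
+ *     cfs_waitlink_init(&wl);
+ *     cfs_waitq_add(&wq, &wl);
+ *     cfs_waitq_wait(&wl, state);      blocks until signalled
+ *     cfs_waitq_del(&wq, &wl);
+ *     cfs_waitlink_fini(&wl);
+ *
+ * A waker on another thread calls cfs_waitq_signal(&wq) to wake one
+ * exclusive waiter (plus all non-exclusive ones), or
+ * cfs_waitq_broadcast(&wq) to wake everybody.
+ */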
+
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include <lnet/lnet.h>
+
+ks_data_t ksocknal_data;
+
+ULONG
+ksocknal_tdi_send_flags(ULONG SockFlags)
+{
+ ULONG TdiFlags = 0;
+
+ if (cfs_is_flag_set(SockFlags, MSG_OOB)) {
+ cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED);
+ }
+
+ if (cfs_is_flag_set(SockFlags, MSG_MORE)) {
+ cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL);
+ }
+
+ if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) {
+ cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING);
+ }
+
+ return TdiFlags;
+}
+
+NTSTATUS
+KsIrpCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ )
+{
+ if (NULL != Context) {
+ KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE);
+ }
+
+ return STATUS_MORE_PROCESSING_REQUIRED;
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+ UNREFERENCED_PARAMETER(Irp);
+}
+
+
+/*
+ * KsBuildTdiIrp
+ * Allocate a new IRP and initialize it to be issued to tdi
+ *
+ * Arguments:
+ * DeviceObject: device object created by the underlying
+ * TDI transport driver
+ *
+ * Return Value:
+ * PIRP: the allocated Irp on success or NULL on failure.
+ *
+ * NOTES:
+ * N/A
+ */
+
+PIRP
+KsBuildTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject
+ )
+{
+ PIRP Irp;
+ PIO_STACK_LOCATION IrpSp;
+
+ //
+ // Allocating the IRP ...
+ //
+
+ Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
+
+ if (NULL != Irp) {
+
+ //
+ // Getting the Next Stack Location ...
+ //
+
+ IrpSp = IoGetNextIrpStackLocation(Irp);
+
+ //
+ // Initializing Irp ...
+ //
+
+ IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL;
+ IrpSp->Parameters.DeviceIoControl.IoControlCode = 0;
+ }
+
+ return Irp;
+}
+
+/*
+ * KsSubmitTdiIrp
+ * Issue the Irp to the underlying tdi driver
+ *
+ * Arguments:
+ * DeviceObject: the device object created by TDI driver
+ * Irp: the I/O request packet to be processed
+ * bSynchronous: synchronous or not. If true, we wait
+ * until the request is finished.
+ * Information: returned info
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsSubmitTdiIrp(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN BOOLEAN bSynchronous,
+ OUT PULONG Information
+ )
+{
+ NTSTATUS Status;
+ KEVENT Event;
+
+ if (bSynchronous) {
+
+ KeInitializeEvent(
+ &Event,
+ SynchronizationEvent,
+ FALSE
+ );
+
+
+ IoSetCompletionRoutine(
+ Irp,
+ KsIrpCompletionRoutine,
+ &Event,
+ TRUE,
+ TRUE,
+ TRUE
+ );
+ }
+
+ Status = IoCallDriver(DeviceObject, Irp);
+
+ if (bSynchronous) {
+
+ if (STATUS_PENDING == Status) {
+
+ Status = KeWaitForSingleObject(
+ &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+ }
+
+ Status = Irp->IoStatus.Status;
+
+ if (Information) {
+ *Information = (ULONG)(Irp->IoStatus.Information);
+ }
+
+ Irp->MdlAddress = NULL;
+ IoFreeIrp(Irp);
+ }
+
+ if (!NT_SUCCESS(Status)) {
+
+ KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n",
+ Status, KsNtStatusToString(Status)));
+ }
+
+ return (Status);
+}
+
+
+
+/*
+ * KsOpenControl
+ * Open the Control Channel Object ...
+ *
+ * Arguments:
+ * DeviceName: the device name to be opened
+ * Handle: opened handle in success case
+ * FileObject: the fileobject of the device
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsOpenControl(
+ IN PUNICODE_STRING DeviceName,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ OBJECT_ATTRIBUTES ObjectAttributes;
+ IO_STATUS_BLOCK IoStatus;
+
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ //
+ // Initializing ...
+ //
+
+ InitializeObjectAttributes(
+ &ObjectAttributes,
+ DeviceName,
+ OBJ_CASE_INSENSITIVE |
+ OBJ_KERNEL_HANDLE,
+ NULL,
+ NULL
+ );
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ //
+ // Creating the Transport Address Object ...
+ //
+
+ Status = ZwCreateFile(
+ Handle,
+ FILE_READ_DATA | FILE_WRITE_DATA,
+ &ObjectAttributes,
+ &IoStatus,
+ 0,
+ FILE_ATTRIBUTE_NORMAL,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
+ FILE_OPEN,
+ 0,
+ NULL,
+ 0
+ );
+
+
+ if (NT_SUCCESS(Status)) {
+
+ //
+ // Now Obtaining the FileObject of the Transport Address ...
+ //
+
+ Status = ObReferenceObjectByHandle(
+ *Handle,
+ FILE_ANY_ACCESS,
+ NULL,
+ KernelMode,
+ FileObject,
+ NULL
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ ZwClose(*Handle);
+ }
+
+ } else {
+
+ cfs_enter_debugger();
+ }
+
+ return (Status);
+}
+
+
+/*
+ * KsCloseControl
+ * Release the Control Channel Handle and FileObject
+ *
+ * Arguments:
+ * Handle: the channel handle to be released
+ * FileObject: the fileobject to be released
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsCloseControl(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ if (FileObject) {
+
+ ObDereferenceObject(FileObject);
+ }
+
+ if (Handle) {
+
+ Status = ZwClose(Handle);
+ }
+
+ ASSERT(NT_SUCCESS(Status));
+
+ return (Status);
+}
+
+
+/*
+ * KsOpenAddress
+ * Open the tdi address object
+ *
+ * Arguments:
+ * DeviceName: device name of the address object
+ * pAddress: tdi address of the address object
+ * AddressLength: length in bytes of the tdi address
+ * Handle: the newly opened handle
+ * FileObject: the newly opened fileobject
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsOpenAddress(
+ IN PUNICODE_STRING DeviceName,
+ IN PTRANSPORT_ADDRESS pAddress,
+ IN ULONG AddressLength,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ PFILE_FULL_EA_INFORMATION Ea = NULL;
+ ULONG EaLength;
+ UCHAR EaBuffer[EA_MAX_LENGTH];
+
+ OBJECT_ATTRIBUTES ObjectAttributes;
+ IO_STATUS_BLOCK IoStatus;
+
+ //
+ // Building EA for the Address Object to be Opened ...
+ //
+
+ Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+ Ea->NextEntryOffset = 0;
+ Ea->Flags = 0;
+ Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
+ Ea->EaValueLength = (USHORT)AddressLength;
+ RtlCopyMemory(
+ &(Ea->EaName),
+ TdiTransportAddress,
+ Ea->EaNameLength + 1
+ );
+ RtlMoveMemory(
+ &(Ea->EaName[Ea->EaNameLength + 1]),
+ pAddress,
+ AddressLength
+ );
+ EaLength = sizeof(FILE_FULL_EA_INFORMATION) +
+ Ea->EaNameLength + AddressLength;
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+
+ //
+ // Initializing ...
+ //
+
+ InitializeObjectAttributes(
+ &ObjectAttributes,
+ DeviceName,
+ OBJ_CASE_INSENSITIVE |
+ OBJ_KERNEL_HANDLE,
+ NULL,
+ NULL
+ );
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ //
+ // Creating the Transport Address Object ...
+ //
+
+ Status = ZwCreateFile(
+ Handle,
+ FILE_READ_DATA | FILE_WRITE_DATA,
+ &ObjectAttributes,
+ &IoStatus,
+ 0,
+ FILE_ATTRIBUTE_NORMAL,
+ 0, /* DON'T REUSE: FILE_SHARE_READ | FILE_SHARE_WRITE, */
+ FILE_OPEN,
+ 0,
+ Ea,
+ EaLength
+ );
+
+
+ if (NT_SUCCESS(Status)) {
+
+ //
+ // Now Obtaining the FileObject of the Transport Address ...
+ //
+
+ Status = ObReferenceObjectByHandle(
+ *Handle,
+ FILE_ANY_ACCESS,
+ NULL,
+ KernelMode,
+ FileObject,
+ NULL
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ ZwClose(*Handle);
+ }
+
+ } else {
+
+ cfs_enter_debugger();
+ }
+
+ return (Status);
+}
+
+/*
+ * KsCloseAddress
+ * Release the Handle and FileObject of an opened tdi
+ * address object
+ *
+ * Arguments:
+ * Handle: the handle to be released
+ * FileObject: the fileobject to be released
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsCloseAddress(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+)
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ if (FileObject) {
+
+ ObDereferenceObject(FileObject);
+ }
+
+ if (Handle) {
+
+ Status = ZwClose(Handle);
+ }
+
+ ASSERT(NT_SUCCESS(Status));
+
+ return (Status);
+}
+
+
+/*
+ * KsOpenConnection
+ * Open a tdi connection object
+ *
+ * Arguments:
+ * DeviceName: device name of the connection object
+ * ConnectionContext: the connection context
+ * Handle: the newly opened handle
+ * FileObject: the newly opened fileobject
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsOpenConnection(
+ IN PUNICODE_STRING DeviceName,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ OUT HANDLE * Handle,
+ OUT PFILE_OBJECT * FileObject
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ PFILE_FULL_EA_INFORMATION Ea = NULL;
+ ULONG EaLength;
+ UCHAR EaBuffer[EA_MAX_LENGTH];
+
+ OBJECT_ATTRIBUTES ObjectAttributes;
+ IO_STATUS_BLOCK IoStatus;
+
+ //
+ // Building EA for the Address Object to be Opened ...
+ //
+
+ Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+ Ea->NextEntryOffset = 0;
+ Ea->Flags = 0;
+ Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH;
+ Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT);
+ RtlCopyMemory(
+ &(Ea->EaName),
+ TdiConnectionContext,
+ Ea->EaNameLength + 1
+ );
+ RtlMoveMemory(
+ &(Ea->EaName[Ea->EaNameLength + 1]),
+ &ConnectionContext,
+ sizeof(CONNECTION_CONTEXT)
+ );
+ EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+ Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT);
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+
+ //
+ // Initializing ...
+ //
+
+ InitializeObjectAttributes(
+ &ObjectAttributes,
+ DeviceName,
+ OBJ_CASE_INSENSITIVE |
+ OBJ_KERNEL_HANDLE,
+ NULL,
+ NULL
+ );
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ //
+ // Creating the Connection Object ...
+ //
+
+ Status = ZwCreateFile(
+ Handle,
+ FILE_READ_DATA | FILE_WRITE_DATA,
+ &ObjectAttributes,
+ &IoStatus,
+ NULL,
+ FILE_ATTRIBUTE_NORMAL,
+ 0,
+ FILE_OPEN,
+ 0,
+ Ea,
+ EaLength
+ );
+
+
+ if (NT_SUCCESS(Status)) {
+
+ //
+ // Now Obtaining the FileObject of the Transport Address ...
+ //
+
+ Status = ObReferenceObjectByHandle(
+ *Handle,
+ FILE_ANY_ACCESS,
+ NULL,
+ KernelMode,
+ FileObject,
+ NULL
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ ZwClose(*Handle);
+ }
+
+ } else {
+
+ cfs_enter_debugger();
+ }
+
+ return (Status);
+}
+
+/*
+ * KsCloseConnection
+ * Release the Handle and FileObject of an opened tdi
+ * connection object
+ *
+ * Arguments:
+ * Handle: the handle to be released
+ * FileObject: the fileobject to be released
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsCloseConnection(
+ IN HANDLE Handle,
+ IN PFILE_OBJECT FileObject
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ if (FileObject) {
+
+ ObDereferenceObject(FileObject);
+ }
+
+ if (Handle) {
+
+ Status = ZwClose(Handle);
+ }
+
+ ASSERT(NT_SUCCESS(Status));
+
+ return (Status);
+}
+
+
+/*
+ * KsAssociateAddress
+ * Associate an address object with a connection object
+ *
+ * Arguments:
+ * AddressHandle: the handle of the address object
+ * ConnectionObject: the FileObject of the connection
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsAssociateAddress(
+ IN HANDLE AddressHandle,
+ IN PFILE_OBJECT ConnectionObject
+ )
+{
+ NTSTATUS Status;
+ PDEVICE_OBJECT DeviceObject;
+ PIRP Irp;
+
+ //
+ // Getting the DeviceObject from Connection FileObject
+ //
+
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ //
+ // Building Tdi Internal Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Associating the Address Object with the Connection Object
+ //
+
+ TdiBuildAssociateAddress(
+ Irp,
+ DeviceObject,
+ ConnectionObject,
+ NULL,
+ NULL,
+ AddressHandle
+ );
+
+ //
+ // Calling the Transport Driver with the Prepared Irp
+ //
+
+ Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+ }
+
+ return (Status);
+}
+
+
+/*
+ * KsDisassociateAddress
+ * Disassociate the connection object (the relationship with
+ * the corresponding address object is dismissed)
+ *
+ * Arguments:
+ * ConnectionObject: the FileObject of the connection
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsDisassociateAddress(
+ IN PFILE_OBJECT ConnectionObject
+ )
+{
+ NTSTATUS Status;
+ PDEVICE_OBJECT DeviceObject;
+ PIRP Irp;
+
+ //
+ // Getting the DeviceObject from Connection FileObject
+ //
+
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ //
+ // Building Tdi Internal Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Disassociating the Address Object from the Connection Object
+ //
+
+ TdiBuildDisassociateAddress(
+ Irp,
+ DeviceObject,
+ ConnectionObject,
+ NULL,
+ NULL
+ );
+
+ //
+ // Calling the Transport Driver with the Prepared Irp
+ //
+
+ Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+ }
+
+ return (Status);
+}
+
+
+/*
+
+//
+// Connection Control Event Callbacks
+//
+
+TDI_EVENT_CONNECT
+TDI_EVENT_DISCONNECT
+TDI_EVENT_ERROR
+
+//
+// Tcp Event Callbacks
+//
+
+TDI_EVENT_RECEIVE
+TDI_EVENT_RECEIVE_EXPEDITED
+TDI_EVENT_CHAINED_RECEIVE
+TDI_EVENT_CHAINED_RECEIVE_EXPEDITED
+
+//
+// Udp Event Callbacks
+//
+
+TDI_EVENT_RECEIVE_DATAGRAM
+TDI_EVENT_CHAINED_RECEIVE_DATAGRAM
+
+*/
+
+
+/*
+ * KsSetEventHandlers
+ * Set the tdi event callbacks with an address object
+ *
+ * Arguments:
+ * AddressObject: the FileObject of the address object
+ * EventContext: the parameter for the callbacks
+ * Handlers: the handlers indicator array
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsSetEventHandlers(
+ IN PFILE_OBJECT AddressObject, // Address File Object
+ IN PVOID EventContext, // Context for Handlers
+ IN PKS_EVENT_HANDLERS Handlers // Handlers Indicator
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+ PDEVICE_OBJECT DeviceObject;
+ USHORT i = 0;
+
+ DeviceObject = IoGetRelatedDeviceObject(AddressObject);
+
+ for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) {
+
+ //
+ // Setup the tdi event callback handler if requested.
+ //
+
+ if (Handlers->IsActive[i]) {
+
+ PIRP Irp;
+
+ //
+ // Building Tdi Internal Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Building the Irp to set the Event Handler ...
+ //
+
+ TdiBuildSetEventHandler(
+ Irp,
+ DeviceObject,
+ AddressObject,
+ NULL,
+ NULL,
+ i, /* tdi event type */
+ Handlers->Handler[i], /* tdi event handler */
+ EventContext /* context for the handler */
+ );
+
+ //
+ // Calling the Transport Driver with the Prepared Irp
+ //
+
+ Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+
+ //
+ // tcp/ip tdi does not support these two event callbacks
+ //
+
+ if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE ||
+ i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) {
+ cfs_enter_debugger();
+ Status = STATUS_SUCCESS;
+ }
+ }
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+ }
+ }
+
+
+errorout:
+
+ if (!NT_SUCCESS(Status)) {
+
+ KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n",
+ Status, KsNtStatusToString(Status) ));
+ }
+
+ return (Status);
+}
+
+
+
+/*
+ * KsQueryAddressInfo
+ * Query the address of the FileObject specified
+ *
+ * Arguments:
+ * FileObject: the FileObject to be queried
+ * AddressInfo: buffer to contain the address info
+ * AddressSize: length of the AddressInfo buffer
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsQueryAddressInfo(
+ PFILE_OBJECT FileObject,
+ PTDI_ADDRESS_INFO AddressInfo,
+ PULONG AddressSize
+ )
+{
+ NTSTATUS Status = STATUS_UNSUCCESSFUL;
+ PIRP Irp = NULL;
+ PMDL Mdl;
+ PDEVICE_OBJECT DeviceObject;
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+ RtlZeroMemory(AddressInfo, *(AddressSize));
+
+ //
+ // Allocating the Tdi Query Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Locking the User Buffer / Allocating a MDL for it
+ //
+
+ Status = KsLockUserBuffer(
+ AddressInfo,
+ FALSE,
+ *(AddressSize),
+ IoModifyAccess,
+ &Mdl
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ }
+ }
+
+ if (Irp) {
+
+ LASSERT(NT_SUCCESS(Status));
+
+ TdiBuildQueryInformation(
+ Irp,
+ DeviceObject,
+ FileObject,
+ NULL,
+ NULL,
+ TDI_QUERY_ADDRESS_INFO,
+ Mdl
+ );
+
+ Status = KsSubmitTdiIrp(
+ DeviceObject,
+ Irp,
+ TRUE,
+ AddressSize
+ );
+
+ KsReleaseMdl(Mdl, FALSE);
+ }
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ //TDI_BUFFER_OVERFLOW
+ }
+
+ return (Status);
+}
+
+/*
+ * KsQueryProviderInfo
+ * Query the underlying transport device's information
+ *
+ * Arguments:
+ * TdiDeviceName: the transport device's name string
+ * ProviderInfo: the TDI_PROVIDER_INFO structure
+ *
+ * Return Value:
+ * NTSTATUS: Nt system status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsQueryProviderInfo(
+ PWSTR TdiDeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ PIRP Irp = NULL;
+ PMDL Mdl = NULL;
+
+ UNICODE_STRING ControlName;
+
+ HANDLE Handle;
+ PFILE_OBJECT FileObject;
+ PDEVICE_OBJECT DeviceObject;
+
+ ULONG ProviderSize = 0;
+
+ RtlInitUnicodeString(&ControlName, TdiDeviceName);
+
+ //
+ // Open the Tdi Control Channel
+ //
+
+ Status = KsOpenControl(
+ &ControlName,
+ &Handle,
+ &FileObject
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n"));
+ return (Status);
+ }
+
+ //
+ // Obtain The Related Device Object
+ //
+
+ DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+ ProviderSize = sizeof(TDI_PROVIDER_INFO);
+ RtlZeroMemory(ProviderInfo, ProviderSize);
+
+ //
+ // Allocating the Tdi Query Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Locking the User Buffer / Allocating a MDL for it
+ //
+
+ Status = KsLockUserBuffer(
+ ProviderInfo,
+ FALSE,
+ ProviderSize,
+ IoModifyAccess,
+ &Mdl
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ }
+ }
+
+ if (Irp) {
+
+ LASSERT(NT_SUCCESS(Status));
+
+ TdiBuildQueryInformation(
+ Irp,
+ DeviceObject,
+ FileObject,
+ NULL,
+ NULL,
+ TDI_QUERY_PROVIDER_INFO,
+ Mdl
+ );
+
+ Status = KsSubmitTdiIrp(
+ DeviceObject,
+ Irp,
+ TRUE,
+ &ProviderSize
+ );
+
+ KsReleaseMdl(Mdl, FALSE);
+ }
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ //TDI_BUFFER_OVERFLOW
+ }
+
+ KsCloseControl(Handle, FileObject);
+
+ return (Status);
+}
+
+/*
+ * KsQueryConnectionInfo
+ * Query the connection info of the connection FileObject
+ * specified (some statistics of the traffic)
+ *
+ * Arguments:
+ * ConnectionObject: the connection FileObject to be queried
+ * ConnectionInfo: buffer to contain the connection info
+ * ConnectionSize: length of the ConnectionInfo buffer
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsQueryConnectionInfo(
+ PFILE_OBJECT ConnectionObject,
+ PTDI_CONNECTION_INFO ConnectionInfo,
+ PULONG ConnectionSize
+ )
+{
+ NTSTATUS Status = STATUS_UNSUCCESSFUL;
+ PIRP Irp = NULL;
+ PMDL Mdl;
+ PDEVICE_OBJECT DeviceObject;
+
+ LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ RtlZeroMemory(ConnectionInfo, *(ConnectionSize));
+
+ //
+ // Allocating the Tdi Query Irp ...
+ //
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ //
+ // Locking the User Buffer / Allocating a MDL for it
+ //
+
+ Status = KsLockUserBuffer(
+ ConnectionInfo,
+ FALSE,
+ *(ConnectionSize),
+ IoModifyAccess,
+ &Mdl
+ );
+
+ if (!NT_SUCCESS(Status)) {
+
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ }
+ }
+
+ if (Irp) {
+
+ LASSERT(NT_SUCCESS(Status));
+
+ TdiBuildQueryInformation(
+ Irp,
+ DeviceObject,
+ ConnectionObject,
+ NULL,
+ NULL,
+ TDI_QUERY_CONNECTION_INFO,
+ Mdl
+ );
+
+ Status = KsSubmitTdiIrp(
+ DeviceObject,
+ Irp,
+ TRUE,
+ ConnectionSize
+ );
+
+ KsReleaseMdl(Mdl, FALSE);
+ }
+
+ return (Status);
+}
+
+
+/*
+ * KsInitializeTdiAddress
+ * Initialize the tdi address
+ *
+ * Arguments:
+ * pTransportAddress: tdi address to be initialized
+ * IpAddress: the ip address of object
+ * IpPort: the ip port of the object
+ *
+ * Return Value:
+ * ULONG: the total size of the tdi address
+ *
+ * NOTES:
+ * N/A
+ */
+
+ULONG
+KsInitializeTdiAddress(
+ IN OUT PTA_IP_ADDRESS pTransportAddress,
+ IN ULONG IpAddress,
+ IN USHORT IpPort
+ )
+{
+ pTransportAddress->TAAddressCount = 1;
+ pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP;
+ pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP;
+ pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort;
+ pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress;
+
+ return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP);
+}
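+
+/*
+ * A minimal usage sketch (the address/port values below are
+ * illustrative only): fill a TA_IP_ADDRESS for 192.168.0.1:988
+ * before opening the transport address object. Both fields are
+ * expected in network byte order, so the caller pre-swaps them
+ * with whatever byte-order helpers are available.
+ *
+ *     TA_IP_ADDRESS   ta;
+ *     ULONG           size;
+ *
+ *     size = KsInitializeTdiAddress(&ta, 0x0100A8C0, 0xDC03);
+ *     (0x0100A8C0 / 0xDC03 are 192.168.0.1 / 988 pre-swapped
+ *     for a little-endian machine.)
+ */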
+
+/*
+ * KsQueryTdiAddressLength
+ * Query the total size of the tdi address
+ *
+ * Arguments:
+ * pTransportAddress: tdi address to be queried
+ *
+ * Return Value:
+ * ULONG: the total size of the tdi address
+ *
+ * NOTES:
+ * N/A
+ */
+
+ULONG
+KsQueryTdiAddressLength(
+ PTRANSPORT_ADDRESS pTransportAddress
+ )
+{
+ ULONG TotalLength = 0;
+ LONG i;
+
+ PTA_ADDRESS UNALIGNED pTaAddress = NULL;
+
+ ASSERT (NULL != pTransportAddress);
+
+ TotalLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) +
+ FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount;
+
+ pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address;
+
+ for (i = 0; i < pTransportAddress->TAAddressCount; i++)
+ {
+ TotalLength += pTaAddress->AddressLength;
+ pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress +
+ FIELD_OFFSET(TA_ADDRESS,Address) +
+ pTaAddress->AddressLength );
+ }
+
+ return (TotalLength);
+}
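+
+/*
+ * The TRANSPORT_ADDRESS block walked above is a counted sequence of
+ * variable-length TA_ADDRESS records:
+ *
+ *     LONG TAAddressCount;
+ *     TA_ADDRESS[0]: USHORT AddressLength / USHORT AddressType /
+ *                    UCHAR Address[AddressLength]
+ *     TA_ADDRESS[1]: ...
+ *
+ * so the total size is the fixed headers plus the sum of all the
+ * variable-length Address payloads, which is what the loop computes.
+ */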
+
+
+/*
+ * KsQueryIpAddress
+ * Query the ip address of the tdi object
+ *
+ * Arguments:
+ * FileObject: tdi object to be queried
+ * TdiAddress: TdiAddress buffer, to store the queried
+ * tdi ip address
+ * AddressLength: buffer length of the TdiAddress
+ *
+ * Return Value:
+ * ULONG: the total size of the tdi ip address
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsQueryIpAddress(
+ PFILE_OBJECT FileObject,
+ PVOID TdiAddress,
+ ULONG* AddressLength
+ )
+{
+ NTSTATUS Status;
+
+ PTDI_ADDRESS_INFO TdiAddressInfo;
+ ULONG Length;
+
+
+ //
+ // Maximum length of TDI_ADDRESS_INFO with one TRANSPORT_ADDRESS
+ //
+
+ Length = MAX_ADDRESS_LENGTH;
+
+ TdiAddressInfo = (PTDI_ADDRESS_INFO)
+ ExAllocatePoolWithTag(
+ NonPagedPool,
+ Length,
+ 'KSAI' );
+
+ if (NULL == TdiAddressInfo) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+
+ Status = KsQueryAddressInfo(
+ FileObject,
+ TdiAddressInfo,
+ &Length
+ );
+
+errorout:
+
+ if (NT_SUCCESS(Status))
+ {
+ if (*AddressLength < Length) {
+
+ Status = STATUS_BUFFER_TOO_SMALL;
+
+ } else {
+
+ *AddressLength = Length;
+ RtlCopyMemory(
+ TdiAddress,
+ &(TdiAddressInfo->Address),
+ Length
+ );
+
+ Status = STATUS_SUCCESS;
+ }
+
+ }
+
+
+ if (NULL != TdiAddressInfo) {
+
+ ExFreePool(TdiAddressInfo);
+ }
+
+ return Status;
+}
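+
+/*
+ * A minimal caller sketch (illustrative only): the bytes returned in
+ * TdiAddress form a TRANSPORT_ADDRESS, which for the tcp transport
+ * can be viewed as a TA_IP_ADDRESS.
+ *
+ *     UCHAR   buffer[MAX_ADDRESS_LENGTH];
+ *     ULONG   length = sizeof(buffer);
+ *
+ *     if (NT_SUCCESS(KsQueryIpAddress(FileObject, buffer, &length))) {
+ *         PTA_IP_ADDRESS ip = (PTA_IP_ADDRESS)buffer;
+ *         (ip->Address[0].Address[0].sin_port / in_addr are in
+ *         network byte order.)
+ *     }
+ */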
+
+
+/*
+ * KsErrorEventHandler
+ * the common error event handler callback
+ *
+ * Arguments:
+ * TdiEventContext: should be the socket
+ * Status: the error code
+ *
+ * Return Value:
+ * Status: STATUS_SUCCESS
+ *
+ * NOTES:
+ * We need not do anything for such a severe
+ * error; the system will process it for us.
+ */
+
+NTSTATUS
+KsErrorEventHandler(
+ IN PVOID TdiEventContext,
+ IN NTSTATUS Status
+ )
+{
+ KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n",
+ KeGetCurrentIrql()));
+
+ cfs_enter_debugger();
+
+ return (STATUS_SUCCESS);
+}
+
+
+/*
+ * ksocknal_set_handlers
+ * setup all the event handler callbacks
+ *
+ * Arguments:
+ * tconn: the tdi connection object
+ *
+ * Return Value:
+ * int: ksocknal error code
+ *
+ * NOTES:
+ * N/A
+ */
+
+int
+ksocknal_set_handlers(
+ ksock_tconn_t * tconn
+ )
+{
+ NTSTATUS status = STATUS_SUCCESS;
+ KS_EVENT_HANDLERS handlers;
+
+ /* to make sure the address object is opened already */
+ if (tconn->kstc_addr.FileObject == NULL) {
+ goto errorout;
+ }
+
+ /* initialize the handlers indicator array. sender and listener
+ have different sets of callbacks; for a child we just return. */
+
+ memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
+
+ SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler);
+ SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler);
+ SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
+ SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler);
+ SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler);
+
+ // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler);
+
+ if (tconn->kstc_type == kstt_listener) {
+ SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler);
+ } else if (tconn->kstc_type == kstt_child) {
+ goto errorout;
+ }
+
+ /* set all the event callbacks */
+ status = KsSetEventHandlers(
+ tconn->kstc_addr.FileObject, /* Address File Object */
+ tconn, /* Event Context */
+ &handlers /* Event callback handlers */
+ );
+
+errorout:
+
+ return cfs_error_code(status);
+}
+
+
+/*
+ * ksocknal_reset_handlers
+ * disable all the event handler callbacks (set to NULL)
+ *
+ * Arguments:
+ * tconn: the tdi connection object
+ *
+ * Return Value:
+ * int: ksocknal error code
+ *
+ * NOTES:
+ * N/A
+ */
+
+int
+ksocknal_reset_handlers(
+ ksock_tconn_t * tconn
+ )
+{
+ NTSTATUS status = STATUS_SUCCESS;
+ KS_EVENT_HANDLERS handlers;
+
+ /* to make sure the address object is opened already */
+ if (tconn->kstc_addr.FileObject == NULL) {
+ goto errorout;
+ }
+
+ /* initialize the handlers indicator array. sender and listener
+ have different sets of callbacks; for a child we just return. */
+
+ memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
+
+ SetEventHandler(handlers, TDI_EVENT_ERROR, NULL);
+ SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL);
+ SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL);
+ SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL);
+ SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL);
+ // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL);
+
+ if (tconn->kstc_type == kstt_listener) {
+ SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL);
+ } else if (tconn->kstc_type == kstt_child) {
+ goto errorout;
+ }
+
+ /* set all the event callbacks */
+ status = KsSetEventHandlers(
+ tconn->kstc_addr.FileObject, /* Address File Object */
+ tconn, /* Event Context */
+ &handlers /* Event callback handlers */
+ );
+
+errorout:
+
+ return cfs_error_code(status);
+}
+
+
+/*
+ * KsAcceptCompletionRoutine
+ * Irp completion routine for TdiBuildAccept (KsConnectEventHandler)
+ *
+ * Here the system gives us a chance to check whether the
+ * connection is ready or not.
+ *
+ * Arguments:
+ * DeviceObject: the device object of the transport driver
+ * Irp: the Irp is being completed.
+ * Context: the context we specified when issuing the Irp
+ *
+ * Return Value:
+ * Nt status code
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsAcceptCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ )
+{
+ ksock_tconn_t * child = (ksock_tconn_t *) Context;
+ ksock_tconn_t * parent = child->child.kstc_parent;
+
+ KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n",
+ KeGetCurrentIrql() ));
+
+ KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n",
+ Context, Irp->IoStatus.Status));
+
+ LASSERT(child->kstc_type == kstt_child);
+
+ spin_lock(&(child->kstc_lock));
+
+ LASSERT(parent->kstc_state == ksts_listening);
+ LASSERT(child->kstc_state == ksts_connecting);
+
+ if (NT_SUCCESS(Irp->IoStatus.Status)) {
+
+ child->child.kstc_accepted = TRUE;
+
+ child->kstc_state = ksts_connected;
+
+ /* wake up the daemon thread which waits on this event */
+ KeSetEvent(
+ &(parent->listener.kstc_accept_event),
+ 0,
+ FALSE
+ );
+
+ spin_unlock(&(child->kstc_lock));
+
+ KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent));
+
+ } else {
+
+ /* re-use this child connection */
+ child->child.kstc_accepted = FALSE;
+ child->child.kstc_busy = FALSE;
+ child->kstc_state = ksts_associated;
+
+ spin_unlock(&(child->kstc_lock));
+ }
+
+ /* now free the Irp */
+ IoFreeIrp(Irp);
+
+ /* drop the refer count of the child */
+ ksocknal_put_tconn(child);
+
+ return (STATUS_MORE_PROCESSING_REQUIRED);
+}
+
+
+/*
+ * ksocknal_get_vacancy_backlog
+ * Get a vacant listening child from the backlog list
+ *
+ * Arguments:
+ * parent: the listener daemon connection
+ *
+ * Return Value:
+ * the child listening connection or NULL on failure
+ *
+ * Notes
+ * Parent's lock should be acquired before calling.
+ */
+
+ksock_tconn_t *
+ksocknal_get_vacancy_backlog(
+ ksock_tconn_t * parent
+ )
+{
+ ksock_tconn_t * child;
+
+ LASSERT(parent->kstc_type == kstt_listener);
+ LASSERT(parent->kstc_state == ksts_listening);
+
+ if (list_empty(&(parent->listener.kstc_listening.list))) {
+
+ child = NULL;
+
+ } else {
+
+ struct list_head * tmp;
+
+ /* check the listening queue and try to get a free connection */
+
+ list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
+ child = list_entry (tmp, ksock_tconn_t, child.kstc_link);
+ spin_lock(&(child->kstc_lock));
+
+ if (!child->child.kstc_busy) {
+ LASSERT(child->kstc_state == ksts_associated);
+ child->child.kstc_busy = TRUE;
+ spin_unlock(&(child->kstc_lock));
+ break;
+ } else {
+ spin_unlock(&(child->kstc_lock));
+ child = NULL;
+ }
+ }
+ }
+
+ return child;
+}
+
+
+/*
+ * KsConnectEventHandler
+ * Connect event handler, called by the underlying TDI transport
+ * in response to an incoming connection request to the listening daemon.
+ *
+ * It grabs a vacant backlog from the children tconn list, builds
+ * an accept Irp with it, then hands the Irp back to the TDI driver.
+ *
+ * Arguments:
+ * TdiEventContext: the tdi connection object of the listening daemon
+ * ......
+ *
+ * Return Value:
+ * Nt kernel status code
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsConnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN LONG RemoteAddressLength,
+ IN PVOID RemoteAddress,
+ IN LONG UserDataLength,
+ IN PVOID UserData,
+ IN LONG OptionsLength,
+ IN PVOID Options,
+ OUT CONNECTION_CONTEXT * ConnectionContext,
+ OUT PIRP * AcceptIrp
+ )
+{
+ ksock_tconn_t * parent;
+ ksock_tconn_t * child;
+
+ PFILE_OBJECT FileObject;
+ PDEVICE_OBJECT DeviceObject;
+ NTSTATUS Status;
+
+ PIRP Irp = NULL;
+ PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
+
+ KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql()));
+ parent = (ksock_tconn_t *) TdiEventContext;
+
+ LASSERT(parent->kstc_type == kstt_listener);
+
+ spin_lock(&(parent->kstc_lock));
+
+ if (parent->kstc_state == ksts_listening) {
+
+ /* allocate a new ConnectionInfo to backup the peer's info */
+
+ ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+ NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) +
+ RemoteAddressLength, 'iCsK' );
+
+ if (NULL == ConnectionInfo) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ /* initializing ConnectionInfo structure ... */
+
+ ConnectionInfo->UserDataLength = UserDataLength;
+ ConnectionInfo->UserData = UserData;
+ ConnectionInfo->OptionsLength = OptionsLength;
+ ConnectionInfo->Options = Options;
+ ConnectionInfo->RemoteAddressLength = RemoteAddressLength;
+ ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
+
+ RtlCopyMemory(
+ ConnectionInfo->RemoteAddress,
+ RemoteAddress,
+ RemoteAddressLength
+ );
+
+ /* get a vacant listening child tdi connection */
+
+ child = ksocknal_get_vacancy_backlog(parent);
+
+ if (child) {
+
+ spin_lock(&(child->kstc_lock));
+ child->child.kstc_info.ConnectionInfo = ConnectionInfo;
+ child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress;
+ child->kstc_state = ksts_connecting;
+ spin_unlock(&(child->kstc_lock));
+
+ } else {
+
+ KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent));
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ goto errorout;
+ }
+
+ FileObject = child->child.kstc_info.FileObject;
+ DeviceObject = IoGetRelatedDeviceObject (FileObject);
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ /* undo the child setup done above before bailing out */
+ spin_lock(&(child->kstc_lock));
+ child->child.kstc_info.ConnectionInfo = NULL;
+ child->child.kstc_info.Remote = NULL;
+ child->child.kstc_busy = FALSE;
+ child->kstc_state = ksts_associated;
+ spin_unlock(&(child->kstc_lock));
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ TdiBuildAccept(
+ Irp,
+ DeviceObject,
+ FileObject,
+ KsAcceptCompletionRoutine,
+ child,
+ NULL,
+ NULL
+ );
+
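+ /* the accept Irp is handed back to the transport directly
+ (no IoCallDriver), so advance the stack location ourselves
+ to make the parameters set by TdiBuildAccept current */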
+ IoSetNextIrpStackLocation(Irp);
+
+ /* grab a reference to the child tdi connection */
+ ksocknal_get_tconn(child);
+
+ Status = STATUS_MORE_PROCESSING_REQUIRED;
+
+ *AcceptIrp = Irp;
+ *ConnectionContext = child;
+
+ } else {
+
+ Status = STATUS_CONNECTION_REFUSED;
+ goto errorout;
+ }
+
+ spin_unlock(&(parent->kstc_lock));
+
+ return Status;
+
+errorout:
+
+ spin_unlock(&(parent->kstc_lock));
+
+ {
+ *AcceptIrp = NULL;
+ *ConnectionContext = NULL;
+
+ if (ConnectionInfo) {
+
+ ExFreePool(ConnectionInfo);
+ }
+
+ if (Irp) {
+
+ IoFreeIrp (Irp);
+ }
+ }
+
+ return Status;
+}
+
+
+
+/*
+ * KsDisconnectCompletionRoutine
+ * the Irp completion routine for TdiBuildDisconnect
+ *
+ * We just signal the event and return MORE_PRO... to
+ * let the caller take the responsibility of the Irp.
+ *
+ * Arguments:
+ * DeviceObject: the device object of the transport
+ * Irp: the Irp is being completed.
+ * Context: the event specified by the caller
+ *
+ * Return Value:
+ * Nt status code
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsDisconectCompletionRoutine (
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ )
+{
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+
+ KeSetEvent((PKEVENT) Context, 0, FALSE);
+
+ return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * KsDisconnectHelper
+ * the routine to be executed in the WorkItem procedure
+ * this routine is to disconnect a tdi connection
+ *
+ * Arguments:
+ * Workitem: the context transferred to the workitem
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * tconn is already referenced in abort_connection ...
+ */
+
+VOID
+KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem)
+{
+ ksock_tconn_t * tconn = WorkItem->tconn;
+
+ ksocknal_disconnect_tconn(tconn, WorkItem->Flags);
+
+ KeSetEvent(&(WorkItem->Event), 0, FALSE);
+
+ spin_lock(&(tconn->kstc_lock));
+ cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+ spin_unlock(&(tconn->kstc_lock));
+ ksocknal_put_tconn(tconn);
+}
+
+
+/*
+ * KsDisconnectEventHandler
+ * Disconnect event handler, called by the underlying TDI transport
+ * in response to an incoming disconnection notification from a remote node.
+ *
+ * Arguments:
+ * ConnectionContext: tdi connection object
+ * DisconnectFlags: specifies the nature of the disconnection
+ * ......
+ *
+ * Return Value:
+ * Nt kernel status code
+ *
+ * Notes:
+ * N/A
+ */
+
+
+NTSTATUS
+KsDisconnectEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN LONG DisconnectDataLength,
+ IN PVOID DisconnectData,
+ IN LONG DisconnectInformationLength,
+ IN PVOID DisconnectInformation,
+ IN ULONG DisconnectFlags
+ )
+{
+ ksock_tconn_t * tconn;
+ NTSTATUS Status = STATUS_SUCCESS;
+ PKS_DISCONNECT_WORKITEM WorkItem;
+
+ tconn = (ksock_tconn_t *)ConnectionContext;
+
+ KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n",
+ KeGetCurrentIrql() ));
+
+ KsPrint((2, "tconn = %x DisconnectFlags= %xh\n",
+ tconn, DisconnectFlags));
+
+ ksocknal_get_tconn(tconn);
+ spin_lock(&(tconn->kstc_lock));
+
+ WorkItem = &(tconn->kstc_disconnect);
+
+ if (tconn->kstc_state != ksts_connected) {
+
+ Status = STATUS_SUCCESS;
+
+ } else {
+
+ if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) {
+
+ Status = STATUS_REMOTE_DISCONNECT;
+
+ } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) {
+
+ Status = STATUS_GRACEFUL_DISCONNECT;
+ }
+
+ if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
+
+ ksocknal_get_tconn(tconn);
+
+ WorkItem->Flags = DisconnectFlags;
+ WorkItem->tconn = tconn;
+
+ cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+
+ /* queue the workitem to call */
+ ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
+ }
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+ ksocknal_put_tconn(tconn);
+
+ return (Status);
+}
+
+NTSTATUS
+KsTcpReceiveCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ )
+{
+ NTSTATUS Status = Irp->IoStatus.Status;
+
+ if (NT_SUCCESS(Status)) {
+
+ ksock_tconn_t *tconn = Context->tconn;
+
+ PKS_TSDU_DAT KsTsduDat = Context->CompletionContext;
+ PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext;
+
+ KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n",
+ Context->KsTsduMgr->TotalBytes ));
+
+ spin_lock(&(tconn->kstc_lock));
+
+ if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+ if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
+ cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
+ } else {
+ cfs_enter_debugger();
+ }
+ } else {
+ ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+ if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
+ cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
+ } else {
+ cfs_enter_debugger();
+ }
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ /* wake up the thread waiting for the completion of this Irp */
+ KeSetEvent(Context->Event, 0, FALSE);
+
+ /* re-active the ksocknal connection and wake up the scheduler */
+ if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+ tconn->kstc_sched_cb( tconn, FALSE, NULL,
+ Context->KsTsduMgr->TotalBytes );
+ }
+
+ } else {
+
+ /* an unexpected error occurred; we must abort the connection */
+ ksocknal_abort_tconn(Context->tconn);
+ }
+
+ if (Context) {
+
+ /* Freeing the Context structure... */
+ ExFreePool(Context);
+ Context = NULL;
+ }
+
+
+ /* free the Irp */
+ if (Irp) {
+ IoFreeIrp(Irp);
+ }
+
+ return (Status);
+}
+
+
+/*
+ * KsTcpCompletionRoutine
+ * the Irp completion routine for TdiBuildSend and TdiBuildReceive ...
+ * We need to call the user's own CompletionRoutine if specified;
+ * otherwise it's a synchronous case and we need to signal the event.
+ *
+ * Arguments:
+ * DeviceObject: the device object of the transport
+ * Irp: the Irp is being completed.
+ * Context: the context we specified when issuing the Irp
+ *
+ * Return Value:
+ * Nt status code
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsTcpCompletionRoutine(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp,
+ IN PVOID Context
+ )
+{
+ if (Context) {
+
+ PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL;
+ ksock_tconn_t * tconn = NULL;
+
+ CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context;
+ tconn = CompletionContext->tconn;
+
+ /* release the chained mdl */
+ KsReleaseMdl(Irp->MdlAddress, FALSE);
+ Irp->MdlAddress = NULL;
+
+ if (CompletionContext->CompletionRoutine) {
+
+ if ( CompletionContext->bCounted &&
+ InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) {
+ goto errorout;
+ }
+
+ //
+ // Giving control to user specified CompletionRoutine ...
+ //
+
+ CompletionContext->CompletionRoutine(
+ Irp,
+ CompletionContext
+ );
+
+ } else {
+
+ //
+ // Signaling the Event ...
+ //
+
+ KeSetEvent(CompletionContext->Event, 0, FALSE);
+ }
+
+ /* drop the reference count of the tconn object */
+ ksocknal_put_tconn(tconn);
+
+
+ /*
+ * We need to free the Context now ...
+ */
+
+ cfs_free(Context);
+
+ } else {
+
+ cfs_enter_debugger();
+ }
+
+errorout:
+
+ return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+/*
+ * KsTcpSendCompletionRoutine
+ * the user specified Irp completion routine for asynchronous
+ * data transmission requests.
+ *
+ * It will do the cleanup job of the ksock_tx_t and wake up the
+ * ksocknal scheduler thread
+ *
+ * Arguments:
+ * Irp: the Irp is being completed.
+ * Context: the context we specified when issuing the Irp
+ *
+ * Return Value:
+ * Nt status code
+ *
+ * Notes:
+ * N/A
+ */
+
+NTSTATUS
+KsTcpSendCompletionRoutine(
+ IN PIRP Irp,
+ IN PKS_TCP_COMPLETION_CONTEXT Context
+ )
+{
+ NTSTATUS Status = Irp->IoStatus.Status;
+ ULONG rc = Irp->IoStatus.Information;
+ ksock_tconn_t * tconn = Context->tconn;
+ PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr;
+
+ ENTRY;
+
+ LASSERT(tconn) ;
+
+ if (NT_SUCCESS(Status)) {
+
+ if (Context->bCounted) {
+ PVOID tx = Context->CompletionContext;
+
+ ASSERT(tconn->kstc_update_tx != NULL);
+
+ /* update the tx, rebasing the kiov or iov pointers */
+ tx = tconn->kstc_update_tx(tconn, tx, rc);
+
+ /* update the KsTsduMgr total bytes */
+ spin_lock(&tconn->kstc_lock);
+ KsTsduMgr->TotalBytes -= rc;
+ spin_unlock(&tconn->kstc_lock);
+
+ /*
+ * now it's time to re-queue the conns into the
+ * scheduler queue and wake the scheduler thread.
+ */
+
+ if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+ tconn->kstc_sched_cb( tconn, TRUE, tx, 0);
+ }
+
+ } else {
+
+ PKS_TSDU KsTsdu = Context->CompletionContext;
+ PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2;
+ PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2;
+
+ spin_lock(&tconn->kstc_lock);
+ /* This is buffered sending ... */
+ ASSERT(KsTsduBuf->StartOffset == 0);
+
+ if (KsTsduBuf->DataLength > Irp->IoStatus.Information) {
+ /* not fully sent .... we have to abort the connection */
+ spin_unlock(&tconn->kstc_lock);
+ ksocknal_abort_tconn(tconn);
+ goto errorout;
+ }
+
+ if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) {
+ /* free the buffer */
+ ExFreePool(KsTsduBuf->UserBuffer);
+ KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength;
+ KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+ } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) {
+ KsTsduMgr->TotalBytes -= KsTsduDat->DataLength;
+ KsTsdu->StartOffset += KsTsduDat->TotalLength;
+ } else {
+ cfs_enter_debugger(); /* should not get here */
+ }
+
+ if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+ list_del(&KsTsdu->Link);
+ KsTsduMgr->NumOfTsdu--;
+ KsPutKsTsdu(KsTsdu);
+ }
+
+ spin_unlock(&tconn->kstc_lock);
+ }
+
+ } else {
+
+ /* cfs_enter_debugger(); */
+
+ /*
+ * for the case that the transmission is unsuccessful,
+ * we need to abort the tdi connection, but not destroy it.
+ * the socknal conn will drop the reference count, then the
+ * tdi connection will be freed.
+ */
+
+ ksocknal_abort_tconn(tconn);
+ }
+
+errorout:
+
+ /*
+ * it's our duty to free the Irp.
+ */
+
+ if (Irp) {
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ }
+
+ EXIT;
+
+ return Status;
+}
+
+/*
+ * Normal receive event handler
+ *
+ * It will move data from system Tsdu to our TsduList
+ */
+
+NTSTATUS
+KsTcpReceiveEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ )
+{
+ NTSTATUS Status;
+
+ ksock_tconn_t * tconn;
+
+ PKS_CHAIN KsChain;
+ PKS_TSDUMGR KsTsduMgr;
+ PKS_TSDU KsTsdu;
+ PKS_TSDU_DAT KsTsduDat;
+ PKS_TSDU_BUF KsTsduBuf;
+
+ BOOLEAN bIsExpedited;
+ BOOLEAN bIsCompleteTsdu;
+
+ BOOLEAN bNewTsdu = FALSE;
+ BOOLEAN bNewBuff = FALSE;
+
+ PCHAR Buffer = NULL;
+
+ PIRP Irp = NULL;
+ PMDL Mdl = NULL;
+ PFILE_OBJECT FileObject;
+ PDEVICE_OBJECT DeviceObject;
+
+ ULONG BytesReceived = 0;
+
+ PKS_TCP_COMPLETION_CONTEXT context = NULL;
+
+
+ tconn = (ksock_tconn_t *) ConnectionContext;
+
+ ksocknal_get_tconn(tconn);
+
+ /* check whether the whole body of payload is received or not */
+ if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) &&
+ (BytesIndicated == BytesAvailable) ) {
+ bIsCompleteTsdu = TRUE;
+ } else {
+ bIsCompleteTsdu = FALSE;
+ }
+
+ bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
+
+ KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable));
+ KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited ));
+
+ spin_lock(&(tconn->kstc_lock));
+
+ /* check whether we are connected and not a listener */
+ if ( !((tconn->kstc_state == ksts_connected) &&
+ (tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child))) {
+
+ *BytesTaken = BytesIndicated;
+
+ spin_unlock(&(tconn->kstc_lock));
+ ksocknal_put_tconn(tconn);
+
+ return (STATUS_SUCCESS);
+ }
+
+ if (tconn->kstc_type == kstt_sender) {
+ KsChain = &(tconn->sender.kstc_recv);
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ KsChain = &(tconn->child.kstc_recv);
+ }
+
+ if (bIsExpedited) {
+ KsTsduMgr = &(KsChain->Expedited);
+ } else {
+ KsTsduMgr = &(KsChain->Normal);
+ }
+
+ /* if the Tsdu is even larger than the biggest Tsdu, we have
+ to allocate a new buffer and use TSDU_TYPE_BUF to store it */
+
+ if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ksocknal_data.ksnd_tsdu_size -
+ KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
+ bNewBuff = TRUE;
+ }
+
+ /* retrieve the latest Tsdu buffer from the TsduMgr
+ list if the list is not empty. */
+
+ if (list_empty(&(KsTsduMgr->TsduList))) {
+
+ LASSERT(KsTsduMgr->NumOfTsdu == 0);
+ KsTsdu = NULL;
+
+ } else {
+
+ LASSERT(KsTsduMgr->NumOfTsdu > 0);
+ KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+
+ /* if this Tsdu does not contain enough space, we need
+ to allocate a new Tsdu. */
+
+ if (bNewBuff) {
+ if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) >
+ KsTsdu->TotalLength ) {
+ KsTsdu = NULL;
+ }
+ } else {
+ if ( KS_TSDU_STRU_SIZE(BytesAvailable) >
+ KsTsdu->TotalLength - KsTsdu->LastOffset ) {
+ KsTsdu = NULL;
+ }
+ }
+ }
+
+ /* allocating the buffer for TSDU_TYPE_BUF */
+ if (bNewBuff) {
+ Buffer = ExAllocatePool(NonPagedPool, BytesAvailable);
+ if (NULL == Buffer) {
+ /* there's not enough memory for us. We just try to
+ receive the maximum bytes with a new Tsdu */
+ bNewBuff = FALSE;
+ KsTsdu = NULL;
+ }
+ }
+
+ /* allocate a new Tsdu in case we are not satisfied. */
+
+ if (NULL == KsTsdu) {
+
+ KsTsdu = KsAllocateKsTsdu();
+
+ if (NULL == KsTsdu) {
+ goto errorout;
+ } else {
+ bNewTsdu = TRUE;
+ }
+ }
+
+ KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+ KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+ if (bNewBuff) {
+
+ /* set up the KS_TSDU_BUF record */
+
+ KsTsduBuf->TsduType = TSDU_TYPE_BUF;
+ KsTsduBuf->TsduFlags = 0;
+ KsTsduBuf->StartOffset = 0;
+ KsTsduBuf->UserBuffer = Buffer;
+ KsTsduBuf->DataLength = BytesReceived = BytesAvailable;
+
+ KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+
+ } else {
+
+ /* set up the KS_TSDU_DAT to contain all the messages */
+
+ KsTsduDat->TsduType = TSDU_TYPE_DAT;
+ KsTsduDat->TsduFlags = 0;
+
+ if ( KsTsdu->TotalLength - KsTsdu->LastOffset >=
+ KS_TSDU_STRU_SIZE(BytesAvailable) ) {
+ BytesReceived = BytesAvailable;
+ } else {
+ BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset -
+ FIELD_OFFSET(KS_TSDU_DAT, Data);
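+ /* round down to a DWORD boundary so the next
+ KS_TSDU_DAT record stays 4-byte aligned */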
+ BytesReceived &= (~((ULONG)3));
+ }
+ KsTsduDat->DataLength = BytesReceived;
+ KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived);
+ KsTsduDat->StartOffset = 0;
+
+ Buffer = &KsTsduDat->Data[0];
+
+ KsTsdu->LastOffset += KsTsduDat->TotalLength;
+ }
+
+ KsTsduMgr->TotalBytes += BytesReceived;
+
+ if (bIsCompleteTsdu) {
+
+ /* It's a complete receive, we just move all
+ the data from system to our Tsdu */
+
+ RtlMoveMemory(
+ Buffer,
+ Tsdu,
+ BytesReceived
+ );
+
+ *BytesTaken = BytesReceived;
+ Status = STATUS_SUCCESS;
+
+ if (bNewTsdu) {
+ list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+ KsTsduMgr->NumOfTsdu++;
+ }
+
+ KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+ /* re-active the ksocknal connection and wake up the scheduler */
+ if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+ tconn->kstc_sched_cb( tconn, FALSE, NULL,
+ KsTsduMgr->TotalBytes );
+ }
+
+ } else {
+
+ /* there's still data in the tdi internal queue, we need to issue
+ a new Irp to receive all of it. first allocate the tcp context */
+
+ context = ExAllocatePoolWithTag(
+ NonPagedPool,
+ sizeof(KS_TCP_COMPLETION_CONTEXT),
+ 'cTsK');
+
+ if (!context) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ /* setup the context */
+ RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT));
+
+ context->tconn = tconn;
+ context->CompletionRoutine = KsTcpReceiveCompletionRoutine;
+ /* the completion routine expects the KS_TSDU_BUF / KS_TSDU_DAT
+ record of this receive here, not the KS_TSDU itself */
+ context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat;
+ context->KsTsduMgr = KsTsduMgr;
+ context->Event = &(KsTsduMgr->Event);
+
+ if (tconn->kstc_type == kstt_sender) {
+ FileObject = tconn->sender.kstc_info.FileObject;
+ } else {
+ FileObject = tconn->child.kstc_info.FileObject;
+ }
+
+ DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+ /* build new tdi Irp and setup it. */
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+ goto errorout;
+ }
+
+ Status = KsLockUserBuffer(
+ Buffer,
+ FALSE,
+ BytesReceived,
+ IoModifyAccess,
+ &Mdl
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ goto errorout;
+ }
+
+ TdiBuildReceive(
+ Irp,
+ DeviceObject,
+ FileObject,
+ KsTcpCompletionRoutine,
+ context,
+ Mdl,
+ ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED),
+ BytesReceived
+ );
+
+ IoSetNextIrpStackLocation(Irp);
+
+ /* return the newly built Irp to transport driver,
+ it will process it to receive all the data */
+
+ *IoRequestPacket = Irp;
+ *BytesTaken = 0;
+
+ if (bNewTsdu) {
+
+ list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+ KsTsduMgr->NumOfTsdu++;
+ }
+
+ if (bNewBuff) {
+ cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
+ } else {
+ cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
+ }
+ ksocknal_get_tconn(tconn);
+ Status = STATUS_MORE_PROCESSING_REQUIRED;
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+ ksocknal_put_tconn(tconn);
+
+ return (Status);
+
+errorout:
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ if (bNewTsdu && (KsTsdu != NULL)) {
+ KsFreeKsTsdu(KsTsdu);
+ }
+
+ if (Mdl) {
+ KsReleaseMdl(Mdl, FALSE);
+ }
+
+ if (Irp) {
+ IoFreeIrp(Irp);
+ }
+
+ if (context) {
+ ExFreePool(context);
+ }
+
+ ksocknal_abort_tconn(tconn);
+ ksocknal_put_tconn(tconn);
+
+ *BytesTaken = BytesAvailable;
+ Status = STATUS_SUCCESS;
+
+ return (Status);
+}
+
+/*
+ * Expedited receive event handler
+ */
+
+NTSTATUS
+KsTcpReceiveExpeditedEventHandler(
+ IN PVOID TdiEventContext,
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG BytesIndicated,
+ IN ULONG BytesAvailable,
+ OUT ULONG * BytesTaken,
+ IN PVOID Tsdu,
+ OUT PIRP * IoRequestPacket
+ )
+{
+ return KsTcpReceiveEventHandler(
+ TdiEventContext,
+ ConnectionContext,
+ ReceiveFlags | TDI_RECEIVE_EXPEDITED,
+ BytesIndicated,
+ BytesAvailable,
+ BytesTaken,
+ Tsdu,
+ IoRequestPacket
+ );
+}
+
+
+/*
+ * Bulk receive event handler
+ *
+ * It will queue all the system Tsdus to our TsduList.
+ * Then later ksocknal_recv_mdl will release them.
+ */
+
+NTSTATUS
+KsTcpChainedReceiveEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ )
+{
+
+ NTSTATUS Status;
+
+ ksock_tconn_t * tconn;
+
+ PKS_CHAIN KsChain;
+ PKS_TSDUMGR KsTsduMgr;
+ PKS_TSDU KsTsdu;
+ PKS_TSDU_MDL KsTsduMdl;
+
+ BOOLEAN bIsExpedited;
+ BOOLEAN bNewTsdu = FALSE;
+
+ tconn = (ksock_tconn_t *) ConnectionContext;
+
+ bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
+
+ KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited));
+
+ ksocknal_get_tconn(tconn);
+ spin_lock(&(tconn->kstc_lock));
+
+ /* check whether we are connected and not a listener */
+ if ( !((tconn->kstc_state == ksts_connected) &&
+ (tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child))) {
+
+ spin_unlock(&(tconn->kstc_lock));
+ ksocknal_put_tconn(tconn);
+
+ return (STATUS_SUCCESS);
+ }
+
+ /* get the latest Tsdu buffer from the TsduMgr list.
+ just set NULL if the list is empty. */
+
+ if (tconn->kstc_type == kstt_sender) {
+ KsChain = &(tconn->sender.kstc_recv);
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ KsChain = &(tconn->child.kstc_recv);
+ }
+
+ if (bIsExpedited) {
+ KsTsduMgr = &(KsChain->Expedited);
+ } else {
+ KsTsduMgr = &(KsChain->Normal);
+ }
+
+ if (list_empty(&(KsTsduMgr->TsduList))) {
+
+ LASSERT(KsTsduMgr->NumOfTsdu == 0);
+ KsTsdu = NULL;
+
+ } else {
+
+ LASSERT(KsTsduMgr->NumOfTsdu > 0);
+ KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+ LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+ if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) {
+ KsTsdu = NULL;
+ }
+ }
+
+ /* if there's no Tsdu, or the free space is not enough for this
+ KS_TSDU_MDL structure, we need to allocate a new Tsdu. */
+
+ if (NULL == KsTsdu) {
+
+ KsTsdu = KsAllocateKsTsdu();
+
+ if (NULL == KsTsdu) {
+ goto errorout;
+ } else {
+ bNewTsdu = TRUE;
+ }
+ }
+
+ /* just queue the KS_TSDU_MDL to the Tsdu buffer */
+
+ KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+ KsTsduMdl->TsduType = TSDU_TYPE_MDL;
+ KsTsduMdl->DataLength = ReceiveLength;
+ KsTsduMdl->StartOffset = StartingOffset;
+ KsTsduMdl->Mdl = Tsdu;
+ KsTsduMdl->Descriptor = TsduDescriptor;
+
+ KsTsdu->LastOffset += sizeof(KS_TSDU_MDL);
+ KsTsduMgr->TotalBytes += ReceiveLength;
+
+ KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n",
+ KsTsduMgr->TotalBytes ));
+
+ Status = STATUS_PENDING;
+
+ /* attach it to the TsduMgr list if the Tsdu is newly created. */
+ if (bNewTsdu) {
+
+ list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+ KsTsduMgr->NumOfTsdu++;
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ /* wake up the threads waiting in ksocknal_recv_mdl */
+ KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+ if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+ tconn->kstc_sched_cb( tconn, FALSE, NULL,
+ KsTsduMgr->TotalBytes );
+ }
+
+ ksocknal_put_tconn(tconn);
+
+ /* Return STATUS_PENDING to the system because we still
+ own the MDL resources. ksocknal_recv_mdl is expected
+ to free the MDL resources. */
+
+ return (Status);
+
+errorout:
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ if (bNewTsdu && (KsTsdu != NULL)) {
+ KsFreeKsTsdu(KsTsdu);
+ }
+
+ /* abort the tdi connection */
+ ksocknal_abort_tconn(tconn);
+ ksocknal_put_tconn(tconn);
+
+
+ Status = STATUS_SUCCESS;
+
+ return (Status);
+}
+
+
+/*
+ * Expedited & Bulk receive event handler
+ */
+
+NTSTATUS
+KsTcpChainedReceiveExpeditedEventHandler (
+ IN PVOID TdiEventContext, // the event context
+ IN CONNECTION_CONTEXT ConnectionContext,
+ IN ULONG ReceiveFlags,
+ IN ULONG ReceiveLength,
+ IN ULONG StartingOffset, // offset of start of client data in TSDU
+ IN PMDL Tsdu, // TSDU data chain
+ IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives
+ )
+{
+ return KsTcpChainedReceiveEventHandler(
+ TdiEventContext,
+ ConnectionContext,
+ ReceiveFlags | TDI_RECEIVE_EXPEDITED,
+ ReceiveLength,
+ StartingOffset,
+ Tsdu,
+ TsduDescriptor );
+}
+
+
+VOID
+KsPrintProviderInfo(
+ PWSTR DeviceName,
+ PTDI_PROVIDER_INFO ProviderInfo
+ )
+{
+ KsPrint((2, "%ws ProviderInfo:\n", DeviceName));
+
+ KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version ));
+ KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize ));
+ KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData ));
+ KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize ));
+ KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags ));
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) {
+ KsPrint((2, " CONNECTION_MODE\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) {
+ KsPrint((2, " ORDERLY_RELEASE\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) {
+ KsPrint((2, " CONNECTIONLESS_MODE\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) {
+ KsPrint((2, " ERROR_FREE_DELIVERY\n"));
+ }
+
+ if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) {
+ KsPrint((2, " SECURITY_LEVEL\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) {
+ KsPrint((2, " BROADCAST_SUPPORTED\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) {
+ KsPrint((2, " MULTICAST_SUPPORTED\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) {
+ KsPrint((2, " DELAYED_ACCEPTANCE\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) {
+ KsPrint((2, " EXPEDITED_DATA\n"));
+ }
+
+ if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) {
+ KsPrint((2, " INTERNAL_BUFFERING\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) {
+ KsPrint((2, " ROUTE_DIRECTED\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) {
+ KsPrint((2, " NO_ZERO_LENGTH\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) {
+ KsPrint((2, " POINT_TO_POINT\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) {
+ KsPrint((2, " MESSAGE_MODE\n"));
+ }
+
+ if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) {
+ KsPrint((2, " HALF_DUPLEX\n"));
+ }
+
+ KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData ));
+ KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData ));
+ KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources ));
+}
+
+
+/*
+ * KsAllocateKsTsdu
+ * Reuse a Tsdu from the freelist or allocate a new Tsdu
+ * from the LookAsideList table or the NonPagedPool
+ *
+ * Arguments:
+ * N/A
+ *
+ * Return Value:
+ * PKS_Tsdu: the new Tsdu or NULL if it fails
+ *
+ * Notes:
+ * N/A
+ */
+
+PKS_TSDU
+KsAllocateKsTsdu()
+{
+ PKS_TSDU KsTsdu = NULL;
+
+ spin_lock(&(ksocknal_data.ksnd_tsdu_lock));
+
+ if (!list_empty (&(ksocknal_data.ksnd_freetsdus))) {
+
+ LASSERT(ksocknal_data.ksnd_nfreetsdus > 0);
+
+ KsTsdu = list_entry(ksocknal_data.ksnd_freetsdus.next, KS_TSDU, Link);
+ list_del(&(KsTsdu->Link));
+ ksocknal_data.ksnd_nfreetsdus--;
+
+ } else {
+
+ KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc(
+ ksocknal_data.ksnd_tsdu_slab, 0);
+ }
+
+ spin_unlock(&(ksocknal_data.ksnd_tsdu_lock));
+
+ if (NULL != KsTsdu) {
+ KsInitializeKsTsdu(KsTsdu, ksocknal_data.ksnd_tsdu_size);
+ }
+
+ return (KsTsdu);
+}
+
+
+/*
+ * KsPutKsTsdu
+ * Move the Tsdu to the free tsdu list in ksocknal_data.
+ *
+ * Arguments:
+ * KsTsdu: Tsdu to be moved.
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+VOID
+KsPutKsTsdu(
+ PKS_TSDU KsTsdu
+ )
+{
+ spin_lock(&(ksocknal_data.ksnd_tsdu_lock));
+
+ list_add_tail( &(KsTsdu->Link), &(ksocknal_data.ksnd_freetsdus));
+ ksocknal_data.ksnd_nfreetsdus++;
+
+ spin_unlock(&(ksocknal_data.ksnd_tsdu_lock));
+}
+
+
+/*
+ * KsFreeKsTsdu
+ * Release a Tsdu: uninitialize then free it.
+ *
+ * Arguments:
+ * KsTsdu: Tsdu to be freed.
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+VOID
+KsFreeKsTsdu(
+ PKS_TSDU KsTsdu
+ )
+{
+ cfs_mem_cache_free(
+ ksocknal_data.ksnd_tsdu_slab,
+ KsTsdu );
+}
+
+
+/*
+ * KsInitializeKsTsdu
+ * Initialize the Tsdu buffer header
+ *
+ * Arguments:
+ * KsTsdu: the Tsdu to be initialized
+ * Length: the total length of the Tsdu
+ *
+ * Return Value:
+ * VOID
+ *
+ * NOTES:
+ * N/A
+ */
+
+VOID
+KsInitializeKsTsdu(
+ PKS_TSDU KsTsdu,
+ ULONG Length
+ )
+{
+ RtlZeroMemory(KsTsdu, Length);
+ KsTsdu->Magic = KS_TSDU_MAGIC;
+ KsTsdu->TotalLength = Length;
+ KsTsdu->StartOffset = KsTsdu->LastOffset =
+ KS_DWORD_ALIGN(sizeof(KS_TSDU));
+}
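+
+/*
+ * After initialization the Tsdu buffer is laid out as below (a
+ * sketch; records are appended at LastOffset and consumed from
+ * StartOffset):
+ *
+ *     [ KS_TSDU header, DWORD aligned ]
+ *     [ KS_TSDU_DAT / KS_TSDU_BUF / KS_TSDU_MDL record ... ]
+ *     [ ... free space up to TotalLength ... ]
+ *
+ * StartOffset == LastOffset means the Tsdu is empty.
+ */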
+
+
+/*
+ * KsInitializeKsTsduMgr
+ * Initialize the management structure of
+ * Tsdu buffers
+ *
+ * Arguments:
+ * TsduMgr: the TsduMgr to be initialized
+ *
+ * Return Value:
+ * VOID
+ *
+ * NOTES:
+ * N/A
+ */
+
+VOID
+KsInitializeKsTsduMgr(
+ PKS_TSDUMGR TsduMgr
+ )
+{
+ KeInitializeEvent(
+ &(TsduMgr->Event),
+ NotificationEvent,
+ FALSE
+ );
+
+ CFS_INIT_LIST_HEAD(
+ &(TsduMgr->TsduList)
+ );
+
+ TsduMgr->NumOfTsdu = 0;
+ TsduMgr->TotalBytes = 0;
+}
+
+
+/*
+ * KsInitializeKsChain
+ * Initialize the KsChain structure for receiving
+ * or transmitting
+ *
+ * Arguments:
+ * KsChain: the KsChain to be initialized
+ *
+ * Return Value:
+ * VOID
+ *
+ * NOTES:
+ * N/A
+ */
+
+VOID
+KsInitializeKsChain(
+ PKS_CHAIN KsChain
+ )
+{
+ KsInitializeKsTsduMgr(&(KsChain->Normal));
+ KsInitializeKsTsduMgr(&(KsChain->Expedited));
+}
+
+
+/*
+ * KsCleanupTsduMgr
+ * Clean up all the Tsdus in the TsduMgr list
+ *
+ * Arguments:
+ * KsTsduMgr: the Tsdu list manager
+ *
+ * Return Value:
+ * NTSTATUS: nt status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsCleanupTsduMgr(
+ PKS_TSDUMGR KsTsduMgr
+ )
+{
+ PKS_TSDU KsTsdu;
+ PKS_TSDU_DAT KsTsduDat;
+ PKS_TSDU_BUF KsTsduBuf;
+ PKS_TSDU_MDL KsTsduMdl;
+
+ LASSERT(NULL != KsTsduMgr);
+
+ KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+ while (!list_empty(&KsTsduMgr->TsduList)) {
+
+ KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
+ LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+ if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+ //
+ // KsTsdu is empty now, we need free it ...
+ //
+
+ list_del(&(KsTsdu->Link));
+ KsTsduMgr->NumOfTsdu--;
+
+ KsFreeKsTsdu(KsTsdu);
+
+ } else {
+
+ KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+ KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+ KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+
+ if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+ KsTsdu->StartOffset += KsTsduDat->TotalLength;
+
+ } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
+
+ ASSERT(KsTsduBuf->UserBuffer != NULL);
+
+ if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) {
+ ExFreePool(KsTsduBuf->UserBuffer);
+ } else {
+ cfs_enter_debugger();
+ }
+
+ KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+
+ } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+
+ //
+ // MDL Tsdu Unit ...
+ //
+
+ TdiReturnChainedReceives(
+ &(KsTsduMdl->Descriptor),
+ 1 );
+
+ KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+ }
+ }
+ }
+
+ return STATUS_SUCCESS;
+}
+
+
+/*
+ * KsCleanupKsChain
+ * Clean up the TsduMgrs of the KsChain
+ *
+ * Arguments:
+ * KsChain: the chain managing TsduMgr
+ *
+ * Return Value:
+ * NTSTATUS: nt status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsCleanupKsChain(
+ PKS_CHAIN KsChain
+ )
+{
+ NTSTATUS Status;
+
+ LASSERT(NULL != KsChain);
+
+ Status = KsCleanupTsduMgr(
+ &(KsChain->Normal)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ Status = KsCleanupTsduMgr(
+ &(KsChain->Expedited)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+errorout:
+
+ return Status;
+}
+
+
+/*
+ * KsCleanupTsdu
+ * Clean up all the Tsdus of a tdi connected object
+ *
+ * Arguments:
+ * tconn: the tdi connection which is connected already.
+ *
+ * Return Value:
+ * Nt status code
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsCleanupTsdu(
+ ksock_tconn_t * tconn
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+
+ if (tconn->kstc_type != kstt_sender &&
+ tconn->kstc_type != kstt_child ) {
+
+ goto errorout;
+ }
+
+ if (tconn->kstc_type == kstt_sender) {
+
+ Status = KsCleanupKsChain(
+ &(tconn->sender.kstc_recv)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ Status = KsCleanupKsChain(
+ &(tconn->sender.kstc_send)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ } else {
+
+ Status = KsCleanupKsChain(
+ &(tconn->child.kstc_recv)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ Status = KsCleanupKsChain(
+ &(tconn->child.kstc_send)
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ }
+
+errorout:
+
+ return (Status);
+}
+
+
+/*
+ * KsCopyMdlChainToMdlChain
+ * Copy data from a [chained] Mdl to another [chained] Mdl.
+ * The Tdi library does not provide this function, so we have
+ * to implement it ourselves.
+ *
+ * Arguments:
+ * SourceMdlChain: the source mdl
+ * SourceOffset: start offset of the source
+ * DestinationMdlChain: the dst mdl
+ * DestinationOffset: the offset where data are to be copied.
+ * BytesTobecopied: the expected number of bytes to be copied
+ * BytesCopied: to store the length of the data actually copied
+ *
+ * Return Value:
+ * NTSTATUS: STATUS_SUCCESS or other error code
+ *
+ * NOTES:
+ * The length of source mdl must be >= SourceOffset + BytesTobecopied
+ */
+
+NTSTATUS
+KsCopyMdlChainToMdlChain(
+ IN PMDL SourceMdlChain,
+ IN ULONG SourceOffset,
+ IN PMDL DestinationMdlChain,
+ IN ULONG DestinationOffset,
+ IN ULONG BytesTobecopied,
+ OUT PULONG BytesCopied
+ )
+{
+ PMDL SrcMdl = SourceMdlChain;
+ PMDL DstMdl = DestinationMdlChain;
+
+ PUCHAR SrcBuf = NULL;
+ PUCHAR DstBuf = NULL;
+
+ ULONG dwBytes = 0;
+
+ NTSTATUS Status = STATUS_SUCCESS;
+
+
+ while (dwBytes < BytesTobecopied) {
+
+ ULONG Length = 0;
+
+ while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) {
+
+ SourceOffset -= MmGetMdlByteCount(SrcMdl);
+
+ SrcMdl = SrcMdl->Next;
+
+ if (NULL == SrcMdl) {
+
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+ }
+
+ while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) {
+
+ DestinationOffset -= MmGetMdlByteCount(DstMdl);
+
+ DstMdl = DstMdl->Next;
+
+ if (NULL == DstMdl) {
+
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+ }
+
+ DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl);
+
+ if (NULL == DstBuf) {
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ //
+ // Here we need to skip the OVERFLOW case via RtlCopyMemory :-(
+ //
+
+ if ( KsQueryMdlsSize(SrcMdl) - SourceOffset >
+ MmGetMdlByteCount(DstMdl) - DestinationOffset ) {
+
+ Length = BytesTobecopied - dwBytes;
+
+ if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) {
+ Length = KsQueryMdlsSize(SrcMdl) - SourceOffset;
+ }
+
+ if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) {
+ Length = MmGetMdlByteCount(DstMdl) - DestinationOffset;
+ }
+
+ SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl);
+
+ if (NULL == SrcBuf) {
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ RtlCopyMemory(
+ DstBuf + DestinationOffset,
+ SrcBuf + SourceOffset,
+ Length
+ );
+
+ } else {
+
+ Status = TdiCopyMdlToBuffer(
+ SrcMdl,
+ SourceOffset,
+ DstBuf,
+ DestinationOffset,
+ MmGetMdlByteCount(DstMdl),
+ &Length
+ );
+
+ if (STATUS_BUFFER_OVERFLOW == Status) {
+ cfs_enter_debugger();
+ } else if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+ }
+
+ SourceOffset += Length;
+ DestinationOffset += Length;
+ dwBytes += Length;
+ }
+
+errorout:
+
+ if (NT_SUCCESS(Status)) {
+ *BytesCopied = dwBytes;
+ } else {
+ *BytesCopied = 0;
+ }
+
+ return Status;
+}
+
+
+
+/*
+ * KsQueryMdlsSize
+ * Query the whole size of a MDL (may be chained)
+ *
+ * Arguments:
+ * Mdl: the Mdl to be queried
+ *
+ * Return Value:
+ * ULONG: the total size of the mdl
+ *
+ * NOTES:
+ * N/A
+ */
+
+ULONG
+KsQueryMdlsSize (PMDL Mdl)
+{
+ PMDL Next = Mdl;
+ ULONG Length = 0;
+
+
+ //
+ // Walking the MDL Chain ...
+ //
+
+ while (Next) {
+ Length += MmGetMdlByteCount(Next);
+ Next = Next->Next;
+ }
+
+ return (Length);
+}
+
+
+/*
+ * KsLockUserBuffer
+ * Allocate an MDL for the buffer and lock its
+ * pages in memory
+ *
+ * Arguments:
+ * UserBuffer: the user buffer to be locked
+ * Length: length in bytes of the buffer
+ * Operation: read or write access
+ * pMdl: the result of the created mdl
+ *
+ * Return Value:
+ * NTSTATUS: kernel status code (STATUS_SUCCESS
+ * or other error code)
+ *
+ * NOTES:
+ * N/A
+ */
+
+NTSTATUS
+KsLockUserBuffer (
+ IN PVOID UserBuffer,
+ IN BOOLEAN bPaged,
+ IN ULONG Length,
+ IN LOCK_OPERATION Operation,
+ OUT PMDL * pMdl
+ )
+{
+ NTSTATUS Status;
+ PMDL Mdl = NULL;
+
+ LASSERT(UserBuffer != NULL);
+
+ *pMdl = NULL;
+
+ Mdl = IoAllocateMdl(
+ UserBuffer,
+ Length,
+ FALSE,
+ FALSE,
+ NULL
+ );
+
+ if (Mdl == NULL) {
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+
+ } else {
+
+ __try {
+
+ if (bPaged) {
+ MmProbeAndLockPages(
+ Mdl,
+ KernelMode,
+ Operation
+ );
+ } else {
+ MmBuildMdlForNonPagedPool(
+ Mdl
+ );
+ }
+
+ Status = STATUS_SUCCESS;
+
+ *pMdl = Mdl;
+
+ } __except (EXCEPTION_EXECUTE_HANDLER) {
+
+ IoFreeMdl(Mdl);
+
+ Mdl = NULL;
+
+ cfs_enter_debugger();
+
+ Status = STATUS_INVALID_USER_BUFFER;
+ }
+ }
+
+ return Status;
+}
+
+/*
+ * KsMapMdlBuffer
+ * Map the mdl into a buffer in kernel space
+ *
+ * Arguments:
+ * Mdl: the mdl to be mapped
+ *
+ * Return Value:
+ * PVOID: the buffer mapped or NULL in failure
+ *
+ * NOTES:
+ * N/A
+ */
+
+PVOID
+KsMapMdlBuffer (PMDL Mdl)
+{
+ LASSERT(Mdl != NULL);
+
+ return MmGetSystemAddressForMdlSafe(
+ Mdl,
+ NormalPagePriority
+ );
+}
+
+
+/*
+ * KsReleaseMdl
+ * Unlock all the pages in the mdl
+ *
+ * Arguments:
+ * Mdl: memory description list to be released
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+VOID
+KsReleaseMdl (IN PMDL Mdl,
+ IN int Paged )
+{
+ LASSERT(Mdl != NULL);
+
+ while (Mdl) {
+
+ PMDL Next;
+
+ Next = Mdl->Next;
+
+ if (Paged) {
+ MmUnlockPages(Mdl);
+ }
+
+ IoFreeMdl(Mdl);
+
+ Mdl = Next;
+ }
+}
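+
+/*
+ * A minimal sketch of the expected lock/map/release pattern for a
+ * nonpaged buffer (error handling elided; buf and len are
+ * illustrative names only):
+ *
+ *     PMDL mdl = NULL;
+ *
+ *     if (NT_SUCCESS(KsLockUserBuffer(buf, FALSE, len,
+ *                                     IoReadAccess, &mdl))) {
+ *         PVOID va = KsMapMdlBuffer(mdl);
+ *         if (va != NULL) {
+ *             ... access len bytes at va ...
+ *         }
+ *         KsReleaseMdl(mdl, FALSE);  (FALSE: pages were not locked)
+ *     }
+ */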
+
+
+/*
+ * ksocknal_lock_buffer
+ * allocate an MDL for the user-specified buffer and lock (page in)
+ * all the pages of the buffer into system memory
+ *
+ * Arguments:
+ * buffer: the user buffer to be locked
+ * length: length in bytes of the buffer
+ * access: read or write access
+ * mdl: the result of the created mdl
+ *
+ * Return Value:
+ * int: the ksocknal error code: 0: success / -x: failure
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_lock_buffer (
+ void * buffer,
+ int paged,
+ int length,
+ LOCK_OPERATION access,
+ ksock_mdl_t ** kmdl
+ )
+{
+ NTSTATUS status;
+
+ status = KsLockUserBuffer(
+ buffer,
+                paged != 0,
+ length,
+ access,
+ kmdl
+ );
+
+ return cfs_error_code(status);
+}
+
+
+/*
+ * ksocknal_map_mdl
+ * Map the mdl pages into kernel space
+ *
+ * Arguments:
+ * mdl: the mdl to be mapped
+ *
+ * Return Value:
+ *   void *: the mapped buffer or NULL on failure
+ *
+ * Notes:
+ * N/A
+ */
+
+void *
+ksocknal_map_mdl (ksock_mdl_t * mdl)
+{
+ LASSERT(mdl != NULL);
+
+ return KsMapMdlBuffer(mdl);
+}
+
+/*
+ * ksocknal_release_mdl
+ * Unlock all the pages in the mdl and release the mdl
+ *
+ * Arguments:
+ *   mdl: memory descriptor list to be released
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_release_mdl (ksock_mdl_t *mdl, int paged)
+{
+ LASSERT(mdl != NULL);
+
+ KsReleaseMdl(mdl, paged);
+}
+
+
+/*
+ * ksocknal_create_tconn
+ *   allocate a new tconn structure from the SLAB cache or
+ *   NonPaged system pool
+ *
+ * Arguments:
+ * N/A
+ *
+ * Return Value:
+ * ksock_tconn_t *: the address of tconn or NULL if it fails
+ *
+ * NOTES:
+ * N/A
+ */
+
+ksock_tconn_t *
+ksocknal_create_tconn()
+{
+ ksock_tconn_t * tconn = NULL;
+
+    /* allocate ksock_tconn_t from the slab cache memory */
+
+ tconn = (ksock_tconn_t *)cfs_mem_cache_alloc(
+ ksocknal_data.ksnd_tconn_slab, CFS_ALLOC_ZERO);
+
+ if (tconn) {
+
+        /* zero tconn elements (CFS_ALLOC_ZERO already zeroed the
+           allocation; the memset is kept as a defensive measure) */
+        memset(tconn, 0, sizeof(ksock_tconn_t));
+
+ /* initialize the tconn ... */
+ tconn->kstc_magic = KS_TCONN_MAGIC;
+
+ ExInitializeWorkItem(
+ &(tconn->kstc_disconnect.WorkItem),
+ KsDisconnectHelper,
+ &(tconn->kstc_disconnect)
+ );
+
+ KeInitializeEvent(
+ &(tconn->kstc_disconnect.Event),
+ SynchronizationEvent,
+ FALSE );
+
+ ExInitializeWorkItem(
+ &(tconn->kstc_destroy),
+ ksocknal_destroy_tconn,
+ tconn
+ );
+
+ spin_lock_init(&(tconn->kstc_lock));
+
+ ksocknal_get_tconn(tconn);
+
+ spin_lock(&(ksocknal_data.ksnd_tconn_lock));
+
+ /* attach it into global list in ksocknal_data */
+
+ list_add(&(tconn->kstc_list), &(ksocknal_data.ksnd_tconns));
+ ksocknal_data.ksnd_ntconns++;
+ spin_unlock(&(ksocknal_data.ksnd_tconn_lock));
+
+ tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000;
+ }
+
+ return (tconn);
+}
+
+
+/*
+ * ksocknal_free_tconn
+ *   free the tconn structure to the SLAB cache or NonPaged
+ *   system pool
+ *
+ * Arguments:
+ *   tconn: the tconn to be freed
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_free_tconn(ksock_tconn_t * tconn)
+{
+ LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0);
+
+ spin_lock(&(ksocknal_data.ksnd_tconn_lock));
+
+ /* remove it from the global list */
+ list_del(&tconn->kstc_list);
+ ksocknal_data.ksnd_ntconns--;
+
+    /* if this is the last tconn, it is safe for
+       ksocknal_fini_tdi_data to quit ... */
+ if (ksocknal_data.ksnd_ntconns == 0) {
+ cfs_wake_event(&ksocknal_data.ksnd_tconn_exit);
+ }
+ spin_unlock(&(ksocknal_data.ksnd_tconn_lock));
+
+ /* free the structure memory */
+ cfs_mem_cache_free(ksocknal_data.ksnd_tconn_slab, tconn);
+}
+
+
+/*
+ * ksocknal_init_listener
+ * Initialize the tconn as a listener (daemon)
+ *
+ * Arguments:
+ * tconn: the listener tconn
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_init_listener(
+ ksock_tconn_t * tconn
+ )
+{
+    /* preparation: initialize the tconn members */
+
+ tconn->kstc_type = kstt_listener;
+
+ RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+ CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list));
+ CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list));
+
+ cfs_init_event( &(tconn->listener.kstc_accept_event),
+ TRUE,
+ FALSE );
+
+ cfs_init_event( &(tconn->listener.kstc_destroy_event),
+ TRUE,
+ FALSE );
+
+ tconn->kstc_state = ksts_inited;
+}
+
+
+/*
+ * ksocknal_init_sender
+ * Initialize the tconn as a sender
+ *
+ * Arguments:
+ * tconn: the sender tconn
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_init_sender(
+ ksock_tconn_t * tconn
+ )
+{
+ tconn->kstc_type = kstt_sender;
+ RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+ KsInitializeKsChain(&(tconn->sender.kstc_recv));
+ KsInitializeKsChain(&(tconn->sender.kstc_send));
+
+ tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+ tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+
+ tconn->kstc_state = ksts_inited;
+}
+
+/*
+ * ksocknal_init_child
+ * Initialize the tconn as a child
+ *
+ * Arguments:
+ * tconn: the child tconn
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+void
+ksocknal_init_child(
+ ksock_tconn_t * tconn
+ )
+{
+ tconn->kstc_type = kstt_child;
+ RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+ KsInitializeKsChain(&(tconn->child.kstc_recv));
+ KsInitializeKsChain(&(tconn->child.kstc_send));
+
+ tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+ tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+
+ tconn->kstc_state = ksts_inited;
+}
+
+/*
+ * ksocknal_get_tconn
+ *   increase the reference count of the tconn by 1
+ *
+ * Arguments:
+ * tconn: the tdi connection to be referred
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+void
+ksocknal_get_tconn(
+ ksock_tconn_t * tconn
+ )
+{
+ atomic_inc(&(tconn->kstc_refcount));
+}
+
+/*
+ * ksocknal_put_tconn
+ *   decrease the reference count of the tconn and destroy
+ *   it if the refcount becomes 0.
+ *
+ * Arguments:
+ * tconn: the tdi connection to be dereferred
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+void
+ksocknal_put_tconn(
+ ksock_tconn_t *tconn
+ )
+{
+ if (atomic_dec_and_test(&(tconn->kstc_refcount))) {
+
+ spin_lock(&(tconn->kstc_lock));
+
+ if ( ( tconn->kstc_type == kstt_child ||
+ tconn->kstc_type == kstt_sender ) &&
+ ( tconn->kstc_state == ksts_connected ) ) {
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ ksocknal_abort_tconn(tconn);
+
+ } else {
+
+ if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) {
+ cfs_enter_debugger();
+ } else {
+ ExQueueWorkItem(
+ &(tconn->kstc_destroy),
+ DelayedWorkQueue
+ );
+
+ cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY);
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+ }
+ }
+}
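+
+/*
+ * Illustrative sketch (not compiled): every ksocknal_get_tconn() must be
+ * balanced by a ksocknal_put_tconn(). The final put queues the destroy
+ * work item, so the caller must not touch the tconn afterwards.
+ */
+#if 0
+static void
+ks_tconn_ref_example(ksock_tconn_t * tconn)
+{
+    ksocknal_get_tconn(tconn);     /* take a private reference */
+
+    /* ... the tconn may be used safely here ... */
+
+    ksocknal_put_tconn(tconn);     /* drop it: may trigger destruction */
+}
+#endif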
+
+/*
+ * ksocknal_destroy_tconn
+ * cleanup the tdi connection and free it
+ *
+ * Arguments:
+ * tconn: the tdi connection to be cleaned.
+ *
+ * Return Value:
+ * N/A
+ *
+ * NOTES:
+ * N/A
+ */
+
+void
+ksocknal_destroy_tconn(
+ ksock_tconn_t * tconn
+ )
+{
+ LASSERT(tconn->kstc_refcount.counter == 0);
+
+ if (tconn->kstc_type == kstt_listener) {
+
+ ksocknal_reset_handlers(tconn);
+
+ /* for listener, we just need to close the address object */
+ KsCloseAddress(
+ tconn->kstc_addr.Handle,
+ tconn->kstc_addr.FileObject
+ );
+
+ tconn->kstc_state = ksts_inited;
+
+ } else if (tconn->kstc_type == kstt_child) {
+
+        /* for child tdi connections */
+
+        /* disassociate the relation between its connection object
+           and the address object */
+
+ if (tconn->kstc_state == ksts_associated) {
+ KsDisassociateAddress(
+ tconn->child.kstc_info.FileObject
+ );
+ }
+
+ /* release the connection object */
+
+ KsCloseConnection(
+ tconn->child.kstc_info.Handle,
+ tconn->child.kstc_info.FileObject
+ );
+
+        /* release its reference to its parent's address object */
+ KsCloseAddress(
+ NULL,
+ tconn->kstc_addr.FileObject
+ );
+
+ spin_lock(&tconn->child.kstc_parent->kstc_lock);
+ spin_lock(&tconn->kstc_lock);
+
+ tconn->kstc_state = ksts_inited;
+
+        /* remove it from its parent's queues */
+
+ if (tconn->child.kstc_queued) {
+
+ list_del(&(tconn->child.kstc_link));
+
+ if (tconn->child.kstc_queueno) {
+
+ LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0);
+ tconn->child.kstc_parent->listener.kstc_accepted.num -= 1;
+
+ } else {
+
+ LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0);
+ tconn->child.kstc_parent->listener.kstc_listening.num -= 1;
+ }
+
+ tconn->child.kstc_queued = FALSE;
+ }
+
+ spin_unlock(&tconn->kstc_lock);
+ spin_unlock(&tconn->child.kstc_parent->kstc_lock);
+
+ /* drop the reference of the parent tconn */
+ ksocknal_put_tconn(tconn->child.kstc_parent);
+
+ } else if (tconn->kstc_type == kstt_sender) {
+
+ ksocknal_reset_handlers(tconn);
+
+ /* release the connection object */
+
+ KsCloseConnection(
+ tconn->sender.kstc_info.Handle,
+ tconn->sender.kstc_info.FileObject
+ );
+
+        /* release its reference to its parent's address object */
+ KsCloseAddress(
+ tconn->kstc_addr.Handle,
+ tconn->kstc_addr.FileObject
+ );
+
+ tconn->kstc_state = ksts_inited;
+
+ } else {
+ cfs_enter_debugger();
+ }
+
+ /* free the tconn structure ... */
+
+ ksocknal_free_tconn(tconn);
+}
+
+
+/*
+ * ksocknal_lock_iovs
+ * Lock the i/o vector buffers into MDL structure
+ *
+ * Arguments:
+ *   iov: the array of i/o vectors
+ *   niov: number of i/o vectors to be locked
+ *   recving: nonzero if the buffers are receive targets (write access)
+ *   len: to receive the total length of the locked buffers
+ *
+ * Return Value:
+ *   ksock_mdl_t *: the Mdl of the locked buffers, or
+ *                  NULL on failure
+ *
+ * Notes:
+ * N/A
+ */
+
+ksock_mdl_t *
+ksocknal_lock_iovs(
+ IN struct iovec *iov,
+ IN int niov,
+ IN int recving,
+ IN int * len )
+{
+ int rc = 0;
+
+ int i = 0;
+ int total = 0;
+ ksock_mdl_t * mdl = NULL;
+ ksock_mdl_t * tail = NULL;
+
+ LASSERT(iov != NULL);
+ LASSERT(niov > 0);
+ LASSERT(len != NULL);
+
+ for (i=0; i < niov; i++) {
+
+ ksock_mdl_t * Iovec = NULL;
+
+ rc = ksocknal_lock_buffer(
+ iov[i].iov_base,
+ FALSE,
+ iov[i].iov_len,
+ recving ? IoWriteAccess : IoReadAccess,
+ &Iovec );
+
+ if (rc < 0) {
+ break;
+ }
+
+ if (tail) {
+ tail->Next = Iovec;
+ } else {
+ mdl = Iovec;
+ }
+
+ tail = Iovec;
+
+        total += iov[i].iov_len;
+ }
+
+ if (rc >= 0) {
+ *len = total;
+ } else {
+ if (mdl) {
+ ksocknal_release_mdl(mdl, FALSE);
+ mdl = NULL;
+ }
+ }
+
+ return mdl;
+}
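+
+/*
+ * Illustrative sketch (not compiled): locking a two-element iovec array
+ * for receiving. The buffers and lengths are hypothetical; a real caller
+ * would pass the resulting chain to ksocknal_recv_mdl().
+ */
+#if 0
+static int
+ks_lock_iovs_example(void *b0, int l0, void *b1, int l1)
+{
+    struct iovec    iov[2];
+    ksock_mdl_t *   mdl;
+    int             total = 0;
+
+    iov[0].iov_base = b0;   iov[0].iov_len = l0;
+    iov[1].iov_base = b1;   iov[1].iov_len = l1;
+
+    /* recving != 0 locks the pages for IoWriteAccess */
+    mdl = ksocknal_lock_iovs(iov, 2, 1, &total);
+    if (mdl == NULL) {
+        return -ENOMEM;
+    }
+
+    /* ... hand the chained mdl to ksocknal_recv_mdl() ... */
+
+    ksocknal_release_mdl(mdl, FALSE);
+    return total;
+}
+#endif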
+
+int
+ksocknal_query_data(
+ ksock_tconn_t * tconn,
+ size_t * size,
+ int bIsExpedited )
+{
+ int rc = 0;
+
+ PKS_CHAIN KsChain;
+ PKS_TSDUMGR KsTsduMgr;
+
+ *size = 0;
+
+ ksocknal_get_tconn(tconn);
+ spin_lock(&(tconn->kstc_lock));
+
+ if ( tconn->kstc_type != kstt_sender &&
+ tconn->kstc_type != kstt_child) {
+ rc = -EINVAL;
+ spin_unlock(&(tconn->kstc_lock));
+ goto errorout;
+ }
+
+ if (tconn->kstc_state != ksts_connected) {
+ rc = -ENOTCONN;
+ spin_unlock(&(tconn->kstc_lock));
+ goto errorout;
+ }
+
+ if (tconn->kstc_type == kstt_sender) {
+ KsChain = &(tconn->sender.kstc_recv);
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ KsChain = &(tconn->child.kstc_recv);
+ }
+
+ if (bIsExpedited) {
+ KsTsduMgr = &(KsChain->Expedited);
+ } else {
+ KsTsduMgr = &(KsChain->Normal);
+ }
+
+ *size = KsTsduMgr->TotalBytes;
+ spin_unlock(&(tconn->kstc_lock));
+
+errorout:
+
+ ksocknal_put_tconn(tconn);
+
+ return (rc);
+}
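+
+/*
+ * Illustrative sketch (not compiled): polling how many normal-priority
+ * bytes are queued for reception on a connected tconn.
+ */
+#if 0
+static int
+ks_query_pending_example(ksock_tconn_t * tconn)
+{
+    size_t  pending = 0;
+    int     rc;
+
+    rc = ksocknal_query_data(tconn, &pending, 0 /* not expedited */);
+    if (rc < 0) {
+        return rc;
+    }
+
+    return (int)pending;    /* bytes buffered in the normal TSDU queue */
+}
+#endif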
+
+/*
+ * ksocknal_get_tcp_option
+ *   Query the options of the tcp stream connection
+ *
+ * Arguments:
+ * tconn: the tdi connection
+ * ID: option id
+ * OptionValue: buffer to store the option value
+ * Length: the length of the value, to be returned
+ *
+ * Return Value:
+ * int: ksocknal return code
+ *
+ * NOTES:
+ * N/A
+ */
+
+int
+ksocknal_get_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ PULONG Length
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ IO_STATUS_BLOCK IoStatus;
+
+ TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx;
+
+ PFILE_OBJECT ConnectionObject;
+ PDEVICE_OBJECT DeviceObject = NULL;
+
+ PIRP Irp = NULL;
+ PIO_STACK_LOCATION IrpSp = NULL;
+
+ KEVENT Event;
+
+    /* make sure the tdi connection is connected */
+
+ ksocknal_get_tconn(tconn);
+
+ if (tconn->kstc_state != ksts_connected) {
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ LASSERT(tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child);
+
+ if (tconn->kstc_type == kstt_sender) {
+ ConnectionObject = tconn->sender.kstc_info.FileObject;
+ } else {
+ ConnectionObject = tconn->child.kstc_info.FileObject;
+ }
+
+ QueryInfoEx.ID.toi_id = ID;
+ QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION;
+ QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL;
+ QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY;
+ QueryInfoEx.ID.toi_entity.tei_instance = 0;
+
+ RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE);
+
+ KeInitializeEvent(&Event, NotificationEvent, FALSE);
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ Irp = IoBuildDeviceIoControlRequest(
+ IOCTL_TCP_QUERY_INFORMATION_EX,
+ DeviceObject,
+ &QueryInfoEx,
+ sizeof(TCP_REQUEST_QUERY_INFORMATION_EX),
+ OptionValue,
+ *Length,
+ FALSE,
+ &Event,
+ &IoStatus
+ );
+
+ if (Irp == NULL) {
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(Irp);
+
+ if (IrpSp == NULL) {
+
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ IrpSp->FileObject = ConnectionObject;
+ IrpSp->DeviceObject = DeviceObject;
+
+ Status = IoCallDriver(DeviceObject, Irp);
+
+ if (Status == STATUS_PENDING) {
+
+ KeWaitForSingleObject(
+ &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ Status = IoStatus.Status;
+ }
+
+
+ if (NT_SUCCESS(Status)) {
+ *Length = IoStatus.Information;
+ } else {
+ cfs_enter_debugger();
+ memset(OptionValue, 0, *Length);
+ Status = STATUS_SUCCESS;
+ }
+
+errorout:
+
+ ksocknal_put_tconn(tconn);
+
+ return cfs_error_code(Status);
+}
+
+/*
+ * ksocknal_set_tcp_option
+ *   Set the options for the tcp stream connection
+ *
+ * Arguments:
+ * tconn: the tdi connection
+ * ID: option id
+ * OptionValue: buffer containing the new option value
+ * Length: the length of the value
+ *
+ * Return Value:
+ * int: ksocknal return code
+ *
+ * NOTES:
+ * N/A
+ */
+
+int
+ksocknal_set_tcp_option (
+ ksock_tconn_t * tconn,
+ ULONG ID,
+ PVOID OptionValue,
+ ULONG Length
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ IO_STATUS_BLOCK IoStatus;
+
+ ULONG SetInfoExLength;
+ PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL;
+
+ PFILE_OBJECT ConnectionObject;
+ PDEVICE_OBJECT DeviceObject = NULL;
+
+ PIRP Irp = NULL;
+ PIO_STACK_LOCATION IrpSp = NULL;
+
+ PKEVENT Event;
+
+    /* make sure the tdi connection is connected */
+
+ ksocknal_get_tconn(tconn);
+
+ if (tconn->kstc_state != ksts_connected) {
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ LASSERT(tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child);
+
+ if (tconn->kstc_type == kstt_sender) {
+ ConnectionObject = tconn->sender.kstc_info.FileObject;
+ } else {
+ ConnectionObject = tconn->child.kstc_info.FileObject;
+ }
+
+ SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT);
+
+ SetInfoEx = ExAllocatePoolWithTag(
+ NonPagedPool,
+ SetInfoExLength,
+ 'TSSK'
+ );
+
+ if (SetInfoEx == NULL) {
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ SetInfoEx->ID.toi_id = ID;
+
+ SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION;
+ SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL;
+ SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY;
+ SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE;
+
+ SetInfoEx->BufferSize = Length;
+ RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length);
+
+ Event = (PKEVENT)(&(SetInfoEx->Buffer[Length]));
+ KeInitializeEvent(Event, NotificationEvent, FALSE);
+
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ Irp = IoBuildDeviceIoControlRequest(
+ IOCTL_TCP_SET_INFORMATION_EX,
+ DeviceObject,
+ SetInfoEx,
+ SetInfoExLength,
+ NULL,
+ 0,
+ FALSE,
+ Event,
+ &IoStatus
+ );
+
+ if (Irp == NULL) {
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ IrpSp = IoGetNextIrpStackLocation(Irp);
+
+ if (IrpSp == NULL) {
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ IrpSp->FileObject = ConnectionObject;
+ IrpSp->DeviceObject = DeviceObject;
+
+ Status = IoCallDriver(DeviceObject, Irp);
+
+ if (Status == STATUS_PENDING) {
+
+ KeWaitForSingleObject(
+ Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ Status = IoStatus.Status;
+ }
+
+errorout:
+
+ if (SetInfoEx) {
+ ExFreePool(SetInfoEx);
+ }
+
+ if (!NT_SUCCESS(Status)) {
+        printk("ksocknal_set_tcp_option: error setting tcp option: ID (%d), Status = %xh\n",
+               ID, Status);
+ Status = STATUS_SUCCESS;
+ }
+
+ ksocknal_put_tconn(tconn);
+
+ return cfs_error_code(Status);
+}
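+
+/*
+ * Illustrative sketch (not compiled): enabling a boolean connection-level
+ * option on a connected tconn. TCP_SOCKET_NODELAY is assumed to be the
+ * toi_id defined by the DDK's <tcpioctl.h>; any connection-level option
+ * id is set the same way.
+ */
+#if 0
+static int
+ks_set_nodelay_example(ksock_tconn_t * tconn)
+{
+    ULONG value = 1;
+
+    /* Length is the size of the option value buffer */
+    return ksocknal_set_tcp_option(tconn, TCP_SOCKET_NODELAY,
+                                   &value, sizeof(value));
+}
+#endif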
+
+/*
+ * ksocknal_bind_tconn
+ * bind the tdi connection object with an address
+ *
+ * Arguments:
+ * tconn: tconn to be bound
+ * parent: the parent tconn object
+ * ipaddr: the ip address
+ * port: the port number
+ *
+ * Return Value:
+ * int: 0 for success or ksocknal error codes.
+ *
+ * NOTES:
+ * N/A
+ */
+
+int
+ksocknal_bind_tconn (
+ ksock_tconn_t * tconn,
+ ksock_tconn_t * parent,
+ ulong_ptr addr,
+ unsigned short port
+ )
+{
+ NTSTATUS status;
+ int rc = 0;
+
+ ksock_tdi_addr_t taddr;
+
+ memset(&taddr, 0, sizeof(ksock_tdi_addr_t));
+
+ if (tconn->kstc_state != ksts_inited) {
+
+ status = STATUS_INVALID_PARAMETER;
+ rc = cfs_error_code(status);
+
+ goto errorout;
+
+ } else if (tconn->kstc_type == kstt_child) {
+
+ if (NULL == parent) {
+ status = STATUS_INVALID_PARAMETER;
+ rc = cfs_error_code(status);
+
+ goto errorout;
+ }
+
+        /* reference its parent's address object */
+
+ taddr = parent->kstc_addr;
+ ObReferenceObject(taddr.FileObject);
+
+ ksocknal_get_tconn(parent);
+
+ } else {
+
+ PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi);
+ ULONG AddrLen = 0;
+
+        /* initialize the tdi address */
+
+ TdiAddress->TAAddressCount = 1;
+ TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+ TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP;
+
+ ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+ ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+
+ memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+
+
+ /* open the transport address object */
+
+ AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +
+ TDI_ADDRESS_LENGTH_IP;
+
+ status = KsOpenAddress(
+ &(tconn->kstc_dev),
+ &(taddr.Tdi),
+ AddrLen,
+ &(taddr.Handle),
+ &(taddr.FileObject)
+ );
+
+ if (!NT_SUCCESS(status)) {
+
+ rc = cfs_error_code(status);
+ goto errorout;
+ }
+ }
+
+ if (tconn->kstc_type == kstt_child) {
+ tconn->child.kstc_parent = parent;
+ }
+
+ tconn->kstc_state = ksts_bind;
+ tconn->kstc_addr = taddr;
+
+errorout:
+
+ return (rc);
+}
+
+/*
+ * ksocknal_build_tconn
+ * build tcp/streaming connection to remote peer
+ *
+ * Arguments:
+ * tconn: tconn to be connected to the peer
+ * addr: the peer's ip address
+ * port: the peer's port number
+ *
+ * Return Value:
+ * int: 0 for success or ksocknal error codes.
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_build_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr addr,
+ unsigned short port
+ )
+{
+ int rc = 0;
+ NTSTATUS status = STATUS_SUCCESS;
+
+
+ PFILE_OBJECT ConnectionObject = NULL;
+ PDEVICE_OBJECT DeviceObject = NULL;
+
+ PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
+ ULONG AddrLength;
+
+ PIRP Irp = NULL;
+
+ LASSERT(tconn->kstc_type == kstt_sender);
+ LASSERT(tconn->kstc_state == ksts_bind);
+
+ ksocknal_get_tconn(tconn);
+
+ {
+ /* set the event callbacks */
+ rc = ksocknal_set_handlers(tconn);
+
+ if (rc < 0) {
+ cfs_enter_debugger();
+ goto errorout;
+ }
+ }
+
+ /* create the connection file handle / object */
+ status = KsOpenConnection(
+ &(tconn->kstc_dev),
+ (CONNECTION_CONTEXT)tconn,
+ &(tconn->sender.kstc_info.Handle),
+ &(tconn->sender.kstc_info.FileObject)
+ );
+
+ if (!NT_SUCCESS(status)) {
+ rc = cfs_error_code(status);
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+    /* associate the connection with the address object of the tconn */
+
+ status = KsAssociateAddress(
+ tconn->kstc_addr.Handle,
+ tconn->sender.kstc_info.FileObject
+ );
+
+ if (!NT_SUCCESS(status)) {
+ rc = cfs_error_code(status);
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ tconn->kstc_state = ksts_associated;
+
+ /* Allocating Connection Info Together with the Address */
+ AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
+ + TDI_ADDRESS_LENGTH_IP;
+
+ ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+ NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK');
+
+ if (NULL == ConnectionInfo) {
+
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ rc = cfs_error_code(status);
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ /* Initializing ConnectionInfo ... */
+ {
+ PTRANSPORT_ADDRESS TdiAddress;
+
+ /* ConnectionInfo settings */
+
+ ConnectionInfo->UserDataLength = 0;
+ ConnectionInfo->UserData = NULL;
+ ConnectionInfo->OptionsLength = 0;
+ ConnectionInfo->Options = NULL;
+ ConnectionInfo->RemoteAddressLength = AddrLength;
+ ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
+
+
+        /* initialize the tdi address */
+
+ TdiAddress = ConnectionInfo->RemoteAddress;
+
+ TdiAddress->TAAddressCount = 1;
+ TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+ TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP;
+
+ ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+ ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+
+ memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+ }
+
+ /* Now prepare to connect the remote peer ... */
+
+ ConnectionObject = tconn->sender.kstc_info.FileObject;
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ /* allocate a new Irp */
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ rc = cfs_error_code(status);
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ /* setup the Irp */
+
+ TdiBuildConnect(
+ Irp,
+ DeviceObject,
+ ConnectionObject,
+ NULL,
+ NULL,
+ NULL,
+ ConnectionInfo,
+ NULL
+ );
+
+
+    /* submit the Irp to the underlying transport driver */
+ status = KsSubmitTdiIrp(
+ DeviceObject,
+ Irp,
+ TRUE,
+ NULL
+ );
+
+ spin_lock(&(tconn->kstc_lock));
+
+ if (NT_SUCCESS(status)) {
+
+        /* Connected! The connection was built successfully. */
+
+ tconn->kstc_state = ksts_connected;
+
+ tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo;
+ tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress;
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ } else {
+
+ /* Not connected! Abort it ... */
+
+ if (rc != 0) {
+ cfs_enter_debugger();
+ }
+
+ Irp = NULL;
+ rc = cfs_error_code(status);
+
+ tconn->kstc_state = ksts_associated;
+ spin_unlock(&(tconn->kstc_lock));
+
+        /* disassociate the connection and the address object;
+           after cleanup it is safe to set the state to aborted ... */
+
+ if ( NT_SUCCESS(KsDisassociateAddress(
+ tconn->sender.kstc_info.FileObject))) {
+ tconn->kstc_state = ksts_aborted;
+ }
+
+ /* reset the event callbacks */
+ rc = ksocknal_reset_handlers(tconn);
+
+ goto errorout;
+ }
+
+errorout:
+
+ if (NT_SUCCESS(status)) {
+
+ ksocknal_query_local_ipaddr(tconn);
+
+ } else {
+
+ if (ConnectionInfo) {
+ ExFreePool(ConnectionInfo);
+ }
+ if (Irp) {
+ IoFreeIrp(Irp);
+ }
+ }
+
+ ksocknal_put_tconn(tconn);
+
+ return (rc);
+}
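+
+/*
+ * Illustrative sketch (not compiled): the full active-connect sequence
+ * using the helpers above: create, init as sender, bind, then connect.
+ * Addresses and ports are hypothetical host-order values; the error
+ * handling mirrors the patterns used elsewhere in this file.
+ */
+#if 0
+static int
+ks_connect_example(ulong_ptr local_ip, unsigned short local_port,
+                   ulong_ptr peer_ip,  unsigned short peer_port)
+{
+    ksock_tconn_t * tconn;
+    int             rc;
+
+    tconn = ksocknal_create_tconn();
+    if (tconn == NULL) {
+        return -ENOMEM;
+    }
+
+    ksocknal_init_sender(tconn);
+
+    /* bind the local address first ... */
+    rc = ksocknal_bind_tconn(tconn, NULL, local_ip, local_port);
+    if (rc < 0) {
+        ksocknal_free_tconn(tconn);
+        return rc;
+    }
+
+    /* ... then connect to the remote peer */
+    rc = ksocknal_build_tconn(tconn, peer_ip, peer_port);
+    if (rc < 0) {
+        ksocknal_put_tconn(tconn);
+    }
+
+    return rc;
+}
+#endif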
+
+
+/*
+ * ksocknal_disconnect_tconn
+ * disconnect the tconn from a connection
+ *
+ * Arguments:
+ * tconn: the tdi connecton object connected already
+ * flags: flags & options for disconnecting
+ *
+ * Return Value:
+ * int: ksocknal error code
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_disconnect_tconn(
+ ksock_tconn_t * tconn,
+ ulong_ptr flags
+ )
+{
+ NTSTATUS status = STATUS_SUCCESS;
+
+ ksock_tconn_info_t * info;
+
+ PFILE_OBJECT ConnectionObject;
+ PDEVICE_OBJECT DeviceObject = NULL;
+
+ PIRP Irp = NULL;
+
+ KEVENT Event;
+
+ ksocknal_get_tconn(tconn);
+
+    /* make sure it's connected already and it
+       must be a sender or a child ... */
+
+ LASSERT(tconn->kstc_state == ksts_connected);
+ LASSERT( tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child);
+
+ /* reset all the event handlers to NULL */
+
+ if (tconn->kstc_type != kstt_child) {
+ ksocknal_reset_handlers (tconn);
+ }
+
+    /* Disconnecting from the remote peer ... */
+
+ if (tconn->kstc_type == kstt_sender) {
+ info = &(tconn->sender.kstc_info);
+ } else {
+ info = &(tconn->child.kstc_info);
+ }
+
+ ConnectionObject = info->FileObject;
+ DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+ /* allocate an Irp and setup it */
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ status = STATUS_INSUFFICIENT_RESOURCES;
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ KeInitializeEvent(
+ &Event,
+ SynchronizationEvent,
+ FALSE
+ );
+
+ TdiBuildDisconnect(
+ Irp,
+ DeviceObject,
+ ConnectionObject,
+ KsDisconectCompletionRoutine,
+ &Event,
+ NULL,
+ flags,
+ NULL,
+ NULL
+ );
+
+ /* issue the Irp to the underlying transport
+ driver to disconnect the connection */
+
+ status = IoCallDriver(DeviceObject, Irp);
+
+ if (STATUS_PENDING == status) {
+
+ status = KeWaitForSingleObject(
+ &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ status = Irp->IoStatus.Status;
+ }
+
+ KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n",
+ status, KsNtStatusToString(status)));
+
+ IoFreeIrp(Irp);
+
+ if (info->ConnectionInfo) {
+
+ /* disassociate the association between connection/address objects */
+
+ status = KsDisassociateAddress(ConnectionObject);
+
+ if (!NT_SUCCESS(status)) {
+ cfs_enter_debugger();
+ }
+
+ spin_lock(&(tconn->kstc_lock));
+
+ /* cleanup the tsdumgr Lists */
+ KsCleanupTsdu (tconn);
+
+ /* set the state of the tconn */
+ if (NT_SUCCESS(status)) {
+ tconn->kstc_state = ksts_disconnected;
+ } else {
+ tconn->kstc_state = ksts_associated;
+ }
+
+ /* free the connection info to system pool*/
+ ExFreePool(info->ConnectionInfo);
+ info->ConnectionInfo = NULL;
+ info->Remote = NULL;
+
+ spin_unlock(&(tconn->kstc_lock));
+ }
+
+ status = STATUS_SUCCESS;
+
+errorout:
+
+ ksocknal_put_tconn(tconn);
+
+ return cfs_error_code(status);
+}
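+
+/*
+ * Illustrative sketch (not compiled): a graceful disconnect followed by
+ * the final reference drop. TDI_DISCONNECT_RELEASE requests an orderly
+ * release; TDI_DISCONNECT_ABORT would reset the connection instead.
+ */
+#if 0
+static void
+ks_disconnect_example(ksock_tconn_t * tconn)
+{
+    /* orderly shutdown of a connected sender/child tconn */
+    ksocknal_disconnect_tconn(tconn, TDI_DISCONNECT_RELEASE);
+
+    /* drop the caller's reference */
+    ksocknal_put_tconn(tconn);
+}
+#endif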
+
+
+/*
+ * ksocknal_abort_tconn
+ *   The connection was broken unexpectedly. We need to do
+ *   some cleanup.
+ *
+ * Arguments:
+ * tconn: the tdi connection
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_abort_tconn(
+ ksock_tconn_t * tconn
+ )
+{
+ PKS_DISCONNECT_WORKITEM WorkItem = NULL;
+
+ WorkItem = &(tconn->kstc_disconnect);
+
+ ksocknal_get_tconn(tconn);
+ spin_lock(&(tconn->kstc_lock));
+
+ if (tconn->kstc_state != ksts_connected) {
+ ksocknal_put_tconn(tconn);
+ } else {
+
+ if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
+
+ WorkItem->Flags = TDI_DISCONNECT_ABORT;
+ WorkItem->tconn = tconn;
+
+ cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+
+ ExQueueWorkItem(
+ &(WorkItem->WorkItem),
+ DelayedWorkQueue
+ );
+ }
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+}
+
+
+/*
+ * ksocknal_query_local_ipaddr
+ * query the local connection ip address
+ *
+ * Arguments:
+ * tconn: the tconn which is connected
+ *
+ * Return Value:
+ * int: ksocknal error code
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_query_local_ipaddr(
+ ksock_tconn_t * tconn
+ )
+{
+ PFILE_OBJECT FileObject = NULL;
+ NTSTATUS status;
+
+ PTRANSPORT_ADDRESS TdiAddress;
+ ULONG AddressLength;
+
+ if (tconn->kstc_type == kstt_sender) {
+ FileObject = tconn->sender.kstc_info.FileObject;
+ } else if (tconn->kstc_type == kstt_child) {
+ FileObject = tconn->child.kstc_info.FileObject;
+ } else {
+ status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ TdiAddress = &(tconn->kstc_addr.Tdi);
+ AddressLength = MAX_ADDRESS_LENGTH;
+
+ status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength);
+
+ if (NT_SUCCESS(status)) {
+
+ KsPrint((0, "ksocknal_query_local_ipaddr: Local ip address = %xh port = %xh\n",
+ ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr,
+ ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port ));
+ } else {
+        KsPrint((0, "ksocknal_query_local_ipaddr: Failed to query the connection's local ip address.\n"));
+ }
+
+errorout:
+
+ return cfs_error_code(status);
+}
+
+/*
+ * ksocknal_send_mdl
+ * send MDL chain to the peer for a stream connection
+ *
+ * Arguments:
+ * tconn: tdi connection object
+ * tx: the transmit context
+ * mdl: the mdl chain containing the data
+ * len: length of the data
+ * flags: flags of the transmission
+ *
+ * Return Value:
+ * ksocknal return code
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_send_mdl(
+ ksock_tconn_t * tconn,
+ void * tx,
+ ksock_mdl_t * mdl,
+ int len,
+ int flags
+ )
+{
+ NTSTATUS Status;
+ int rc = 0;
+ ulong_ptr length;
+ ulong_ptr tflags;
+ ksock_tdi_tx_t * context;
+
+ PKS_CHAIN KsChain;
+ PKS_TSDUMGR KsTsduMgr;
+ PKS_TSDU KsTsdu;
+ PKS_TSDU_BUF KsTsduBuf;
+ PKS_TSDU_DAT KsTsduDat;
+
+ BOOLEAN bNewTsdu = FALSE; /* newly allocated */
+ BOOLEAN bNewBuff = FALSE; /* newly allocated */
+
+    BOOLEAN             bBuffed;   /* buffered sending */
+
+ PUCHAR Buffer = NULL;
+ ksock_mdl_t * NewMdl = NULL;
+
+ PIRP Irp = NULL;
+ PFILE_OBJECT ConnObject;
+ PDEVICE_OBJECT DeviceObject;
+
+ BOOLEAN bIsNonBlock;
+
+ ksocknal_get_tconn(tconn);
+
+ tflags = ksocknal_tdi_send_flags(flags);
+ bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT);
+
+ spin_lock(&tconn->kstc_lock);
+
+ LASSERT( tconn->kstc_type == kstt_sender ||
+ tconn->kstc_type == kstt_child );
+
+ if (tconn->kstc_state != ksts_connected) {
+ spin_unlock(&tconn->kstc_lock);
+ ksocknal_put_tconn(tconn);
+ return -ENOTCONN;
+ }
+
+    /* get the latest Tsdu buffer from the TsduMgr list;
+       just set NULL if the list is empty. */
+
+ if (tconn->kstc_type == kstt_sender) {
+ KsChain = &(tconn->sender.kstc_send);
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ KsChain = &(tconn->child.kstc_send);
+ }
+
+ if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) {
+ KsTsduMgr = &(KsChain->Expedited);
+ } else {
+ KsTsduMgr = &(KsChain->Normal);
+ }
+
+ if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) {
+ bBuffed = TRUE;
+ } else {
+ bBuffed = FALSE;
+ }
+
+    /* do the preparation work for buffered sending */
+
+ if (bBuffed) {
+
+        /* if the data is even larger than the biggest Tsdu, we have
+           to allocate a new buffer and use TSDU_TYPE_BUF to store it */
+
+ if ( KS_TSDU_STRU_SIZE((ULONG)len) > ksocknal_data.ksnd_tsdu_size
+ - KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
+ bNewBuff = TRUE;
+ }
+
+ if (list_empty(&(KsTsduMgr->TsduList))) {
+
+ LASSERT(KsTsduMgr->NumOfTsdu == 0);
+ KsTsdu = NULL;
+
+ } else {
+
+ LASSERT(KsTsduMgr->NumOfTsdu > 0);
+ KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+ LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+
+ /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */
+ if (bNewBuff) {
+ if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) {
+ KsTsdu = NULL;
+ }
+ } else {
+ if ( KS_TSDU_STRU_SIZE((ULONG)len) >
+ KsTsdu->TotalLength - KsTsdu->LastOffset ) {
+ KsTsdu = NULL;
+ }
+ }
+ }
+
+        /* if there's no Tsdu, or the free space is not enough for a
+           KS_TSDU_BUF or KS_TSDU_DAT, we need to allocate a new Tsdu */
+
+ if (NULL == KsTsdu) {
+
+ KsTsdu = KsAllocateKsTsdu();
+
+ if (NULL == KsTsdu) {
+ bBuffed = FALSE;
+ bNewBuff = FALSE;
+ } else {
+ bNewTsdu = TRUE;
+ }
+ }
+
+ /* process the case that a new buffer is to be allocated from system memory */
+ if (bNewBuff) {
+
+ /* now allocating internal buffer to contain the payload */
+ Buffer = ExAllocatePool(NonPagedPool, len);
+
+ if (NULL == Buffer) {
+ bBuffed = FALSE;
+ }
+ }
+ }
+
+ if (bBuffed) {
+
+ if (bNewBuff) {
+
+ /* queue a new KS_TSDU_BUF to the Tsdu buffer */
+ KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+ KsTsduBuf->TsduFlags = 0;
+ KsTsduBuf->DataLength = (ULONG)len;
+ KsTsduBuf->StartOffset = 0;
+ KsTsduBuf->UserBuffer = Buffer;
+ } else {
+            /* queue a new KS_TSDU_DAT to the Tsdu buffer */
+ KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+ KsTsduDat->TsduFlags = 0;
+ KsTsduDat->DataLength = (ULONG)len;
+ KsTsduDat->StartOffset = 0;
+ KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len);
+
+ Buffer = &KsTsduDat->Data[0];
+ }
+
+        /* now lock the Buffer and copy the user payload into it */
+ ASSERT(Buffer != NULL);
+
+ rc = ksocknal_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl);
+ if (rc != 0) {
+            printk("ksocknal_send_mdl: buffered: error allocating mdl.\n");
+ bBuffed = FALSE;
+ } else {
+ ULONG BytesCopied = 0;
+ TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied);
+ if (BytesCopied != (ULONG) len) {
+ bBuffed = FALSE;
+ }
+ }
+
+        /* Do the finalizing work if we succeeded in locking the buffer
+           and moving the user data; otherwise clean up ... */
+ if (bBuffed) {
+
+ if (bNewBuff) {
+ KsTsduBuf->TsduType = TSDU_TYPE_BUF;
+ KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+
+ } else {
+ KsTsduDat->TsduType = TSDU_TYPE_DAT;
+ KsTsdu->LastOffset += KsTsduDat->TotalLength;
+ }
+
+ /* attach it to the TsduMgr list if the Tsdu is newly created. */
+ if (bNewTsdu) {
+
+ list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+ KsTsduMgr->NumOfTsdu++;
+ }
+
+ } else {
+
+ if (NewMdl) {
+ ksocknal_release_mdl(NewMdl, FALSE);
+ NewMdl = NULL;
+ }
+
+ if (bNewBuff) {
+ ExFreePool(Buffer);
+ Buffer = NULL;
+ bNewBuff = FALSE;
+ }
+ }
+ }
+
+    /* update the TotalBytes that are pending send */
+ KsTsduMgr->TotalBytes += (ULONG)len;
+
+ spin_unlock(&tconn->kstc_lock);
+
+ /* cleanup the Tsdu if not successful */
+ if (!bBuffed && bNewTsdu) {
+ KsPutKsTsdu(KsTsdu);
+ bNewTsdu = FALSE;
+ KsTsdu = NULL;
+ }
+
+    /* allocate the ksock_tdi_tx_t context from the memory pool */
+
+ context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0);
+ if (!context) {
+ /* release the chained mdl */
+ ksocknal_release_mdl(mdl, FALSE);
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+    /* initialize the TcpContext */
+
+ memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT));
+
+ context->tconn = tconn;
+ context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t));
+
+ KeInitializeEvent(context->Event, SynchronizationEvent, FALSE);
+
+ if (bBuffed) {
+
+        /* for buffered transmission, we need to set
+           the internal completion routine. */
+
+ context->CompletionRoutine = KsTcpSendCompletionRoutine;
+ context->KsTsduMgr = KsTsduMgr;
+ context->CompletionContext = KsTsdu;
+ context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat);
+ context->bCounted = FALSE;
+
+ } else if (bIsNonBlock) {
+
+        /* for non-blocking transmission, we need to set
+           the internal completion routine too. */
+
+ context->CompletionRoutine = KsTcpSendCompletionRoutine;
+ context->CompletionContext = tx;
+ context->KsTsduMgr = KsTsduMgr;
+ context->bCounted = TRUE;
+ context->ReferCount = 2;
+ }
+
+ if (tconn->kstc_type == kstt_sender) {
+ ConnObject = tconn->sender.kstc_info.FileObject;
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ ConnObject = tconn->child.kstc_info.FileObject;
+ }
+
+ DeviceObject = IoGetRelatedDeviceObject(ConnObject);
+
+ Irp = KsBuildTdiIrp(DeviceObject);
+
+ if (NULL == Irp) {
+
+ /* release the chained mdl */
+ ksocknal_release_mdl(mdl, FALSE);
+
+ Status = STATUS_INSUFFICIENT_RESOURCES;
+ goto errorout;
+ }
+
+ length = KsQueryMdlsSize(mdl);
+
+ LASSERT((ULONG)len <= length);
+
+ ksocknal_get_tconn(tconn);
+
+ TdiBuildSend(
+ Irp,
+ DeviceObject,
+ ConnObject,
+ KsTcpCompletionRoutine,
+ context,
+ (bBuffed ? NewMdl : mdl),
+ (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags),
+        (ULONG)len
+ );
+
+ Status = IoCallDriver(DeviceObject, Irp);
+
+    /* the context is to be freed by the completion routine */
+ context = NULL;
+
+ if (bBuffed) {
+ ksocknal_release_mdl(mdl, FALSE);
+ NewMdl = NULL;
+ }
+
+ if (!NT_SUCCESS(Status)) {
+ cfs_enter_debugger();
+ rc = cfs_error_code(Status);
+ goto errorout;
+ }
+
+ if (bBuffed) {
+ Status = STATUS_SUCCESS;
+ rc = len;
+ } else {
+ if (bIsNonBlock) {
+ if (InterlockedDecrement(&context->ReferCount) == 0) {
+ Status = Irp->IoStatus.Status;
+ } else {
+ Status = STATUS_PENDING;
+ }
+ } else {
+
+ if (STATUS_PENDING == Status) {
+ Status = KeWaitForSingleObject(
+ context->Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ if (NT_SUCCESS(Status)) {
+ Status = Irp->IoStatus.Status;
+ }
+ }
+ }
+
+ if (Status == STATUS_SUCCESS) {
+ rc = (int)(Irp->IoStatus.Information);
+
+ spin_lock(&tconn->kstc_lock);
+ KsTsduMgr->TotalBytes -= rc;
+ spin_unlock(&tconn->kstc_lock);
+
+ } else {
+ rc = cfs_error_code(Status);
+ }
+ }
+
+errorout:
+
+ if (bBuffed) {
+
+ if (NewMdl) {
+ ksocknal_release_mdl(NewMdl, FALSE);
+ NewMdl = NULL;
+ }
+
+ if (bNewBuff) {
+ if (!NT_SUCCESS(Status)) {
+ ExFreePool(Buffer);
+ Buffer = NULL;
+ }
+ }
+
+ } else {
+
+ if (Status != STATUS_PENDING) {
+
+ if (Irp) {
+
+ /* Freeing the Irp ... */
+
+ IoFreeIrp(Irp);
+ Irp = NULL;
+ }
+ }
+ }
+
+ if (!NT_SUCCESS(Status)) {
+
+ spin_lock(&tconn->kstc_lock);
+
+ KsTsduMgr->TotalBytes -= (ULONG)len;
+
+ if (bBuffed) {
+
+            /* detach it from the TsduMgr list if the Tsdu was newly created. */
+ if (bNewTsdu) {
+
+ list_del(&(KsTsdu->Link));
+ KsTsduMgr->NumOfTsdu--;
+
+ KsPutKsTsdu(KsTsdu);
+ } else {
+ if (bNewBuff) {
+ if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) ==
+ (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
+ KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF);
+ KsTsduBuf->TsduType = 0;
+ } else {
+ cfs_enter_debugger();
+ KsTsduBuf->StartOffset = KsTsduBuf->DataLength;
+ }
+ } else {
+ if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength ==
+ (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
+ KsTsdu->LastOffset -= KsTsduDat->TotalLength;
+ KsTsduDat->TsduType = 0;
+ } else {
+ cfs_enter_debugger();
+ KsTsduDat->StartOffset = KsTsduDat->DataLength;
+ }
+ }
+ }
+ }
+
+ spin_unlock(&tconn->kstc_lock);
+ }
+
+ /* free the context if is not used at all */
+ if (context) {
+ cfs_free(context);
+ }
+
+ ksocknal_put_tconn(tconn);
+
+ return rc;
+}
+
+/*
+ * ksocknal_recv_mdl
+ * Receive data from the peer for a stream connection
+ *
+ * Arguments:
+ * tconn: tdi connection object
+ * mdl: the mdl chain to contain the incoming data
+ * len: length of the data
+ * flags: flags of the receiving
+ *
+ * Return Value:
+ * ksocknal return code
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_recv_mdl(
+ ksock_tconn_t * tconn,
+ ksock_mdl_t * mdl,
+ int size,
+ int flags
+ )
+{
+ NTSTATUS Status = STATUS_SUCCESS;
+ int rc = 0;
+
+ BOOLEAN bIsNonBlock;
+ BOOLEAN bIsExpedited;
+
+ PKS_CHAIN KsChain;
+ PKS_TSDUMGR KsTsduMgr;
+ PKS_TSDU KsTsdu;
+ PKS_TSDU_DAT KsTsduDat;
+ PKS_TSDU_BUF KsTsduBuf;
+ PKS_TSDU_MDL KsTsduMdl;
+
+ PUCHAR Buffer;
+
+ ULONG BytesRecved = 0;
+ ULONG RecvedOnce;
+
+ bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT);
+ bIsExpedited = cfs_is_flag_set(flags, MSG_OOB);
+
+ ksocknal_get_tconn(tconn);
+
+Again:
+
+ RecvedOnce = 0;
+
+ spin_lock(&(tconn->kstc_lock));
+
+ if ( tconn->kstc_type != kstt_sender &&
+ tconn->kstc_type != kstt_child) {
+
+ rc = -EINVAL;
+ spin_unlock(&(tconn->kstc_lock));
+
+ goto errorout;
+ }
+
+ if (tconn->kstc_state != ksts_connected) {
+
+ rc = -ENOTCONN;
+ spin_unlock(&(tconn->kstc_lock));
+
+ goto errorout;
+ }
+
+ if (tconn->kstc_type == kstt_sender) {
+ KsChain = &(tconn->sender.kstc_recv);
+ } else {
+ LASSERT(tconn->kstc_type == kstt_child);
+ KsChain = &(tconn->child.kstc_recv);
+ }
+
+ if (bIsExpedited) {
+ KsTsduMgr = &(KsChain->Expedited);
+ } else {
+ KsTsduMgr = &(KsChain->Normal);
+ }
+
+NextTsdu:
+
+ if (list_empty(&(KsTsduMgr->TsduList))) {
+
+ //
+        // It's a notification event. We need to reset it to the
+        // non-signaled state since there are no tsdus queued.
+ //
+
+ KeResetEvent(&(KsTsduMgr->Event));
+
+ } else {
+
+ KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
+ LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+        /* detach the KsTsdu from the TsduMgr list so the lock can be dropped */
+ list_del(&(KsTsdu->Link));
+ KsTsduMgr->NumOfTsdu--;
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ while ((ULONG)size > BytesRecved) {
+
+ ULONG BytesCopied = 0;
+ ULONG BytesToCopy = 0;
+ ULONG StartOffset = 0;
+
+ KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+ KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+ KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+
+ if ( TSDU_TYPE_DAT == KsTsduDat->TsduType ||
+ TSDU_TYPE_BUF == KsTsduBuf->TsduType ) {
+
+
+ //
+ // Data Tsdu Unit ...
+ //
+
+ if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+ if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
+ /* data is not ready yet*/
+ KeResetEvent(&(KsTsduMgr->Event));
+ printk("ksocknal_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat);
+ break;
+ }
+
+ Buffer = &KsTsduDat->Data[0];
+ StartOffset = KsTsduDat->StartOffset;
+ if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) {
+                    /* Recvmsg request can be satisfied ... */
+ BytesToCopy = size - BytesRecved;
+ } else {
+ BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset;
+ }
+
+ } else {
+
+ if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
+ /* data is not ready yet*/
+ KeResetEvent(&(KsTsduMgr->Event));
+ DbgPrint("ksocknal_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf);
+ break;
+ }
+
+ ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+ Buffer = KsTsduBuf->UserBuffer;
+ StartOffset = KsTsduBuf->StartOffset;
+
+ if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) {
+                    /* Recvmsg request can be satisfied ... */
+ BytesToCopy = size - BytesRecved;
+ } else {
+ BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset;
+ }
+ }
+
+ if (BytesToCopy > 0) {
+ Status = TdiCopyBufferToMdl(
+ Buffer,
+ StartOffset,
+ BytesToCopy,
+ mdl,
+ BytesRecved,
+ &BytesCopied
+ );
+
+ if (NT_SUCCESS(Status)) {
+
+ if (BytesToCopy != BytesCopied) {
+ cfs_enter_debugger();
+ }
+
+ BytesRecved += BytesCopied;
+ RecvedOnce += BytesCopied;
+
+ } else {
+
+ cfs_enter_debugger();
+
+ if (STATUS_BUFFER_OVERFLOW == Status) {
+ }
+ }
+ }
+
+ if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+ KsTsduDat->StartOffset += BytesCopied;
+
+ if (KsTsduDat->StartOffset == KsTsduDat->DataLength) {
+ KsTsdu->StartOffset += KsTsduDat->TotalLength;
+ }
+
+ } else {
+
+ ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+ KsTsduBuf->StartOffset += BytesCopied;
+ if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) {
+ KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+ /* now we need release the buf to system pool */
+ ExFreePool(KsTsduBuf->UserBuffer);
+ }
+ }
+
+ } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+
+ //
+ // MDL Tsdu Unit ...
+ //
+
+ if (KsTsduMdl->DataLength > size - BytesRecved) {
+
+                    /* Recvmsg request can be satisfied ... */
+
+ BytesToCopy = size - BytesRecved;
+
+ } else {
+
+ BytesToCopy = KsTsduMdl->DataLength;
+ }
+
+ Status = KsCopyMdlChainToMdlChain(
+ KsTsduMdl->Mdl,
+ KsTsduMdl->StartOffset,
+ mdl,
+ BytesRecved,
+ BytesToCopy,
+ &BytesCopied
+ );
+
+ if (NT_SUCCESS(Status)) {
+
+ if (BytesToCopy != BytesCopied) {
+ cfs_enter_debugger();
+ }
+
+ KsTsduMdl->StartOffset += BytesCopied;
+ KsTsduMdl->DataLength -= BytesCopied;
+
+ BytesRecved += BytesCopied;
+ RecvedOnce += BytesCopied;
+ } else {
+ cfs_enter_debugger();
+ }
+
+ if (0 == KsTsduMdl->DataLength) {
+
+ //
+ // Call TdiReturnChainedReceives to release the Tsdu memory
+ //
+
+ TdiReturnChainedReceives(
+ &(KsTsduMdl->Descriptor),
+ 1 );
+
+ KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+ }
+
+ } else {
+                printk("ksocknal_recv_mdl: unknown tsdu slot: slot = %x type = %x Start = %x Length = %x\n",
+                        KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength);
+                printk("                   Tsdu = %x Magic = %x: Start = %x Last = %x Length = %x",
+                        KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength);
+ cfs_enter_debugger();
+ }
+
+ if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+ //
+ // KsTsdu is empty now, we need free it ...
+ //
+
+ KsPutKsTsdu(KsTsdu);
+ KsTsdu = NULL;
+
+ break;
+ }
+ }
+
+ spin_lock(&(tconn->kstc_lock));
+
+        /* re-attach the KsTsdu at the list head if it was not consumed */
+ if (KsTsdu) {
+ KsTsduMgr->NumOfTsdu++;
+ list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+ } else if ((ULONG)size > BytesRecved) {
+ goto NextTsdu;
+ }
+ }
+
+ if (KsTsduMgr->TotalBytes < RecvedOnce) {
+ cfs_enter_debugger();
+ KsTsduMgr->TotalBytes = 0;
+ } else {
+ KsTsduMgr->TotalBytes -= RecvedOnce;
+ }
+
+ spin_unlock(&(tconn->kstc_lock));
+
+ if (NT_SUCCESS(Status)) {
+
+ if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) {
+
+ KeWaitForSingleObject(
+ &(KsTsduMgr->Event),
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ goto Again;
+ }
+
+ if (bIsNonBlock && (BytesRecved == 0)) {
+ rc = -EAGAIN;
+ } else {
+ rc = BytesRecved;
+ }
+ }
+
+errorout:
+
+ ksocknal_put_tconn(tconn);
+
+ if (rc > 0) {
+        KsPrint((1, "ksocknal_recv_mdl: received %d bytes ...\n", rc));
+ } else {
+        KsPrint((0, "ksocknal_recv_mdl: receive error code = %d Status = %xh ...\n", rc, Status));
+ }
+
+ /* release the chained mdl */
+ ksocknal_release_mdl(mdl, FALSE);
+
+ return (rc);
+}
+
+
+/*
+ * ksocknal_init_tdi_data
+ *   initialize the global data in ksocknal_data
+ *
+ * Arguments:
+ * N/A
+ *
+ * Return Value:
+ * int: ksocknal error code
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_init_tdi_data()
+{
+ int rc = 0;
+
+ /* initialize tconn related globals */
+
+ spin_lock_init(&ksocknal_data.ksnd_tconn_lock);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_tconns);
+ cfs_init_event(&ksocknal_data.ksnd_tconn_exit, TRUE, FALSE);
+
+ ksocknal_data.ksnd_tconn_slab = cfs_mem_cache_create(
+ "tcon", sizeof(ksock_tconn_t) , 0, 0);
+
+ if (!ksocknal_data.ksnd_tconn_slab) {
+ rc = -ENOMEM;
+ goto errorout;
+ }
+
+ /* initialize tsdu related globals */
+
+ spin_lock_init(&ksocknal_data.ksnd_tsdu_lock);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_freetsdus);
+ ksocknal_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */
+ ksocknal_data.ksnd_tsdu_slab = cfs_mem_cache_create(
+ "tsdu", ksocknal_data.ksnd_tsdu_size, 0, 0);
+
+ if (!ksocknal_data.ksnd_tsdu_slab) {
+ rc = -ENOMEM;
+ cfs_mem_cache_destroy(ksocknal_data.ksnd_tconn_slab);
+ ksocknal_data.ksnd_tconn_slab = NULL;
+ goto errorout;
+ }
+
+ /* initialize daemon related globals */
+
+ spin_lock_init(&ksocknal_data.ksnd_daemon_lock);
+ CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_daemons);
+ cfs_init_event(&ksocknal_data.ksnd_daemon_exit, TRUE, FALSE);
+
+errorout:
+
+ return rc;
+}
+
+
+/*
+ * ksocknal_fini_tdi_data
+ *   finalize the global data in ksocknal_data
+ *
+ * Arguments:
+ * N/A
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_fini_tdi_data()
+{
+ PKS_TSDU KsTsdu = NULL;
+ struct list_head * list = NULL;
+
+    /* we need to wait until all the tconns are freed */
+ spin_lock(&(ksocknal_data.ksnd_tconn_lock));
+
+ if (list_empty(&(ksocknal_data.ksnd_tconns))) {
+ cfs_wake_event(&ksocknal_data.ksnd_tconn_exit);
+ }
+ spin_unlock(&(ksocknal_data.ksnd_tconn_lock));
+
+ /* now wait on the tconn exit event */
+ cfs_wait_event(&ksocknal_data.ksnd_tconn_exit, 0);
+
+ /* it's safe to delete the tconn slab ... */
+ cfs_mem_cache_destroy(ksocknal_data.ksnd_tconn_slab);
+ ksocknal_data.ksnd_tconn_slab = NULL;
+
+    /* clean up all the tsdu buffers in the free list */
+ spin_lock(&(ksocknal_data.ksnd_tsdu_lock));
+ list_for_each (list, &ksocknal_data.ksnd_freetsdus) {
+ KsTsdu = list_entry (list, KS_TSDU, Link);
+
+ cfs_mem_cache_free(
+ ksocknal_data.ksnd_tsdu_slab,
+ KsTsdu );
+ }
+ spin_unlock(&(ksocknal_data.ksnd_tsdu_lock));
+
+ /* it's safe to delete the tsdu slab ... */
+ cfs_mem_cache_destroy(ksocknal_data.ksnd_tsdu_slab);
+ ksocknal_data.ksnd_tsdu_slab = NULL;
+
+    /* good! the cleanup finished smoothly ... */
+}
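+
+/*
+ * Illustrative sketch (not compiled): pairing of the init/fini routines
+ * above at module load/unload time. The surrounding module entry points
+ * are hypothetical.
+ */
+#if 0
+static int
+ks_module_init_example(void)
+{
+    int rc;
+
+    rc = ksocknal_init_tdi_data();
+    if (rc != 0) {
+        return rc;      /* slab creation failed */
+    }
+
+    /* ... run the NAL ... */
+
+    /* blocks until the last tconn is freed */
+    ksocknal_fini_tdi_data();
+
+    return 0;
+}
+#endif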
+
+/*
+ * ksocknal_create_child_tconn
+ * Create the backlog child connection for a listener
+ *
+ * Arguments:
+ * parent: the listener daemon connection
+ *
+ * Return Value:
+ * the child connection or NULL in failure
+ *
+ * Notes:
+ * N/A
+ */
+
+ksock_tconn_t *
+ksocknal_create_child_tconn(
+ ksock_tconn_t * parent
+ )
+{
+ NTSTATUS status;
+ ksock_tconn_t * backlog;
+
+ /* allocate the tdi connecton object */
+ backlog = ksocknal_create_tconn();
+
+ if (!backlog) {
+ goto errorout;
+ }
+
+ /* initialize the tconn as a child */
+ ksocknal_init_child(backlog);
+
+
+ /* now bind it */
+ if (ksocknal_bind_tconn(backlog, parent, 0, 0) < 0) {
+ ksocknal_free_tconn(backlog);
+ backlog = NULL;
+ goto errorout;
+ }
+
+ /* open the connection object */
+ status = KsOpenConnection(
+ &(backlog->kstc_dev),
+ (PVOID)backlog,
+ &(backlog->child.kstc_info.Handle),
+ &(backlog->child.kstc_info.FileObject)
+ );
+
+ if (!NT_SUCCESS(status)) {
+
+ ksocknal_put_tconn(backlog);
+ backlog = NULL;
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ /* associate it now ... */
+ status = KsAssociateAddress(
+ backlog->kstc_addr.Handle,
+ backlog->child.kstc_info.FileObject
+ );
+
+ if (!NT_SUCCESS(status)) {
+
+ ksocknal_put_tconn(backlog);
+ backlog = NULL;
+ cfs_enter_debugger();
+ goto errorout;
+ }
+
+ backlog->kstc_state = ksts_associated;
+
+errorout:
+
+ return backlog;
+}
+
+/*
+ * ksocknal_replenish_backlogs
+ *   replenish the listening backlog (child) connections
+ *
+ * Arguments:
+ *   parent: the parent listening tdi connection
+ *   nbacklog: number of child connections in the queue
+ *
+ * Return Value:
+ * N/A
+ *
+ * Notes:
+ * N/A
+ */
+
+void
+ksocknal_replenish_backlogs(
+ ksock_tconn_t * parent,
+ int nbacklog
+ )
+{
+ ksock_tconn_t * backlog;
+ int n = 0;
+
+ /* calculate how many backlogs needed */
+ if ( ( parent->listener.kstc_listening.num +
+ parent->listener.kstc_accepted.num ) < nbacklog ) {
+ n = nbacklog - ( parent->listener.kstc_listening.num +
+ parent->listener.kstc_accepted.num );
+ } else {
+ n = 0;
+ }
+
+ while (n--) {
+
+ /* create the backlog child tconn */
+ backlog = ksocknal_create_child_tconn(parent);
+
+        spin_lock(&(parent->kstc_lock));
+
+        if (backlog) {
+            spin_lock(&backlog->kstc_lock);
+            /* attach it to the listening list of the daemon */
+            list_add( &backlog->child.kstc_link,
+                      &parent->listener.kstc_listening.list );
+            parent->listener.kstc_listening.num++;
+
+            backlog->child.kstc_queued = TRUE;
+            spin_unlock(&backlog->kstc_lock);
+        } else {
+            cfs_enter_debugger();
+        }
+
+        /* the parent lock is taken per iteration, so it must be
+           dropped here rather than after the loop */
+        spin_unlock(&(parent->kstc_lock));
+    }
+}
+
+/*
+ * ksocknal_start_listen
+ * setup the listener tdi connection and make it listen
+ * on the user specified ip address and port.
+ *
+ * Arguments:
+ *   tconn: the parent listening tdi connection
+ *   nbacklog: number of child connections in the queue
+ *
+ * Return Value:
+ *   ksocknal error code: >=0 success; otherwise error.
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_start_listen(ksock_tconn_t *tconn, int nbacklog)
+{
+ int rc = 0;
+
+ /* now replenish the backlogs */
+ ksocknal_replenish_backlogs(tconn, nbacklog);
+
+ /* set the event callback handlers */
+ rc = ksocknal_set_handlers(tconn);
+
+ if (rc < 0) {
+ return rc;
+ }
+
+ spin_lock(&(tconn->kstc_lock));
+ tconn->listener.nbacklog = nbacklog;
+ tconn->kstc_state = ksts_listening;
+ cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED);
+ spin_unlock(&(tconn->kstc_lock));
+
+ return rc;
+}
+
+
+/*
+ * ksocknal_wait_child_tconn
+ * accept a child connection from peer
+ *
+ * Arguments:
+ * parent: the daemon tdi connection listening
+ * child: to contain the accepted connection
+ *
+ * Return Value:
+ * ksocknal error code;
+ *
+ * Notes:
+ * N/A
+ */
+
+int
+ksocknal_wait_child_tconn(
+ ksock_tconn_t * parent,
+ ksock_tconn_t ** child
+ )
+{
+ struct list_head * tmp;
+ ksock_tconn_t * backlog = NULL;
+
+ ksocknal_replenish_backlogs(parent, parent->listener.nbacklog);
+
+ spin_lock(&(parent->kstc_lock));
+
+    if (parent->listener.kstc_listening.num <= 0) {
+ spin_unlock(&(parent->kstc_lock));
+ return -1;
+ }
+
+again:
+
+    /* check the listening queue and try to find an accepted connection */
+
+ list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
+ backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link);
+
+ spin_lock(&(backlog->kstc_lock));
+
+ if (backlog->child.kstc_accepted) {
+
+ LASSERT(backlog->kstc_state == ksts_connected);
+ LASSERT(backlog->child.kstc_busy);
+
+ list_del(&(backlog->child.kstc_link));
+ list_add(&(backlog->child.kstc_link),
+ &(parent->listener.kstc_accepted.list));
+ parent->listener.kstc_accepted.num++;
+ parent->listener.kstc_listening.num--;
+ backlog->child.kstc_queueno = 1;
+
+ spin_unlock(&(backlog->kstc_lock));
+
+ break;
+ } else {
+ spin_unlock(&(backlog->kstc_lock));
+ backlog = NULL;
+ }
+ }
+
+ spin_unlock(&(parent->kstc_lock));
+
+    /* we need to wait until a new incoming connection is requested,
+       or until the listening daemon thread is shut down */
+ if (backlog == NULL) {
+
+ NTSTATUS Status;
+
+ Status = KeWaitForSingleObject(
+ &(parent->listener.kstc_accept_event),
+ Executive,
+ KernelMode,
+ FALSE,
+ NULL
+ );
+
+ spin_lock(&(parent->kstc_lock));
+
+        /* check whether we are expected to exit */
+ if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) {
+ spin_unlock(&(parent->kstc_lock));
+ } else {
+ goto again;
+ }
+ }
+
+ if (backlog) {
+ /* query the local ip address of the connection */
+ ksocknal_query_local_ipaddr(backlog);
+ }
+
+ *child = backlog;
+
+ return 0;
+}
+
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
+{
+ return 0;
+}
+
+int libcfs_ipif_enumerate(char ***names)
+{
+ return 0;
+}
+
+void libcfs_ipif_free_enumeration(char **names, int n)
+{
+}
+
+int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog)
+{
+ int rc = 0;
+ ksock_tconn_t * parent;
+
+ parent = ksocknal_create_tconn();
+ if (!parent) {
+ rc = -ENOMEM;
+ goto errorout;
+ }
+
+ /* initialize the tconn as a listener */
+ ksocknal_init_listener(parent);
+
+ /* bind the daemon->tconn */
+ rc = ksocknal_bind_tconn(parent, NULL, ip, port);
+
+ if (rc < 0) {
+ ksocknal_free_tconn(parent);
+ goto errorout;
+ }
+
+ /* create listening children and make it to listen state*/
+ rc = ksocknal_start_listen(parent, backlog);
+
+errorout:
+
+ return rc;
+}
+
+int libcfs_sock_accept(struct socket **newsockp, struct socket *sock)
+{
+ int rc;
+ ksock_tconn_t * child = NULL;
+
+    /* wait for incoming connections */
+ rc = ksocknal_wait_child_tconn(sock, &child);
+
+ *newsockp = child;
+ return rc;
+}
+
+void libcfs_sock_abort_accept(struct socket *sock)
+{
+ spin_lock(&(sock->kstc_lock));
+
+    /* clear the daemon flag to shut it down */
+ cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED);
+
+    /* wake it up from waiting on new incoming connections */
+ KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE);
+
+ spin_unlock(&(sock->kstc_lock));
+}
+
+/*
+ * libcfs_sock_connect
+ *   build a connection between the local ip/port and the peer ip/port.
+ *
+ * Arguments:
+ * laddr: local ip address
+ * lport: local port number
+ * paddr: peer's ip address
+ * pport: peer's port number
+ *
+ * Return Value:
+ * int: return code ...
+ *
+ * Notes:
+ * N/A
+ */
+
+
+int libcfs_sock_connect(struct socket **sockp, int *fatal,
+ __u32 local_ip, int local_port,
+ __u32 peer_ip, int peer_port)
+{
+ ksock_tconn_t * tconn = NULL;
+ int rc = 0;
+
+ /* create the tdi connecion structure */
+    /* create the tdi connection structure */
+ if (!tconn) {
+ rc = -ENOMEM;
+ goto errorout;
+ }
+
+ /* initialize the tdi sender connection */
+ ksocknal_init_sender(tconn);
+
+    /* bind the local ip address with the tconn */
+    rc = ksocknal_bind_tconn(tconn, NULL, local_ip, local_port);
+    if (rc < 0) {
+        ksocknal_free_tconn(tconn);
+        tconn = NULL;
+        goto errorout;
+    }
+
+    /* now build the connection to the remote peer */
+    rc = ksocknal_build_tconn(tconn, peer_ip, peer_port);
+    if (rc < 0) {
+        ksocknal_put_tconn(tconn);
+        tconn = NULL;
+        goto errorout;
+    }
+
+errorout:
+
+ *sockp = tconn;
+ return rc;
+}
+
+int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize)
+{
+ return 0;
+}
+
+int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize)
+{
+ return 0;
+}
+
+int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port)
+{
+ PTRANSPORT_ADDRESS taddr = NULL;
+
+ if (remote) {
+ if (socket->kstc_type == kstt_sender) {
+ taddr = socket->sender.kstc_info.Remote;
+        } else if (socket->kstc_type == kstt_child) {
+ taddr = socket->child.kstc_info.Remote;
+ }
+ } else {
+        int rc;
+
+        /* ksocknal_query_local_ipaddr returns a ksocknal error
+           code rather than an NTSTATUS: 0 means success */
+        rc = ksocknal_query_local_ipaddr(socket);
+        if (rc == 0) {
+            taddr = &(socket->kstc_addr.Tdi);
+        }
+ }
+
+ if (taddr) {
+ PTDI_ADDRESS_IP addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address));
+ if (ip != NULL)
+ *ip = ntohl (addr->in_addr);
+ if (port != NULL)
+ *port = ntohs (addr->sin_port);
+ } else {
+ return -ENOTCONN;
+ }
+
+ return 0;
+}
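+
+/*
+ * Illustrative sketch (not compiled): querying the peer address of a
+ * connected socket; ip and port are returned in host byte order.
+ */
+#if 0
+static void
+ks_getaddr_example(struct socket *sock)
+{
+    __u32   ip = 0;
+    int     port = 0;
+
+    if (libcfs_sock_getaddr(sock, 1 /* remote */, &ip, &port) == 0) {
+        KsPrint((1, "peer = %u.%u.%u.%u:%d\n",
+                 (ip >> 24) & 0xff, (ip >> 16) & 0xff,
+                 (ip >>  8) & 0xff,  ip        & 0xff, port));
+    }
+}
+#endif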
+
+int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ ksock_mdl_t * mdl;
+
+ int offset = 0;
+
+ while (nob > offset) {
+
+ /* lock the user buffer */
+ rc = ksocknal_lock_buffer( (char *)buffer + offset,
+ FALSE, nob - offset, IoReadAccess, &mdl );
+
+ if (rc < 0) {
+ return (rc);
+ }
+
+ /* send out the whole mdl */
+ rc = ksocknal_send_mdl( sock, NULL, mdl,
+ nob - offset, 0 );
+
+ if (rc > 0) {
+ offset += rc;
+ } else {
+ return (rc);
+ }
+ }
+
+ return (0);
+}
+
+int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
+{
+ int rc;
+ ksock_mdl_t * mdl;
+
+ int offset = 0;
+
+ while (nob > offset) {
+
+ /* lock the user buffer */
+ rc = ksocknal_lock_buffer( (char *)buffer + offset,
+ FALSE, nob - offset, IoWriteAccess, &mdl );
+
+ if (rc < 0) {
+ return (rc);
+ }
+
+ /* recv the requested buffer */
+ rc = ksocknal_recv_mdl( sock, mdl, nob - offset, 0 );
+
+ if (rc > 0) {
+ offset += rc;
+ } else {
+ return (rc);
+ }
+ }
+
+ return (0);
+}
+
+void libcfs_sock_release(struct socket *sock)
+{
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_PORTALS
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifndef get_cpu
+#define get_cpu() smp_processor_id()
+#define put_cpu() do { } while (0)
+#endif
+
+extern union trace_data_union trace_data[NR_CPUS];
+extern char *tracefile;
+extern int64_t tracefile_size;
+
+event_t tracefile_event;
+
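+/*
+ * NB: the read and write "lock" flavours below all wait on and signal
+ * the same event, so tracefile access is serialised by what amounts to
+ * a single mutex on this platform.
+ */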
+void tracefile_lock_init(void)
+{
+ cfs_init_event(&tracefile_event, TRUE, TRUE);
+}
+
+void tracefile_read_lock(void)
+{
+ cfs_wait_event(&tracefile_event, 0);
+}
+
+void tracefile_read_unlock(void)
+{
+ cfs_wake_event(&tracefile_event);
+}
+
+void tracefile_write_lock(void)
+{
+ cfs_wait_event(&tracefile_event, 0);
+}
+
+void tracefile_write_unlock(void)
+{
+ cfs_wake_event(&tracefile_event);
+}
+
+
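+/*
+ * Pin this CPU's trace buffer: get_cpu() disables preemption and
+ * local_irq_save() masks local interrupts, so the tcd can be used
+ * without further locking until trace_put_tcd() releases both.
+ */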
+inline struct trace_cpu_data *
+__trace_get_tcd(unsigned long *flags)
+{
+ struct trace_cpu_data *ret;
+
+ int cpu = get_cpu();
+ local_irq_save(*flags);
+ ret = &trace_data[cpu].tcd;
+
+ return ret;
+}
+
+inline void
+trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+{
+ local_irq_restore(flags);
+ put_cpu();
+}
+
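+/*
+ * Stamp a debug record header with the current time, CPU, pid and
+ * source location; ph_extern_pid has no meaning here and is left zero.
+ */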
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+ const int line, unsigned long stack)
+{
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+
+ header->ph_subsys = subsys;
+ header->ph_mask = mask;
+ header->ph_cpu_id = smp_processor_id();
+ header->ph_sec = (__u32)tv.tv_sec;
+ header->ph_usec = tv.tv_usec;
+ header->ph_stack = stack;
+ header->ph_pid = current->pid;
+ header->ph_line_num = line;
+ header->ph_extern_pid = 0;
+ return;
+}
+
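+/*
+ * Map the debug mask onto a printk level and a "Lustre"/"LustreError"
+ * prefix; D_CONSOLE messages use the short form without the
+ * pid/file/line decoration.
+ */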
+void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
+ int len, char *file, const char *fn)
+{
+ char *prefix = NULL, *ptype = NULL;
+
+ if ((mask & D_EMERG) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_EMERG;
+ } else if ((mask & D_ERROR) != 0) {
+ prefix = "LustreError";
+ ptype = KERN_ERR;
+ } else if ((mask & D_WARNING) != 0) {
+ prefix = "Lustre";
+ ptype = KERN_WARNING;
+ } else if (libcfs_printk != 0 || (mask & D_CONSOLE)) {
+ prefix = "Lustre";
+ ptype = KERN_INFO;
+ }
+
+ if ((mask & D_CONSOLE) != 0) {
+ printk("%s%s: %s", ptype, prefix, buf);
+ } else {
+ printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid,
+ hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf);
+ }
+ return;
+}
+
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+ return 1;
+}
+
+
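+/*
+ * /proc write handler for the debug daemon control file. Accepts
+ * "stop" to halt the daemon, "size=<MB>" to bound the trace file, or
+ * a path naming the file the daemon should write to.
+ */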
+int trace_write_daemon_file(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ char *name;
+ unsigned long off;
+ int rc;
+
+ name = cfs_alloc(count + 1, 0);
+ if (name == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user((void *)name, (void*)buffer, count)) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ /* be nice and strip out trailing whitespace (e.g. the '\n' from echo) */
+ for (off = count; off > 2 && isspace(name[off - 1]); off--)
+ ;
+
+ name[off] = '\0';
+
+ tracefile_write_lock();
+ if (strcmp(name, "stop") == 0) {
+ tracefile = NULL;
+ trace_stop_thread();
+ goto out_sem;
+ } else if (strncmp(name, "size=", 5) == 0) {
+ tracefile_size = simple_strtoul(name + 5, NULL, 0);
+ if (tracefile_size < 10 || tracefile_size > 20480)
+ tracefile_size = TRACEFILE_SIZE;
+ else
+ tracefile_size <<= 20;
+ goto out_sem;
+ }
+
+#ifndef __WINNT__
+ if (name[0] != '/') {
+ rc = -EINVAL;
+ goto out_sem;
+ }
+#endif
+
+ if (tracefile != NULL)
+ cfs_free(tracefile);
+
+ tracefile = name;
+ name = NULL;
+ printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
+ "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
+
+ trace_start_thread();
+out_sem:
+ tracefile_write_unlock();
+out:
+ if (name != NULL)
+ cfs_free(name);
+ return count;
+}
+
+int trace_read_daemon_file(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ int rc;
+
+ tracefile_read_lock();
+ rc = snprintf(page, count, "%s", tracefile == NULL ? "" : tracefile);
+ tracefile_read_unlock();
+
+ return rc;
+}
+
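+/*
+ * /proc handlers for the debug buffer size in MB: the write path
+ * sanity-checks the value against 80% of RAM, then divides it evenly
+ * across CPUs.
+ */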
+int trace_write_debug_mb(struct file *file, const char *buffer,
+ unsigned long count, void *data)
+{
+ char string[32];
+ int i;
+ unsigned max;
+
+ if (count >= sizeof(string)) {
+ printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
+ count);
+ return -EOVERFLOW;
+ }
+
+ if (copy_from_user((void *)string, (void *)buffer, count))
+ return -EFAULT;
+
+ max = simple_strtoul(string, NULL, 0);
+ if (max == 0)
+ return -EINVAL;
+
+ if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
+ printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
+ "%dMB, which is more than 80%% of available RAM (%lu)\n",
+ max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
+ return -EINVAL;
+ }
+
+ max /= smp_num_cpus;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ struct trace_cpu_data *tcd;
+ tcd = &trace_data[i].tcd;
+ tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
+ }
+ return count;
+}
+
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+ int *eof, void *data)
+{
+ struct trace_cpu_data *tcd;
+ unsigned long flags;
+ int rc;
+
+ tcd = trace_get_tcd(flags);
+ rc = snprintf(page, count, "%lu\n",
+ (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
+ trace_put_tcd(tcd, flags);
+ return rc;
+}
+
--- /dev/null
+
+#ifndef __KERNEL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <io.h>
+#include <time.h>
+#include <windows.h>
+
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       char *format, ...)
+{
+ /* no-op stub: debug messages are discarded in this userspace build */
+}
+
+void print_last_error(char* Prefix)
+{
+ LPVOID lpMsgBuf;
+
+ FormatMessage(
+ FORMAT_MESSAGE_ALLOCATE_BUFFER |
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ GetLastError(),
+ 0,
+ (LPTSTR) &lpMsgBuf,
+ 0,
+ NULL
+ );
+
+ printf("%s %s", Prefix, (LPTSTR) lpMsgBuf);
+
+ LocalFree(lpMsgBuf);
+}
+
+//
+// The following declarations are defined in VC's io.h.
+// sys/types.h conflicts with io.h, so we place these
+// declarations here instead.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ void
+ __declspec (naked) __cdecl _chkesp(void)
+ {
+#if _X86_
+ __asm { jz exit_chkesp };
+ __asm { int 3 };
+ exit_chkesp:
+ __asm { ret };
+#endif
+ }
+#ifdef __cplusplus
+}
+#endif
+
+unsigned int sleep (unsigned int seconds)
+{
+ Sleep(seconds * 1000);
+ return 0;
+}
+
+int gethostname(char *name, int namelen)
+{
+ /* stub: the buffer is left untouched */
+ return 0;
+}
+
+int ioctl (
+    int handle,
+    int cmd,
+    void *buffer
+    )
+{
+ /* stub: ioctl is not supported in this userspace build */
+ return 0;
+}
+
+#endif /* __KERNEL__ */
\ No newline at end of file
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+
+/*
+ * miscellaneous libcfs stuff
+ */
+#define DEBUG_SUBSYSTEM S_PORTALS
+#include <lnet/types.h>
+
+/*
+ * Convert a server error code to the client format. Error codes come
+ * from Linux errno.h, so for a Linux client this is the identity mapping.
+ */
+int convert_server_error(__u64 ecode)
+{
+ return cfs_error_code((NTSTATUS)ecode);
+}
+
+/*
+ * Convert <fcntl.h> open flags from client to server format.
+ *
+ * The NT kernel describes open flags with several members
+ * (DesiredAccess/ShareAccess/CreateDisposition/CreateOptions),
+ * so it is better to convert them at the point of use, not here.
+ */
+
+int convert_client_oflag(int cflag, int *result)
+{
+ *result = 0;
+ return 0;
+}
+
+
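+/*
+ * Map an NTSTATUS to a negative Linux errno. Any failure status not
+ * listed below falls through to -EINVAL; success codes map to 0.
+ */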
+int cfs_error_code(NTSTATUS Status)
+{
+ switch (Status) {
+
+ case STATUS_ACCESS_DENIED:
+ return (-EACCES);
+
+ case STATUS_ACCESS_VIOLATION:
+ return (-EFAULT);
+
+ case STATUS_BUFFER_TOO_SMALL:
+ return (-ETOOSMALL);
+
+ case STATUS_INVALID_PARAMETER:
+ return (-EINVAL);
+
+ case STATUS_NOT_IMPLEMENTED:
+ case STATUS_NOT_SUPPORTED:
+ return (-EOPNOTSUPP);
+
+ case STATUS_INVALID_ADDRESS:
+ case STATUS_INVALID_ADDRESS_COMPONENT:
+ return (-EADDRNOTAVAIL);
+
+ case STATUS_NO_SUCH_DEVICE:
+ case STATUS_NO_SUCH_FILE:
+ case STATUS_OBJECT_NAME_NOT_FOUND:
+ case STATUS_OBJECT_PATH_NOT_FOUND:
+ case STATUS_NETWORK_BUSY:
+ case STATUS_INVALID_NETWORK_RESPONSE:
+ case STATUS_UNEXPECTED_NETWORK_ERROR:
+ return (-ENETDOWN);
+
+ case STATUS_BAD_NETWORK_PATH:
+ case STATUS_NETWORK_UNREACHABLE:
+ case STATUS_PROTOCOL_UNREACHABLE:
+ return (-ENETUNREACH);
+
+ case STATUS_LOCAL_DISCONNECT:
+ case STATUS_TRANSACTION_ABORTED:
+ case STATUS_CONNECTION_ABORTED:
+ return (-ECONNABORTED);
+
+ case STATUS_REMOTE_DISCONNECT:
+ case STATUS_LINK_FAILED:
+ case STATUS_CONNECTION_DISCONNECTED:
+ case STATUS_CONNECTION_RESET:
+ case STATUS_PORT_UNREACHABLE:
+ return (-ECONNRESET);
+
+ case STATUS_PAGEFILE_QUOTA:
+ case STATUS_NO_MEMORY:
+ case STATUS_CONFLICTING_ADDRESSES:
+ case STATUS_QUOTA_EXCEEDED:
+ case STATUS_TOO_MANY_PAGING_FILES:
+ case STATUS_INSUFFICIENT_RESOURCES:
+ case STATUS_WORKING_SET_QUOTA:
+ case STATUS_COMMITMENT_LIMIT:
+ case STATUS_TOO_MANY_ADDRESSES:
+ case STATUS_REMOTE_RESOURCES:
+ return (-ENOBUFS);
+
+ case STATUS_INVALID_CONNECTION:
+ return (-ENOTCONN);
+
+ case STATUS_PIPE_DISCONNECTED:
+ return (-ESHUTDOWN);
+
+ case STATUS_TIMEOUT:
+ case STATUS_IO_TIMEOUT:
+ case STATUS_LINK_TIMEOUT:
+ return (-ETIMEDOUT);
+
+ case STATUS_REMOTE_NOT_LISTENING:
+ case STATUS_CONNECTION_REFUSED:
+ return (-ECONNREFUSED);
+
+ case STATUS_HOST_UNREACHABLE:
+ return (-EHOSTUNREACH);
+
+ case STATUS_PENDING:
+ case STATUS_DEVICE_NOT_READY:
+ return (-EAGAIN);
+
+ case STATUS_CANCELLED:
+ case STATUS_REQUEST_ABORTED:
+ return (-EINTR);
+
+ case STATUS_BUFFER_OVERFLOW:
+ case STATUS_INVALID_BUFFER_SIZE:
+ return (-EMSGSIZE);
+
+ }
+
+ if (NT_SUCCESS(Status))
+ return 0;
+
+ return (-EINVAL);
+}
+
+
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+ /* stack unwinding is not implemented on this platform */
+}
+
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+ return NULL;
+}
\ No newline at end of file
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleExecutable</key>
- <string>portals</string>
+ <string>lnet</string>
<key>CFBundleIconFile</key>
<string></string>
<key>CFBundleIdentifier</key>
- <string>com.clusterfs.lustre.portals</string>
+ <string>com.clusterfs.lustre.lnet</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundlePackageType</key>
}
snprintf(name, sizeof(name), "acceptor_%03d", accept_port);
- libcfs_daemonize(name);
- libcfs_blockallsigs();
+ cfs_daemonize(name);
+ cfs_block_allsigs();
rc = libcfs_sock_listen(&lnet_acceptor_state.pta_sock,
0, accept_port, accept_backlog);
endif # LINUX
if DARWIN
-macos_PROGRAMS := portals
+macos_PROGRAMS := lnet
-portals_SOURCES := api-errno.c api-ni.c config.c
-portals_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c
-portals_SOURCES += lib-move.c module.c lo.c
-portals_SOURCES += router.c acceptor.c
+lnet_SOURCES := api-errno.c api-ni.c config.c
+lnet_SOURCES += lib-me.c lib-msg.c lib-eq.c lib-md.c
+lnet_SOURCES += lib-move.c module.c lo.c router.c router_proc.c
+lnet_SOURCES += acceptor.c peer.c
-portals_CFLAGS := $(EXTRA_KCFLAGS)
-portals_LDFLAGS := $(EXTRA_KLDFLAGS)
-portals_LDADD := $(EXTRA_KLIBS)
+lnet_CFLAGS := $(EXTRA_KCFLAGS)
+lnet_LDFLAGS := $(EXTRA_KLDFLAGS)
+lnet_LDADD := $(EXTRA_KLIBS)
plist_DATA := Info.plist
*/
#define DEBUG_SUBSYSTEM S_LNET
+#include <libcfs/libcfs.h>
#include <lnet/lib-lnet.h>
typedef struct { /* tmp struct for parsing routes */
LNET_LOCK();
lnet_eq_free (eq);
LNET_UNLOCK();
+
+ return -ENOMEM;
}
/* NB this resets all event sequence numbers to 0, to be earlier
LNET_UNLOCK();
if (timeout_ms < 0) {
- cfs_waitq_wait (&wl);
+ cfs_waitq_wait (&wl, CFS_TASK_INTERRUPTIBLE);
} else {
struct timeval tv;
now = cfs_time_current();
- cfs_waitq_timedwait(&wl, cfs_time_seconds(timeout_ms)/1000);
- cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv);
+ cfs_waitq_timedwait(&wl, CFS_TASK_INTERRUPTIBLE,
+ cfs_time_seconds(timeout_ms)/1000);
+ cfs_duration_usec(cfs_time_sub(cfs_time_current(), now),
+ &tv);
timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000;
if (timeout_ms < 0)
timeout_ms = 0;
siov->iov_len - soffset);
this_nob = MIN(this_nob, nob);
- memcpy (diov->iov_base + doffset,
- siov->iov_base + soffset, this_nob);
+ memcpy ((char *)diov->iov_base + doffset,
+ (char *)siov->iov_base + soffset, this_nob);
nob -= this_nob;
if (diov->iov_len > doffset + this_nob) {
addr = ((char *)cfs_kmap(kiov->kiov_page)) +
kiov->kiov_offset + kiovoffset;
- memcpy (iov->iov_base + iovoffset, addr, this_nob);
+ memcpy ((char *)iov->iov_base + iovoffset, addr, this_nob);
nob -= this_nob;
if (iov->iov_len > iovoffset + this_nob) {
addr = ((char *)cfs_kmap(kiov->kiov_page)) +
kiov->kiov_offset + kiovoffset;
- memcpy (addr, iov->iov_base + iovoffset, this_nob);
+ memcpy (addr, (char *)iov->iov_base + iovoffset, this_nob);
nob -= this_nob;
if (kiov->kiov_len > kiovoffset + this_nob) {
unsigned int rlength = hdr->payload_length;
unsigned int mlength = 0;
unsigned int offset = 0;
- lnet_process_id_t src = {.nid = hdr->src_nid,
- .pid = hdr->src_pid};
+ lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
+ /* .pid = */ hdr->src_pid};
lnet_libmd_t *md;
/* Convert put fields to host byte order */
lnet_hdr_t *hdr = &msg->msg_hdr;
unsigned int mlength = 0;
unsigned int offset = 0;
- lnet_process_id_t src = {.nid = hdr->src_nid,
- .pid = hdr->src_pid};
+ lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
+ /* .pid = */ hdr->src_pid};
lnet_handle_wire_t reply_wmd;
lnet_libmd_t *md;
int rc;
{
void *private = msg->msg_private;
lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {.nid = hdr->src_nid,
- .pid = hdr->src_pid};
+ lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
+ /* .pid = */ hdr->src_pid};
lnet_libmd_t *md;
int rlength;
int mlength;
lnet_parse_ack(lnet_ni_t *ni, lnet_msg_t *msg)
{
lnet_hdr_t *hdr = &msg->msg_hdr;
- lnet_process_id_t src = {.nid = hdr->src_nid,
- .pid = hdr->src_pid};
+ lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
+ /* .pid = */ hdr->src_pid};
lnet_libmd_t *md;
/* Convert ack fields to host byte order */
void
lnet_print_hdr(lnet_hdr_t * hdr)
{
- lnet_process_id_t src = {.nid = hdr->src_nid,
- .pid = hdr->src_pid};
- lnet_process_id_t dst = {.nid = hdr->dest_nid,
- .pid = hdr->dest_pid};
+ lnet_process_id_t src = {/* .nid = */ hdr->src_nid,
+ /* .pid = */ hdr->src_pid};
+ lnet_process_id_t dst = {/* .nid = */ hdr->dest_nid,
+ /* .pid = */ hdr->dest_pid};
char *type_str = lnet_msgtyp2str (hdr->type);
CWARN("P3 Header at %p of type %s\n", hdr, type_str);
}
lnd_t the_lolnd = {
- .lnd_type = LOLND,
- .lnd_startup = lolnd_startup,
- .lnd_shutdown = lolnd_shutdown,
- .lnd_send = lolnd_send,
- .lnd_recv = lolnd_recv,
+ /* .lnd_list = */ {&the_lolnd.lnd_list, &the_lolnd.lnd_list},
+ /* .lnd_refcount = */ 0,
+ /* .lnd_type = */ LOLND,
+ /* .lnd_startup = */ lolnd_startup,
+ /* .lnd_shutdown = */ lolnd_shutdown,
+ /* .lnt_ctl = */ NULL,
+ /* .lnd_send = */ lolnd_send,
+ /* .lnd_recv = */ lolnd_recv,
+ /* .lnd_eager_recv = */ NULL,
+ /* .lnd_notify = */ NULL,
+ /* .lnd_accept
+ .lnd_wait = */ NULL
};
#include <lnet/lib-lnet.h>
-#ifdef __KERNEL__
+#if defined(__KERNEL__) && defined(LNET_ROUTER)
static char *forwarding = "";
CFS_MODULE_PARM(forwarding, "s", charp, 0444,
kpr_do_upcall (void *arg)
{
kpr_upcall_t *u = (kpr_upcall_t *)arg;
+
+#ifndef __WINNT__
+
char nidstr[36];
char whenstr[36];
char *argv[] = {
libcfs_run_upcall (argv);
+
+#endif /* __WINNT__ */
+
LIBCFS_FREE(u, sizeof(*u));
}
return -ENOENT;
}
-#ifdef __KERNEL__
+#if defined(__KERNEL__) && defined(LNET_ROUTER)
void
lnet_destroy_rtrbuf(lnet_rtrbuf_t *rb, int npages)
int sz = offsetof(lnet_rtrbuf_t, rb_kiov[npages]);
while (--npages >= 0)
- __free_page(rb->rb_kiov[npages].kiov_page);
+ cfs_free_page(rb->rb_kiov[npages].kiov_page);
LIBCFS_FREE(rb, sz);
}
rb->rb_pool = rbp;
for (i = 0; i < npages; i++) {
- page = alloc_page(GFP_KERNEL); /* HIGH? */
+ page = cfs_alloc_page(CFS_ALLOC_ZERO /*GFP_KERNEL*/); /* HIGH? */
if (page == NULL) {
while (--i >= 0)
- __free_page(rb->rb_kiov[i].kiov_page);
+ cfs_free_page(rb->rb_kiov[i].kiov_page);
LIBCFS_FREE(rb, sz);
return NULL;
*
*/
+#include <libcfs/libcfs.h>
#include <lnet/lib-lnet.h>
-#ifdef __KERNEL__
+#if defined(__KERNEL__) && defined(LNET_ROUTER)
#include <linux/seq_file.h>
#include <linux/lustre_compat25.h>
}
typedef struct {
- unsigned long long lrsi_version;
+ __u64 lrsi_version;
lnet_remotenet_t *lrsi_net;
lnet_route_t *lrsi_route;
loff_t lrsi_off;
{
int i;
unsigned magic;
- i = __le32_to_cpu(*(int *)(ev->md.start + ev->offset + sizeof(unsigned)));
- magic = __le32_to_cpu(*(int *)(ev->md.start + ev->offset));
+ i = __le32_to_cpu(*(int *)((char *)ev->md.start + ev->offset + sizeof(unsigned)));
+ magic = __le32_to_cpu(*(int *)((char *)ev->md.start + ev->offset));
if(magic != 0xcafebabe) {
CERROR("Unexpected response %x\n", magic);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Matt Wu <mattwu@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+/*
+ * Included Headers
+ */
+
+
+#include <libcfs/libcfs.h>
+
+
+/* libcfs module init/exit routines */
+DECLARE_INIT(init_libcfs_module);
+DECLARE_EXIT(exit_libcfs_module);
+
+/* portal module init/exit routines */
+DECLARE_INIT(init_lnet);
+DECLARE_EXIT(fini_lnet);
+
+/* tdilnd (TDI socknal) module init/exit routines */
+DECLARE_INIT(ksocknal_module_init);
+DECLARE_EXIT(ksocknal_module_fini);
+
+/* pingcli module init/exit routines */
+DECLARE_INIT(pingcli_init);
+DECLARE_EXIT(pingcli_cleanup);
+
+
+/* pingsrv module init/exit routines */
+DECLARE_INIT(pingsrv_init);
+DECLARE_EXIT(pingsrv_cleanup);
+
+/*
+ * structure definitions
+ */
+
+
+#define LUSTRE_PING_VERSION 0x00010000 /* ping srv/cli version: 0001.0000 */
+
+#define LUSTRE_PING_DEVICE L"\\Device\\LNET" /* device object name */
+#define LUSTRE_PING_SYMLNK L"\\DosDevices\\LNET" /* user-visible name for the device*/
+
+typedef struct _DEVICE_EXTENSION
+{
+ BOOLEAN bProcFS;
+
+} DEVICE_EXTENSION, *PDEVICE_EXTENSION;
+
+
+/*
+ * global definitions
+ */
+
+PDEVICE_OBJECT PingObject = NULL; /* ping device object */
+PDEVICE_OBJECT ProcObject = NULL; /* procfs emulator device */
+
+
+/*
+ * common routines
+ */
+
+
+//
+// complete Irp request ...
+//
+
+NTSTATUS
+UTCompleteIrp(
+ PIRP Irp,
+ NTSTATUS Status,
+ ULONG Info
+ )
+{
+ Irp->IoStatus.Status = Status;
+ Irp->IoStatus.Information = Info;
+ IoCompleteRequest(Irp,IO_NO_INCREMENT);
+
+ return Status;
+}
+
+//
+// Open/Create Device ...
+//
+
+NTSTATUS
+UTCreate(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ KdPrint(("UTCreate: DeviceCreate ...\n"));
+
+ return UTCompleteIrp(Irp,STATUS_SUCCESS,0);
+}
+
+//
+// Close Device ...
+//
+
+NTSTATUS
+UTClose(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp)
+{
+ KdPrint(("UTClose: Device Closed.\n"));
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+
+
+NTSTATUS
+UTShutdown(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ KdPrint(("UTShutdown: shutting down TdiSock ...\n"));
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+//
+// driver frame Routines ...
+//
+
+
+NTSTATUS
+UTDeviceControl(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST;
+ PIO_STACK_LOCATION IrpSp;
+
+ ULONG ControlCode;
+ ULONG InputLength;
+ ULONG OutputLength;
+
+ PVOID lpvInBuffer;
+
+ KdPrint(("UTDeviceControl: Device Ioctl ...\n"));
+
+ Irp->IoStatus.Information = 0;
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode;
+ InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength;
+ OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+ lpvInBuffer = Irp->AssociatedIrp.SystemBuffer;
+
+ ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
+
+ switch (ControlCode)
+ {
+ case IOCTL_LIBCFS_VERSION:
+
+ *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION);
+ Irp->IoStatus.Information = sizeof(ULONG);
+ Status = STATUS_SUCCESS;
+ break;
+
+ default:
+ break;
+ }
+
+ Irp->IoStatus.Status = Status;
+
+ IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+ KdPrint(("UTDeviceControl: Device Ioctl returned.\n"));
+
+ return Status;
+}
+
+NTSTATUS
+ProcCreate(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status;
+ PIO_STACK_LOCATION IrpSp;
+
+ FILE_FULL_EA_INFORMATION * ea;
+ cfs_file_t * fp;
+
+ KdPrint(("ProcCreate: Proc device is being opened ...\n"));
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ ea = (PFILE_FULL_EA_INFORMATION) Irp->AssociatedIrp.SystemBuffer;
+
+ if (!ea) {
+ Status = STATUS_INVALID_PARAMETER;
+ } else {
+ fp = lustre_open_file(&ea->EaName[0]);
+ if (!fp) {
+ Status = STATUS_OBJECT_NAME_NOT_FOUND;
+ } else {
+ IrpSp->FileObject->FsContext = fp;
+ IrpSp->FileObject->FsContext2 = fp->private_data;
+ Status = STATUS_SUCCESS;
+ }
+ }
+
+ return UTCompleteIrp(Irp, Status, 0);
+}
+
+//
+// Close Device ...
+//
+
+NTSTATUS
+ProcClose(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp)
+{
+ PIO_STACK_LOCATION IrpSp;
+
+ cfs_file_t * fp;
+
+ KdPrint(("ProcClose: Proc device object is to be closed.\n"));
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ ASSERT(fp != NULL);
+ ASSERT(IrpSp->FileObject->FsContext2 == fp->private_data);
+
+ lustre_close_file(fp);
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+/*
+ * proc frame routines
+ */
+
+NTSTATUS
+ProcDeviceControl(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST;
+ PIO_STACK_LOCATION IrpSp;
+
+ ULONG ControlCode;
+ ULONG InputLength;
+ ULONG OutputLength;
+
+ PVOID lpvInBuffer;
+
+ KdPrint(("ProcDeviceControl: Proc device ioctl ...\n"));
+
+ Irp->IoStatus.Information = 0;
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode;
+ InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength;
+ OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+ lpvInBuffer = Irp->AssociatedIrp.SystemBuffer;
+
+ ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
+
+ switch (ControlCode)
+ {
+ case IOCTL_LIBCFS_VERSION:
+
+ *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION);
+ Irp->IoStatus.Information = sizeof(ULONG);
+
+ Status = STATUS_SUCCESS;
+
+ break;
+
+ case IOCTL_LIBCFS_ENTRY:
+ {
+ int rc = 0;
+ cfs_file_t * fp;
+
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ if (!fp) {
+ rc = -EINVAL;
+ } else {
+ rc = lustre_ioctl_file(fp, (PCFS_PROC_IOCTL) (lpvInBuffer));
+ }
+
+ if (rc == 0) {
+ Irp->IoStatus.Information = InputLength;
+ Status = STATUS_SUCCESS;
+ }
+ }
+ }
+
+ Irp->IoStatus.Status = Status;
+
+ IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+ KdPrint(("ProcDeviceControl: Proc device ioctl returned with status = %xh.\n", Status));
+
+ return Status;
+}
+
+
+
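+//
+// Handle IRP_MJ_READ/IRP_MJ_WRITE for the proc emulator device:
+// resolve the caller's buffer (MDL for direct I/O, SystemBuffer for
+// buffered I/O) and forward to lustre_read_file/lustre_write_file.
+//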
+NTSTATUS
+ProcReadWrite (PDEVICE_OBJECT DeviceObject, PIRP Irp)
+{
+ PIO_STACK_LOCATION IrpSp;
+ NTSTATUS Status;
+
+ cfs_file_t * fp;
+ int rc;
+ PCHAR buf;
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ if (Irp->MdlAddress) {
+ buf = MmGetSystemAddressForMdlSafe(
+ Irp->MdlAddress,
+ NormalPagePriority);
+ } else {
+ buf = Irp->AssociatedIrp.SystemBuffer;
+ }
+
+ if (buf == NULL) {
+ Status = STATUS_SUCCESS;
+ rc = 0;
+ } else {
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ if (!fp) {
+ rc = 0;
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ if (IrpSp->MajorFunction == IRP_MJ_READ) {
+ rc = lustre_read_file(
+ fp, IrpSp->Parameters.Read.ByteOffset.LowPart,
+ IrpSp->Parameters.Read.Length, buf);
+ } else {
+ rc = lustre_write_file(
+ fp, IrpSp->Parameters.Write.ByteOffset.LowPart,
+ IrpSp->Parameters.Write.Length, buf);
+ }
+ if (rc < 0) {
+ cfs_enter_debugger();
+ Status = STATUS_UNSUCCESSFUL;
+ } else {
+ Status = STATUS_SUCCESS;
+ }
+ }
+
+
+errorout:
+ return UTCompleteIrp(Irp, Status, rc);
+}
+
+
+//
+// common dispatch routines
+//
+
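+//
+// Single dispatch entry registered for every major function in
+// DriverEntry: demultiplex on the target device object (ping vs proc
+// emulator) first, then on the IRP major function code.
+//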
+NTSTATUS
+UTDispatchRequest(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status;
+ PIO_STACK_LOCATION IrpSp;
+
+ Status = STATUS_INVALID_DEVICE_REQUEST;
+
+ __try {
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ switch (IrpSp->MajorFunction) {
+
+ case IRP_MJ_CREATE:
+ if (DeviceObject == PingObject) {
+ Status = UTCreate(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcCreate(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_CLOSE:
+ if (DeviceObject == PingObject) {
+ Status = UTClose(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcClose(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_READ:
+ case IRP_MJ_WRITE:
+ if (DeviceObject == ProcObject) {
+ Status = ProcReadWrite(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_DEVICE_CONTROL:
+ if (DeviceObject == PingObject) {
+ Status = UTDeviceControl(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcDeviceControl(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_SHUTDOWN:
+ Status = UTShutdown(DeviceObject, Irp);
+ break;
+
+ default:
+
+ KdPrint(("UTDispatchRequest: Major Function: %xh is not supported.\n",
+ IrpSp->MajorFunction));
+ UTCompleteIrp(Irp, Status, 0);
+ break;
+ }
+ }
+
+ __finally {
+ }
+
+ return Status;
+}
+
+//
+// create a device object and a dosdevice symbol link
+//
+
+PDEVICE_OBJECT
+CreateDevice(
+ IN PDRIVER_OBJECT DriverObject,
+ IN PWCHAR DeviceName,
+ IN PWCHAR SymlnkName,
+ IN BOOLEAN bProcFS
+ )
+{
+ NTSTATUS Status;
+
+ UNICODE_STRING NtDevName;
+ UNICODE_STRING Win32DevName;
+
+ PDEVICE_EXTENSION DeviceExtension;
+ PDEVICE_OBJECT DeviceObject;
+
+ /* create the device object with the specified name */
+
+ RtlInitUnicodeString(&NtDevName, DeviceName);
+
+ Status = IoCreateDevice(
+ DriverObject,
+ sizeof(DEVICE_EXTENSION),
+ &NtDevName,
+ FILE_DEVICE_UNKNOWN,
+ 0,
+ FALSE,
+ &DeviceObject );
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ return NULL;
+ }
+
+ /* create the symlink to make the device visible to user */
+
+ RtlInitUnicodeString(&Win32DevName, SymlnkName);
+
+ Status = IoCreateSymbolicLink(&Win32DevName, &NtDevName);
+
+ if (!NT_SUCCESS(Status)) {
+
+ IoDeleteDevice(DeviceObject);
+ return NULL;
+ }
+
+ DeviceExtension = (PDEVICE_EXTENSION)DeviceObject->DeviceObjectExtension;
+ DeviceExtension->bProcFS = bProcFS;
+
+ DeviceObject->Flags |= DO_BUFFERED_IO;
+ DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;
+
+ return DeviceObject;
+}
+
+
+//
+// DriverEntry
+//
+
+NTSTATUS DriverEntry(
+ IN PDRIVER_OBJECT DriverObject,
+ IN PUNICODE_STRING RegistryPath
+ )
+{
+ KdPrint(("Lustre ping test: Build Time: " __DATE__ " " __TIME__ "\n"));
+ KdPrint(("Lustre ping test: DriverEntry ... \n"));
+
+ /* initialize libcfs module */
+ if (module_init_libcfs_module() != 0) {
+ KdPrint(("ping: error initializing module: libcfs ...\n"));
+ goto errorout;
+ }
+
+ /* initialize lnet module */
+ if (module_init_lnet() != 0) {
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: lnet ...\n"));
+ goto errorout;
+ }
+
+ /* initialize tdilnd module */
+ if (module_ksocknal_module_init() != 0) {
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: tdilnd ...\n"));
+ goto errorout;
+ }
+
+#if defined(LUSTRE_PING_CLI)
+ /* initialize pingcli module */
+ if (module_pingcli_init() != 0) {
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: pingcli ...\n"));
+ goto errorout;
+ }
+#endif
+
+#if defined(LUSTRE_PING_SRV)
+ /* initialize pingsrv module */
+ if (module_pingsrv_init() != 0) {
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: pingsrv ...\n"));
+ goto errorout;
+ }
+#endif
+
+ /* create the ping device object */
+ PingObject = CreateDevice(
+ DriverObject,
+ LUSTRE_PING_DEVICE,
+ LUSTRE_PING_SYMLNK,
+ FALSE );
+ if (!PingObject) {
+#if defined(LUSTRE_PING_CLI)
+ module_pingcli_cleanup();
+#endif
+#if defined(LUSTRE_PING_SRV)
+ module_pingsrv_cleanup();
+#endif
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ /* create the libcfs procfs emulator device object */
+ ProcObject = CreateDevice(
+ DriverObject,
+ LUSTRE_PROC_DEVICE,
+ LUSTRE_PROC_SYMLNK,
+ TRUE );
+ if (!ProcObject) {
+
+ IoDeleteDevice(PingObject);
+#if defined(LUSTRE_PING_CLI)
+ module_pingcli_cleanup();
+#endif
+#if defined(LUSTRE_PING_SRV)
+ module_pingsrv_cleanup();
+#endif
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ /* initialize the driver callback routines */
+
+ DriverObject->MajorFunction[IRP_MJ_CREATE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_CLOSE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_READ] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_WRITE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = UTDispatchRequest;
+
+ return STATUS_SUCCESS;
+
+errorout:
+
+ cfs_enter_debugger();
+
+ return STATUS_UNSUCCESSFUL;
+}
continue;
}
- magic = __le32_to_cpu(*((int *)(server->evnt.md.start
+ magic = __le32_to_cpu(*((int *)((char *)server->evnt.md.start
+ server->evnt.offset)));
"(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
libcfs_nid2str(ev->initiator.nid),
ev->offset, ev->rlength, ev->mlength,
- __le32_to_cpu(*((int *)(ev->md.start + ev->offset))),
- __le32_to_cpu(*((int *)(ev->md.start + ev->offset + sizeof(unsigned)))),
- __le32_to_cpu(*((int *)(ev->md.start + ev->offset + 2 *
+ __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset))),
+ __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset + sizeof(unsigned)))),
+ __le32_to_cpu(*((int *)((char *)ev->md.start + ev->offset + 2 *
sizeof(unsigned)))));
packets_valid++;
{
int rc;
- /* Aquire and initialize the proper nal for portals. */
+ /* Acquire and initialize the proper NAL for portals. */
rc = LNetNIInit(0);
if (!(rc == 0 || rc == 1)) {
CDEBUG (D_OTHER, "LNetNIInit: error %d\n", rc);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Matt Wu <mattwu@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+/*
+ * Included Headers
+ */
+
+
+#include <libcfs/libcfs.h>
+
+
+/* libcfs module init/exit routines */
+DECLARE_INIT(init_libcfs_module);
+DECLARE_EXIT(exit_libcfs_module);
+
+/* portal module init/exit routines */
+DECLARE_INIT(init_lnet);
+DECLARE_EXIT(fini_lnet);
+
+/* tdilnd (TDI socknal) module init/exit routines */
+DECLARE_INIT(ksocknal_module_init);
+DECLARE_EXIT(ksocknal_module_fini);
+
+/* pingcli module init/exit routines */
+DECLARE_INIT(pingcli_init);
+DECLARE_EXIT(pingcli_cleanup);
+
+
+/* pingsrv module init/exit routines */
+DECLARE_INIT(pingsrv_init);
+DECLARE_EXIT(pingsrv_cleanup);
+
+/*
+ * structure definitions
+ */
+
+
+#define LUSTRE_PING_VERSION 0x00010000 /* ping srv/cli version: 0001.0000 */
+
+#define LUSTRE_PING_DEVICE L"\\Device\\LNET" /* device object name */
+#define LUSTRE_PING_SYMLNK L"\\DosDevices\\LNET" /* user-visible name for the device*/
+
+typedef struct _DEVICE_EXTENSION
+{
+ BOOLEAN bProcFS;
+
+} DEVICE_EXTENSION, *PDEVICE_EXTENSION;
+
+
+/*
+ * global definitions
+ */
+
+PDEVICE_OBJECT PingObject = NULL; /* ping device object */
+PDEVICE_OBJECT ProcObject = NULL; /* procfs emulator device */
+
+
+/*
+ * common routines
+ */
+
+
+//
+// complete Irp request ...
+//
+
+NTSTATUS
+UTCompleteIrp(
+ PIRP Irp,
+ NTSTATUS Status,
+ ULONG Info
+ )
+{
+ Irp->IoStatus.Status = Status;
+ Irp->IoStatus.Information = Info;
+ IoCompleteRequest(Irp,IO_NO_INCREMENT);
+
+ return Status;
+}
+
+//
+// Open/Create Device ...
+//
+
+NTSTATUS
+UTCreate(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ KdPrint(("UTCreate: DeviceCreate ...\n"));
+
+ return UTCompleteIrp(Irp,STATUS_SUCCESS,0);
+}
+
+//
+// Close Device ...
+//
+
+NTSTATUS
+UTClose(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp)
+{
+ KdPrint(("UTClose: Device Closed.\n"));
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+
+
+NTSTATUS
+UTShutdown(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ KdPrint(("UTShutdown: shutting down TdiSock ...\n"));
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+//
+// driver frame Routines ...
+//
+
+
+NTSTATUS
+UTDeviceControl(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST;
+ PIO_STACK_LOCATION IrpSp;
+
+ ULONG ControlCode;
+ ULONG InputLength;
+ ULONG OutputLength;
+
+ PVOID lpvInBuffer;
+
+ KdPrint(("UTDeviceControl: Device Ioctl ...\n"));
+
+ Irp->IoStatus.Information = 0;
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode;
+ InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength;
+ OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+ lpvInBuffer = Irp->AssociatedIrp.SystemBuffer;
+
+ ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
+
+ switch (ControlCode)
+ {
+ case IOCTL_LIBCFS_VERSION:
+
+ *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION);
+ Irp->IoStatus.Information = sizeof(ULONG);
+ Status = STATUS_SUCCESS;
+ break;
+
+ default:
+ break;
+ }
+
+ Irp->IoStatus.Status = Status;
+
+ IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+ KdPrint(("UTDeviceControl: Device Ioctl returned.\n"));
+
+ return Status;
+}
+
+NTSTATUS
+ProcCreate(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status;
+ PIO_STACK_LOCATION IrpSp;
+
+ FILE_FULL_EA_INFORMATION * ea;
+ cfs_file_t * fp;
+
+ KdPrint(("ProcCreate: Proc device is being opened ...\n"));
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ ea = (PFILE_FULL_EA_INFORMATION) Irp->AssociatedIrp.SystemBuffer;
+
+ if (!ea) {
+ Status = STATUS_INVALID_PARAMETER;
+ } else {
+ fp = lustre_open_file(&ea->EaName[0]);
+ if (!fp) {
+ Status = STATUS_OBJECT_NAME_NOT_FOUND;
+ } else {
+ IrpSp->FileObject->FsContext = fp;
+ IrpSp->FileObject->FsContext2 = fp->private_data;
+ Status = STATUS_SUCCESS;
+ }
+ }
+
+ return UTCompleteIrp(Irp, Status, 0);
+}
+
+//
+// Close Device ...
+//
+
+NTSTATUS
+ProcClose(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp)
+{
+ PIO_STACK_LOCATION IrpSp;
+
+ cfs_file_t * fp;
+
+ KdPrint(("ProcClose: Proc device object is to be closed.\n"));
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ ASSERT(fp != NULL);
+ ASSERT(IrpSp->FileObject->FsContext2 == fp->private_data);
+
+ lustre_close_file(fp);
+
+ return UTCompleteIrp(Irp, STATUS_SUCCESS, 0);
+
+ UNREFERENCED_PARAMETER(DeviceObject);
+}
+
+/*
+ * proc frame routines
+ */
+
+NTSTATUS
+ProcDeviceControl(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status = STATUS_INVALID_DEVICE_REQUEST;
+ PIO_STACK_LOCATION IrpSp;
+
+ ULONG ControlCode;
+ ULONG InputLength;
+ ULONG OutputLength;
+
+ PVOID lpvInBuffer;
+
+ KdPrint(("ProcDeviceControl: Proc device ioctl ...\n"));
+
+ Irp->IoStatus.Information = 0;
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ ControlCode = IrpSp->Parameters.DeviceIoControl.IoControlCode;
+ InputLength = IrpSp->Parameters.DeviceIoControl.InputBufferLength;
+ OutputLength = IrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+ lpvInBuffer = Irp->AssociatedIrp.SystemBuffer;
+
+ ASSERT (IrpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
+
+ switch (ControlCode)
+ {
+ case IOCTL_LIBCFS_VERSION:
+
+ *((ULONG *)lpvInBuffer) = (ULONG)(LUSTRE_PING_VERSION);
+ Irp->IoStatus.Information = sizeof(ULONG);
+
+ Status = STATUS_SUCCESS;
+
+ break;
+
+ case IOCTL_LIBCFS_ENTRY:
+ {
+ int rc = 0;
+ cfs_file_t * fp;
+
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ if (!fp) {
+ rc = -EINVAL;
+ } else {
+ rc = lustre_ioctl_file(fp, (PCFS_PROC_IOCTL) (lpvInBuffer));
+ }
+
+ if (rc == 0) {
+ Irp->IoStatus.Information = InputLength;
+ Status = STATUS_SUCCESS;
+ }
+ }
+ }
+
+ Irp->IoStatus.Status = Status;
+
+ IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+ KdPrint(("ProcDeviceControl: Proc device ioctl returned with status = %xh.\n", Status));
+
+ return Status;
+}
+
+
+
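+//
+// Handle IRP_MJ_READ/IRP_MJ_WRITE for the proc emulator device:
+// resolve the caller's buffer (MDL for direct I/O, SystemBuffer for
+// buffered I/O) and forward to lustre_read_file/lustre_write_file.
+//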
+NTSTATUS
+ProcReadWrite (PDEVICE_OBJECT DeviceObject, PIRP Irp)
+{
+ PIO_STACK_LOCATION IrpSp;
+ NTSTATUS Status;
+
+ cfs_file_t * fp;
+ int rc;
+ PCHAR buf;
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+ if (Irp->MdlAddress) {
+ buf = MmGetSystemAddressForMdlSafe(
+ Irp->MdlAddress,
+ NormalPagePriority);
+ } else {
+ buf = Irp->AssociatedIrp.SystemBuffer;
+ }
+
+ if (buf == NULL) {
+ Status = STATUS_SUCCESS;
+ rc = 0;
+ } else {
+ fp = (cfs_file_t *) IrpSp->FileObject->FsContext;
+
+ if (!fp) {
+ rc = 0;
+ Status = STATUS_INVALID_PARAMETER;
+ goto errorout;
+ }
+
+ if (IrpSp->MajorFunction == IRP_MJ_READ) {
+ rc = lustre_read_file(
+ fp, IrpSp->Parameters.Read.ByteOffset.LowPart,
+ IrpSp->Parameters.Read.Length, buf);
+ } else {
+ rc = lustre_write_file(
+ fp, IrpSp->Parameters.Write.ByteOffset.LowPart,
+ IrpSp->Parameters.Write.Length, buf);
+ }
+ if (rc < 0) {
+ cfs_enter_debugger();
+ Status = STATUS_UNSUCCESSFUL;
+ } else {
+ Status = STATUS_SUCCESS;
+ }
+ }
+
+
+errorout:
+ return UTCompleteIrp(Irp, Status, rc);
+}
+
+
+//
+// common dispatch routines
+//
+
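+//
+// Single dispatch entry registered for every major function in
+// DriverEntry: demultiplex on the target device object (ping vs proc
+// emulator) first, then on the IRP major function code.
+//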
+NTSTATUS
+UTDispatchRequest(
+ IN PDEVICE_OBJECT DeviceObject,
+ IN PIRP Irp
+ )
+{
+ NTSTATUS Status;
+ PIO_STACK_LOCATION IrpSp;
+
+ Status = STATUS_INVALID_DEVICE_REQUEST;
+
+ __try {
+
+ IrpSp = IoGetCurrentIrpStackLocation(Irp);
+
+ switch (IrpSp->MajorFunction) {
+
+ case IRP_MJ_CREATE:
+ if (DeviceObject == PingObject) {
+ Status = UTCreate(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcCreate(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_CLOSE:
+ if (DeviceObject == PingObject) {
+ Status = UTClose(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcClose(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_READ:
+ case IRP_MJ_WRITE:
+ if (DeviceObject == ProcObject) {
+ Status = ProcReadWrite(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_DEVICE_CONTROL:
+ if (DeviceObject == PingObject) {
+ Status = UTDeviceControl(DeviceObject, Irp);
+ } else if (DeviceObject == ProcObject) {
+ Status = ProcDeviceControl(DeviceObject, Irp);
+ }
+ break;
+
+ case IRP_MJ_SHUTDOWN:
+ Status = UTShutdown(DeviceObject, Irp);
+ break;
+
+ default:
+
+ KdPrint(("UTDispatchRequest: Major Function: %xh is not supported.\n",
+ IrpSp->MajorFunction));
+ UTCompleteIrp(Irp, Status, 0);
+ break;
+ }
+ }
+
+ __finally {
+ }
+
+ return Status;
+}
+
+//
+// create a device object and a dosdevice symbol link
+//
+
+PDEVICE_OBJECT
+CreateDevice(
+ IN PDRIVER_OBJECT DriverObject,
+ IN PWCHAR DeviceName,
+ IN PWCHAR SymlnkName,
+ IN BOOLEAN bProcFS
+ )
+{
+ NTSTATUS Status;
+
+ UNICODE_STRING NtDevName;
+ UNICODE_STRING Win32DevName;
+
+ PDEVICE_EXTENSION DeviceExtension;
+ PDEVICE_OBJECT DeviceObject;
+
+ /* create the device object with the specified name */
+
+ RtlInitUnicodeString(&NtDevName, DeviceName);
+
+ Status = IoCreateDevice(
+ DriverObject,
+ sizeof(DEVICE_EXTENSION),
+ &NtDevName,
+ FILE_DEVICE_UNKNOWN,
+ 0,
+ FALSE,
+ &DeviceObject );
+
+ if (!NT_SUCCESS(Status)) {
+
+ cfs_enter_debugger();
+ return NULL;
+ }
+
+ /* create the symlink to make the device visible to user */
+
+ RtlInitUnicodeString(&Win32DevName, SymlnkName);
+
+ Status = IoCreateSymbolicLink(&Win32DevName, &NtDevName);
+
+ if (!NT_SUCCESS(Status)) {
+
+ IoDeleteDevice(DeviceObject);
+ return NULL;
+ }
+
+ DeviceExtension = (PDEVICE_EXTENSION)DeviceObject->DeviceObjectExtension;
+ DeviceExtension->bProcFS = bProcFS;
+
+ DeviceObject->Flags |= DO_BUFFERED_IO;
+ DeviceObject->Flags &= ~DO_DEVICE_INITIALIZING;
+
+ return DeviceObject;
+}
+
+
+//
+// DriverEntry
+//
+
+NTSTATUS DriverEntry(
+ IN PDRIVER_OBJECT DriverObject,
+ IN PUNICODE_STRING RegistryPath
+ )
+{
+ KdPrint(("Lustre ping test: Build Time: " __DATE__ " " __TIME__ "\n"));
+ KdPrint(("Lustre ping test: DriverEntry ... \n"));
+
+ /* initialize libcfs module */
+ if (module_init_libcfs_module() != 0) {
+ KdPrint(("ping: error initializing module: libcfs ...\n"));
+ goto errorout;
+ }
+
+ /* initialize lnet module */
+ if (module_init_lnet() != 0) {
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: lnet ...\n"));
+ goto errorout;
+ }
+
+ /* initialize tdilnd module */
+ if (module_ksocknal_module_init() != 0) {
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: tdilnd ...\n"));
+ goto errorout;
+ }
+
+#if defined(LUSTRE_PING_CLI)
+ /* initialize pingcli module */
+ if (module_pingcli_init() != 0) {
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: pingcli ...\n"));
+ goto errorout;
+ }
+#endif
+
+#if defined(LUSTRE_PING_SRV)
+ /* initialize pingsrv module */
+ if (module_pingsrv_init() != 0) {
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ KdPrint(("ping: error initializing module: pingsrv ...\n"));
+ goto errorout;
+ }
+#endif
+
+ /* create the ping device object */
+ PingObject = CreateDevice(
+ DriverObject,
+ LUSTRE_PING_DEVICE,
+ LUSTRE_PING_SYMLNK,
+ FALSE );
+ if (!PingObject) {
+#if defined(LUSTRE_PING_CLI)
+ module_pingcli_cleanup();
+#endif
+#if defined(LUSTRE_PING_SRV)
+ module_pingsrv_cleanup();
+#endif
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ /* create the libcfs procfs emulator device object */
+ ProcObject = CreateDevice(
+ DriverObject,
+ LUSTRE_PROC_DEVICE,
+ LUSTRE_PROC_SYMLNK,
+ TRUE );
+ if (!ProcObject) {
+
+ IoDeleteDevice(PingObject);
+#if defined(LUSTRE_PING_CLI)
+ module_pingcli_cleanup();
+#endif
+#if defined(LUSTRE_PING_SRV)
+ module_pingsrv_cleanup();
+#endif
+ module_ksocknal_module_fini();
+ module_fini_lnet();
+ module_exit_libcfs_module();
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ /* initialize the driver callback routines */
+
+ DriverObject->MajorFunction[IRP_MJ_CREATE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_CLOSE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_READ] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_WRITE] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_SHUTDOWN] = UTDispatchRequest;
+ DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = UTDispatchRequest;
+
+ return STATUS_SUCCESS;
+
+errorout:
+
+ cfs_enter_debugger();
+
+ return STATUS_UNSUCCESSFUL;
+}
"PTLLND_PID", PTLLND_PID);
if (rc != 0)
return rc;
- plni->plni_pid = (ptl_pid_t)temp;
+ plni->plni_ptllnd_pid = (ptl_pid_t)temp;
rc = ptllnd_parse_int_tunable(&plni->plni_peer_credits,
"PTLLND_PEERCREDITS", PTLLND_PEERCREDITS);
plni->plni_buffer_size = plni->plni_max_msg_size * msgs_per_buffer;
- PJK_UT_MSG("portal = %d\n",plni->plni_portal);
- PJK_UT_MSG("pid = %d\n",plni->plni_pid);
- PJK_UT_MSG("max_immediate = %d\n",max_immediate);
- PJK_UT_MSG("msgs_per_buffer = %d\n",msgs_per_buffer);
- PJK_UT_MSG("msgs_spare = %d\n",plni->plni_msgs_spare);
- PJK_UT_MSG("peer_hash_size = %d\n",plni->plni_peer_hash_size);
- PJK_UT_MSG("eq_size = %d\n",plni->plni_eq_size);
- PJK_UT_MSG("max_msg_size = %d\n",plni->plni_max_msg_size);
- PJK_UT_MSG("buffer_size = %d\n",plni->plni_buffer_size);
+ CDEBUG(D_NET, "portal = %d\n",plni->plni_portal);
+ CDEBUG(D_NET, "ptllnd_pid = %d\n",plni->plni_ptllnd_pid);
+ CDEBUG(D_NET, "max_immediate = %d\n",max_immediate);
+ CDEBUG(D_NET, "msgs_per_buffer = %d\n",msgs_per_buffer);
+ CDEBUG(D_NET, "msgs_spare = %d\n",plni->plni_msgs_spare);
+ CDEBUG(D_NET, "peer_hash_size = %d\n",plni->plni_peer_hash_size);
+ CDEBUG(D_NET, "eq_size = %d\n",plni->plni_eq_size);
+ CDEBUG(D_NET, "max_msg_size = %d\n",plni->plni_max_msg_size);
+ CDEBUG(D_NET, "buffer_size = %d\n",plni->plni_buffer_size);
return 0;
}
int nbufs;
int rc;
- PJK_UT_MSG("nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
- PJK_UT_MSG("nbuffers = %d (before)\n",plni->plni_nbuffers);
+ CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
+ CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
nmsgs = plni->plni_npeers * plni->plni_peer_credits +
}
}
- PJK_UT_MSG("nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
- PJK_UT_MSG("nbuffers = %d (after)\n",plni->plni_nbuffers);
+ CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
+ CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers);
return 0;
}
struct list_head *tmp;
struct list_head *nxt;
- PJK_UT_MSG("nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
- PJK_UT_MSG("nbuffers = %d (before)\n",plni->plni_nbuffers);
+ CDEBUG(D_NET, "nposted_buffers = %d (before)\n",plni->plni_nposted_buffers);
+ CDEBUG(D_NET, "nbuffers = %d (before)\n",plni->plni_nbuffers);
list_for_each_safe(tmp, nxt, &plni->plni_buffers) {
buf = list_entry(tmp, ptllnd_buffer_t, plb_list);
- //PJK_UT_MSG("buf=%p posted=%d\n",buf,buf->plb_posted);
+ //CDEBUG(D_NET, "buf=%p posted=%d\n",buf,buf->plb_posted);
LASSERT (plni->plni_nbuffers > 0);
if (buf->plb_posted) {
ptllnd_destroy_buffer(buf);
}
- PJK_UT_MSG("nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
- PJK_UT_MSG("nbuffers = %d (after)\n",plni->plni_nbuffers);
+ CDEBUG(D_NET, "nposted_buffers = %d (after)\n",plni->plni_nposted_buffers);
+ CDEBUG(D_NET, "nbuffers = %d (after)\n",plni->plni_nbuffers);
LASSERT (plni->plni_nposted_buffers == 0);
LASSERT (plni->plni_nbuffers == 0);
ptllnd_peer_t *plp;
int i;
- PJK_UT_MSG(">>> npeers=%d\n",plni->plni_npeers);
+ CDEBUG(D_NET, ">>> npeers=%d\n",plni->plni_npeers);
for (i = 0; i < plni->plni_peer_hash_size; i++)
while (!list_empty(&plni->plni_peer_hash[i])) {
ptllnd_close_peer(plp);
}
- PJK_UT_MSG("<<< npeers=%d\n",plni->plni_npeers);
+ CDEBUG(D_NET, "<<< npeers=%d\n",plni->plni_npeers);
}
__u64
ptllnd_ni_t *plni = ni->ni_data;
int rc;
- PJK_UT_MSG(">>>\n");
+ CDEBUG(D_NET, ">>>\n");
LASSERT (ptllnd_ni_count == 1);
LIBCFS_FREE(plni, sizeof(*plni));
ptllnd_ni_count--;
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
int
ptllnd_ni_t *plni;
int rc;
- PJK_UT_MSG(">>> ni=%p\n",ni);
+ CDEBUG(D_NET, ">>> ni=%p\n",ni);
/* could get limits from portals I guess... */
ni->ni_maxtxcredits =
* the lnet pid to the pid of this process.
*/
the_lnet.ln_pid = getpid();
- PJK_UT_MSG("Forcing LNET pid to %d\n",the_lnet.ln_pid);
+ CDEBUG(D_NET, "Forcing LNET pid to %d\n",the_lnet.ln_pid);
plni->plni_stamp = ptllnd_get_timestamp();
plni->plni_nrxs = 0;
if (rc != 0)
goto failed1;
- rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_pid,
+ /* NB I most probably won't get the PID I requested here. It doesn't
+ * matter because I don't need a fixed PID (only connection acceptors
+ * need a "well known" PID). */
+
+ rc = PtlNIInit(PTL_IFACE_DEFAULT, plni->plni_ptllnd_pid,
NULL, NULL, &plni->plni_nih);
if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
CERROR("PtlNIInit failed: %d\n", rc);
rc = -ENODEV;
goto failed2;
}
- PJK_UT_MSG("plni->plni_nih=%x\n",plni->plni_nih);
+ CDEBUG(D_NET, "plni->plni_nih=%x\n",plni->plni_nih);
rc = PtlEQAlloc(plni->plni_nih, plni->plni_eq_size,
PTL_EQ_HANDLER_NONE, &plni->plni_eqh);
rc = -ENODEV;
goto failed3;
}
- PJK_UT_MSG("plni->plni_eqh=%x\n",plni->plni_eqh);
+ CDEBUG(D_NET, "plni->plni_eqh=%x\n",plni->plni_eqh);
/*
* Fetch the Portals NID
goto failed4;
}
- PJK_UT_MSG("lnet nid=" LPX64 " (passed in)\n",ni->ni_nid);
+ CDEBUG(D_NET, "lnet nid=" LPX64 " (passed in)\n",ni->ni_nid);
/*
* Create the new NID. Based on the LND network type
*/
ni->ni_nid = ptl2lnetnid(ni,plni->plni_portals_id.nid);
- PJK_UT_MSG("ptl pid=" FMT_PID "\n",plni->plni_portals_id.pid);
- PJK_UT_MSG("ptl nid=" FMT_NID "\n",plni->plni_portals_id.nid);
- PJK_UT_MSG("lnet nid=" LPX64 " (passed back)\n",ni->ni_nid);
-
- CDEBUG(D_INFO,"ptl pid=" FMT_PID "\n",plni->plni_portals_id.pid);
- CDEBUG(D_INFO,"ptl nid=" FMT_NID "\n",plni->plni_portals_id.nid);
- CDEBUG(D_INFO,"lnet nid=" LPX64 "\n",ni->ni_nid);
+ CDEBUG(D_NET, "ptl pid=" FMT_PID "\n",plni->plni_portals_id.pid);
+ CDEBUG(D_NET, "ptl nid=" FMT_NID "\n",plni->plni_portals_id.nid);
+ CDEBUG(D_NET, "lnet nid=" LPX64 " (passed back)\n",ni->ni_nid);
rc = ptllnd_grow_buffers(ni);
if (rc != 0)
goto failed4;
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return 0;
failed4:
LIBCFS_FREE(plni, sizeof(*plni));
failed0:
ptllnd_ni_count--;
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
typedef struct
{
int plni_portal;
- ptl_pid_t plni_pid;
+ ptl_pid_t plni_ptllnd_pid; /* Portals PID of peers I may connect to */
int plni_peer_credits;
int plni_max_msg_size;
int plni_buffer_size;
}
/*
- * Define this to enable console debug logging
- * and simulation
- */
-//#define PJK_DEBUGGING
-
-/*
* A note about lprintf():
* Normally printf() is redirected to stdout of the console
* from which yod launched the catamount application. However
* cases.
*/
-#ifdef PJK_DEBUGGING
-
-#define PJK_UT_MSG_ALWAYS(fmt, a...) \
-do{ \
- lprintf("ptllnd:%-30s:",__FUNCTION__); \
- lprintf(fmt,## a); \
-}while(0)
-
-
-#define PJK_UT_MSG_SIMULATION(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-
-
-#if 1
-#define PJK_UT_MSG_DATA(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-#else
-#define PJK_UT_MSG_DATA(fmt, a...) do{}while(0)
-#endif
-
-#if 1
-#define PJK_UT_MSG(fmt, a...) PJK_UT_MSG_ALWAYS(fmt, ## a )
-#else
-#define PJK_UT_MSG(fmt, a...) do{}while(0)
-#endif
-
-#else
-
-
-#define PJK_UT_MSG_ALWAYS(fmt, a...) do{}while(0)
-#define PJK_UT_MSG_SIMULATION(fmt, a...) do{}while(0)
-#define PJK_UT_MSG_DATA(fmt, a...) do{}while(0)
-#define PJK_UT_MSG(fmt, a...) do{}while(0)
-
-#endif
ptllnd_tx_t *tx;
int rc;
- PJK_UT_MSG(">>> nid=%s\n",libcfs_nid2str(nid));
+ CDEBUG(D_NET, ">>> nid=%s\n",libcfs_nid2str(nid));
LASSERT (LNET_NIDNET(nid) == LNET_NIDNET(ni->ni_nid));
if (plp->plp_nid == nid) {
ptllnd_peer_addref(plp);
- PJK_UT_MSG("<<< peer=%p FOUND\n",plp);
+ CDEBUG(D_NET, "<<< peer=%p FOUND\n",plp);
return plp;
}
}
return NULL;
}
- PJK_UT_MSG("new peer=%p\n",plp);
+ CDEBUG(D_NET, "new peer=%p\n",plp);
plp->plp_ni = ni;
plp->plp_nid = nid;
plp->plp_ptlid.nid = LNET_NIDADDR(nid);
- plp->plp_ptlid.pid = plni->plni_pid;
+ plp->plp_ptlid.pid = plni->plni_ptllnd_pid;
plp->plp_max_credits =
plp->plp_credits = 1; /* add more later when she gives me credits */
plp->plp_max_msg_size = plni->plni_max_msg_size; /* until I hear from her */
ptllnd_post_tx(tx);
- PJK_UT_MSG("<<< peer=%p NEW\n",plp);
+ CDEBUG(D_NET, "<<< peer=%p NEW\n",plp);
return plp;
}
ptllnd_tx_t *tx;
int msgsize;
- PJK_UT_MSG("peer=%p type=%d payload=%d\n",peer,type,payload_nob);
+ CDEBUG(D_NET, "peer=%p type=%d payload=%d\n",peer,type,payload_nob);
switch (type) {
default:
LASSERT (msgsize <= peer->plp_max_msg_size);
- PJK_UT_MSG("msgsize=%d\n",msgsize);
+ CDEBUG(D_NET, "msgsize=%d\n",msgsize);
LIBCFS_ALLOC(tx, offsetof(ptllnd_tx_t, tx_msg) + msgsize);
ptllnd_peer_addref(peer);
plni->plni_ntxs++;
- PJK_UT_MSG("tx=%p\n",tx);
+ CDEBUG(D_NET, "tx=%p\n",tx);
return tx;
}
* events for this tx until it's unlinked. So I set tx_completing to
* flag the tx is getting handled */
- PJK_UT_MSG(">>> tx=%p peer=%p\n",tx,peer);
- PJK_UT_MSG("completing=%d\n",tx->tx_completing);
- PJK_UT_MSG("status=%d\n",tx->tx_status);
- PJK_UT_MSG("niov=%d\n",tx->tx_niov);
- PJK_UT_MSG("lnetreplymsg=%p\n",tx->tx_lnetreplymsg);
- PJK_UT_MSG("lnetmsg=%p\n",tx->tx_lnetmsg);
+ CDEBUG(D_NET, ">>> tx=%p peer=%p\n",tx,peer);
+ CDEBUG(D_NET, "completing=%d\n",tx->tx_completing);
+ CDEBUG(D_NET, "status=%d\n",tx->tx_status);
+ CDEBUG(D_NET, "niov=%d\n",tx->tx_niov);
+ CDEBUG(D_NET, "lnetreplymsg=%p\n",tx->tx_lnetreplymsg);
+ CDEBUG(D_NET, "lnetmsg=%p\n",tx->tx_lnetmsg);
if (tx->tx_completing)
return;
LASSERT (tx->tx_lnetmsg != NULL);
/* Simulate GET success always */
lnet_finalize(ni, tx->tx_lnetmsg, 0);
- PJK_UT_MSG("lnet_finalize(tx_lnetreplymsg=%p)\n",tx->tx_lnetreplymsg);
+ CDEBUG(D_NET, "lnet_finalize(tx_lnetreplymsg=%p)\n",tx->tx_lnetreplymsg);
lnet_finalize(ni, tx->tx_lnetreplymsg, tx->tx_status);
} else if (tx->tx_lnetmsg != NULL) {
lnet_finalize(ni, tx->tx_lnetmsg, tx->tx_status);
plni->plni_ntxs--;
LIBCFS_FREE(tx, offsetof(ptllnd_tx_t, tx_msg) + tx->tx_msgsize);
- PJK_UT_MSG("<<< tx=%p\n",tx);
+ CDEBUG(D_NET, "<<< tx=%p\n",tx);
}
void
return 0;
}
- PJK_UT_MSG("niov =%d\n",niov);
- PJK_UT_MSG("offset=%d\n",offset);
- PJK_UT_MSG("len =%d\n",len);
+ CDEBUG(D_NET, "niov =%d\n",niov);
+ CDEBUG(D_NET, "offset=%d\n",offset);
+ CDEBUG(D_NET, "len =%d\n",len);
/*
iov++;
}
- PJK_UT_MSG("niov =%d (after)\n",niov);
- PJK_UT_MSG("offset=%d (after)\n",offset);
- PJK_UT_MSG("len =%d (after)\n",len);
+ CDEBUG(D_NET, "niov =%d (after)\n",niov);
+ CDEBUG(D_NET, "offset=%d (after)\n",offset);
+ CDEBUG(D_NET, "len =%d (after)\n",len);
for (;;) {
int temp_offset = offset;
return -ENOMEM;
for (npiov = 0;; npiov++) {
- PJK_UT_MSG("npiov=%d\n",npiov);
- PJK_UT_MSG("offset=%d\n",temp_offset);
- PJK_UT_MSG("len=%d\n",resid);
- PJK_UT_MSG("iov[npiov].iov_len=%d\n",iov[npiov].iov_len);
+ CDEBUG(D_NET, "npiov=%d\n",npiov);
+ CDEBUG(D_NET, "offset=%d\n",temp_offset);
+ CDEBUG(D_NET, "len=%d\n",resid);
+ CDEBUG(D_NET, "iov[npiov].iov_len=%d\n",iov[npiov].iov_len);
LASSERT (npiov < niov);
LASSERT (iov->iov_len >= temp_offset);
if (npiov == niov) {
tx->tx_niov = niov;
tx->tx_iov = piov;
- PJK_UT_MSG("tx->tx_iov=%p\n",tx->tx_iov);
- PJK_UT_MSG("tx->tx_niov=%d\n",tx->tx_niov);
+ CDEBUG(D_NET, "tx->tx_iov=%p\n",tx->tx_iov);
+ CDEBUG(D_NET, "tx->tx_niov=%d\n",tx->tx_niov);
return 0;
}
ptl_handle_md_t mdh;
int rc;
- PJK_UT_MSG(">>> peer=%p\n",peer);
- PJK_UT_MSG("plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
+ CDEBUG(D_NET, ">>> peer=%p\n",peer);
+ CDEBUG(D_NET, "plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
if (list_empty(&peer->plp_txq) &&
peer->plp_outstanding_credits >=
PTLLND_CREDIT_HIGHWATER(plni)) {
tx = ptllnd_new_tx(peer, PTLLND_MSG_TYPE_NOOP, 0);
- PJK_UT_MSG("NOOP tx=%p\n",tx);
+ CDEBUG(D_NET, "NOOP tx=%p\n",tx);
if (tx == NULL) {
CERROR("Can't return credits to %s\n",
libcfs_nid2str(peer->plp_nid));
while (!list_empty(&peer->plp_txq)) {
tx = list_entry(peer->plp_txq.next, ptllnd_tx_t, tx_list);
- PJK_UT_MSG("Looking at TX=%p\n",tx);
- PJK_UT_MSG("plp_credits=%d\n",peer->plp_credits);
- PJK_UT_MSG("plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
+ CDEBUG(D_NET, "Looking at TX=%p\n",tx);
+ CDEBUG(D_NET, "plp_credits=%d\n",peer->plp_credits);
+ CDEBUG(D_NET, "plp_outstanding_credits=%d\n",peer->plp_outstanding_credits);
LASSERT (tx->tx_msgsize > 0);
list_del_init(&tx->tx_list);
- PJK_UT_MSG("Sending at TX=%p type=%s (%d)\n",tx,
+ CDEBUG(D_NET, "Sending at TX=%p type=%s (%d)\n",tx,
get_msg_type_string(tx->tx_type),tx->tx_type);
if (tx->tx_type == PTLLND_MSG_TYPE_NOOP &&
*/
tx->tx_msg.ptlm_dststamp = peer->plp_stamp;
- PJK_UT_MSG("Returning %d to peer\n",peer->plp_outstanding_credits);
+ CDEBUG(D_NET, "Returning %d to peer\n",peer->plp_outstanding_credits);
/*
* Return all the credits we have
list_add_tail(&tx->tx_list, &plni->plni_active_txs);
}
- PJK_UT_MSG("<<< peer=%p\n",peer);
+ CDEBUG(D_NET, "<<< peer=%p\n",peer);
}
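
These hunks trace the peer credit state that drives the send path: each tx consumes a peer credit and carries our outstanding credits back, and once the outstanding count reaches the high-water mark with nothing queued, an explicit NOOP is built just to return credits. A minimal sketch of that high-water test, with stand-in types rather than the ptllnd ones:

    /* Sketch of the credit high-water test traced above; the struct and
     * the highwater argument are illustrative stand-ins. */
    struct sketch_peer {
            int credits;              /* sends the peer will still accept */
            int outstanding_credits;  /* receives we owe back to the peer */
    };

    static int sketch_need_noop(const struct sketch_peer *p,
                                int queue_empty, int highwater)
    {
            /* with nothing queued to piggy-back credits on, send an
             * explicit NOOP once we owe at least 'highwater' credits */
            return queue_empty && p->outstanding_credits >= highwater;
    }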
int
int rc;
int rc2;
- PJK_UT_MSG(">>> peer=%p type=%s(%d) tx=%p\n",peer,
+ CDEBUG(D_NET, ">>> peer=%p type=%s(%d) tx=%p\n",peer,
type == PTLLND_MSG_TYPE_GET ? "GET" : "PUT/REPLY",type,tx);
- PJK_UT_MSG("niov=%d offset=%d len=%d\n",niov,offset,len);
+ CDEBUG(D_NET, "niov=%d offset=%d len=%d\n",niov,offset,len);
LASSERT (type == PTLLND_MSG_TYPE_GET ||
type == PTLLND_MSG_TYPE_PUT);
ptllnd_set_md_buffer(&md, tx);
while (!peer->plp_recvd_hello) { /* wait to validate plp_match */
- PJK_UT_MSG("Wait For Hello\n");
+ CDEBUG(D_NET, "Wait For Hello\n");
if (peer->plp_closing) {
rc = -EIO;
goto failed;
if(peer->plp_match < PTL_RESERVED_MATCHBITS)
peer->plp_match = PTL_RESERVED_MATCHBITS;
matchbits = peer->plp_match++;
- PJK_UT_MSG("matchbits " LPX64 "\n",matchbits);
- PJK_UT_MSG("nid " FMT_NID " pid=%d\n",peer->plp_ptlid.nid,peer->plp_ptlid.pid);
+ CDEBUG(D_NET, "matchbits " LPX64 "\n",matchbits);
+ CDEBUG(D_NET, "nid " FMT_NID " pid=%d\n",peer->plp_ptlid.nid,peer->plp_ptlid.pid);
rc = PtlMEAttach(plni->plni_nih, plni->plni_portal, peer->plp_ptlid,
matchbits, 0, PTL_UNLINK, PTL_INS_BEFORE, &meh);
goto failed;
}
-/*
- PJK_UT_MSG("md.start=%p\n",md.start);
- PJK_UT_MSG("md.length=%d\n",md.length);
- PJK_UT_MSG("md.threshold=%d\n",md.threshold);
- PJK_UT_MSG("md.max_size=%d\n",md.max_size);
- PJK_UT_MSG("md.options=0x%x\n",md.options);
- PJK_UT_MSG("md.user_ptr=%p\n",md.user_ptr);
- PJK_UT_MSG("md.eq_handle=%p\n",md.eq_handle);
-*/
+ CDEBUG(D_NET, "md.start=%p\n",md.start);
+ CDEBUG(D_NET, "md.length=%d\n",md.length);
+ CDEBUG(D_NET, "md.threshold=%d\n",md.threshold);
+ CDEBUG(D_NET, "md.max_size=%d\n",md.max_size);
+ CDEBUG(D_NET, "md.options=0x%x\n",md.options);
+ CDEBUG(D_NET, "md.user_ptr=%p\n",md.user_ptr);
+
rc = PtlMDAttach(meh, md, LNET_UNLINK, &mdh);
if (rc != PTL_OK) {
CERROR("PtlMDAttach for %s failed: %d\n",
tx->tx_lnetmsg = msg;
ptllnd_post_tx(tx);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return 0;
failed:
ptllnd_tx_done(tx);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
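
This path posts the passive side of an RDMA: it allocates fresh match bits for the peer, skipping the reserved range, then attaches an ME keyed on the peer's Portals id and those bits before attaching the MD. A sketch of the matchbits invariant, with an illustrative constant standing in for PTL_RESERVED_MATCHBITS:

    #include <stdint.h>

    /* Sketch of the matchbits allocation above: a per-peer 64-bit
     * counter that must never re-enter the reserved range. */
    #define SKETCH_RESERVED_MATCHBITS 0x100ULL

    static uint64_t sketch_next_matchbits(uint64_t *match)
    {
            if (*match < SKETCH_RESERVED_MATCHBITS)
                    *match = SKETCH_RESERVED_MATCHBITS;
            return (*match)++;
    }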
ptl_handle_md_t mdh;
int rc;
- PJK_UT_MSG(">>> peer=%p type=%d tx=%p\n",peer,type,tx);
- PJK_UT_MSG("niov=%u offset=%u len=%u\n",niov,offset,len);
- PJK_UT_MSG("matchbits " LPX64 "\n",matchbits);
+ CDEBUG(D_NET, ">>> peer=%p type=%d tx=%p\n",peer,type,tx);
+ CDEBUG(D_NET, "niov=%u offset=%u len=%u\n",niov,offset,len);
+ CDEBUG(D_NET, "matchbits " LPX64 "\n",matchbits);
LASSERT (type == PTLLND_RDMA_READ ||
type == PTLLND_RDMA_WRITE);
rc = PtlPut(mdh, PTL_NOACK_REQ, peer->plp_ptlid,
plni->plni_portal, 0, matchbits, 0, 0);
if (rc == 0){
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return 0;
}
failed:
tx->tx_status = rc;
ptllnd_tx_done(tx); /* this will close peer */
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
LASSERT (msg->msg_niov <= PTL_MD_MAX_IOV); /* !!! */
- PJK_UT_MSG("msg=%p nid=%s\n",msg,libcfs_nid2str(msg->msg_target.nid));
- PJK_UT_MSG("is_target_router=%d\n",msg->msg_target_is_router);
- PJK_UT_MSG("msg_niov=%d\n",msg->msg_niov);
- PJK_UT_MSG("msg_offset=%d\n",msg->msg_offset);
- PJK_UT_MSG("msg_len=%d\n",msg->msg_len);
+ CDEBUG(D_NET, "msg=%p nid=%s\n",msg,libcfs_nid2str(msg->msg_target.nid));
+ CDEBUG(D_NET, "is_target_router=%d\n",msg->msg_target_is_router);
+ CDEBUG(D_NET, "msg_niov=%d\n",msg->msg_niov);
+ CDEBUG(D_NET, "msg_offset=%d\n",msg->msg_offset);
+ CDEBUG(D_NET, "msg_len=%d\n",msg->msg_len);
plp = ptllnd_find_peer(ni, msg->msg_target.nid, 1);
if (plp == NULL)
LBUG();
case LNET_MSG_ACK:
- PJK_UT_MSG("LNET_MSG_ACK\n");
+ CDEBUG(D_NET, "LNET_MSG_ACK\n");
LASSERT (msg->msg_len == 0);
break; /* send IMMEDIATE */
case LNET_MSG_GET:
- PJK_UT_MSG("LNET_MSG_GET nob=%d\n",msg->msg_md->md_length);
+ CDEBUG(D_NET, "LNET_MSG_GET nob=%d\n",msg->msg_md->md_length);
if (msg->msg_target_is_router)
break; /* send IMMEDIATE */
msg->msg_md->md_iov.iov,
0, msg->msg_md->md_length);
ptllnd_peer_decref(plp);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
case LNET_MSG_REPLY:
case LNET_MSG_PUT:
- PJK_UT_MSG("LNET_MSG_PUT nob=%d\n",msg->msg_len);
+ CDEBUG(D_NET, "LNET_MSG_PUT nob=%d\n",msg->msg_len);
nob = msg->msg_len;
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[nob]);
- PJK_UT_MSG("msg_size=%d max=%d\n",msg->msg_len,plp->plp_max_msg_size);
+ CDEBUG(D_NET, "msg_size=%d max=%d\n",msg->msg_len,plp->plp_max_msg_size);
if (nob <= plp->plp_max_msg_size)
break; /* send IMMEDIATE */
msg->msg_niov, msg->msg_iov,
msg->msg_offset, msg->msg_len);
ptllnd_peer_decref(plp);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
/* send IMMEDIATE
* NB copy the payload so we don't have to do a fragmented send */
- PJK_UT_MSG("IMMEDIATE len=%d\n", msg->msg_len);
+ CDEBUG(D_NET, "IMMEDIATE len=%d\n", msg->msg_len);
tx = ptllnd_new_tx(plp, PTLLND_MSG_TYPE_IMMEDIATE, msg->msg_len);
if (tx == NULL) {
CERROR("Can't allocate tx for lnet type %d to %s\n",
tx->tx_lnetmsg = msg;
ptllnd_post_tx(tx);
ptllnd_peer_decref(plp);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
return 0;
}
lnet_ni_t *ni = plp->plp_ni;
ptllnd_ni_t *plni = ni->ni_data;
- PJK_UT_MSG("rx=%p\n", rx);
+ CDEBUG(D_NET, "rx=%p\n", rx);
plp->plp_outstanding_credits++;
ptllnd_check_sends(rx->rx_peer);
/* Shouldn't get here; recvs only block for router buffers */
LBUG();
-
- PJK_UT_MSG("rx=%p (stack)\n", stackrx);
+
+ CDEBUG(D_NET, "rx=%p (stack)\n", stackrx);
/* Don't ++plni_nrxs: heaprx replaces stackrx */
if (heaprx == NULL)
return -ENOMEM;
- PJK_UT_MSG("rx=%p (new heap)\n", stackrx);
+ CDEBUG(D_NET, "rx=%p (new heap)\n", stackrx);
heaprx->rx_msg = (kptl_msg_t *)heaprx->rx_space;
memcpy(&heaprx->rx_msg, stackrx->rx_msg, stackrx->rx_nob);
LASSERT (kiov == NULL);
LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */
- PJK_UT_MSG(">>> msg=%p\n",msg);
- PJK_UT_MSG("rx=%p rx_nob=%d\n",rx,rx->rx_nob);
- PJK_UT_MSG("niov=%d\n",niov);
- PJK_UT_MSG("offset=%d\n",offset);
- PJK_UT_MSG("mlen=%d rlen=%d\n",mlen,rlen);
+ CDEBUG(D_NET, ">>> msg=%p\n",msg);
+ CDEBUG(D_NET, "rx=%p rx_nob=%d\n",rx,rx->rx_nob);
+ CDEBUG(D_NET, "niov=%d\n",niov);
+ CDEBUG(D_NET, "offset=%d\n",offset);
+ CDEBUG(D_NET, "mlen=%d rlen=%d\n",mlen,rlen);
switch (rx->rx_msg->ptlm_type) {
default:
case PTLLND_MSG_TYPE_IMMEDIATE:
nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[mlen]);
- PJK_UT_MSG("PTLLND_MSG_TYPE_IMMEDIATE nob=%d\n",nob);
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE nob=%d\n",nob);
if (nob > rx->rx_nob) {
CERROR("Immediate message from %s too big: %d(%d)\n",
libcfs_nid2str(rx->rx_peer->plp_nid),
break;
case PTLLND_MSG_TYPE_PUT:
- PJK_UT_MSG("PTLLND_MSG_TYPE_PUT offset=%d mlen=%d\n",offset,mlen);
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT offset=%d mlen=%d\n",offset,mlen);
rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_READ, msg,
rx->rx_msg->ptlm_u.req.kptlrm_matchbits,
niov, iov, offset, mlen);
break;
case PTLLND_MSG_TYPE_GET:
- PJK_UT_MSG("PTLLND_MSG_TYPE_GET\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET\n");
if (msg != NULL) {
/* matched! */
- PJK_UT_MSG("matchbits="LPX64"\n",
+ CDEBUG(D_NET, "matchbits="LPX64"\n",
rx->rx_msg->ptlm_u.req.kptlrm_matchbits);
rc = ptllnd_active_rdma(rx->rx_peer, PTLLND_RDMA_WRITE, msg,
rx->rx_msg->ptlm_u.req.kptlrm_matchbits,
msg->msg_niov, msg->msg_iov,
msg->msg_offset, msg->msg_len);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
break;
} else {
ptllnd_close_peer(rx->rx_peer);
}
ptllnd_rx_done(rx);
- PJK_UT_MSG("<<< rc=%d\n",rc);
+ CDEBUG(D_NET, "<<< rc=%d\n",rc);
return rc;
}
int rc;
- PJK_UT_MSG(">>> initiator=%s nob=%d\n",ptllnd_ptlid2str(initiator),nob);
+ CDEBUG(D_NET, ">>> initiator=%s nob=%d\n",ptllnd_ptlid2str(initiator),nob);
if (nob < basenob) {
CERROR("Short receive from %s\n",
__swab64s(&msg->ptlm_seq);
}
- PJK_UT_MSG_ALWAYS("src = %s\n",libcfs_nid2str(msg->ptlm_srcnid));
+ CDEBUG(D_NET, "src = %s\n",libcfs_nid2str(msg->ptlm_srcnid));
if (msg->ptlm_version != PTLLND_MSG_VERSION) {
CERROR("Bad version %d from %s\n", (__u32)msg->ptlm_version,
switch (msg->ptlm_type) {
case PTLLND_MSG_TYPE_PUT:
case PTLLND_MSG_TYPE_GET:
- PJK_UT_MSG("PTLLND_MSG_TYPE_%s\n",
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
msg->ptlm_type==PTLLND_MSG_TYPE_PUT ? "PUT" : "GET");
if (nob < basenob + sizeof(kptl_request_msg_t)) {
CERROR("Short rdma request from %s(%s)\n",
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- PJK_UT_MSG("PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
if (nob < offsetof(kptl_msg_t,
ptlm_u.immediate.kptlim_payload)) {
CERROR("Short immediate from %s(%s)\n",
break;
case PTLLND_MSG_TYPE_HELLO:
- PJK_UT_MSG("PTLLND_MSG_TYPE_HELLO from %s(%s)\n",
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO from %s(%s)\n",
libcfs_nid2str(msg->ptlm_srcnid),
ptllnd_ptlid2str(initiator));
if (nob < basenob + sizeof(kptl_hello_msg_t)) {
break;
case PTLLND_MSG_TYPE_NOOP:
- PJK_UT_MSG("PTLLND_MSG_TYPE_NOOP from %s(%s)\n",
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP from %s(%s)\n",
libcfs_nid2str(msg->ptlm_srcnid),
ptllnd_ptlid2str(initiator));
break;
return;
}
- PJK_UT_MSG("kptlhm_max_msg_size=%d\n",msg->ptlm_u.hello.kptlhm_max_msg_size);
- PJK_UT_MSG("kptlhm_matchbits="LPX64"\n",msg->ptlm_u.hello.kptlhm_matchbits);
- PJK_UT_MSG("ptlm_srcstamp="LPX64"\n",msg->ptlm_srcstamp);
+ CDEBUG(D_NET, "kptlhm_max_msg_size=%d\n",msg->ptlm_u.hello.kptlhm_max_msg_size);
+ CDEBUG(D_NET, "kptlhm_matchbits="LPX64"\n",msg->ptlm_u.hello.kptlhm_matchbits);
+ CDEBUG(D_NET, "ptlm_srcstamp="LPX64"\n",msg->ptlm_srcstamp);
plp->plp_max_msg_size = MAX(plni->plni_max_msg_size,
msg->ptlm_u.hello.kptlhm_max_msg_size);
plp->plp_max_credits += msg->ptlm_credits;
plp->plp_recvd_hello = 1;
- PJK_UT_MSG("plp_max_msg_size=%d\n",plp->plp_max_msg_size);
+ CDEBUG(D_NET, "plp_max_msg_size=%d\n",plp->plp_max_msg_size);
} else if (!plp->plp_recvd_hello) {
}
if (msg->ptlm_credits > 0) {
- PJK_UT_MSG("Getting back %d credits from peer\n",msg->ptlm_credits);
+ CDEBUG(D_NET, "Getting back %d credits from peer\n",msg->ptlm_credits);
if (plp->plp_credits + msg->ptlm_credits >
plp->plp_max_credits) {
CWARN("Too many credits from %s: %d + %d > %d\n",
rx.rx_nob = nob;
plni->plni_nrxs++;
- PJK_UT_MSG("rx=%p type=%d\n",&rx,msg->ptlm_type);
+ CDEBUG(D_NET, "rx=%p type=%d\n",&rx,msg->ptlm_type);
switch (msg->ptlm_type) {
default: /* message types have been checked already */
case PTLLND_MSG_TYPE_PUT:
case PTLLND_MSG_TYPE_GET:
- PJK_UT_MSG("PTLLND_MSG_TYPE_%s\n",
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
msg->ptlm_type==PTLLND_MSG_TYPE_PUT ? "PUT" : "GET");
rc = lnet_parse(ni, &msg->ptlm_u.req.kptlrm_hdr,
msg->ptlm_srcnid, &rx, 1);
- PJK_UT_MSG("lnet_parse rc=%d\n",rc);
+ CDEBUG(D_NET, "lnet_parse rc=%d\n",rc);
if (rc < 0)
ptllnd_rx_done(&rx);
break;
case PTLLND_MSG_TYPE_IMMEDIATE:
- PJK_UT_MSG("PTLLND_MSG_TYPE_IMMEDIATE\n");
+ CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
rc = lnet_parse(ni, &msg->ptlm_u.immediate.kptlim_hdr,
msg->ptlm_srcnid, &rx, 0);
- PJK_UT_MSG("lnet_parse rc=%d\n",rc, 0);
+ CDEBUG(D_NET, "lnet_parse rc=%d\n",rc);
if (rc < 0)
ptllnd_rx_done(&rx);
break;
ptllnd_peer_decref(plp);
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
void
LASSERT (event->type == PTL_EVENT_PUT_END ||
event->type == PTL_EVENT_UNLINK);
- PJK_UT_MSG("buf=%p event=%d\n",buf,event->type);
+ CDEBUG(D_NET, "buf=%p event=%d\n",buf,event->type);
if (event->type == PTL_EVENT_PUT_END)
ptllnd_parse_request(ni, event->initiator,
repost = (event->type == PTL_EVENT_UNLINK);
#endif
- PJK_UT_MSG("repost=%d unlinked=%d\n",repost,unlinked);
+ CDEBUG(D_NET, "repost=%d unlinked=%d\n",repost,unlinked);
if(unlinked){
LASSERT(buf->plb_posted);
LASSERT (!PtlHandleIsEqual(event->md_handle, PTL_INVALID_HANDLE));
- PJK_UT_MSG("tx=%p type=%s (%d)\n",tx,
+ CDEBUG(D_NET, "tx=%p type=%s (%d)\n",tx,
get_msg_type_string(tx->tx_type),tx->tx_type);
- PJK_UT_MSG("unlinked=%d\n",unlinked);
- PJK_UT_MSG("error=%d\n",error);
+ CDEBUG(D_NET, "unlinked=%d\n",unlinked);
+ CDEBUG(D_NET, "error=%d\n",error);
isreq = PtlHandleIsEqual(event->md_handle, tx->tx_reqmdh);
- PJK_UT_MSG("isreq=%d\n",isreq);
+ CDEBUG(D_NET, "isreq=%d\n",isreq);
if (isreq) {
LASSERT (event->md.start == (void *)&tx->tx_msg);
if (unlinked)
isbulk = PtlHandleIsEqual(event->md_handle, tx->tx_bulkmdh);
- PJK_UT_MSG("isbulk=%d\n",isbulk);
+ CDEBUG(D_NET, "isbulk=%d\n",isbulk);
if ( isbulk && unlinked )
tx->tx_bulkmdh = PTL_INVALID_HANDLE;
tx->tx_status = -EIO;
list_del(&tx->tx_list);
list_add_tail(&tx->tx_list, &plni->plni_zombie_txs);
- PJK_UT_MSG("tx=%p ONTO ZOMBIE LIST\n",tx);
+ CDEBUG(D_NET, "tx=%p ONTO ZOMBIE LIST\n",tx);
}
}
int found = 0;
int timeout = 0;
- PJK_UT_MSG(">>> ms=%d\n",milliseconds);
+ CDEBUG(D_NET, ">>> ms=%d\n",milliseconds);
/* Handle any currently queued events, returning immediately if any.
* Otherwise block for the timeout and handle all events queued
for (;;) {
rc = PtlEQPoll(&plni->plni_eqh, 1, timeout, &event, &which);
timeout = 0;
- PJK_UT_MSG("PtlEQPoll rc=%d\n",rc);
+ CDEBUG(D_NET, "PtlEQPoll rc=%d\n",rc);
if (rc == PTL_EQ_EMPTY) {
if (found || /* handled some events */
milliseconds == 0 || /* just checking */
blocked){ /* blocked already */
- PJK_UT_MSG("found=%d blocked=%d\n",found,blocked);
+ CDEBUG(D_NET, "found=%d blocked=%d\n",found,blocked);
break;
}
CERROR("Event queue: size %d is too small\n",
plni->plni_eq_size);
- PJK_UT_MSG("event.type=%s(%d)\n",
+ CDEBUG(D_NET, "event.type=%s(%d)\n",
get_ev_type_string(event.type),event.type);
found = 1;
while (!list_empty(&plni->plni_zombie_txs)) {
tx = list_entry(plni->plni_zombie_txs.next,
ptllnd_tx_t, tx_list);
- PJK_UT_MSG("Process ZOMBIE tx=%p\n",tx);
+ CDEBUG(D_NET, "Process ZOMBIE tx=%p\n",tx);
ptllnd_tx_done(tx);
}
- PJK_UT_MSG("<<<\n");
+ CDEBUG(D_NET, "<<<\n");
}
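
The loop above lets only the first PtlEQPoll block; every later poll uses a zero timeout so queued events are drained without blocking again, and txs parked on the zombie list are completed afterwards. A sketch of that poll-then-drain shape, with sketch_poll() as an illustrative stand-in for PtlEQPoll:

    /* Sketch of the poll-then-drain pattern above.  sketch_poll() is a
     * stub standing in for PtlEQPoll; 0 means "queue empty" here. */
    static int sketch_poll(int timeout_ms)
    {
            (void)timeout_ms;
            return 0;
    }

    static void sketch_handle_events(int ms)
    {
            int timeout = 0;        /* the first poll never blocks */
            int found   = 0;
            int blocked = 0;

            for (;;) {
                    int rc = sketch_poll(timeout);

                    timeout = 0;
                    if (rc == 0) {                  /* event queue empty */
                            if (found || ms == 0 || blocked)
                                    break;
                            timeout = ms;           /* block exactly once */
                            blocked = 1;
                            continue;
                    }
                    found = 1;
                    /* ... dispatch the event, then loop for more ... */
            }
            /* finally complete any txs parked on the zombie list */
    }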
int jt_dbg_mark_debug_buf(int argc, char **argv)
{
+ char scratch[MAX_MARK_SIZE];
int rc, max_size = MAX_MARK_SIZE-1;
struct libcfs_ioctl_data data;
char *text;
if (argc > 1) {
int counter;
- text = malloc(MAX_MARK_SIZE);
+ text = scratch;
strncpy(text, argv[1], max_size);
max_size-=strlen(argv[1]);
for(counter = 2; (counter < argc) && (max_size > 0) ; counter++){
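
The hunk above replaces the malloc() of the mark text, which was never freed, with a stack buffer of MAX_MARK_SIZE bytes. One caveat worth noting: strncpy() does not NUL-terminate when it truncates, so a defensive version of the assembly loop terminates explicitly, for example via snprintf(). A sketch with an illustrative size constant standing in for MAX_MARK_SIZE:

    #include <stdio.h>

    #define SKETCH_MARK_SIZE 256    /* stands in for MAX_MARK_SIZE */

    /* Sketch of assembling the mark text on the stack as the hunk above
     * now does, but via snprintf(), which always NUL-terminates. */
    static void sketch_build_mark(int argc, char **argv,
                                  char *out, size_t outsz)
    {
            size_t used = 0;
            int i;

            out[0] = '\0';
            for (i = 1; i < argc && used + 1 < outsz; i++)
                    used += snprintf(out + used, outsz - used, "%s%s",
                                     i > 1 ? " " : "", argv[i]);
    }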
{"mds_ext3", "lustre/mds"},
{"mds_extN", "lustre/mds"},
{"ptlbd", "lustre/ptlbd"},
- {"mgmt_svc", "lustre/mgmt"},
- {"mgmt_cli", "lustre/mgmt"},
{"cobd", "lustre/cobd"},
{"cmobd", "lustre/cmobd"},
- {"confobd", "lustre/obdclass"},
{"lquota", "lustre/quota"},
+ {"mgs", "lustre/mgs"},
+ {"mgc", "lustre/mgc"},
{NULL, NULL}
};