From: mattwu Date: Tue, 7 Oct 2008 04:14:14 +0000 (+0000) Subject: b=16150 X-Git-Tag: v1_9_80~17 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=c8616363809a73b3d4b6c736e6b55a84d7f137b4 b=16150 i=adilger i=robert i=liang winnt libcfs cleanup --- diff --git a/libcfs/include/libcfs/Makefile.am b/libcfs/include/libcfs/Makefile.am index dc2b841..c9f9478 100644 --- a/libcfs/include/libcfs/Makefile.am +++ b/libcfs/include/libcfs/Makefile.am @@ -1,4 +1,3 @@ - SUBDIRS := linux posix util if DARWIN SUBDIRS += darwin @@ -6,7 +5,8 @@ endif DIST_SUBDIRS := $(SUBDIRS) EXTRA_DIST := curproc.h libcfs_private.h libcfs.h list.h lltrace.h \ - user-lock.h user-prim.h user-time.h \ - user-tcpip.h user-bitops.h bitmap.h user-mem.h\ - libcfs_prim.h libcfs_private.h libcfs_hash.h libcfs_time.h \ - libcfs_debug.h libcfsutil.h libcfs_ioctl.h + user-lock.h user-prim.h user-time.h user-mem.h \ + user-tcpip.h user-bitops.h bitmap.h \ + libcfs_prim.h libcfs_time.h libcfs_hash.h \ + libcfs_debug.h libcfsutil.h libcfs_ioctl.h \ + libcfs_pack.h libcfs_unpack.h diff --git a/libcfs/include/libcfs/darwin/darwin-types.h b/libcfs/include/libcfs/darwin/darwin-types.h index 1845ac2..ea9d985 100644 --- a/libcfs/include/libcfs/darwin/darwin-types.h +++ b/libcfs/include/libcfs/darwin/darwin-types.h @@ -61,6 +61,10 @@ typedef int16_t __s16; typedef int32_t __s32; typedef int64_t __s64; +/* long integer with size equal to pointer */ +typedef unsigned long ulong_ptr_t; +typedef long long_ptr_t; + #ifdef __KERNEL__ #include diff --git a/libcfs/include/libcfs/darwin/kp30.h b/libcfs/include/libcfs/darwin/kp30.h index f5e5619..1fc1a1d 100644 --- a/libcfs/include/libcfs/darwin/kp30.h +++ b/libcfs/include/libcfs/darwin/kp30.h @@ -127,4 +127,16 @@ typedef struct { # define LL_POISON ((long)0x5a5a5a5a) # define LP_POISON ((void *)(long)0x5a5a5a5a) +/* + * long_ptr_t & ulong_ptr_t, same to "long" for gcc + */ +# define LPLU "%lu" +# define LPLD "%ld" +# define LPLX "%#lx" + +/* + * 
pid_t + */ +# define LPPID "%d" + #endif diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h index 5486e0c..08be620 100644 --- a/libcfs/include/libcfs/libcfs.h +++ b/libcfs/include/libcfs/libcfs.h @@ -56,10 +56,9 @@ #include "curproc.h" #ifndef offsetof -# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) +# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb))) #endif -/* cardinality of array */ #ifndef ARRAY_SIZE #define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0]))) #endif @@ -68,31 +67,15 @@ /* given a pointer @ptr to the field @member embedded into type (usually * struct) @type, return pointer to the embedding instance of @type. */ #define container_of(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) #endif -#define container_of0(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - type *__res; \ - \ - if (unlikely(IS_ERR(__ptr) || __ptr == NULL)) \ - __res = (type *)__ptr; \ - else \ - __res = container_of(__ptr, type, member); \ - __res; \ -}) +static inline int __is_po2(unsigned long long val) +{ + return !(val & (val - 1)); +} -/* - * true iff @i is power-of-2 - */ -#define IS_PO2(i) \ -({ \ - typeof(i) __i; \ - \ - __i = (i); \ - !(__i & (__i - 1)); \ -}) +#define IS_PO2(val) __is_po2((unsigned long long)(val)) #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) @@ -301,6 +284,18 @@ int cfs_univ2oflags(int flags); #include #include +/* container_of depends on "likely" which is defined in libcfs_private.h */ +static inline void *__container_of(void *ptr, unsigned long shift) +{ + if (unlikely(IS_ERR(ptr) || ptr == NULL)) + return ptr; + else + return (char *)ptr - shift; +} + +#define container_of0(ptr, type, member) \ + ((type *)__container_of((void *)(ptr), offsetof(type, member))) + #define _LIBCFS_H #endif /* _LIBCFS_H */ diff --git a/libcfs/include/libcfs/libcfs_debug.h 
b/libcfs/include/libcfs/libcfs_debug.h index 87037f0..379bc9c 100644 --- a/libcfs/include/libcfs/libcfs_debug.h +++ b/libcfs/include/libcfs/libcfs_debug.h @@ -179,23 +179,23 @@ static inline int cdebug_show(unsigned int mask, unsigned int subsystem) ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem)); } -#define __CDEBUG(cdls, mask, format, a...) \ +#define __CDEBUG(cdls, mask, format, ...) \ do { \ CHECK_STACK(); \ \ if (cdebug_show(mask, DEBUG_SUBSYSTEM)) \ libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask, \ __FILE__, __FUNCTION__, __LINE__, \ - format, ## a); \ + format, ## __VA_ARGS__); \ } while (0) -#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a) +#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__) -#define CDEBUG_LIMIT(mask, format, a...) \ +#define CDEBUG_LIMIT(mask, format, ...) \ do { \ static cfs_debug_limit_state_t cdls; \ \ - __CDEBUG(&cdls, mask, format, ## a); \ + __CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\ } while (0) #else /* !CDEBUG_ENABLED */ @@ -203,18 +203,18 @@ static inline int cdebug_show(unsigned int mask, unsigned int subsystem) { return 0; } -#define CDEBUG(mask, format, a...) (void)(0) -#define CDEBUG_LIMIT(mask, format, a...) (void)(0) +#define CDEBUG(mask, format, ...) (void)(0) +#define CDEBUG_LIMIT(mask, format, ...) (void)(0) #warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!" #endif #else /* !__KERNEL__ && (!__arch_lib__ || LUSTRE_UTILS) */ -#define CDEBUG(mask, format, a...) \ +#define CDEBUG(mask, format, ...) \ do { \ if (((mask) & D_CANTMASK) != 0) \ fprintf(stderr, "(%s:%d:%s()) " format, \ - __FILE__, __LINE__, __FUNCTION__, ## a); \ + __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__);\ } while (0) #define CDEBUG_LIMIT CDEBUG @@ -222,27 +222,27 @@ do { \ #endif /* !__KERNEL__ ... */ -#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a) -#define CEMERG(format, a...) 
CDEBUG_LIMIT(D_EMERG, format, ## a) +#define CWARN(format, ...) CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__) +#define CERROR(format, ...) CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__) +#define CEMERG(format, ...) CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__) -#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a) -#define LCONSOLE_INFO(format, a...) CDEBUG_LIMIT(D_CONSOLE, format, ## a) -#define LCONSOLE_WARN(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a) -#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ - "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## a) -#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a) +#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__) +#define LCONSOLE_INFO(format, ...) CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__) +#define LCONSOLE_WARN(format, ...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__) +#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ + "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__) +#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__) -#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a) +#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__) #ifdef CDEBUG_ENABLED #define GOTO(label, rc) \ do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ - (signed long)GOTO__ret); \ + long_ptr_t GOTO__ret = (long_ptr_t)(rc); \ + CDEBUG(D_TRACE,"Process leaving via %s (rc=" LPLU " : " LPLD \ + " : " LPLX ")\n", #label, (ulong_ptr_t)GOTO__ret, \ + GOTO__ret, GOTO__ret); \ goto label; \ } while (0) #else @@ -255,6 +255,7 @@ do { \ * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise * there will be a warning in osx. 
*/ +#if defined(__GNUC__) #define RETURN(rc) \ do { \ typeof(rc) RETURN__ret = (rc); \ @@ -263,6 +264,16 @@ do { \ EXIT_NESTING; \ return RETURN__ret; \ } while (0) +#elif defined(_MSC_VER) +#define RETURN(rc) \ +do { \ + CDEBUG(D_TRACE, "Process leaving.\n"); \ + EXIT_NESTING; \ + return (rc); \ +} while (0) +#else +# error "Unkown compiler" +#endif /* __GNUC__ */ #define ENTRY \ ENTRY_NESTING; \ @@ -293,11 +304,11 @@ struct libcfs_debug_msg_data { }; #define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \ - .msg_cdls = (cdls), \ - .msg_subsys = (subsystem), \ - .msg_file = (file), \ - .msg_fn = (func), \ - .msg_line = (ln) \ + /* msg_cdls */ (cdls), \ + /* msg_subsys */ (subsystem), \ + /* msg_file */ (file), \ + /* msg_fn */ (func), \ + /* msg_line */ (ln) \ } @@ -311,8 +322,8 @@ extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, #define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args) \ libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL) -#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) \ - libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a) +#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, ...) \ + libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ## __VA_ARGS__) #define cdebug_va(cdls, mask, file, func, line, fmt, args) do { \ CHECK_STACK(); \ @@ -322,17 +333,26 @@ extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, (file), (func), (line), fmt, args); \ } while(0); -#define cdebug(cdls, mask, file, func, line, fmt, a...) do { \ +#define cdebug(cdls, mask, file, func, line, fmt, ...) 
do { \ CHECK_STACK(); \ \ if (cdebug_show(mask, DEBUG_SUBSYSTEM)) \ libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask), \ - (file), (func), (line), fmt, ## a); \ + (file), (func), (line), fmt, ## __VA_ARGS__);\ } while(0); extern void libcfs_assertion_failed(const char *expr, const char *file, const char *fn, const int line); +#if defined(HAVE_BGL_SUPPORT) +#define DEBUG_FILE_PATH_DEFAULT "/bgl/ion/tmp/lustre-log" +#elif defined(__arch_um__) +#define DEBUG_FILE_PATH_DEFAULT "/r/tmp/lustre-log" +#elif defined(__WINNT__) +#define DEBUG_FILE_PATH_DEFAULT "\\SystemRoot\\temp\\lustre-log" +#else +#define DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log" +#endif #endif /* __LIBCFS_DEBUG_H__ */ diff --git a/libcfs/include/libcfs/libcfs_pack.h b/libcfs/include/libcfs/libcfs_pack.h new file mode 100644 index 0000000..74ba33c --- /dev/null +++ b/libcfs/include/libcfs/libcfs_pack.h @@ -0,0 +1,5 @@ +#if !defined(__GNUC__) && defined(_MSC_VER) +#pragma warning(disable:4103) +#pragma pack(push, 1) +#endif + diff --git a/libcfs/include/libcfs/libcfs_prim.h b/libcfs/include/libcfs/libcfs_prim.h index 3885bb7..64938a5 100644 --- a/libcfs/include/libcfs/libcfs_prim.h +++ b/libcfs/include/libcfs/libcfs_prim.h @@ -69,7 +69,7 @@ int64_t cfs_waitq_timedwait(cfs_waitlink_t *link, cfs_task_state_t state, /* * Timer */ -typedef void (cfs_timer_func_t)(unsigned long); +typedef void (cfs_timer_func_t)(ulong_ptr_t); void cfs_init_timer(cfs_timer_t *t); void cfs_timer_init(cfs_timer_t *t, cfs_timer_func_t *func, void *arg); diff --git a/libcfs/include/libcfs/libcfs_private.h b/libcfs/include/libcfs/libcfs_private.h index dd04602..9e3fe71 100644 --- a/libcfs/include/libcfs/libcfs_private.h +++ b/libcfs/include/libcfs/libcfs_private.h @@ -71,55 +71,53 @@ * * requires -Wall. Unfortunately this rules out use of likely/unlikely. */ -#define LASSERT(cond) \ -({ \ - if (cond) \ - ; \ - else \ - libcfs_assertion_failed( #cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) - -#define LASSERTF(cond, fmt, a...) 
\ -({ \ +#define LASSERT(cond) \ +do { \ + if (cond) \ + ; \ + else \ + libcfs_assertion_failed( #cond , __FILE__, \ + __FUNCTION__, __LINE__); \ +} while(0) + +#define LASSERTF(cond, fmt, ...) \ +do { \ if (cond) \ ; \ else { \ libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ __FILE__, __FUNCTION__,__LINE__, \ "ASSERTION(" #cond ") failed: " fmt, \ - ## a); \ + ## __VA_ARGS__); \ LBUG(); \ } \ -}) - +} while(0) #else /* !LASSERT_CHECKED */ -#define LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) \ - libcfs_assertion_failed(#cond , __FILE__, \ - __FUNCTION__, __LINE__); \ -}) +#define LASSERT(cond) \ +do { \ + if (unlikely(!(cond))) \ + libcfs_assertion_failed(#cond , __FILE__, \ + __FUNCTION__, __LINE__); \ +} while(0) -#define LASSERTF(cond, fmt, a...) \ -({ \ +#define LASSERTF(cond, fmt, ...) \ +do { \ if (unlikely(!(cond))) { \ libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ __FILE__, __FUNCTION__,__LINE__, \ "ASSERTION(" #cond ") failed: " fmt, \ - ## a); \ + ## __VA_ARGS__ ); \ LBUG(); \ } \ -}) - +} while(0) #endif /* !LASSERT_CHECKED */ #else /* !LIBCFS_DEBUG */ /* sizeof is to use expression without evaluating it. */ # define LASSERT(e) ((void)sizeof!!(e)) -# define LASSERTF(cond, fmt...) ((void)sizeof!!(cond)) +# define LASSERTF(cond, ...) ((void)sizeof!!(cond)) #endif /* !LIBCFS_DEBUG */ #ifdef INVARIANT_CHECK @@ -237,10 +235,10 @@ void libcfs_debug_set_level(unsigned int debug_level); # undef NDEBUG # include # define LASSERT(e) assert(e) -# define LASSERTF(cond, args...) \ +# define LASSERTF(cond, ...) \ do { \ if (!(cond)) \ - CERROR(args); \ + CERROR(__VA_ARGS__); \ assert(cond); \ } while (0) # define LBUG() assert(0) @@ -251,12 +249,12 @@ do { \ # endif # else # define LASSERT(e) ((void)sizeof!!(e)) -# define LASSERTF(cond, args...) ((void)sizeof!!(cond)) +# define LASSERTF(cond, ...) 
((void)sizeof!!(cond)) # define LBUG() ((void)(0)) # define LINVRNT(exp) ((void)sizeof!!(exp)) # endif /* LIBCFS_DEBUG */ # define KLASSERT(e) ((void)0) -# define printk(format, args...) printf (format, ## args) +# define printk printf # ifdef CRAY_XT3 /* buggy calloc! */ # define LIBCFS_ALLOC(ptr, size) \ do { \ @@ -277,10 +275,13 @@ int libcfs_debug_cleanup(void); * build go below this comment. Actual compiler/compiler version * specific implementations come from the above header files */ - +#ifdef __GNUC__ #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) - +#else +#define likely(x) (!!(x)) +#define unlikely(x) (!!(x)) +#endif /* !__KERNEL__ */ #endif @@ -298,7 +299,7 @@ int libcfs_debug_cleanup(void); * value after conversion... * */ -#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) +#define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0) /* support decl needed both by kernel and liblustre */ int libcfs_isknown_lnd(int type); diff --git a/libcfs/include/libcfs/libcfs_time.h b/libcfs/include/libcfs/libcfs_time.h index 94d6862..946c2be 100644 --- a/libcfs/include/libcfs/libcfs_time.h +++ b/libcfs/include/libcfs/libcfs_time.h @@ -47,12 +47,12 @@ static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) { - return t + d; + return (cfs_time_t)(t + d); } static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) { - return t1 - t2; + return (cfs_time_t)(t1 - t2); } static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) diff --git a/libcfs/include/libcfs/libcfs_unpack.h b/libcfs/include/libcfs/libcfs_unpack.h new file mode 100644 index 0000000..7ea2205 --- /dev/null +++ b/libcfs/include/libcfs/libcfs_unpack.h @@ -0,0 +1,4 @@ +#if !defined(__GNUC__) && defined(_MSC_VER) +#pragma warning(disable:4103) +#pragma pack(pop) +#endif diff --git a/libcfs/include/libcfs/linux/kp30.h b/libcfs/include/libcfs/linux/kp30.h index 378bfc7..02a45e1 100644 --- 
a/libcfs/include/libcfs/linux/kp30.h +++ b/libcfs/include/libcfs/linux/kp30.h @@ -354,6 +354,18 @@ extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, # define LPF64 "l" #endif +/* + * long_ptr_t & ulong_ptr_t, same to "long" for gcc + */ +# define LPLU "%lu" +# define LPLD "%ld" +# define LPLX "%#lx" + +/* + * pid_t + */ +# define LPPID "%d" + #ifdef HAVE_SIZE_T_LONG # define LPSZ "%lu" #else diff --git a/libcfs/include/libcfs/linux/libcfs.h b/libcfs/include/libcfs/linux/libcfs.h index f2b7744..6f0a9e8 100644 --- a/libcfs/include/libcfs/linux/libcfs.h +++ b/libcfs/include/libcfs/linux/libcfs.h @@ -117,6 +117,10 @@ typedef kernel_cap_t cfs_kernel_cap_t; struct cfs_stack_trace { }; +/* long integer with size equal to pointer */ +typedef unsigned long ulong_ptr_t; +typedef long long_ptr_t; + #ifndef WITH_WATCHDOG #define WITH_WATCHDOG #endif diff --git a/libcfs/include/libcfs/linux/linux-lock.h b/libcfs/include/libcfs/linux/linux-lock.h index f294428..f1c442f 100644 --- a/libcfs/include/libcfs/linux/linux-lock.h +++ b/libcfs/include/libcfs/linux/linux-lock.h @@ -82,6 +82,7 @@ * - down_write(x) * - up_write(x) */ +#define fini_rwsem(s) do {} while(0) /* * rwlock_t (use Linux kernel's primitives) diff --git a/libcfs/include/libcfs/linux/linux-prim.h b/libcfs/include/libcfs/linux/linux-prim.h index 20f3674..3414c9c 100644 --- a/libcfs/include/libcfs/linux/linux-prim.h +++ b/libcfs/include/libcfs/linux/linux-prim.h @@ -53,6 +53,7 @@ #include #endif #include +#include #include #include #include @@ -204,15 +205,15 @@ do { \ retval == 0; condition met; we're good. retval > 0; timed out. 
*/ -#define cfs_waitq_wait_event_timeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ +#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \ +do { \ + ret = 0; \ if (!(condition)) \ - __wait_event_timeout(wq, condition, timeout, __ret); \ - __ret; \ -}) + __wait_event_timeout(wq, condition, timeout, ret); \ +} while (0) #else -#define cfs_waitq_wait_event_timeout wait_event_timeout +#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret) \ + ret = wait_event_timeout(wq, condition, timeout) #endif #ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */ @@ -251,16 +252,16 @@ do { \ retval < 0; interrupted by signal. retval > 0; timed out. */ -#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout) \ -({ \ - int __ret = 0; \ +#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout, ret)\ +do { \ + ret = 0; \ if (!(condition)) \ __wait_event_interruptible_timeout(wq, condition, \ - timeout, __ret); \ - __ret; \ -}) + timeout, ret); \ +} while (0) #else -#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout +#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \ + ret = wait_event_interruptible_timeout(wq, c, timeout) #endif #endif diff --git a/libcfs/include/libcfs/linux/portals_compat25.h b/libcfs/include/libcfs/linux/portals_compat25.h index 492fd00..38906e9 100644 --- a/libcfs/include/libcfs/linux/portals_compat25.h +++ b/libcfs/include/libcfs/linux/portals_compat25.h @@ -96,6 +96,11 @@ #endif +# define cfs_wait_event_interruptible(wq, condition, ret) \ + ret = wait_event_interruptible(wq, condition) +# define cfs_wait_event_interruptible_exclusive(wq, condition, ret) \ + ret = wait_event_interruptible(wq, condition) + #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) #define UML_PID(tsk) ((tsk)->thread.extern_pid) #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) diff --git 
a/libcfs/include/libcfs/list.h b/libcfs/include/libcfs/list.h index ed03bd5..548bdf5 100644 --- a/libcfs/include/libcfs/list.h +++ b/libcfs/include/libcfs/list.h @@ -28,11 +28,7 @@ * using the generic single-entry routines. */ -#ifndef __WINNT__ #define prefetch(a) ((void)a) -#else -#define prefetch(a) ((void *)a) -#endif struct list_head { struct list_head *next, *prev; @@ -208,7 +204,7 @@ static inline void list_splice_init(struct list_head *list, * @member: the name of the list_struct within the struct. */ #define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + ((type *)((char *)(ptr)-(char *)(&((type *)0)->member))) /** * list_for_each - iterate over a list @@ -253,8 +249,8 @@ struct hlist_node { #define NULL_P ((void *)0) #endif -#define CFS_HLIST_HEAD_INIT { .first = NULL_P } -#define CFS_HLIST_HEAD(name) struct hlist_head name = { .first = NULL_P } +#define CFS_HLIST_HEAD_INIT { NULL_P } +#define CFS_HLIST_HEAD(name) struct hlist_head name = { NULL_P } #define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) #define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) @@ -329,11 +325,11 @@ static inline void hlist_add_after(struct hlist_node *n, #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ + for (pos = (head)->first; pos && (prefetch(pos->next), 1); \ pos = pos->next) #define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ + for (pos = (head)->first; pos && (n = pos->next, 1); \ pos = n) /** @@ -395,7 +391,7 @@ static inline void hlist_add_after(struct hlist_node *n, * @head: the head for your list. 
*/ #define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ pos = pos->prev, prefetch(pos->prev)) #endif /* list_for_each_prev */ @@ -441,6 +437,7 @@ static inline void hlist_add_after(struct hlist_node *n, n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) + #endif /* list_for_each_entry_safe */ #ifndef list_for_each_entry_safe_from @@ -454,10 +451,44 @@ static inline void hlist_add_after(struct hlist_node *n, * Iterate over list of given type from current point, safe against * removal of list entry. */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ +#define list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) #endif /* list_for_each_entry_safe_from */ +#define cfs_list_for_each_entry_typed(pos, head, type, member) \ + for (pos = list_entry((head)->next, type, member), \ + prefetch(pos->member.next); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, type, member), \ + prefetch(pos->member.next)) + +#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \ + for (pos = list_entry((head)->prev, type, member); \ + prefetch(pos->member.prev), &pos->member != (head); \ + pos = list_entry(pos->member.prev, type, member)) + +#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \ + for (pos = list_entry((head)->next, type, member), \ + n = list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, type, member)) + +#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member) \ + for (n = 
list_entry(pos->member.next, type, member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, type, member)) +#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member) \ + for (pos = (head)->first; \ + pos && (prefetch(pos->next), 1) && \ + (tpos = hlist_entry(pos, type, member), 1); \ + pos = pos->next) + +#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member)\ + for (pos = (head)->first; \ + pos && (n = pos->next, 1) && \ + (tpos = hlist_entry(pos, type, member), 1); \ + pos = n) + #endif /* __LIBCFS_LUSTRE_LIST_H__ */ diff --git a/libcfs/include/libcfs/posix/libcfs.h b/libcfs/include/libcfs/posix/libcfs.h index b50554f..95035d2 100644 --- a/libcfs/include/libcfs/posix/libcfs.h +++ b/libcfs/include/libcfs/posix/libcfs.h @@ -43,6 +43,7 @@ #ifndef __LIBCFS_POSIX_LIBCFS_H__ #define __LIBCFS_POSIX_LIBCFS_H__ +#include #include #include #include @@ -51,14 +52,28 @@ #include #include #include -#include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_NETDB_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif #ifdef HAVE_LIBPTHREAD #include @@ -331,7 +346,8 @@ static inline struct radix_tree_node *radix_tree_lookup0(struct radix_tree_root if (list_empty(&root->list)) return NULL; - list_for_each_entry(node, &root->list, _node) + cfs_list_for_each_entry_typed(node, &root->list, + struct radix_tree_node, _node) if (node->index == idx) return node; diff --git a/libcfs/include/libcfs/posix/posix-types.h b/libcfs/include/libcfs/posix/posix-types.h index 19ef126..392e70e 100644 --- a/libcfs/include/libcfs/posix/posix-types.h +++ b/libcfs/include/libcfs/posix/posix-types.h @@ -61,4 +61,8 @@ typedef unsigned int __u32; typedef __signed__ long long __s64; typedef unsigned long long __u64; +/* long integer with size equal to pointer */ +typedef unsigned long ulong_ptr_t; +typedef long long_ptr_t; + #endif diff --git 
a/libcfs/include/libcfs/posix/posix-wordsize.h b/libcfs/include/libcfs/posix/posix-wordsize.h index 87eb165..62bd289 100644 --- a/libcfs/include/libcfs/posix/posix-wordsize.h +++ b/libcfs/include/libcfs/posix/posix-wordsize.h @@ -146,4 +146,16 @@ typedef struct { # error "No word size defined" #endif +/* + * long_ptr_t & ulong_ptr_t, same to "long" for gcc + */ +# define LPLU "%lu" +# define LPLD "%ld" +# define LPLX "%#lx" + +/* + * pid_t + */ +# define LPPID "%d" + #endif diff --git a/libcfs/include/libcfs/user-lock.h b/libcfs/include/libcfs/user-lock.h index 2a38463..01c289b 100644 --- a/libcfs/include/libcfs/user-lock.h +++ b/libcfs/include/libcfs/user-lock.h @@ -149,6 +149,7 @@ struct completion { typedef int (*cfs_wait_handler_t) (int timeout); void set_completion_wait_handler(cfs_wait_handler_t *handler); void init_completion(struct completion *c); +void init_completion_module(cfs_wait_handler_t handler); void complete(struct completion *c); void wait_for_completion(struct completion *c); int wait_for_completion_interruptible(struct completion *c); @@ -182,6 +183,7 @@ void down_write(struct rw_semaphore *s); int down_write_trylock(struct rw_semaphore *s); void up_read(struct rw_semaphore *s); void up_write(struct rw_semaphore *s); +void fini_rwsem(struct rw_semaphore *s); /* * read-write lock : Need to be investigated more!! 
@@ -348,7 +350,7 @@ static inline int mutex_is_locked(struct mutex *lock) **************************************************************************/ struct lock_class_key { - ; + int foo; }; static inline void lockdep_set_class(void *lock, struct lock_class_key *key) diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h index e723182..967cf07 100644 --- a/libcfs/include/libcfs/user-prim.h +++ b/libcfs/include/libcfs/user-prim.h @@ -112,12 +112,11 @@ typedef sigset_t cfs_sigset_t; /* * Timer */ -#include typedef struct { struct list_head tl_list; - void (*function)(unsigned long unused); - unsigned long data; + void (*function)(ulong_ptr_t unused); + ulong_ptr_t data; long expires; } cfs_timer_t; @@ -169,6 +168,7 @@ struct cfs_stack_trace { /* * arithmetic */ +#ifndef do_div /* gcc only, platform-specific will override */ #define do_div(a,b) \ ({ \ unsigned long remainder;\ @@ -176,6 +176,34 @@ struct cfs_stack_trace { (a) = (a) / (b); \ (remainder); \ }) +#endif + +/* utility libcfs init/fini entries */ +#ifdef __WINNT__ +extern int libcfs_arch_init(void); +extern void libcfs_arch_cleanup(void); +#else /* !__WINNT__ */ +static inline int libcfs_arch_init(void) { + return 0; +} +static inline void libcfs_arch_cleanup(void) { +} +/* __WINNT__ */ +#endif + +/* proc interface wrappers for non-win OS */ +#ifndef __WINNT__ +#define cfs_proc_open open +#define cfs_proc_mknod mknod +#define cfs_proc_ioctl ioctl +#define cfs_proc_close close +#define cfs_proc_read read +#define cfs_proc_write write +#define cfs_proc_fopen fopen +#define cfs_proc_fclose fclose +#define cfs_proc_fgets fgets +/* !__WINNT__ */ +#endif /* !__KERNEL__ */ #endif diff --git a/libcfs/include/libcfs/user-tcpip.h b/libcfs/include/libcfs/user-tcpip.h index 94a75d6..fe0464d 100644 --- a/libcfs/include/libcfs/user-tcpip.h +++ b/libcfs/include/libcfs/user-tcpip.h @@ -73,6 +73,7 @@ int libcfs_fcntl_nonblock(int fd); int libcfs_sock_set_nagle(int fd, int nagle); int 
libcfs_sock_set_bufsiz(int fd, int bufsiz); int libcfs_sock_create(int *fdp); +void libcfs_sock_release(int fd); int libcfs_sock_bind_to_port(int fd, __u16 port); int libcfs_sock_connect(int fd, __u32 ip, __u16 port); int libcfs_sock_writev(int fd, const struct iovec *vector, int count); diff --git a/libcfs/include/libcfs/user-time.h b/libcfs/include/libcfs/user-time.h index 60e466a..5a5cc3e 100644 --- a/libcfs/include/libcfs/user-time.h +++ b/libcfs/include/libcfs/user-time.h @@ -97,7 +97,7 @@ typedef time_t cfs_fs_time_t; typedef time_t cfs_time_t; -typedef long cfs_duration_t; +typedef time_t cfs_duration_t; /* looks like linux */ #define time_after(a, b) ((long)(b) - (long)(a) < 0) @@ -110,7 +110,7 @@ static inline cfs_time_t cfs_time_current(void) return time(NULL); } -static inline cfs_duration_t cfs_time_seconds(int seconds) +static inline cfs_duration_t cfs_time_seconds(cfs_time_t seconds) { return seconds; } diff --git a/libcfs/include/libcfs/util/platform.h b/libcfs/include/libcfs/util/platform.h index 43319af..137dabec 100644 --- a/libcfs/include/libcfs/util/platform.h +++ b/libcfs/include/libcfs/util/platform.h @@ -249,8 +249,10 @@ static inline void l_cond_broadcast(l_cond_t *cond) #include #include #if HAVE_LIBPTHREAD +#ifndef __WINNT__ #include #include +#endif #include typedef pthread_mutex_t l_mutex_t; diff --git a/libcfs/include/libcfs/winnt/kp30.h b/libcfs/include/libcfs/winnt/kp30.h index 49adea2..940bf48 100644 --- a/libcfs/include/libcfs/winnt/kp30.h +++ b/libcfs/include/libcfs/winnt/kp30.h @@ -37,9 +37,6 @@ #ifndef __LIBCFS_WINNT_KP30_H__ #define __LIBCFS_WINNT_KP30_H__ -#include -#include - #ifdef __KERNEL__ /* Module parameter support */ @@ -47,22 +44,18 @@ #define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ +#define cond_resched our_cond_resched +void our_cond_resched(); -static inline void our_cond_resched() -{ - schedule_timeout(1i64); -} - -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX 
*/ -#else #define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif +#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0) + +/* winnt panic */ +void libcfs_panic(char *msg); +#define LIBCFS_PANIC(msg) libcfs_panic(msg) +void libcfs_register_panic_notifier(); +void libcfs_unregister_panic_notifier(); -#error Need a winnt version of panic() -#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL) -#error libcfs_register_panic_notifier() missing -#error libcfs_unregister_panic_notifier() missing #define cfs_work_struct_t WORK_QUEUE_ITEM #define cfs_prepare_work(tq, routine, contex) @@ -82,6 +75,10 @@ static inline void our_cond_resched() #define printk DbgPrint #define ptintf DbgPrint +#define printk_ratelimit() (FALSE) +#define vprintk(f, a) vDbgPrintEx(DPFLTR_IHVDRIVER_ID, DPFLTR_ERROR_LEVEL, f, a) + /* vDbgPrintEx only available on xp and later OS */ +#define cfs_assert ASSERT #else /* !__KERNEL__ */ @@ -91,6 +88,9 @@ static inline void our_cond_resched() # include #endif # include +#include + +#define cfs_assert _ASSERT #endif /* End of !__KERNEL__ */ @@ -104,12 +104,12 @@ typedef struct { __s64 lwte_when; char *lwte_where; void *lwte_task; - long_ptr lwte_p1; - long_ptr lwte_p2; - long_ptr lwte_p3; - long_ptr lwte_p4; + long_ptr_t lwte_p1; + long_ptr_t lwte_p2; + long_ptr_t lwte_p3; + long_ptr_t lwte_p4; # if BITS_PER_LONG > 32 - long_ptr lwte_pad; + long_ptr_t lwte_pad; # endif } lwt_event_t; @@ -119,7 +119,7 @@ typedef struct { /* ------------------------------------------------------------------ */ -#define IOCTL_LIBCFS_TYPE long_ptr +#define IOCTL_LIBCFS_TYPE long_ptr_t #ifdef __CYGWIN__ # ifndef BITS_PER_LONG @@ -133,35 +133,34 @@ typedef struct { #if BITS_PER_LONG > 32 # define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long_ptr_t)0x5a5a5a5a5a5a5a5a) +# define LP_POISON 
((char *)(long_ptr_t)0x5a5a5a5a5a5a5a5a) #else # define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long_ptr)0x5a5a5a5a) -# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a) +# define LL_POISON ((long_ptr_t)0x5a5a5a5a) +# define LP_POISON ((char *)(long_ptr_t)0x5a5a5a5a) #endif -#if defined(__x86_64__) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%I64u" -# define LPD64 "%I64d" -# define LPX64 "%I64x" -# define LPSZ "%u" -# define LPSSZ "%d" -#endif -#ifndef LPU64 -# error "No word size defined" +#define LPF64 "%I64d" +#define LPU64 "%I64u" +#define LPD64 "%I64d" +#define LPX64 "%#I64x" +#define LPSZ "%lu" +#define LPSSZ "%ld" + +/* + * long_ptr_t & ulong_ptr_t, same to "long" for linux + */ +#if _X86_ +# define LPLU "%u" +# define LPLD "%d" +# define LPLX "%#x" +# define LPPID "%d" +#else +# define LPLU "%I64u" +# define LPLD "%I64d" +# define LPLX "%#I64x" +# define LPPID "%d" #endif #endif diff --git a/libcfs/include/libcfs/winnt/libcfs.h b/libcfs/include/libcfs/winnt/libcfs.h index aaef666..cfff406 100644 --- a/libcfs/include/libcfs/winnt/libcfs.h +++ b/libcfs/include/libcfs/winnt/libcfs.h @@ -42,12 +42,13 @@ #endif /* workgroud for VC compiler */ -#ifndef __FUNCTION__ -#define __FUNCTION__ "generic" +#if _MSC_VER <= 1300 +#define __FUNCTION__ ("generic") #endif +#include #include -#include +#include #include #include #include @@ -56,20 +57,6 @@ #include #include -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); - #ifdef __KERNEL__ enum { @@ -85,18 +72,25 @@ static
inline __u32 query_stack_size() { ULONG LowLimit, HighLimit; - IoGetStackLimits(&LowLimit, &HighLimit); + IoGetStackLimits((PULONG_PTR)&LowLimit, (PULONG_PTR)&HighLimit); ASSERT(HighLimit > LowLimit); return (__u32) (HighLimit - LowLimit); } -#else + +/* disable watchdog */ +#undef WITH_WATCHDOG + +#else /* !__KERNEL__*/ + +#include + static inline __u32 query_stack_size() { - return 4096; + return PAGE_SIZE; /* using one page in default */ } -#endif +#endif /* __KERNEL__*/ #ifndef THREAD_SIZE # define THREAD_SIZE query_stack_size() @@ -105,26 +99,8 @@ static inline __u32 query_stack_size() #define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) #ifdef __KERNEL__ -# ifdef __ia64__ -# define CDEBUG_STACK() (THREAD_SIZE - \ - ((ulong_ptr)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK (IoGetRemainingStackSize()) -# error "This doesn't seem right; CDEBUG_STACK should grow with the stack" -# endif /* __ia64__ */ - -#define CHECK_STACK() \ -do { \ - unsigned long _stack = CDEBUG_STACK(); \ - \ - if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ - libcfs_stack = _stack; \ - libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ - __FILE__, NULL, __LINE__, \ - "maximum lustre stack %lu\n", _stack); \ - } \ -} while (0) +#define CDEBUG_STACK() (THREAD_SIZE - (__u32)IoGetRemainingStackSize()) +#define CHECK_STACK() do {} while(0) #else /* !__KERNEL__ */ #define CHECK_STACK() do { } while(0) #define CDEBUG_STACK() (0L) @@ -134,8 +110,8 @@ do { \ #define LUSTRE_LNET_PID 12345 #define ENTRY_NESTING_SUPPORT (0) -#define ENTRY_NESTING do {;} while (0) -#define EXIT_NESTING do {;} while (0) +#define ENTRY_NESTING do {} while (0) +#define EXIT_NESTING do {} while (0) #define __current_nesting_level() (0) #endif /* _WINNT_LIBCFS_H */ diff --git a/libcfs/include/libcfs/winnt/portals_compat25.h b/libcfs/include/libcfs/winnt/portals_compat25.h index 17965e6..2353528 100644 --- a/libcfs/include/libcfs/winnt/portals_compat25.h +++ 
b/libcfs/include/libcfs/winnt/portals_compat25.h @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: * * GPL HEADER START * @@ -36,7 +36,27 @@ #ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__ #define __LIBCFS_WINNT_PORTALS_COMPAT_H__ +#ifdef __KERNEL__ +/* + * Signal + */ +#define SIGNAL_MASK_ASSERT() do {} while(0) +#define SIGNAL_MASK_LOCK(task, flags) do {} while(0) +#define SIGNAL_MASK_UNLOCK(task, flags) do {} while(0) +#define USERMODEHELPER(path, argv, envp) do {} while(0) +#define RECALC_SIGPENDING do {} while(0) +#define CLEAR_SIGPENDING do {} while(0) +#define CURRENT_SECONDS get_seconds() +#endif +#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ + proc_dointvec(table, write, filp, buffer, lenp) +#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ + proc_dostring(table, write, filp, buffer, lenp) +#define LL_PROC_PROTO(name) \ + name(cfs_sysctl_table_t *table, int write, struct file *filp, \ + void __user *buffer, size_t *lenp) +#define DECLARE_LL_PROC_PPOS_DECL loff_t *ppos = &filp->f_pos #endif /* _PORTALS_COMPAT_H */ diff --git a/libcfs/include/libcfs/winnt/portals_utils.h b/libcfs/include/libcfs/winnt/portals_utils.h index c9ae107..54bb4ee 100644 --- a/libcfs/include/libcfs/winnt/portals_utils.h +++ b/libcfs/include/libcfs/winnt/portals_utils.h @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4 * * GPL HEADER START * @@ -53,30 +53,23 @@ #define cfs_clear_flag(x,f) ((x) &= ~(f)) #endif - -static inline __u32 __do_div(__u32 * n, __u32 b) +static inline __u32 do_div64(__u64 * n, __u64 b) { - __u32 mod; + __u64 mod; mod = *n % b; *n = *n / b; - return mod; + return (__u32)mod; } -#define 
do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base)) - +#define do_div(n, b) do_div64(&(n), (__u64)(b)) #ifdef __KERNEL__ #include #include char * strsep(char **s, const char *ct); -static inline size_t strnlen(const char * s, size_t count) { - size_t len = 0; - while(len < count && s[len++]); - return len; -} -char * ul2dstr(ulong_ptr address, char *buf, int len); +char * ul2dstr(ulong_ptr_t address, char *buf, int len); #define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) #define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3) @@ -84,25 +77,154 @@ char * ul2dstr(ulong_ptr address, char *buf, int len); unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base); +static inline int set_bit(int nr, void * addr) +{ + (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31)); + return *((int *) addr); +} + static inline int test_bit(int nr, void * addr) { - return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0; + return (int)(((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0); } -static inline void clear_bit(int nr, void * addr) +static inline int clear_bit(int nr, void * addr) { (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31))); + return *((int *) addr); } +static inline int test_and_set_bit(int nr, volatile void *addr) +{ + int rc; + unsigned char mask; + volatile unsigned char *ADDR = addr; -static inline void set_bit(int nr, void * addr) + ADDR += nr >> 3; + mask = 1 << (nr & 0x07); + rc = ((mask & *ADDR) != 0); + *ADDR |= mask; + + return rc; +} + +#define ext2_set_bit(nr,addr) (set_bit(nr, addr), 0) +#define ext2_clear_bit(nr,addr) (clear_bit(nr, addr), 0) +#define ext2_test_bit(nr,addr) test_bit(nr, addr) + +static inline int ffs(int x) { - (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31)); + int r = 1; + + if (!x) + return 0; + if (!(x & 0xffff)) { + x >>= 16; + r += 16; + } + if (!(x & 0xff)) { + x >>= 8; + r += 8; + } + if (!(x & 0xf)) { + x >>= 4; + r += 4; + } + if (!(x & 3)) { +
x >>= 2; + r += 2; + } + if (!(x & 1)) { + x >>= 1; + r += 1; + } + return r; +} + +static inline unsigned long __ffs(unsigned long word) +{ + int num = 0; + +#if BITS_PER_LONG == 64 + if ((word & 0xffffffff) == 0) { + num += 32; + word >>= 32; + } +#endif + if ((word & 0xffff) == 0) { + num += 16; + word >>= 16; + } + if ((word & 0xff) == 0) { + num += 8; + word >>= 8; + } + if ((word & 0xf) == 0) { + num += 4; + word >>= 4; + } + if ((word & 0x3) == 0) { + num += 2; + word >>= 2; + } + if ((word & 0x1) == 0) + num += 1; + return num; +} + +/** + * fls - find last (most-significant) bit set + * @x: the word to search + * + * This is defined the same way as ffs. + * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ +static inline +int fls(int x) +{ + int r = 32; + + if (!x) + return 0; + if (!(x & 0xffff0000u)) { + x <<= 16; + r -= 16; + } + if (!(x & 0xff000000u)) { + x <<= 8; + r -= 8; + } + if (!(x & 0xf0000000u)) { + x <<= 4; + r -= 4; + } + if (!(x & 0xc0000000u)) { + x <<= 2; + r -= 2; + } + if (!(x & 0x80000000u)) { + x <<= 1; + r -= 1; + } + return r; +} + +static inline unsigned find_first_bit(const unsigned long *addr, unsigned size) +{ + unsigned x = 0; + + while (x < size) { + unsigned long val = *addr++; + if (val) + return __ffs(val) + x; + x += (sizeof(*addr)<<3); + } + return x; } static inline void read_random(char *buf, int len) { - ULONG Seed = (ULONG) buf; + ULONG Seed = (ULONG)(ULONG_PTR) buf; Seed = RtlRandom(&Seed); while (len >0) { if (len > sizeof(ULONG)) { @@ -116,6 +238,7 @@ static inline void read_random(char *buf, int len) } } } + #define get_random_bytes(buf, len) read_random(buf, len) /* do NOT use function or expression as parameters ... 
*/ @@ -147,12 +270,18 @@ static int copy_from_user(void *to, void *from, int c) return 0; } -static int copy_to_user(void *to, void *from, int c) +static int copy_to_user(void *to, const void *from, int c) { memcpy(to, from, c); return 0; } +static unsigned long +clear_user(void __user *to, unsigned long n) +{ + memset(to, 0, n); + return 0; +} #define put_user(x, ptr) \ ( \ @@ -169,12 +298,46 @@ static int copy_to_user(void *to, void *from, int c) #define num_physpages (64 * 1024) -#define snprintf _snprintf -#define vsnprintf _vsnprintf +#else +#define unlink _unlink +#define close _close +#define open _open +#define fdopen _fdopen +#define strdup _strdup +#define fileno _fileno +#define isatty _isatty +#define stat _stat #endif /* !__KERNEL__ */ int cfs_error_code(NTSTATUS); +static inline int vsnprintf(char *buf, size_t cnt, + const char *fmt, va_list va) +{ + int rc; + +#ifdef TRUE /* using msvcrt from windkk 3790 */ + rc = _vsnprintf(buf, cnt, fmt, va); +#else + rc = _vsnprintf_s(buf, cnt, cnt, fmt, va); +#endif + if (rc == -1) + return cnt; + return rc; +} + +static inline int snprintf(char *buf, size_t cnt, + const char *fmt, ...) +{ + int rc; + va_list va; + + va_start(va, fmt); + rc = vsnprintf(buf, cnt, fmt, va); + va_end(va); + return rc; +} + #endif diff --git a/libcfs/include/libcfs/winnt/winnt-fs.h b/libcfs/include/libcfs/winnt/winnt-fs.h index 7e0c939..e3d52c0 100644 --- a/libcfs/include/libcfs/winnt/winnt-fs.h +++ b/libcfs/include/libcfs/winnt/winnt-fs.h @@ -54,18 +54,51 @@ #define NODEV 0 #define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) +#define PATH_MAX (260) #ifdef __KERNEL__ +/* linux/fs.h */ + +#define MAY_EXEC 1 +#define MAY_WRITE 2 +#define MAY_READ 4 +#define MAY_APPEND 8 + +#define FMODE_READ 1 +#define FMODE_WRITE 2 + +/* Internal kernel extensions */ +#define FMODE_LSEEK 4 +#define FMODE_PREAD 8 +#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ + +/* File is being opened for execution.
Primary users of this flag are + distributed filesystems that can use it to achieve correct ETXTBUSY + behavior for cross-node execution/opening_for_writing of files */ +#define FMODE_EXEC 16 + +#define RW_MASK 1 +#define RWA_MASK 2 +#define READ 0 +#define WRITE 1 +#define READA 2 /* read-ahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ +#define SPECIAL 4 /* For non-blockdevice requests in request queue */ +#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) +#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) +#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) + struct file_operations { - loff_t (*lseek)(struct file * file, loff_t offset, int origin); + struct module *owner; + loff_t (*llseek)(struct file * file, loff_t offset, int origin); ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos); ssize_t (*write)(struct file * file, const char * buffer, size_t count, loff_t *ppos); - int (*ioctl) (struct file *, unsigned int, ulong_ptr); - int (*open) (struct file *); - int (*release) (struct file *); + int (*ioctl) (struct file *, unsigned int, ulong_ptr_t); + int (*open) (struct inode*, struct file *); + int (*release) (struct inode*, struct file *); }; struct file { @@ -73,23 +106,23 @@ struct file { cfs_handle_t f_handle; unsigned int f_flags; mode_t f_mode; - ulong_ptr f_count; - - //struct list_head f_list; - //struct dentry * f_dentry; - - cfs_proc_entry_t * proc_dentry; - cfs_file_operations_t * f_op; + __u32 f_count; size_t f_size; loff_t f_pos; unsigned int f_uid, f_gid; int f_error; - ulong_ptr f_version; + __u32 f_version; - void * private_data; + //struct list_head f_list; + struct dentry * f_dentry; + + cfs_proc_entry_t * proc_dentry; + cfs_file_operations_t * f_op; + void * private_data; + struct inode * f_inode; char f_name[1]; }; @@ -105,9 +138,7 @@ int cfs_filp_fsync(cfs_file_t *fp); int cfs_get_file(cfs_file_t *fp); int cfs_put_file(cfs_file_t *fp); int 
cfs_file_count(cfs_file_t *fp); - - - +#define cfs_filp_unlink(x, y) (KdBreakPoint(),0) /* * CFS_FLOCK routines */ @@ -146,41 +177,212 @@ typedef struct file_lock{ #define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ #define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ //#define ATTR_CTIME_SET 0x2000 -#define ATTR_BLOCKS 0x4000 + +/* + * set ATTR_BLOCKS to a high value to avoid any risk of collision with other + * ATTR_* attributes (see bug 13828): lustre/include/winnt/lustre_compat25.h + */ +/* #define ATTR_BLOCKS 0x4000 */ +#define ATTR_BLOCKS (1 << 27) + #define ATTR_KILL_SUID 0 #define ATTR_KILL_SGID 0 + + #define in_group_p(x) (0) -/* - * proc fs routines + +/* VFS structures for windows */ + +/* + * inode formats */ -int proc_init_fs(); -void proc_destroy_fs(); +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 + +/* Inode flags - they have nothing to superblock flags now */ + +#define S_SYNC 1 /* Writes are synced at once */ +#define S_NOATIME 2 /* Do not update access times */ +#define S_APPEND 4 /* Append-only file */ +#define S_IMMUTABLE 8 /* Immutable file */ +#define S_DEAD 16 /* removed, but still open directory */ +#define S_NOQUOTA 32 /* Inode is not counted to quota */ +#define S_DIRSYNC 64 /* Directory modifications are synchronous */ +#define S_NOCMTIME 128 /* Do not update file c/mtime */ +#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ +#define S_PRIVATE 512 /* Inode is fs-internal */ + + +struct inode { + __u32 i_mode; + __u64 i_size; + __u64 i_blocks; + struct timespec i_atime; + struct timespec i_ctime; + struct timespec i_mtime; + struct timespec i_dtime; + __u32 i_ino; + __u32 i_generation; + __u32 i_state; + __u32 i_blkbits; + int i_uid; + int i_gid; + __u32 i_flags; 
+ mutex_t i_sem; + void * i_priv; +}; +#define I_FREEING 0x0001 -/* - * misc - */ +struct dentry { + atomic_t d_count; + struct { + int len; + char * name; + } d_name; + struct inode * d_inode; + struct dentry* d_parent; +}; -static inline void *ERR_PTR(long_ptr error) +extern struct dentry *dget(struct dentry *de); +extern void dput(struct dentry *de); +static __inline struct dentry *lookup_one_len(const char *name, struct dentry *de, int len) { - return (void *) error; + cfs_enter_debugger(); + return NULL; } -static inline long_ptr PTR_ERR(const void *ptr) +static inline loff_t i_size_read(const struct inode *inode) { - return (long_ptr) ptr; + cfs_enter_debugger(); + return inode->i_size; } -static inline long_ptr IS_ERR(const void *ptr) +static inline void i_size_write(struct inode *inode, loff_t i_size) { - return (ulong_ptr)ptr > (ulong_ptr)-1000L; + cfs_enter_debugger(); + inode->i_size = i_size; } +struct kstatfs { + u64 f_type; + long f_bsize; + u64 f_blocks; + u64 f_bfree; + u64 f_bavail; + u64 f_files; + u64 f_ffree; + __u32 f_fsid; + long f_namelen; + long f_frsize; + long f_spare[5]; +}; + +struct super_block { + void * s_fs_info; +}; + +struct vfsmount { + struct dentry * pwd; + struct dentry * mnt_root; + struct super_block *mnt_sb; +}; + + +/* + * quota definitions (linux/quota.h) + */ + +#define MAXQUOTAS 2 +#define USRQUOTA 0 /* element used for user quotas */ +#define GRPQUOTA 1 /* element used for group quotas */ + + +/* + * proc fs routines + */ + +typedef int (read_proc_t)(char *page, char **start, off_t off, + int count, int *eof, void *data); + +struct file; /* forward ref */ +typedef int (write_proc_t)(struct file *file, const char *buffer, + unsigned long count, void *data); + +void proc_destory_subtree(cfs_proc_entry_t *entry); + +int proc_init_fs(); +void proc_destroy_fs(); + +/* + * thread affinity + */ + +HANDLE cfs_open_current_thread(); +void cfs_close_thread_handle(HANDLE handle); +KAFFINITY cfs_query_thread_affinity(); +int 
cfs_set_thread_affinity(KAFFINITY affinity); +int cfs_tie_thread_to_cpu(int cpu); +typedef PVOID mm_segment_t; + +/* + * thread priority + */ +int cfs_set_thread_priority(KPRIORITY priority); + +#define MAKE_MM_SEG(s) ((mm_segment_t)(ulong_ptr_t)(s)) +#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFFUL) +#define USER_DS MAKE_MM_SEG(PAGE_OFFSET) + +#define get_ds() (KERNEL_DS) +#define set_fs(x) do {} while(0) +#define get_fs() (NULL) + +/* + * radix tree (linux/radix_tree.h) + */ + +/* radix tree root structure */ +struct radix_tree_root { + RTL_GENERIC_TABLE table; +}; + +/* #define RADIX_TREE_INIT(mask) {0} + +#define RADIX_TREE(name, mask) \ + struct radix_tree_root name RADIX_TREE_INIT(mask) */ + +VOID RadixInitTable(IN PRTL_GENERIC_TABLE Table); +#define INIT_RADIX_TREE(root, mask) RadixInitTable(&((root)->table)) + +/* all radix tree routines should be protected by external locks */ +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items); +void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index); +int radix_tree_insert(struct radix_tree_root *root,unsigned long index, void *item); +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index); + +struct rcu_head { + int foo; +}; + #else /* !__KERNEL__ */ +#if !defined(_WINDOWS_) + #define CREATE_NEW 1 #define CREATE_ALWAYS 2 #define OPEN_EXISTING 3 @@ -222,6 +424,13 @@ CloseHandle( ); NTSYSAPI +DWORD +NTAPI +GetLastError( + VOID + ); + +NTSYSAPI HANDLE NTAPI CreateFileMappingA( @@ -259,6 +468,7 @@ NTAPI UnmapViewOfFile( IN PVOID lpBaseAddress ); +#endif #endif /* __KERNEL__ */ @@ -266,5 +476,12 @@ typedef struct { void *d; } cfs_dentry_t; +/* + * misc + */ + +#define ERR_PTR(error) ((void *)(long_ptr_t)(error)) +#define PTR_ERR(ptr) ((long)(long_ptr_t) (ptr)) +#define IS_ERR(ptr) ((long)(((ulong_ptr_t) (ptr)) > (ulong_ptr_t)(-1000L))) #endif /* __LIBCFS_WINNT_CFS_FS_H__*/ diff --git 
a/libcfs/include/libcfs/winnt/winnt-lock.h b/libcfs/include/libcfs/winnt/winnt-lock.h index e9b1b57..0b6bac5 100644 --- a/libcfs/include/libcfs/winnt/winnt-lock.h +++ b/libcfs/include/libcfs/winnt/winnt-lock.h @@ -49,9 +49,18 @@ /* - * nt specific part ... + * IMPORTANT !!!!!!!! + * + * Lock declarations are not guaranteed to be initialized, + * although some of them are initialized in Linux. All locks + * declared by CFS_DECL_* should be initialized explicitly. + */ + +/* + * spinlock & event definitions */ +typedef struct spin_lock spinlock_t; /* atomic */ @@ -73,6 +82,13 @@ void FASTCALL atomic_dec(atomic_t *v); int FASTCALL atomic_dec_and_test(atomic_t *v); int FASTCALL atomic_inc_and_test(atomic_t *v); +int FASTCALL atomic_add_return(int i, atomic_t *v); +int FASTCALL atomic_sub_return(int i, atomic_t *v); + +#define atomic_inc_return(v) atomic_add_return(1, v) +#define atomic_dec_return(v) atomic_sub_return(1, v) + +int FASTCALL atomic_dec_and_lock(atomic_t *v, spinlock_t *lock); /* event */ @@ -107,7 +123,7 @@ static inline void } /* - * cfs_wait_event + * cfs_wait_event_internal * To wait on an event to syncrhonize the process * * Arguments: @@ -123,7 +139,7 @@ static inline void */ static inline int64_t -cfs_wait_event(event_t * event, int64_t timeout) +cfs_wait_event_internal(event_t * event, int64_t timeout) { NTSTATUS Status; LARGE_INTEGER TimeOut; @@ -185,16 +201,6 @@ cfs_clear_event(event_t * event) KeResetEvent(event); } - -/* - * IMPORTANT !!!!!!!! - * - * All locks' declaration are not guaranteed to be initialized, - * Althought some of they are initialized in Linux. All locks - * declared by CFS_DECL_* should be initialized explicitly.
- */ - - /* * spin lock defintions / routines */ @@ -209,29 +215,31 @@ cfs_clear_event(event_t * event) * */ -typedef struct spin_lock { - +struct spin_lock { KSPIN_LOCK lock; KIRQL irql; - -} spinlock_t; - +}; #define CFS_DECL_SPIN(name) spinlock_t name; #define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name; +#define SPIN_LOCK_UNLOCKED {0} static inline void spin_lock_init(spinlock_t *lock) { KeInitializeSpinLock(&(lock->lock)); } - static inline void spin_lock(spinlock_t *lock) { KeAcquireSpinLock(&(lock->lock), &(lock->irql)); } +static inline void spin_lock_nested(spinlock_t *lock, unsigned subclass) +{ + KeAcquireSpinLock(&(lock->lock), &(lock->irql)); +} + static inline void spin_unlock(spinlock_t *lock) { KIRQL irql = lock->irql; @@ -248,7 +256,7 @@ static inline void spin_unlock(spinlock_t *lock) no way to identify the system is MP build or UP build on the runtime. We just uses a workaround for it. */ -extern int MPSystem; +extern int libcfs_mp_system; static int spin_trylock(spinlock_t *lock) { @@ -259,8 +267,8 @@ static int spin_trylock(spinlock_t *lock) KeRaiseIrql(DISPATCH_LEVEL, &Irql); - if (MPSystem) { - if (0 == (ulong_ptr)lock->lock) { + if (libcfs_mp_system) { + if (0 == (ulong_ptr_t)lock->lock) { #if _X86_ __asm { mov edx, dword ptr [ebp + 8] @@ -287,6 +295,16 @@ static int spin_trylock(spinlock_t *lock) return rc; } +static int spin_is_locked(spinlock_t *lock) +{ +#if _WIN32_WINNT >= 0x502 + /* KeTestSpinLock only avalilable on 2k3 server or later */ + return (!KeTestSpinLock(&lock->lock)); +#else + return (int) (lock->lock); +#endif +} + /* synchronization between cpus: it will disable all DPCs kernel task scheduler on the CPU */ #define spin_lock_bh(x) spin_lock(x) @@ -305,7 +323,7 @@ typedef struct rw_semaphore { #define CFS_DECL_RWSEM(name) rw_semaphore_t name #define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name - +#define DECLARE_RWSEM CFS_DECL_RWSEM /* * init_rwsem @@ -325,7 +343,7 @@ static inline void 
init_rwsem(rw_semaphore_t *s) { ExInitializeResourceLite(&s->rwsem); } - +#define rwsem_init init_rwsem /* * fini_rwsem @@ -346,6 +364,7 @@ static inline void fini_rwsem(rw_semaphore_t *s) { ExDeleteResourceLite(&s->rwsem); } +#define rwsem_fini fini_rwsem /* * down_read @@ -365,6 +384,7 @@ static inline void down_read(struct rw_semaphore *s) { ExAcquireResourceSharedLite(&s->rwsem, TRUE); } +#define down_read_nested down_read /* @@ -406,7 +426,7 @@ static inline void down_write(struct rw_semaphore *s) { ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE); } - +#define down_write_nested down_write /* * down_write_trylock @@ -500,6 +520,11 @@ void write_unlock(rwlock_t * rwlock); #define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0) #define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0) +#define write_lock_bh write_lock +#define write_unlock_bh write_unlock + +struct lock_class_key {int foo;}; +#define lockdep_set_class(lock, class) do {} while(0) /* * Semaphore @@ -509,9 +534,9 @@ void write_unlock(rwlock_t * rwlock); * - __up(x) */ -typedef struct semaphore { +struct semaphore{ KSEMAPHORE sem; -} mutex_t; +}; static inline void sema_init(struct semaphore *s, int val) { @@ -524,12 +549,25 @@ static inline void __down(struct semaphore *s) KernelMode, FALSE, NULL ); } - static inline void __up(struct semaphore *s) { KeReleaseSemaphore(&s->sem, 0, 1, FALSE); } +static inline int down_trylock(struct semaphore * s) +{ + LARGE_INTEGER timeout = {0}; + NTSTATUS status = + KeWaitForSingleObject( &(s->sem), Executive, + KernelMode, FALSE, &timeout); + + if (status == STATUS_SUCCESS) { + return 0; + } + + return 1; +} + /* * mutex_t: * @@ -539,6 +577,10 @@ static inline void __up(struct semaphore *s) * - mutex_down(x) */ +#define mutex semaphore +typedef struct semaphore mutex_t; + +#define DECLARE_MUTEX(x) mutex_t x /* * init_mutex @@ -553,13 +595,13 @@ static inline void __up(struct semaphore *s) * Notes: * N/A */ - +#define mutex_init init_mutex 
static inline void init_mutex(mutex_t *mutex) { sema_init(mutex, 1); } - +#define init_MUTEX init_mutex /* * mutex_down * To acquire the mutex lock @@ -579,6 +621,10 @@ static inline void mutex_down(mutex_t *mutex) __down(mutex); } +#define mutex_lock(m) mutex_down(m) +#define mutex_trylock(s) down_trylock(s) +#define mutex_lock_nested(m) mutex_down(m) +#define down(m) mutex_down(m) /* * mutex_up @@ -599,6 +645,8 @@ static inline void mutex_up(mutex_t *mutex) __up(mutex); } +#define mutex_unlock(m) mutex_up(m) +#define up(m) mutex_up(m) /* * init_mutex_locked @@ -614,12 +662,18 @@ static inline void mutex_up(mutex_t *mutex) * N/A */ -static inline init_mutex_locked(mutex_t *mutex) +static inline void init_mutex_locked(mutex_t *mutex) { init_mutex(mutex); mutex_down(mutex); } +#define init_MUTEX_LOCKED init_mutex_locked + +static inline void mutex_destroy(mutex_t *mutex) +{ +} + /* * completion * @@ -689,14 +743,15 @@ static inline void complete(struct completion *c) static inline void wait_for_completion(struct completion *c) { - cfs_wait_event(&(c->event), 0); + cfs_wait_event_internal(&(c->event), 0); } -/* __KERNEL__ */ -#else - -#include "../user-lock.h" +static inline int wait_for_completion_interruptible(struct completion *c) +{ + cfs_wait_event_internal(&(c->event), 0); + return 0; +} -/* __KERNEL__ */ -#endif +#else /* !__KERNEL__ */ +#endif /* !__KERNEL__ */ #endif diff --git a/libcfs/include/libcfs/winnt/winnt-mem.h b/libcfs/include/libcfs/winnt/winnt-mem.h index c5cbfa0..211ffef 100644 --- a/libcfs/include/libcfs/winnt/winnt-mem.h +++ b/libcfs/include/libcfs/winnt/winnt-mem.h @@ -45,8 +45,16 @@ #error Do not #include this file directly. 
#include instead #endif +#include + #ifdef __KERNEL__ +typedef struct cfs_mem_cache cfs_mem_cache_t; + +/* + * page definitions + */ + #define CFS_PAGE_SIZE PAGE_SIZE #define CFS_PAGE_SHIFT PAGE_SHIFT #define CFS_PAGE_MASK (~(PAGE_SIZE - 1)) @@ -54,11 +62,104 @@ typedef struct cfs_page { void * addr; atomic_t count; + void * private; + void * mapping; + __u32 index; + __u32 flags; } cfs_page_t; +#define page cfs_page + +#ifndef page_private +#define page_private(page) ((page)->private) +#define set_page_private(page, v) ((page)->private = (v)) +#endif + +#define page_count(page) (0) + +#define PG_locked 0 /* Page is locked. Don't touch. */ +#define PG_error 1 +#define PG_referenced 2 +#define PG_uptodate 3 + +#define PG_dirty 4 +#define PG_lru 5 +#define PG_active 6 +#define PG_slab 7 /* slab debug (Suparna wants this) */ + +#define PG_owner_priv_1 8 /* Owner use. If pagecache, fs may use*/ +#define PG_arch_1 9 +#define PG_reserved 10 +#define PG_private 11 /* If pagecache, has fs-private data */ + +#define PG_writeback 12 /* Page is under writeback */ +#define PG_compound 14 /* Part of a compound page */ +#define PG_swapcache 15 /* Swap page: swp_entry_t in private */ + +#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ +#define PG_reclaim 17 /* To be reclaimed asap */ +#define PG_buddy 19 /* Page is free, on buddy lists */ + +#define PG_virt 31 /* addr is not */ + +#ifndef arch_set_page_uptodate +#define arch_set_page_uptodate(page) +#endif + +/* Make it prettier to test the above... 
*/ +#define UnlockPage(page) unlock_page(page) +#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) +#define SetPageUptodate(page) \ + do { \ + arch_set_page_uptodate(page); \ + set_bit(PG_uptodate, &(page)->flags); \ + } while (0) +#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) +#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) +#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) +#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) +#define PageLocked(page) test_bit(PG_locked, &(page)->flags) +#define LockPage(page) set_bit(PG_locked, &(page)->flags) +#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) +#define PageChecked(page) test_bit(PG_checked, &(page)->flags) +#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) +#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) +#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) +#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) +#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) +#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags) + +#define PageError(page) test_bit(PG_error, &(page)->flags) +#define SetPageError(page) set_bit(PG_error, &(page)->flags) +#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) +#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) +#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) +#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) + +#define PageActive(page) test_bit(PG_active, &(page)->flags) +#define SetPageActive(page) set_bit(PG_active, &(page)->flags) +#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) + +#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) +#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback, \ + &(page)->flags) +#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback, \ 
+ &(page)->flags) + +#define __GFP_FS (1) +#define GFP_KERNEL (2) +#define GFP_ATOMIC (4) cfs_page_t *cfs_alloc_page(int flags); void cfs_free_page(cfs_page_t *pg); +void cfs_release_page(cfs_page_t *pg); +cfs_page_t * virt_to_page(void * addr); +cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order); +void __cfs_free_pages(cfs_page_t *page, unsigned int order); +int cfs_mem_is_in_cache(const void *addr, const cfs_mem_cache_t *kmem); + +#define page_cache_get(a) do {} while (0) +#define page_cache_release(a) do {} while (0) static inline void *cfs_page_address(cfs_page_t *page) { @@ -90,15 +191,18 @@ static inline int cfs_page_count(cfs_page_t *page) return atomic_read(&page->count); } +#define cfs_page_index(p) ((p)->index) + /* * Memory allocator */ #define CFS_ALLOC_ATOMIC_TRY (0) - extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); extern void cfs_free(void *addr); +#define kmalloc cfs_alloc + extern void *cfs_alloc_large(size_t nr_bytes); extern void cfs_free_large(void *addr); @@ -110,20 +214,36 @@ extern void cfs_free_large(void *addr); /* The cache name is limited to 20 chars */ -typedef struct cfs_mem_cache { - +struct cfs_mem_cache { char name[20]; - ulong_ptr flags; + ulong_ptr_t flags; NPAGED_LOOKASIDE_LIST npll; - -} cfs_mem_cache_t; +}; -extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr); +extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); +/* + * shrinker + */ +typedef int (*shrink_callback)(int nr_to_scan, gfp_t gfp_mask); +struct shrinker { + shrink_callback cb; + int seeks; /* seeks to recreate an obj */ + + /* These are for internal use */ + struct list_head list; + long nr; /* objs pending delete */ +}; + +struct shrinker * set_shrinker(int seeks, shrink_callback cb); +void 
remove_shrinker(struct shrinker *s); + +int start_shrinker_timer(); +void stop_shrinker_timer(); /* * Page allocator slabs @@ -142,8 +262,20 @@ extern cfs_mem_cache_t *cfs_page_p_slab; #define rmb() mb() #define wmb() mb() +/* + * MM defintions from (linux/mm.h) + */ -/* __KERNEL__ */ -#endif +#define DEFAULT_SEEKS 2 /* shrink seek */ + +#else /* !__KERNEL__ */ + +#include "../user-mem.h" + +/* page alignmed buffer allocation */ +void* pgalloc(size_t factor); +void pgfree(void * page); + +#endif /* __KERNEL__ */ #endif /* __WINNT_CFS_MEM_H__ */ diff --git a/libcfs/include/libcfs/winnt/winnt-prim.h b/libcfs/include/libcfs/winnt/winnt-prim.h index 87f905b..6bef4c1 100644 --- a/libcfs/include/libcfs/winnt/winnt-prim.h +++ b/libcfs/include/libcfs/winnt/winnt-prim.h @@ -45,14 +45,13 @@ #error Do not #include this file directly. #include instead #endif - /* * libcfs proc device object */ -#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */ -#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */ +#define LUSTRE_PROC_DEVICE L"\\Device\\LNetProcFS" /* proc fs emulator device object */ +#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\LNetProcFS" /* proc fs user-visible device */ /* @@ -61,8 +60,6 @@ #define FILE_DEVICE_LIBCFS ('LC') -#define FILE_DEVICE_LIBCFS ('LC') - #define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs #define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs @@ -73,21 +70,22 @@ CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS) #pragma pack(4) - typedef struct _CFS_PROC_IOCTL { ULONG cmd; // ioctl command identifier ULONG len; // length of data + int rc; // return code + ULONG usused; // unused // UCHAR data[]; // content of the real ioctl } CFS_PROC_IOCTL, *PCFS_PROC_IOCTL; - #pragma pack() #ifdef __KERNEL__ -#include +void cfs_enter_debugger(void); +#define __builtin_return_address(x) (0) /* * Symbol functions for libcfs @@ -110,8 +108,6 @@ extern 
void * cfs_symbol_get(const char *); extern void cfs_symbol_put(const char *); extern void cfs_symbol_clean(); - - typedef struct file_operations cfs_file_operations_t; typedef struct file cfs_file_t; @@ -135,9 +131,9 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev); */ typedef int cfs_read_proc_t(char *page, char **start, off_t off, - int count, int *eof, void *data); + int count, int *eof, void *data); typedef int cfs_write_proc_t(struct file *file, const char *buffer, - ulong_ptr count, void *data); + unsigned long count, void *data); #define CFS_PROC_ENTRY_MAGIC 'CPEM' @@ -154,6 +150,8 @@ typedef struct cfs_proc_entry PRTL_SPLAY_LINKS root; }; + struct cfs_proc_entry *parent; + struct _file_entry { // proc file / leaf entry cfs_read_proc_t * read_proc; cfs_write_proc_t * write_proc; @@ -161,10 +159,11 @@ typedef struct cfs_proc_entry mode_t mode; unsigned short nlink; + BOOLEAN deleted; - struct file_operations * proc_fops; - void * data; + struct file_operations *proc_fops; + void *data; // proc_dir_entry ended. 
@@ -179,28 +178,32 @@ typedef struct cfs_proc_entry } cfs_proc_entry_t, cfs_proc_dir_entry_t; typedef cfs_proc_entry_t cfs_proc_dir_entry_t; +#define proc_dir_entry cfs_proc_entry #define PROC_BLOCK_SIZE PAGE_SIZE +struct proc_dir_entry *PDE(const struct inode *inode); + + /* * Sysctl register */ -typedef struct ctl_table cfs_sysctl_table_t; -typedef struct ctl_table_header cfs_sysctl_table_header_t; +typedef struct ctl_table cfs_sysctl_table_t; +typedef struct ctl_table_header cfs_sysctl_table_header_t; typedef int ctl_handler ( cfs_sysctl_table_t *table, - int *name, int nlen, - void *oldval, size_t *oldlenp, - void *newval, size_t newlen, - void **context ); + int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, + void **context ); typedef int proc_handler ( cfs_sysctl_table_t *ctl, int write, struct file * filp, - void *buffer, size_t *lenp ); + void *buffer, size_t *lenp ); int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, @@ -213,7 +216,6 @@ int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, void *oldval, size_t *oldlenp, void *newval, size_t newlen, void **context); - /* * System io control definitions */ @@ -261,21 +263,107 @@ struct ctl_table_header struct list_head ctl_entry; }; +/* proc root entries, support routines */ +extern cfs_proc_entry_t * cfs_proc_root; /* / */ +extern cfs_proc_entry_t * cfs_proc_proc; /* /proc */ +extern cfs_proc_entry_t * cfs_proc_fs; /* /proc/fs */ +extern cfs_proc_entry_t * cfs_proc_sys; /* /proc/sys */ +extern cfs_proc_entry_t * cfs_proc_dev; /* /dev */ -cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod, +cfs_proc_entry_t * create_proc_entry(const char *name, mode_t mod, cfs_proc_entry_t *parent); void proc_free_entry(cfs_proc_entry_t *de); -void remove_proc_entry(char *name, cfs_proc_entry_t *entry); -cfs_proc_entry_t * search_proc_entry(char * name, +void remove_proc_entry(const char *name, cfs_proc_entry_t *entry); +cfs_proc_entry_t * 
search_proc_entry(const char * name, cfs_proc_entry_t * root ); +cfs_proc_entry_t *proc_symlink(const char *name, + cfs_proc_entry_t *parent, + const char *dest); +cfs_proc_entry_t *proc_mkdir(const char *name, + cfs_proc_entry_t *parent); #define cfs_create_proc_entry create_proc_entry #define cfs_free_proc_entry proc_free_entry #define cfs_remove_proc_entry remove_proc_entry -#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a) -#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a) +struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, + int insert_at_head); +void unregister_sysctl_table(struct ctl_table_header * header); + +#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a) +#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t) + +/* + * seq device (linux/seq_file.h) + */ + + +/* + * seq file definitions + */ + +struct dentry; +struct vfsmount; + +struct path { + struct vfsmount *mnt; + struct dentry *dentry; +}; + +struct seq_operations; +struct file; +struct inode; + +struct seq_file { + char *buf; + size_t size; + size_t from; + size_t count; + loff_t index; + u32 version; + mutex_t lock; + const struct seq_operations *op; + void *private; +}; + +struct seq_operations { + void * (*start) (struct seq_file *m, loff_t *pos); + void (*stop) (struct seq_file *m, void *v); + void * (*next) (struct seq_file *m, void *v, loff_t *pos); + int (*show) (struct seq_file *m, void *v); +}; + +int seq_open(struct file *, const struct seq_operations *); +ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); +loff_t seq_lseek(struct file *, loff_t, int); +int seq_release(struct inode *, struct file *); +int seq_escape(struct seq_file *, const char *, const char *); +int seq_putc(struct seq_file *m, char c); +int seq_puts(struct seq_file *m, const char *s); + +int seq_printf(struct seq_file *, const char *, ...) 
+ __attribute__ ((format (printf,2,3))); +int seq_path(struct seq_file *, struct path *, char *); + +int single_open(struct file *, int (*)(struct seq_file *, void *), void *); +int single_release(struct inode *, struct file *); +void *__seq_open_private(struct file *, const struct seq_operations *, int); +int seq_open_private(struct file *, const struct seq_operations *, int); +int seq_release_private(struct inode *, struct file *); + +#define SEQ_START_TOKEN ((void *)1) + +/* + * Helpers for iteration over list_head-s in seq_files + */ + +extern struct list_head *seq_list_start(struct list_head *head, + loff_t pos); +extern struct list_head *seq_list_start_head(struct list_head *head, + loff_t pos); +extern struct list_head *seq_list_next(void *v, struct list_head *head, + loff_t *ppos); /* * declaration of proc kernel process routines @@ -290,7 +378,7 @@ lustre_close_file(cfs_file_t * fh); int lustre_do_ioctl( cfs_file_t * fh, unsigned long cmd, - ulong_ptr arg ); + ulong_ptr_t arg ); int lustre_ioctl_file( cfs_file_t * fh, @@ -298,7 +386,7 @@ lustre_ioctl_file( cfs_file_t * fh, size_t lustre_read_file( cfs_file_t * fh, - loff_t off, + loff_t offl, size_t size, char * buf ); @@ -320,7 +408,7 @@ typedef int cfs_task_state_t; #define CFS_TASK_INTERRUPTIBLE 0x00000001 #define CFS_TASK_UNINT 0x00000002 #define CFS_TASK_RUNNING 0x00000003 - +#define CFS_TASK_UNINTERRUPTIBLE CFS_TASK_UNINT #define CFS_WAITQ_MAGIC 'CWQM' #define CFS_WAITLINK_MAGIC 'CWLM' @@ -367,26 +455,6 @@ enum { #define CFS_DECL_WAITQ(name) cfs_waitq_t name - -void cfs_waitq_init(struct cfs_waitq *waitq); -void cfs_waitlink_init(struct cfs_waitlink *link); - -void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); -void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, - struct cfs_waitlink *link); -void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); -int cfs_waitq_active(struct cfs_waitq *waitq); - -void cfs_waitq_signal(struct cfs_waitq *waitq); -void 
cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); -void cfs_waitq_broadcast(struct cfs_waitq *waitq); - -void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); -cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, - cfs_task_state_t state, cfs_duration_t timeout); - - - /* Kernel thread */ typedef int (*cfs_thread_t) (void *arg); @@ -397,6 +465,7 @@ typedef struct _cfs_thread_context { } cfs_thread_context_t; int cfs_kernel_thread(int (*func)(void *), void *arg, int flag); +#define kernel_thread cfs_kernel_thread /* * thread creation flags from Linux, not used in winnt @@ -417,39 +486,121 @@ int cfs_kernel_thread(int (*func)(void *), void *arg, int flag); /* - * sigset ... + * group_info: linux/sched.h */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) +struct group_info { + int ngroups; + atomic_t usage; + gid_t small_block[NGROUPS_SMALL]; + int nblocks; + gid_t *blocks[0]; +}; -typedef sigset_t cfs_sigset_t; +#define get_group_info(group_info) do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +#define put_group_info(group_info) do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +static __inline struct group_info *groups_alloc(int gidsetsize) +{ + struct group_info * groupinfo; + KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__)); + groupinfo = (struct group_info *)cfs_alloc(sizeof(struct group_info), 0); + if (groupinfo) { + memset(groupinfo, 0, sizeof(struct group_info)); + } + return groupinfo; +} +static __inline void groups_free(struct group_info *group_info) +{ + KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__)); + cfs_free(group_info); +} +static __inline int set_current_groups(struct group_info *group_info) { + KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__)); + return 0; +} +static __inline int groups_search(struct group_info *group_info, gid_t 
grp) { + KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__)); + return 0; +} /* - * Task struct + * capability issue (linux/capability.h) */ -#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12)) +/* Override resource limits. Set resource limits. */ +/* Override quota limits. */ +/* Override reserved space on ext2 filesystem */ +/* Modify data journaling mode on ext3 filesystem (uses journaling + resources) */ +/* NOTE: ext2 honors fsuid when checking for resource overrides, so + you can override using fsuid too */ +/* Override size restrictions on IPC message queues */ +/* Allow more than 64hz interrupts from the real-time clock */ +/* Override max number of consoles on console allocation */ +/* Override max number of keymaps */ +#define CAP_SYS_RESOURCE 24 -#define NGROUPS 1 -#define CFS_CURPROC_COMM_MAX (16) -typedef struct task_sruct{ - mode_t umask; +/* + * capabilities support + */ + +typedef __u32 cfs_kernel_cap_t; + +#define cap_raise(c, flag) do {} while(0) +#define cap_lower(c, flag) do {} while(0) +#define cap_raised(c, flag) do {} while(0) - pid_t pid; - pid_t pgrp; - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; +/* + * Task struct + */ + +#define MAX_SCHEDULE_TIMEOUT ((long_ptr_t)(~0UL>>12)) +#define schedule_timeout(t) cfs_schedule_timeout(0, t) - int ngroups; - gid_t groups[NGROUPS]; - cfs_kernel_cap_t cap_effective, - cap_inheritable, - cap_permitted; +struct vfsmount; - char comm[CFS_CURPROC_COMM_MAX]; - void * journal_info; +#define NGROUPS 1 +#define CFS_CURPROC_COMM_MAX (16) +typedef struct task_sruct{ + mode_t umask; + sigset_t blocked; + + pid_t pid; + pid_t pgrp; + + uid_t uid,euid,suid,fsuid; + gid_t gid,egid,sgid,fsgid; + + int ngroups; + int cgroups; + gid_t groups[NGROUPS]; + struct group_info *group_info; + cfs_kernel_cap_t cap_effective, + cap_inheritable, + cap_permitted; + + char comm[CFS_CURPROC_COMM_MAX]; + void *journal_info; + struct vfsmount *fs; } cfs_task_t; +static inline void 
task_lock(cfs_task_t *t) +{ +} + +static inline void task_unlock(cfs_task_t *t) +{ +} /* * linux task struct emulator ... @@ -498,49 +649,80 @@ typedef struct _TASK_SLOT { #define current cfs_current() #define set_current_state(s) do {;} while (0) -#define wait_event(wq, condition) \ -do { \ - cfs_waitlink_t __wait; \ - \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ +#define wait_event(wq, condition) \ +do { \ + cfs_waitlink_t __wait; \ + \ + cfs_waitlink_init(&__wait); \ + while (TRUE) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) { \ + break; \ + } \ + cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ } while(0) -#define wait_event_interruptible(wq, condition, __ret) \ -do { \ - cfs_waitlink_t __wait; \ - \ - __ret = 0; \ - cfs_waitlink_init(&__wait); \ - while (TRUE) { \ - cfs_waitq_add(&wq, &__wait); \ - if (condition) { \ - break; \ - } \ - cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ - cfs_waitq_del(&wq, &__wait); \ - } \ - cfs_waitq_del(&wq, &__wait); \ +#define cfs_wait_event_interruptible(wq, condition, __ret) \ +do { \ + cfs_waitlink_t __wait; \ + \ + __ret = 0; \ + cfs_waitlink_init(&__wait); \ + while (TRUE) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) { \ + break; \ + } \ + cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ } while(0) +# define cfs_wait_event_interruptible_exclusive(wq, condition, rc) \ + cfs_wait_event_interruptible(wq, condition, rc) + +/* + retval == 0; condition met; we're good. + retval < 0; interrupted by signal. + retval > 0; timed out. 
+*/ + +#define cfs_waitq_wait_event_interruptible_timeout( \ + wq, condition, timeout, rc) \ +do { \ + cfs_waitlink_t __wait; \ + \ + rc = 0; \ + cfs_waitlink_init(&__wait); \ + while (TRUE) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) { \ + break; \ + } \ + if (cfs_waitq_timedwait(&__wait, \ + CFS_TASK_INTERRUPTIBLE, timeout) == 0) { \ + rc = TRUE; \ + break; \ + } \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ +} while(0) + + +#define cfs_waitq_wait_event_timeout \ + cfs_waitq_wait_event_interruptible_timeout int init_task_manager(); void cleanup_task_manager(); cfs_task_t * cfs_current(); -int schedule_timeout(int64_t time); -int schedule(); int wake_up_process(cfs_task_t * task); -#define cfs_schedule_timeout(state, time) schedule_timeout(time) void sleep_on(cfs_waitq_t *waitq); - +#define might_sleep() do {} while(0) #define CFS_DECL_JOURNAL_DATA #define CFS_PUSH_JOURNAL do {;} while(0) #define CFS_POP_JOURNAL do {;} while(0) @@ -555,8 +737,14 @@ void sleep_on(cfs_waitq_t *waitq); #define __init #endif -#define request_module(x) (0) +struct module { + const char *name; +}; + +extern struct module libcfs_global_module; +#define THIS_MODULE &libcfs_global_module +#define request_module(x) (0) #define EXPORT_SYMBOL(s) #define MODULE_AUTHOR(s) #define MODULE_DESCRIPTION(s) @@ -581,20 +769,42 @@ void sleep_on(cfs_waitq_t *waitq); #define cfs_module(name, version, init, fini) \ module_init(init); \ module_exit(fini) +#define module_refcount(x) (1) +/* + * typecheck + */ + +#define typecheck(a, b) do {} while(0) /* - * Linux kernel version definition + * linux/crypto.h */ -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE (2*100+6*10+7) +#define CRYPTO_MAX_ALG_NAME 64 +#define CRYPTO_TFM_MODE_ECB 0x00000001 +#define CRYPTO_TFM_MODE_CBC 0x00000002 +#define CRYPTO_TFM_MODE_CFB 0x00000004 +#define CRYPTO_TFM_MODE_CTR 0x00000008 +#define CRYPTO_TFM_MODE_EME 0x00000010 /* - * Signal + * hash */ -#define 
SIGNAL_MASK_ASSERT() +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME_32 0x9e370001UL + +#if 0 /* defined in libcfs/libcfs_hash.h */ +static inline u32 hash_long(u32 val, unsigned int bits) +{ + /* On some cpus multiply is faster, on others gcc will do shifts */ + u32 hash = val * GOLDEN_RATIO_PRIME_32; + + /* High bits are more random, so use them. */ + return hash >> (32 - bits); +} +#endif /* * Timer @@ -614,40 +824,11 @@ typedef struct cfs_timer { cfs_time_t deadline; - void (*proc)(ulong_ptr); + void (*proc)(ulong_ptr_t); void * arg; } cfs_timer_t; - -typedef void (*timer_func_t)(ulong_ptr); - -#define cfs_init_timer(t) - -void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg); -void cfs_timer_done(cfs_timer_t *t); -void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline); -void cfs_timer_disarm(cfs_timer_t *t); -int cfs_timer_is_armed(cfs_timer_t *t); -cfs_time_t cfs_timer_deadline(cfs_timer_t *t); - - -/* deschedule for a bit... */ -static inline void cfs_pause(cfs_duration_t ticks) -{ - cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks); -} - - -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; -#else - KdBreakPoint(); -#endif -} - /* * libcfs globals initialization/cleanup */ @@ -659,23 +840,33 @@ void libcfs_arch_cleanup(void); /* + * cache alignment size + */ + +#define L1_CACHE_ALIGN(x) (x) + +#define __cacheline_aligned + +/* * SMP ... 
*/ + #define SMP_CACHE_BYTES 128 -#define __cacheline_aligned -#define NR_CPUS (2) -#define smp_processor_id() KeGetCurrentProcessorNumber() -#define smp_num_cpus NR_CPUS -#define num_online_cpus() smp_num_cpus +#define NR_CPUS (32) +#define smp_num_cpus ((CCHAR)KeNumberProcessors) +#define num_possible_cpus() smp_num_cpus +#define num_online_cpus() smp_num_cpus +#define smp_processor_id() ((USHORT)KeGetCurrentProcessorNumber()) #define smp_call_function(f, a, n, w) do {} while(0) +#define smp_rmb() do {} while(0) /* * Irp related */ -#define NR_IRQS 512 -#define in_interrupt() (0) +#define NR_IRQS 512 +#define in_interrupt() (0) /* * printk flags @@ -694,7 +885,6 @@ libcfs_arch_cleanup(void); * Misc */ - #define inter_module_get(n) cfs_symbol_get(n) #define inter_module_put(n) cfs_symbol_put(n) @@ -708,65 +898,79 @@ libcfs_arch_cleanup(void); #define lock_kernel() do {} while(0) #define unlock_kernel() do {} while(0) -#define USERMODEHELPER(path, argv, envp) (0) - - #define local_irq_save(x) #define local_irq_restore(x) -#define cfs_assert ASSERT - #define THREAD_NAME -#else /* !__KERNEL__ */ +#define va_copy(_d, _s) (_d = _s) -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK +char *strnchr(const char *s, size_t count, int c); -#define getpagesize() (PAGE_SIZE) +#define adler32(a,b,l) zlib_adler32(a,b,l) +ULONG zlib_adler32(ULONG adler, const BYTE *buf, UINT len); +typedef ssize_t (*read_actor_t)(); -typedef struct { - int foo; -} pthread_mutex_t; +#if DBG +/* + * winnt debug routines + */ -typedef struct { - int foo; -} pthread_cond_t; +VOID +KsPrintf( + LONG DebugPrintLevel, + PCHAR DebugMessage, + ... 
+ ); -#define pthread_mutex_init(x, y) do {} while(0) -#define pthread_cond_init(x, y) do {} while(0) +PUCHAR +KsNtStatusToString (IN NTSTATUS Status); +#endif -#define pthread_mutex_lock(x) do {} while(0) -#define pthread_mutex_unlock(x) do {} while(0) +#else /* !__KERNEL__ */ -#define pthread_cond_wait(x,y) do {} while(0) -#define pthread_cond_broadcast(x) do {} while(0) +void cfs_enter_debugger(); -typedef struct file { - int foo; -} cfs_file_t; +/* + * PAGE_SIZE ... + */ -typedef struct cfs_proc_dir_entry{ - void *data; -}cfs_proc_dir_entry_t; +#ifndef PAGE_SIZE +#define PAGE_SIZE (4096) +#endif +#define getpagesize() (4096) +#define PAGE_CACHE_SIZE PAGE_SIZE +#define PAGE_CACHE_MASK PAGE_MASK -#include "../user-prim.h" +#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t) -1) +#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t) -2) +#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t) -3) +typedef struct file { + int foo; +} cfs_file_t; + +#include "../user-prim.h" +#include "../user-lock.h" #include #include #define strcasecmp strcmp #define strncasecmp strncmp -#define snprintf _snprintf #define getpid() (0) - -#define getpwuid(x) (NULL) +#define getuid() (0) #define getgrgid(x) (NULL) +struct passwd { + uid_t pw_uid; + char pw_name[64]; +}; +struct passwd * getpwuid(uid_t uid); + int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev); int gethostname(char * name, int namelen); @@ -774,195 +978,191 @@ int gethostname(char * name, int namelen); #define setlinebuf(x) do {} while(0) -NTSYSAPI VOID NTAPI DebugBreak(); +/* Maximum EA Information Length */ +#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15) +/* + * proc user mode routines + */ -static inline void cfs_enter_debugger(void) -{ -#if _X86_ - __asm int 3; +int cfs_proc_open (char * filename, int oflag); +int cfs_proc_close(int fd); +int cfs_proc_read(int fd, void *buffer, unsigned int count); +int cfs_proc_write(int fd, void *buffer, unsigned int count); +int 
cfs_proc_ioctl(int fd, int cmd, void *buffer); +FILE *cfs_proc_fopen(char *path, char * mode); +char *cfs_proc_fgets(char * buf, int len, FILE *fp); +int cfs_proc_fclose(FILE *fp); + +/* Bits set in the FLAGS argument to `glob'. */ +#define GLOB_ERR (1 << 0)/* Return on read errors. */ +#define GLOB_MARK (1 << 1)/* Append a slash to each name. */ +#define GLOB_NOSORT (1 << 2)/* Don't sort the names. */ +#define GLOB_DOOFFS (1 << 3)/* Insert PGLOB->gl_offs NULLs. */ +#define GLOB_NOCHECK (1 << 4)/* If nothing matches, return the pattern. */ +#define GLOB_APPEND (1 << 5)/* Append to results of a previous call. */ +#define GLOB_NOESCAPE (1 << 6)/* Backslashes don't quote metacharacters. */ +#define GLOB_PERIOD (1 << 7)/* Leading `.' can be matched by metachars. */ + +#if !defined __USE_POSIX2 || defined __USE_BSD || defined __USE_GNU +# define GLOB_MAGCHAR (1 << 8)/* Set in gl_flags if any metachars seen. */ +# define GLOB_ALTDIRFUNC (1 << 9)/* Use gl_opendir et al functions. */ +# define GLOB_BRACE (1 << 10)/* Expand "{a,b}" to "a" "b". */ +# define GLOB_NOMAGIC (1 << 11)/* If no magic chars, return the pattern. */ +# define GLOB_TILDE (1 << 12)/* Expand ~user and ~ to home directories. */ +# define GLOB_ONLYDIR (1 << 13)/* Match only directories. */ +# define GLOB_TILDE_CHECK (1 << 14)/* Like GLOB_TILDE but return an error + if the user name is not available. */ +# define __GLOB_FLAGS (GLOB_ERR|GLOB_MARK|GLOB_NOSORT|GLOB_DOOFFS| \ + GLOB_NOESCAPE|GLOB_NOCHECK|GLOB_APPEND| \ + GLOB_PERIOD|GLOB_ALTDIRFUNC|GLOB_BRACE| \ + GLOB_NOMAGIC|GLOB_TILDE|GLOB_ONLYDIR|GLOB_TILDE_CHECK) #else - DebugBreak(); +# define __GLOB_FLAGS (GLOB_ERR|GLOB_MARK|GLOB_NOSORT|GLOB_DOOFFS| \ + GLOB_NOESCAPE|GLOB_NOCHECK|GLOB_APPEND| \ + GLOB_PERIOD) #endif -} -/* Maximum EA Information Length */ -#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15) +/* Error returns from `glob'. */ +#define GLOB_NOSPACE 1 /* Ran out of memory. */ +#define GLOB_ABORTED 2 /* Read error. 
*/ +#define GLOB_NOMATCH 3 /* No matches found. */ +#define GLOB_NOSYS 4 /* Not implemented. */ +#ifdef __USE_GNU +/* Previous versions of this file defined GLOB_ABEND instead of + GLOB_ABORTED. Provide a compatibility definition here. */ +# define GLOB_ABEND GLOB_ABORTED +#endif + +/* Structure describing a globbing run. */ +#ifdef __USE_GNU +struct stat; +#endif +typedef struct + { + size_t gl_pathc; /* Count of paths matched by the pattern. */ + char **gl_pathv; /* List of matched pathnames. */ + size_t gl_offs; /* Slots to reserve in `gl_pathv'. */ + int gl_flags; /* Set to FLAGS, maybe | GLOB_MAGCHAR. */ + + /* If the GLOB_ALTDIRFUNC flag is set, the following functions + are used instead of the normal file access functions. */ + void (*gl_closedir) (void *); +#ifdef __USE_GNU + struct dirent *(*gl_readdir) (void *); +#else + void *(*gl_readdir) (void *); +#endif + void *(*gl_opendir) (const char *); +#ifdef __USE_GNU + int (*gl_lstat) (const char *__restrict, struct stat *__restrict); + int (*gl_stat) (const char *__restrict, struct stat *__restrict); +#else + int (*gl_lstat) (const char *__restrict, void *__restrict); + int (*gl_stat) (const char *__restrict, void *__restrict); +#endif + } glob_t; + +#ifdef __USE_LARGEFILE64 +# ifdef __USE_GNU +struct stat64; +# endif +typedef struct + { + __size_t gl_pathc; + char **gl_pathv; + __size_t gl_offs; + int gl_flags; + + /* If the GLOB_ALTDIRFUNC flag is set, the following functions + are used instead of the normal file access functions. 
*/ + void (*gl_closedir) (void *); +# ifdef __USE_GNU + struct dirent64 *(*gl_readdir) (void *); +# else + void *(*gl_readdir) (void *); +# endif + void *(*gl_opendir) (__const char *); +# ifdef __USE_GNU + int (*gl_lstat) (__const char *__restrict, struct stat64 *__restrict); + int (*gl_stat) (__const char *__restrict, struct stat64 *__restrict); +# else + int (*gl_lstat) (__const char *__restrict, void *__restrict); + int (*gl_stat) (__const char *__restrict, void *__restrict); +# endif + } glob64_t; +#endif + +int glob (const char * __pattern, int __flags, + int (*__errfunc) (const char *, int), + glob_t * __pglob); +void globfree(glob_t *__pglog); +#endif /* !__KERNEL__ */ /* - * proc user mode routines + * module routines */ -HANDLE cfs_proc_open (char * filename, int oflag); -int cfs_proc_close(HANDLE handle); -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count); -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer); +static inline void __module_get(struct module *module) +{ +} +static inline int try_module_get(struct module *module) +{ + return 1; +} + +static inline void module_put(struct module *module) +{ +} /* - * Native API definitions + * sigset_t routines */ -// -// Disk I/O Routines -// - -NTSYSAPI -NTSTATUS -NTAPI -NtReadFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtWriteFile(HANDLE FileHandle, - HANDLE Event OPTIONAL, - PIO_APC_ROUTINE ApcRoutine OPTIONAL, - PVOID ApcContext OPTIONAL, - PIO_STATUS_BLOCK IoStatusBlock, - PVOID Buffer, - ULONG Length, - PLARGE_INTEGER ByteOffset OPTIONAL, - PULONG Key OPTIONAL); - -NTSYSAPI -NTSTATUS -NTAPI -NtClose(HANDLE Handle); - -NTSYSAPI -NTSTATUS -NTAPI -NtCreateFile(PHANDLE FileHandle, - 
ACCESS_MASK DesiredAccess, - POBJECT_ATTRIBUTES ObjectAttributes, - PIO_STATUS_BLOCK IoStatusBlock, - PLARGE_INTEGER AllocationSize OPTIONAL, - ULONG FileAttributes, - ULONG ShareAccess, - ULONG CreateDisposition, - ULONG CreateOptions, - PVOID EaBuffer OPTIONAL, - ULONG EaLength); - - -NTSYSAPI -NTSTATUS -NTAPI -NtDeviceIoControlFile( - IN HANDLE FileHandle, - IN HANDLE Event, - IN PIO_APC_ROUTINE ApcRoutine, - IN PVOID ApcContext, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG IoControlCode, - IN PVOID InputBuffer, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer, - OUT ULONG OutputBufferLength - ); - -NTSYSAPI -NTSTATUS -NTAPI -NtFsControlFile( - IN HANDLE FileHandle, - IN HANDLE Event OPTIONAL, - IN PIO_APC_ROUTINE ApcRoutine OPTIONAL, - IN PVOID ApcContext OPTIONAL, - OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG FsControlCode, - IN PVOID InputBuffer OPTIONAL, - IN ULONG InputBufferLength, - OUT PVOID OutputBuffer OPTIONAL, - IN ULONG OutputBufferLength -); - - -NTSYSAPI -NTSTATUS -NTAPI -NtQueryInformationFile( - IN HANDLE FileHandle, - OUT PIO_STATUS_BLOCK IoStatusBlock, - OUT PVOID FileInformation, - IN ULONG Length, - IN FILE_INFORMATION_CLASS FileInformationClass - ); +typedef sigset_t cfs_sigset_t; +#define sigaddset(what,sig) (*(what) |= (1<<(sig)), 0) +#define sigdelset(what,sig) (*(what) &= ~(1<<(sig)), 0) +#define sigemptyset(what) (*(what) = 0, 0) +#define sigfillset(what) (*(what) = ~(0), 0) +#define sigismember(what,sig) (((*(what)) & (1<<(sig))) != 0) + +static __inline int +sigprocmask(int sig, cfs_sigset_t *w1, cfs_sigset_t *w2) { + return 0; +} +static __inline int +sigpending(cfs_sigset_t *what) { + return 0; +} + +/* + * common inode flags (user & kernel) + */ -// -// Random routines ... -// - -NTSYSAPI -ULONG -NTAPI -RtlRandom( - IN OUT PULONG Seed - ); - -#endif /* __KERNEL__ */ - - -// -// Inode flags (Linux uses octad number, but why ? strange!!!) 
-// - -#undef S_IFMT -#undef S_IFDIR -#undef S_IFCHR -#undef S_IFREG -#undef S_IREAD -#undef S_IWRITE -#undef S_IEXEC - -#define S_IFMT 0x0F000 /* 017 0000 */ -#define S_IFSOCK 0x0C000 /* 014 0000 */ -#define S_IFLNK 0x0A000 /* 012 0000 */ -#define S_IFREG 0x08000 /* 010 0000 */ -#define S_IFBLK 0x06000 /* 006 0000 */ -#define S_IFDIR 0x04000 /* 004 0000 */ -#define S_IFCHR 0x02000 /* 002 0000 */ -#define S_IFIFO 0x01000 /* 001 0000 */ -#define S_ISUID 0x00800 /* 000 4000 */ -#define S_ISGID 0x00400 /* 000 2000 */ -#define S_ISVTX 0x00200 /* 000 1000 */ - -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) -#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) -#define S_ISFIL(m) (((m) & S_IFMT) == S_IFFIL) -#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) -#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) - -#define S_IPERMISSION_MASK 0x1FF /* */ - -#define S_IRWXU 0x1C0 /* 0 0700 */ -#define S_IRUSR 0x100 /* 0 0400 */ -#define S_IWUSR 0x080 /* 0 0200 */ -#define S_IXUSR 0x040 /* 0 0100 */ - -#define S_IRWXG 0x038 /* 0 0070 */ -#define S_IRGRP 0x020 /* 0 0040 */ -#define S_IWGRP 0x010 /* 0 0020 */ -#define S_IXGRP 0x008 /* 0 0010 */ - -#define S_IRWXO 0x007 /* 0 0007 */ -#define S_IROTH 0x004 /* 0 0004 */ -#define S_IWOTH 0x002 /* 0 0002 */ -#define S_IXOTH 0x001 /* 0 0001 */ +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) + +#define S_IRWXU 00700 +#define S_IRUSR 00400 +#define S_IWUSR 00200 +#define S_IXUSR 00100 + +#define S_IRWXG 00070 +#define S_IRGRP 00040 +#define S_IWGRP 00020 +#define S_IXGRP 00010 + +#define S_IRWXO 00007 +#define S_IROTH 
00004 +#define S_IWOTH 00002 +#define S_IXOTH 00001 #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) #define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) @@ -970,6 +1170,13 @@ RtlRandom( #define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) + +/* + * Linux kernel version definition + */ + +#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) + /* * linux ioctl coding definitions */ @@ -979,7 +1186,7 @@ RtlRandom( #define _IOC_SIZEBITS 14 #define _IOC_DIRBITS 2 -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) #define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) #define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) #define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) @@ -1014,16 +1221,26 @@ RtlRandom( #define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) #define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) -/* - * Io vector ... - */ - -struct iovec -{ +/* i/o vector structure ... */ +struct iovec { void *iov_base; size_t iov_len; }; +/* idr support routines */ +struct idr_context *cfs_idr_init(); +int cfs_idr_remove(struct idr_context *idp, int id); +int cfs_idr_get_new(struct idr_context *idp, void *ptr); +int cfs_idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id); +void *cfs_idr_find(struct idr_context *idp, int id); +void cfs_idr_exit(struct idr_context *idp); + +/* runtime routines for both kernel and user mode */ +extern int cfs_isalpha(int); +extern int cfs_isspace(int); +extern int cfs_isupper(int); +extern int cfs_isdigit(int); +extern int cfs_isxdigit(int); #define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF)) /* @@ -1032,61 +1249,86 @@ struct iovec * Ignores `locale' stuff. Assumes that the upper and lower case * alphabets and digits are each contiguous. */ -static inline __u64 -strtoull( - char *nptr, - char **endptr, - int base) +__u64 strtoull(char *nptr, char **endptr,int base); + +/* + * getopt routines + */ + +/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + + +struct option { - char *s = nptr; - __u64 acc, cutoff; - int c, neg = 0, any, cutlim; - - /* - * See strtol for comments as to the logic used. - */ - do { - c = *s++; - } while (isspace(c)); - if (c == '-') { - neg = 1; - c = *s++; - } else if (c == '+') - c = *s++; - if ((base == 0 || base == 16) && - c == '0' && (*s == 'x' || *s == 'X')) { - c = s[1]; - s += 2; - base = 16; - } - if (base == 0) - base = c == '0' ? 8 : 10; - cutoff = (__u64)ULONG_LONG_MAX / (__u64)base; - cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base); - for (acc = 0, any = 0;; c = *s++) { - if (isdigit(c)) - c -= '0'; - else if (isalpha(c)) - c -= isupper(c) ? 'A' - 10 : 'a' - 10; - else - break; - if (c >= base) - break; - if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) - any = -1; - else { - any = 1; - acc *= base; - acc += c; - } - } - if (any < 0) { - acc = ULONG_LONG_MAX; - } else if (neg) - acc = 0 - acc; - if (endptr != 0) - *endptr = (char *) (any ? 
s - 1 : nptr); - return (acc); -} + const char *name; + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 + +extern int getopt(int ___argc, char *const *___argv, const char *__shortopts); +extern int getopt_long (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int ___argc, char *const *___argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); + +extern char *strcasestr (const char *phaystack, const char *pneedle); + +/* + * global environment runtime routine + */ + +static __inline char * __cdecl cfs_getenv(const char *ENV) {return NULL;} +static __inline void __cdecl set_getenv(const char *ENV, const char *value, int overwrite) {} + +int setenv(const char *envname, const char *envval, int overwrite); + +struct utsname { + char sysname[64]; + char nodename[64]; + char release[128]; + char version[128]; + char machine[64]; +}; + +int uname(struct utsname *uts); #endif diff --git a/libcfs/include/libcfs/winnt/winnt-tcpip.h b/libcfs/include/libcfs/winnt/winnt-tcpip.h index 179b36b..ebaa4f9 100644 --- a/libcfs/include/libcfs/winnt/winnt-tcpip.h +++ b/libcfs/include/libcfs/winnt/winnt-tcpip.h @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: * * GPL HEADER START * @@ -51,22 +51,21 @@ // iovec is defined in libcfs: winnt_prim.h // lnetkiov_t is defined in lnet/types.h -typedef struct socket ksock_tconn_t; -typedef struct socket cfs_socket_t; +typedef struct socket ks_tconn_t, cfs_socket_t; // 
completion notification callback routine -typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr); +typedef VOID (*ks_schedule_cb)(struct socket*, int); -/* completion routine to update tx structure for async sending */ -typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); +#define SOCK_ERROR(s) ((s->kstc_state >= ksts_disconnected) ? ECONNRESET : 0) +#define SOCK_TEST_NOSPACE(s) (1) // // tdinal definitions // -#if TDI_LIBCFS_DBG +#if DBG #define KsPrint(X) KsPrintf X #else #define KsPrint(X) @@ -77,17 +76,17 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); // Socket Addresses Related ... // -#define INADDR_ANY (ULONG)0x00000000 +#define INADDR_ANY (ULONG)0x00000000 #define INADDR_LOOPBACK (ULONG)0x7f000001 -#define INADDR_BROADCAST (ULONG)0xffffffff -#define INADDR_NONE (ULONG)0xffffffff +#define INADDR_BROADCAST (ULONG)0xffffffff +#define INADDR_NONE (ULONG)0xffffffff /* * TCP / IP options */ #define SOL_TCP 6 -#define SOL_UDP 17 +#define SOL_UD 17 #define TL_INSTANCE 0 @@ -104,7 +103,7 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); Added those for 1003.1g not all are supported yet */ -#define MSG_OOB 1 +#define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 #define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ @@ -168,15 +167,14 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); typedef struct _KS_TSDU { - ULONG Magic; - ULONG Flags; + ULONG Magic; /* magic */ + ULONG Flags; /* flags */ - struct list_head Link; + struct list_head Link; /* link list */ - ULONG TotalLength; // Total size of KS_TSDU - - ULONG StartOffset; // Start offset of the first Tsdu unit - ULONG LastOffset; // End offset of the last Tsdu unit + ULONG TotalLength; /* total size of KS_TSDU */ + ULONG StartOffset; /* offset of the first Tsdu unit */ + ULONG LastOffset; /* end offset of the last Tsdu unit */ /* union { @@ -192,7 +190,8 @@ typedef struct _KS_TSDU { 
#define TSDU_TYPE_DAT ((USHORT)0x5402) #define TSDU_TYPE_MDL ((USHORT)0x5403) -#define KS_TSDU_BUF_RECEIVING 0x0001 +#define KS_TSDU_COMM_PARTIAL 0x0001 + typedef struct _KS_TSDU_BUF { USHORT TsduType; @@ -202,11 +201,9 @@ typedef struct _KS_TSDU_BUF { ULONG StartOffset; PVOID UserBuffer; - + PMDL Mdl; /* mdl */ } KS_TSDU_BUF, *PKS_TSDU_BUF; -#define KS_TSDU_DAT_RECEIVING 0x0001 - typedef struct _KS_TSDU_DAT { USHORT TsduType; @@ -216,48 +213,65 @@ typedef struct _KS_TSDU_DAT { ULONG StartOffset; ULONG TotalLength; + PMDL Mdl; /* mdl */ - UCHAR Data[1]; + UCHAR Data[0]; } KS_TSDU_DAT, *PKS_TSDU_DAT; -#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03))) -#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data))) +#define KS_QWORD_ALIGN(x) (((x) + 0x07) & 0xFFFFFFF8) +#define KS_TSDU_STRU_SIZE(Len) (KS_QWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data[0]))) typedef struct _KS_TSDU_MDL { + USHORT TsduType; /* TSDU_TYPE_MDL */ + USHORT TsduFlags; /* */ - USHORT TsduType; - USHORT TsduFlags; - - ULONG DataLength; - ULONG StartOffset; + ULONG DataLength; /* total valid data length */ + ULONG BaseOffset; /* payload offset in Tsdu */ + ULONG StartOffset; /* offset in payload */ + PVOID Descriptor; /* tdi descriptor for receiving */ PMDL Mdl; - PVOID Descriptor; - } KS_TSDU_MDL, *PKS_TSDU_MDL; +typedef struct ks_engine_mgr { + spinlock_t lock; + int stop; + event_t exit; + event_t start; + struct list_head list; +} ks_engine_mgr_t; + +typedef struct ks_engine_slot { + ks_tconn_t * tconn; + void * tsdumgr; + struct list_head link; + int queued; + ks_engine_mgr_t * emgr; +} ks_engine_slot_t; typedef struct _KS_TSDUMGR { - - struct list_head TsduList; - ULONG NumOfTsdu; - ULONG TotalBytes; - KEVENT Event; - + struct list_head TsduList; + ULONG NumOfTsdu; + ULONG TotalBytes; + KEVENT Event; + spinlock_t Lock; + ks_engine_slot_t Slot; + ULONG Payload; + int Busy:1; + int OOB:1; } KS_TSDUMGR, *PKS_TSDUMGR; +#define ks_lock_tsdumgr(mgr) 
spin_lock(&((mgr)->Lock)) +#define ks_unlock_tsdumgr(mgr) spin_unlock(&((mgr)->Lock)) typedef struct _KS_CHAIN { - - KS_TSDUMGR Normal; - KS_TSDUMGR Expedited; - + KS_TSDUMGR Normal; /* normal queue */ + KS_TSDUMGR Expedited; /* OOB/expedited queue */ } KS_CHAIN, *PKS_CHAIN; -#define TDINAL_SCHED_FACTOR (1) -#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR)) +#define KS_CAN_SCHED(TM) ((TM)->TotalBytes >= ((TM)->Payload >> 2)) // // Handler Settings Indictor @@ -299,7 +313,7 @@ typedef struct _KS_ADDRESS { typedef struct _KS_DISCONNECT_WORKITEM { WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection - ksock_tconn_t * tconn; // tdi connecton + ks_tconn_t * tconn; // tdi connecton ULONG Flags; // connection broken/discnnection flags KEVENT Event; // sync event @@ -323,45 +337,38 @@ typedef struct _KS_CONNECTION { // type definitions // -typedef MDL ksock_mdl_t; -typedef UNICODE_STRING ksock_unicode_name_t; -typedef WORK_QUEUE_ITEM ksock_workitem_t; +typedef MDL ks_mdl_t; +typedef UNICODE_STRING ks_unicode_name_t; +typedef WORK_QUEUE_ITEM ks_workitem_t; -typedef KS_CHAIN ksock_chain_t; -typedef KS_ADDRESS ksock_tdi_addr_t; -typedef KS_CONNECTION ksock_tconn_info_t; -typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t; +typedef KS_CHAIN ks_chain_t; +typedef KS_ADDRESS ks_tdi_addr_t; +typedef KS_CONNECTION ks_tconn_info_t; +typedef KS_DISCONNECT_WORKITEM ks_disconnect_t; // // Structures for transmission done Workitem // -typedef struct _KS_TCPX_FINILIZE { - ksock_workitem_t item; - void * tx; -} ksock_tcpx_fini_t; - - -typedef struct ksock_backlogs { +typedef struct ks_backlogs { struct list_head list; /* list to link the backlog connections */ int num; /* number of backlogs in the list */ -} ksock_backlogs_t; +} ks_backlogs_t; -typedef struct ksock_daemon { +typedef struct ks_daemon { - ksock_tconn_t * tconn; /* the listener connection object */ + ks_tconn_t * tconn; /* the listener connection object */ unsigned short nbacklogs; 
/* number of listening backlog conns */ unsigned short port; /* listening port number */ int shutdown; /* daemon threads is to exit */ - struct list_head list; /* to be attached into ksock_nal_data_t*/ - -} ksock_daemon_t ; + struct list_head list; /* to be attached into ks_nal_data_t */ +} ks_daemon_t; typedef enum { @@ -373,8 +380,10 @@ typedef enum { // or refuse the connecting request from remote peers. kstt_child, // accepted child connection type, it's parent must be Listener + kstt_lasttype -} ksock_tconn_type; + +} ks_tconn_type_t; typedef enum { @@ -401,7 +410,8 @@ typedef enum { ksts_aborted, // un-exptected broken status ksts_last // total number of tconn statuses -} ksock_tconn_state; + +} ks_tconn_state_t; #define KS_TCONN_MAGIC 'KSTM' @@ -411,23 +421,22 @@ typedef enum { #define KS_TCONN_DAEMON_STARTED 0x00100000 // indict the daemon is started, // only valid for listener - struct socket { - ulong_ptr kstc_magic; /* Magic & Flags */ - ulong_ptr kstc_flags; + ulong kstc_magic; /* Magic & Flags */ + ulong kstc_flags; spinlock_t kstc_lock; /* serialise lock*/ - void * kstc_conn; /* ksock_conn_t */ + void * kstc_conn; /* ks_conn_t */ - ksock_tconn_type kstc_type; /* tdi connection Type */ - ksock_tconn_state kstc_state; /* tdi connection state flag */ + ks_tconn_type_t kstc_type; /* tdi connection Type */ + ks_tconn_state_t kstc_state; /* tdi connection state flag */ - ksock_unicode_name_t kstc_dev; /* tcp transport device name */ + ks_unicode_name_t kstc_dev; /* tcp transport device name */ - ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */ + ks_tdi_addr_t kstc_addr; /* local address handlers / Objects */ - atomic_t kstc_refcount; /* reference count of ksock_tconn */ + atomic_t kstc_refcount; /* reference count of ks_tconn_t */ struct list_head kstc_list; /* linked to global ksocknal_data */ @@ -435,17 +444,17 @@ struct socket { struct { int nbacklog; /* total number of backlog tdi connections */ - ksock_backlogs_t kstc_listening; /* 
listeing backlog child connections */ - ksock_backlogs_t kstc_accepted; /* connected backlog child connections */ + ks_backlogs_t kstc_listening; /* listening backlog child connections */ + ks_backlogs_t kstc_accepted; /* connected backlog child connections */ event_t kstc_accept_event; /* Signaled by AcceptedHander, ksocknal_wait_accpeted_conns waits on */ event_t kstc_destroy_event; /* Signaled when accepted child is released */ } listener; struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ + ks_tconn_info_t kstc_info; /* Connection Info if Connected */ + ks_chain_t kstc_recv; /* tsdu engine for data receiving */ + ks_chain_t kstc_send; /* tsdu engine for data sending */ int kstc_queued; /* Attached to Parent->ChildList ... */ int kstc_queueno; /* 0: Attached to Listening list @@ -455,30 +464,28 @@ struct socket { int kstc_accepted; /* the connection is built ready ?
*/ struct list_head kstc_link; /* linked to parent tdi connection */ - ksock_tconn_t * kstc_parent; /* pointers to it's listener parent */ + ks_tconn_t * kstc_parent; /* pointers to it's listener parent */ } child; struct { - ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ - ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ - ksock_chain_t kstc_send; /* tsdu engine for data sending */ + ks_tconn_info_t kstc_info; /* Connection Info if Connected */ + ks_chain_t kstc_recv; /* tsdu engine for data receiving */ + ks_chain_t kstc_send; /* tsdu engine for data sending */ } sender; }; - ulong_ptr kstc_snd_wnd; /* Sending window size */ - ulong_ptr kstc_rcv_wnd; /* Recving window size */ + ulong kstc_snd_wnd; /* Sending window size */ + ulong kstc_rcv_wnd; /* Recving window size */ - ksock_workitem_t kstc_destroy; /* tconn destruction workitem */ - ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */ + ks_workitem_t kstc_destroy; /* tconn destruction workitem */ + ks_disconnect_t kstc_disconnect; /* connection disconnect workitem */ - ksock_schedule_cb kstc_sched_cb; /* notification callback routine of completion */ - ksock_update_tx kstc_update_tx; /* aync sending callback to update tx */ + ks_schedule_cb kstc_sched_cb; /* notification callback routine of completion */ }; #define SOCK_WMEM_QUEUED(sock) (0) - #define TDINAL_WINDOW_DEFAULT_SIZE (0x100000) - +#define TDINAL_MAX_TSDU_QUEUE_SIZE (0x200000) struct _KS_UDP_COMPLETION_CONTEXT; struct _KS_TCP_COMPLETION_CONTEXT; @@ -510,7 +517,7 @@ typedef struct _KS_UDP_COMPLETION_CONTEXT { PKEVENT Event; union { PFILE_OBJECT AddressObject; - ksock_tconn_t * tconn; + ks_tconn_t * tconn; }; PKS_UDP_COMPLETION_ROUTINE CompletionRoutine; @@ -523,29 +530,20 @@ typedef struct _KS_UDP_COMPLETION_CONTEXT { // Tcp Irp Completion Context (used by tcp data recv/send) // -typedef struct _KS_TCP_COMPLETION_CONTEXT { +#define KS_TCP_CONTEXT_MAGIC 'CCTK' +typedef struct 
_KS_TCP_COMPLETION_CONTEXT { PKEVENT Event; // Event to be waited on by Irp caller ... - - ksock_tconn_t * tconn; // the tdi connection - + ks_tconn_t * tconn; // the tdi connection PKS_TCP_COMPLETION_ROUTINE CompletionRoutine; PVOID CompletionContext; - PVOID CompletionContext2; - - PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager - - // - // These tow new members are for NON_BLOCKING transmission - // - - BOOLEAN bCounted; // To indict needing refcount to - // execute CompetionRoutine - ULONG ReferCount; // Refer count of this structure - + PKS_TSDUMGR TsduMgr; // Tsdu buffer manager + ULONG Length; // Payload length in KsTsdu queue + PCHAR Buffer; // User allocated buffer + ULONG Magic; // Magic key } KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT; -typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t; +typedef KS_TCP_COMPLETION_CONTEXT ks_tdi_tx_t, ks_tdi_rx_t; /* @@ -596,7 +594,6 @@ typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t; Irp->UserBuffer = OutBuffer; \ } - typedef struct ks_addr_slot { LIST_ENTRY link; int up; @@ -626,30 +623,28 @@ typedef struct { int ksnd_init; /* initialisation state */ - TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */ + TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */ spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */ int ksnd_ntconns; /* number of tconns attached in list */ struct list_head ksnd_tconns; /* tdi connections list */ - cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */ + cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ks_tconn_t allocations */ event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */ spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */ int ksnd_ntsdus; /* number of tsdu buffers allocated */ - ulong_ptr ksnd_tsdu_size; /* the size of a signel tsdu buffer */ + ulong ksnd_tsdu_size; /* the size of a signel tsdu buffer */ cfs_mem_cache_t * ksnd_tsdu_slab; /* slab 
cache for tsdu buffer allocation */ int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */ - struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. */ + struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. */ - spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */ - int ksnd_ndaemons; /* number of listening daemons */ - struct list_head ksnd_daemons; /* listening daemon list */ - event_t ksnd_daemon_exit; /* the last daemon quiting should singal it */ + int ksnd_engine_nums; /* number of tcp sending engine threads */ + ks_engine_mgr_t * ksnd_engine_mgr; /* tcp sending engine structure */ -} ks_data_t; +} ks_tdi_data_t; int ks_init_tdi_data(); @@ -658,6 +653,71 @@ void ks_fini_tdi_data(); +int +ks_query_local_ipaddr( + ks_tconn_t * tconn + ); + +void +ks_get_tconn( + ks_tconn_t * tconn + ); + +void +ks_put_tconn( + ks_tconn_t * tconn + ); + +void +ks_abort_tconn( + ks_tconn_t * tconn + ); +int +ks_disconnect_tconn( + ks_tconn_t * tconn, + ulong flags + ); + +void +ks_destroy_tconn( + ks_tconn_t * tconn + ); + +NTSTATUS +KsLockUserBuffer ( + IN PVOID UserBuffer, + IN BOOLEAN bPaged, + IN ULONG Length, + IN LOCK_OPERATION Operation, + OUT PMDL * pMdl + ); + +VOID +KsReleaseMdl (IN PMDL Mdl, + IN int Paged ); + +void +KsQueueTdiEngine(ks_tconn_t * tconn, PKS_TSDUMGR); + +void +KsRemoveTdiEngine(PKS_TSDUMGR); + +NTSTATUS +ks_set_tcp_option ( + ks_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + ULONG Length + ); + +int +ks_get_tcp_option ( + ks_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + PULONG Length + ); + #endif /* __KERNEL__ */ #endif /* __LIBCFS_WINNT_TCPIP_H__ */ diff --git a/libcfs/include/libcfs/winnt/winnt-time.h b/libcfs/include/libcfs/winnt/winnt-time.h index 35c6526..3c46828 100644 --- a/libcfs/include/libcfs/winnt/winnt-time.h +++ b/libcfs/include/libcfs/winnt/winnt-time.h @@ -84,33 +84,36 @@ * */ -#define ONE_BILLION ((u_int64_t)1000000000) -#define ONE_MILLION ((u_int64_t) 1000000) - -#define HZ (100) - 
struct timeval { - time_t tv_sec; /* seconds */ - suseconds_t tv_usec; /* microseconds */ + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ }; -struct timespec { - ulong_ptr tv_sec; - ulong_ptr tv_nsec; -}; +typedef time_t cfs_time_t; +typedef time_t cfs_duration_t; #ifdef __KERNEL__ #include +#define HZ (100) + +struct timespec { + __u32 tv_sec; + __u32 tv_nsec; +}; +typedef struct timeval cfs_fs_time_t; + + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION ((u_int64_t) 1000000) + /* * Generic kernel stuff */ -typedef struct timeval cfs_fs_time_t; - -typedef u_int64_t cfs_time_t; -typedef int64_t cfs_duration_t; +#define jiffies (ULONG_PTR)JIFFIES() +#define cfs_jiffies (ULONG_PTR)JIFFIES() static inline void do_gettimeofday(struct timeval *tv) { @@ -118,11 +121,11 @@ static inline void do_gettimeofday(struct timeval *tv) KeQuerySystemTime(&Time); - tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; + tv->tv_sec = (time_t) (Time.QuadPart / 10000000); + tv->tv_usec = (suseconds_t) (Time.QuadPart % 10000000) / 10; } -static inline cfs_time_t JIFFIES() +static inline LONGLONG JIFFIES() { LARGE_INTEGER Tick; LARGE_INTEGER Elapse; @@ -137,33 +140,16 @@ static inline cfs_time_t JIFFIES() static inline cfs_time_t cfs_time_current(void) { - return JIFFIES(); + return (cfs_time_t)JIFFIES(); } -static inline cfs_time_t cfs_time_current_sec(void) +static inline time_t cfs_time_current_sec(void) { - return (JIFFIES() / HZ); + return (time_t)(JIFFIES() / HZ); } -static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) -{ - return (t + d); -} - -static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) -{ - return (t1 - t2); -} - -static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) < 0; -} - -static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) -{ - return ((int64_t)t1 - (int64_t)t2) <= 0; 
-} +#define time_before(t1, t2) (((signed)(t1) - (signed)(t2)) < 0) +#define time_before_eq(t1, t2) (((signed)(t1) - (signed)(t2)) <= 0) static inline void cfs_fs_time_current(cfs_fs_time_t *t) { @@ -178,14 +164,21 @@ static inline void cfs_fs_time_current(cfs_fs_time_t *t) t->tv_usec = (Sys.LowPart % 10000000) / 10; } +static inline unsigned long get_seconds(void) +{ + cfs_fs_time_t t; + cfs_fs_time_current(&t); + return (unsigned long) t.tv_sec; +} + static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t) { - return t->tv_sec; + return (cfs_time_t)t->tv_sec; } -static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) +static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t) { - return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec; + return (unsigned long)(t->tv_sec) * ONE_MILLION + t->tv_usec; } static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) @@ -198,126 +191,95 @@ static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2)); } -static inline cfs_duration_t cfs_time_seconds(int seconds) +static inline cfs_duration_t cfs_time_seconds(cfs_duration_t seconds) { - return (cfs_duration_t)seconds * HZ; + return (cfs_duration_t)(seconds * HZ); } -static inline cfs_time_t cfs_duration_sec(cfs_duration_t d) +static inline time_t cfs_duration_sec(cfs_duration_t d) { - return d / HZ; + return (time_t)(d / HZ); } static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) { - s->tv_sec = (suseconds_t) (d / HZ); - s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * + s->tv_sec = (__u32)(d / HZ); + s->tv_usec = (__u32)((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION / HZ); } static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) { - s->tv_sec = (suseconds_t) (d / HZ); - s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * - ONE_BILLION / HZ); + s->tv_sec = (__u32) (d / HZ); + s->tv_nsec = (__u32)((d - 
(cfs_duration_t)s->tv_sec * HZ) * + ONE_BILLION / HZ); } static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) { - *v = *t; + *v = *t; } static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) { - s->tv_sec = t->tv_sec; - s->tv_nsec = t->tv_usec * 1000; + s->tv_sec = (__u32) t->tv_sec; + s->tv_nsec = (__u32) t->tv_usec * 1000; } -#define cfs_time_current_64 cfs_time_current -#define cfs_time_add_64 cfs_time_add -#define cfs_time_shift_64 cfs_time_shift -#define cfs_time_before_64 cfs_time_before -#define cfs_time_beforeq_64 cfs_time_beforeq - -/* - * One jiffy - */ -#define CFS_TICK (1) - -#define LTIME_S(t) (t) - -#define CFS_TIME_T "%I64u" -#define CFS_DURATION_T "%I64d" - -#else /* !__KERNEL__ */ - -/* - * Liblustre. time(2) based implementation. - */ -#include - - -// -// Time routines ... -// - -NTSYSAPI -CCHAR -NTAPI -NtQuerySystemTime( - OUT PLARGE_INTEGER CurrentTime - ); - -NTSYSAPI -BOOLEAN -NTAPI -RtlTimeToSecondsSince1970( - IN PLARGE_INTEGER Time, - OUT PULONG ElapsedSeconds - ); +#define cfs_time_current_64 JIFFIES +static inline __u64 cfs_time_add_64(__u64 t, __u64 d) +{ + return t + d; +} -NTSYSAPI -VOID -NTAPI -RtlSecondsSince1970ToTime( - IN ULONG ElapsedSeconds, - OUT PLARGE_INTEGER Time - ); - -NTSYSAPI -VOID -NTAPI -Sleep( - DWORD dwMilliseconds // sleep time in milliseconds -); +static inline __u64 cfs_time_shift_64(cfs_duration_t seconds) +{ + return cfs_time_add_64(cfs_time_current_64(), + cfs_time_seconds(seconds)); +} +static inline int cfs_time_before_64(__u64 t1, __u64 t2) +{ + return (__s64)t2 - (__s64)t1 > 0; +} -static inline void sleep(int time) +static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2) { - DWORD Time = 1000 * time; - Sleep(Time); + return (__s64)t2 - (__s64)t1 >= 0; } +/* + * One jiffy + */ +#define CFS_TICK (1) +#define LTIME_S(t) *((__u64 *)&(t)) -static inline void do_gettimeofday(struct timeval *tv) -{ - LARGE_INTEGER Time; +#define CFS_TIME_T "%u" +#define CFS_DURATION_T 
"%d" - NtQuerySystemTime(&Time); +#else /* !__KERNEL__ */ - tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); - tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; -} +#include +#ifdef HAVE_LIBPTHREAD +#include +#else +struct timespec { + unsigned long tv_sec; + unsigned long tv_nsec; +}; +#endif /* HAVE_LIBPTHREAD */ -static inline int gettimeofday(struct timeval *tv, void * tz) -{ - do_gettimeofday(tv); - return 0; -} +#include "../user-time.h" + +/* liblustre. time(2) based implementation. */ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp); +void sleep(int time); +void do_gettimeofday(struct timeval *tv); +int gettimeofday(struct timeval *tv, void * tz); -#endif /* __KERNEL__ */ +#endif /* !__KERNEL__ */ /* __LIBCFS_LINUX_LINUX_TIME_H__ */ #endif diff --git a/libcfs/include/libcfs/winnt/winnt-types.h b/libcfs/include/libcfs/winnt/winnt-types.h index a87ec81..95660e1 100644 --- a/libcfs/include/libcfs/winnt/winnt-types.h +++ b/libcfs/include/libcfs/winnt/winnt-types.h @@ -44,39 +44,36 @@ #ifdef __KERNEL__ #include +#include #include #include #include #include - #include #include #include #else -#include #include #include +#include #include -#include #include +#include #include #include - #endif #define __LITTLE_ENDIAN +#define __user #define inline __inline #define __inline__ __inline -typedef unsigned __int8 __u8; -typedef signed __int8 __s8; - -typedef signed __int64 __s64; -typedef unsigned __int64 __u64; +typedef unsigned __int8 __u8; +typedef signed __int8 __s8; typedef signed __int16 __s16; typedef unsigned __int16 __u16; @@ -87,19 +84,7 @@ typedef unsigned __int32 __u32; typedef signed __int64 __s64; typedef unsigned __int64 __u64; -typedef unsigned long ULONG; - - -#if defined(_WIN64) - #define long_ptr __int64 - #define ulong_ptr unsigned __int64 - #define BITS_PER_LONG (64) -#else - #define long_ptr long - #define ulong_ptr unsigned long - #define BITS_PER_LONG (32) - -#endif +typedef unsigned long ULONG; /* bsd */ typedef 
unsigned char u_char; @@ -123,6 +108,11 @@ typedef __s16 int16_t; typedef __u32 u_int32_t; typedef __s32 int32_t; +#define u8 __u8 +#define u16 __u16 +#define u32 __u32 +#define u64 __u64 + #endif /* !(__BIT_TYPES_DEFINED__) */ typedef __u8 uint8_t; @@ -133,72 +123,99 @@ typedef __u64 uint64_t; typedef __u64 u_int64_t; typedef __s64 int64_t; -typedef long ssize_t; +typedef long ssize_t; -typedef __u32 suseconds_t; +typedef __u32 suseconds_t; -typedef __u32 pid_t, tid_t; +typedef __u16 uid_t, gid_t; -typedef __u16 uid_t, gid_t; +typedef __u16 mode_t; +typedef __u16 umode_t; -typedef __u16 mode_t; -typedef __u16 umode_t; +typedef __u32 sigset_t; -typedef ulong_ptr sigset_t; - -typedef uint64_t loff_t; -typedef HANDLE cfs_handle_t; +typedef int64_t loff_t; +typedef void * cfs_handle_t; typedef uint64_t cycles_t; #ifndef INVALID_HANDLE_VALUE #define INVALID_HANDLE_VALUE ((HANDLE)-1) #endif +# define BITS_PER_LONG (32) + +#if defined(_WIN64) +typedef __int64 long_ptr_t; +typedef unsigned __int64 ulong_ptr_t; +#else +typedef long long_ptr_t; +typedef unsigned long ulong_ptr_t; +#endif #ifdef __KERNEL__ /* kernel */ typedef __u32 off_t; -typedef __u32 time_t; typedef unsigned short kdev_t; -#else /* !__KERNEL__ */ +typedef __u32 pid_t; +typedef __u32 tid_t; -typedef int BOOL; -typedef __u8 BYTE; -typedef __u16 WORD; -typedef __u32 DWORD; +typedef __u32 ino_t; -#endif /* __KERNEL__ */ +#define dma_addr_t PVOID +#define gfp_t __u32 /* - * Conastants suffix + * Bytes order */ -#define ULL i64 -#define ull i64 +// +// Byte order swapping routines +// -/* - * Winnt kernel has no capabilities. 
- */ +#if 0 && NTDDI_VERSION < 0x06000000 + +USHORT +FASTCALL +RtlUshortByteSwap( + IN USHORT Source + ); + +ULONG +FASTCALL +RtlUlongByteSwap( + IN ULONG Source + ); + +ULONGLONG +FASTCALL +RtlUlonglongByteSwap( + IN ULONGLONG Source + ); +#endif -typedef __u32 cfs_kernel_cap_t; +#else /* !__KERNEL__ */ -#define INT_MAX ((int)(~0U>>1)) -#define INT_MIN (-INT_MAX - 1) -#define UINT_MAX (~0U) +typedef int BOOL; -#endif /* _WINNT_TYPES_H */ +#ifndef _WINDOWS_ +typedef __u8 BYTE; +typedef __u16 WORD; +typedef __u32 DWORD; +#endif + +#define __WORDSIZE 32 +typedef long off_t; +#endif /* __KERNEL__ */ /* - * Bytes order + * Conastants suffix */ -// -// Byte order swapping routines -// - +#define ULL i64 +#define ull i64 #define ___swab16(x) RtlUshortByteSwap(x) #define ___swab32(x) RtlUlongByteSwap(x) @@ -218,14 +235,14 @@ typedef __u32 cfs_kernel_cap_t; #define ___constant_swab64(x) \ ((__u64)( \ - (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \ - (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \ - (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \ - (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \ - (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \ - (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \ - (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) )) + (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUi64) << 56) | \ + (__u64)(((__u64)(x) & (__u64)0x000000000000ff00Ui64) << 40) | \ + (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000Ui64) << 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00000000ff000000Ui64) << 8) | \ + (__u64)(((__u64)(x) & (__u64)0x000000ff00000000Ui64) >> 8) | \ + (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000Ui64) >> 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00ff000000000000Ui64) >> 40) | \ + (__u64)(((__u64)(x) & (__u64)0xff00000000000000Ui64) >> 56) )) #define __swab16(x) ___constant_swab16(x) @@ -347,15 
+364,23 @@ typedef __u32 cfs_kernel_cap_t; #define htons(x) ntohs(x) +/* + * array must be used for array not pointer + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#endif /* _WINNT_TYPES_H */ #ifndef _I386_ERRNO_H #define _I386_ERRNO_H +#include + #define EPERM 1 /* Operation not permitted */ #define ENOENT 2 /* No such file or directory */ #define ESRCH 3 /* No such process */ #define EINTR 4 /* Interrupted system call */ -#define EIO 5 /* I/O error */ +#define EIO 5 /* I/O error */ #define ENXIO 6 /* No such device or address */ #define E2BIG 7 /* Arg list too long */ #define ENOEXEC 8 /* Exec format error */ @@ -385,16 +410,6 @@ typedef __u32 cfs_kernel_cap_t; #define EPIPE 32 /* Broken pipe */ #define EDOM 33 /* Math argument out of domain of func */ #define ERANGE 34 /* Math result not representable */ -#undef EDEADLK -#define EDEADLK 35 /* Resource deadlock would occur */ -#undef ENAMETOOLONG -#define ENAMETOOLONG 36 /* File name too long */ -#undef ENOLCK -#define ENOLCK 37 /* No record locks available */ -#undef ENOSYS -#define ENOSYS 38 /* Function not implemented */ -#undef ENOTEMPTY -#define ENOTEMPTY 39 /* Directory not empty */ #define ELOOP 40 /* Too many symbolic links encountered */ #define EWOULDBLOCK EAGAIN /* Operation would block */ #define ENOMSG 42 /* No message of desired type */ @@ -441,8 +456,6 @@ typedef __u32 cfs_kernel_cap_t; #define ELIBSCN 81 /* .lib section in a.out corrupted */ #define ELIBMAX 82 /* Attempting to link in too many shared libraries */ #define ELIBEXEC 83 /* Cannot exec a shared library directly */ -#undef EILSEQ -#define EILSEQ 84 /* Illegal byte sequence */ #define ERESTART 85 /* Interrupted system call should be restarted */ #define ESTRPIPE 86 /* Streams pipe error */ #define EUSERS 87 /* Too many users */ @@ -501,8 +514,6 @@ typedef __u32 cfs_kernel_cap_t; #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ - 
- /* open/fcntl - O_SYNC is only implemented on blocks devices and on files located on an ext2 file system */ #define O_ACCMODE 0003 @@ -578,6 +589,7 @@ typedef __u32 cfs_kernel_cap_t; * signal values ... */ +#ifdef __KERNEL__ #define SIGHUP 1 #define SIGINT 2 #define SIGQUIT 3 @@ -620,6 +632,8 @@ typedef __u32 cfs_kernel_cap_t; #define SIGRTMIN 32 #define SIGRTMAX (_NSIG-1) +#endif + /* * SA_FLAGS values: * diff --git a/libcfs/libcfs/darwin/darwin-tracefile.c b/libcfs/libcfs/darwin/darwin-tracefile.c index 7c0095d..8cf3c80 100644 --- a/libcfs/libcfs/darwin/darwin-tracefile.c +++ b/libcfs/libcfs/darwin/darwin-tracefile.c @@ -68,6 +68,7 @@ int tracefile_init_arch() { } void tracefile_fini_arch() { + fini_rwsem(&tracefile_sem); } void tracefile_read_lock() { diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c index 12ec7a9..23b1b78 100644 --- a/libcfs/libcfs/debug.c +++ b/libcfs/libcfs/debug.c @@ -112,13 +112,8 @@ EXPORT_SYMBOL(libcfs_kmemory); static cfs_waitq_t debug_ctlwq; -#ifdef HAVE_BGL_SUPPORT -char debug_file_path_arr[1024] = "/bgl/ion/tmp/lustre-log"; -#elif defined(__arch_um__) -char debug_file_path_arr[1024] = "/r/tmp/lustre-log"; -#else -char debug_file_path_arr[1024] = "/tmp/lustre-log"; -#endif +char debug_file_path_arr[1024] = DEBUG_FILE_PATH_DEFAULT; + /* We need to pass a pointer here, but elsewhere this must be a const */ static char *debug_file_path = &debug_file_path_arr[0]; CFS_MODULE_PARM(debug_file_path, "s", charp, 0644, @@ -429,8 +424,8 @@ void libcfs_debug_dumplog_internal(void *arg) if (strncmp(debug_file_path_arr, "NONE", 4) != 0) { snprintf(debug_file_name, sizeof(debug_file_name) - 1, - "%s.%ld.%ld", debug_file_path_arr, - cfs_time_current_sec(), (long)arg); + "%s.%ld." 
LPLD, debug_file_path_arr, + cfs_time_current_sec(), (long_ptr_t)arg); printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); tracefile_dump_all_pages(debug_file_name); @@ -461,7 +456,7 @@ void libcfs_debug_dumplog(void) cfs_waitq_add(&debug_ctlwq, &wait); rc = cfs_kernel_thread(libcfs_debug_dumplog_thread, - (void *)(long)cfs_curproc_pid(), + (void *)(long_ptr_t)cfs_curproc_pid(), CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " diff --git a/libcfs/libcfs/linux/linux-tracefile.c b/libcfs/libcfs/linux/linux-tracefile.c index 6029352..6111060 100644 --- a/libcfs/libcfs/linux/linux-tracefile.c +++ b/libcfs/libcfs/linux/linux-tracefile.c @@ -124,6 +124,8 @@ void tracefile_fini_arch() kfree(trace_data[i]); trace_data[i] = NULL; } + + fini_rwsem(&tracefile_sem); } void tracefile_read_lock() diff --git a/libcfs/libcfs/lwt.c b/libcfs/libcfs/lwt.c index 325785b..c8ca37b 100644 --- a/libcfs/libcfs/lwt.c +++ b/libcfs/libcfs/lwt.c @@ -42,23 +42,6 @@ # define EXPORT_SYMTAB #endif -#ifndef AUTOCONF_INCLUDED -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #define DEBUG_SUBSYSTEM S_LNET #include diff --git a/libcfs/libcfs/module.c b/libcfs/libcfs/module.c index 5918560..2fff7fb 100644 --- a/libcfs/libcfs/module.c +++ b/libcfs/libcfs/module.c @@ -319,7 +319,8 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a struct libcfs_ioctl_handler *hand; err = -EINVAL; down_read(&ioctl_list_sem); - list_for_each_entry(hand, &ioctl_list, item) { + cfs_list_for_each_entry_typed(hand, &ioctl_list, + struct libcfs_ioctl_handler, item) { err = hand->handle_ioctl(cmd, data); if (err != -EINVAL) { if (err == 0) @@ -432,6 +433,10 @@ static void exit_libcfs_module(void) rc = libcfs_debug_cleanup(); if (rc) printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc); + + 
fini_rwsem(&ioctl_list_sem); + fini_rwsem(&tracefile_sem); + libcfs_arch_cleanup(); } diff --git a/libcfs/libcfs/nidstrings.c b/libcfs/libcfs/nidstrings.c index 9cf58b4..df02584 100644 --- a/libcfs/libcfs/nidstrings.c +++ b/libcfs/libcfs/nidstrings.c @@ -479,7 +479,7 @@ libcfs_str2nid(const char *str) LASSERT (nf != NULL); } - if (!nf->nf_str2addr(str, sep - str, &addr)) + if (!nf->nf_str2addr(str, (int)(sep - str), &addr)) return LNET_NID_ANY; return LNET_MKNID(net, addr); diff --git a/libcfs/libcfs/tracefile.c b/libcfs/libcfs/tracefile.c index 15f70de..5b9543e 100644 --- a/libcfs/libcfs/tracefile.c +++ b/libcfs/libcfs/tracefile.c @@ -195,7 +195,8 @@ static void tcd_shrink(struct trace_cpu_data *tcd) CFS_INIT_LIST_HEAD(&pc.pc_pages); spin_lock_init(&pc.pc_lock); - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages, + struct trace_page, linkage) { if (pgcount-- == 0) break; @@ -537,7 +538,8 @@ static void put_pages_back_on_cpu(void *info) tcd_for_each_type_lock(tcd, i) { cur_head = tcd->tcd_pages.next; - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages, + struct trace_page, linkage) { __LASSERT_TAGE_INVARIANT(tage); @@ -568,7 +570,8 @@ static void put_pages_on_tcd_daemon_list(struct page_collection *pc, struct trace_page *tmp; spin_lock(&pc->pc_lock); - list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages, + struct trace_page, linkage) { __LASSERT_TAGE_INVARIANT(tage); @@ -619,7 +622,8 @@ void trace_debug_print(void) pc.pc_want_daemon_pages = 1; collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages, + struct trace_page, linkage) { char *p, *file, *fn; cfs_page_t *page; @@ -636,7 +640,7 @@ void trace_debug_print(void) p += strlen(file) + 1; fn = p; p += 
strlen(fn) + 1; - len = hdr->ph_len - (p - (char *)hdr); + len = hdr->ph_len - (int)(p - (char *)hdr); print_to_console(hdr, D_EMERG, p, len, file, fn); @@ -680,7 +684,8 @@ int tracefile_dump_all_pages(char *filename) /* ok, for now, just write the pages. in the future we'll be building * iobufs with the pages and calling generic_direct_IO */ CFS_MMSPACE_OPEN; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages, + struct trace_page, linkage) { __LASSERT_TAGE_INVARIANT(tage); @@ -717,7 +722,8 @@ void trace_flush_pages(void) pc.pc_want_daemon_pages = 1; collect_pages(&pc); - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages, + struct trace_page, linkage) { __LASSERT_TAGE_INVARIANT(tage); @@ -997,14 +1003,15 @@ static int tracefiled(void *arg) hdr = cfs_page_address(tage->page); hdr->ph_flags |= PH_FLAG_FIRST_RECORD; - list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages, + struct trace_page, linkage) { static loff_t f_pos; __LASSERT_TAGE_INVARIANT(tage); if (f_pos >= (off_t)tracefile_size) f_pos = 0; - else if (f_pos > cfs_filp_size(filp)) + else if (f_pos > (off_t)cfs_filp_size(filp)) f_pos = cfs_filp_size(filp); rc = cfs_filp_write(filp, cfs_page_address(tage->page), @@ -1105,7 +1112,8 @@ static void trace_cleanup_on_cpu(void *info) tcd_for_each_type_lock(tcd, i) { tcd->tcd_shutting_down = 1; - list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages, + struct trace_page, linkage) { __LASSERT_TAGE_INVARIANT(tage); list_del(&tage->linkage); diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h index dd25327..7faf803 100644 --- a/libcfs/libcfs/tracefile.h +++ b/libcfs/libcfs/tracefile.h @@ -265,21 +265,21 @@ extern void trace_assertion_failed(const char *str, const char *fn, 
const char *file, int line); /* ASSERTION that is safe to use within the debug system */ -#define __LASSERT(cond) \ -({ \ - if (unlikely(!(cond))) { \ - trace_assertion_failed("ASSERTION("#cond") failed", \ - __FUNCTION__, __FILE__, __LINE__); \ - } \ -}) - -#define __LASSERT_TAGE_INVARIANT(tage) \ -({ \ - __LASSERT(tage != NULL); \ - __LASSERT(tage->page != NULL); \ - __LASSERT(tage->used <= CFS_PAGE_SIZE); \ - __LASSERT(cfs_page_count(tage->page) > 0); \ -}) +#define __LASSERT(cond) \ + do { \ + if (unlikely(!(cond))) { \ + trace_assertion_failed("ASSERTION("#cond") failed", \ + __FUNCTION__, __FILE__, __LINE__); \ + } \ + } while (0) + +#define __LASSERT_TAGE_INVARIANT(tage) \ + do { \ + __LASSERT(tage != NULL); \ + __LASSERT(tage->page != NULL); \ + __LASSERT(tage->used <= CFS_PAGE_SIZE); \ + __LASSERT(cfs_page_count(tage->page) > 0); \ + } while (0) #endif /* LUSTRE_TRACEFILE_PRIVATE */ diff --git a/libcfs/libcfs/user-lock.c b/libcfs/libcfs/user-lock.c index 53ab2c4..cee36d7 100644 --- a/libcfs/libcfs/user-lock.c +++ b/libcfs/libcfs/user-lock.c @@ -246,6 +246,12 @@ void up_write(struct rw_semaphore *s) (void)s; } +void fini_rwsem(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + #ifdef HAVE_LIBPTHREAD /* diff --git a/libcfs/libcfs/user-mem.c b/libcfs/libcfs/user-mem.c index 7c2cc4d..1140159 100644 --- a/libcfs/libcfs/user-mem.c +++ b/libcfs/libcfs/user-mem.c @@ -63,6 +63,8 @@ cfs_page_t *cfs_alloc_pages(int mask, unsigned long order) pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); #elif defined (__DARWIN__) pg->addr = valloc(CFS_PAGE_SIZE << order); +#elif defined (__WINNT__) + pg->addr = pgalloc(order); #else pg->addr = memalign(CFS_PAGE_SIZE, CFS_PAGE_SIZE << order); #endif @@ -78,6 +80,8 @@ void cfs_free_pages(cfs_page_t *pg, int what) { #if 0 //#ifdef MAP_ANONYMOUS munmap(pg->addr, PAGE_SIZE); +#elif defined (__WINNT__) + pgfree(pg->addr); #else free(pg->addr); #endif diff --git 
a/libcfs/libcfs/user-prim.c b/libcfs/libcfs/user-prim.c index 07678e0..ee381d6 100644 --- a/libcfs/libcfs/user-prim.c +++ b/libcfs/libcfs/user-prim.c @@ -156,22 +156,13 @@ void cfs_timer_init(cfs_timer_t *l, cfs_timer_func_t *func, void *arg) { CFS_INIT_LIST_HEAD(&l->tl_list); l->function = func; - l->data = (unsigned long)arg; + l->data = (ulong_ptr_t)arg; return; } -#define cfs_jiffies \ -({ \ - unsigned long _ret = 0; \ - struct timeval tv; \ - if (gettimeofday(&tv, NULL) == 0) \ - _ret = tv.tv_sec; \ - _ret; \ -}) - int cfs_timer_is_armed(cfs_timer_t *l) { - if (cfs_time_before(cfs_jiffies, l->expires)) + if (cfs_time_before(cfs_time_current(), l->expires)) return 1; else return 0; @@ -185,8 +176,7 @@ void cfs_timer_arm(cfs_timer_t *l, cfs_time_t deadline) void cfs_timer_disarm(cfs_timer_t *l) { } - -long cfs_timer_deadline(cfs_timer_t *l) +cfs_time_t cfs_timer_deadline(cfs_timer_t *l) { return l->expires; } @@ -247,7 +237,7 @@ int cfs_parse_int_tunable(int *value, char *name) if (env == NULL) return 0; - *value = strtoull(env, &end, 0); + *value = (int)strtoull(env, &end, 0); if (*end == 0) return 0; diff --git a/libcfs/libcfs/user-tcpip.c b/libcfs/libcfs/user-tcpip.c index a78ddde..9dcf4b5 100644 --- a/libcfs/libcfs/user-tcpip.c +++ b/libcfs/libcfs/user-tcpip.c @@ -528,6 +528,11 @@ libcfs_sock_create(int *fdp) return 0; } +void libcfs_sock_release(int fd) +{ + close(fd); +} + int libcfs_sock_bind_to_port(int fd, __u16 port) { diff --git a/libcfs/libcfs/util/l_ioctl.c b/libcfs/libcfs/util/l_ioctl.c index 59dc6d8..bc7e6a1 100644 --- a/libcfs/libcfs/util/l_ioctl.c +++ b/libcfs/libcfs/util/l_ioctl.c @@ -23,7 +23,6 @@ #define __USE_FILE_OFFSET64 #include - #include #include @@ -77,15 +76,15 @@ open_ioc_dev(int dev_id) } if (ioc_dev_list[dev_id].dev_fd < 0) { - int fd = open(dev_name, O_RDWR); + int fd = cfs_proc_open((char *)dev_name, O_RDWR); /* Make the /dev/ node if we need to */ if (fd < 0 && errno == ENOENT) { - if (mknod(dev_name, + if 
(cfs_proc_mknod(dev_name, S_IFCHR|S_IWUSR|S_IRUSR, MKDEV(ioc_dev_list[dev_id].dev_major, ioc_dev_list[dev_id].dev_minor)) == 0) - fd = open(dev_name, O_RDWR); + fd = cfs_proc_open((char *)dev_name, O_RDWR); else fprintf(stderr, "mknod %s failed: %s\n", dev_name, strerror(errno)); @@ -113,7 +112,7 @@ do_ioctl(int dev_id, unsigned int opc, void *buf) if (fd < 0) return fd; - rc = ioctl(fd, opc, buf); + rc = cfs_proc_ioctl(fd, opc, buf); return rc; } @@ -197,7 +196,7 @@ unregister_ioc_dev(int dev_id) return; if (ioc_dev_list[dev_id].dev_name != NULL && ioc_dev_list[dev_id].dev_fd >= 0) - close(ioc_dev_list[dev_id].dev_fd); + cfs_proc_close(ioc_dev_list[dev_id].dev_fd); ioc_dev_list[dev_id].dev_name = NULL; ioc_dev_list[dev_id].dev_fd = -1; @@ -237,16 +236,53 @@ int parse_dump(char * dump_file, ioc_handler_t ioc_func) { int line =0; - struct stat st; char *start, *buf, *end; -#ifndef __CYGWIN__ - int fd; -#else + +#if defined(__CYGWIN__) || defined(__WINNT__) + HANDLE fd, hmap; DWORD size; -#endif - -#ifndef __CYGWIN__ + + fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (fd == INVALID_HANDLE_VALUE) { + fprintf(stderr, "couldn't open %s (error code: %u)\n", + dump_file, GetLastError()); + exit(1); + } + size = GetFileSize(fd, NULL); + if (size < 1 || size == 0xFFFFFFFF) { + fprintf(stderr, "KML is empty\n"); + CloseHandle(fd); + exit(1); + } + + hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL); + if (hmap == NULL) { + fprintf(stderr, "can't create file mapping\n"); + CloseHandle(fd); + exit(1); + } + start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0); + if (start == NULL) { + fprintf(stderr, "can't map file content\n"); + CloseHandle(hmap); + CloseHandle(fd); + exit(1); + } + end = buf + size; + CloseHandle(fd); + if (start == NULL) { + fprintf(stderr, "can't create file mapping\n"); + UnmapViewOfFile(start); + CloseHandle(hmap); + exit(1); + } +#else + + struct stat st; + int fd; + 
fd = open(dump_file, O_RDONLY); if (fd < 0) { fprintf(stderr, "couldn't open %s: %s\n", dump_file, @@ -271,24 +307,7 @@ parse_dump(char * dump_file, ioc_handler_t ioc_func) fprintf(stderr, "can't create file mapping\n"); exit(1); } -#else - fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - size = GetFileSize(fd, NULL); - if (size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL); - start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0); - end = buf + size; - CloseHandle(fd); - if (start == NULL) { - fprintf(stderr, "can't create file mapping\n"); - exit(1); - } -#endif /* __CYGWIN__ */ +#endif while (buf < end) { struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; @@ -323,11 +342,11 @@ parse_dump(char * dump_file, ioc_handler_t ioc_func) buf += data->ioc_len + sizeof(*dump_hdr); } -#ifndef __CYGWIN__ - munmap(start, end - start); -#else +#if defined(__CYGWIN__) || defined(__WINNT__) UnmapViewOfFile(start); CloseHandle(hmap); +#else + munmap(start, end - start); #endif return 0; diff --git a/libcfs/libcfs/util/parser.c b/libcfs/libcfs/util/parser.c index ce2a99e..dbb3e92 100644 --- a/libcfs/libcfs/util/parser.c +++ b/libcfs/libcfs/util/parser.c @@ -19,14 +19,6 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
* */ -#include -#include -#include -#include -#include -#include -#include -#include #include @@ -137,7 +129,7 @@ static command_t * find_cmd(char * name, command_t cmds[], char ** next) this with strtok*/ name = skipwhitespace(name); *next = skiptowhitespace(name); - len = *next - name; + len = (int)(*next - name); if (len == 0) return NULL; @@ -343,7 +335,7 @@ char * readline(char * prompt) if ((c = fgetc(stdin)) != EOF) { if (c == '\n') goto out; - *ptr++ = c; + *ptr++ = (char)c; if (ptr - line >= size - 1) { char *tmp; diff --git a/libcfs/libcfs/watchdog.c b/libcfs/libcfs/watchdog.c index 7cfbe1e..6c90188 100644 --- a/libcfs/libcfs/watchdog.c +++ b/libcfs/libcfs/watchdog.c @@ -70,7 +70,7 @@ struct lc_watchdog { */ static struct completion lcw_start_completion; static struct completion lcw_stop_completion; -static wait_queue_head_t lcw_event_waitq; +static cfs_waitq_t lcw_event_waitq; /* * Set this and wake lcw_event_waitq to stop the dispatcher. @@ -94,7 +94,7 @@ static DECLARE_MUTEX(lcw_refcount_sem); */ static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! 
*/ static struct list_head lcw_pending_timers = \ - LIST_HEAD_INIT(lcw_pending_timers); + CFS_LIST_HEAD_INIT(lcw_pending_timers); #ifdef HAVE_TASKLIST_LOCK static void @@ -107,11 +107,11 @@ lcw_dump(struct lc_watchdog *lcw) tsk = find_task_by_pid(lcw->lcw_pid); if (tsk == NULL) { - CWARN("Process %d was not found in the task list; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); + CWARN("Process " LPPID " was not found in the task list; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); } else if (tsk != lcw->lcw_task) { - CWARN("The current process %d did not set the watchdog; " - "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); + CWARN("The current process " LPPID " did not set the watchdog; " + "watchdog callback may be incomplete\n", lcw->lcw_pid); } else { libcfs_debug_dumpstack(tsk); } @@ -127,7 +127,7 @@ lcw_dump(struct lc_watchdog *lcw) } #endif -static void lcw_cb(unsigned long data) +static void lcw_cb(ulong_ptr_t data) { struct lc_watchdog *lcw = (struct lc_watchdog *)data; @@ -143,15 +143,15 @@ static void lcw_cb(unsigned long data) /* NB this warning should appear on the console, but may not get into * the logs since we're running in a softirq handler */ - CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", - (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); + CWARN("Watchdog triggered for pid: " LPPID " it was inactive for %lds\n", + lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); lcw_dump(lcw); spin_lock_bh(&lcw_pending_timers_lock); if (list_empty(&lcw->lcw_list)) { list_add(&lcw->lcw_list, &lcw_pending_timers); - wake_up(&lcw_event_waitq); + cfs_waitq_signal(&lcw_event_waitq); } spin_unlock_bh(&lcw_pending_timers_lock); @@ -190,7 +190,7 @@ static int lcw_dispatch_main(void *data) complete(&lcw_start_completion); while (1) { - wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); + cfs_wait_event_interruptible(lcw_event_waitq, is_watchdog_fired(), rc); CDEBUG(D_INFO, "Watchdog got 
woken up...\n"); if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); @@ -214,9 +214,8 @@ static int lcw_dispatch_main(void *data) list_del_init(&lcw->lcw_list); spin_unlock_bh(&lcw_pending_timers_lock); - CDEBUG(D_INFO, "found lcw for pid %d: inactive for " - "%lds\n", (int)lcw->lcw_pid, - cfs_duration_sec(lcw->lcw_time)); + CDEBUG(D_INFO, "found lcw for pid " LPPID ": inactive for " + "%lds\n", lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); if (lcw->lcw_state != LC_WATCHDOG_DISABLED) lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); @@ -240,7 +239,7 @@ static void lcw_dispatch_start(void) init_completion(&lcw_stop_completion); init_completion(&lcw_start_completion); - init_waitqueue_head(&lcw_event_waitq); + cfs_waitq_init(&lcw_event_waitq); CDEBUG(D_INFO, "starting dispatch thread\n"); rc = kernel_thread(lcw_dispatch_main, NULL, 0); @@ -263,7 +262,7 @@ static void lcw_dispatch_stop(void) CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); set_bit(LCW_FLAG_STOP, &lcw_flags); - wake_up(&lcw_event_waitq); + cfs_waitq_signal(&lcw_event_waitq); wait_for_completion(&lcw_stop_completion); @@ -292,12 +291,8 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms, lcw->lcw_data = data; lcw->lcw_state = LC_WATCHDOG_DISABLED; - INIT_LIST_HEAD(&lcw->lcw_list); - - lcw->lcw_timer.function = lcw_cb; - lcw->lcw_timer.data = (unsigned long)lcw; - lcw->lcw_timer.expires = jiffies + lcw->lcw_time; - init_timer(&lcw->lcw_timer); + CFS_INIT_LIST_HEAD(&lcw->lcw_list); + cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw); down(&lcw_refcount_sem); if (++lcw_refcount == 1) @@ -307,7 +302,8 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms, /* Keep this working in case we enable them by default */ if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { do_gettimeofday(&lcw->lcw_last_touched); - add_timer(&lcw->lcw_timer); + cfs_timer_arm(&lcw->lcw_timer, lcw->lcw_time + + cfs_time_current()); } RETURN(lcw); @@ -322,7 +318,7 @@ static void 
lcw_update_time(struct lc_watchdog *lcw, const char *message) do_gettimeofday(&newtime); if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff); - CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", + CWARN("Expired watchdog for pid " LPPID " %s after %lu.%.4lus\n", lcw->lcw_pid, message, timediff.tv_sec, @@ -343,8 +339,8 @@ void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms) lcw_update_time(lcw, "touched"); lcw->lcw_state = LC_WATCHDOG_ENABLED; - mod_timer(&lcw->lcw_timer, jiffies + - cfs_time_seconds(timeout_ms) / 1000); + cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() + + cfs_time_seconds(timeout_ms) / 1000); EXIT; } @@ -353,7 +349,7 @@ EXPORT_SYMBOL(lc_watchdog_touch_ms); /* deprecated - use above instead */ void lc_watchdog_touch(struct lc_watchdog *lcw) { - lc_watchdog_touch_ms(lcw, cfs_duration_sec(lcw->lcw_time) * 1000); + lc_watchdog_touch_ms(lcw, (int)cfs_duration_sec(lcw->lcw_time) * 1000); } EXPORT_SYMBOL(lc_watchdog_touch); @@ -379,7 +375,7 @@ void lc_watchdog_delete(struct lc_watchdog *lcw) ENTRY; LASSERT(lcw != NULL); - del_timer(&lcw->lcw_timer); + cfs_timer_disarm(&lcw->lcw_timer); lcw_update_time(lcw, "deleted"); @@ -405,7 +401,7 @@ EXPORT_SYMBOL(lc_watchdog_delete); void lc_watchdog_dumplog(pid_t pid, void *data) { - libcfs_debug_dumplog_internal((void *)((unsigned long)pid)); + libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid)); } EXPORT_SYMBOL(lc_watchdog_dumplog); diff --git a/libcfs/libcfs/winnt/winnt-curproc.c b/libcfs/libcfs/winnt/winnt-curproc.c index 7e6f394..93d3af6 100644 --- a/libcfs/libcfs/winnt/winnt-curproc.c +++ b/libcfs/libcfs/winnt/winnt-curproc.c @@ -42,16 +42,21 @@ #include - /* * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) * for Linux kernel. 
*/ -cfs_task_t this_task = - { 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, - "sysetm\0" }; +cfs_task_t this_task = + { /* umask */ 0,/* blocked*/0, /* pid */ 0, /* pgrp */ 0, + /* uid,euid,suid,fsuid */ 0, 0, 0, 0, + /* gid_t gid,egid,sgid,fsgid */ 0, 0, 0, 0, + /* ngroups*/ 1, /*cgroups*/ 0, /*groups*/ 0, + /* group_info */ NULL, + /* cap_effective, cap_inheritable, cap_permitted */ 0, 0, 0, + /* comm */"sysetm\0", + /* journal_info */ NULL + }; uid_t cfs_curproc_uid(void) @@ -141,8 +146,10 @@ int cfs_capable(cfs_cap_t cap) /* global of the task manager structure */ -TASK_MAN TaskMan; +TASK_MAN cfs_win_task_manger; +/* global idr context */ +struct idr_context * cfs_win_task_slot_idp = NULL; /* * task slot routiens @@ -153,8 +160,8 @@ alloc_task_slot() { PTASK_SLOT task = NULL; - if (TaskMan.slab) { - task = cfs_mem_cache_alloc(TaskMan.slab, 0); + if (cfs_win_task_manger.slab) { + task = cfs_mem_cache_alloc(cfs_win_task_manger.slab, 0); } else { task = cfs_alloc(sizeof(TASK_SLOT), 0); } @@ -168,16 +175,18 @@ init_task_slot(PTASK_SLOT task) memset(task, 0, sizeof(TASK_SLOT)); task->Magic = TASKSLT_MAGIC; task->task = this_task; - task->task.pid = (pid_t)PsGetCurrentThreadId(); cfs_init_event(&task->Event, TRUE, FALSE); } - void cleanup_task_slot(PTASK_SLOT task) { - if (TaskMan.slab) { - cfs_mem_cache_free(TaskMan.slab, task); + if (task->task.pid) { + cfs_idr_remove(cfs_win_task_slot_idp, task->task.pid); + } + + if (cfs_win_task_manger.slab) { + cfs_mem_cache_free(cfs_win_task_manger.slab, task); } else { cfs_free(task); } @@ -197,25 +206,19 @@ task_manager_notify( PLIST_ENTRY ListEntry = NULL; PTASK_SLOT TaskSlot = NULL; - spin_lock(&(TaskMan.Lock)); + spin_lock(&(cfs_win_task_manger.Lock)); - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { + ListEntry = cfs_win_task_manger.TaskList.Flink; + while (ListEntry != (&(cfs_win_task_manger.TaskList))) { TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); if 
(TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) { - if (Create) { -/* - DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n", - ProcessId, ThreadId, TaskSlot->Tet); -*/ - } else { + if (!Create) { /* remove the taskslot */ RemoveEntryList(&(TaskSlot->Link)); - TaskMan.NumOfTasks--; + cfs_win_task_manger.NumOfTasks--; /* now free the task slot */ cleanup_task_slot(TaskSlot); @@ -225,7 +228,7 @@ task_manager_notify( ListEntry = ListEntry->Flink; } - spin_unlock(&(TaskMan.Lock)); + spin_unlock(&(cfs_win_task_manger.Lock)); } int @@ -234,24 +237,35 @@ init_task_manager() NTSTATUS status; /* initialize the content and magic */ - memset(&TaskMan, 0, sizeof(TASK_MAN)); - TaskMan.Magic = TASKMAN_MAGIC; + memset(&cfs_win_task_manger, 0, sizeof(TASK_MAN)); + cfs_win_task_manger.Magic = TASKMAN_MAGIC; /* initialize the spinlock protection */ - spin_lock_init(&TaskMan.Lock); + spin_lock_init(&cfs_win_task_manger.Lock); /* create slab memory cache */ - TaskMan.slab = cfs_mem_cache_create( + cfs_win_task_manger.slab = cfs_mem_cache_create( "TSLT", sizeof(TASK_SLOT), 0, 0); /* intialize the list header */ - InitializeListHead(&(TaskMan.TaskList)); + InitializeListHead(&(cfs_win_task_manger.TaskList)); + + cfs_win_task_slot_idp = cfs_idr_init(); + if (!cfs_win_task_slot_idp) { + return -ENOMEM; + } /* set the thread creation/destruction notify routine */ status = PsSetCreateThreadNotifyRoutine(task_manager_notify); if (!NT_SUCCESS(status)) { cfs_enter_debugger(); + /* remove idr context */ + if (cfs_win_task_slot_idp) { + cfs_idr_exit(cfs_win_task_slot_idp); + cfs_win_task_slot_idp = NULL; + } + return cfs_error_code(status); } return 0; @@ -263,28 +277,32 @@ cleanup_task_manager() PLIST_ENTRY ListEntry = NULL; PTASK_SLOT TaskSlot = NULL; - /* we must stay in system since we succeed to register the - CreateThreadNotifyRoutine: task_manager_notify */ - cfs_enter_debugger(); + /* remove ThreadNotifyRoutine: task_manager_notify */ + 
PsRemoveCreateThreadNotifyRoutine(task_manager_notify); + /* remove idr context */ + if (cfs_win_task_slot_idp) { + cfs_idr_exit(cfs_win_task_slot_idp); + cfs_win_task_slot_idp = NULL; + } /* cleanup all the taskslots attached to the list */ - spin_lock(&(TaskMan.Lock)); + spin_lock(&(cfs_win_task_manger.Lock)); - while (!IsListEmpty(&(TaskMan.TaskList))) { + while (!IsListEmpty(&(cfs_win_task_manger.TaskList))) { - ListEntry = TaskMan.TaskList.Flink; + ListEntry = cfs_win_task_manger.TaskList.Flink; TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); RemoveEntryList(ListEntry); cleanup_task_slot(TaskSlot); } - spin_unlock(&TaskMan.Lock); + spin_unlock(&cfs_win_task_manger.Lock); /* destroy the taskslot cache slab */ - cfs_mem_cache_destroy(TaskMan.slab); - memset(&TaskMan, 0, sizeof(TASK_MAN)); + cfs_mem_cache_destroy(cfs_win_task_manger.slab); + memset(&cfs_win_task_manger, 0, sizeof(TASK_MAN)); } @@ -303,21 +321,15 @@ cfs_current() PLIST_ENTRY ListEntry = NULL; PTASK_SLOT TaskSlot = NULL; - spin_lock(&(TaskMan.Lock)); + spin_lock(&(cfs_win_task_manger.Lock)); - ListEntry = TaskMan.TaskList.Flink; - - while (ListEntry != (&(TaskMan.TaskList))) { + ListEntry = cfs_win_task_manger.TaskList.Flink; + while (ListEntry != (&(cfs_win_task_manger.TaskList))) { TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); - if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) { if (TaskSlot->Tet != Tet) { -/* - DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n", - Pid, Tid, Tet, TaskSlot->Tet); -*/ // // The old thread was already exit. This must be a // new thread which get the same Tid to the previous. 
@@ -329,16 +341,15 @@ cfs_current() } else { - if ((ULONG)TaskSlot->Pid > (ULONG)Pid) { + if (TaskSlot->Pid > Pid) { TaskSlot = NULL; break; - } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) { - if ((ULONG)TaskSlot->Tid > (ULONG)Tid) { + } else if (TaskSlot->Pid == Pid) { + if (TaskSlot->Tid > Tid) { TaskSlot = NULL; break; } } - TaskSlot = NULL; } @@ -347,24 +358,25 @@ cfs_current() if (!TaskSlot) { + /* allocate new task slot */ TaskSlot = alloc_task_slot(); - if (!TaskSlot) { cfs_enter_debugger(); goto errorout; } + /* set task slot IDs */ init_task_slot(TaskSlot); - TaskSlot->Pid = Pid; TaskSlot->Tid = Tid; TaskSlot->Tet = Tet; + TaskSlot->task.pid = (pid_t)cfs_idr_get_new(cfs_win_task_slot_idp, Tet); - if (ListEntry == (&(TaskMan.TaskList))) { + if (ListEntry == (&(cfs_win_task_manger.TaskList))) { // // Empty case or the biggest case, put it to the tail. // - InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link)); + InsertTailList(&(cfs_win_task_manger.TaskList), &(TaskSlot->Link)); } else { // // Get a slot and smaller than it's tid, put it just before. 
@@ -372,7 +384,7 @@ cfs_current() InsertHeadList(ListEntry->Blink, &(TaskSlot->Link)); } - TaskMan.NumOfTasks++; + cfs_win_task_manger.NumOfTasks++; } // @@ -382,18 +394,18 @@ cfs_current() { PTASK_SLOT Prev = NULL, Curr = NULL; - ListEntry = TaskMan.TaskList.Flink; + ListEntry = cfs_win_task_manger.TaskList.Flink; - while (ListEntry != (&(TaskMan.TaskList))) { + while (ListEntry != (&(cfs_win_task_manger.TaskList))) { Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); ListEntry = ListEntry->Flink; if (Prev) { - if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) { + if (Prev->Pid > Curr->Pid) { cfs_enter_debugger(); - } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) { - if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) { + } else if (Prev->Pid == Curr->Pid) { + if (Prev->Tid > Curr->Tid) { cfs_enter_debugger(); } } @@ -405,7 +417,7 @@ cfs_current() errorout: - spin_unlock(&(TaskMan.Lock)); + spin_unlock(&(cfs_win_task_manger.Lock)); if (!TaskSlot) { cfs_enter_debugger(); @@ -415,15 +427,28 @@ errorout: return (&(TaskSlot->task)); } -int -schedule_timeout(int64_t time) +/* deschedule for a bit... 
*/ +void +cfs_pause(cfs_duration_t ticks) +{ + cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, ticks); +} + +void +our_cond_resched() +{ + cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, 1i64); +} + +void +cfs_schedule_timeout(cfs_task_state_t state, int64_t time) { cfs_task_t * task = cfs_current(); PTASK_SLOT slot = NULL; if (!task) { cfs_enter_debugger(); - return 0; + return; } slot = CONTAINING_RECORD(task, TASK_SLOT, task); @@ -433,13 +458,13 @@ schedule_timeout(int64_t time) time = 0; } - return (cfs_wait_event(&(slot->Event), time) != 0); + cfs_wait_event_internal(&(slot->Event), time); } -int -schedule() +void +cfs_schedule() { - return schedule_timeout(0); + cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, 0); } int @@ -463,9 +488,7 @@ wake_up_process( } void -sleep_on( - cfs_waitq_t *waitq - ) +sleep_on(cfs_waitq_t *waitq) { cfs_waitlink_t link; diff --git a/libcfs/libcfs/winnt/winnt-debug.c b/libcfs/libcfs/winnt/winnt-debug.c index e226608..16657c83 100644 --- a/libcfs/libcfs/winnt/winnt-debug.c +++ b/libcfs/libcfs/winnt/winnt-debug.c @@ -39,50 +39,50 @@ #include #include "tracefile.h" -void lnet_debug_dumpstack(cfs_task_t *tsk) +void libcfs_debug_dumpstack(cfs_task_t *tsk) { return; } -cfs_task_t *lnet_current(void) +void libcfs_run_debug_log_upcall(char *file) { - return cfs_current(); } -int lnet_arch_debug_init(unsigned long bufsize) +cfs_task_t *libcfs_current(void) { - return 0; -} - -int lnet_arch_debug_cleanup(void) -{ - return 0; + return cfs_current(); } void libcfs_run_lbug_upcall(const char *file, const char *fn, const int line) { } -void libcfs_debug_dumplog(void) -{ -} - void lbug_with_loc(const char *file, const char *func, const int line) { libcfs_catastrophe = 1; CEMERG("LBUG: pid: %u thread: %#x\n", - (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread()); + cfs_curproc_pid(), PsGetCurrentThread()); + cfs_enter_debugger(); libcfs_debug_dumplog(); libcfs_run_lbug_upcall(file, func, line); } -#if TDI_LIBCFS_DBG +void 
cfs_enter_debugger(void) +{ +# if _X86_ + __asm int 3; +# else + KdBreakPoint(); +# endif +} + +#if DBG /* * Definitions */ -LONG KsDebugLevel = 0x5; +LONG KsDebugLevel = 1; /* @@ -1058,21 +1058,66 @@ KsPrintf( ... ) { - va_list ap; + LARGE_INTEGER tick; + va_list ap; va_start(ap, DebugMessage); - - if (DebugPrintLevel <= KsDebugLevel) - { + if (DebugPrintLevel <= KsDebugLevel) { CHAR buffer[0x200]; - + KeQueryTickCount(&tick); vsprintf(buffer, DebugMessage, ap); - - KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer)); + KdPrint(("%8.8X cpu:%d:%d tid:%p %s", + tick.LowPart, + KeGetCurrentProcessorNumber(), + KeGetCurrentIrql(), + PsGetCurrentThread(), buffer)); } - va_end(ap); } // KsPrint() #endif + + +void libcfs_panic(char *msg) +{ + DbgPrint("%s", msg); + cfs_enter_debugger(); +} + +/* BUGCHECK callback record */ +static int libcfs_bugcheck_inited = 0; +KBUGCHECK_CALLBACK_RECORD libcfs_bugcheck_record; + +void +libcfs_bugcheck_callback( + IN PVOID Buffer, + IN ULONG Length + ) +{ + cfs_enter_debugger(); +} + + +void libcfs_register_panic_notifier(void) +{ + if (libcfs_bugcheck_inited) { + return; + } + + KeInitializeCallbackRecord(&libcfs_bugcheck_record); + KeRegisterBugCheckCallback(&libcfs_bugcheck_record, + libcfs_bugcheck_callback, + &libcfs_bugcheck_record, + sizeof(KBUGCHECK_CALLBACK_RECORD), + "Lustre"); +} + +void libcfs_unregister_panic_notifier(void) +{ + if (!libcfs_bugcheck_inited) { + return; + } + + KeDeregisterBugCheckCallback(&libcfs_bugcheck_record); +} diff --git a/libcfs/libcfs/winnt/winnt-fs.c b/libcfs/libcfs/winnt/winnt-fs.c index 2270771..515eee5 100644 --- a/libcfs/libcfs/winnt/winnt-fs.c +++ b/libcfs/libcfs/winnt/winnt-fs.c @@ -38,7 +38,9 @@ #include -const CHAR *dos_file_prefix = "\\??\\"; +const CHAR *dos_file_prefix[] = { + "\\??\\", "\\DosDevices\\", + "\\SystemRoot\\", NULL}; /* * cfs_filp_open @@ -57,6 +59,9 @@ const CHAR *dos_file_prefix = "\\??\\"; * N/A */ +#define is_drv_letter_valid(x) (((x) >= 0 && (x) <= 9) || \ + ( 
((x)|0x20) <= 'z' && ((x)|0x20) >= 'a')) + cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) { cfs_file_t * fp = NULL; @@ -81,7 +86,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) PUCHAR AnsiString = NULL; /* Analyze the flags settings */ - if (cfs_is_flag_set(flags, O_WRONLY)) { DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); ShareAccess = 0; @@ -126,11 +130,28 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) } /* Initialize the unicode path name for the specified file */ - NameLength = (USHORT)strlen(name); + /* Check file & path name */ if (name[0] != '\\') { - PrefixLength = (USHORT)strlen(dos_file_prefix); + if (NameLength < 1 || name[1] != ':' || !is_drv_letter_valid(name[0])) { + /* invalid file path name */ + if (err) *err = -EINVAL; + return NULL; + } + PrefixLength = (USHORT)strlen(dos_file_prefix[0]); + } else { + int i, j; + for (i=0; i < 3 && dos_file_prefix[i] != NULL; i++) { + j = strlen(dos_file_prefix[i]); + if (NameLength > j && _strnicmp(dos_file_prefix[i], name, j) == 0) { + break; + } + } + if (i >= 3) { + if (err) *err = -EINVAL; + return NULL; + } } AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1), @@ -142,7 +163,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1), CFS_ALLOC_ZERO); - if (NULL == UnicodeString) { if (err) *err = -ENOMEM; cfs_free(AnsiString); @@ -150,7 +170,7 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) } if (PrefixLength) { - RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength); + RtlCopyMemory(&AnsiString[0], dos_file_prefix[0], PrefixLength); } RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength); @@ -167,7 +187,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE); /* Setup the object 
attributes structure for the file. */ - InitializeObjectAttributes( &ObjectAttributes, &UnicodeName, @@ -177,7 +196,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) NULL ); /* Now to open or create the file now */ - Status = ZwCreateFile( &FileHandle, DesiredAccess, @@ -192,22 +210,24 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) 0 ); /* Check the returned status of IoStatus... */ - if (!NT_SUCCESS(IoStatus.Status)) { - *err = cfs_error_code(IoStatus.Status); + if (err) { + *err = cfs_error_code(IoStatus.Status); + } cfs_free(UnicodeString); cfs_free(AnsiString); return NULL; } /* Allocate the cfs_file_t: libcfs file object */ - fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO); if (NULL == fp) { Status = ZwClose(FileHandle); ASSERT(NT_SUCCESS(Status)); - *err = -ENOMEM; + if (err) { + *err = -ENOMEM; + } cfs_free(UnicodeString); cfs_free(AnsiString); return NULL; @@ -218,7 +238,9 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) fp->f_flags = flags; fp->f_mode = (mode_t)mode; fp->f_count = 1; - *err = 0; + if (err) { + *err = 0; + } /* free the memory of temporary name strings */ cfs_free(UnicodeString); @@ -260,6 +282,164 @@ int cfs_filp_close(cfs_file_t *fp) } +NTSTATUS CompletionRoutine(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID Context) +{ + /* copy the IoStatus result */ + if (Irp->UserIosb) + *Irp->UserIosb = Irp->IoStatus; + + /* singal the event we set */ + KeSetEvent((PKEVENT) Context, 0, FALSE); + + /* free the Irp we allocated */ + IoFreeIrp(Irp); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + + +NTSTATUS cfs_nt_filp_io(HANDLE Handle, BOOLEAN Writing, PLARGE_INTEGER Offset, + ULONG Length, PUCHAR Buffer, PULONG Bytes) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + + PIRP irp = NULL; + PIO_STACK_LOCATION irpSp = NULL; + + PFILE_OBJECT fileObject = NULL; + PDEVICE_OBJECT deviceObject; + + KEVENT event; + + KeInitializeEvent(&event, 
SynchronizationEvent, FALSE); + + status = ObReferenceObjectByHandle( Handle, + Writing ? FILE_WRITE_DATA : + FILE_READ_DATA, + *IoFileObjectType, + KernelMode, + (PVOID *) &fileObject, + NULL ); + if (!NT_SUCCESS(status)) { + goto errorout; + } + + /* query the DeviceObject in case no input */ + deviceObject = IoGetBaseFileSystemDeviceObject(fileObject); + + + /* allocate our own irp */ + irp = IoAllocateIrp(deviceObject->StackSize, FALSE); + if (NULL == irp) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + irp->Tail.Overlay.OriginalFileObject = fileObject; + irp->Tail.Overlay.Thread = PsGetCurrentThread(); + irp->Tail.Overlay.AuxiliaryBuffer = (PVOID) NULL; + irp->PendingReturned = FALSE; + irp->Cancel = FALSE; + irp->CancelRoutine = (PDRIVER_CANCEL) NULL; + irp->RequestorMode = KernelMode; + irp->UserIosb = &iosb; + + /* set up the next I/O stack location. */ + irpSp = (PIO_STACK_LOCATION)IoGetNextIrpStackLocation(irp); + irpSp->MajorFunction = Writing ? IRP_MJ_WRITE : IRP_MJ_READ; + irpSp->FileObject = fileObject; + irpSp->DeviceObject = deviceObject; + + if (deviceObject->Flags & DO_BUFFERED_IO) { + irp->AssociatedIrp.SystemBuffer = Buffer; + irp->UserBuffer = Buffer; + irp->Flags |= (ULONG) (IRP_BUFFERED_IO | + IRP_INPUT_OPERATION); + } else if (deviceObject->Flags & DO_DIRECT_IO) { + + PMDL mdl = NULL; + + mdl = IoAllocateMdl(Buffer, Length, FALSE, TRUE, irp); + if (mdl == NULL) { + KsPrint((0, "cfs_nt_filp_io: failed to allocate MDL for %wZ .\n", + &fileObject->FileName)); + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + __try { + MmProbeAndLockPages(mdl, KernelMode, Writing ? 
IoReadAccess : IoWriteAccess ); + } __except(EXCEPTION_EXECUTE_HANDLER) { + KsPrint((0, "cfs_nt_filp_io: failed to lock buffer %p for %wZ .\n", + Buffer, &fileObject->FileName)); + IoFreeMdl(irp->MdlAddress); + irp->MdlAddress = NULL; + status = STATUS_INSUFFICIENT_RESOURCES; + } + } else { + irp->UserBuffer = Buffer; + irp->Flags = 0; + } + + if (Writing) { + irp->Flags |= IRP_WRITE_OPERATION | IRP_DEFER_IO_COMPLETION; + irpSp->Parameters.Write.Length = Length; + irpSp->Parameters.Write.ByteOffset = *Offset; + } else { + irp->Flags |= IRP_READ_OPERATION | IRP_DEFER_IO_COMPLETION; + irpSp->Parameters.Read.Length = Length; + irpSp->Parameters.Read.ByteOffset = *Offset; + } + + /* set the Irp completion routine */ + IoSetCompletionRoutine( irp, CompletionRoutine, + &event, TRUE, TRUE, TRUE); + + + /* issue the irp to the lower layer device */ + status = IoCallDriver(deviceObject, irp); + + /* Irp is to be cleaned up in the compleiton routine */ + irp = NULL; + + if (status == STATUS_PENDING) { + + /* we need wait until operation is completed, then we can + get the returned status and information length */ + + status = KeWaitForSingleObject( + &event, + Executive, + KernelMode, + FALSE, + NULL + ); + if (NT_SUCCESS(status)) { + status = iosb.Status; + } + } + + if (NT_SUCCESS(status)) { + *Bytes = (ULONG)iosb.Information; + } else { + *Bytes = 0; + } + +errorout: + + if (fileObject) { + ObDereferenceObject(fileObject); + } + + /* free the Irp in error case */ + if (irp) { + IoFreeIrp(irp); + } + + return status; +} + /* * cfs_filp_read * To read data from the opened file @@ -281,45 +461,33 @@ int cfs_filp_close(cfs_file_t *fp) int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) { - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; - + LARGE_INTEGER offset; + NTSTATUS status; int rc = 0; /* Read data from the file into the specified buffer */ - if (pos != NULL) { - address.QuadPart = *pos; + offset.QuadPart = *pos; } else { - 
address.QuadPart = fp->f_pos; + offset.QuadPart = fp->f_pos; } - Status = ZwReadFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); + status = cfs_nt_filp_io(fp->f_handle, 0, &offset, + nbytes, buf, &rc); - if (!NT_SUCCESS(IoStatus.Status)) { - rc = cfs_error_code(IoStatus.Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + } + + if (rc > 0) { + fp->f_pos = offset.QuadPart + rc; + if (pos != NULL) *pos = fp->f_pos; - } } - return rc; + return rc; } - /* * cfs_filp_wrtie * To write specified data to the opened file @@ -341,63 +509,33 @@ int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) { - LARGE_INTEGER address; - NTSTATUS Status; - IO_STATUS_BLOCK IoStatus; + LARGE_INTEGER offset; + NTSTATUS status; int rc = 0; - /* Write user specified data into the file */ - + /* Read data from the file into the specified buffer */ if (pos != NULL) { - address.QuadPart = *pos; + offset.QuadPart = *pos; } else { - address.QuadPart = fp->f_pos; + offset.QuadPart = fp->f_pos; } - Status = ZwWriteFile( fp->f_handle, - 0, - NULL, - NULL, - &IoStatus, - buf, - nbytes, - &address, - NULL ); + status = cfs_nt_filp_io(fp->f_handle, 1, &offset, + nbytes, buf, &rc); - if (!NT_SUCCESS(Status)) { - rc = cfs_error_code(Status); - } else { - rc = (int)IoStatus.Information; - fp->f_pos = address.QuadPart + rc; - - if (pos != NULL) { + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + } + + if (rc > 0) { + fp->f_pos = offset.QuadPart + rc; + if (pos != NULL) *pos = fp->f_pos; - } } return rc; } - -NTSTATUS -CompletionRoutine( - PDEVICE_OBJECT DeviceObject, - PIRP Irp, - PVOID Context) -{ - /* copy the IoStatus result */ - *Irp->UserIosb = Irp->IoStatus; - - /* singal the event we set */ - KeSetEvent(Irp->UserEvent, 0, FALSE); - - 
/* free the Irp we allocated */ - IoFreeIrp(Irp); - - return STATUS_MORE_PROCESSING_REQUIRED; -} - - /* * cfs_filp_fsync * To sync the dirty data of the file to disk @@ -428,7 +566,6 @@ int cfs_filp_fsync(cfs_file_t *fp) PIO_STACK_LOCATION IrpSp; /* get the FileObject and the DeviceObject */ - Status = ObReferenceObjectByHandle( fp->f_handle, FILE_WRITE_DATA, @@ -444,11 +581,8 @@ int cfs_filp_fsync(cfs_file_t *fp) DeviceObject = IoGetRelatedDeviceObject(FileObject); /* allocate a new Irp */ - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - if (!Irp) { - ObDereferenceObject(FileObject); return -ENOMEM; } @@ -457,7 +591,6 @@ int cfs_filp_fsync(cfs_file_t *fp) KeInitializeEvent(&Event, SynchronizationEvent, FALSE); /* setup the Irp */ - Irp->UserEvent = &Event; Irp->UserIosb = &IoSb; Irp->RequestorMode = KernelMode; @@ -471,7 +604,8 @@ int cfs_filp_fsync(cfs_file_t *fp) IrpSp->DeviceObject = DeviceObject; IrpSp->FileObject = FileObject; - IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE); + IoSetCompletionRoutine( Irp, CompletionRoutine, + &Event, TRUE, TRUE, TRUE); /* issue the Irp to the underlying file system driver */ @@ -553,3 +687,21 @@ int cfs_file_count(cfs_file_t *fp) { return (int)(fp->f_count); } + +struct dentry *dget(struct dentry *de) +{ + if (de) { + atomic_inc(&de->d_count); + } + return de; +} + +void dput(struct dentry *de) +{ + if (!de || atomic_read(&de->d_count) == 0) { + return; + } + if (atomic_dec_and_test(&de->d_count)) { + cfs_free(de); + } +} diff --git a/libcfs/libcfs/winnt/winnt-lock.c b/libcfs/libcfs/winnt/winnt-lock.c index 2b57e7f..7b20b53 100644 --- a/libcfs/libcfs/winnt/winnt-lock.c +++ b/libcfs/libcfs/winnt/winnt-lock.c @@ -40,7 +40,7 @@ #include -#if _X86_ +#if defined(_X86_) void __declspec (naked) FASTCALL atomic_add( @@ -148,7 +148,7 @@ atomic_dec_and_test( } } -#else +#elif defined(_AMD64_) void FASTCALL atomic_add( @@ -235,7 +235,7 @@ atomic_dec_and_test( do { counter = v->counter; - result = counter + 
1; + result = counter - 1; } while ( InterlockedCompareExchange( &(v->counter), @@ -245,8 +245,62 @@ atomic_dec_and_test( return (result == 0); } +#else + +#error CPU arch type isn't specified. + #endif +/** + * atomic_add_return - add integer and return + * @v: pointer of type atomic_t + * @i: integer value to add + * + * Atomically adds @i to @v and returns @i + @v + */ +int FASTCALL atomic_add_return(int i, atomic_t *v) +{ + int counter, result; + + do { + + counter = v->counter; + result = counter + i; + + } while ( InterlockedCompareExchange( + &(v->counter), + result, + counter) != counter); + + return result; + +} + +/** + * atomic_sub_return - subtract integer and return + * @v: pointer of type atomic_t + * @i: integer value to subtract + * + * Atomically subtracts @i from @v and returns @v - @i + */ +int FASTCALL atomic_sub_return(int i, atomic_t *v) +{ + return atomic_add_return(-i, v); +} + +int FASTCALL atomic_dec_and_lock(atomic_t *v, spinlock_t *lock) +{ + if (atomic_read(v) != 1) { + return 0; + } + + spin_lock(lock); + if (atomic_dec_and_test(v)) + return 1; + spin_unlock(lock); + return 0; +} + /* * rw spinlock diff --git a/libcfs/libcfs/winnt/winnt-mem.c b/libcfs/libcfs/winnt/winnt-mem.c index 782d6d7..6a8689e 100644 --- a/libcfs/libcfs/winnt/winnt-mem.c +++ b/libcfs/libcfs/winnt/winnt-mem.c @@ -42,6 +42,25 @@ cfs_mem_cache_t *cfs_page_t_slab = NULL; cfs_mem_cache_t *cfs_page_p_slab = NULL; +cfs_page_t * virt_to_page(void * addr) +{ + cfs_page_t *pg; + pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0); + + if (NULL == pg) { + cfs_enter_debugger(); + return NULL; + } + + memset(pg, 0, sizeof(cfs_page_t)); + pg->addr = (void *)((__u64)addr & (~((__u64)PAGE_SIZE-1))); + pg->mapping = addr; + atomic_set(&pg->count, 1); + set_bit(PG_virt, &(pg->flags)); + cfs_enter_debugger(); + return pg; +} + /* * cfs_alloc_page * To allocate the cfs_page_t and also 1 page of memory @@ -57,6 +76,8 @@ cfs_mem_cache_t *cfs_page_p_slab = NULL; * N/A */ +atomic_t 
libcfs_total_pages; + cfs_page_t * cfs_alloc_page(int flags) { cfs_page_t *pg; @@ -75,6 +96,7 @@ cfs_page_t * cfs_alloc_page(int flags) if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) { memset(pg->addr, 0, CFS_PAGE_SIZE); } + atomic_inc(&libcfs_total_pages); } else { cfs_enter_debugger(); cfs_mem_cache_free(cfs_page_t_slab, pg); @@ -103,10 +125,60 @@ void cfs_free_page(cfs_page_t *pg) ASSERT(pg->addr != NULL); ASSERT(atomic_read(&pg->count) <= 1); - cfs_mem_cache_free(cfs_page_p_slab, pg->addr); + if (!test_bit(PG_virt, &pg->flags)) { + cfs_mem_cache_free(cfs_page_p_slab, pg->addr); + atomic_dec(&libcfs_total_pages); + } else { + cfs_enter_debugger(); + } cfs_mem_cache_free(cfs_page_t_slab, pg); } +cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order) +{ + cfs_page_t *pg; + pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0); + + if (NULL == pg) { + cfs_enter_debugger(); + return NULL; + } + + memset(pg, 0, sizeof(cfs_page_t)); + pg->addr = cfs_alloc((CFS_PAGE_SIZE << order),0); + atomic_set(&pg->count, 1); + + if (pg->addr) { + if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) { + memset(pg->addr, 0, CFS_PAGE_SIZE << order); + } + atomic_add(1 << order, &libcfs_total_pages); + } else { + cfs_enter_debugger(); + cfs_mem_cache_free(cfs_page_t_slab, pg); + pg = NULL; + } + + return pg; +} + +void __cfs_free_pages(cfs_page_t *pg, unsigned int order) +{ + ASSERT(pg != NULL); + ASSERT(pg->addr != NULL); + ASSERT(atomic_read(&pg->count) <= 1); + + atomic_sub(1 << order, &libcfs_total_pages); + cfs_free(pg->addr); + cfs_mem_cache_free(cfs_page_t_slab, pg); +} + +int cfs_mem_is_in_cache(const void *addr, const cfs_mem_cache_t *kmem) +{ + KdPrint(("cfs_mem_is_in_cache: not implemented. 
(should maintain a" + "chain to keep all allocations traced.)\n")); + return 1; +} /* * cfs_alloc @@ -127,21 +199,19 @@ void cfs_free_page(cfs_page_t *pg) void * cfs_alloc(size_t nr_bytes, u_int32_t flags) { - void *ptr; + void *ptr; /* Ignore the flags: always allcoate from NonPagedPool */ - - ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs'); - - if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) { - memset(ptr, 0, nr_bytes); + ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs'); + if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) { + memset(ptr, 0, nr_bytes); } if (!ptr) { cfs_enter_debugger(); } - return ptr; + return ptr; } /* @@ -161,7 +231,7 @@ cfs_alloc(size_t nr_bytes, u_int32_t flags) void cfs_free(void *addr) { - ExFreePool(addr); + ExFreePool(addr); } /* @@ -182,7 +252,7 @@ cfs_free(void *addr) void * cfs_alloc_large(size_t nr_bytes) { - return cfs_alloc(nr_bytes, 0); + return cfs_alloc(nr_bytes, 0); } /* @@ -202,7 +272,7 @@ cfs_alloc_large(size_t nr_bytes) void cfs_free_large(void *addr) { - cfs_free(addr); + cfs_free(addr); } @@ -252,7 +322,6 @@ cfs_mem_cache_create( } memset(kmc, 0, sizeof(cfs_mem_cache_t)); - kmc->flags = flags; if (name) { @@ -345,3 +414,74 @@ void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf) { ExFreeToNPagedLookasideList(&(kmc->npll), buf); } + +spinlock_t shrinker_guard = {0}; +CFS_LIST_HEAD(shrinker_hdr); +cfs_timer_t shrinker_timer = {0}; + +struct shrinker * set_shrinker(int seeks, shrink_callback cb) +{ + struct shrinker * s = (struct shrinker *) + cfs_alloc(sizeof(struct shrinker), CFS_ALLOC_ZERO); + if (s) { + s->cb = cb; + s->seeks = seeks; + s->nr = 2; + spin_lock(&shrinker_guard); + list_add(&s->list, &shrinker_hdr); + spin_unlock(&shrinker_guard); + } + + return s; +} + +void remove_shrinker(struct shrinker *s) +{ + struct shrinker *tmp; + spin_lock(&shrinker_guard); +#if TRUE + cfs_list_for_each_entry_typed(tmp, &shrinker_hdr, + struct shrinker, list) { + if (tmp == s) { + list_del(&tmp->list); + break; 
+ } + } +#else + list_del(&s->list); +#endif + spin_unlock(&shrinker_guard); + cfs_free(s); +} + +/* time ut test proc */ +void shrinker_timer_proc(ulong_ptr_t arg) +{ + struct shrinker *s; + spin_lock(&shrinker_guard); + + cfs_list_for_each_entry_typed(s, &shrinker_hdr, + struct shrinker, list) { + s->cb(s->nr, __GFP_FS); + } + spin_unlock(&shrinker_guard); + cfs_timer_arm(&shrinker_timer, 300); +} + +int start_shrinker_timer() +{ + /* initialize shriner timer */ + cfs_timer_init(&shrinker_timer, shrinker_timer_proc, NULL); + + /* start the timer to trigger in 5 minutes */ + cfs_timer_arm(&shrinker_timer, 300); + + return 0; +} + +void stop_shrinker_timer() +{ + /* cancel the timer */ + cfs_timer_disarm(&shrinker_timer); + cfs_timer_done(&shrinker_timer); +} diff --git a/libcfs/libcfs/winnt/winnt-module.c b/libcfs/libcfs/winnt/winnt-module.c index ac255ba..3febc7f 100644 --- a/libcfs/libcfs/winnt/winnt-module.c +++ b/libcfs/libcfs/winnt/winnt-module.c @@ -35,7 +35,7 @@ */ -#define DEBUG_SUBSYSTEM S_LIBCFS +#define DEBUG_SUBSYSTEM S_LNET #include @@ -56,17 +56,17 @@ int libcfs_ioctl_getdata(char *buf, char *end, void *arg) RETURN(err); if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { - CERROR(("LIBCFS: version mismatch kernel vs application\n")); + CERROR("LIBCFS: version mismatch kernel vs application\n"); RETURN(-EINVAL); } if (hdr->ioc_len + buf >= end) { - CERROR(("LIBCFS: user buffer exceeds kernel buffer\n")); + CERROR("LIBCFS: user buffer exceeds kernel buffer\n"); RETURN(-EINVAL); } if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { - CERROR(("LIBCFS: user buffer too small for ioctl\n")); + CERROR("LIBCFS: user buffer too small for ioctl\n"); RETURN(-EINVAL); } @@ -75,7 +75,7 @@ int libcfs_ioctl_getdata(char *buf, char *end, void *arg) RETURN(err); if (libcfs_ioctl_is_invalid(data)) { - CERROR(("LIBCFS: ioctl not correctly formatted\n")); + CERROR("LIBCFS: ioctl not correctly formatted\n"); RETURN(-EINVAL); } @@ -88,11 +88,18 @@ int 
libcfs_ioctl_getdata(char *buf, char *end, void *arg) RETURN(0); } - + +int libcfs_ioctl_popdata(void *arg, void *data, int size) +{ + if (copy_to_user((char *)arg, data, size)) + return -EFAULT; + return 0; +} + extern struct cfs_psdev_ops libcfs_psdev_ops; static int -libcfs_psdev_open(cfs_file_t * file) +libcfs_psdev_open(struct inode *in, cfs_file_t * file) { struct libcfs_device_userstate **pdu = NULL; int rc = 0; @@ -107,7 +114,7 @@ libcfs_psdev_open(cfs_file_t * file) /* called when closing /dev/device */ static int -libcfs_psdev_release(cfs_file_t * file) +libcfs_psdev_release(struct inode *in, cfs_file_t * file) { struct libcfss_device_userstate *pdu; int rc = 0; @@ -121,7 +128,7 @@ libcfs_psdev_release(cfs_file_t * file) } static int -libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) +libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr_t arg) { struct cfs_psdev_file pfile; int rc = 0; @@ -129,8 +136,8 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { - CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd))); + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); return (-EINVAL); } @@ -139,8 +146,8 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) case IOC_LIBCFS_PANIC: if (!cfs_capable(CFS_CAP_SYS_BOOT)) return (-EPERM); - CERROR(("debugctl-invoked panic")); - KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL); + CERROR("debugctl-invoked panic"); + KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL); return (0); case IOC_LIBCFS_MEMHOG: @@ -160,6 +167,7 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg) } static struct file_operations libcfs_fops = { + /* owner */ 
THIS_MODULE, /* lseek: */ NULL, /* read: */ NULL, /* write: */ NULL, diff --git a/libcfs/libcfs/winnt/winnt-native.c b/libcfs/libcfs/winnt/winnt-native.c new file mode 100644 index 0000000..634024a --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-native.c @@ -0,0 +1,709 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ */ + +# define DEBUG_SUBSYSTEM S_LNET + +#ifndef __KERNEL__ + +#include +#include +#include +#include + +/* + * Native API definitions + */ + +// +// Disk I/O Routines +// + +NTSYSAPI +NTSTATUS +NTAPI +NtReadFile(HANDLE FileHandle, + HANDLE Event OPTIONAL, + PIO_APC_ROUTINE ApcRoutine OPTIONAL, + PVOID ApcContext OPTIONAL, + PIO_STATUS_BLOCK IoStatusBlock, + PVOID Buffer, + ULONG Length, + PLARGE_INTEGER ByteOffset OPTIONAL, + PULONG Key OPTIONAL); + +NTSYSAPI +NTSTATUS +NTAPI +NtWriteFile(HANDLE FileHandle, + HANDLE Event OPTIONAL, + PIO_APC_ROUTINE ApcRoutine OPTIONAL, + PVOID ApcContext OPTIONAL, + PIO_STATUS_BLOCK IoStatusBlock, + PVOID Buffer, + ULONG Length, + PLARGE_INTEGER ByteOffset OPTIONAL, + PULONG Key OPTIONAL); + +NTSYSAPI +NTSTATUS +NTAPI +NtClose(HANDLE Handle); + +NTSYSAPI +NTSTATUS +NTAPI +NtCreateFile(PHANDLE FileHandle, + ACCESS_MASK DesiredAccess, + POBJECT_ATTRIBUTES ObjectAttributes, + PIO_STATUS_BLOCK IoStatusBlock, + PLARGE_INTEGER AllocationSize OPTIONAL, + ULONG FileAttributes, + ULONG ShareAccess, + ULONG CreateDisposition, + ULONG CreateOptions, + PVOID EaBuffer OPTIONAL, + ULONG EaLength); + + +NTSYSAPI +NTSTATUS +NTAPI +NtDeviceIoControlFile( + IN HANDLE FileHandle, + IN HANDLE Event, + IN PIO_APC_ROUTINE ApcRoutine, + IN PVOID ApcContext, + OUT PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG IoControlCode, + IN PVOID InputBuffer, + IN ULONG InputBufferLength, + OUT PVOID OutputBuffer, + OUT ULONG OutputBufferLength + ); + +NTSYSAPI +NTSTATUS +NTAPI +NtFsControlFile( + IN HANDLE FileHandle, + IN HANDLE Event OPTIONAL, + IN PIO_APC_ROUTINE ApcRoutine OPTIONAL, + IN PVOID ApcContext OPTIONAL, + OUT PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG FsControlCode, + IN PVOID InputBuffer OPTIONAL, + IN ULONG InputBufferLength, + OUT PVOID OutputBuffer OPTIONAL, + IN ULONG OutputBufferLength +); + + +NTSYSAPI +NTSTATUS +NTAPI +NtQueryInformationFile( + IN HANDLE FileHandle, + OUT PIO_STATUS_BLOCK IoStatusBlock, + OUT PVOID FileInformation, + IN ULONG 
Length, + IN FILE_INFORMATION_CLASS FileInformationClass + ); + +// +// Random routines ... +// + +NTSYSAPI +ULONG +NTAPI +RtlRandom( + IN OUT PULONG Seed + ); + +/* + * Time routines ... + */ + +NTSYSAPI +CCHAR +NTAPI +NtQuerySystemTime( + OUT PLARGE_INTEGER CurrentTime + ); + + +NTSYSAPI +BOOLEAN +NTAPI +RtlTimeToSecondsSince1970( + IN PLARGE_INTEGER Time, + OUT PULONG ElapsedSeconds + ); + + +NTSYSAPI +VOID +NTAPI +RtlSecondsSince1970ToTime( + IN ULONG ElapsedSeconds, + OUT PLARGE_INTEGER Time + ); + +NTSYSAPI +NTSTATUS +NTAPI +ZwDelayExecution( + IN BOOLEAN Alertable, + IN PLARGE_INTEGER Interval +); + + +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + NTSTATUS status; + LARGE_INTEGER Interval; + Interval.QuadPart = rqtp->tv_sec * 10000000 + rqtp->tv_nsec / 100; + status = ZwDelayExecution(TRUE, &Interval); + if (rmtp) { + rmtp->tv_sec = 0; + rmtp->tv_nsec = 0; + } + if (status == STATUS_ALERTED || status == STATUS_USER_APC) { + return -1; + } + return 0; +} + + +void do_gettimeofday(struct timeval *tv) +{ + LARGE_INTEGER Time; + + NtQuerySystemTime(&Time); + + tv->tv_sec = (long_ptr_t) (Time.QuadPart / 10000000); + tv->tv_usec = (suseconds_t) (Time.QuadPart % 10000000) / 10; +} + +int gettimeofday(struct timeval *tv, void * tz) +{ + do_gettimeofday(tv); + return 0; +} + +/* + * proc process routines of user space + */ + +struct idr_context *cfs_proc_idp = NULL; + +int cfs_proc_open (char * filename, int oflag) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + int rc = 0; + + HANDLE Handle = INVALID_HANDLE_VALUE; + OBJECT_ATTRIBUTES ObjectAttributes; + ACCESS_MASK DesiredAccess; + ULONG CreateDisposition; + ULONG ShareAccess; + ULONG CreateOptions; + UNICODE_STRING UnicodeName; + USHORT NameLength; + + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + PUCHAR EaBuffer = NULL; + + /* Check the filename: should start with "/proc" or "/dev" */ + NameLength = (USHORT)strlen(filename); + if (NameLength > 0x05) { + if (_strnicmp(filename, 
"/proc/", 6) == 0) { + if (NameLength <= 6) { + rc = -EINVAL; + goto errorout; + } + } else if (_strnicmp(filename, "/dev/", 5) == 0) { + } else { + rc = -EINVAL; + goto errorout; + } + } else { + rc = -EINVAL; + goto errorout; + } + + /* Analyze the flags settings */ + + if (cfs_is_flag_set(oflag, O_WRONLY)) { + DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = 0; + } else if (cfs_is_flag_set(oflag, O_RDWR)) { + DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; + } else { + DesiredAccess = (GENERIC_READ | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ; + } + + if (cfs_is_flag_set(oflag, O_CREAT)) { + if (cfs_is_flag_set(oflag, O_EXCL)) { + CreateDisposition = FILE_CREATE; + rc = -EINVAL; + goto errorout; + } else { + CreateDisposition = FILE_OPEN_IF; + } + } else { + CreateDisposition = FILE_OPEN; + } + + if (cfs_is_flag_set(oflag, O_TRUNC)) { + if (cfs_is_flag_set(oflag, O_EXCL)) { + CreateDisposition = FILE_OVERWRITE; + } else { + CreateDisposition = FILE_OVERWRITE_IF; + } + } + + CreateOptions = 0; + + if (cfs_is_flag_set(oflag, O_DIRECTORY)) { + cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); + } + + if (cfs_is_flag_set(oflag, O_SYNC)) { + cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); + } + + if (cfs_is_flag_set(oflag, O_DIRECT)) { + cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); + } + + /* Initialize the unicode path name for the specified file */ + RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK); + + /* Setup the object attributes structure for the file. */ + InitializeObjectAttributes( + &ObjectAttributes, + &UnicodeName, + OBJ_CASE_INSENSITIVE, + NULL, + NULL ); + + /* building EA for the proc entry ... 
*/ + EaBuffer = malloc(NameLength + sizeof(FILE_FULL_EA_INFORMATION)); + if (!EaBuffer) { + rc = -ENOMEM; + goto errorout; + } + memset(EaBuffer, 0, NameLength + sizeof(FILE_FULL_EA_INFORMATION)); + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = (UCHAR)NameLength; + Ea->EaValueLength = 0; + RtlCopyMemory( + &(Ea->EaName), + filename, + NameLength + 1 + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + + Ea->EaNameLength + 1; + + /* Now to open or create the file now */ + status = NtCreateFile( + &Handle, + DesiredAccess, + &ObjectAttributes, + &iosb, + 0, + FILE_ATTRIBUTE_NORMAL, + ShareAccess, + CreateDisposition, + CreateOptions, + Ea, + EaLength ); + + /* Check the returned status of Iosb ... */ + + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + goto errorout; + } + +errorout: + + if (Handle) { + rc = cfs_idr_get_new(cfs_proc_idp, Handle); + if (rc < 0) { + NtClose(Handle); + } + } + + if (EaBuffer) { + free(EaBuffer); + } + + return rc; +} + +int cfs_proc_close(int fd) +{ + HANDLE handle = cfs_idr_find(cfs_proc_idp, fd); + + if (handle) { + NtClose(handle); + } + + cfs_idr_remove(cfs_proc_idp, fd); + + return 0; +} + +int cfs_proc_read_internal( + int fd, void *buffer, + unsigned int count, + unsigned int offlow, + unsigned int offhigh + ) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + LARGE_INTEGER offset; + + HANDLE handle = cfs_idr_find(cfs_proc_idp, fd); + offset.HighPart = offhigh; + offset.LowPart = offlow; + + /* read file data */ + status = NtReadFile( + handle, + 0, + NULL, + NULL, + &iosb, + buffer, + count, + &offset, + NULL); + + /* check the return status */ + if (!NT_SUCCESS(status)) { + printf("NtReadFile request failed with status: 0x%0x\n", status); + goto errorout; + } + +errorout: + + if (NT_SUCCESS(status)) { + return (int)(iosb.Information); + } + + return cfs_error_code(status); +} + +int cfs_proc_read( + int fd, void *buffer, + unsigned int count + ) +{ + 
return cfs_proc_read_internal(fd, buffer, count, 0, 0); +} + +int cfs_proc_write_internal( + int fd, void *buffer, + unsigned int count, + unsigned int offlow, + unsigned int offhigh + ) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + LARGE_INTEGER offset; + + HANDLE handle = cfs_idr_find(cfs_proc_idp, fd); + offset.HighPart = offhigh; + offset.LowPart = offlow; + + /* write buffer to the opened file */ + status = NtWriteFile( + handle, + 0, + NULL, + NULL, + &iosb, + buffer, + count, + &offset, + NULL); + + /* check the return status */ + if (!NT_SUCCESS(status)) { + printf("NtWriteFile request failed 0x%0x\n", status); + goto errorout; + } + +errorout: + + if (NT_SUCCESS(status)) { + return (int)(iosb.Information); + } + + return cfs_error_code(status); +} + +int cfs_proc_write( + int fd, void *buffer, + unsigned int count + ) +{ + return cfs_proc_write_internal(fd, buffer, count, 0, 0); +} + +int cfs_proc_ioctl(int fd, int cmd, void *buffer) +{ + PUCHAR procdat = NULL; + CFS_PROC_IOCTL procctl; + ULONG length = 0; + ULONG extra = 0; + int rc = 0; + + NTSTATUS status = STATUS_UNSUCCESSFUL; + IO_STATUS_BLOCK iosb; + + struct libcfs_ioctl_data * portal = buffer; + struct obd_ioctl_data * obd = buffer; + struct obd_ioctl_data * data; + + HANDLE handle = cfs_idr_find(cfs_proc_idp, fd); +#if defined(_X86_) + CLASSERT(sizeof(struct obd_ioctl_data) == 528); +#else + CLASSERT(sizeof(struct obd_ioctl_data) == 576); +#endif + memset(&procctl, 0, sizeof(CFS_PROC_IOCTL)); + procctl.cmd = cmd; + + if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) { + length = portal->ioc_len; + } else if (_IOC_TYPE(cmd) == 'f') { + length = obd->ioc_len; + extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2); + } else if(_IOC_TYPE(cmd) == 'u') { + length = 4; + extra = 0; + } else if(_IOC_TYPE(cmd) == 'i') { + length = obd->ioc_len; + extra = 0; + } else { + printf("cfs_proc_ioctl: un-supported ioctl type ...\n"); + cfs_enter_debugger(); + status = STATUS_INVALID_PARAMETER; + goto errorout; + 
} + + procctl.len = length + extra; + procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL)); + + if (NULL == procdat) { + printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n"); + status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; + } + memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL)); + memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL)); + memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length); + length += sizeof(CFS_PROC_IOCTL); + + if (_IOC_TYPE(cmd) == 'f') { + + data = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)); + if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && + cmd != (ULONG)OBD_IOC_BRW_READ ) { + + if (obd->ioc_pbuf1 && data->ioc_plen1) { + data->ioc_pbuf1 = &procdat[length]; + memcpy(data->ioc_pbuf1, obd->ioc_pbuf1, obd->ioc_plen1); + length += size_round(obd->ioc_plen1); + } else { + data->ioc_plen1 = 0; + data->ioc_pbuf1 = NULL; + } + + if (obd->ioc_pbuf2 && obd->ioc_plen2) { + data->ioc_pbuf2 = &procdat[length]; + memcpy(data->ioc_pbuf2, obd->ioc_pbuf2, obd->ioc_plen2); + length += size_round(obd->ioc_plen2); + } else { + data->ioc_plen2 = 0; + data->ioc_pbuf2 = NULL; + } + } else { + extra = 0; + } + + ASSERT(length == extra + sizeof(CFS_PROC_IOCTL) + data->ioc_len); + if (obd_ioctl_is_invalid(obd)) { + cfs_enter_debugger(); + } + } + + status = NtDeviceIoControlFile( + handle, NULL, NULL, + NULL, &iosb, + IOCTL_LIBCFS_ENTRY, + procdat, length, + procdat, length ); + + + if (_IOC_TYPE(cmd) == 'f') { + + length = sizeof(CFS_PROC_IOCTL); + ASSERT(data == (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL))); + if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && + cmd != (ULONG)OBD_IOC_BRW_READ ) { + + if (obd->ioc_pbuf1) { + ASSERT(obd->ioc_plen1 == data->ioc_plen1); + data->ioc_pbuf1 = &procdat[length]; + memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, obd->ioc_plen1); + length += size_round(obd->ioc_plen1); + } + if (obd->ioc_pbuf2) { + ASSERT(obd->ioc_plen2 == data->ioc_plen2); + data->ioc_pbuf2 = 
&procdat[length]; + memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, obd->ioc_plen2); + length += size_round(obd->ioc_plen2); + } + } + data->ioc_inlbuf1 = obd->ioc_inlbuf1; + data->ioc_inlbuf2 = obd->ioc_inlbuf2; + data->ioc_inlbuf3 = obd->ioc_inlbuf3; + data->ioc_inlbuf4 = obd->ioc_inlbuf4; + data->ioc_pbuf1 = obd->ioc_pbuf1; + data->ioc_pbuf2 = obd->ioc_pbuf2; + memcpy(obd, data, obd->ioc_len); + + } else { + + memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); + } + +errorout: + + if (STATUS_SUCCESS == status) { + rc = ((CFS_PROC_IOCTL *)procdat)->rc; + } else { + rc = cfs_error_code(status); + } + + if (procdat) { + free(procdat); + } + + return rc; +} + + +int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev) +{ + return 0; +} + +FILE *cfs_proc_fopen(char *path, char * mode) +{ + int fp = cfs_proc_open(path, O_RDWR); + if (fp > 0) { + return (FILE *)(LONG_PTR)fp; + } + + return NULL; +} + +char *cfs_proc_fgets(char * buf, int len, FILE *fp) +{ + int rc = 0; + + if (fp == NULL) { + return NULL; + } + + rc = cfs_proc_read_internal((int)(LONG_PTR)fp, + buf, len, -1, 1); + if (rc <= 0) { + return NULL; + } + + return buf; +} + +int cfs_proc_fclose(FILE *fp) +{ + if (fp == NULL) { + return -1; + } + + return cfs_proc_close((int)(LONG_PTR)fp); +} + +void cfs_libc_init(); + +int +libcfs_arch_init(void) +{ + cfs_libc_init(); + cfs_proc_idp = cfs_idr_init(); + + if (cfs_proc_idp) { + return 0; + } + + return -ENOMEM; +} + +void +libcfs_arch_cleanup(void) +{ + if (cfs_proc_idp) { + cfs_idr_exit(cfs_proc_idp); + cfs_proc_idp = NULL; + } +} + +#endif /* __KERNEL__ */ diff --git a/libcfs/libcfs/winnt/winnt-prim.c b/libcfs/libcfs/winnt/winnt-prim.c index e77ac73..e62a39e 100644 --- a/libcfs/libcfs/winnt/winnt-prim.c +++ b/libcfs/libcfs/winnt/winnt-prim.c @@ -151,7 +151,7 @@ int cfs_kernel_thread(int (*func)(void *), void *arg, int flag) static CFS_DECL_RWSEM(cfs_symbol_lock); CFS_LIST_HEAD(cfs_symbol_list); -int MPSystem = FALSE; +int libcfs_is_mp_system = FALSE; 
/* * cfs_symbol_get @@ -364,7 +364,12 @@ cfs_timer_dpc_proc ( KeReleaseSpinLock(&(timer->Lock), Irql); /* call the user specified timer procedure */ - timer->proc((unsigned long)(timer->arg)); + timer->proc((long_ptr_t)timer->arg); +} + +void cfs_init_timer(cfs_timer_t *timer) +{ + memset(timer, 0, sizeof(cfs_timer_t)); } /* @@ -383,7 +388,7 @@ cfs_timer_dpc_proc ( * N/A */ -void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg) +void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr_t), void *arg) { memset(timer, 0, sizeof(cfs_timer_t)); @@ -441,7 +446,7 @@ void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline) timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline; - if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) { + if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc)) { cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); } @@ -533,6 +538,11 @@ void cfs_daemonize(char *str) return; } +int cfs_daemonize_ctxt(char *str) { + cfs_daemonize(str); + return 0; +} + /* * routine related with sigals */ @@ -566,10 +576,177 @@ void cfs_clear_sigpending(void) return; } +/* + * thread cpu affinity routines + */ + +typedef struct _THREAD_BASIC_INFORMATION { + NTSTATUS ExitStatus; + PVOID TebBaseAddress; + CLIENT_ID ClientId; + ULONG_PTR AffinityMask; + KPRIORITY Priority; + LONG BasePriority; +} THREAD_BASIC_INFORMATION; + +typedef THREAD_BASIC_INFORMATION *PTHREAD_BASIC_INFORMATION; + +#define THREAD_QUERY_INFORMATION (0x0040) + +NTSYSAPI +NTSTATUS +NTAPI +ZwOpenThread ( + __out PHANDLE ThreadHandle, + __in ACCESS_MASK DesiredAccess, + __in POBJECT_ATTRIBUTES ObjectAttributes, + __in_opt PCLIENT_ID ClientId + ); + +NTSYSAPI +NTSTATUS +NTAPI +ZwQueryInformationThread ( + __in HANDLE ThreadHandle, + __in THREADINFOCLASS ThreadInformationClass, + __out_bcount(ThreadInformationLength) PVOID ThreadInformation, + __in ULONG ThreadInformationLength, + __out_opt PULONG ReturnLength + ); + +NTSYSAPI +NTSTATUS +NTAPI 
+ZwSetInformationThread ( + __in HANDLE ThreadHandle, + __in THREADINFOCLASS ThreadInformationClass, + __in_bcount(ThreadInformationLength) PVOID ThreadInformation, + __in ULONG ThreadInformationLength + ); + +HANDLE +cfs_open_current_thread() +{ + NTSTATUS status; + HANDLE handle = NULL; + OBJECT_ATTRIBUTES oa; + CLIENT_ID cid; + + /* initialize object attributes */ + InitializeObjectAttributes( &oa, NULL, OBJ_KERNEL_HANDLE | + OBJ_CASE_INSENSITIVE, NULL, NULL); + + /* initialize client id */ + cid.UniqueProcess = PsGetCurrentProcessId(); + cid.UniqueThread = PsGetCurrentThreadId(); + + /* get thread handle */ + status = ZwOpenThread( &handle, THREAD_QUERY_INFORMATION | + THREAD_SET_INFORMATION, &oa, &cid); + if (!NT_SUCCESS(status)) { + handle = NULL; + } + + return handle; +} + +void +cfs_close_thread_handle(HANDLE handle) +{ + if (handle) + ZwClose(handle); +} + +KAFFINITY +cfs_query_thread_affinity() +{ + NTSTATUS status; + HANDLE handle = NULL; + DWORD size; + THREAD_BASIC_INFORMATION TBI = {0}; + + /* open current thread */ + handle = cfs_open_current_thread(); + if (!handle) { + goto errorout; + } + + /* query thread cpu affinity */ + status = ZwQueryInformationThread(handle, ThreadBasicInformation, + &TBI, sizeof(THREAD_BASIC_INFORMATION), &size); + if (!NT_SUCCESS(status)) { + goto errorout; + } + +errorout: + + cfs_close_thread_handle(handle); + return TBI.AffinityMask; +} + +int +cfs_set_thread_affinity(KAFFINITY affinity) +{ + NTSTATUS status = STATUS_UNSUCCESSFUL; /* returned as failure when the thread handle cannot be opened */ + HANDLE handle = NULL; + + /* open current thread */ + handle = cfs_open_current_thread(); + if (!handle) { + goto errorout; + } + + /* set thread cpu affinity */ + status = ZwSetInformationThread(handle, ThreadAffinityMask, + &affinity, sizeof(KAFFINITY)); + if (!NT_SUCCESS(status)) { + goto errorout; + } + +errorout: + + cfs_close_thread_handle(handle); + return NT_SUCCESS(status); +} + +int +cfs_tie_thread_to_cpu(int cpu) +{ + return cfs_set_thread_affinity((KAFFINITY)1 << cpu); /* shift in KAFFINITY width, not int */ +} + +int 
+cfs_set_thread_priority(KPRIORITY priority) +{ + NTSTATUS status; + HANDLE handle = NULL; + + /* open current thread */ + handle = cfs_open_current_thread(); + if (!handle) { + goto errorout; + } + + /* set thread cpu affinity */ + status = ZwSetInformationThread(handle, ThreadPriority, + &priority, sizeof(KPRIORITY)); + if (!NT_SUCCESS(status)) { + KdPrint(("set_thread_priority failed: %xh\n", status)); + goto errorout; + } + +errorout: + + cfs_close_thread_handle(handle); + return NT_SUCCESS(status); +} + /** ** Initialize routines **/ +void cfs_libc_init(); + int libcfs_arch_init(void) { @@ -579,11 +756,15 @@ libcfs_arch_init(void) /* Workground to check the system is MP build or UP build */ spin_lock_init(&lock); spin_lock(&lock); - MPSystem = (int)lock.lock; + libcfs_is_mp_system = (int)lock.lock; /* MP build system: it's a real spin, for UP build system, it only raises the IRQL to DISPATCH_LEVEL */ spin_unlock(&lock); + /* initialize libc routines (confliction between libcnptr.lib + and kernel ntoskrnl.lib) */ + cfs_libc_init(); + /* create slab memory caches for page alloctors */ cfs_page_t_slab = cfs_mem_cache_create( "CPGT", sizeof(cfs_page_t), 0, 0 ); @@ -598,7 +779,6 @@ libcfs_arch_init(void) } rc = init_task_manager(); - if (rc != 0) { cfs_enter_debugger(); KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n")); @@ -607,7 +787,6 @@ libcfs_arch_init(void) /* initialize the proc file system */ rc = proc_init_fs(); - if (rc != 0) { cfs_enter_debugger(); KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n")); @@ -617,15 +796,16 @@ libcfs_arch_init(void) /* initialize the tdi data */ rc = ks_init_tdi_data(); - if (rc != 0) { cfs_enter_debugger(); - KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n")); + KdPrint(("winnt-prim.c:libcfs_arch_init: failed to initialize tdi.\n")); proc_destroy_fs(); cleanup_task_manager(); goto errorout; } + rc = start_shrinker_timer(); + errorout: if (rc != 0) 
{ @@ -644,12 +824,18 @@ errorout: void libcfs_arch_cleanup(void) { + /* stop shrinker timer */ + stop_shrinker_timer(); + /* finialize the tdi data */ ks_fini_tdi_data(); /* detroy the whole proc fs tree and nodes */ proc_destroy_fs(); + /* cleanup context of task manager */ + cleanup_task_manager(); + /* destroy the taskslot cache slab */ if (cfs_page_t_slab) { cfs_mem_cache_destroy(cfs_page_t_slab); @@ -659,7 +845,7 @@ libcfs_arch_cleanup(void) cfs_mem_cache_destroy(cfs_page_p_slab); } - return; + return; } EXPORT_SYMBOL(libcfs_arch_init); diff --git a/libcfs/libcfs/winnt/winnt-proc.c b/libcfs/libcfs/winnt/winnt-proc.c index dd1e0d2..bf41886 100644 --- a/libcfs/libcfs/winnt/winnt-proc.c +++ b/libcfs/libcfs/winnt/winnt-proc.c @@ -43,6 +43,7 @@ #include #include "tracefile.h" +#include #ifdef __KERNEL__ @@ -51,25 +52,26 @@ * /proc emulator routines ... */ +/* The root node of the proc fs emulation: / */ +cfs_proc_entry_t * cfs_proc_root = NULL; + /* The root node of the proc fs emulation: /proc */ -cfs_proc_entry_t * proc_fs_root = NULL; +cfs_proc_entry_t * cfs_proc_proc = NULL; +/* The fs sys directory: /proc/fs */ +cfs_proc_entry_t * cfs_proc_fs = NULL; /* The sys root: /proc/sys */ -cfs_proc_entry_t * proc_sys_root = NULL; - +cfs_proc_entry_t * cfs_proc_sys = NULL; /* The sys root: /proc/dev | to implement misc device */ - -cfs_proc_entry_t * proc_dev_root = NULL; +cfs_proc_entry_t * cfs_proc_dev = NULL; /* SLAB object for cfs_proc_entry_t allocation */ - cfs_mem_cache_t * proc_entry_cache = NULL; /* root node for sysctl table */ - cfs_sysctl_table_header_t root_table_header; /* The global lock to protect all the access */ @@ -101,7 +103,7 @@ proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos char *start; cfs_proc_entry_t * dp; - dp = (cfs_proc_entry_t *) file->private_data; + dp = (cfs_proc_entry_t *) file->f_inode->i_priv; if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0))) return -ENOMEM; @@ -158,7 +160,7 @@ proc_file_write(struct 
file * file, const char * buffer, { cfs_proc_entry_t * dp; - dp = (cfs_proc_entry_t *) file->private_data; + dp = (cfs_proc_entry_t *) file->f_inode->i_priv; if (!dp->write_proc) return -EIO; @@ -168,6 +170,7 @@ proc_file_write(struct file * file, const char * buffer, } struct file_operations proc_file_operations = { + /*owner*/ THIS_MODULE, /*lseek:*/ NULL, //proc_file_lseek, /*read:*/ proc_file_read, /*write:*/ proc_file_write, @@ -212,7 +215,7 @@ proc_free_entry(cfs_proc_entry_t * entry) void proc_dissect_name( - char *path, + const char *path, char **first, int *first_len, char **remain @@ -229,12 +232,12 @@ proc_dissect_name( if (i < len) { - *first = path + i; + *first = (char *)path + i; while (i < len && (path[i] != '/')) i++; - *first_len = (path + i - *first); + *first_len = (int)(path + i - *first); if (i + 1 < len) { - *remain = path + i + 1; + *remain = (char *)path + i + 1; } } } @@ -282,7 +285,6 @@ proc_search_splay ( /* The prefix is less than the full name so we go down the right child */ - // link = RtlRightChild(link); @@ -362,6 +364,7 @@ proc_insert_splay ( cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED); parent->nlink++; + child->parent = parent; return TRUE; } @@ -381,6 +384,7 @@ proc_remove_splay ( ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED)); + ASSERT(child->parent == parent); entry = proc_search_splay(parent, child->name); @@ -401,7 +405,7 @@ proc_remove_splay ( cfs_proc_entry_t * proc_search_entry( - char * name, + const char * name, cfs_proc_entry_t * root ) { @@ -463,7 +467,7 @@ errorout: cfs_proc_entry_t * proc_insert_entry( - char * name, + const char * name, cfs_proc_entry_t * root ) { @@ -523,7 +527,7 @@ again: void proc_remove_entry( - char * name, + const char * name, cfs_proc_entry_t * root ) { @@ -564,12 +568,11 @@ proc_remove_entry( cfs_proc_entry_t * create_proc_entry ( - char * name, + const char * 
name, mode_t mode, - cfs_proc_entry_t * root + cfs_proc_entry_t * parent ) { - cfs_proc_entry_t *parent = root; cfs_proc_entry_t *entry = NULL; if (S_ISDIR(mode)) { @@ -583,11 +586,15 @@ create_proc_entry ( } LOCK_PROCFS(); - - ASSERT(NULL != proc_fs_root); + ASSERT(NULL != cfs_proc_root); if (!parent) { - parent = proc_fs_root; + if (name[0] == '/') { + parent = cfs_proc_root; + } else { + ASSERT(NULL != cfs_proc_proc); + parent = cfs_proc_proc; + } } entry = proc_search_entry(name, parent); @@ -619,15 +626,21 @@ errorout: cfs_proc_entry_t * search_proc_entry( - char * name, + const char * name, cfs_proc_entry_t * root ) { cfs_proc_entry_t * entry; LOCK_PROCFS(); + ASSERT(cfs_proc_root != NULL); if (root == NULL) { - root = proc_fs_root; + if (name[0] == '/') { + root = cfs_proc_root; + } else { + ASSERT(cfs_proc_proc != NULL); + root = cfs_proc_proc; + } } entry = proc_search_entry(name, root); UNLOCK_PROCFS(); @@ -639,13 +652,19 @@ search_proc_entry( void remove_proc_entry( - char * name, + const char * name, cfs_proc_entry_t * parent ) { LOCK_PROCFS(); + ASSERT(cfs_proc_root != NULL); if (parent == NULL) { - parent = proc_fs_root; + if (name[0] == '/') { + parent = cfs_proc_root; + } else { + ASSERT(cfs_proc_proc != NULL); + parent = cfs_proc_proc; + } } proc_remove_entry(name, parent); UNLOCK_PROCFS(); @@ -668,6 +687,30 @@ void proc_destroy_splay(cfs_proc_entry_t * entry) proc_free_entry(entry); } +cfs_proc_entry_t *proc_symlink( + const char *name, + cfs_proc_entry_t *parent, + const char *dest + ) +{ + cfs_enter_debugger(); + return NULL; +} + +cfs_proc_entry_t *proc_mkdir( + const char *name, + cfs_proc_entry_t *parent) +{ + return create_proc_entry((char *)name, S_IFDIR, parent); +} + +void proc_destory_subtree(cfs_proc_entry_t *entry) +{ + LOCK_PROCFS(); + entry->root = NULL; + proc_destroy_splay(entry); + UNLOCK_PROCFS(); +} /* destory the whole proc fs tree */ @@ -675,8 +718,8 @@ void proc_destroy_fs() { LOCK_PROCFS(); - if (proc_fs_root) { - 
proc_destroy_splay(proc_fs_root); + if (cfs_proc_root) { + proc_destroy_splay(cfs_proc_root); } if (proc_entry_cache) { @@ -686,14 +729,77 @@ void proc_destroy_fs() UNLOCK_PROCFS(); } -/* initilaize / build the proc fs tree */ +static char proc_item_path[512]; + +void proc_show_tree(cfs_proc_entry_t * node); +void proc_print_node(cfs_proc_entry_t * node) +{ + if (node != cfs_proc_root) { + if (S_ISDIR(node->mode)) { + printk("%s/%s/\n", proc_item_path, node->name); + } else { + printk("%s/%s\n", proc_item_path, node->name); + } + } else { + printk("%s\n", node->name); + } + + if (S_ISDIR(node->mode)) { + proc_show_tree(node); + } +} + +void proc_show_child(PRTL_SPLAY_LINKS link) +{ + cfs_proc_entry_t * entry = NULL; + + if (!link) { + return; + } + + proc_show_child(link->LeftChild); + entry = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link); + proc_print_node(entry); + proc_show_child(link->RightChild); +} + +void proc_show_tree(cfs_proc_entry_t * node) +{ + PRTL_SPLAY_LINKS link = NULL; + cfs_proc_entry_t * entry = NULL; + int i; + + link = node->root; + i = strlen(proc_item_path); + ASSERT(S_ISDIR(node->mode)); + if (node != cfs_proc_root) { + strcat(proc_item_path, "/"); + strcat(proc_item_path, node->name); + } + proc_show_child(link); + proc_item_path[i] = 0; +} + +void proc_print_splay() +{ + printk("=================================================\n"); + printk("Lustre virtual proc entries:\n"); + printk("-------------------------------------------------\n"); + LOCK_PROCFS(); + proc_show_tree(cfs_proc_root); + UNLOCK_PROCFS(); + printk("=================================================\n"); +} + + +/* initilaize / build the proc fs tree */ int proc_init_fs() { cfs_proc_entry_t * root = NULL; memset(&(root_table_header), 0, sizeof(struct ctl_table_header)); - INIT_LIST_HEAD(&(root_table_header.ctl_entry)); + CFS_INIT_LIST_HEAD(&(root_table_header.ctl_entry)); INIT_PROCFS_LOCK(); proc_entry_cache = cfs_mem_cache_create( @@ -708,49 +814,49 @@ int 
proc_init_fs() } root = proc_alloc_entry(); - if (!root) { proc_destroy_fs(); return (-ENOMEM); } - root->magic = CFS_PROC_ENTRY_MAGIC; root->flags = CFS_PROC_FLAG_DIRECTORY; root->mode = S_IFDIR | S_IRUGO | S_IXUGO; root->nlink = 3; // root should never be deleted. + root->name[0]='/'; + root->name[1]= 0; + cfs_proc_root = root; - root->name[0]='p'; - root->name[1]='r'; - root->name[2]='o'; - root->name[3]='c'; - - proc_fs_root = root; - - proc_sys_root = create_proc_entry("sys", S_IFDIR, root); - - if (!proc_sys_root) { - proc_free_entry(root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); + cfs_proc_dev = create_proc_entry("dev", S_IFDIR, root); + if (!cfs_proc_dev) { + goto errorout; } + cfs_proc_dev->nlink = 1; - proc_sys_root->nlink = 1; + cfs_proc_proc = create_proc_entry("proc", S_IFDIR, root); + if (!cfs_proc_proc) { + goto errorout; + } + cfs_proc_proc->nlink = 1; - proc_dev_root = create_proc_entry("dev", S_IFDIR, root); + cfs_proc_fs = create_proc_entry("fs", S_IFDIR, cfs_proc_proc); + if (!cfs_proc_fs) { + goto errorout; + } + cfs_proc_fs->nlink = 1; - if (!proc_dev_root) { - proc_free_entry(proc_sys_root); - proc_sys_root = NULL; - proc_free_entry(proc_fs_root); - proc_fs_root = NULL; - proc_destroy_fs(); - return (-ENOMEM); + cfs_proc_sys = create_proc_entry("sys", S_IFDIR, cfs_proc_proc); + if (!cfs_proc_sys) { + goto errorout; } + cfs_proc_sys->nlink = 1; - proc_dev_root->nlink = 1; - + return 0; + +errorout: + + proc_destroy_fs(); + return (-ENOMEM); } @@ -772,9 +878,6 @@ static ssize_t do_rw_proc(int write, struct file * file, char * buf, return -ENOTDIR; op = (write ? 
002 : 004); -// if (ctl_perm(table, op)) -// return -EPERM; - res = count; /* @@ -801,6 +904,7 @@ static ssize_t proc_writesys(struct file * file, const char * buf, struct file_operations proc_sys_file_operations = { + /*owner*/ THIS_MODULE, /*lseek:*/ NULL, /*read:*/ proc_readsys, /*write:*/ proc_writesys, @@ -943,14 +1047,14 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) if (*cp == '0') { base = 8; cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { + if ((*cp == 'x') && cfs_isxdigit(cp[1])) { cp++; base = 16; } } } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + while (cfs_isxdigit(*cp) && + (value = cfs_isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { result = result*base + value; cp++; } @@ -1353,13 +1457,13 @@ struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, return NULL; tmp->ctl_table = table; - INIT_LIST_HEAD(&tmp->ctl_entry); + CFS_INIT_LIST_HEAD(&tmp->ctl_entry); if (insert_at_head) list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); #ifdef CONFIG_PROC_FS - register_proc_table(table, proc_sys_root); + register_proc_table(table, cfs_proc_sys); #endif return tmp; } @@ -1375,7 +1479,7 @@ void unregister_sysctl_table(struct ctl_table_header * header) { list_del(&header->ctl_entry); #ifdef CONFIG_PROC_FS - unregister_proc_table(header->ctl_table, proc_sys_root); + unregister_proc_table(header->ctl_table, cfs_proc_sys); #endif cfs_free(header); } @@ -1388,7 +1492,7 @@ int cfs_psdev_register(cfs_psdev_t * psdev) entry = create_proc_entry ( (char *)psdev->name, S_IFREG, - proc_dev_root + cfs_proc_dev ); if (!entry) { @@ -1409,7 +1513,7 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev) entry = search_proc_entry ( (char *)psdev->name, - proc_dev_root + cfs_proc_dev ); if (entry) { @@ -1419,15 +1523,13 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev) remove_proc_entry( (char *)psdev->name, - 
proc_dev_root + cfs_proc_dev ); } return 0; } -extern char debug_file_path[1024]; - #define PSDEV_LNET (0x100) enum { PSDEV_DEBUG = 1, /* control debugging */ @@ -1446,10 +1548,8 @@ static struct ctl_table lnet_table[] = { sizeof(int), 0644, NULL, &proc_dointvec}, {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, + {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, /* {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, sizeof(portals_upcall), 0644, NULL, &proc_dostring, @@ -1469,7 +1569,7 @@ static struct ctl_table top_table[2] = { int trace_write_dump_kernel(struct file *file, const char *buffer, unsigned long count, void *data) { - int rc = trace_dump_debug_buffer_usrstr(buffer, count); + int rc = trace_dump_debug_buffer_usrstr((void *)buffer, count); return (rc < 0) ? rc : count; } @@ -1477,7 +1577,7 @@ int trace_write_dump_kernel(struct file *file, const char *buffer, int trace_write_daemon_file(struct file *file, const char *buffer, unsigned long count, void *data) { - int rc = trace_daemon_command_usrstr(buffer, count); + int rc = trace_daemon_command_usrstr((void *)buffer, count); return (rc < 0) ? 
rc : count; } @@ -1485,21 +1585,17 @@ int trace_write_daemon_file(struct file *file, const char *buffer, int trace_read_daemon_file(char *page, char **start, off_t off, int count, int *eof, void *data) { - int rc; - - tracefile_read_lock(); - + int rc; + tracefile_read_lock(); rc = trace_copyout_string(page, count, tracefile, "\n"); - tracefile_read_unlock(); - - return rc; + return rc; } int trace_write_debug_mb(struct file *file, const char *buffer, unsigned long count, void *data) { - int rc = trace_set_debug_mb_userstr(buffer, count); + int rc = 0; /*trace_set_debug_mb_userstr((void *)buffer, count);*/ return (rc < 0) ? rc : count; } @@ -1520,14 +1616,14 @@ int insert_proc(void) ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL); if (ent == NULL) { - CERROR(("couldn't register dump_kernel\n")); + CERROR("couldn't register dump_kernel\n"); return -1; } ent->write_proc = trace_write_dump_kernel; ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL); if (ent == NULL) { - CERROR(("couldn't register daemon_file\n")); + CERROR("couldn't register daemon_file\n"); return -1; } ent->write_proc = trace_write_daemon_file; @@ -1535,7 +1631,7 @@ int insert_proc(void) ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL); if (ent == NULL) { - CERROR(("couldn't register debug_mb\n")); + CERROR("couldn't register debug_mb\n"); return -1; } ent->write_proc = trace_write_debug_mb; @@ -1546,15 +1642,9 @@ int insert_proc(void) void remove_proc(void) { - remove_proc_entry("sys/portals/dump_kernel", NULL); - remove_proc_entry("sys/portals/daemon_file", NULL); - remove_proc_entry("sys/portals/debug_mb", NULL); - -#ifdef CONFIG_SYSCTL - if (portals_table_header) - unregister_sysctl_table(portals_table_header); - portals_table_header = NULL; -#endif + remove_proc_entry("sys/lnet/dump_kernel", NULL); + remove_proc_entry("sys/lnet/daemon_file", NULL); + remove_proc_entry("sys/lnet/debug_mb", NULL); } @@ -1569,30 +1659,33 @@ lustre_open_file(char * filename) cfs_file_t * fh = 
NULL; cfs_proc_entry_t * fp = NULL; - fp = search_proc_entry(filename, proc_fs_root); - + fp = search_proc_entry(filename, cfs_proc_root); if (!fp) { - rc = -ENOENT; return NULL; } fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO); - if (!fh) { - rc = -ENOMEM; return NULL; } - fh->private_data = (void *)fp; + fh->f_inode = cfs_alloc(sizeof(struct inode), CFS_ALLOC_ZERO); + if (!fh->f_inode) { + cfs_free(fh); + return NULL; + } + + fh->f_inode->i_priv = (void *)fp; fh->f_op = fp->proc_fops; if (fh->f_op->open) { - rc = (fh->f_op->open)(fh); + rc = (fh->f_op->open)(fh->f_inode, fh); } else { fp->nlink++; } if (0 != rc) { + cfs_free(fh->f_inode); cfs_free(fh); return NULL; } @@ -1606,14 +1699,14 @@ lustre_close_file(cfs_file_t * fh) int rc = 0; cfs_proc_entry_t * fp = NULL; - fp = (cfs_proc_entry_t *) fh->private_data; - + fp = (cfs_proc_entry_t *) fh->f_inode->i_priv; if (fh->f_op->release) { - rc = (fh->f_op->release)(fh); + rc = (fh->f_op->release)(fh->f_inode, fh); } else { fp->nlink--; } + cfs_free(fh->f_inode); cfs_free(fh); return rc; @@ -1622,7 +1715,7 @@ lustre_close_file(cfs_file_t * fh) int lustre_do_ioctl( cfs_file_t * fh, unsigned long cmd, - ulong_ptr arg ) + ulong_ptr_t arg ) { int rc = 0; @@ -1630,11 +1723,6 @@ lustre_do_ioctl( cfs_file_t * fh, rc = (fh->f_op->ioctl)(fh, cmd, arg); } - if (rc != 0) { - printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n", - cmd, arg, rc); - } - return rc; } @@ -1642,13 +1730,18 @@ int lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl) { int rc = 0; - ulong_ptr data; + ulong_ptr_t data; - data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL); + data = (ulong_ptr_t)devctl + sizeof(CFS_PROC_IOCTL); +#if defined(_X86_) + CLASSERT(sizeof(struct obd_ioctl_data) == 528); +#else + CLASSERT(sizeof(struct obd_ioctl_data) == 576); +#endif /* obd ioctl code */ if (_IOC_TYPE(devctl->cmd) == 'f') { -#if 0 + struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data; if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE && 
@@ -1656,17 +1749,21 @@ lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl) unsigned long off = obd->ioc_len; - if (obd->ioc_pbuf1) { + if (obd->ioc_plen1) { obd->ioc_pbuf1 = (char *)(data + off); off += size_round(obd->ioc_plen1); + } else { + obd->ioc_pbuf1 = NULL; } - if (obd->ioc_pbuf2) { + if (obd->ioc_plen2) { obd->ioc_pbuf2 = (char *)(data + off); + off += size_round(obd->ioc_plen2); + } else { + obd->ioc_pbuf2 = NULL; } } - #endif - } + } rc = lustre_do_ioctl(fh, devctl->cmd, data); @@ -1682,12 +1779,20 @@ lustre_read_file( char * buf ) { - size_t rc = 0; + size_t rc = 0; + off_t low, high; + + low = (off_t) size; + high = (off_t)(off >> 32); if (fh->f_op->read) { rc = (fh->f_op->read) (fh, buf, size, &off); } + if (rc) { + fh->f_pos = off + rc; + } + return rc; } @@ -1701,7 +1806,7 @@ lustre_write_file( ) { size_t rc = 0; - + off = 0; if (fh->f_op->write) { rc = (fh->f_op->write)(fh, buf, size, &off); } @@ -1709,347 +1814,528 @@ lustre_write_file( return rc; } -#else /* !__KERNEL__ */ - -#include -#include -#include /* - * proc process routines of user space + * seq file routines */ -HANDLE cfs_proc_open (char * filename, int oflag) -{ - NTSTATUS status; - IO_STATUS_BLOCK iosb; - int rc; - - HANDLE FileHandle = INVALID_HANDLE_VALUE; - OBJECT_ATTRIBUTES ObjectAttributes; - ACCESS_MASK DesiredAccess; - ULONG CreateDisposition; - ULONG ShareAccess; - ULONG CreateOptions; - UNICODE_STRING UnicodeName; - USHORT NameLength; - - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; - - /* Check the filename: should start with "/proc" or "/dev" */ - NameLength = (USHORT)strlen(filename); - if (NameLength > 0x05) { - if (_strnicmp(filename, "/proc/", 6) == 0) { - filename += 6; - NameLength -=6; - if (NameLength <= 0) { - rc = -EINVAL; - goto errorout; - } - } else if (_strnicmp(filename, "/dev/", 5) == 0) { - } else { - rc = -EINVAL; - goto errorout; - } - } else { - rc = -EINVAL; - goto errorout; - } - - /* Analyze the 
flags settings */ - - if (cfs_is_flag_set(oflag, O_WRONLY)) { - DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = 0; - } else if (cfs_is_flag_set(oflag, O_RDWR)) { - DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; - } else { - DesiredAccess = (GENERIC_READ | SYNCHRONIZE); - ShareAccess = FILE_SHARE_READ; - } - - if (cfs_is_flag_set(oflag, O_CREAT)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_CREATE; - rc = -EINVAL; - goto errorout; - } else { - CreateDisposition = FILE_OPEN_IF; - } - } else { - CreateDisposition = FILE_OPEN; - } - - if (cfs_is_flag_set(oflag, O_TRUNC)) { - if (cfs_is_flag_set(oflag, O_EXCL)) { - CreateDisposition = FILE_OVERWRITE; - } else { - CreateDisposition = FILE_OVERWRITE_IF; - } - } - - CreateOptions = 0; - - if (cfs_is_flag_set(oflag, O_DIRECTORY)) { - cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); - } - - if (cfs_is_flag_set(oflag, O_SYNC)) { - cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); - } - - if (cfs_is_flag_set(oflag, O_DIRECT)) { - cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); - } - - /* Initialize the unicode path name for the specified file */ - RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK); - - /* Setup the object attributes structure for the file. */ - InitializeObjectAttributes( - &ObjectAttributes, - &UnicodeName, - OBJ_CASE_INSENSITIVE, - NULL, - NULL ); - - /* building EA for the proc entry ... 
*/ - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = (UCHAR)NameLength; - Ea->EaValueLength = 0; - RtlCopyMemory( - &(Ea->EaName), - filename, - NameLength + 1 - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1; - - /* Now to open or create the file now */ - status = ZwCreateFile( - &FileHandle, - DesiredAccess, - &ObjectAttributes, - &iosb, - 0, - FILE_ATTRIBUTE_NORMAL, - ShareAccess, - CreateDisposition, - CreateOptions, - Ea, - EaLength ); - - /* Check the returned status of Iosb ... */ - - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - goto errorout; - } - -errorout: - - return FileHandle; +/** + * seq_open - initialize sequential file + * @file: file we initialize + * @op: method table describing the sequence + * + * seq_open() sets @file, associating it with a sequence described + * by @op. @op->start() sets the iterator up and returns the first + * element of sequence. @op->stop() shuts it down. @op->next() + * returns the next element of sequence. @op->show() prints element + * into the buffer. In case of error ->start() and ->next() return + * ERR_PTR(error). In the end of sequence they return %NULL. ->show() + * returns 0 in case of success and negative number in case of error. + */ +int seq_open(struct file *file, const struct seq_operations *op) +{ + struct seq_file *p = file->private_data; + + if (!p) { + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + file->private_data = p; + } + memset(p, 0, sizeof(*p)); + mutex_init(&p->lock); + p->op = op; + + /* + * Wrappers around seq_open(e.g. swaps_open) need to be + * aware of this. If they set f_version themselves, they + * should call seq_open first and then set f_version. 
+ */ + file->f_version = 0; + + /* SEQ files support lseek, but not pread/pwrite */ + file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE); + return 0; } +EXPORT_SYMBOL(seq_open); -int cfs_proc_close(HANDLE handle) +/** + * seq_read - ->read() method for sequential files. + * @file: the file to read from + * @buf: the buffer to read to + * @size: the maximum number of bytes to read + * @ppos: the current position in the file + * + * Ready-made ->f_op->read() + */ +ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - if (handle) { - NtClose((HANDLE)handle); - } - - return 0; + struct seq_file *m = (struct seq_file *)file->private_data; + size_t copied = 0; + loff_t pos; + size_t n; + void *p; + int err = 0; + + mutex_lock(&m->lock); + /* + * seq_file->op->..m_start/m_stop/m_next may do special actions + * or optimisations based on the file->f_version, so we want to + * pass the file->f_version to those methods. + * + * seq_file->version is just copy of f_version, and seq_file + * methods can treat it simply as file version. + * It is copied in first and copied out after all operations. + * It is convenient to have it as part of structure to avoid the + * need of passing another argument to all the seq_file methods. 
+ */ + m->version = file->f_version; + /* grab buffer if we didn't have one */ + if (!m->buf) { + m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + if (!m->buf) + goto Enomem; + } + /* if not empty - flush it first */ + if (m->count) { + n = min(m->count, size); + err = copy_to_user(buf, m->buf + m->from, n); + if (err) + goto Efault; + m->count -= n; + m->from += n; + size -= n; + buf += n; + copied += n; + if (!m->count) + m->index++; + if (!size) + goto Done; + } + /* we need at least one record in buffer */ + while (1) { + pos = m->index; + p = m->op->start(m, &pos); + err = PTR_ERR(p); + if (!p || IS_ERR(p)) + break; + err = m->op->show(m, p); + if (err) + break; + if (m->count < m->size) + goto Fill; + m->op->stop(m, p); + cfs_free(m->buf); + m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + if (!m->buf) + goto Enomem; + m->count = 0; + m->version = 0; + } + m->op->stop(m, p); + m->count = 0; + goto Done; +Fill: + /* they want more? let's try to get some more */ + while (m->count < size) { + size_t offs = m->count; + loff_t next = pos; + p = m->op->next(m, p, &next); + if (!p || IS_ERR(p)) { + err = PTR_ERR(p); + break; + } + err = m->op->show(m, p); + if (err || m->count == m->size) { + m->count = offs; + break; + } + pos = next; + } + m->op->stop(m, p); + n = min(m->count, size); + err = copy_to_user(buf, m->buf, n); + if (err) + goto Efault; + copied += n; + m->count -= n; + if (m->count) + m->from = n; + else + pos++; + m->index = pos; +Done: + if (!copied) + copied = err; + else + *ppos += copied; + file->f_version = m->version; + mutex_unlock(&m->lock); + return copied; +Enomem: + err = -ENOMEM; + goto Done; +Efault: + err = -EFAULT; + goto Done; } +EXPORT_SYMBOL(seq_read); -int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count) +static int traverse(struct seq_file *m, loff_t offset) { - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; - - - offset.QuadPart = 0; + loff_t pos = 0, index; + int error = 0; + void *p; + + 
m->version = 0; + index = 0; + m->count = m->from = 0; + if (!offset) { + m->index = index; + return 0; + } + if (!m->buf) { + m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + if (!m->buf) + return -ENOMEM; + } + p = m->op->start(m, &index); + while (p) { + error = PTR_ERR(p); + if (IS_ERR(p)) + break; + error = m->op->show(m, p); + if (error) + break; + if (m->count == m->size) + goto Eoverflow; + if (pos + (loff_t)(m->count) > offset) { + m->from = (size_t)(offset - pos); + m->count -= m->from; + m->index = index; + break; + } + pos += m->count; + m->count = 0; + if (pos == offset) { + index++; + m->index = index; + break; + } + p = m->op->next(m, p, &index); + } + m->op->stop(m, p); + return error; + +Eoverflow: + m->op->stop(m, p); + cfs_free(m->buf); + m->buf = cfs_alloc(m->size <<= 1, GFP_KERNEL | CFS_ALLOC_ZERO); + return !m->buf ? -ENOMEM : -EAGAIN; +} - /* read file data */ - status = NtReadFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); +/** + * seq_lseek - ->llseek() method for sequential files. + * @file: the file in question + * @offset: new position + * @origin: 0 for absolute, 1 for relative position + * + * Ready-made ->f_op->llseek() + */ +loff_t seq_lseek(struct file *file, loff_t offset, int origin) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + long long retval = -EINVAL; + + mutex_lock(&m->lock); + m->version = file->f_version; + switch (origin) { + case 1: + offset += file->f_pos; + case 0: + if (offset < 0) + break; + retval = offset; + if (offset != file->f_pos) { + while ((retval=traverse(m, offset)) == -EAGAIN) + ; + if (retval) { + /* with extreme prejudice... 
*/ + file->f_pos = 0; + m->version = 0; + m->index = 0; + m->count = 0; + } else { + retval = file->f_pos = offset; + } + } + } + file->f_version = m->version; + mutex_unlock(&m->lock); + return retval; +} +EXPORT_SYMBOL(seq_lseek); - /* check the return status */ - if (!NT_SUCCESS(status)) { - printf("NtReadFile request failed 0x%0x\n", status); - goto errorout; +/** + * seq_release - free the structures associated with sequential file. + * @file: file in question + * @inode: file->f_path.dentry->d_inode + * + * Frees the structures associated with sequential file; can be used + * as ->f_op->release() if you don't have private data to destroy. + */ +int seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + if (m) { + if (m->buf) + cfs_free(m->buf); + cfs_free(m); } + return 0; +} +EXPORT_SYMBOL(seq_release); -errorout: +/** + * seq_escape - print string into buffer, escaping some characters + * @m: target buffer + * @s: string + * @esc: set of characters that need escaping + * + * Puts string into buffer, replacing each occurrence of character from + * @esc with usual octal escape. Returns 0 in case of success, -1 - in + * case of overflow. + */ +int seq_escape(struct seq_file *m, const char *s, const char *esc) +{ + char *end = m->buf + m->size; + char *p; + char c; + + for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) { + if (!strchr(esc, c)) { + *p++ = c; + continue; + } + if (p + 3 < end) { + *p++ = '\\'; + *p++ = '0' + ((c & 0300) >> 6); + *p++ = '0' + ((c & 070) >> 3); + *p++ = '0' + (c & 07); + continue; + } + m->count = m->size; + return -1; + } + m->count = p - m->buf; + return 0; +} +EXPORT_SYMBOL(seq_escape); - if (NT_SUCCESS(status)) { - return iosb.Information; - } +int seq_printf(struct seq_file *m, const char *f, ...) 
+{ + va_list args; + int len; + + if (m->count < m->size) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); + va_end(args); + if (m->count + len < m->size) { + m->count += len; + return 0; + } + } + m->count = m->size; + return -1; +} +EXPORT_SYMBOL(seq_printf); - return cfs_error_code(status); +char *d_path(struct path *p, char *buffer, int buflen) +{ + cfs_enter_debugger(); + return ERR_PTR(-ENAMETOOLONG); } +int seq_path(struct seq_file *m, struct path *path, char *esc) +{ + if (m->count < m->size) { + char *s = m->buf + m->count; + char *p = d_path(path, s, m->size - m->count); + if (!IS_ERR(p)) { + while (s <= p) { + char c = *p++; + if (!c) { + p = m->buf + m->count; + m->count = s - m->buf; + return (int)(s - p); + } else if (!strchr(esc, c)) { + *s++ = c; + } else if (s + 4 > p) { + break; + } else { + *s++ = '\\'; + *s++ = '0' + ((c & 0300) >> 6); + *s++ = '0' + ((c & 070) >> 3); + *s++ = '0' + (c & 07); + } + } + } + } + m->count = m->size; + return -1; +} +EXPORT_SYMBOL(seq_path); -int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count) +static void *single_start(struct seq_file *p, loff_t *pos) { - NTSTATUS status; - IO_STATUS_BLOCK iosb; - LARGE_INTEGER offset; + return (void *) (INT_PTR) (*pos == 0); +} - offset.QuadPart = -1; +static void *single_next(struct seq_file *p, void *v, loff_t *pos) +{ + ++*pos; + return NULL; +} - /* write buffer to the opened file */ - status = NtWriteFile( - (HANDLE)handle, - 0, - NULL, - NULL, - &iosb, - buffer, - count, - &offset, - NULL); +static void single_stop(struct seq_file *p, void *v) +{ +} - /* check the return status */ - if (!NT_SUCCESS(status)) { - printf("NtWriteFile request failed 0x%0x\n", status); - goto errorout; - } +int single_open(struct file *file, int (*show)(struct seq_file *, void *), + void *data) +{ + struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL); + int res = -ENOMEM; + + if (op) { + op->start = single_start; + op->next = 
single_next; + op->stop = single_stop; + op->show = show; + res = seq_open(file, op); + if (!res) + ((struct seq_file *)file->private_data)->private = data; + else + cfs_free(op); + } + return res; +} +EXPORT_SYMBOL(single_open); -errorout: +int single_release(struct inode *inode, struct file *file) +{ + const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; + int res = seq_release(inode, file); + cfs_free((void *)op); + return res; +} +EXPORT_SYMBOL(single_release); - if (NT_SUCCESS(status)) { - return iosb.Information; - } +int seq_release_private(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; - return cfs_error_code(status); + cfs_free(seq->private); + seq->private = NULL; + return seq_release(inode, file); } +EXPORT_SYMBOL(seq_release_private); -int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer) +void *__seq_open_private(struct file *f, const struct seq_operations *ops, + int psize) { - PUCHAR procdat = NULL; - CFS_PROC_IOCTL procctl; - ULONG length = 0; - ULONG extra = 0; - - NTSTATUS status; - IO_STATUS_BLOCK iosb; + int rc; + void *private; + struct seq_file *seq; - procctl.cmd = cmd; + private = cfs_alloc(psize, GFP_KERNEL | CFS_ALLOC_ZERO); + if (private == NULL) + goto out; - if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) { - struct libcfs_ioctl_data * portal; - portal = (struct libcfs_ioctl_data *) buffer; - length = portal->ioc_len; - } else if (_IOC_TYPE(cmd) == 'f') { - struct obd_ioctl_data * obd; - obd = (struct obd_ioctl_data *) buffer; - length = obd->ioc_len; - extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2); - } else if(_IOC_TYPE(cmd) == 'u') { - length = 4; - extra = 0; - } else { - printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n"); - cfs_enter_debugger(); - status = STATUS_INVALID_PARAMETER; - goto errorout; - } + rc = seq_open(f, ops); + if (rc < 0) + goto out_free; - procctl.len = length + extra; - procdat = malloc(length + extra + 
sizeof(CFS_PROC_IOCTL)); + seq = f->private_data; + seq->private = private; + return private; - if (NULL == procdat) { - printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n"); - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL)); - memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL)); - memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length); - length += sizeof(CFS_PROC_IOCTL); +out_free: + cfs_free(private); +out: + return NULL; +} +EXPORT_SYMBOL(__seq_open_private); - if (_IOC_TYPE(cmd) == 'f') { +int seq_open_private(struct file *filp, const struct seq_operations *ops, + int psize) +{ + return __seq_open_private(filp, ops, psize) ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(seq_open_private); - char *ptr; - struct obd_ioctl_data * data; - struct obd_ioctl_data * obd; +int seq_putc(struct seq_file *m, char c) +{ + if (m->count < m->size) { + m->buf[m->count++] = c; + return 0; + } + return -1; +} +EXPORT_SYMBOL(seq_putc); - data = (struct obd_ioctl_data *) buffer; - obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)); - ptr = obd->ioc_bulk; +int seq_puts(struct seq_file *m, const char *s) +{ + int len = strlen(s); + if (m->count + len < m->size) { + memcpy(m->buf + m->count, s, len); + m->count += len; + return 0; + } + m->count = m->size; + return -1; +} +EXPORT_SYMBOL(seq_puts); - if (data->ioc_inlbuf1) { - obd->ioc_inlbuf1 = ptr; - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - } +struct list_head *seq_list_start(struct list_head *head, loff_t pos) +{ + struct list_head *lh; - if (data->ioc_inlbuf2) { - obd->ioc_inlbuf2 = ptr; - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - } - if (data->ioc_inlbuf3) { - obd->ioc_inlbuf3 = ptr; - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - } - if (data->ioc_inlbuf4) { - obd->ioc_inlbuf4 = ptr; - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - } - - if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && - cmd != 
(ULONG)OBD_IOC_BRW_READ ) { + list_for_each(lh, head) + if (pos-- == 0) + return lh; - if (data->ioc_pbuf1 && data->ioc_plen1) { - obd->ioc_pbuf1 = &procdat[length]; - memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1); - length += size_round(data->ioc_plen1); - } + return NULL; +} - if (data->ioc_pbuf2 && data->ioc_plen2) { - obd->ioc_pbuf2 = &procdat[length]; - memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2); - length += size_round(data->ioc_plen2); - } - } +EXPORT_SYMBOL(seq_list_start); - if (obd_ioctl_is_invalid(obd)) { - cfs_enter_debugger(); - } - } +struct list_head *seq_list_start_head(struct list_head *head, loff_t pos) +{ + if (!pos) + return head; - status = NtDeviceIoControlFile( - (HANDLE)handle, - NULL, NULL, NULL, &iosb, - IOCTL_LIBCFS_ENTRY, - procdat, length, - procdat, length ); + return seq_list_start(head, pos - 1); +} +EXPORT_SYMBOL(seq_list_start_head); - if (NT_SUCCESS(status)) { - memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); - } +struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos) +{ + struct list_head *lh; -errorout: + lh = ((struct list_head *)v)->next; + ++*ppos; + return lh == head ? NULL : lh; +} - if (procdat) { - free(procdat); - } +EXPORT_SYMBOL(seq_list_next); - return cfs_error_code(status); +struct proc_dir_entry *PDE(const struct inode *inode) +{ + return (struct proc_dir_entry *)inode->i_priv; } + #endif /* __KERNEL__ */ diff --git a/libcfs/libcfs/winnt/winnt-strusup.c b/libcfs/libcfs/winnt/winnt-strusup.c new file mode 100644 index 0000000..45fac94 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-strusup.c @@ -0,0 +1,250 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +# define DEBUG_SUBSYSTEM S_LNET + +#include + +/* + * Windows generic table support routines + */ + +#define TAG_RADIX_TABLE 'XIDR' +typedef struct _RADIX_TABLE_ELEMENT { + ULONG Key; + PVOID Value; +} RADIX_TABLE_ELEMENT, *PRADIX_TABLE_ELEMENT; + + +RTL_GENERIC_COMPARE_RESULTS +RadixCompareElement ( + IN PRTL_GENERIC_TABLE Table, + IN PVOID Index1, + IN PVOID Index2 + ) +{ + ULONG Key1, Key2; + + Key1 = *((ULONG UNALIGNED *) Index1); + Key2 = *((ULONG UNALIGNED *) Index2); + + if (Key1 < Key2) { + return GenericLessThan; + } else if (Key1 > Key2) { + return GenericGreaterThan; + } + + return GenericEqual; +} + +PVOID +RadixAllocateElement ( + IN PRTL_GENERIC_TABLE Table, + IN CLONG Size + ) +{ + return FsRtlAllocatePoolWithTag(NonPagedPool,Size, TAG_RADIX_TABLE); +} + +VOID +RadixDestroyElement ( + IN PRTL_GENERIC_TABLE Table, + IN PVOID Buffer + ) +{ + ExFreePoolWithTag(Buffer, TAG_RADIX_TABLE); +} + + +PVOID +RadixInsertElement( + IN PRTL_GENERIC_TABLE Table, + IN ULONG Key, + IN PVOID Value + ) +{ + RADIX_TABLE_ELEMENT element; + element.Key = Key; + element.Value = Value; + return RtlInsertElementGenericTable( Table, &element, + sizeof(RADIX_TABLE_ELEMENT), NULL ); +} + +BOOLEAN +RadixDeleteElement( + IN PRTL_GENERIC_TABLE Table, + IN ULONG Key + ) +{ + RADIX_TABLE_ELEMENT element; + element.Key = Key; + return 
RtlDeleteElementGenericTable(Table, &element); +} + + +PRADIX_TABLE_ELEMENT +RadixLookupElement ( + IN PRTL_GENERIC_TABLE Table, + IN ULONG Key + ) +{ + RADIX_TABLE_ELEMENT element; + + element.Key = Key; + return (PRADIX_TABLE_ELEMENT) + RtlLookupElementGenericTable(Table, &element); +} + +PRADIX_TABLE_ELEMENT +RadixGetNextElement ( + IN PRTL_GENERIC_TABLE Table, + IN PVOID * Restart + ) +{ + return (PRADIX_TABLE_ELEMENT) + RtlEnumerateGenericTableWithoutSplaying(Table, Restart); +} + + + +VOID +RadixInitTable( + IN PRTL_GENERIC_TABLE Table + ) +{ + + /* initialize radix generic table. */ + + RtlInitializeGenericTable( + Table, + RadixCompareElement, + RadixAllocateElement, + RadixDestroyElement, + NULL + ); +} + +VOID +RadixDestroyTable( + IN PRTL_GENERIC_TABLE Table + ) +{ + PRADIX_TABLE_ELEMENT element; + PVOID restart = NULL; + +Again: + element = (PRADIX_TABLE_ELEMENT) RadixGetNextElement(Table, &restart); + if (element) { + RadixDeleteElement(Table, element->Key); + goto Again; + } +} + +/* + * Radix Tree Support Routines + * + */ + +/** + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * them at *@results and returns the number of items which were placed at + * *@results. 
+ * + */ +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items) +{ + PRADIX_TABLE_ELEMENT element; + PVOID restart = NULL; + unsigned int i = 0; + + element = RadixLookupElement(&root->table, first_index); + restart = element; + while (element && i < max_items) { + results[i++] = element->Value; + element = RadixGetNextElement(&root->table, &restart); + } + + return i; +} + + +/** + * radix_tree_lookup - perform lookup operation on a radix tree + * @root: radix tree root + * @index: index key + * + * Lookup the item at the position @index in the radix tree @root. + * + */ +void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) +{ + PRADIX_TABLE_ELEMENT element; + int i = 0; + + element = RadixLookupElement(&root->table, index); + if (element) { + return element->Value; + } + + return NULL; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. + */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + if (RadixInsertElement(&root->table, index, item)) { + return 0; + } + + return -ENOMEM; +} + +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + RadixDeleteElement(&root->table, index); + return NULL; +} \ No newline at end of file diff --git a/libcfs/libcfs/winnt/winnt-sync.c b/libcfs/libcfs/winnt/winnt-sync.c index be2cb17..81b40df 100644 --- a/libcfs/libcfs/winnt/winnt-sync.c +++ b/libcfs/libcfs/winnt/winnt-sync.c @@ -34,7 +34,7 @@ * Lustre is a trademark of Sun Microsystems, Inc. 
*/ -#define DEBUG_SUBSYSTEM S_LIBCFS +#define DEBUG_SUBSYSTEM S_LNET #include @@ -61,7 +61,7 @@ void cfs_waitq_init(cfs_waitq_t *waitq) { waitq->magic = CFS_WAITQ_MAGIC; waitq->flags = 0; - INIT_LIST_HEAD(&(waitq->waiters)); + CFS_INIT_LIST_HEAD(&(waitq->waiters)); spin_lock_init(&(waitq->guard)); } @@ -103,8 +103,8 @@ void cfs_waitlink_init(cfs_waitlink_t *link) atomic_inc(&slot->count); - INIT_LIST_HEAD(&(link->waitq[0].link)); - INIT_LIST_HEAD(&(link->waitq[1].link)); + CFS_INIT_LIST_HEAD(&(link->waitq[0].link)); + CFS_INIT_LIST_HEAD(&(link->waitq[1].link)); link->waitq[0].waitl = link->waitq[1].waitl = link; } @@ -322,8 +322,9 @@ void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr) LASSERT(waitq->magic == CFS_WAITQ_MAGIC); spin_lock(&waitq->guard); - - list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) { + cfs_list_for_each_entry_typed(scan, &waitq->waiters, + cfs_waitlink_channel_t, + link) { cfs_waitlink_t *waitl = scan->waitl; @@ -407,7 +408,7 @@ void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state) atomic_dec(link->hits); LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); } else { - cfs_wait_event(link->event, 0); + cfs_wait_event_internal(link->event, 0); } } @@ -428,16 +429,16 @@ void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state) * What if it happens to be woken up at the just timeout time !? 
*/ -cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link, - cfs_task_state_t state, - cfs_duration_t timeout) +int64_t cfs_waitq_timedwait( cfs_waitlink_t *link, + cfs_task_state_t state, + int64_t timeout) { if (atomic_read(link->hits) > 0) { atomic_dec(link->hits); LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); - return TRUE; + return (int64_t)TRUE; } - return (cfs_duration_t)cfs_wait_event(link->event, timeout); + return (int64_t)cfs_wait_event_internal(link->event, timeout); } diff --git a/libcfs/libcfs/winnt/winnt-tcpip.c b/libcfs/libcfs/winnt/winnt-tcpip.c index 0ad20d9..ee3a5f0 100644 --- a/libcfs/libcfs/winnt/winnt-tcpip.c +++ b/libcfs/libcfs/winnt/winnt-tcpip.c @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: * * GPL HEADER START * @@ -34,5974 +34,5811 @@ * Lustre is a trademark of Sun Microsystems, Inc. 
*/ -#define DEBUG_SUBSYSTEM S_LIBCFS +#define DEBUG_SUBSYSTEM S_LNET #include #include -#define TDILND_MODULE_NAME L"Tdilnd" +#define TDILND_MODULE_NAME L"tdilnd" -ks_data_t ks_data; +ks_tdi_data_t ks_data; -ULONG -ks_tdi_send_flags(ULONG SockFlags) +VOID +KsDumpPrint(PCHAR buffer, ULONG length) { - ULONG TdiFlags = 0; - - if (cfs_is_flag_set(SockFlags, MSG_OOB)) { - cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED); - } - - if (cfs_is_flag_set(SockFlags, MSG_MORE)) { - cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL); - } - - if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { - cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); + ULONG i; + for (i=0; i < length; i++) { + if (((i+1) % 31) == 0) + printk("\n"); + printk("%2.2x ", (UCHAR)buffer[i]); } - - return TdiFlags; + printk("\n"); } -NTSTATUS -KsIrpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - if (NULL != Context) { - KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE); - } - - return STATUS_MORE_PROCESSING_REQUIRED; +PVOID +KsMapMdlBuffer (PMDL Mdl); - UNREFERENCED_PARAMETER(DeviceObject); - UNREFERENCED_PARAMETER(Irp); +VOID +KsDumpMdlChain(PMDL Mdl, ULONG length) +{ + PMDL mdl = Mdl; + PCHAR buffer = NULL; + ULONG len = 0; + int i = 0; + + while (mdl) { + printk("mdl %d:\n", i); + buffer = KsMapMdlBuffer(mdl); + KsDumpPrint(buffer, mdl->ByteCount); + len += mdl->ByteCount; + mdl = mdl->Next; + } + ASSERT(len == length); } - /* - * KsBuildTdiIrp - * Allocate a new IRP and initialize it to be issued to tdi + * KsLockUserBuffer + * Allocate MDL for the buffer and lock the pages into + * nonpaged pool * * Arguments: - * DeviceObject: device object created by the underlying - * TDI transport driver + * UserBuffer: the user buffer to be locked + * Length: length in bytes of the buffer + * Operation: read or write access + * pMdl: the result of the created mdl * * Return Value: - * PRIP: the allocated Irp in success or NULL in failure. 
+ * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * * NOTES: * N/A */ -PIRP -KsBuildTdiIrp( - IN PDEVICE_OBJECT DeviceObject +NTSTATUS +KsLockUserBuffer ( + IN PVOID UserBuffer, + IN BOOLEAN bPaged, + IN ULONG Length, + IN LOCK_OPERATION Operation, + OUT PMDL * pMdl ) { - PIRP Irp; - PIO_STACK_LOCATION IrpSp; + NTSTATUS Status; + PMDL Mdl = NULL; - // - // Allocating the IRP ... - // + LASSERT(UserBuffer != NULL); - Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); + *pMdl = NULL; - if (NULL != Irp) { + Mdl = IoAllocateMdl( + UserBuffer, + Length, + FALSE, + FALSE, + NULL + ); - // - // Getting the Next Stack Location ... - // + if (Mdl == NULL) { - IrpSp = IoGetNextIrpStackLocation(Irp); + Status = STATUS_INSUFFICIENT_RESOURCES; - // - // Initializing Irp ... - // + } else { - IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL; - IrpSp->Parameters.DeviceIoControl.IoControlCode = 0; + __try { + + if (bPaged) { + MmProbeAndLockPages( + Mdl, + KernelMode, + Operation + ); + } else { + MmBuildMdlForNonPagedPool( + Mdl + ); + } + + Status = STATUS_SUCCESS; + + *pMdl = Mdl; + + } __except (EXCEPTION_EXECUTE_HANDLER) { + + IoFreeMdl(Mdl); + + Mdl = NULL; + + cfs_enter_debugger(); + + Status = STATUS_INVALID_USER_BUFFER; + } } - return Irp; + return Status; } /* - * KsSubmitTdiIrp - * Issue the Irp to the underlying tdi driver + * KsMapMdlBuffer + * Map the mdl into a buffer in kernel space * * Arguments: - * DeviceObject: the device object created by TDI driver - * Irp: the I/O request packet to be processed - * bSynchronous: synchronous or not. If true, we need wait - * until the process is finished. 
- * Information: returned info + * Mdl: the mdl to be mapped * * Return Value: - * NTSTATUS: kernel status code + * PVOID: the buffer mapped or NULL in failure * * NOTES: * N/A */ -NTSTATUS -KsSubmitTdiIrp( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN BOOLEAN bSynchronous, - OUT PULONG Information - ) +PVOID +KsMapMdlBuffer (PMDL Mdl) { - NTSTATUS Status; - KEVENT Event; - - if (bSynchronous) { - - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); + LASSERT(Mdl != NULL); + return MmGetSystemAddressForMdlSafe( + Mdl, + NormalPagePriority + ); +} - IoSetCompletionRoutine( - Irp, - KsIrpCompletionRoutine, - &Event, - TRUE, - TRUE, - TRUE - ); - } - Status = IoCallDriver(DeviceObject, Irp); +/* + * KsReleaseMdl + * Unlock all the pages in the mdl + * + * Arguments: + * Mdl: memory description list to be released + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ - if (bSynchronous) { +VOID +KsReleaseMdl (IN PMDL Mdl, + IN int Paged ) +{ + LASSERT(Mdl != NULL); - if (STATUS_PENDING == Status) { + while (Mdl) { - Status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); - } + PMDL Next; - Status = Irp->IoStatus.Status; + Next = Mdl->Next; - if (Information) { - *Information = (ULONG)(Irp->IoStatus.Information); + if (Paged) { + MmUnlockPages(Mdl); } - Irp->MdlAddress = NULL; - IoFreeIrp(Irp); - } - - if (!NT_SUCCESS(Status)) { + IoFreeMdl(Mdl); - KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n", - Status, KsNtStatusToString(Status))); + Mdl = Next; } - - return (Status); } - - /* - * KsOpenControl - * Open the Control Channel Object ... 
+ * KsQueryMdlsSize + * Query the whole size of a MDL (may be chained) * * Arguments: - * DeviceName: the device name to be opened - * Handle: opened handle in success case - * FileObject: the fileobject of the device + * Mdl: the Mdl to be queried * * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) + * ULONG: the total size of the mdl * - * Notes: + * NOTES: * N/A */ -NTSTATUS -KsOpenControl( - IN PUNICODE_STRING DeviceName, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) +ULONG +KsQueryMdlsSize (PMDL Mdl) { - NTSTATUS Status = STATUS_SUCCESS; - - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; - + PMDL Next = Mdl; + ULONG Length = 0; - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); // - // Initializing ... + // Walking the MDL Chain ... // - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + while (Next) { + Length += MmGetMdlByteCount(Next); + Next = Next->Next; + } - // - // Creating the Transport Address Object ... - // - - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, - FILE_OPEN, - 0, - NULL, - 0 - ); + return (Length); +} +/* + * KsCopyMdlToBuffer + * Copy payload from Mdl to buffer + * + * Arguments: + * SourceMdl: the source mdl + * SourceOffset: start offset of the source + * DestinationBuffer: the dst buffer + * DestinationOffset: the offset where data are to be copied. 
+ * BytesTobecopied: the expteced bytes to be copied + * + * Return Value: + * Length of data copied from MDL to user buffer + * + * NOTES: + * N/A + */ - if (NT_SUCCESS(Status)) { +ULONG +KsCopyMdlToBuffer( + IN PMDL SourceMdl, + IN ULONG SourceOffset, + IN PVOID DestinationBuffer, + IN ULONG DestinationOffset, + IN ULONG BytesTobeCopied + ) +{ + PUCHAR SourceBuffer = NULL; + PUCHAR TargetBuffer = DestinationBuffer; + ULONG BytesCopied = 0; - // - // Now Obtaining the FileObject of the Transport Address ... - // + if (MmGetMdlByteCount(SourceMdl) <= SourceOffset) { + return 0; + } - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); + BytesCopied = MmGetMdlByteCount(SourceMdl) - SourceOffset; + if (BytesCopied > BytesTobeCopied) { + BytesCopied = BytesTobeCopied; + } - if (!NT_SUCCESS(Status)) { + SourceBuffer = (PUCHAR)KsMapMdlBuffer(SourceMdl); - cfs_enter_debugger(); - ZwClose(*Handle); - } + RtlMoveMemory(TargetBuffer + DestinationOffset, + SourceBuffer + SourceOffset, BytesCopied); - } else { + return BytesCopied; +} - cfs_enter_debugger(); - } +/* + * KsInitializeKsTsdu + * Initialize the Tsdu buffer header + * + * Arguments: + * KsTsdu: the Tsdu to be initialized + * Length: the total length of the Tsdu + * + * Return Value: + * VOID + * + * NOTES: + * N/A + */ - return (Status); +VOID +KsInitializeKsTsdu( + PKS_TSDU KsTsdu, + ULONG Length + ) +{ + KsTsdu->Magic = KS_TSDU_MAGIC; + KsTsdu->TotalLength = Length; + KsTsdu->StartOffset = KsTsdu->LastOffset = + KS_QWORD_ALIGN(sizeof(KS_TSDU)); } - /* - * KsCloseControl - * Release the Control Channel Handle and FileObject + * KsAllocateKsTsdu + * Reuse a Tsdu from the freelist or allocate a new Tsdu + * from the LookAsideList table or the NonPagedPool * * Arguments: - * Handle: the channel handle to be released - * FileObject: the fileobject to be released + * N/A * * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error 
code) + * PKS_Tsdu: the new Tsdu or NULL if it fails * * Notes: * N/A */ -NTSTATUS -KsCloseControl( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject - ) +PKS_TSDU +KsAllocateKsTsdu() { - NTSTATUS Status = STATUS_SUCCESS; + PKS_TSDU KsTsdu = NULL; - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + spin_lock(&(ks_data.ksnd_tsdu_lock)); - if (FileObject) { + if (!list_empty (&(ks_data.ksnd_freetsdus))) { - ObDereferenceObject(FileObject); - } + LASSERT(ks_data.ksnd_nfreetsdus > 0); - if (Handle) { + KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link); + list_del(&(KsTsdu->Link)); + ks_data.ksnd_nfreetsdus--; - Status = ZwClose(Handle); + } else { + + KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc( + ks_data.ksnd_tsdu_slab, 0); } - ASSERT(NT_SUCCESS(Status)); + spin_unlock(&(ks_data.ksnd_tsdu_lock)); - return (Status); + if (NULL != KsTsdu) { + RtlZeroMemory(KsTsdu, ks_data.ksnd_tsdu_size); + KsInitializeKsTsdu(KsTsdu, (ULONG)ks_data.ksnd_tsdu_size); + } + + return (KsTsdu); } +/* + * KsFreeKsTsdu + * Release a Tsdu: uninitialize then free it. + * + * Arguments: + * KsTsdu: Tsdu to be freed. + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +VOID +KsFreeKsTsdu( + PKS_TSDU KsTsdu + ) +{ + cfs_mem_cache_free( + ks_data.ksnd_tsdu_slab, + KsTsdu ); +} /* - * KsOpenAddress - * Open the tdi address object + * KsPutKsTsdu + * Move the Tsdu to the free tsdu list in ks_data. * * Arguments: - * DeviceName: device name of the address object - * pAddress: tdi address of the address object - * AddressLength: length in bytes of the tdi address - * Handle: the newly opened handle - * FileObject: the newly opened fileobject + * KsTsdu: Tsdu to be moved. 
* * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) + * N/A * * Notes: * N/A */ -NTSTATUS -KsOpenAddress( - IN PUNICODE_STRING DeviceName, - IN PTRANSPORT_ADDRESS pAddress, - IN ULONG AddressLength, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) +VOID +KsPutKsTsdu( + PKS_TSDU KsTsdu + ) { - NTSTATUS Status = STATUS_SUCCESS; + spin_lock(&(ks_data.ksnd_tsdu_lock)); + if (ks_data.ksnd_nfreetsdus > 128) { + KsFreeKsTsdu(KsTsdu); + } else { + list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus)); + ks_data.ksnd_nfreetsdus++; + } + spin_unlock(&(ks_data.ksnd_tsdu_lock)); +} - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; +/* with tconn lock acquired */ +ks_mdl_t * +KsLockTsdus( + ks_tconn_t * tconn, + PKS_TSDUMGR TsduMgr, + PULONG Flags, + PULONG Length + ) +{ + + ks_mdl_t * mdl = NULL; + ks_mdl_t * tail = NULL; - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; - // - // Building EA for the Address Object to be Opened ... - // + *Length = 0; - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH; - Ea->EaValueLength = (USHORT)AddressLength; - RtlCopyMemory( - &(Ea->EaName), - TdiTransportAddress, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - pAddress, - AddressLength - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) + - Ea->EaNameLength + AddressLength; + cfs_list_for_each_entry_typed(KsTsdu, + &TsduMgr->TsduList,KS_TSDU, Link) { - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + ULONG start = 0; + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + start = KsTsdu->StartOffset; - // - // Initializing ... 
- // + while (start < KsTsdu->LastOffset) { - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); + ks_mdl_t * iov = NULL; - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + start); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + start); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + start); + LASSERT(KsTsduDat->TsduType == TSDU_TYPE_DAT || + KsTsduBuf->TsduType == TSDU_TYPE_BUF || + KsTsduMdl->TsduType == TSDU_TYPE_MDL); - // - // Creating the Transport Address Object ... - // + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - 0, - FILE_ATTRIBUTE_NORMAL, - FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */ - FILE_OPEN, - 0, - Ea, - EaLength - ); + ASSERT(KsTsdu->LastOffset >= start + KsTsduDat->TotalLength); + if (KsTsduDat->Mdl) { + iov = KsTsduDat->Mdl; + } else { + KsLockUserBuffer( + &KsTsduDat->Data[KsTsduDat->StartOffset], + FALSE, + KsTsduDat->DataLength, + IoReadAccess, + &iov ); + KsTsduDat->Mdl = iov; + } +/* + printk("KsLockTsdus: %u\n", KsTsduDat->DataLength); + KsDumpPrint( + &KsTsduDat->Data[KsTsduDat->StartOffset], + KsTsduDat->DataLength); +*/ + *Length += KsTsduDat->DataLength; + start += KsTsduDat->TotalLength; + } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { - if (NT_SUCCESS(Status)) { + ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_BUF)); + if (KsTsduBuf->Mdl) { + iov = KsTsduBuf->Mdl; + } else { + KsLockUserBuffer( + (PUCHAR)KsTsduBuf->UserBuffer + + KsTsduBuf->StartOffset, + FALSE, + KsTsduBuf->DataLength, + IoReadAccess, + &iov ); + KsTsduBuf->Mdl = iov; + } - // - // Now Obtaining the FileObject of the Transport Address ... 
- // + *Length += KsTsduBuf->DataLength; + start += sizeof(KS_TSDU_BUF); - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); - - if (!NT_SUCCESS(Status)) { + } else { - cfs_enter_debugger(); - ZwClose(*Handle); - } + LASSERT(TSDU_TYPE_MDL == KsTsduMdl->TsduType); + ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_MDL)); + iov = KsTsduMdl->Mdl; + *Length += KsTsduMdl->DataLength; + start += sizeof(KS_TSDU_MDL); + } - } else { + if (!iov) { + cfs_enter_debugger(); + goto cleanup; + } - cfs_enter_debugger(); + if (tail) { + tail->Next = iov; + } else { + mdl = iov; + } + tail = iov; + tail->Next = NULL; +/* + printk("KsLockTsdus: mdl %d\n", tail->ByteCount); + KsDumpMdlChain(tail, tail->ByteCount); +*/ + } + } +#if 0 + if (Flags) { + *Flags = TsduFlags; } +#endif + return mdl; - return (Status); +cleanup: + + *Length = 0; + return NULL; } -/* - * KsCloseAddress - * Release the Hanlde and FileObject of an opened tdi - * address object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseAddress( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject -) +ks_mdl_t * +KsSplitMdl( + IN ks_mdl_t * master, + IN ULONG offset, + IN ULONG length + ) { - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { + ks_mdl_t * mdl = NULL; + char * ptr = NULL; - ObDereferenceObject(FileObject); - } + /* calculate the start virtual address */ + ptr = (char *)KsMapMdlBuffer(master) + offset; - if (Handle) { + /* allocate new mdl for new memory range */ + mdl = IoAllocateMdl(ptr, length, FALSE, FALSE, NULL); - Status = ZwClose(Handle); + if (!mdl) { + return NULL; } + + /* initialize the mdl */ + IoBuildPartialMdl(master, mdl, (PVOID)ptr, length); - ASSERT(NT_SUCCESS(Status)); - - return (Status); + return mdl; } +/* with tconn lock 
acquired */ +VOID +KsReleaseTsdus( + ks_tconn_t * tconn, + PKS_TSDUMGR TsduMgr, + ULONG length + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; +#if DBG + ULONG total = TsduMgr->TotalBytes; + ULONG size = length; +#endif -/* - * KsOpenConnection - * Open a tdi connection object - * - * Arguments: - * DeviceName: device name of the connection object - * ConnectionContext: the connection context - * Handle: the newly opened handle - * FileObject: the newly opened fileobject - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ + LASSERT(TsduMgr->TotalBytes >= length); -NTSTATUS -KsOpenConnection( - IN PUNICODE_STRING DeviceName, - IN CONNECTION_CONTEXT ConnectionContext, - OUT HANDLE * Handle, - OUT PFILE_OBJECT * FileObject - ) -{ - NTSTATUS Status = STATUS_SUCCESS; + while (!list_empty(&TsduMgr->TsduList)) { - PFILE_FULL_EA_INFORMATION Ea = NULL; - ULONG EaLength; - UCHAR EaBuffer[EA_MAX_LENGTH]; + ULONG start = 0; - OBJECT_ATTRIBUTES ObjectAttributes; - IO_STATUS_BLOCK IoStatus; + KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + start = KsTsdu->StartOffset; - // - // Building EA for the Address Object to be Opened ... 
- // + while (length > 0 && start < KsTsdu->LastOffset) { - Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; - Ea->NextEntryOffset = 0; - Ea->Flags = 0; - Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH; - Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT); - RtlCopyMemory( - &(Ea->EaName), - TdiConnectionContext, - Ea->EaNameLength + 1 - ); - RtlMoveMemory( - &(Ea->EaName[Ea->EaNameLength + 1]), - &ConnectionContext, - sizeof(CONNECTION_CONTEXT) - ); - EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + - Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT); + ULONG size = 0; + ks_mdl_t * mdl = NULL; - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + start); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + start); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + start); + LASSERT(KsTsduDat->TsduType == TSDU_TYPE_DAT || + KsTsduBuf->TsduType == TSDU_TYPE_BUF || + KsTsduMdl->TsduType == TSDU_TYPE_MDL); + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - // - // Initializing ... - // + ASSERT(KsTsdu->LastOffset >= start + KsTsduDat->DataLength); + if (length >= KsTsduDat->DataLength) { + /* whole tsdu is sent out */ + size = KsTsduDat->DataLength; + start += KsTsduDat->TotalLength; + } else { + size = length; + KsTsduDat->StartOffset += size; + } - InitializeObjectAttributes( - &ObjectAttributes, - DeviceName, - OBJ_CASE_INSENSITIVE | - OBJ_KERNEL_HANDLE, - NULL, - NULL - ); + if (KsTsduDat->Mdl) { + mdl = KsTsduDat->Mdl; + KsTsduDat->Mdl = NULL; + } - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + KsTsduDat->DataLength -= size; - // - // Creating the Connection Object ... 
- // + } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { - Status = ZwCreateFile( - Handle, - FILE_READ_DATA | FILE_WRITE_DATA, - &ObjectAttributes, - &IoStatus, - NULL, - FILE_ATTRIBUTE_NORMAL, - 0, - FILE_OPEN, - 0, - Ea, - EaLength - ); + ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_BUF)); + if (length >= KsTsduBuf->DataLength) { + /* whole tsdu is sent out */ + size = KsTsduBuf->DataLength; + start += sizeof(KS_TSDU_BUF); + LASSERT(KsTsduBuf->UserBuffer); + ExFreePool(KsTsduBuf->UserBuffer); + KsTsduBuf->UserBuffer = NULL; + } else { + KsTsduBuf->StartOffset += length; + size = length; + } + if (KsTsduBuf->Mdl) { + mdl = KsTsduBuf->Mdl; + KsTsduBuf->Mdl = NULL; + } - if (NT_SUCCESS(Status)) { + KsTsduBuf->DataLength -= size; + + } else { - // - // Now Obtaining the FileObject of the Transport Address ... - // + LASSERT(TSDU_TYPE_MDL == KsTsduMdl->TsduType); + ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_MDL)); + mdl = KsTsduMdl->Mdl; + if (length >= KsTsduMdl->DataLength) { + /* whole mdl is sent out */ + size = KsTsduMdl->DataLength; + start += sizeof(KS_TSDU_MDL); + KsTsduMdl->Mdl = NULL; + } else { + /* now split the remained data out */ + ks_mdl_t * mdl1 = KsSplitMdl(mdl, length, + KsTsduMdl->DataLength - length); + if (NULL == mdl1) { + mdl->ByteOffset += length; + mdl = NULL; + } else { + KsTsduMdl->Mdl = mdl1; + } + size = length; + KsTsduMdl->StartOffset += size; + } - Status = ObReferenceObjectByHandle( - *Handle, - FILE_ANY_ACCESS, - NULL, - KernelMode, - FileObject, - NULL - ); + KsTsduMdl->DataLength -= size; + } - if (!NT_SUCCESS(Status)) { + length -= size; + TsduMgr->TotalBytes -= size; - cfs_enter_debugger(); - ZwClose(*Handle); + if (mdl) { + mdl->Next = NULL; + KsReleaseMdl(mdl, FALSE); + } + + KsTsdu->StartOffset = start; } - } else { + if (KsTsdu->StartOffset >= KsTsdu->LastOffset) { - cfs_enter_debugger(); + /* remove KsTsdu from list */ + list_del(&KsTsdu->Link); + TsduMgr->NumOfTsdu--; + KsPutKsTsdu(KsTsdu); + } + + if 
(length == 0) { + break; + } } - return (Status); + LASSERT(length == 0); +#if DBG + LASSERT(total - size == TsduMgr->TotalBytes); + KsPrint((4, "KsReleaseTsdus: TsduMgr=%p Remained=%xh (%xh)\n", + TsduMgr, TsduMgr->TotalBytes, size )); +#endif } -/* - * KsCloseConnection - * Release the Hanlde and FileObject of an opened tdi - * connection object - * - * Arguments: - * Handle: the handle to be released - * FileObject: the fileobject to be released - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsCloseConnection( - IN HANDLE Handle, - IN PFILE_OBJECT FileObject +PKS_TSDUMGR +KsQueryTsduMgr( + ks_tconn_t * tconn, + BOOLEAN expedited, + BOOLEAN sending ) { - NTSTATUS Status = STATUS_SUCCESS; - - if (FileObject) { - ObDereferenceObject(FileObject); - } + PKS_CHAIN KsChain; + PKS_TSDUMGR TsduMgr; - if (Handle) { + /* get the latest Tsdu buffer form TsduMgr list. + just set NULL if the list is empty. */ - Status = ZwClose(Handle); + if (sending) { + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_send); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_send); + } + } else { + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_recv); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_recv); + } } - ASSERT(NT_SUCCESS(Status)); + if (expedited) { + TsduMgr = &(KsChain->Expedited); + } else { + TsduMgr = &(KsChain->Normal); + } - return (Status); + return TsduMgr; } - -/* - * KsAssociateAddress - * Associate an address object with a connection object - * - * Arguments: - * AddressHandle: the handle of the address object - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ - -NTSTATUS -KsAssociateAddress( - IN HANDLE AddressHandle, - IN PFILE_OBJECT 
ConnectionObject - ) +PKS_TSDU +KsGetTsdu(PKS_TSDUMGR TsduMgr, ULONG Length) { - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; + PKS_TSDU KsTsdu = NULL; - // - // Getting the DeviceObject from Connection FileObject - // + /* retrieve the latest Tsdu buffer form TsduMgr + list if the list is not empty. */ - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - // - // Building Tdi Internal Irp ... - // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { + if (list_empty(&(TsduMgr->TsduList))) { - Status = STATUS_INSUFFICIENT_RESOURCES; + LASSERT(TsduMgr->NumOfTsdu == 0); + KsTsdu = NULL; } else { - // - // Assocating the Address Object with the Connection Object - // + LASSERT(TsduMgr->NumOfTsdu > 0); + KsTsdu = list_entry(TsduMgr->TsduList.prev, KS_TSDU, Link); - TdiBuildAssociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - AddressHandle - ); + /* if this Tsdu does not contain enough space, we need + allocate a new Tsdu queue. */ - // - // Calling the Transprot Driver with the Prepared Irp - // + if (KsTsdu->LastOffset + Length > KsTsdu->TotalLength) { + KsTsdu = NULL; + } + } - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + /* allocate a new Tsdu in case we are not statisfied. */ + if (NULL == KsTsdu) { + KsTsdu = KsAllocateKsTsdu(); + if (NULL != KsTsdu) { + list_add_tail(&(KsTsdu->Link), &(TsduMgr->TsduList)); + TsduMgr->NumOfTsdu++; + } } - return (Status); + return KsTsdu; } +ULONG +KsWriteTsduDat( + PKS_TSDUMGR TsduMgr, + PCHAR buffer, + ULONG length, + ULONG flags + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + + BOOLEAN bNewBuff = FALSE; + PCHAR Buffer = NULL; /* - * KsDisassociateAddress - * Disassociate the connection object (the relationship will - * the corresponding address object will be dismissed. 
) - * - * Arguments: - * ConnectionObject: the FileObject of the connection - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ + printk("KsWriteTsduDat: %u\n", length); + KsDumpPrint(buffer, length); +*/ + /* if the Tsdu is even larger than the biggest Tsdu, we have + to allocate new buffer and use TSDU_TYPE_BUF to store it */ -NTSTATUS -KsDisassociateAddress( - IN PFILE_OBJECT ConnectionObject - ) -{ - NTSTATUS Status; - PDEVICE_OBJECT DeviceObject; - PIRP Irp; + if ( KS_TSDU_STRU_SIZE(length) > ks_data.ksnd_tsdu_size - + KS_QWORD_ALIGN(sizeof(KS_TSDU))) { + bNewBuff = TRUE; + } - // - // Getting the DeviceObject from Connection FileObject - // + /* allocating the buffer for TSDU_TYPE_BUF */ + if (bNewBuff) { + Buffer = ExAllocatePool(NonPagedPool, length); + if (NULL == Buffer) { + /* there's no enough memory for us. We just try to + receive maximum bytes with a new Tsdu */ + bNewBuff = FALSE; + length = ks_data.ksnd_tsdu_size - KS_TSDU_STRU_SIZE(0) - + KS_QWORD_ALIGN(sizeof(KS_TSDU)); + } + } - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + /* get empty Tsdu from TsduMgr */ + KsTsdu = KsGetTsdu(TsduMgr, bNewBuff ? sizeof(KS_TSDU_BUF) : + KS_TSDU_STRU_SIZE(length) ); - // - // Building Tdi Internal Irp ... - // + /* allocate a new Tsdu in case we are not statisfied. 
*/ + if (NULL == KsTsdu) { + goto errorout; + } - Irp = KsBuildTdiIrp(DeviceObject); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - if (NULL == Irp) { + if (bNewBuff) { - Status = STATUS_INSUFFICIENT_RESOURCES; + /* setup up the KS_TSDU_BUF record */ + KsTsduBuf->TsduType = TSDU_TYPE_BUF; + KsTsduBuf->TsduFlags = 0; + KsTsduBuf->StartOffset = 0; + KsTsduBuf->UserBuffer = Buffer; + KsTsduBuf->DataLength = length; + KsTsduBuf->Mdl = NULL; + if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) { + KsTsduBuf->TsduFlags |= KS_TSDU_COMM_PARTIAL; + } + + KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); } else { - // - // Disassocating the Address Object with the Connection Object - // + /* setup the KS_TSDU_DATA to contain all the messages */ - TdiBuildDisassociateAddress( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL - ); + KsTsduDat->TsduType = TSDU_TYPE_DAT; + KsTsduDat->TsduFlags = 0; - // - // Calling the Transprot Driver with the Prepared Irp - // + if ( KsTsdu->TotalLength - KsTsdu->LastOffset < + KS_TSDU_STRU_SIZE(length) ) { + length = KsTsdu->TotalLength - KsTsdu->LastOffset - + FIELD_OFFSET(KS_TSDU_DAT, Data); + } + KsTsduDat->DataLength = length; + KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(length); + KsTsduDat->StartOffset = 0; + KsTsduDat->Mdl = NULL; + if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) { + KsTsduDat->TsduFlags |= KS_TSDU_COMM_PARTIAL; + } - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + Buffer = &KsTsduDat->Data[0]; + KsTsdu->LastOffset += KsTsduDat->TotalLength; } - return (Status); -} - + RtlMoveMemory(Buffer, buffer, length); + TsduMgr->TotalBytes += length; -/* + KsPrint((4, "KsWriteTsduDat: TsduMgr=%p bytes in queue:%xh (%xh)\n", + TsduMgr, TsduMgr->TotalBytes, length)); + return length; -// -// Connection Control Event Callbacks -// +errorout: -TDI_EVENT_CONNECT -TDI_EVENT_DISCONNECT -TDI_EVENT_ERROR + return 0; +} -// -// Tcp Event Callbacks 
-// +ULONG +KsWriteTsduBuf( + PKS_TSDUMGR TsduMgr, + PCHAR buffer, + ULONG length, + ULONG flags + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_BUF KsTsduBuf; -TDI_EVENT_RECEIVE -TDI_EVENT_RECEIVE_EXPEDITED -TDI_EVENT_CHAINED_RECEIVE -TDI_EVENT_CHAINED_RECEIVE_EXPEDITED + /* get empty Tsdu from TsduMgr */ + KsTsdu = KsGetTsdu(TsduMgr, sizeof(KS_TSDU_BUF)); -// -// Udp Event Callbacks -// + /* allocate a new Tsdu in case we are not statisfied. */ + if (NULL == KsTsdu) { + goto errorout; + } -TDI_EVENT_RECEIVE_DATAGRAM -TDI_EVENT_CHAINED_RECEIVE_DATAGRAM + /* setup up the KS_TSDU_BUF record */ + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + KsTsduBuf->TsduType = TSDU_TYPE_BUF; + KsTsduBuf->TsduFlags = 0; + KsTsduBuf->StartOffset = 0; + KsTsduBuf->UserBuffer = buffer; + KsTsduBuf->DataLength = length; + KsTsduBuf->Mdl = NULL; + KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); + if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) { + KsTsduBuf->TsduFlags |= KS_TSDU_COMM_PARTIAL; + } -*/ + TsduMgr->TotalBytes += length; + KsPrint((4, "KsWriteTsduBuf: TsduMgr=%p bytes in queue:%xh (%xh)\n", + TsduMgr, TsduMgr->TotalBytes, length)); + return length; -/* - * KsSetEventHandlers - * Set the tdi event callbacks with an address object - * - * Arguments: - * AddressObject: the FileObject of the address object - * EventContext: the parameter for the callbacks - * Handlers: the handlers indictor array - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * NOTES: - * N/A - */ +errorout: + return 0; +} -NTSTATUS -KsSetEventHandlers( - IN PFILE_OBJECT AddressObject, // Address File Object - IN PVOID EventContext, // Context for Handlers - IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor - ) +ULONG +KsWriteTsduMdl( + PKS_TSDUMGR TsduMgr, + ks_mdl_t * mdl, + PVOID desc, + ULONG offset, + ULONG length, + ULONG flags + ) { - NTSTATUS Status = STATUS_SUCCESS; - PDEVICE_OBJECT DeviceObject; - USHORT i = 0; + PKS_TSDU KsTsdu; + 
PKS_TSDU_MDL KsTsduMdl; - DeviceObject = IoGetRelatedDeviceObject(AddressObject); + /* get empty Tsdu from TsduMgr */ + KsTsdu = KsGetTsdu(TsduMgr, sizeof(KS_TSDU_MDL)); - for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) { + /* allocate a new Tsdu in case we are not statisfied. */ + if (NULL == KsTsdu) { + goto errorout; + } - // - // Setup the tdi event callback handler if requested. - // + /* setup up the KS_TSDU_MDL record */ + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + KsTsduMdl->TsduType = TSDU_TYPE_MDL; + KsTsduMdl->TsduFlags = 0; + KsTsduMdl->StartOffset = 0; + KsTsduMdl->BaseOffset = offset; + KsTsduMdl->DataLength = length; + KsTsduMdl->Mdl = mdl; + KsTsduMdl->Descriptor = desc; + KsTsdu->LastOffset += sizeof(KS_TSDU_MDL); + if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) { + KsTsduMdl->TsduFlags |= KS_TSDU_COMM_PARTIAL; + } - if (Handlers->IsActive[i]) { + TsduMgr->TotalBytes += length; + KsPrint((4, "KsWriteTsduMdl: TsduMgr=%p bytes in queue:%xh (%xh)\n", + TsduMgr, TsduMgr->TotalBytes, length)); - PIRP Irp; + return length; - // - // Building Tdi Internal Irp ... - // +errorout: + return 0; +} - Irp = KsBuildTdiIrp(DeviceObject); +ULONG +KsReadTsdu ( + PKS_TSDUMGR TsduMgr, + PCHAR buffer, + ULONG length, + ULONG flags + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; - if (NULL == Irp) { + PUCHAR Buffer; + ULONG BytesRecved = 0; +#if DBG + ULONG TotalBytes = TsduMgr->TotalBytes; +#endif - Status = STATUS_INSUFFICIENT_RESOURCES; + KsPrint((4, "KsReadTsdu: TsduMgr=%p request=%xh total=%xh\n", + TsduMgr, length, TsduMgr->TotalBytes )); +NextTsdu: - } else { + if (TsduMgr->TotalBytes == 0) { - // - // Building the Irp to set the Event Handler ... - // + /* It's a notification event. We need reset it to + un-signaled state in case there no any tsdus. 
*/ - TdiBuildSetEventHandler( - Irp, - DeviceObject, - AddressObject, - NULL, - NULL, - i, /* tdi event type */ - Handlers->Handler[i], /* tdi event handler */ - EventContext /* context for the handler */ - ); + KeResetEvent(&(TsduMgr->Event)); - // - // Calling the Transprot Driver with the Prepared Irp - // + } else { - Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - // - // tcp/ip tdi does not support these two event callbacks - // + /* remove the KsTsdu from TsduMgr list to release the lock */ + list_del(&(KsTsdu->Link)); + TsduMgr->NumOfTsdu--; - if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE || - i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) { - cfs_enter_debugger(); - Status = STATUS_SUCCESS; - } - } + while (length > BytesRecved) { - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; + ULONG BytesToCopy = 0; + ULONG StartOffset = 0; + ULONG BytesCopied = 0; + + if (KsTsdu->StartOffset >= KsTsdu->LastOffset) { + /* KsTsdu is empty now, we need free it ... */ + KsPutKsTsdu(KsTsdu); + KsTsdu = NULL; + break; } - } - } + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); -errorout: + if ( TSDU_TYPE_DAT == KsTsduDat->TsduType || + TSDU_TYPE_BUF == KsTsduBuf->TsduType ) { - if (!NT_SUCCESS(Status)) { + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n", - Status, KsNtStatusToString(Status) )); - } + /* Data Tsdu Unit ... */ + Buffer = &KsTsduDat->Data[0]; + StartOffset = KsTsduDat->StartOffset; + if (KsTsduDat->DataLength - KsTsduDat->StartOffset > length - BytesRecved) { + /* Recvmsg requst could be statisfied ... 
*/ + BytesToCopy = length - BytesRecved; + } else { + BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset; + } - return (Status); -} + } else { + /* Buffer Tsdu Unit */ + ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); + Buffer = KsTsduBuf->UserBuffer; + StartOffset = KsTsduBuf->StartOffset; + if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > length - BytesRecved) { + /* Recvmsg requst could be statisfied ... */ + BytesToCopy = length - BytesRecved; + } else { + BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset; + } + } -/* - * KsQueryAddressInfo - * Query the address of the FileObject specified - * - * Arguments: - * FileObject: the FileObject to be queried - * AddressInfo: buffer to contain the address info - * AddressSize: length of the AddressInfo buffer - * - * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) - * - * Notes: - * N/A - */ + if (BytesToCopy > 0) { + RtlMoveMemory(buffer + BytesRecved, Buffer + StartOffset, BytesToCopy); + } + BytesCopied = BytesToCopy; + BytesRecved += BytesCopied; + LASSERT(TsduMgr->TotalBytes >= BytesCopied); + TsduMgr->TotalBytes -= BytesCopied; + KsPrint((4, "KsReadTsdu: TsduMgr=%p copied=%xh recved=%xh\n", + TsduMgr, BytesCopied, BytesRecved )); -NTSTATUS -KsQueryAddressInfo( - PFILE_OBJECT FileObject, - PTDI_ADDRESS_INFO AddressInfo, - PULONG AddressSize - ) -{ - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + KsTsduDat->StartOffset += BytesCopied; + if (KsTsduDat->StartOffset == KsTsduDat->DataLength) { + if (KsTsduDat->Mdl) { + KsTsduDat->Mdl->Next = NULL; + KsReleaseMdl(KsTsduDat->Mdl, FALSE); + } + KsTsdu->StartOffset += KsTsduDat->TotalLength; + } - DeviceObject = IoGetRelatedDeviceObject(FileObject); + } else { - RtlZeroMemory(AddressInfo, *(AddressSize)); + ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); + 
KsTsduBuf->StartOffset += BytesCopied; + if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) { + KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); + /* now we need release the buf to system pool */ + if (KsTsduBuf->Mdl) { + KsTsduBuf->Mdl->Next = NULL; + KsReleaseMdl(KsTsduBuf->Mdl, FALSE); + } + ExFreePool(KsTsduBuf->UserBuffer); + } + } - // - // Allocating the Tdi Setting Irp ... - // + } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - Irp = KsBuildTdiIrp(DeviceObject); + /* MDL Tsdu Unit ... */ + if (KsTsduMdl->DataLength > length - BytesRecved) { + /* Recvmsg requst could be statisfied ... */ + BytesToCopy = length - BytesRecved; + } else { + BytesToCopy = KsTsduMdl->DataLength; + } - if (NULL == Irp) { + BytesCopied = + KsCopyMdlToBuffer( + KsTsduMdl->Mdl, + KsTsduMdl->StartOffset + + KsTsduMdl->BaseOffset, + buffer, + BytesRecved, + BytesToCopy + ); + KsPrint((4, "KsReadTsdu: TsduMgr=%p mdl=%p dec=%p copied=%xh " + "recved=%xh\n", + TsduMgr, KsTsduMdl->Mdl, KsTsduMdl->Descriptor, + BytesCopied, BytesRecved + BytesCopied)); + if (BytesCopied == 0) { + cfs_enter_debugger(); + break; + } - Status = STATUS_INSUFFICIENT_RESOURCES; + KsTsduMdl->StartOffset += BytesCopied; + KsTsduMdl->DataLength -= BytesCopied; + BytesRecved += BytesCopied; + LASSERT(TsduMgr->TotalBytes >= BytesCopied); + TsduMgr->TotalBytes -= BytesCopied; - } else { + if (0 == KsTsduMdl->DataLength) { - // - // Locking the User Buffer / Allocating a MDL for it - // + /* Call TdiReturnChainedReceives to release the Tsdu memory */ + LASSERT(KsTsduMdl->Descriptor != NULL); + if (KsTsduMdl->Descriptor) { + TdiReturnChainedReceives( + &(KsTsduMdl->Descriptor), + 1 ); + } - Status = KsLockUserBuffer( - AddressInfo, - FALSE, - *(AddressSize), - IoModifyAccess, - &Mdl - ); + KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); + } - if (!NT_SUCCESS(Status)) { + } else { + KsPrint((1, "KsReadTsdu: unknown tsdu slot: slot = %x type = %x Start= %x Length=%x\n", + KsTsduDat, KsTsduDat->TsduType, 
KsTsduDat->StartOffset, KsTsduDat->DataLength)); + KsPrint((1, " Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x\n", + KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength)); + cfs_enter_debugger(); + } + } - IoFreeIrp(Irp); - Irp = NULL; + /* we need attach the KsTsdu to the list header */ + if (KsTsdu) { + if (KsTsdu->StartOffset >= KsTsdu->LastOffset) { + KsPutKsTsdu(KsTsdu); + KsTsdu = NULL; + } else { + TsduMgr->NumOfTsdu++; + list_add(&(KsTsdu->Link), &(TsduMgr->TsduList)); + } + } + + if (length > BytesRecved) { + goto NextTsdu; } } - if (Irp) { +#if DBG + LASSERT(TotalBytes == TsduMgr->TotalBytes + BytesRecved); +#endif + KsPrint((4, "KsReadTsdu: TsduMgr=%p recved=%xh (%xh) remained=%xh\n", + TsduMgr, BytesRecved, length, TsduMgr->TotalBytes )); - LASSERT(NT_SUCCESS(Status)); + return BytesRecved; +} - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_ADDRESS_INFO, - Mdl - ); - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - AddressSize - ); +ULONG +KsTdiSendFlags(int SockFlags) +{ + ULONG TdiFlags = 0; - KsReleaseMdl(Mdl, FALSE); + if (cfs_is_flag_set(SockFlags, MSG_OOB)) { + cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED); } - if (!NT_SUCCESS(Status)) { + if (cfs_is_flag_set(SockFlags, MSG_MORE)) { + cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL); + } - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW + if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { + cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); } - return (Status); + return TdiFlags; +} + +ULONG +KsTdiRecvFlags(int SockFlags) +{ + ULONG TdiFlags = 0; + + if (cfs_is_flag_set(SockFlags, MSG_OOB)) { + cfs_set_flag(TdiFlags, TDI_RECEIVE_EXPEDITED); + } + + if (cfs_is_flag_set(SockFlags, MSG_MORE)) { + cfs_set_flag(TdiFlags, TDI_RECEIVE_PARTIAL); + } + + if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { + cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); + } + + return TdiFlags; +} + +int +KsWriteTsdus(PKS_TSDUMGR TsduMgr, char * buffer, 
int length, int flags) +{ + int rc = 0; + + if (TsduMgr->TotalBytes <= TDINAL_MAX_TSDU_QUEUE_SIZE) { + rc = KsWriteTsduDat(TsduMgr, buffer, length, flags); + } + + if (rc > 0) { + return rc; + } + + return -EAGAIN; +} + +int +KsReadTsdus(PKS_TSDUMGR TsduMgr, char * buffer, int length, int flags) +{ + int rc = KsReadTsdu(TsduMgr, buffer, length, flags); + + if (rc > 0) { + return rc; + } + + return -EAGAIN; } /* - * KsQueryProviderInfo - * Query the underlying transport device's information + * KsInitializeKsTsduMgr + * Initialize the management structure of + * Tsdu buffers * * Arguments: - * TdiDeviceName: the transport device's name string - * ProviderInfo: TDI_PROVIDER_INFO struncture + * TsduMgr: the TsduMgr to be initialized * * Return Value: - * NTSTATUS: Nt system status code - * + * VOID + * * NOTES: * N/A */ -NTSTATUS -KsQueryProviderInfo( - PWSTR TdiDeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) +VOID +KsInitializeKsTsduMgr( + PKS_TSDUMGR TsduMgr + ) { - NTSTATUS Status = STATUS_SUCCESS; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - - UNICODE_STRING ControlName; + KeInitializeEvent( + &(TsduMgr->Event), + NotificationEvent, + FALSE + ); - HANDLE Handle; - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; + CFS_INIT_LIST_HEAD( + &(TsduMgr->TsduList) + ); - ULONG ProviderSize = 0; + TsduMgr->NumOfTsdu = 0; + TsduMgr->TotalBytes = 0; - RtlInitUnicodeString(&ControlName, TdiDeviceName); + spin_lock_init(&TsduMgr->Lock); +} - // - // Open the Tdi Control Channel - // - Status = KsOpenControl( - &ControlName, - &Handle, - &FileObject - ); +/* + * KsInitializeKsChain + * Initialize the China structure for receiving + * or transmitting + * + * Arguments: + * KsChain: the KsChain to be initialized + * + * Return Value: + * VOID + * + * NOTES: + * N/A + */ - if (!NT_SUCCESS(Status)) { +VOID +KsInitializeKsChain( + PKS_CHAIN KsChain + ) +{ + KsInitializeKsTsduMgr(&(KsChain->Normal)); + KsInitializeKsTsduMgr(&(KsChain->Expedited)); + KsChain->Expedited.OOB = 
TRUE; +} - KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n")); - return (Status); - } - // - // Obtain The Related Device Object - // +/* + * KsCleanupTsduMgr + * Clean up all the Tsdus in the TsduMgr list + * + * Arguments: + * TsduMgr: the Tsdu list manager + * + * Return Value: + * NTSTATUS: nt status code + * + * NOTES: + * N/A + */ - DeviceObject = IoGetRelatedDeviceObject(FileObject); +NTSTATUS +KsCleanupTsduMgr( + PKS_TSDUMGR TsduMgr + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; - ProviderSize = sizeof(TDI_PROVIDER_INFO); - RtlZeroMemory(ProviderInfo, ProviderSize); + LASSERT(NULL != TsduMgr); - // - // Allocating the Tdi Setting Irp ... - // + KsRemoveTdiEngine(TsduMgr); + KeSetEvent(&(TsduMgr->Event), 0, FALSE); - Irp = KsBuildTdiIrp(DeviceObject); + while (!list_empty(&TsduMgr->TsduList)) { - if (NULL == Irp) { + KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - Status = STATUS_INSUFFICIENT_RESOURCES; + if (KsTsdu->StartOffset == KsTsdu->LastOffset) { - } else { + // + // KsTsdu is empty now, we need free it ... 
+ // - // - // Locking the User Buffer / Allocating a MDL for it - // + list_del(&(KsTsdu->Link)); + TsduMgr->NumOfTsdu--; - Status = KsLockUserBuffer( - ProviderInfo, - FALSE, - ProviderSize, - IoModifyAccess, - &Mdl - ); + KsFreeKsTsdu(KsTsdu); - if (!NT_SUCCESS(Status)) { + } else { - IoFreeIrp(Irp); - Irp = NULL; - } - } + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - if (Irp) { + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { - LASSERT(NT_SUCCESS(Status)); + KsTsdu->StartOffset += KsTsduDat->TotalLength; - TdiBuildQueryInformation( - Irp, - DeviceObject, - FileObject, - NULL, - NULL, - TDI_QUERY_PROVIDER_INFO, - Mdl - ); + } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - &ProviderSize - ); + ASSERT(KsTsduBuf->UserBuffer != NULL); - KsReleaseMdl(Mdl, FALSE); - } + if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) { + if (KsTsduBuf->Mdl) { + KsTsduBuf->Mdl->Next = NULL; + KsReleaseMdl(KsTsduBuf->Mdl, FALSE); + } + ExFreePool(KsTsduBuf->UserBuffer); + } else { + cfs_enter_debugger(); + } - if (!NT_SUCCESS(Status)) { + KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - cfs_enter_debugger(); - //TDI_BUFFER_OVERFLOW - } + } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { - KsCloseControl(Handle, FileObject); + // + // MDL Tsdu Unit ... 
+ // + if (KsTsduMdl->Descriptor) { + TdiReturnChainedReceives( + &(KsTsduMdl->Descriptor), + 1 ); + } else if (KsTsduMdl->Mdl) { + KsTsduMdl->Mdl->Next = NULL; + KsReleaseMdl(KsTsduMdl->Mdl, FALSE); + } - return (Status); + KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); + } + } + } + + return STATUS_SUCCESS; } + /* - * KsQueryConnectionInfo - * Query the connection info of the FileObject specified - * (some statics data of the traffic) + * KsCleanupKsChain + * Clean up the TsduMgrs of the KsChain * * Arguments: - * FileObject: the FileObject to be queried - * ConnectionInfo: buffer to contain the connection info - * ConnectionSize: length of the ConnectionInfo buffer + * KsChain: the chain managing TsduMgr * * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) + * NTSTATUS: nt status code * * NOTES: * N/A */ NTSTATUS -KsQueryConnectionInfo( - PFILE_OBJECT ConnectionObject, - PTDI_CONNECTION_INFO ConnectionInfo, - PULONG ConnectionSize - ) +KsCleanupKsChain( + PKS_CHAIN KsChain + ) { - NTSTATUS Status = STATUS_UNSUCCESSFUL; - PIRP Irp = NULL; - PMDL Mdl; - PDEVICE_OBJECT DeviceObject; - - LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - RtlZeroMemory(ConnectionInfo, *(ConnectionSize)); - - // - // Allocating the Tdi Query Irp ... 
- // - - Irp = KsBuildTdiIrp(DeviceObject); - - if (NULL == Irp) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - - } else { - - // - // Locking the User Buffer / Allocating a MDL for it - // + NTSTATUS Status; - Status = KsLockUserBuffer( - ConnectionInfo, - FALSE, - *(ConnectionSize), - IoModifyAccess, - &Mdl - ); + LASSERT(NULL != KsChain); - if (NT_SUCCESS(Status)) { + Status = KsCleanupTsduMgr( + &(KsChain->Normal) + ); - IoFreeIrp(Irp); - Irp = NULL; - } + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; } - if (Irp) { - - LASSERT(NT_SUCCESS(Status)); - - TdiBuildQueryInformation( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - TDI_QUERY_CONNECTION_INFO, - Mdl - ); - - Status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - ConnectionSize - ); + Status = KsCleanupTsduMgr( + &(KsChain->Expedited) + ); - KsReleaseMdl(Mdl, FALSE); + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; } - return (Status); +errorout: + + return Status; } /* - * KsInitializeTdiAddress - * Initialize the tdi addresss + * KsCleanupTsdu + * Clean up all the Tsdus of a tdi connected object * * Arguments: - * pTransportAddress: tdi address to be initialized - * IpAddress: the ip address of object - * IpPort: the ip port of the object + * tconn: the tdi connection which is connected already. 
* * Return Value: - * ULONG: the total size of the tdi address + * Nt status code * * NOTES: * N/A */ -ULONG -KsInitializeTdiAddress( - IN OUT PTA_IP_ADDRESS pTransportAddress, - IN ULONG IpAddress, - IN USHORT IpPort +NTSTATUS +KsCleanupTsdu( + ks_tconn_t * tconn ) { - pTransportAddress->TAAddressCount = 1; - pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP; - pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP; - pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort; - pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress; + NTSTATUS Status = STATUS_SUCCESS; - return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP); + + if (tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child ) { + + goto errorout; + } + + if (tconn->kstc_type == kstt_sender) { + + Status = KsCleanupKsChain( + &(tconn->sender.kstc_recv) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + Status = KsCleanupKsChain( + &(tconn->sender.kstc_send) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + } else { + + Status = KsCleanupKsChain( + &(tconn->child.kstc_recv) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + Status = KsCleanupKsChain( + &(tconn->child.kstc_send) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + } + +errorout: + + return (Status); +} + +NTSTATUS +KsIrpCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + if (NULL != Context) { + KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE); + } + + return STATUS_MORE_PROCESSING_REQUIRED; + + UNREFERENCED_PARAMETER(DeviceObject); + UNREFERENCED_PARAMETER(Irp); } + /* - * KsQueryTdiAddressLength - * Query the total size of the tdi address + * KsBuildTdiIrp + * Allocate a new IRP and initialize it to be issued to tdi * * Arguments: - * pTransportAddress: tdi address 
to be queried + * DeviceObject: device object created by the underlying + * TDI transport driver * * Return Value: - * ULONG: the total size of the tdi address + * PRIP: the allocated Irp in success or NULL in failure. * * NOTES: * N/A */ -ULONG -KsQueryTdiAddressLength( - PTRANSPORT_ADDRESS pTransportAddress +PIRP +KsBuildTdiIrp( + IN PDEVICE_OBJECT DeviceObject ) { - ULONG TotalLength = 0; - LONG i; + PIRP Irp; + PIO_STACK_LOCATION IrpSp; - PTA_ADDRESS UNALIGNED pTaAddress = NULL; + // + // Allocating the IRP ... + // - ASSERT (NULL != pTransportAddress); + Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); - TotalLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) + - FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount; + if (NULL != Irp) { - pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address; + // + // Getting the Next Stack Location ... + // - for (i = 0; i < pTransportAddress->TAAddressCount; i++) - { - TotalLength += pTaAddress->AddressLength; - pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress + - FIELD_OFFSET(TA_ADDRESS,Address) + - pTaAddress->AddressLength ); + IrpSp = IoGetNextIrpStackLocation(Irp); + + // + // Initializing Irp ... + // + + IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL; + IrpSp->Parameters.DeviceIoControl.IoControlCode = 0; } - return (TotalLength); + return Irp; } - /* - * KsQueryIpAddress - * Query the ip address of the tdi object + * KsSubmitTdiIrp + * Issue the Irp to the underlying tdi driver * * Arguments: - * FileObject: tdi object to be queried - * TdiAddress: TdiAddress buffer, to store the queried - * tdi ip address - * AddressLength: buffer length of the TdiAddress + * DeviceObject: the device object created by TDI driver + * Irp: the I/O request packet to be processed + * bSynchronous: synchronous or not. If true, we need wait + * until the process is finished. 
+ * Information: returned info * * Return Value: - * ULONG: the total size of the tdi ip address + * NTSTATUS: kernel status code * * NOTES: * N/A */ NTSTATUS -KsQueryIpAddress( - PFILE_OBJECT FileObject, - PVOID TdiAddress, - ULONG* AddressLength +KsSubmitTdiIrp( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN BOOLEAN bSynchronous, + OUT PULONG Information ) { - NTSTATUS Status; - - PTDI_ADDRESS_INFO TdiAddressInfo; - ULONG Length; - - - // - // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS - // + NTSTATUS Status; + KEVENT Event; - Length = MAX_ADDRESS_LENGTH; + if (bSynchronous) { - TdiAddressInfo = (PTDI_ADDRESS_INFO) - ExAllocatePoolWithTag( - NonPagedPool, - Length, - 'KSAI' ); + KeInitializeEvent( + &Event, + SynchronizationEvent, + FALSE + ); - if (NULL == TdiAddressInfo) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; + IoSetCompletionRoutine( + Irp, + KsIrpCompletionRoutine, + &Event, + TRUE, + TRUE, + TRUE + ); } + Status = IoCallDriver(DeviceObject, Irp); - Status = KsQueryAddressInfo( - FileObject, - TdiAddressInfo, - &Length - ); - -errorout: - - if (NT_SUCCESS(Status)) - { - if (*AddressLength < Length) { + if (bSynchronous) { - Status = STATUS_BUFFER_TOO_SMALL; + if (STATUS_PENDING == Status) { - } else { + Status = KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); + } - *AddressLength = Length; - RtlCopyMemory( - TdiAddress, - &(TdiAddressInfo->Address), - Length - ); + Status = Irp->IoStatus.Status; - Status = STATUS_SUCCESS; + if (Information) { + *Information = (ULONG)(Irp->IoStatus.Information); } - } else { - + IoFreeIrp(Irp); } + if (!NT_SUCCESS(Status)) { - if (NULL != TdiAddressInfo) { - - ExFreePool(TdiAddressInfo); + KsPrint((1, "KsSubmitTdiIrp: Error when submitting the Irp: " + "Status = %xh (%s)\n", Status, KsNtStatusToString(Status))); } - return Status; + return (Status); } + /* - * KsErrorEventHandler - * the common error event handler callback + * KsOpenControl + * 
Open the Control Channel Object ... * * Arguments: - * TdiEventContext: should be the socket - * Status: the error code + * DeviceName: the device name to be opened + * Handle: opened handle in success case + * FileObject: the fileobject of the device * * Return Value: - * Status: STATS_SUCCESS + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * - * NOTES: - * We need not do anything in such a severe - * error case. System will process it for us. + * Notes: + * N/A */ NTSTATUS -KsErrorEventHandler( - IN PVOID TdiEventContext, - IN NTSTATUS Status +KsOpenControl( + IN PUNICODE_STRING DeviceName, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject ) { - KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n", - KeGetCurrentIrql())); + NTSTATUS Status = STATUS_SUCCESS; - cfs_enter_debugger(); + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; - return (STATUS_SUCCESS); -} + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); -/* - * ks_set_handlers - * setup all the event handler callbacks - * - * Arguments: - * tconn: the tdi connecton object - * - * Return Value: - * int: ks error code - * - * NOTES: - * N/A - */ + // + // Initializing ... + // -int -ks_set_handlers( - ksock_tconn_t * tconn - ) -{ - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } + LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL ); - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. */ + // + // Creating the Transport Address Object ... 
+ // - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + 0, + FILE_ATTRIBUTE_NORMAL, + FILE_SHARE_READ | FILE_SHARE_WRITE, + FILE_OPEN, + 0, + NULL, + 0 + ); - SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler); - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler); + if (NT_SUCCESS(Status)) { - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; - } + // + // Now Obtaining the FileObject of the Transport Address ... 
+ // - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); -errorout: + if (!NT_SUCCESS(Status)) { - return cfs_error_code(status); + cfs_enter_debugger(); + ZwClose(*Handle); + } + + } else { + + cfs_enter_debugger(); + } + + return (Status); } /* - * ks_reset_handlers - * disable all the event handler callbacks (set to NULL) + * KsCloseControl + * Release the Control Channel Handle and FileObject * * Arguments: - * tconn: the tdi connecton object + * Handle: the channel handle to be released + * FileObject: the fileobject to be released * * Return Value: - * int: ks error code + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * - * NOTES: + * Notes: * N/A */ -int -ks_reset_handlers( - ksock_tconn_t * tconn - ) +NTSTATUS +KsCloseControl( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject + ) { - NTSTATUS status = STATUS_SUCCESS; - KS_EVENT_HANDLERS handlers; + NTSTATUS Status = STATUS_SUCCESS; - /* to make sure the address object is opened already */ - if (tconn->kstc_addr.FileObject == NULL) { - goto errorout; - } + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - /* initialize the handlers indictor array. for sender and listenr, - there are different set of callbacks. for child, we just return. 
*/ + if (FileObject) { - memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); + ObDereferenceObject(FileObject); + } - SetEventHandler(handlers, TDI_EVENT_ERROR, NULL); - SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL); - SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL); - SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL); - // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL); + if (Handle) { - if (tconn->kstc_type == kstt_listener) { - SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL); - } else if (tconn->kstc_type == kstt_child) { - goto errorout; + Status = ZwClose(Handle); } - /* set all the event callbacks */ - status = KsSetEventHandlers( - tconn->kstc_addr.FileObject, /* Address File Object */ - tconn, /* Event Context */ - &handlers /* Event callback handlers */ - ); - -errorout: + ASSERT(NT_SUCCESS(Status)); - return cfs_error_code(status); + return (Status); } /* - * KsAcceptCompletionRoutine - * Irp completion routine for TdiBuildAccept (KsConnectEventHandler) - * - * Here system gives us a chance to check the conneciton is built - * ready or not. + * KsOpenAddress + * Open the tdi address object * * Arguments: - * DeviceObject: the device object of the transport driver - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp + * DeviceName: device name of the address object + * pAddress: tdi address of the address object + * AddressLength: length in bytes of the tdi address + * Handle: the newly opened handle + * FileObject: the newly opened fileobject * * Return Value: - * Nt status code + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * * Notes: * N/A */ NTSTATUS -KsAcceptCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) +KsOpenAddress( + IN PUNICODE_STRING DeviceName, + IN PTRANSPORT_ADDRESS pAddress, + IN ULONG AddressLength, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject + ) { - ksock_tconn_t * child = (ksock_tconn_t *) Context; - ksock_tconn_t * parent = child->child.kstc_parent; - - KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n", - KeGetCurrentIrql() )); + NTSTATUS Status = STATUS_SUCCESS; - KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n", - Context, Irp->IoStatus.Status)); + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + UCHAR EaBuffer[EA_MAX_LENGTH]; - LASSERT(child->kstc_type == kstt_child); + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; - spin_lock(&(child->kstc_lock)); + // + // Building EA for the Address Object to be Opened ... 
+ // - LASSERT(parent->kstc_state == ksts_listening); - LASSERT(child->kstc_state == ksts_connecting); + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH; + Ea->EaValueLength = (USHORT)AddressLength; + RtlCopyMemory( + &(Ea->EaName), + TdiTransportAddress, + Ea->EaNameLength + 1 + ); + RtlMoveMemory( + &(Ea->EaName[Ea->EaNameLength + 1]), + pAddress, + AddressLength + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) + + Ea->EaNameLength + AddressLength; - if (NT_SUCCESS(Irp->IoStatus.Status)) { + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - child->child.kstc_accepted = TRUE; - child->kstc_state = ksts_connected; + // + // Initializing ... + // - /* wake up the daemon thread which waits on this event */ - KeSetEvent( - &(parent->listener.kstc_accept_event), - 0, - FALSE - ); + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); - spin_unlock(&(child->kstc_lock)); + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent)); + // + // Creating the Transport Address Object ... + // - } else { + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + 0, + FILE_ATTRIBUTE_NORMAL, + FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */ + FILE_OPEN, + 0, + Ea, + EaLength + ); - /* re-use this child connecton */ - child->child.kstc_accepted = FALSE; - child->child.kstc_busy = FALSE; - child->kstc_state = ksts_associated; - spin_unlock(&(child->kstc_lock)); - } + if (NT_SUCCESS(Status)) { - /* now free the Irp */ - IoFreeIrp(Irp); + // + // Now Obtaining the FileObject of the Transport Address ... 
+ // - /* drop the refer count of the child */ - ks_put_tconn(child); - - return (STATUS_MORE_PROCESSING_REQUIRED); -} - - -/* - * ks_get_vacancy_backlog - * Get a vacancy listeing child from the backlog list - * - * Arguments: - * parent: the listener daemon connection - * - * Return Value: - * the child listening connection or NULL in failure - * - * Notes - * Parent's lock should be acquired before calling. - */ - -ksock_tconn_t * -ks_get_vacancy_backlog( - ksock_tconn_t * parent - ) -{ - ksock_tconn_t * child; - - LASSERT(parent->kstc_type == kstt_listener); - LASSERT(parent->kstc_state == ksts_listening); - - if (list_empty(&(parent->listener.kstc_listening.list))) { - - child = NULL; - - } else { - - struct list_head * tmp; - - /* check the listening queue and try to get a free connecton */ - - list_for_each(tmp, &(parent->listener.kstc_listening.list)) { - child = list_entry (tmp, ksock_tconn_t, child.kstc_link); - spin_lock(&(child->kstc_lock)); - - if (!child->child.kstc_busy) { - LASSERT(child->kstc_state == ksts_associated); - child->child.kstc_busy = TRUE; - spin_unlock(&(child->kstc_lock)); - break; - } else { - spin_unlock(&(child->kstc_lock)); - child = NULL; - } - } - } - - return child; -} - -ks_addr_slot_t * -KsSearchIpAddress(PUNICODE_STRING DeviceName) -{ - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - spin_lock(&ks_data.ksnd_addrs_lock); - - list = ks_data.ksnd_addrs_list.Flink; - while (list != &ks_data.ksnd_addrs_list) { - slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); - if (RtlCompareUnicodeString( - DeviceName, - &slot->devname, - TRUE) == 0) { - break; - } - list = list->Flink; - slot = NULL; - } - - spin_unlock(&ks_data.ksnd_addrs_lock); - - return slot; -} - -void -KsCleanupIpAddresses() -{ - spin_lock(&ks_data.ksnd_addrs_lock); - - while (!IsListEmpty(&ks_data.ksnd_addrs_list)) { - - ks_addr_slot_t * slot = NULL; - PLIST_ENTRY list = NULL; - - list = RemoveHeadList(&ks_data.ksnd_addrs_list); - slot = 
CONTAINING_RECORD(list, ks_addr_slot_t, link); - cfs_free(slot); - ks_data.ksnd_naddrs--; - } - - cfs_assert(ks_data.ksnd_naddrs == 0); - spin_unlock(&ks_data.ksnd_addrs_lock); -} - -VOID -KsAddAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = TRUE; - slot->ip_addr = ntohl(IpAddress->in_addr); - } else { - slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO); - if (slot != NULL) { - spin_lock(&ks_data.ksnd_addrs_lock); - InsertTailList(&ks_data.ksnd_addrs_list, &slot->link); - sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++); - slot->ip_addr = ntohl(IpAddress->in_addr); - slot->up = TRUE; - RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length); - slot->devname.Length = DeviceName->Length; - slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR); - slot->devname.Buffer = slot->buffer; - spin_unlock(&ks_data.ksnd_addrs_lock); - } - } - } -} - -VOID -KsDelAddressHandler( - IN PTA_ADDRESS Address, - IN PUNICODE_STRING DeviceName, - IN PTDI_PNP_CONTEXT Context - ) -{ - PTDI_ADDRESS_IP IpAddress = NULL; - - if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && - Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { - - ks_addr_slot_t * slot = NULL; - - slot = KsSearchIpAddress(DeviceName); - - if (slot != NULL) { - slot->up = FALSE; - } - - IpAddress 
= (PTDI_ADDRESS_IP) &Address->Address[0]; - KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", - DeviceName, Context, IpAddress->in_addr, - (IpAddress->in_addr & 0xFF000000) >> 24, - (IpAddress->in_addr & 0x00FF0000) >> 16, - (IpAddress->in_addr & 0x0000FF00) >> 8, - (IpAddress->in_addr & 0x000000FF) >> 0 )); - } -} - -NTSTATUS -KsRegisterPnpHandlers() -{ - TDI20_CLIENT_INTERFACE_INFO ClientInfo; - - /* initialize the global ks_data members */ - RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME); - spin_lock_init(&ks_data.ksnd_addrs_lock); - InitializeListHead(&ks_data.ksnd_addrs_list); - - /* register the pnp handlers */ - RtlZeroMemory(&ClientInfo, sizeof(ClientInfo)); - ClientInfo.TdiVersion = TDI_CURRENT_VERSION; - - ClientInfo.ClientName = &ks_data.ksnd_client_name; - ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler; - ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler; - - return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo), - &ks_data.ksnd_pnp_handle); -} - -VOID -KsDeregisterPnpHandlers() -{ - if (ks_data.ksnd_pnp_handle) { - - /* De-register the pnp handlers */ - - TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle); - ks_data.ksnd_pnp_handle = NULL; - - /* cleanup all the ip address slots */ - KsCleanupIpAddresses(); - } -} - -/* - * KsConnectEventHandler - * Connect event handler event handler, called by the underlying TDI - * transport in response to an incoming request to the listening daemon. - * - * it will grab a vacancy backlog from the children tconn list, and - * build an acception Irp with it, then transfer the Irp to TDI driver. - * - * Arguments: - * TdiEventContext: the tdi connnection object of the listening daemon - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsConnectEventHandler( - IN PVOID TdiEventContext, - IN LONG RemoteAddressLength, - IN PVOID RemoteAddress, - IN LONG UserDataLength, - IN PVOID UserData, - IN LONG OptionsLength, - IN PVOID Options, - OUT CONNECTION_CONTEXT * ConnectionContext, - OUT PIRP * AcceptIrp - ) -{ - ksock_tconn_t * parent; - ksock_tconn_t * child; - - PFILE_OBJECT FileObject; - PDEVICE_OBJECT DeviceObject; - NTSTATUS Status; - - PIRP Irp = NULL; - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - - KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql())); - parent = (ksock_tconn_t *) TdiEventContext; - - LASSERT(parent->kstc_type == kstt_listener); - - spin_lock(&(parent->kstc_lock)); - - if (parent->kstc_state == ksts_listening) { - - /* allocate a new ConnectionInfo to backup the peer's info */ - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + - RemoteAddressLength, 'iCsK' ); - - if (NULL == ConnectionInfo) { - - Status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); - goto errorout; - } - - /* initializing ConnectionInfo structure ... 
*/ - - ConnectionInfo->UserDataLength = UserDataLength; - ConnectionInfo->UserData = UserData; - ConnectionInfo->OptionsLength = OptionsLength; - ConnectionInfo->Options = Options; - ConnectionInfo->RemoteAddressLength = RemoteAddressLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - - RtlCopyMemory( - ConnectionInfo->RemoteAddress, - RemoteAddress, - RemoteAddressLength - ); - - /* get the vacancy listening child tdi connections */ - - child = ks_get_vacancy_backlog(parent); - - if (child) { - - spin_lock(&(child->kstc_lock)); - child->child.kstc_info.ConnectionInfo = ConnectionInfo; - child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress; - child->kstc_state = ksts_connecting; - spin_unlock(&(child->kstc_lock)); - - } else { - - KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent)); - - Status = STATUS_INSUFFICIENT_RESOURCES; - - goto errorout; - } - - FileObject = child->child.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject (FileObject); - - Irp = KsBuildTdiIrp(DeviceObject); - - TdiBuildAccept( - Irp, - DeviceObject, - FileObject, - KsAcceptCompletionRoutine, - child, - NULL, - NULL - ); - - IoSetNextIrpStackLocation(Irp); - - /* grap the refer of the child tdi connection */ - ks_get_tconn(child); - - Status = STATUS_MORE_PROCESSING_REQUIRED; - - *AcceptIrp = Irp; - *ConnectionContext = child; - - } else { - - Status = STATUS_CONNECTION_REFUSED; - goto errorout; - } - - spin_unlock(&(parent->kstc_lock)); - - return Status; - -errorout: - - spin_unlock(&(parent->kstc_lock)); - - { - *AcceptIrp = NULL; - *ConnectionContext = NULL; - - if (ConnectionInfo) { - - ExFreePool(ConnectionInfo); - } - - if (Irp) { - - IoFreeIrp (Irp); - } - } - - return Status; -} - -/* - * KsDisconnectCompletionRoutine - * the Irp completion routine for TdiBuildDisconect - * - * We just signal the event and return MORE_PRO... to - * let the caller take the responsibility of the Irp. 
- * - * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. - * Context: the event specified by the caller - * - * Return Value: - * Nt status code - * - * Notes: - * N/A - */ - -NTSTATUS -KsDisconectCompletionRoutine ( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) -{ - - KeSetEvent((PKEVENT) Context, 0, FALSE); - - return STATUS_MORE_PROCESSING_REQUIRED; - - UNREFERENCED_PARAMETER(DeviceObject); -} - - -/* - * KsDisconnectHelper - * the routine to be executed in the WorkItem procedure - * this routine is to disconnect a tdi connection - * - * Arguments: - * Workitem: the context transferred to the workitem - * - * Return Value: - * N/A - * - * Notes: - * tconn is already referred in abort_connecton ... - */ - -VOID -KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem) -{ - ksock_tconn_t * tconn = WorkItem->tconn; - - DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn); - ks_disconnect_tconn(tconn, WorkItem->Flags); - - KeSetEvent(&(WorkItem->Event), 0, FALSE); - - spin_lock(&(tconn->kstc_lock)); - cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); -} - - -/* - * KsDisconnectEventHandler - * Disconnect event handler event handler, called by the underlying TDI transport - * in response to an incoming disconnection notification from a remote node. - * - * Arguments: - * ConnectionContext: tdi connnection object - * DisconnectFlags: specifies the nature of the disconnection - * ...... 
- * - * Return Value: - * Nt kernel status code - * - * Notes: - * N/A - */ - - -NTSTATUS -KsDisconnectEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN LONG DisconnectDataLength, - IN PVOID DisconnectData, - IN LONG DisconnectInformationLength, - IN PVOID DisconnectInformation, - IN ULONG DisconnectFlags - ) -{ - ksock_tconn_t * tconn; - NTSTATUS Status; - PKS_DISCONNECT_WORKITEM WorkItem; - - tconn = (ksock_tconn_t *)ConnectionContext; - - KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n", - KeGetCurrentIrql() )); - - KsPrint((2, "tconn = %x DisconnectFlags= %xh\n", - tconn, DisconnectFlags)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); - - WorkItem = &(tconn->kstc_disconnect); - - if (tconn->kstc_state != ksts_connected) { - - Status = STATUS_SUCCESS; - - } else { - - if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) { - - Status = STATUS_REMOTE_DISCONNECT; - - } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) { - - Status = STATUS_GRACEFUL_DISCONNECT; - } - - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - - ks_get_tconn(tconn); - - WorkItem->Flags = DisconnectFlags; - WorkItem->tconn = tconn; - - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - - /* queue the workitem to call */ - ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue); - } - } - - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return (Status); -} - -NTSTATUS -KsTcpReceiveCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) -{ - NTSTATUS Status = Irp->IoStatus.Status; - - if (NT_SUCCESS(Status)) { - - ksock_tconn_t *tconn = Context->tconn; - - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext; - - KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n", - Context->KsTsduMgr->TotalBytes )); - - spin_lock(&(tconn->kstc_lock)); - - if (TSDU_TYPE_DAT == 
KsTsduDat->TsduType) { - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); - } else { - cfs_enter_debugger(); - } - } else { - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_enter_debugger(); - } - } - - spin_unlock(&(tconn->kstc_lock)); + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); - /* wake up the thread waiting for the completion of this Irp */ - KeSetEvent(Context->Event, 0, FALSE); + if (!NT_SUCCESS(Status)) { - /* re-active the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - Context->KsTsduMgr->TotalBytes ); + cfs_enter_debugger(); + ZwClose(*Handle); } } else { - /* un-expected errors occur, we must abort the connection */ - ks_abort_tconn(Context->tconn); - } - - if (Context) { - - /* Freeing the Context structure... */ - ExFreePool(Context); - Context = NULL; - } - - - /* free the Irp */ - if (Irp) { - IoFreeIrp(Irp); + cfs_enter_debugger(); } return (Status); } - /* - * KsTcpCompletionRoutine - * the Irp completion routine for TdiBuildSend and TdiBuildReceive ... - * We need call the use's own CompletionRoutine if specified. Or - * it's a synchronous case, we need signal the event. + * KsCloseAddress + * Release the Hanlde and FileObject of an opened tdi + * address object * * Arguments: - * DeviceObject: the device object of the transport - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp + * Handle: the handle to be released + * FileObject: the fileobject to be released * * Return Value: - * Nt status code + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * * Notes: * N/A */ NTSTATUS -KsTcpCompletionRoutine( - IN PDEVICE_OBJECT DeviceObject, - IN PIRP Irp, - IN PVOID Context - ) +KsCloseAddress( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject +) { - if (Context) { - - PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL; - ksock_tconn_t * tconn = NULL; - - CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context; - tconn = CompletionContext->tconn; - - /* release the chained mdl */ - KsReleaseMdl(Irp->MdlAddress, FALSE); - Irp->MdlAddress = NULL; - - if (CompletionContext->CompletionRoutine) { - - if ( CompletionContext->bCounted && - InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) { - goto errorout; - } - - // - // Giving control to user specified CompletionRoutine ... - // - - CompletionContext->CompletionRoutine( - Irp, - CompletionContext - ); - - } else { - - // - // Signaling the Event ... - // + NTSTATUS Status = STATUS_SUCCESS; - KeSetEvent(CompletionContext->Event, 0, FALSE); - } + if (FileObject) { - /* drop the reference count of the tconn object */ - ks_put_tconn(tconn); + ObDereferenceObject(FileObject); + } - } else { + if (Handle) { - cfs_enter_debugger(); + Status = ZwClose(Handle); } -errorout: + ASSERT(NT_SUCCESS(Status)); - return STATUS_MORE_PROCESSING_REQUIRED; + return (Status); } + /* - * KsTcpSendCompletionRoutine - * the user specified Irp completion routine for asynchronous - * data transmission requests. - * - * It will do th cleanup job of the ksock_tx_t and wake up the - * ks scheduler thread + * KsOpenConnection + * Open a tdi connection object * * Arguments: - * Irp: the Irp is being completed. 
- * Context: the context we specified when issuing the Irp + * DeviceName: device name of the connection object + * ConnectionContext: the connection context + * Handle: the newly opened handle + * FileObject: the newly opened fileobject * * Return Value: - * Nt status code + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * * Notes: * N/A */ NTSTATUS -KsTcpSendCompletionRoutine( - IN PIRP Irp, - IN PKS_TCP_COMPLETION_CONTEXT Context - ) +KsOpenConnection( + IN PUNICODE_STRING DeviceName, + IN CONNECTION_CONTEXT ConnectionContext, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject + ) { - NTSTATUS Status = Irp->IoStatus.Status; - ULONG rc = Irp->IoStatus.Information; - ksock_tconn_t * tconn = Context->tconn; - PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr; + NTSTATUS Status = STATUS_SUCCESS; - ENTRY; + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + UCHAR EaBuffer[EA_MAX_LENGTH]; - LASSERT(tconn) ; + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; - if (NT_SUCCESS(Status)) { + // + // Building EA for the Address Object to be Opened ... 
+ // - if (Context->bCounted) { - PVOID tx = Context->CompletionContext; + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH; + Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT); + RtlCopyMemory( + &(Ea->EaName), + TdiConnectionContext, + Ea->EaNameLength + 1 + ); + RtlMoveMemory( + &(Ea->EaName[Ea->EaNameLength + 1]), + &ConnectionContext, + sizeof(CONNECTION_CONTEXT) + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + + Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT); - ASSERT(tconn->kstc_update_tx != NULL); + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - /* update the tx, rebasing the kiov or iov pointers */ - tx = tconn->kstc_update_tx(tconn, tx, rc); - /* update the KsTsudMgr total bytes */ - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); + // + // Initializing ... + // - /* - * now it's time to re-queue the conns into the - * scheduler queue and wake the scheduler thread. - */ + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, TRUE, tx, 0); - } + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - } else { + // + // Creating the Connection Object ... + // - PKS_TSDU KsTsdu = Context->CompletionContext; - PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2; - PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2; + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + NULL, + FILE_ATTRIBUTE_NORMAL, + FILE_SHARE_READ | FILE_SHARE_WRITE, + FILE_OPEN, + 0, + Ea, + EaLength + ); - spin_lock(&tconn->kstc_lock); - /* This is bufferred sending ... */ - ASSERT(KsTsduBuf->StartOffset == 0); - if (KsTsduBuf->DataLength > Irp->IoStatus.Information) { - /* not fully sent .... 
we have to abort the connection */ - spin_unlock(&tconn->kstc_lock); - ks_abort_tconn(tconn); - goto errorout; - } + if (NT_SUCCESS(Status)) { - if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) { - /* free the buffer */ - ExFreePool(KsTsduBuf->UserBuffer); - KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength; - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) { - KsTsduMgr->TotalBytes -= KsTsduDat->DataLength; - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } else { - cfs_enter_debugger(); /* shoult not get here */ - } + // + // Now Obtaining the FileObject of the Transport Address ... + // - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); - list_del(&KsTsdu->Link); - KsTsduMgr->NumOfTsdu--; - KsPutKsTsdu(KsTsdu); - } + if (!NT_SUCCESS(Status)) { - spin_unlock(&tconn->kstc_lock); + cfs_enter_debugger(); + ZwClose(*Handle); } } else { - /* cfs_enter_debugger(); */ + cfs_enter_debugger(); + } - /* - * for the case that the transmission is ussuccessful, - * we need abort the tdi connection, but not destroy it. - * the socknal conn will drop the refer count, then the - * tdi connection will be freed. - */ + return (Status); +} - ks_abort_tconn(tconn); - } +/* + * KsCloseConnection + * Release the Hanlde and FileObject of an opened tdi + * connection object + * + * Arguments: + * Handle: the handle to be released + * FileObject: the fileobject to be released + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ -errorout: +NTSTATUS +KsCloseConnection( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; - /* freeing the Context structure... */ + if (FileObject) { - if (Context) { - ExFreePool(Context); - Context = NULL; + ObDereferenceObject(FileObject); } - /* it's our duty to free the Irp. 
*/ + if (Handle) { - if (Irp) { - IoFreeIrp(Irp); - Irp = NULL; + Status = ZwClose(Handle); } - EXIT; + ASSERT(NT_SUCCESS(Status)); - return Status; + return (Status); } + /* - * Normal receive event handler + * KsAssociateAddress + * Associate an address object with a connection object * - * It will move data from system Tsdu to our TsduList + * Arguments: + * AddressHandle: the handle of the address object + * ConnectionObject: the FileObject of the connection + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A */ NTSTATUS -KsTcpReceiveEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) +KsAssociateAddress( + IN HANDLE AddressHandle, + IN PFILE_OBJECT ConnectionObject + ) { NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - - BOOLEAN bIsExpedited; - BOOLEAN bIsCompleteTsdu; - - BOOLEAN bNewTsdu = FALSE; - BOOLEAN bNewBuff = FALSE; - - PCHAR Buffer = NULL; - - PIRP Irp = NULL; - PMDL Mdl = NULL; - PFILE_OBJECT FileObject; PDEVICE_OBJECT DeviceObject; + PIRP Irp; - ULONG BytesReceived = 0; - - PKS_TCP_COMPLETION_CONTEXT context = NULL; - - - tconn = (ksock_tconn_t *) ConnectionContext; - - ks_get_tconn(tconn); - - /* check whether the whole body of payload is received or not */ - if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) && - (BytesIndicated == BytesAvailable) ) { - bIsCompleteTsdu = TRUE; - } else { - bIsCompleteTsdu = FALSE; - } - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable)); - KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", 
bIsCompleteTsdu, bIsExpedited )); - - spin_lock(&(tconn->kstc_lock)); + // + // Getting the DeviceObject from Connection FileObject + // - /* check whether we are conntected or not listener ¡­*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - *BytesTaken = BytesIndicated; + // + // Building Tdi Internal Irp ... + // - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); + Irp = KsBuildTdiIrp(DeviceObject); - return (STATUS_SUCCESS); - } + if (NULL == Irp) { - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } + Status = STATUS_INSUFFICIENT_RESOURCES; - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); } else { - KsTsduMgr = &(KsChain->Normal); - } - - /* if the Tsdu is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ - - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } - /* retrieve the latest Tsdu buffer form TsduMgr - list if the list is not empty. */ - - if (list_empty(&(KsTsduMgr->TsduList))) { - - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; - - } else { + // + // Assocating the Address Object with the Connection Object + // - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); + TdiBuildAssociateAddress( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + AddressHandle + ); - /* if this Tsdu does not contain enough space, we need - allocate a new Tsdu queue. 
*/ + // + // Calling the Transprot Driver with the Prepared Irp + // - if (bNewBuff) { - if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) > - KsTsdu->TotalLength ) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE(BytesAvailable) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); } - /* allocating the buffer for TSDU_TYPE_BUF */ - if (bNewBuff) { - Buffer = ExAllocatePool(NonPagedPool, BytesAvailable); - if (NULL == Buffer) { - /* there's no enough memory for us. We just try to - receive maximum bytes with a new Tsdu */ - bNewBuff = FALSE; - KsTsdu = NULL; - } - } + return (Status); +} - /* allocate a new Tsdu in case we are not statisfied. */ - if (NULL == KsTsdu) { +/* + * KsDisassociateAddress + * Disassociate the connection object (the relationship will + * the corresponding address object will be dismissed. ) + * + * Arguments: + * ConnectionObject: the FileObject of the connection + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ - KsTsdu = KsAllocateKsTsdu(); +NTSTATUS +KsDisassociateAddress( + IN PFILE_OBJECT ConnectionObject + ) +{ + NTSTATUS Status; + PDEVICE_OBJECT DeviceObject; + PIRP Irp; - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; - } - } + // + // Getting the DeviceObject from Connection FileObject + // - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - if (bNewBuff) { + // + // Building Tdi Internal Irp ... 
+ // - /* setup up the KS_TSDU_BUF record */ + Irp = KsBuildTdiIrp(DeviceObject); - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - KsTsduBuf->DataLength = BytesReceived = BytesAvailable; + if (NULL == Irp) { - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); + Status = STATUS_INSUFFICIENT_RESOURCES; } else { - /* setup the KS_TSDU_DATA to contain all the messages */ - - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsduDat->TsduFlags = 0; + // + // Disassocating the Address Object with the Connection Object + // - if ( KsTsdu->TotalLength - KsTsdu->LastOffset >= - KS_TSDU_STRU_SIZE(BytesAvailable) ) { - BytesReceived = BytesAvailable; - } else { - BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset - - FIELD_OFFSET(KS_TSDU_DAT, Data); - BytesReceived &= (~((ULONG)3)); - } - KsTsduDat->DataLength = BytesReceived; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived); - KsTsduDat->StartOffset = 0; + TdiBuildDisassociateAddress( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL + ); - Buffer = &KsTsduDat->Data[0]; + // + // Calling the Transprot Driver with the Prepared Irp + // - KsTsdu->LastOffset += KsTsduDat->TotalLength; + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); } - KsTsduMgr->TotalBytes += BytesReceived; + return (Status); +} - if (bIsCompleteTsdu) { - /* It's a complete receive, we just move all - the data from system to our Tsdu */ +/* - RtlMoveMemory( - Buffer, - Tsdu, - BytesReceived - ); +// +// Connection Control Event Callbacks +// - *BytesTaken = BytesReceived; - Status = STATUS_SUCCESS; +TDI_EVENT_CONNECT +TDI_EVENT_DISCONNECT +TDI_EVENT_ERROR - if (bNewTsdu) { - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } +// +// Tcp Event Callbacks +// - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); +TDI_EVENT_RECEIVE +TDI_EVENT_RECEIVE_EXPEDITED +TDI_EVENT_CHAINED_RECEIVE +TDI_EVENT_CHAINED_RECEIVE_EXPEDITED - /* re-active 
the ks connection and wake up the scheduler */ - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); - } +// +// Udp Event Callbacks +// - } else { +TDI_EVENT_RECEIVE_DATAGRAM +TDI_EVENT_CHAINED_RECEIVE_DATAGRAM - /* there's still data in tdi internal queue, we need issue a new - Irp to receive all of them. first allocate the tcp context */ +*/ - context = ExAllocatePoolWithTag( - NonPagedPool, - sizeof(KS_TCP_COMPLETION_CONTEXT), - 'cTsK'); - if (!context) { +/* + * KsSetEventHandlers + * Set the tdi event callbacks with an address object + * + * Arguments: + * AddressObject: the FileObject of the address object + * EventContext: the parameter for the callbacks + * Handlers: the handlers indictor array + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * NOTES: + * N/A + */ - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } +NTSTATUS +KsSetEventHandlers( + IN PFILE_OBJECT AddressObject, // Address File Object + IN PVOID EventContext, // Context for Handlers + IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + PDEVICE_OBJECT DeviceObject; + USHORT i = 0; - /* setup the context */ - RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT)); + DeviceObject = IoGetRelatedDeviceObject(AddressObject); - context->tconn = tconn; - context->CompletionRoutine = KsTcpReceiveCompletionRoutine; - context->CompletionContext = KsTsdu; - context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat; - context->KsTsduMgr = KsTsduMgr; - context->Event = &(KsTsduMgr->Event); + for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) { - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else { - FileObject = tconn->child.kstc_info.FileObject; - } + // + // Setup the tdi event callback handler if requested. 
+ // - DeviceObject = IoGetRelatedDeviceObject(FileObject); + if (Handlers->IsActive[i]) { - /* build new tdi Irp and setup it. */ - Irp = KsBuildTdiIrp(DeviceObject); + PIRP Irp; - if (NULL == Irp) { - goto errorout; - } + // + // Building Tdi Internal Irp ... + // - Status = KsLockUserBuffer( - Buffer, - FALSE, - BytesReceived, - IoModifyAccess, - &Mdl - ); + Irp = KsBuildTdiIrp(DeviceObject); - if (!NT_SUCCESS(Status)) { - goto errorout; - } + if (NULL == Irp) { - TdiBuildReceive( - Irp, - DeviceObject, - FileObject, - KsTcpCompletionRoutine, - context, - Mdl, - ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED), - BytesReceived - ); + Status = STATUS_INSUFFICIENT_RESOURCES; - IoSetNextIrpStackLocation(Irp); + } else { - /* return the newly built Irp to transport driver, - it will process it to receive all the data */ + // + // Building the Irp to set the Event Handler ... + // - *IoRequestPacket = Irp; - *BytesTaken = 0; + TdiBuildSetEventHandler( + Irp, + DeviceObject, + AddressObject, + NULL, + NULL, + i, /* tdi event type */ + Handlers->Handler[i], /* tdi event handler */ + EventContext /* context for the handler */ + ); - if (bNewTsdu) { + // + // Calling the Transprot Driver with the Prepared Irp + // - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); - if (bNewBuff) { - cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); - } else { - cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); + // + // tcp/ip tdi does not support these two event callbacks + // + + if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE || + i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) { + cfs_enter_debugger(); + Status = STATUS_SUCCESS; + } + } + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } } - ks_get_tconn(tconn); - Status = STATUS_MORE_PROCESSING_REQUIRED; } - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); - - return 
(Status); errorout: - spin_unlock(&(tconn->kstc_lock)); - - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } - - if (Mdl) { - KsReleaseMdl(Mdl, FALSE); - } - - if (Irp) { - IoFreeIrp(Irp); - } + if (!NT_SUCCESS(Status)) { - if (context) { - ExFreePool(context); + KsPrint((1, "KsSetEventHandlers: Error Status = %xh (%s)\n", + Status, KsNtStatusToString(Status) )); } - ks_abort_tconn(tconn); - ks_put_tconn(tconn); - - *BytesTaken = BytesAvailable; - Status = STATUS_SUCCESS; - return (Status); } -/* - * Expedited receive event handler - */ - -NTSTATUS -KsTcpReceiveExpeditedEventHandler( - IN PVOID TdiEventContext, - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG BytesIndicated, - IN ULONG BytesAvailable, - OUT ULONG * BytesTaken, - IN PVOID Tsdu, - OUT PIRP * IoRequestPacket - ) -{ - return KsTcpReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - BytesIndicated, - BytesAvailable, - BytesTaken, - Tsdu, - IoRequestPacket - ); -} /* - * Bulk receive event handler + * KsQueryAddressInfo + * Query the address of the FileObject specified * - * It will queue all the system Tsdus to our TsduList. - * Then later ks_recv_mdl will release them. 
- */ - -NTSTATUS -KsTcpChainedReceiveEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - - NTSTATUS Status; - - ksock_tconn_t * tconn; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_MDL KsTsduMdl; - - BOOLEAN bIsExpedited; - BOOLEAN bNewTsdu = FALSE; - - tconn = (ksock_tconn_t *) ConnectionContext; - - bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - - KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited)); - - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); + * Arguments: + * FileObject: the FileObject to be queried + * AddressInfo: buffer to contain the address info + * AddressSize: length of the AddressInfo buffer + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ - /* check whether we are conntected or not listener ¡­*/ - if ( !((tconn->kstc_state == ksts_connected) && - (tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child))) { +NTSTATUS +KsQueryAddressInfo( + PFILE_OBJECT FileObject, + PTDI_ADDRESS_INFO AddressInfo, + PULONG AddressSize + ) +{ + NTSTATUS Status = STATUS_UNSUCCESSFUL; + PIRP Irp = NULL; + PMDL Mdl; + PDEVICE_OBJECT DeviceObject; - spin_unlock(&(tconn->kstc_lock)); - ks_put_tconn(tconn); + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - return (STATUS_SUCCESS); - } + DeviceObject = IoGetRelatedDeviceObject(FileObject); - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. 
*/ + RtlZeroMemory(AddressInfo, *(AddressSize)); - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } + // + // Allocating the Tdi Setting Irp ... + // - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } + Irp = KsBuildTdiIrp(DeviceObject); - if (list_empty(&(KsTsduMgr->TsduList))) { + if (NULL == Irp) { - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; + Status = STATUS_INSUFFICIENT_RESOURCES; } else { - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) { - KsTsdu = NULL; - } - } - - /* if there's no Tsdu or the free size is not enough for this - KS_TSDU_MDL structure. We need re-allocate a new Tsdu. */ + // + // Locking the User Buffer / Allocating a MDL for it + // - if (NULL == KsTsdu) { + Status = KsLockUserBuffer( + AddressInfo, + FALSE, + *(AddressSize), + IoModifyAccess, + &Mdl + ); - KsTsdu = KsAllocateKsTsdu(); + if (!NT_SUCCESS(Status)) { - if (NULL == KsTsdu) { - goto errorout; - } else { - bNewTsdu = TRUE; + IoFreeIrp(Irp); + Irp = NULL; } } - /* just queue the KS_TSDU_MDL to the Tsdu buffer */ - - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset); - - KsTsduMdl->TsduType = TSDU_TYPE_MDL; - KsTsduMdl->DataLength = ReceiveLength; - KsTsduMdl->StartOffset = StartingOffset; - KsTsduMdl->Mdl = Tsdu; - KsTsduMdl->Descriptor = TsduDescriptor; - - KsTsdu->LastOffset += sizeof(KS_TSDU_MDL); - KsTsduMgr->TotalBytes += ReceiveLength; + if (Irp) { - KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n", - KsTsduMgr->TotalBytes )); + LASSERT(NT_SUCCESS(Status)); - Status = STATUS_PENDING; + TdiBuildQueryInformation( + Irp, + DeviceObject, + FileObject, + NULL, + NULL, + TDI_QUERY_ADDRESS_INFO, + 
Mdl + ); - /* attach it to the TsduMgr list if the Tsdu is newly created. */ - if (bNewTsdu) { + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + AddressSize + ); - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; + KsReleaseMdl(Mdl, FALSE); } - spin_unlock(&(tconn->kstc_lock)); - - /* wake up the threads waiing in ks_recv_mdl */ - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); + if (!NT_SUCCESS(Status)) { - if (tconn->kstc_conn && tconn->kstc_sched_cb) { - tconn->kstc_sched_cb( tconn, FALSE, NULL, - KsTsduMgr->TotalBytes ); + cfs_enter_debugger(); + //TDI_BUFFER_OVERFLOW } - ks_put_tconn(tconn); + return (Status); +} - /* Return STATUS_PENDING to system because we are still - owning the MDL resources. ks_recv_mdl is expected - to free the MDL resources. */ +/* + * KsQueryProviderInfo + * Query the underlying transport device's information + * + * Arguments: + * TdiDeviceName: the transport device's name string + * ProviderInfo: TDI_PROVIDER_INFO struncture + * + * Return Value: + * NTSTATUS: Nt system status code + * + * NOTES: + * N/A + */ - return (Status); +NTSTATUS +KsQueryProviderInfo( + PWSTR TdiDeviceName, + PTDI_PROVIDER_INFO ProviderInfo + ) +{ + NTSTATUS Status = STATUS_SUCCESS; -errorout: + PIRP Irp = NULL; + PMDL Mdl = NULL; - spin_unlock(&(tconn->kstc_lock)); + UNICODE_STRING ControlName; - if (bNewTsdu && (KsTsdu != NULL)) { - KsFreeKsTsdu(KsTsdu); - } + HANDLE Handle; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; - /* abort the tdi connection */ - ks_abort_tconn(tconn); - ks_put_tconn(tconn); + ULONG ProviderSize = 0; + RtlInitUnicodeString(&ControlName, TdiDeviceName); - Status = STATUS_SUCCESS; + // + // Open the Tdi Control Channel + // - return (Status); -} + Status = KsOpenControl( + &ControlName, + &Handle, + &FileObject + ); + if (!NT_SUCCESS(Status)) { -/* - * Expedited & Bulk receive event handler - */ + KsPrint((1, "KsQueryProviderInfo: Fail to open the tdi control channel.\n")); + return 
(Status); + } -NTSTATUS -KsTcpChainedReceiveExpeditedEventHandler ( - IN PVOID TdiEventContext, // the event context - IN CONNECTION_CONTEXT ConnectionContext, - IN ULONG ReceiveFlags, - IN ULONG ReceiveLength, - IN ULONG StartingOffset, // offset of start of client data in TSDU - IN PMDL Tsdu, // TSDU data chain - IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives - ) -{ - return KsTcpChainedReceiveEventHandler( - TdiEventContext, - ConnectionContext, - ReceiveFlags | TDI_RECEIVE_EXPEDITED, - ReceiveLength, - StartingOffset, - Tsdu, - TsduDescriptor ); -} + // + // Obtain The Related Device Object + // + DeviceObject = IoGetRelatedDeviceObject(FileObject); -VOID -KsPrintProviderInfo( - PWSTR DeviceName, - PTDI_PROVIDER_INFO ProviderInfo - ) -{ - KsPrint((2, "%ws ProviderInfo:\n", DeviceName)); + ProviderSize = sizeof(TDI_PROVIDER_INFO); + RtlZeroMemory(ProviderInfo, ProviderSize); - KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version )); - KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize )); - KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData )); - KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize )); - KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags )); + // + // Allocating the Tdi Setting Irp ... 
+ // - if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) { - KsPrint((2, " CONNECTION_MODE\n")); - } + Irp = KsBuildTdiIrp(DeviceObject); - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) { - KsPrint((2, " ORDERLY_RELEASE\n")); - } + if (NULL == Irp) { - if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) { - KsPrint((2, " CONNECTIONLESS_MODE\n")); - } + Status = STATUS_INSUFFICIENT_RESOURCES; - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) { - KsPrint((2, " ERROR_FREE_DELIVERY\n")); - } + } else { - if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) { - KsPrint((2, " SECURITY_LEVEL\n")); - } + // + // Locking the User Buffer / Allocating a MDL for it + // - if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) { - KsPrint((2, " BROADCAST_SUPPORTED\n")); - } + Status = KsLockUserBuffer( + ProviderInfo, + FALSE, + ProviderSize, + IoModifyAccess, + &Mdl + ); - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) { - KsPrint((2, " MULTICAST_SUPPORTED\n")); - } + if (!NT_SUCCESS(Status)) { - if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) { - KsPrint((2, " DELAYED_ACCEPTANCE\n")); + IoFreeIrp(Irp); + Irp = NULL; + } } - if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) { - KsPrint((2, " EXPEDITED_DATA\n")); - } + if (Irp) { - if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) { - KsPrint((2, " INTERNAL_BUFFERING\n")); - } + LASSERT(NT_SUCCESS(Status)); - if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) { - KsPrint((2, " ROUTE_DIRECTED\n")); - } + TdiBuildQueryInformation( + Irp, + DeviceObject, + FileObject, + NULL, + NULL, + TDI_QUERY_PROVIDER_INFO, + Mdl + ); - if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) { - KsPrint((2, " NO_ZERO_LENGTH\n")); - } + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + &ProviderSize + ); - if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) { - KsPrint((2, " 
POINT_TO_POINT\n")); + KsReleaseMdl(Mdl, FALSE); } - if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) { - KsPrint((2, " MESSAGE_MODE\n")); - } + if (!NT_SUCCESS(Status)) { - if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) { - KsPrint((2, " HALF_DUPLEX\n")); + cfs_enter_debugger(); + //TDI_BUFFER_OVERFLOW } - KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData )); - KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData )); - KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources )); -} + KsCloseControl(Handle, FileObject); + return (Status); +} /* - * KsAllocateKsTsdu - * Reuse a Tsdu from the freelist or allocate a new Tsdu - * from the LookAsideList table or the NonPagedPool + * KsQueryConnectionInfo + * Query the connection info of the FileObject specified + * (some statics data of the traffic) * * Arguments: - * N/A + * FileObject: the FileObject to be queried + * ConnectionInfo: buffer to contain the connection info + * ConnectionSize: length of the ConnectionInfo buffer * * Return Value: - * PKS_Tsdu: the new Tsdu or NULL if it fails + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) * - * Notes: + * NOTES: * N/A */ -PKS_TSDU -KsAllocateKsTsdu() +NTSTATUS +KsQueryConnectionInfo( + PFILE_OBJECT ConnectionObject, + PTDI_CONNECTION_INFO ConnectionInfo, + PULONG ConnectionSize + ) { - PKS_TSDU KsTsdu = NULL; + NTSTATUS Status = STATUS_UNSUCCESSFUL; + PIRP Irp = NULL; + PMDL Mdl; + PDEVICE_OBJECT DeviceObject; - spin_lock(&(ks_data.ksnd_tsdu_lock)); + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); - if (!list_empty (&(ks_data.ksnd_freetsdus))) { + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - LASSERT(ks_data.ksnd_nfreetsdus > 0); + RtlZeroMemory(ConnectionInfo, *(ConnectionSize)); - KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link); - list_del(&(KsTsdu->Link)); - ks_data.ksnd_nfreetsdus--; + // + // Allocating the Tdi Query 
Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; } else { - KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc( - ks_data.ksnd_tsdu_slab, 0); - } + // + // Locking the User Buffer / Allocating a MDL for it + // - spin_unlock(&(ks_data.ksnd_tsdu_lock)); + Status = KsLockUserBuffer( + ConnectionInfo, + FALSE, + *(ConnectionSize), + IoModifyAccess, + &Mdl + ); - if (NULL != KsTsdu) { - KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size); + if (NT_SUCCESS(Status)) { + + IoFreeIrp(Irp); + Irp = NULL; + } } - return (KsTsdu); -} + if (Irp) { + LASSERT(NT_SUCCESS(Status)); -/* - * KsPutKsTsdu - * Move the Tsdu to the free tsdu list in ks_data. - * - * Arguments: - * KsTsdu: Tsdu to be moved. - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ + TdiBuildQueryInformation( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + TDI_QUERY_CONNECTION_INFO, + Mdl + ); -VOID -KsPutKsTsdu( - PKS_TSDU KsTsdu - ) -{ - spin_lock(&(ks_data.ksnd_tsdu_lock)); + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + ConnectionSize + ); - list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus)); - ks_data.ksnd_nfreetsdus++; + KsReleaseMdl(Mdl, FALSE); + } - spin_unlock(&(ks_data.ksnd_tsdu_lock)); + return (Status); } /* - * KsFreeKsTsdu - * Release a Tsdu: uninitialize then free it. + * KsInitializeTdiAddress + * Initialize the tdi addresss * * Arguments: - * KsTsdu: Tsdu to be freed. 
+ * pTransportAddress: tdi address to be initialized + * IpAddress: the ip address of object + * IpPort: the ip port of the object * * Return Value: - * N/A + * ULONG: the total size of the tdi address * - * Notes: + * NOTES: * N/A */ -VOID -KsFreeKsTsdu( - PKS_TSDU KsTsdu +ULONG +KsInitializeTdiAddress( + IN OUT PTA_IP_ADDRESS pTransportAddress, + IN ULONG IpAddress, + IN USHORT IpPort ) { - cfs_mem_cache_free( - ks_data.ksnd_tsdu_slab, - KsTsdu ); -} + pTransportAddress->TAAddressCount = 1; + pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP; + pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP; + pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort; + pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress; + return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP); +} /* - * KsInitializeKsTsdu - * Initialize the Tsdu buffer header + * KsQueryTdiAddressLength + * Query the total size of the tdi address * * Arguments: - * KsTsdu: the Tsdu to be initialized - * Length: the total length of the Tsdu + * pTransportAddress: tdi address to be queried * * Return Value: - * VOID + * ULONG: the total size of the tdi address * * NOTES: * N/A */ -VOID -KsInitializeKsTsdu( - PKS_TSDU KsTsdu, - ULONG Length +ULONG +KsQueryTdiAddressLength( + PTRANSPORT_ADDRESS pTransportAddress ) { - RtlZeroMemory(KsTsdu, Length); - KsTsdu->Magic = KS_TSDU_MAGIC; - KsTsdu->TotalLength = Length; - KsTsdu->StartOffset = KsTsdu->LastOffset = - KS_DWORD_ALIGN(sizeof(KS_TSDU)); + ULONG TotalLength = 0; + LONG i; + + PTA_ADDRESS pTaAddress = NULL; + + ASSERT (NULL != pTransportAddress); + + TotalLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) + + FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount; + + pTaAddress = (PTA_ADDRESS)pTransportAddress->Address; + + for (i = 0; i < pTransportAddress->TAAddressCount; i++) + { + TotalLength += pTaAddress->AddressLength; + pTaAddress = 
(PTA_ADDRESS)((PCHAR)pTaAddress + + FIELD_OFFSET(TA_ADDRESS,Address) + + pTaAddress->AddressLength ); + } + + return (TotalLength); } /* - * KsInitializeKsTsduMgr - * Initialize the management structure of - * Tsdu buffers + * KsQueryIpAddress + * Query the ip address of the tdi object * * Arguments: - * TsduMgr: the TsduMgr to be initialized + * FileObject: tdi object to be queried + * TdiAddress: TdiAddress buffer, to store the queried + * tdi ip address + * AddressLength: buffer length of the TdiAddress * * Return Value: - * VOID + * ULONG: the total size of the tdi ip address * * NOTES: * N/A */ -VOID -KsInitializeKsTsduMgr( - PKS_TSDUMGR TsduMgr +NTSTATUS +KsQueryIpAddress( + PFILE_OBJECT FileObject, + PVOID TdiAddress, + ULONG* AddressLength ) { - KeInitializeEvent( - &(TsduMgr->Event), - NotificationEvent, - FALSE - ); + NTSTATUS Status; + + PTDI_ADDRESS_INFO TdiAddressInfo; + ULONG Length; + + + // + // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS + // + + Length = MAX_ADDRESS_LENGTH; + + TdiAddressInfo = (PTDI_ADDRESS_INFO) + ExAllocatePoolWithTag( + NonPagedPool, + Length, + 'KSAI' ); + + if (NULL == TdiAddressInfo) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + + Status = KsQueryAddressInfo( + FileObject, + TdiAddressInfo, + &Length + ); + +errorout: + + if (NT_SUCCESS(Status)) { + + if (*AddressLength < Length) { + Status = STATUS_BUFFER_TOO_SMALL; + } else { + *AddressLength = Length; + RtlCopyMemory( + TdiAddress, + &(TdiAddressInfo->Address), + Length + ); + Status = STATUS_SUCCESS; + } + } - CFS_INIT_LIST_HEAD( - &(TsduMgr->TsduList) - ); + if (NULL != TdiAddressInfo) { + ExFreePool(TdiAddressInfo); + } - TsduMgr->NumOfTsdu = 0; - TsduMgr->TotalBytes = 0; + return Status; } /* - * KsInitializeKsChain - * Initialize the China structure for receiving - * or transmitting + * KsErrorEventHandler + * the common error event handler callback * * Arguments: - * KsChain: the KsChain to be initialized + * 
TdiEventContext: should be the socket + * Status: the error code * * Return Value: - * VOID + * Status: STATS_SUCCESS * * NOTES: - * N/A + * We need not do anything in such a severe + * error case. System will process it for us. */ -VOID -KsInitializeKsChain( - PKS_CHAIN KsChain - ) +NTSTATUS +KsErrorEventHandler( + IN PVOID TdiEventContext, + IN NTSTATUS Status + ) { - KsInitializeKsTsduMgr(&(KsChain->Normal)); - KsInitializeKsTsduMgr(&(KsChain->Expedited)); -} + KsPrint((1, "KsErrorEventHandler called at Irql = %xh ...\n", + KeGetCurrentIrql())); + + cfs_enter_debugger(); + return (STATUS_SUCCESS); +} /* - * KsCleanupTsduMgr - * Clean up all the Tsdus in the TsduMgr list + * KsAcceptCompletionRoutine + * Irp completion routine for TdiBuildAccept (KsConnectEventHandler) + * + * Here system gives us a chance to check the conneciton is built + * ready or not. * * Arguments: - * KsTsduMgr: the Tsdu list manager + * DeviceObject: the device object of the transport driver + * Irp: the Irp is being completed. 
+ * Context: the context we specified when issuing the Irp * * Return Value: - * NTSTATUS: nt status code + * Nt status code * - * NOTES: + * Notes: * N/A */ NTSTATUS -KsCleanupTsduMgr( - PKS_TSDUMGR KsTsduMgr +KsAcceptCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context ) { - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; + ks_tconn_t * child = (ks_tconn_t *) Context; + ks_tconn_t * parent = child->child.kstc_parent; - LASSERT(NULL != KsTsduMgr); + KsPrint((2, "KsAcceptCompletionRoutine at Irql: %xh child: %p status: %p\n", + KeGetCurrentIrql(), child, Irp->IoStatus.Status)); - KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); + LASSERT(child->kstc_type == kstt_child); - while (!list_empty(&KsTsduMgr->TsduList)) { + spin_lock(&(child->kstc_lock)); - KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + LASSERT(parent->kstc_state == ksts_listening); + LASSERT(child->kstc_state == ksts_connecting); - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + if (NT_SUCCESS(Irp->IoStatus.Status)) { - // - // KsTsdu is empty now, we need free it ... 
- // + child->child.kstc_accepted = TRUE; - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; + child->kstc_state = ksts_connected; - KsFreeKsTsdu(KsTsdu); + /* wake up the daemon thread which waits on this event */ + KeSetEvent( + &(parent->listener.kstc_accept_event), + 0, + FALSE + ); - } else { + spin_unlock(&(child->kstc_lock)); - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsPrint((2, "KsAcceptCompletionRoutine: singal parent: %p (child: %p)\n", + parent, child)); - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { + } else { - KsTsdu->StartOffset += KsTsduDat->TotalLength; + /* re-use this child connecton */ + child->child.kstc_accepted = FALSE; + child->child.kstc_busy = FALSE; + child->kstc_state = ksts_associated; - } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { + spin_unlock(&(child->kstc_lock)); + } - ASSERT(KsTsduBuf->UserBuffer != NULL); + /* now free the Irp */ + IoFreeIrp(Irp); - if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) { - ExFreePool(KsTsduBuf->UserBuffer); - } else { - cfs_enter_debugger(); - } + /* drop the refer count of the child */ + ks_put_tconn(child); - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); + return (STATUS_MORE_PROCESSING_REQUIRED); +} - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { +ks_addr_slot_t * +KsSearchIpAddress(PUNICODE_STRING DeviceName) +{ + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; - // - // MDL Tsdu Unit ... 
- // + spin_lock(&ks_data.ksnd_addrs_lock); - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); + list = ks_data.ksnd_addrs_list.Flink; + while (list != &ks_data.ksnd_addrs_list) { + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + if (RtlCompareUnicodeString( + DeviceName, + &slot->devname, + TRUE) == 0) { + break; + } + list = list->Flink; + slot = NULL; + } - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); + spin_unlock(&ks_data.ksnd_addrs_lock); + + return slot; +} + +void +KsCleanupIpAddresses() +{ + spin_lock(&ks_data.ksnd_addrs_lock); + + while (!IsListEmpty(&ks_data.ksnd_addrs_list)) { + + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; + + list = RemoveHeadList(&ks_data.ksnd_addrs_list); + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + cfs_free(slot); + ks_data.ksnd_naddrs--; + } + + cfs_assert(ks_data.ksnd_naddrs == 0); + spin_unlock(&ks_data.ksnd_addrs_lock); +} + +VOID +KsAddAddressHandler( + IN PTA_ADDRESS Address, + IN PUNICODE_STRING DeviceName, + IN PTDI_PNP_CONTEXT Context + ) +{ + PTDI_ADDRESS_IP IpAddress = NULL; + + if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && + Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { + + ks_addr_slot_t * slot = NULL; + + IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; + KsPrint((2, "KsAddAddressHandle: Device=%wZ Context=%xh " + "IpAddress=%xh(%d.%d.%d.%d)\n", + DeviceName, Context, IpAddress->in_addr, + (IpAddress->in_addr & 0x000000FF) >> 0, + (IpAddress->in_addr & 0x0000FF00) >> 8, + (IpAddress->in_addr & 0x00FF0000) >> 16, + (IpAddress->in_addr & 0xFF000000) >> 24 + )); + + slot = KsSearchIpAddress(DeviceName); + + if (slot != NULL) { + slot->up = TRUE; + slot->ip_addr = ntohl(IpAddress->in_addr); + } else { + + /* Matt: only add 192.168.10/5/92.xxx for temporary test */ + if ((IpAddress->in_addr & 0x00FFFFFF) != 0x000aa8c0 && + (IpAddress->in_addr & 0x00FFFFFF) != 0x0092a8c0 && + (IpAddress->in_addr & 0x00FFFFFF) != 0x0005a8c0 ) { + return; + } + + slot = 
cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO); + if (slot != NULL) { + spin_lock(&ks_data.ksnd_addrs_lock); + InsertTailList(&ks_data.ksnd_addrs_list, &slot->link); + sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++); + slot->ip_addr = ntohl(IpAddress->in_addr); + slot->netmask = 0x00FFFFFF; /* Matt: hardcode*/ + slot->up = TRUE; + RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length); + slot->devname.Length = DeviceName->Length; + slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR); + slot->devname.Buffer = slot->buffer; + spin_unlock(&ks_data.ksnd_addrs_lock); + + KsPrint((0, "KsAddAddressHandle: %s added: ip=%xh(%d.%d.%d.%d)\n", + slot->iface, IpAddress->in_addr, + (IpAddress->in_addr & 0x000000FF) >> 0, + (IpAddress->in_addr & 0x0000FF00) >> 8, + (IpAddress->in_addr & 0x00FF0000) >> 16, + (IpAddress->in_addr & 0xFF000000) >> 24 + )); } } } - - return STATUS_SUCCESS; } +VOID +KsDelAddressHandler( + IN PTA_ADDRESS Address, + IN PUNICODE_STRING DeviceName, + IN PTDI_PNP_CONTEXT Context + ) +{ + PTDI_ADDRESS_IP IpAddress = NULL; -/* - * KsCleanupKsChain - * Clean up the TsduMgrs of the KsChain - * - * Arguments: - * KsChain: the chain managing TsduMgr - * - * Return Value: - * NTSTATUS: nt status code - * - * NOTES: - * N/A - */ + if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && + Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { + + ks_addr_slot_t * slot = NULL; + + slot = KsSearchIpAddress(DeviceName); + + if (slot != NULL) { + slot->up = FALSE; + } + + IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; + KsPrint((2, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", + DeviceName, Context, IpAddress->in_addr, + (IpAddress->in_addr & 0xFF000000) >> 24, + (IpAddress->in_addr & 0x00FF0000) >> 16, + (IpAddress->in_addr & 0x0000FF00) >> 8, + (IpAddress->in_addr & 0x000000FF) >> 0 )); + } +} NTSTATUS -KsCleanupKsChain( - PKS_CHAIN KsChain - ) +KsRegisterPnpHandlers() { - NTSTATUS 
Status; + TDI20_CLIENT_INTERFACE_INFO ClientInfo; + + /* initialize the global ks_data members */ + RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME); + spin_lock_init(&ks_data.ksnd_addrs_lock); + InitializeListHead(&ks_data.ksnd_addrs_list); - LASSERT(NULL != KsChain); + /* register the pnp handlers */ + RtlZeroMemory(&ClientInfo, sizeof(ClientInfo)); + ClientInfo.TdiVersion = TDI_CURRENT_VERSION; - Status = KsCleanupTsduMgr( - &(KsChain->Normal) - ); + ClientInfo.ClientName = &ks_data.ksnd_client_name; + ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler; + ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler; - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } + return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo), + &ks_data.ksnd_pnp_handle); +} - Status = KsCleanupTsduMgr( - &(KsChain->Expedited) - ); +VOID +KsDeregisterPnpHandlers() +{ + if (ks_data.ksnd_pnp_handle) { - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } + /* De-register the pnp handlers */ -errorout: + TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle); + ks_data.ksnd_pnp_handle = NULL; - return Status; + /* cleanup all the ip address slots */ + KsCleanupIpAddresses(); + } } /* - * KsCleanupTsdu - * Clean up all the Tsdus of a tdi connected object + * KsGetVacancyBacklog + * Get a vacancy listeing child from the backlog list * * Arguments: - * tconn: the tdi connection which is connected already. + * parent: the listener daemon connection * * Return Value: - * Nt status code + * the child listening connection or NULL in failure * - * NOTES: - * N/A + * Notes + * Parent's lock should be acquired before calling. 
*/ -NTSTATUS -KsCleanupTsdu( - ksock_tconn_t * tconn +ks_tconn_t * +KsGetVacancyBacklog( + ks_tconn_t * parent ) { - NTSTATUS Status = STATUS_SUCCESS; - - - if (tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child ) { - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - - Status = KsCleanupKsChain( - &(tconn->sender.kstc_recv) - ); + ks_tconn_t * child; - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } + LASSERT(parent->kstc_type == kstt_listener); + LASSERT(parent->kstc_state == ksts_listening); - Status = KsCleanupKsChain( - &(tconn->sender.kstc_send) - ); + if (list_empty(&(parent->listener.kstc_listening.list))) { - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } + child = NULL; } else { - Status = KsCleanupKsChain( - &(tconn->child.kstc_recv) - ); + struct list_head * tmp; - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } + /* check the listening queue and try to get a free connecton */ - Status = KsCleanupKsChain( - &(tconn->child.kstc_send) - ); + list_for_each(tmp, &(parent->listener.kstc_listening.list)) { + child = list_entry (tmp, ks_tconn_t, child.kstc_link); + spin_lock(&(child->kstc_lock)); - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; + if (!child->child.kstc_busy) { + LASSERT(child->kstc_state == ksts_associated); + child->child.kstc_busy = TRUE; + spin_unlock(&(child->kstc_lock)); + break; + } else { + spin_unlock(&(child->kstc_lock)); + child = NULL; + } } - } -errorout: - - return (Status); + return child; } - /* - * KsCopyMdlChainToMdlChain - * Copy data from a [chained] Mdl to anther [chained] Mdl. - * Tdi library does not provide this function. We have to - * realize it ourselives. + * KsConnectEventHandler + * Connect event handler event handler, called by the underlying TDI + * transport in response to an incoming request to the listening daemon. 
+ * + * it will grab a vacancy backlog from the children tconn list, and + * build an acception Irp with it, then transfer the Irp to TDI driver. * * Arguments: - * SourceMdlChain: the source mdl - * SourceOffset: start offset of the source - * DestinationMdlChain: the dst mdl - * DestinationOffset: the offset where data are to be copied. - * BytesTobecopied: the expteced bytes to be copied - * BytesCopied: to store the really copied data length + * TdiEventContext: the tdi connnection object of the listening daemon + * ...... * * Return Value: - * NTSTATUS: STATUS_SUCCESS or other error code + * Nt kernel status code * - * NOTES: - * The length of source mdl must be >= SourceOffset + BytesTobecopied + * Notes: + * N/A */ NTSTATUS -KsCopyMdlChainToMdlChain( - IN PMDL SourceMdlChain, - IN ULONG SourceOffset, - IN PMDL DestinationMdlChain, - IN ULONG DestinationOffset, - IN ULONG BytesTobecopied, - OUT PULONG BytesCopied +KsConnectEventHandler( + IN PVOID TdiEventContext, + IN LONG RemoteAddressLength, + IN PVOID RemoteAddress, + IN LONG UserDataLength, + IN PVOID UserData, + IN LONG OptionsLength, + IN PVOID Options, + OUT CONNECTION_CONTEXT * ConnectionContext, + OUT PIRP * AcceptIrp ) { - PMDL SrcMdl = SourceMdlChain; - PMDL DstMdl = DestinationMdlChain; + ks_tconn_t * parent; + ks_tconn_t * child; - PUCHAR SrcBuf = NULL; - PUCHAR DstBuf = NULL; - - ULONG dwBytes = 0; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + NTSTATUS Status; - NTSTATUS Status = STATUS_SUCCESS; + PIRP Irp = NULL; + PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; + KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql())); + parent = (ks_tconn_t *) TdiEventContext; - while (dwBytes < BytesTobecopied) { + LASSERT(parent->kstc_type == kstt_listener); - ULONG Length = 0; + spin_lock(&(parent->kstc_lock)); - while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) { + if (parent->kstc_state == ksts_listening) { - SourceOffset -= MmGetMdlByteCount(SrcMdl); + /* allocate 
a new ConnectionInfo to backup the peer's info */ - SrcMdl = SrcMdl->Next; + ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( + NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + + RemoteAddressLength, 'iCsK' ); - if (NULL == SrcMdl) { + if (NULL == ConnectionInfo) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } + Status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; } - while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) { + /* initializing ConnectionInfo structure ... */ + + ConnectionInfo->UserDataLength = UserDataLength; + ConnectionInfo->UserData = UserData; + ConnectionInfo->OptionsLength = OptionsLength; + ConnectionInfo->Options = Options; + ConnectionInfo->RemoteAddressLength = RemoteAddressLength; + ConnectionInfo->RemoteAddress = ConnectionInfo + 1; + + RtlCopyMemory( + ConnectionInfo->RemoteAddress, + RemoteAddress, + RemoteAddressLength + ); - DestinationOffset -= MmGetMdlByteCount(DstMdl); + /* get the vacancy listening child tdi connections */ - DstMdl = DstMdl->Next; + child = KsGetVacancyBacklog(parent); - if (NULL == DstMdl) { + if (child) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } - } + spin_lock(&(child->kstc_lock)); + child->child.kstc_info.ConnectionInfo = ConnectionInfo; + child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress; + child->kstc_state = ksts_connecting; + spin_unlock(&(child->kstc_lock)); - DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl); + } else { - if ((NULL == DstBuf)) { + KsPrint((1, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent)); Status = STATUS_INSUFFICIENT_RESOURCES; goto errorout; } - // - // Here we need skip the OVERFLOW case via RtlCopyMemory :-( - // + FileObject = child->child.kstc_info.FileObject; + DeviceObject = IoGetRelatedDeviceObject (FileObject); + + Irp = KsBuildTdiIrp(DeviceObject); + + TdiBuildAccept( + Irp, + DeviceObject, + FileObject, + KsAcceptCompletionRoutine, + child, + NULL, + NULL + 
); - if ( KsQueryMdlsSize(SrcMdl) - SourceOffset > - MmGetMdlByteCount(DstMdl) - DestinationOffset ) { + IoSetNextIrpStackLocation(Irp); - Length = BytesTobecopied - dwBytes; + /* grap the refer of the child tdi connection */ + ks_get_tconn(child); - if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) { - Length = KsQueryMdlsSize(SrcMdl) - SourceOffset; - } + Status = STATUS_MORE_PROCESSING_REQUIRED; + *AcceptIrp = Irp; + *ConnectionContext = child; - if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) { - Length = MmGetMdlByteCount(DstMdl) - DestinationOffset; - } + } else { - SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl); + Status = STATUS_CONNECTION_REFUSED; + goto errorout; + } - if ((NULL == DstBuf)) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } + spin_unlock(&(parent->kstc_lock)); - RtlCopyMemory( - DstBuf + DestinationOffset, - SrcBuf + SourceOffset, - Length - ); + return Status; - } else { +errorout: - Status = TdiCopyMdlToBuffer( - SrcMdl, - SourceOffset, - DstBuf, - DestinationOffset, - MmGetMdlByteCount(DstMdl), - &Length - ); + spin_unlock(&(parent->kstc_lock)); - if (STATUS_BUFFER_OVERFLOW == Status) { - cfs_enter_debugger(); - } else if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - goto errorout; - } - } + *AcceptIrp = NULL; + *ConnectionContext = NULL; + + if (ConnectionInfo) { + ExFreePool(ConnectionInfo); + } - SourceOffset += Length; - DestinationOffset += Length; - dwBytes += Length; + if (Irp) { + IoFreeIrp (Irp); } -errorout: + return Status; +} + +/* + * KsDisconnectCompletionRoutine + * the Irp completion routine for TdiBuildDisconect + * + * We just signal the event and return MORE_PRO... to + * let the caller take the responsibility of the Irp. + * + * Arguments: + * DeviceObject: the device object of the transport + * Irp: the Irp is being completed. 
+ * Context: the event specified by the caller + * + * Return Value: + * Nt status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsDisconectCompletionRoutine ( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + + KeSetEvent((PKEVENT) Context, 0, FALSE); - if (NT_SUCCESS(Status)) { - *BytesCopied = dwBytes; - } else { - *BytesCopied = 0; - } + return STATUS_MORE_PROCESSING_REQUIRED; - return Status; + UNREFERENCED_PARAMETER(DeviceObject); } - /* - * KsQueryMdlSize - * Query the whole size of a MDL (may be chained) + * KsDisconnectHelper + * the routine to be executed in the WorkItem procedure + * this routine is to disconnect a tdi connection * * Arguments: - * Mdl: the Mdl to be queried + * Workitem: the context transferred to the workitem * * Return Value: - * ULONG: the total size of the mdl - * - * NOTES: * N/A + * + * Notes: + * tconn is already referred in abort_connecton ... */ -ULONG -KsQueryMdlsSize (PMDL Mdl) +VOID +KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem) { - PMDL Next = Mdl; - ULONG Length = 0; + ks_tconn_t * tconn = WorkItem->tconn; + KsPrint((1, "KsDisconnectHelper: disconnecting tconn=%p\n", tconn)); + ks_disconnect_tconn(tconn, WorkItem->Flags); - // - // Walking the MDL Chain ... - // - - while (Next) { - Length += MmGetMdlByteCount(Next); - Next = Next->Next; - } + KeSetEvent(&(WorkItem->Event), 0, FALSE); - return (Length); + spin_lock(&(tconn->kstc_lock)); + cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); } /* - * KsLockUserBuffer - * Allocate MDL for the buffer and lock the pages into - * nonpaged pool + * KsDisconnectEventHandler + * Disconnect event handler event handler, called by the underlying TDI transport + * in response to an incoming disconnection notification from a remote node. 
* * Arguments: - * UserBuffer: the user buffer to be locked - * Length: length in bytes of the buffer - * Operation: read or write access - * pMdl: the result of the created mdl + * ConnectionContext: tdi connnection object + * DisconnectFlags: specifies the nature of the disconnection + * ...... * * Return Value: - * NTSTATUS: kernel status code (STATUS_SUCCESS - * or other error code) + * Nt kernel status code * - * NOTES: + * Notes: * N/A */ + NTSTATUS -KsLockUserBuffer ( - IN PVOID UserBuffer, - IN BOOLEAN bPaged, - IN ULONG Length, - IN LOCK_OPERATION Operation, - OUT PMDL * pMdl +KsDisconnectEventHandler( + IN PVOID TdiEventContext, + IN CONNECTION_CONTEXT ConnectionContext, + IN LONG DisconnectDataLength, + IN PVOID DisconnectData, + IN LONG DisconnectInformationLength, + IN PVOID DisconnectInformation, + IN ULONG DisconnectFlags ) { - NTSTATUS Status; - PMDL Mdl = NULL; + ks_tconn_t * tconn; + NTSTATUS Status; + PKS_DISCONNECT_WORKITEM WorkItem; - LASSERT(UserBuffer != NULL); + tconn = (ks_tconn_t *)ConnectionContext; - *pMdl = NULL; + KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n", + KeGetCurrentIrql() )); - Mdl = IoAllocateMdl( - UserBuffer, - Length, - FALSE, - FALSE, - NULL - ); + KsPrint((2, "tconn = %x DisconnectFlags= %xh\n", + tconn, DisconnectFlags)); - if (Mdl == NULL) { + ks_get_tconn(tconn); + spin_lock(&(tconn->kstc_lock)); - Status = STATUS_INSUFFICIENT_RESOURCES; + WorkItem = &(tconn->kstc_disconnect); + + if (tconn->kstc_state != ksts_connected) { + + Status = STATUS_SUCCESS; } else { - __try { + if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) { - if (bPaged) { - MmProbeAndLockPages( - Mdl, - KernelMode, - Operation - ); - } else { - MmBuildMdlForNonPagedPool( - Mdl - ); - } + Status = STATUS_REMOTE_DISCONNECT; - Status = STATUS_SUCCESS; + } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) { - *pMdl = Mdl; + Status = STATUS_GRACEFUL_DISCONNECT; + } - } __except (EXCEPTION_EXECUTE_HANDLER) { + 
if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - IoFreeMdl(Mdl); + ks_get_tconn(tconn); - Mdl = NULL; + WorkItem->Flags = DisconnectFlags; + WorkItem->tconn = tconn; - cfs_enter_debugger(); + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - Status = STATUS_INVALID_USER_BUFFER; + /* queue the workitem to call */ + ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue); } } - return Status; -} + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); -/* - * KsMapMdlBuffer - * Map the mdl into a buffer in kernel space - * - * Arguments: - * Mdl: the mdl to be mapped - * - * Return Value: - * PVOID: the buffer mapped or NULL in failure - * - * NOTES: - * N/A - */ + return (Status); +} -PVOID -KsMapMdlBuffer (PMDL Mdl) +NTSTATUS +KsTcpReceiveCompletionRoutine( + IN PIRP Irp, + IN PKS_TCP_COMPLETION_CONTEXT Context + ) { - LASSERT(Mdl != NULL); + ks_tconn_t *tconn = Context->tconn; + NTSTATUS status = Irp->IoStatus.Status; + ULONG length = (ULONG)Irp->IoStatus.Information; - return MmGetSystemAddressForMdlSafe( - Mdl, - NormalPagePriority - ); -} + LASSERT(Context != NULL); + if (NT_SUCCESS(status)) { -/* - * KsReleaseMdl - * Unlock all the pages in the mdl - * - * Arguments: - * Mdl: memory description list to be released - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ + PKS_TSDUMGR TsduMgr = Context->TsduMgr; + PCHAR Buffer = Context->Buffer; -VOID -KsReleaseMdl (IN PMDL Mdl, - IN int Paged ) -{ - LASSERT(Mdl != NULL); + KsPrint((4, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n", + TsduMgr->TotalBytes )); - while (Mdl) { + ks_lock_tsdumgr(TsduMgr); + KsWriteTsduBuf(TsduMgr, Context->Buffer, length, 0); + /* signal TsduMgr event */ + KeSetEvent(&(Context->TsduMgr->Event), 0, FALSE); + ks_unlock_tsdumgr(TsduMgr); - PMDL Next; + /* re-active the ks connection and wake up the scheduler */ + if (KS_CAN_SCHED(TsduMgr)) { + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb(tconn, FALSE); + } + } - Next 
= Mdl->Next; + ks_put_tconn(tconn); - if (Paged) { - MmUnlockPages(Mdl); - } + } else { - IoFreeMdl(Mdl); + /* un-expected errors occur, we must abort the connection */ + ks_put_tconn(tconn); + ks_abort_tconn(tconn); + } - Mdl = Next; + + if (Context) { + + /* free the Context structure... */ + ASSERT(Context->Magic == KS_TCP_CONTEXT_MAGIC); + Context->Magic = 'CDAB'; + cfs_free(Context); + } + + /* free the Irp */ + if (Irp) { + + /* release mdl chain */ + if (Irp->MdlAddress) { + KsReleaseMdl(Irp->MdlAddress, FALSE); + } + + /* free irp packet */ + IoFreeIrp(Irp); } + + return (status); } /* - * ks_lock_buffer - * allocate MDL for the user spepcified buffer and lock (paging-in) - * all the pages of the buffer into system memory + * KsTcpCompletionRoutine + * the Irp completion routine for TdiBuildSend and TdiBuildReceive ... + * We need call the use's own CompletionRoutine if specified. Or + * it's a synchronous case, we need signal the event. * * Arguments: - * buffer: the user buffer to be locked - * length: length in bytes of the buffer - * access: read or write access - * mdl: the result of the created mdl + * DeviceObject: the device object of the transport + * Irp: the Irp is being completed. 
+ * Context: the context we specified when issuing the Irp * * Return Value: - * int: the ks error code: 0: success / -x: failture + * Nt status code * * Notes: * N/A */ -int -ks_lock_buffer ( - void * buffer, - int paged, - int length, - LOCK_OPERATION access, - ksock_mdl_t ** kmdl +NTSTATUS +KsTcpCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context ) { - NTSTATUS status; + if (Context) { - status = KsLockUserBuffer( - buffer, - paged !=0, - length, - access, - kmdl - ); + PKS_TCP_COMPLETION_CONTEXT context = NULL; + ks_tconn_t * tconn = NULL; - return cfs_error_code(status); -} + context = (PKS_TCP_COMPLETION_CONTEXT) Context; + ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC); + tconn = context->tconn; + if (context->CompletionRoutine) { -/* - * ks_map_mdl - * Map the mdl pages into kernel space - * - * Arguments: - * mdl: the mdl to be mapped - * - * Return Value: - * void *: the buffer mapped or NULL in failure - * - * Notes: - * N/A - */ + // + // Giving control to user specified CompletionRoutine ... + // + + context->CompletionRoutine(Irp, context); + + } else { + + // + // Signaling the Event ... + // + LASSERT(NULL != context->Event); + KeSetEvent(context->Event, 0, FALSE); + + /* drop the reference count of the tconn object */ + ks_put_tconn(tconn); + } -void * -ks_map_mdl (ksock_mdl_t * mdl) -{ - LASSERT(mdl != NULL); + } else { + + /* cfs_enter_debugger(); */ + } - return KsMapMdlBuffer(mdl); + return STATUS_MORE_PROCESSING_REQUIRED; } /* - * ks_release_mdl - * Unlock all the pages in the mdl and release the mdl + * KsTcpSendCompletionRoutine + * the user specified Irp completion routine for asynchronous + * data transmission requests. + * + * It will do th cleanup job of the ks_tx_t and wake up the + * ks scheduler thread * * Arguments: - * mdl: memory description list to be released + * Irp: the Irp is being completed. 
+ * Context: the context we specified when issuing the Irp * * Return Value: - * N/A + * Nt status code * * Notes: * N/A */ -void -ks_release_mdl (ksock_mdl_t *mdl, int paged) +NTSTATUS +KsTcpSendCompletionRoutine( + IN PIRP Irp, + IN PKS_TCP_COMPLETION_CONTEXT context + ) { - LASSERT(mdl != NULL); + NTSTATUS status = Irp->IoStatus.Status; + ULONG rc = (ULONG)(ULONG_PTR)Irp->IoStatus.Information; + ks_tconn_t * tconn = context->tconn; - KsReleaseMdl(mdl, paged); -} + PKS_TSDUMGR TsduMgr = context->TsduMgr; + PKEVENT Event = context->Event; + LASSERT(tconn != NULL && tconn->kstc_magic == KS_TCONN_MAGIC); + LASSERT(context && context->Magic == KS_TCP_CONTEXT_MAGIC); -/* - * ks_create_tconn - * allocate a new tconn structure from the SLAB cache or - * NonPaged sysetm pool - * - * Arguments: - * N/A - * - * Return Value: - * ksock_tconn_t *: the address of tconn or NULL if it fails - * - * NOTES: - * N/A - */ + KsPrint((4, "KsTcpSendCompltionRoutine: tconn = %p TsduMgr = %p " + "status = %xh bytes = %xh/%x\n", tconn, TsduMgr, status, + Irp->IoStatus.Information, TsduMgr->TotalBytes)); -ksock_tconn_t * -ks_create_tconn() -{ - ksock_tconn_t * tconn = NULL; + ks_lock_tsdumgr(TsduMgr); - /* allocate ksoc_tconn_t from the slab cache memory */ + if (NT_SUCCESS(status)) { - tconn = (ksock_tconn_t *)cfs_mem_cache_alloc( - ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO); + /* cleanup processed TsduMgr queue */ + KsReleaseTsdus(tconn, TsduMgr, rc); - if (tconn) { + /* queue to delivery engine if there's still remained data */ + TsduMgr->Busy = FALSE; + if (TsduMgr->TotalBytes > 0) { + KsQueueTdiEngine(tconn, TsduMgr); + } + /* signal TsduMgr event */ + KeSetEvent(&(TsduMgr->Event), 0, FALSE); + ks_unlock_tsdumgr(TsduMgr); - /* zero tconn elements */ - memset(tconn, 0, sizeof(ksock_tconn_t)); + /* + * now it's time to re-queue the conns into the + * scheduler queue and wake the scheduler thread. + */ - /* initialize the tconn ... 
*/ - tconn->kstc_magic = KS_TCONN_MAGIC; + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb(tconn, TRUE); + } - ExInitializeWorkItem( - &(tconn->kstc_disconnect.WorkItem), - KsDisconnectHelper, - &(tconn->kstc_disconnect) - ); + } else { - KeInitializeEvent( - &(tconn->kstc_disconnect.Event), - SynchronizationEvent, - FALSE ); + ks_unlock_tsdumgr(TsduMgr); - ExInitializeWorkItem( - &(tconn->kstc_destroy), - ks_destroy_tconn, - tconn - ); + KsPrint((1, "KsTcpSendCompltionRoutine: failed tconn: %p " + "TsduMgr: %p status: %xh\n", tconn, TsduMgr, status)); - spin_lock_init(&(tconn->kstc_lock)); + /* cfs_enter_debugger(); */ - ks_get_tconn(tconn); + /* + * for the case that the transmission is unsuccessful, + * we need abort the tdi connection, but not destroy it. + * the socknal conn will drop the refer count, then the + * tdi connection will be freed. + */ - spin_lock(&(ks_data.ksnd_tconn_lock)); + ks_abort_tconn(tconn); + } - /* attach it into global list in ks_data */ + /* drop tconn reference */ + ks_put_tconn(tconn); - list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns)); - ks_data.ksnd_ntconns++; - spin_unlock(&(ks_data.ksnd_tconn_lock)); + /* freeing the context structure */ + if (context) { + ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC); + context->Magic = 'CDAB'; + cfs_free(context); + } - tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000; + /* free the Irp structure */ + if (Irp) { + /* mdl chain was released by KsReleaseTsdus*/ + Irp->MdlAddress = NULL; + IoFreeIrp(Irp); + Irp = NULL; } - return (tconn); + return status; } - /* - * ks_free_tconn - * free the tconn structure to the SLAB cache or NonPaged - * sysetm pool - * - * Arguments: - * tconn: the tcon is to be freed - * - * Return Value: - * N/A + * Normal receive event handler * - * Notes: - * N/A + * It will move data from system Tsdu to our TsduList */ -void -ks_free_tconn(ksock_tconn_t * tconn) +NTSTATUS +KsTcpReceiveEventHandler( + IN PVOID TdiEventContext, + IN 
CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG BytesIndicated, + IN ULONG BytesAvailable, + OUT ULONG * BytesTaken, + IN PVOID Tsdu, + OUT PIRP * IoRequestPacket + ) { - LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0); + NTSTATUS status; - spin_lock(&(ks_data.ksnd_tconn_lock)); + ks_tconn_t * tconn; - /* remove it from the global list */ - list_del(&tconn->kstc_list); - ks_data.ksnd_ntconns--; + BOOLEAN bIsExpedited; + BOOLEAN bIsCompleteTsdu; - /* if this is the last tconn, it would be safe for - ks_tdi_fini_data to quit ... */ - if (ks_data.ksnd_ntconns == 0) { - cfs_wake_event(&ks_data.ksnd_tconn_exit); + PCHAR Buffer = NULL; + PIRP Irp = NULL; + PMDL Mdl = NULL; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + PKS_TSDUMGR TsduMgr; + + PKS_TCP_COMPLETION_CONTEXT context = NULL; + + tconn = (ks_tconn_t *) ConnectionContext; + ks_get_tconn(tconn); + + /* check expedited flag */ + bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); + + /* check whether the whole body of payload is received or not */ + if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) && + (BytesIndicated == BytesAvailable) ) { + bIsCompleteTsdu = TRUE; + } else { + bIsCompleteTsdu = FALSE; } - spin_unlock(&(ks_data.ksnd_tconn_lock)); - /* free the structure memory */ - cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn); -} + KsPrint((4, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", + BytesIndicated, BytesAvailable)); + KsPrint((4, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited )); + /* check whether we are conntected or not listener */ + if ( !((tconn->kstc_state == ksts_connected) && + (tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child))) { -/* - * ks_init_listener - * Initialize the tconn as a listener (daemon) - * - * Arguments: - * tconn: the listener tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ + *BytesTaken = BytesIndicated; + 
ks_put_tconn(tconn); + return (STATUS_SUCCESS); + } -void -ks_init_listener( - ksock_tconn_t * tconn - ) -{ - /* preparation: intialize the tconn members */ + /* query tsdu mgr */ + TsduMgr = KsQueryTsduMgr(tconn, bIsExpedited, FALSE); - tconn->kstc_type = kstt_listener; + ks_lock_tsdumgr(TsduMgr); + if (bIsCompleteTsdu) { - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + *BytesTaken = KsWriteTsduDat(TsduMgr, Tsdu, BytesAvailable, 0); + status = STATUS_SUCCESS; - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list)); - CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list)); + /* signal TsduMgr event */ + KeSetEvent(&(TsduMgr->Event), 0, FALSE); + ks_unlock_tsdumgr(TsduMgr); - cfs_init_event( &(tconn->listener.kstc_accept_event), - TRUE, - FALSE ); + /* re-active the ks connection and wake up the scheduler */ + if (KS_CAN_SCHED(TsduMgr)) { + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb(tconn, FALSE); + } + } - cfs_init_event( &(tconn->listener.kstc_destroy_event), - TRUE, - FALSE ); + } else { - tconn->kstc_state = ksts_inited; -} + ks_unlock_tsdumgr(TsduMgr); + /* allocate buffer for further data in tsdu queue */ + Buffer = ExAllocatePool(NonPagedPool, BytesAvailable); + if (NULL == Buffer) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } -/* - * ks_init_sender - * Initialize the tconn as a sender - * - * Arguments: - * tconn: the sender tconn - * - * Return Value: - * N/A - * - * Notes: - * N/A - */ + /* there's still data in tdi internal queue, we need issue a new + Irp to receive all of them. 
first allocate the tcp context */ + context = cfs_alloc(sizeof(KS_TCP_COMPLETION_CONTEXT), 0); + if (!context) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + /* setup the context */ + RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT)); + context->Magic = KS_TCP_CONTEXT_MAGIC; + context->tconn = tconn; + context->CompletionRoutine = KsTcpReceiveCompletionRoutine; + context->CompletionContext = Buffer; + context->TsduMgr = TsduMgr; + context->Buffer = Buffer; + context->Event = &(TsduMgr->Event); + + if (tconn->kstc_type == kstt_sender) { + FileObject = tconn->sender.kstc_info.FileObject; + } else { + FileObject = tconn->child.kstc_info.FileObject; + } + DeviceObject = IoGetRelatedDeviceObject(FileObject); + + /* build new tdi Irp and setup it. */ + Irp = KsBuildTdiIrp(DeviceObject); + if (NULL == Irp) { + goto errorout; + } + + status = KsLockUserBuffer( + Buffer, + FALSE, + BytesAvailable, + IoModifyAccess, + &Mdl + ); + + if (!NT_SUCCESS(status)) { + goto errorout; + } + + TdiBuildReceive( + Irp, + DeviceObject, + FileObject, + KsTcpCompletionRoutine, + context, + Mdl, + ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED), + BytesAvailable + ); + + IoSetNextIrpStackLocation(Irp); + + /* return the newly built Irp to transport driver, + it will process it to receive all the data */ + + *IoRequestPacket = Irp; + *BytesTaken = 0; + + ks_get_tconn(tconn); + status = STATUS_MORE_PROCESSING_REQUIRED; + } -void -ks_init_sender( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_sender; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + ks_put_tconn(tconn); - KsInitializeKsChain(&(tconn->sender.kstc_recv)); - KsInitializeKsChain(&(tconn->sender.kstc_send)); + return (status); - tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; +errorout: - tconn->kstc_state = ksts_inited; -} + if (Mdl) { + KsReleaseMdl(Mdl, FALSE); + } -/* - * ks_init_child - * Initialize the tconn 
as a child - * - * Arguments: - * tconn: the child tconn - * - * Return Value: - * N/A - * - * NOTES: - * N/A - */ + if (Buffer) { + ExFreePool(Buffer); + } -void -ks_init_child( - ksock_tconn_t * tconn - ) -{ - tconn->kstc_type = kstt_child; - RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + if (Irp) { + IoFreeIrp(Irp); + } - KsInitializeKsChain(&(tconn->child.kstc_recv)); - KsInitializeKsChain(&(tconn->child.kstc_send)); + if (context) { + ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC); + context->Magic = 'CDAB'; + cfs_free(context); + } - tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + ks_abort_tconn(tconn); + ks_put_tconn(tconn); - tconn->kstc_state = ksts_inited; + *BytesTaken = BytesAvailable; + + return STATUS_SUCCESS; } /* - * ks_get_tconn - * increase the reference count of the tconn with 1 - * - * Arguments: - * tconn: the tdi connection to be referred - * - * Return Value: - * N/A - * - * NOTES: - * N/A + * Expedited receive event handler */ -void -ks_get_tconn( - ksock_tconn_t * tconn +NTSTATUS +KsTcpReceiveExpeditedEventHandler( + IN PVOID TdiEventContext, + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG BytesIndicated, + IN ULONG BytesAvailable, + OUT ULONG * BytesTaken, + IN PVOID Tsdu, + OUT PIRP * IoRequestPacket ) { - atomic_inc(&(tconn->kstc_refcount)); + return KsTcpReceiveEventHandler( + TdiEventContext, + ConnectionContext, + ReceiveFlags | TDI_RECEIVE_EXPEDITED, + BytesIndicated, + BytesAvailable, + BytesTaken, + Tsdu, + IoRequestPacket + ); } /* - * ks_put_tconn - * decrease the reference count of the tconn and destroy - * it if the refercount becomes 0. - * - * Arguments: - * tconn: the tdi connection to be dereferred - * - * Return Value: - * N/A + * Bulk receive event handler * - * NOTES: - * N/A + * It will queue all the system Tsdus to our TsduList. + * Then later ks_recv_mdl will release them. 
*/ -void -ks_put_tconn( - ksock_tconn_t *tconn +NTSTATUS +KsTcpChainedReceiveEventHandler ( + IN PVOID TdiEventContext, // the event context + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG ReceiveLength, + IN ULONG StartingOffset, // offset of start of client data in TSDU + IN PMDL Tsdu, // TSDU data chain + IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives ) { - if (atomic_dec_and_test(&(tconn->kstc_refcount))) { - spin_lock(&(tconn->kstc_lock)); + NTSTATUS status; + ks_tconn_t * tconn; - if ( ( tconn->kstc_type == kstt_child || - tconn->kstc_type == kstt_sender ) && - ( tconn->kstc_state == ksts_connected ) ) { + PKS_TSDUMGR TsduMgr; - spin_unlock(&(tconn->kstc_lock)); + BOOLEAN expedited; - ks_abort_tconn(tconn); + tconn = (ks_tconn_t *) ConnectionContext; + expedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); - } else { + KsPrint((4, "KsTcpChainedReceive: sock: %p conn: %p ReceiveLength: %xh " + "bIsExpedited: %d Tsdu=%p TsduDesc=%p data=%xh\n", + tconn, tconn->kstc_conn, ReceiveLength, expedited, + Tsdu, TsduDescriptor, *((PULONG)KsMapMdlBuffer(Tsdu)))); - if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) { - cfs_enter_debugger(); - } else { - ExQueueWorkItem( - &(tconn->kstc_destroy), - DelayedWorkQueue - ); + ks_get_tconn(tconn); - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY); - } + /* check whether we are conntected or not listener */ + if ( !((tconn->kstc_state == ksts_connected) && + (tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child))) { - spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); + return (STATUS_SUCCESS); + } + + if (Tsdu) { + + TsduMgr = KsQueryTsduMgr(tconn, expedited, FALSE); + ks_lock_tsdumgr(TsduMgr); +#if FALSE + KsWriteTsduMdl(TsduMgr, Tsdu, TsduDescriptor, + StartingOffset, ReceiveLength, 0); + status = STATUS_PENDING; +#else + KsWriteTsduDat(TsduMgr, (PCHAR)KsMapMdlBuffer(Tsdu) + + StartingOffset, ReceiveLength, 0); + status = 
STATUS_SUCCESS; +#endif + KeSetEvent(&(TsduMgr->Event), 0, FALSE); + ks_unlock_tsdumgr(TsduMgr); + + /* re-active the ks connection and wake up the scheduler */ + if (KS_CAN_SCHED(TsduMgr)) { + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb(tconn, FALSE); + } } + + } else { + + ks_abort_tconn(tconn); + status = STATUS_CONNECTION_ABORTED; } + + ks_put_tconn(tconn); + + /* Return STATUS_PENDING to system because we are still + owning the MDL resources. ks_recv_mdl is expected + to free the MDL resources. */ + + return (status); } + /* - * ks_destroy_tconn - * cleanup the tdi connection and free it + * Expedited & Bulk receive event handler + */ + +NTSTATUS +KsTcpChainedReceiveExpeditedEventHandler ( + IN PVOID TdiEventContext, // the event context + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG ReceiveLength, + IN ULONG StartingOffset, // offset of start of client data in TSDU + IN PMDL Tsdu, // TSDU data chain + IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives + ) +{ + return KsTcpChainedReceiveEventHandler( + TdiEventContext, + ConnectionContext, + ReceiveFlags | TDI_RECEIVE_EXPEDITED, + ReceiveLength, + StartingOffset, + Tsdu, + TsduDescriptor ); +} + + +/* + * KsSetHandlers + * setup all the event handler callbacks * * Arguments: - * tconn: the tdi connection to be cleaned. 
+ * tconn: the tdi connecton object * * Return Value: - * N/A + * int: ks error code * * NOTES: * N/A */ -void -ks_destroy_tconn( - ksock_tconn_t * tconn +int +KsSetHandlers( + ks_tconn_t * tconn ) { - LASSERT(tconn->kstc_refcount.counter == 0); - - if (tconn->kstc_type == kstt_listener) { - - ks_reset_handlers(tconn); - - /* for listener, we just need to close the address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); - - tconn->kstc_state = ksts_inited; + NTSTATUS status = STATUS_SUCCESS; + KS_EVENT_HANDLERS handlers; - } else if (tconn->kstc_type == kstt_child) { + /* to make sure the address object is opened already */ + if (tconn->kstc_addr.FileObject == NULL) { + goto errorout; + } - /* for child tdi conections */ + /* initialize the handlers indictor array. for sender and listenr, + there are different set of callbacks. for child, we just return. */ - /* disassociate the relation between it's connection object - and the address object */ + memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - if (tconn->kstc_state == ksts_associated) { - KsDisassociateAddress( - tconn->child.kstc_info.FileObject - ); - } + SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler); + SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler); + SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler); + SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler); + SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler); - /* release the connection object */ + // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler); - KsCloseConnection( - tconn->child.kstc_info.Handle, - tconn->child.kstc_info.FileObject - ); + if (tconn->kstc_type == kstt_listener) { + SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler); + } else if (tconn->kstc_type == kstt_child) { + goto errorout; + } - /* 
release it's refer of it's parent's address object */ - KsCloseAddress( - NULL, - tconn->kstc_addr.FileObject + /* set all the event callbacks */ + status = KsSetEventHandlers( + tconn->kstc_addr.FileObject, /* Address File Object */ + tconn, /* Event Context */ + &handlers /* Event callback handlers */ ); - spin_lock(&tconn->child.kstc_parent->kstc_lock); - spin_lock(&tconn->kstc_lock); - - tconn->kstc_state = ksts_inited; - - /* remove it frome it's parent's queues */ +errorout: - if (tconn->child.kstc_queued) { + return cfs_error_code(status); +} - list_del(&(tconn->child.kstc_link)); - if (tconn->child.kstc_queueno) { +/* + * KsResetHandlers + * disable all the event handler callbacks (set to NULL) + * + * Arguments: + * tconn: the tdi connecton object + * + * Return Value: + * int: ks error code + * + * NOTES: + * N/A + */ - LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0); - tconn->child.kstc_parent->listener.kstc_accepted.num -= 1; +int +KsResetHandlers( + ks_tconn_t * tconn + ) +{ + NTSTATUS status = STATUS_SUCCESS; + KS_EVENT_HANDLERS handlers; - } else { + /* to make sure the address object is opened already */ + if (tconn->kstc_addr.FileObject == NULL) { + goto errorout; + } - LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0); - tconn->child.kstc_parent->listener.kstc_listening.num -= 1; - } + /* initialize the handlers indictor array. for sender and listenr, + there are different set of callbacks. for child, we just return. 
*/ - tconn->child.kstc_queued = FALSE; - } + memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); - spin_unlock(&tconn->kstc_lock); - spin_unlock(&tconn->child.kstc_parent->kstc_lock); + SetEventHandler(handlers, TDI_EVENT_ERROR, NULL); + SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL); + SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL); + SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL); + SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL); + // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL); - /* drop the reference of the parent tconn */ - ks_put_tconn(tconn->child.kstc_parent); + if (tconn->kstc_type == kstt_listener) { + SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL); + } else if (tconn->kstc_type == kstt_child) { + goto errorout; + } - } else if (tconn->kstc_type == kstt_sender) { + /* set all the event callbacks */ + status = KsSetEventHandlers( + tconn->kstc_addr.FileObject, /* Address File Object */ + tconn, /* Event Context */ + &handlers /* Event callback handlers */ + ); - ks_reset_handlers(tconn); +errorout: - /* release the connection object */ + return cfs_error_code(status); +} - KsCloseConnection( - tconn->sender.kstc_info.Handle, - tconn->sender.kstc_info.FileObject - ); +VOID +KsPrintProviderInfo( + PWSTR DeviceName, + PTDI_PROVIDER_INFO ProviderInfo + ) +{ + KsPrint((2, "%ws ProviderInfo:\n", DeviceName)); - /* release it's refer of it's parent's address object */ - KsCloseAddress( - tconn->kstc_addr.Handle, - tconn->kstc_addr.FileObject - ); + KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version )); + KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize )); + KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData )); + KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize )); + KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags )); - tconn->kstc_state = ksts_inited; + if (ProviderInfo->ServiceFlags & 
TDI_SERVICE_CONNECTION_MODE) { + KsPrint((2, " CONNECTION_MODE\n")); + } - } else { - cfs_enter_debugger(); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) { + KsPrint((2, " ORDERLY_RELEASE\n")); } - /* free the tconn structure ... */ + if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) { + KsPrint((2, " CONNECTIONLESS_MODE\n")); + } - ks_free_tconn(tconn); -} + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) { + KsPrint((2, " ERROR_FREE_DELIVERY\n")); + } -int -ks_query_data( - ksock_tconn_t * tconn, - size_t * size, - int bIsExpedited ) -{ - int rc = 0; + if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) { + KsPrint((2, " SECURITY_LEVEL\n")); + } - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; + if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) { + KsPrint((2, " BROADCAST_SUPPORTED\n")); + } - *size = 0; + if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) { + KsPrint((2, " MULTICAST_SUPPORTED\n")); + } - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) { + KsPrint((2, " DELAYED_ACCEPTANCE\n")); + } - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; + if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) { + KsPrint((2, " EXPEDITED_DATA\n")); } - if (tconn->kstc_state != ksts_connected) { - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - goto errorout; + if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) { + KsPrint((2, " INTERNAL_BUFFERING\n")); } - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) { + KsPrint((2, " ROUTE_DIRECTED\n")); } - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } 
else { - KsTsduMgr = &(KsChain->Normal); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) { + KsPrint((2, " NO_ZERO_LENGTH\n")); } - *size = KsTsduMgr->TotalBytes; - spin_unlock(&(tconn->kstc_lock)); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) { + KsPrint((2, " POINT_TO_POINT\n")); + } -errorout: + if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) { + KsPrint((2, " MESSAGE_MODE\n")); + } - ks_put_tconn(tconn); + if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) { + KsPrint((2, " HALF_DUPLEX\n")); + } - return (rc); + KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData )); + KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData )); + KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources )); } + /* - * ks_get_tcp_option - * Query the the options of the tcp stream connnection + * ks_create_tconn + * allocate a new tconn structure from the SLAB cache or + * NonPaged sysetm pool * * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer to store the option value - * Length: the length of the value, to be returned + * N/A * * Return Value: - * int: ks return code + * ks_tconn_t *: the address of tconn or NULL if it fails * * NOTES: * N/A */ -int -ks_get_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - PULONG Length - ) +ks_tconn_t * +ks_create_tconn() { - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; + ks_tconn_t * tconn = NULL; - TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; - - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; - - KEVENT Event; - - /* make sure the tdi connection is connected ? 
*/ - - ks_get_tconn(tconn); + /* allocate ksoc_tconn_t from the slab cache memory */ + tconn = (ks_tconn_t *)cfs_mem_cache_alloc( + ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO); - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } + if (tconn) { - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); + /* zero tconn elements */ + memset(tconn, 0, sizeof(ks_tconn_t)); - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } + /* initialize the tconn ... */ + tconn->kstc_magic = KS_TCONN_MAGIC; - QueryInfoEx.ID.toi_id = ID; - QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION; - QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL; - QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY; - QueryInfoEx.ID.toi_entity.tei_instance = 0; + ExInitializeWorkItem( + &(tconn->kstc_disconnect.WorkItem), + KsDisconnectHelper, + &(tconn->kstc_disconnect) + ); - RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE); + KeInitializeEvent( + &(tconn->kstc_disconnect.Event), + SynchronizationEvent, + FALSE ); - KeInitializeEvent(&Event, NotificationEvent, FALSE); - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + ExInitializeWorkItem( + &(tconn->kstc_destroy), + ks_destroy_tconn, + tconn + ); - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_QUERY_INFORMATION_EX, - DeviceObject, - &QueryInfoEx, - sizeof(TCP_REQUEST_QUERY_INFORMATION_EX), - OptionValue, - *Length, - FALSE, - &Event, - &IoStatus - ); + spin_lock_init(&(tconn->kstc_lock)); - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } + ks_get_tconn(tconn); + spin_lock(&(ks_data.ksnd_tconn_lock)); - IrpSp = IoGetNextIrpStackLocation(Irp); + /* attach it into global list in ks_data */ - if (IrpSp == NULL) { + list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns)); + ks_data.ksnd_ntconns++; + 
spin_unlock(&(ks_data.ksnd_tconn_lock)); - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; + tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000; } + KsPrint((3, "ks_create_tconn: new connection: %p\n", tconn)); + return (tconn); +} - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; - - Status = IoCallDriver(DeviceObject, Irp); - - if (Status == STATUS_PENDING) { +/* + * ks_free_tconn + * free the tconn structure to the SLAB cache or NonPaged + * sysetm pool + * + * Arguments: + * tconn: the tcon is to be freed + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ - KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); +void +ks_free_tconn(ks_tconn_t * tconn) +{ + LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0); - Status = IoStatus.Status; - } + spin_lock(&(ks_data.ksnd_tconn_lock)); + /* remove it from the global list */ + list_del(&tconn->kstc_list); + ks_data.ksnd_ntconns--; - if (NT_SUCCESS(Status)) { - *Length = IoStatus.Information; - } else { - cfs_enter_debugger(); - memset(OptionValue, 0, *Length); - Status = STATUS_SUCCESS; + /* if this is the last tconn, it would be safe for + ks_tdi_fini_data to quit ... 
*/ + if (ks_data.ksnd_ntconns == 0) { + cfs_wake_event(&ks_data.ksnd_tconn_exit); } + spin_unlock(&(ks_data.ksnd_tconn_lock)); -errorout: - - ks_put_tconn(tconn); + /* free the structure memory */ + cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn); - return cfs_error_code(Status); + KsPrint((3, "ks_free_tconn: tconn %p is freed.\n", tconn)); } + /* - * ks_set_tcp_option - * Set the the options for the tcp stream connnection + * ks_init_listener + * Initialize the tconn as a listener (daemon) * * Arguments: - * tconn: the tdi connection - * ID: option id - * OptionValue: buffer containing the new option value - * Length: the length of the value + * tconn: the listener tconn * * Return Value: - * int: ks return code + * N/A * - * NOTES: + * Notes: * N/A */ -NTSTATUS -ks_set_tcp_option ( - ksock_tconn_t * tconn, - ULONG ID, - PVOID OptionValue, - ULONG Length +void +ks_init_listener( + ks_tconn_t * tconn ) { - NTSTATUS Status = STATUS_SUCCESS; - - IO_STATUS_BLOCK IoStatus; - - ULONG SetInfoExLength; - PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL; - - PFILE_OBJECT ConnectionObject; - PDEVICE_OBJECT DeviceObject = NULL; + /* preparation: intialize the tconn members */ - PIRP Irp = NULL; - PIO_STACK_LOCATION IrpSp = NULL; + tconn->kstc_type = kstt_listener; - PKEVENT Event; + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - /* make sure the tdi connection is connected ? 
*/ + CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list)); + CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list)); - ks_get_tconn(tconn); + cfs_init_event( &(tconn->listener.kstc_accept_event), + TRUE, + FALSE ); - if (tconn->kstc_state != ksts_connected) { - Status = STATUS_INVALID_PARAMETER; - goto errorout; - } + cfs_init_event( &(tconn->listener.kstc_destroy_event), + TRUE, + FALSE ); - LASSERT(tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); + tconn->kstc_state = ksts_inited; +} - if (tconn->kstc_type == kstt_sender) { - ConnectionObject = tconn->sender.kstc_info.FileObject; - } else { - ConnectionObject = tconn->child.kstc_info.FileObject; - } - SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT); +/* + * ks_init_sender + * Initialize the tconn as a sender + * + * Arguments: + * tconn: the sender tconn + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ - SetInfoEx = ExAllocatePoolWithTag( - NonPagedPool, - SetInfoExLength, - 'TSSK' - ); +void +ks_init_sender( + ks_tconn_t * tconn + ) +{ + tconn->kstc_type = kstt_sender; + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - if (SetInfoEx == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } + KsInitializeKsChain(&(tconn->sender.kstc_recv)); + KsInitializeKsChain(&(tconn->sender.kstc_send)); - SetInfoEx->ID.toi_id = ID; + tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION; - SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL; - SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY; - SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE; + tconn->kstc_state = ksts_inited; +} - SetInfoEx->BufferSize = Length; - RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length); +/* + * ks_init_child + * Initialize the tconn as a child + * + * Arguments: + * tconn: the child tconn + * + * Return Value: + * N/A + * + * NOTES: + * 
N/A + */ - Event = (PKEVENT)(&(SetInfoEx->Buffer[Length])); - KeInitializeEvent(Event, NotificationEvent, FALSE); +void +ks_init_child( + ks_tconn_t * tconn + ) +{ + tconn->kstc_type = kstt_child; + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + KsInitializeKsChain(&(tconn->child.kstc_recv)); + KsInitializeKsChain(&(tconn->child.kstc_send)); - Irp = IoBuildDeviceIoControlRequest( - IOCTL_TCP_SET_INFORMATION_EX, - DeviceObject, - SetInfoEx, - SetInfoExLength, - NULL, - 0, - FALSE, - Event, - &IoStatus - ); + tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; - if (Irp == NULL) { - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } + tconn->kstc_state = ksts_inited; +} - IrpSp = IoGetNextIrpStackLocation(Irp); +/* + * ks_get_tconn + * increase the reference count of the tconn with 1 + * + * Arguments: + * tconn: the tdi connection to be referred + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ - if (IrpSp == NULL) { - IoFreeIrp(Irp); - Irp = NULL; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } +void +ks_get_tconn( + ks_tconn_t * tconn + ) +{ + atomic_inc(&(tconn->kstc_refcount)); +} - IrpSp->FileObject = ConnectionObject; - IrpSp->DeviceObject = DeviceObject; +/* + * ks_put_tconn + * decrease the reference count of the tconn and destroy + * it if the refercount becomes 0. 
+ * + * Arguments: + * tconn: the tdi connection to be dereferred + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ - Status = IoCallDriver(DeviceObject, Irp); +void +ks_put_tconn( + ks_tconn_t *tconn + ) +{ + if (atomic_dec_and_test(&(tconn->kstc_refcount))) { - if (Status == STATUS_PENDING) { + spin_lock(&(tconn->kstc_lock)); - KeWaitForSingleObject( - Event, - Executive, - KernelMode, - FALSE, - NULL - ); + if ( ( tconn->kstc_type == kstt_child || + tconn->kstc_type == kstt_sender ) && + ( tconn->kstc_state == ksts_connected ) ) { - Status = IoStatus.Status; - } + spin_unlock(&(tconn->kstc_lock)); -errorout: + ks_abort_tconn(tconn); - if (SetInfoEx) { - ExFreePool(SetInfoEx); - } + } else { - if (!NT_SUCCESS(Status)) { - printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n", - ID, Status); - Status = STATUS_SUCCESS; - } + if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) { + cfs_enter_debugger(); + } else { + ExQueueWorkItem( + &(tconn->kstc_destroy), + DelayedWorkQueue + ); - ks_put_tconn(tconn); + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY); + } - return cfs_error_code(Status); + spin_unlock(&(tconn->kstc_lock)); + } + } } /* - * ks_bind_tconn - * bind the tdi connection object with an address + * ks_destroy_tconn + * cleanup the tdi connection and free it * * Arguments: - * tconn: tconn to be bound - * parent: the parent tconn object - * ipaddr: the ip address - * port: the port number + * tconn: the tdi connection to be cleaned. * * Return Value: - * int: 0 for success or ks error codes. 
+ * N/A * * NOTES: * N/A */ -int -ks_bind_tconn ( - ksock_tconn_t * tconn, - ksock_tconn_t * parent, - ulong_ptr addr, - unsigned short port +void +ks_destroy_tconn( + ks_tconn_t * tconn ) { - NTSTATUS status; - int rc = 0; - - ksock_tdi_addr_t taddr; + LASSERT(tconn->kstc_refcount.counter == 0); - memset(&taddr, 0, sizeof(ksock_tdi_addr_t)); + if (tconn->kstc_type == kstt_listener) { - if (tconn->kstc_state != ksts_inited) { + KsResetHandlers(tconn); - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); + /* for listener, we just need to close the address object */ + KsCloseAddress( + tconn->kstc_addr.Handle, + tconn->kstc_addr.FileObject + ); - goto errorout; + tconn->kstc_state = ksts_inited; } else if (tconn->kstc_type == kstt_child) { - if (NULL == parent) { - status = STATUS_INVALID_PARAMETER; - rc = cfs_error_code(status); - - goto errorout; - } + /* for child tdi conections */ - /* refer it's parent's address object */ + /* disassociate the relation between it's connection object + and the address object */ - taddr = parent->kstc_addr; - ObReferenceObject(taddr.FileObject); + if (tconn->kstc_state == ksts_associated) { + KsDisassociateAddress( + tconn->child.kstc_info.FileObject + ); + } - ks_get_tconn(parent); + /* release the connection object */ - } else { + KsCloseConnection( + tconn->child.kstc_info.Handle, + tconn->child.kstc_info.FileObject + ); - PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi); - ULONG AddrLen = 0; + /* release it's refer of it's parent's address object */ + KsCloseAddress( + NULL, + tconn->kstc_addr.FileObject + ); - /* intialize the tdi address*/ + spin_lock(&tconn->child.kstc_parent->kstc_lock); + spin_lock(&tconn->kstc_lock); - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; + tconn->kstc_state = ksts_inited; - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - 
((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); + /* remove it frome it's parent's queues */ - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); + if (tconn->child.kstc_queued) { + list_del(&(tconn->child.kstc_link)); - /* open the transport address object */ + if (tconn->child.kstc_queueno) { - AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + - TDI_ADDRESS_LENGTH_IP; + LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0); + tconn->child.kstc_parent->listener.kstc_accepted.num -= 1; - status = KsOpenAddress( - &(tconn->kstc_dev), - &(taddr.Tdi), - AddrLen, - &(taddr.Handle), - &(taddr.FileObject) - ); + } else { - if (!NT_SUCCESS(status)) { + LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0); + tconn->child.kstc_parent->listener.kstc_listening.num -= 1; + } - KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n", - addr, port, status )); - rc = cfs_error_code(status); - goto errorout; + tconn->child.kstc_queued = FALSE; } - } - - if (tconn->kstc_type == kstt_child) { - tconn->child.kstc_parent = parent; - } - - tconn->kstc_state = ksts_bind; - tconn->kstc_addr = taddr; - -errorout: - - return (rc); -} - -/* - * ks_build_tconn - * build tcp/streaming connection to remote peer - * - * Arguments: - * tconn: tconn to be connected to the peer - * addr: the peer's ip address - * port: the peer's port number - * - * Return Value: - * int: 0 for success or ks error codes. 
- * - * Notes: - * N/A - */ - -int -ks_build_tconn( - ksock_tconn_t * tconn, - ulong_ptr addr, - unsigned short port - ) -{ - int rc = 0; - NTSTATUS status = STATUS_SUCCESS; - - - PFILE_OBJECT ConnectionObject = NULL; - PDEVICE_OBJECT DeviceObject = NULL; - PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; - ULONG AddrLength; - - PIRP Irp = NULL; + spin_unlock(&tconn->kstc_lock); + spin_unlock(&tconn->child.kstc_parent->kstc_lock); - LASSERT(tconn->kstc_type == kstt_sender); - LASSERT(tconn->kstc_state == ksts_bind); + /* drop the reference of the parent tconn */ + ks_put_tconn(tconn->child.kstc_parent); - ks_get_tconn(tconn); + } else if (tconn->kstc_type == kstt_sender) { - { - /* set the event callbacks */ - rc = ks_set_handlers(tconn); + KsResetHandlers(tconn); - if (rc < 0) { - cfs_enter_debugger(); - goto errorout; - } - } + /* release the connection object */ - /* create the connection file handle / object */ - status = KsOpenConnection( - &(tconn->kstc_dev), - (CONNECTION_CONTEXT)tconn, - &(tconn->sender.kstc_info.Handle), - &(tconn->sender.kstc_info.FileObject) + KsCloseConnection( + tconn->sender.kstc_info.Handle, + tconn->sender.kstc_info.FileObject ); - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - /* associdate the the connection with the adress object of the tconn */ - - status = KsAssociateAddress( + /* release it's refer of it's parent's address object */ + KsCloseAddress( tconn->kstc_addr.Handle, - tconn->sender.kstc_info.FileObject + tconn->kstc_addr.FileObject ); - if (!NT_SUCCESS(status)) { - rc = cfs_error_code(status); - cfs_enter_debugger(); - goto errorout; - } - - tconn->kstc_state = ksts_associated; - - /* Allocating Connection Info Together with the Address */ - AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) - + TDI_ADDRESS_LENGTH_IP; - - ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( - NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + 
AddrLength, 'iCsK'); - - if (NULL == ConnectionInfo) { + tconn->kstc_state = ksts_inited; - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); + } else { cfs_enter_debugger(); - goto errorout; } - /* Initializing ConnectionInfo ... */ - { - PTRANSPORT_ADDRESS TdiAddress; - - /* ConnectionInfo settings */ - - ConnectionInfo->UserDataLength = 0; - ConnectionInfo->UserData = NULL; - ConnectionInfo->OptionsLength = 0; - ConnectionInfo->Options = NULL; - ConnectionInfo->RemoteAddressLength = AddrLength; - ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - + /* free the tconn structure ... */ - /* intialize the tdi address*/ + ks_free_tconn(tconn); +} - TdiAddress = ConnectionInfo->RemoteAddress; +/* + * ks_get_tcp_option + * Query the the options of the tcp stream connnection + * + * Arguments: + * tconn: the tdi connection + * ID: option id + * OptionValue: buffer to store the option value + * Length: the length of the value, to be returned + * + * Return Value: + * int: ks return code + * + * NOTES: + * N/A + */ - TdiAddress->TAAddressCount = 1; - TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; - TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; +int +ks_get_tcp_option ( + ks_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + PULONG Length + ) +{ + NTSTATUS Status = STATUS_SUCCESS; - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); - ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); + IO_STATUS_BLOCK IoStatus; - memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - } + TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx; - /* Now prepare to connect the remote peer ... 
*/ + PFILE_OBJECT ConnectionObject; + PDEVICE_OBJECT DeviceObject = NULL; - ConnectionObject = tconn->sender.kstc_info.FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + PIRP Irp = NULL; + PIO_STACK_LOCATION IrpSp = NULL; - /* allocate a new Irp */ + KEVENT Event; - Irp = KsBuildTdiIrp(DeviceObject); + /* make sure the tdi connection is connected ? */ - if (NULL == Irp) { + ks_get_tconn(tconn); - status = STATUS_INSUFFICIENT_RESOURCES; - rc = cfs_error_code(status); - cfs_enter_debugger(); + if (tconn->kstc_state != ksts_connected) { + Status = STATUS_INVALID_PARAMETER; goto errorout; } - /* setup the Irp */ - - TdiBuildConnect( - Irp, - DeviceObject, - ConnectionObject, - NULL, - NULL, - NULL, - ConnectionInfo, - NULL - ); - - - /* sumbit the Irp to the underlying transport driver */ - status = KsSubmitTdiIrp( - DeviceObject, - Irp, - TRUE, - NULL - ); - - spin_lock(&(tconn->kstc_lock)); + LASSERT(tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); - if (NT_SUCCESS(status)) { + if (tconn->kstc_type == kstt_sender) { + ConnectionObject = tconn->sender.kstc_info.FileObject; + } else { + ConnectionObject = tconn->child.kstc_info.FileObject; + } - /* Connected! the conneciton is built successfully. 
*/ + QueryInfoEx.ID.toi_id = ID; + QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION; + QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL; + QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY; + QueryInfoEx.ID.toi_entity.tei_instance = 0; - tconn->kstc_state = ksts_connected; + RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE); - tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo; - tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress; + KeInitializeEvent(&Event, NotificationEvent, FALSE); + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - spin_unlock(&(tconn->kstc_lock)); + Irp = IoBuildDeviceIoControlRequest( + IOCTL_TCP_QUERY_INFORMATION_EX, + DeviceObject, + &QueryInfoEx, + sizeof(TCP_REQUEST_QUERY_INFORMATION_EX), + OptionValue, + *Length, + FALSE, + &Event, + &IoStatus + ); - } else { + if (Irp == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - /* Not connected! Abort it ... */ + IrpSp = IoGetNextIrpStackLocation(Irp); - if (rc != 0) { - cfs_enter_debugger(); - } + if (IrpSp == NULL) { + IoFreeIrp(Irp); Irp = NULL; - rc = cfs_error_code(status); + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - tconn->kstc_state = ksts_associated; - spin_unlock(&(tconn->kstc_lock)); + IrpSp->FileObject = ConnectionObject; + IrpSp->DeviceObject = DeviceObject; - /* disassocidate the connection and the address object, - after cleanup, it's safe to set the state to abort ... 
*/ + Status = IoCallDriver(DeviceObject, Irp); - if ( NT_SUCCESS(KsDisassociateAddress( - tconn->sender.kstc_info.FileObject))) { - tconn->kstc_state = ksts_aborted; - } + if (Status == STATUS_PENDING) { - /* reset the event callbacks */ - rc = ks_reset_handlers(tconn); + KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); - goto errorout; + Status = IoStatus.Status; } -errorout: - - if (NT_SUCCESS(status)) { - - ks_query_local_ipaddr(tconn); + if (NT_SUCCESS(Status)) { + *Length = (ULONG)(ULONG_PTR)IoStatus.Information; } else { - - if (ConnectionInfo) { - ExFreePool(ConnectionInfo); - } - if (Irp) { - IoFreeIrp(Irp); - } + cfs_enter_debugger(); + memset(OptionValue, 0, *Length); + Status = STATUS_SUCCESS; } +errorout: + ks_put_tconn(tconn); - return (rc); + return cfs_error_code(Status); } - /* - * ks_disconnect_tconn - * disconnect the tconn from a connection + * ks_set_tcp_option + * Set the the options for the tcp stream connnection * * Arguments: - * tconn: the tdi connecton object connected already - * flags: flags & options for disconnecting + * tconn: the tdi connection + * ID: option id + * OptionValue: buffer containing the new option value + * Length: the length of the value * * Return Value: - * int: ks error code + * int: ks return code * - * Notes: + * NOTES: * N/A */ -int -ks_disconnect_tconn( - ksock_tconn_t * tconn, - ulong_ptr flags +NTSTATUS +ks_set_tcp_option ( + ks_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + ULONG Length ) { - NTSTATUS status = STATUS_SUCCESS; + NTSTATUS Status = STATUS_SUCCESS; - ksock_tconn_info_t * info; + IO_STATUS_BLOCK IoStatus; + + ULONG SetInfoExLength; + PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL; PFILE_OBJECT ConnectionObject; PDEVICE_OBJECT DeviceObject = NULL; PIRP Irp = NULL; + PIO_STACK_LOCATION IrpSp = NULL; - KEVENT Event; - - ks_get_tconn(tconn); - - /* make sure tt's connected already and it - must be a sender or a child ... 
*/ + PKEVENT Event; - LASSERT(tconn->kstc_state == ksts_connected); - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child); + /* make sure the tdi connection is connected ? */ - /* reset all the event handlers to NULL */ + ks_get_tconn(tconn); - if (tconn->kstc_type != kstt_child) { - ks_reset_handlers (tconn); + if (tconn->kstc_state != ksts_connected) { + Status = STATUS_INVALID_PARAMETER; + goto errorout; } - /* Disconnecting to the remote peer ... */ + LASSERT(tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); if (tconn->kstc_type == kstt_sender) { - info = &(tconn->sender.kstc_info); + ConnectionObject = tconn->sender.kstc_info.FileObject; } else { - info = &(tconn->child.kstc_info); + ConnectionObject = tconn->child.kstc_info.FileObject; } - ConnectionObject = info->FileObject; - DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - - /* allocate an Irp and setup it */ - - Irp = KsBuildTdiIrp(DeviceObject); + SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT); - if (NULL == Irp) { + SetInfoEx = ExAllocatePoolWithTag( + NonPagedPool, + SetInfoExLength, + 'TSSK' + ); - status = STATUS_INSUFFICIENT_RESOURCES; - cfs_enter_debugger(); + if (SetInfoEx == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; goto errorout; } - KeInitializeEvent( - &Event, - SynchronizationEvent, - FALSE - ); - - TdiBuildDisconnect( - Irp, - DeviceObject, - ConnectionObject, - KsDisconectCompletionRoutine, - &Event, - NULL, - flags, - NULL, - NULL - ); - - /* issue the Irp to the underlying transport - driver to disconnect the connection */ - - status = IoCallDriver(DeviceObject, Irp); - - if (STATUS_PENDING == status) { + SetInfoEx->ID.toi_id = ID; - status = KeWaitForSingleObject( - &Event, - Executive, - KernelMode, - FALSE, - NULL - ); + SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION; + SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL; + SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY; + 
SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE; - status = Irp->IoStatus.Status; - } + SetInfoEx->BufferSize = Length; + RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length); - KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n", - status, KsNtStatusToString(status))); + Event = (PKEVENT)(&(SetInfoEx->Buffer[Length])); + KeInitializeEvent(Event, NotificationEvent, FALSE); - IoFreeIrp(Irp); + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - if (info->ConnectionInfo) { + Irp = IoBuildDeviceIoControlRequest( + IOCTL_TCP_SET_INFORMATION_EX, + DeviceObject, + SetInfoEx, + SetInfoExLength, + NULL, + 0, + FALSE, + Event, + &IoStatus + ); - /* disassociate the association between connection/address objects */ + if (Irp == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - status = KsDisassociateAddress(ConnectionObject); + IrpSp = IoGetNextIrpStackLocation(Irp); - if (!NT_SUCCESS(status)) { - cfs_enter_debugger(); - } + if (IrpSp == NULL) { + IoFreeIrp(Irp); + Irp = NULL; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - spin_lock(&(tconn->kstc_lock)); + IrpSp->FileObject = ConnectionObject; + IrpSp->DeviceObject = DeviceObject; - /* cleanup the tsdumgr Lists */ - KsCleanupTsdu (tconn); + Status = IoCallDriver(DeviceObject, Irp); - /* set the state of the tconn */ - if (NT_SUCCESS(status)) { - tconn->kstc_state = ksts_disconnected; - } else { - tconn->kstc_state = ksts_associated; - } + if (Status == STATUS_PENDING) { - /* free the connection info to system pool*/ - ExFreePool(info->ConnectionInfo); - info->ConnectionInfo = NULL; - info->Remote = NULL; + KeWaitForSingleObject( + Event, + Executive, + KernelMode, + FALSE, + NULL + ); - spin_unlock(&(tconn->kstc_lock)); + Status = IoStatus.Status; } - status = STATUS_SUCCESS; - errorout: + if (SetInfoEx) { + ExFreePool(SetInfoEx); + } + + if (!NT_SUCCESS(Status)) { + KsPrint((0, "ks_set_tcp_option: error setup tcp option: " + "ID (%d) 
Status = %xh\n", ID, Status)); + Status = STATUS_SUCCESS; + } + ks_put_tconn(tconn); - return cfs_error_code(status); + return cfs_error_code(Status); } - /* - * ks_abort_tconn - * The connection is broken un-expectedly. We need do - * some cleanup. + * ks_bind_tconn + * bind the tdi connection object with an address * * Arguments: - * tconn: the tdi connection + * tconn: tconn to be bound + * parent: the parent tconn object + * ipaddr: the ip address + * port: the port number * * Return Value: - * N/A + * int: 0 for success or ks error codes. * - * Notes: + * NOTES: * N/A */ -void -ks_abort_tconn( - ksock_tconn_t * tconn +int +ks_bind_tconn ( + ks_tconn_t * tconn, + ks_tconn_t * parent, + ulong addr, + unsigned short port ) { - PKS_DISCONNECT_WORKITEM WorkItem = NULL; + NTSTATUS status; + int rc = 0; - WorkItem = &(tconn->kstc_disconnect); + ks_tdi_addr_t taddr; - ks_get_tconn(tconn); - spin_lock(&(tconn->kstc_lock)); + memset(&taddr, 0, sizeof(ks_tdi_addr_t)); - if (tconn->kstc_state != ksts_connected) { - ks_put_tconn(tconn); - } else { + if (tconn->kstc_state != ksts_inited) { - if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { + status = STATUS_INVALID_PARAMETER; + rc = cfs_error_code(status); + goto errorout; - WorkItem->Flags = TDI_DISCONNECT_ABORT; - WorkItem->tconn = tconn; + } else if (tconn->kstc_type == kstt_child) { - cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); + if (NULL == parent) { + status = STATUS_INVALID_PARAMETER; + rc = cfs_error_code(status); - ExQueueWorkItem( - &(WorkItem->WorkItem), - DelayedWorkQueue - ); + goto errorout; } - } - spin_unlock(&(tconn->kstc_lock)); -} + /* refer it's parent's address object */ + taddr = parent->kstc_addr; + ObReferenceObject(taddr.FileObject); -/* - * ks_query_local_ipaddr - * query the local connection ip address - * - * Arguments: - * tconn: the tconn which is connected - * - * Return Value: - * int: ks error code - * - * Notes: - * N/A - */ + ks_get_tconn(parent); -int 
-ks_query_local_ipaddr( - ksock_tconn_t * tconn - ) -{ - PFILE_OBJECT FileObject = NULL; - NTSTATUS status; + } else { - PTRANSPORT_ADDRESS TdiAddress; - ULONG AddressLength; + PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi); + ULONG AddrLen = 0; - if (tconn->kstc_type == kstt_sender) { - FileObject = tconn->sender.kstc_info.FileObject; - } else if (tconn->kstc_type == kstt_child) { - FileObject = tconn->child.kstc_info.FileObject; - } else { - status = STATUS_INVALID_PARAMETER; - goto errorout; - } + /* intialize the tdi address*/ - TdiAddress = &(tconn->kstc_addr.Tdi); - AddressLength = MAX_ADDRESS_LENGTH; + TdiAddress->TAAddressCount = 1; + TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; + TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = (ULONG)htonl(addr); - if (NT_SUCCESS(status)) { + memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); - KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n", - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr, - ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port )); - } else { - KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n")); + + /* open the transport address object */ + + AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + + TDI_ADDRESS_LENGTH_IP; + + status = KsOpenAddress( + &(tconn->kstc_dev), + &(taddr.Tdi), + AddrLen, + &(taddr.Handle), + &(taddr.FileObject) + ); + + if (!NT_SUCCESS(status)) { + + KsPrint((1, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n", + addr, port, status )); + rc = cfs_error_code(status); + goto errorout; + } } + if (tconn->kstc_type == kstt_child) { + tconn->child.kstc_parent = parent; + } + + tconn->kstc_state 
= ksts_bind; + tconn->kstc_addr = taddr; + errorout: - return cfs_error_code(status); + return (rc); } /* - * ks_send_mdl - * send MDL chain to the peer for a stream connection + * ks_build_tconn + * build tcp/streaming connection to remote peer * * Arguments: - * tconn: tdi connection object - * tx: the transmit context - * mdl: the mdl chain containing the data - * len: length of the data - * flags: flags of the transmission + * tconn: tconn to be connected to the peer + * addr: the peer's ip address + * port: the peer's port number * * Return Value: - * ks return code + * int: 0 for success or ks error codes. * * Notes: * N/A */ int -ks_send_mdl( - ksock_tconn_t * tconn, - void * tx, - ksock_mdl_t * mdl, - int len, - int flags +ks_build_tconn( + ks_tconn_t * tconn, + ulong addr, + unsigned short port ) { - NTSTATUS Status; - int rc = 0; - ulong_ptr length; - ulong_ptr tflags; - ksock_tdi_tx_t * context; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_DAT KsTsduDat; + int rc = 0; + NTSTATUS status = STATUS_SUCCESS; - BOOLEAN bNewTsdu = FALSE; /* newly allocated */ - BOOLEAN bNewBuff = FALSE; /* newly allocated */ - BOOLEAN bBuffed; /* bufferred sending */ + PFILE_OBJECT ConnectionObject = NULL; + PDEVICE_OBJECT DeviceObject = NULL; - PUCHAR Buffer = NULL; - ksock_mdl_t * NewMdl = NULL; + PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; + ULONG AddrLength; - PIRP Irp = NULL; - PFILE_OBJECT ConnObject; - PDEVICE_OBJECT DeviceObject; + PIRP Irp = NULL; - BOOLEAN bIsNonBlock; + LASSERT(tconn->kstc_type == kstt_sender); + LASSERT(tconn->kstc_state == ksts_bind); ks_get_tconn(tconn); - tflags = ks_tdi_send_flags(flags); - bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); - - spin_lock(&tconn->kstc_lock); - - LASSERT( tconn->kstc_type == kstt_sender || - tconn->kstc_type == kstt_child ); + { + /* set the event callbacks */ + rc = KsSetHandlers(tconn); - if (tconn->kstc_state != ksts_connected) { - 
spin_unlock(&tconn->kstc_lock); - ks_put_tconn(tconn); - return -ENOTCONN; + if (rc < 0) { + cfs_enter_debugger(); + goto errorout; + } } - /* get the latest Tsdu buffer form TsduMgr list. - just set NULL if the list is empty. */ + /* create the connection file handle / object */ + status = KsOpenConnection( + &(tconn->kstc_dev), + (CONNECTION_CONTEXT)tconn, + &(tconn->sender.kstc_info.Handle), + &(tconn->sender.kstc_info.FileObject) + ); - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_send); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_send); + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; } - if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } + /* associdate the the connection with the adress object of the tconn */ - if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) { - bBuffed = TRUE; - } else { - bBuffed = FALSE; + status = KsAssociateAddress( + tconn->kstc_addr.Handle, + tconn->sender.kstc_info.FileObject + ); + + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; } - /* do the preparation work for bufferred sending */ + tconn->kstc_state = ksts_associated; - if (bBuffed) { + /* Allocating Connection Info Together with the Address */ + AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + + TDI_ADDRESS_LENGTH_IP; - /* if the data is even larger than the biggest Tsdu, we have - to allocate new buffer and use TSDU_TYOE_BUF to store it */ + ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( + NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK'); - if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size - - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { - bNewBuff = TRUE; - } + if (NULL == ConnectionInfo) { - if (list_empty(&(KsTsduMgr->TsduList))) { + status = 
STATUS_INSUFFICIENT_RESOURCES; + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } - LASSERT(KsTsduMgr->NumOfTsdu == 0); - KsTsdu = NULL; + /* Initializing ConnectionInfo ... */ + { + PTRANSPORT_ADDRESS TdiAddress; - } else { + /* ConnectionInfo settings */ - LASSERT(KsTsduMgr->NumOfTsdu > 0); - KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + ConnectionInfo->UserDataLength = 0; + ConnectionInfo->UserData = NULL; + ConnectionInfo->OptionsLength = 0; + ConnectionInfo->Options = NULL; + ConnectionInfo->RemoteAddressLength = AddrLength; + ConnectionInfo->RemoteAddress = ConnectionInfo + 1; - /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */ - if (bNewBuff) { - if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) { - KsTsdu = NULL; - } - } else { - if ( KS_TSDU_STRU_SIZE((ULONG)len) > - KsTsdu->TotalLength - KsTsdu->LastOffset ) { - KsTsdu = NULL; - } - } - } + /* intialize the tdi address*/ - /* if there's no Tsdu or the free size is not enough for the - KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu. */ + TdiAddress = ConnectionInfo->RemoteAddress; - if (NULL == KsTsdu) { + TdiAddress->TAAddressCount = 1; + TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; + TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; - KsTsdu = KsAllocateKsTsdu(); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = (ULONG)htonl(addr); - if (NULL == KsTsdu) { - bBuffed = FALSE; - bNewBuff = FALSE; - } else { - bNewTsdu = TRUE; - } - } + memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); + } - /* process the case that a new buffer is to be allocated from system memory */ - if (bNewBuff) { + /* Now prepare to connect the remote peer ... 
*/ - /* now allocating internal buffer to contain the payload */ - Buffer = ExAllocatePool(NonPagedPool, len); + ConnectionObject = tconn->sender.kstc_info.FileObject; + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - if (NULL == Buffer) { - bBuffed = FALSE; - } - } - } + /* allocate a new Irp */ - if (bBuffed) { + Irp = KsBuildTdiIrp(DeviceObject); - if (bNewBuff) { + if (NULL == Irp) { - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + status = STATUS_INSUFFICIENT_RESOURCES; + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } - KsTsduBuf->TsduFlags = 0; - KsTsduBuf->DataLength = (ULONG)len; - KsTsduBuf->StartOffset = 0; - KsTsduBuf->UserBuffer = Buffer; - } else { - /* queue a new KS_TSDU_BUF to the Tsdu buffer */ - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + /* setup the Irp */ - KsTsduDat->TsduFlags = 0; - KsTsduDat->DataLength = (ULONG)len; - KsTsduDat->StartOffset = 0; - KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len); + TdiBuildConnect( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + NULL, + ConnectionInfo, + NULL + ); - Buffer = &KsTsduDat->Data[0]; - } - /* now locking the Buffer and copy user payload into the buffer */ - ASSERT(Buffer != NULL); + /* sumbit the Irp to the underlying transport driver */ + status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + NULL + ); - rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl); - if (rc != 0) { - printk("ks_send_mdl: bufferred: error allocating mdl.\n"); - bBuffed = FALSE; - } else { - ULONG BytesCopied = 0; - TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied); - if (BytesCopied != (ULONG) len) { - bBuffed = FALSE; - } - } + spin_lock(&(tconn->kstc_lock)); - /* Do the finializing job if we succeed to to lock the buffer and move - user data. Or we need do cleaning up ... 
*/ - if (bBuffed) { + if (NT_SUCCESS(status)) { - if (bNewBuff) { - KsTsduBuf->TsduType = TSDU_TYPE_BUF; - KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); + /* Connected! the conneciton is built successfully. */ - } else { - KsTsduDat->TsduType = TSDU_TYPE_DAT; - KsTsdu->LastOffset += KsTsduDat->TotalLength; - } + tconn->kstc_state = ksts_connected; - /* attach it to the TsduMgr list if the Tsdu is newly created. */ - if (bNewTsdu) { + tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo; + tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress; - list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - KsTsduMgr->NumOfTsdu++; - } + spin_unlock(&(tconn->kstc_lock)); - } else { + } else { - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } + /* Not connected! Abort it ... */ - if (bNewBuff) { - ExFreePool(Buffer); - Buffer = NULL; - bNewBuff = FALSE; - } + if (rc != 0) { + cfs_enter_debugger(); } - } - /* update the TotalBytes being in sending */ - KsTsduMgr->TotalBytes += (ULONG)len; + Irp = NULL; + rc = cfs_error_code(status); - spin_unlock(&tconn->kstc_lock); + tconn->kstc_state = ksts_associated; + spin_unlock(&(tconn->kstc_lock)); - /* cleanup the Tsdu if not successful */ - if (!bBuffed && bNewTsdu) { - KsPutKsTsdu(KsTsdu); - bNewTsdu = FALSE; - KsTsdu = NULL; - } + /* disassocidate the connection and the address object, + after cleanup, it's safe to set the state to abort ... */ - /* we need allocate the ksock_tx_t structure from memory pool. 
*/ + if ( NT_SUCCESS(KsDisassociateAddress( + tconn->sender.kstc_info.FileObject))) { + tconn->kstc_state = ksts_aborted; + } - context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0); - if (!context) { - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); + /* reset the event callbacks */ + rc = KsResetHandlers(tconn); - Status = STATUS_INSUFFICIENT_RESOURCES; goto errorout; } - /* intialize the TcpContext */ - - memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT)); +errorout: - context->tconn = tconn; - context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t)); + if (NT_SUCCESS(status)) { - KeInitializeEvent(context->Event, SynchronizationEvent, FALSE); + ks_query_local_ipaddr(tconn); - if (bBuffed) { + } else { - /* for bufferred transmission, we need set - the internal completion routine. */ + if (ConnectionInfo) { + ExFreePool(ConnectionInfo); + } + if (Irp) { + IoFreeIrp(Irp); + } + } - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->KsTsduMgr = KsTsduMgr; - context->CompletionContext = KsTsdu; - context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat); - context->bCounted = FALSE; + ks_put_tconn(tconn); - } else if (bIsNonBlock) { + return (rc); +} - /* for non-blocking transmission, we need set - the internal completion routine too. 
*/ - context->CompletionRoutine = KsTcpSendCompletionRoutine; - context->CompletionContext = tx; - context->KsTsduMgr = KsTsduMgr; - context->bCounted = TRUE; - context->ReferCount = 2; - } +/* + * ks_disconnect_tconn + * disconnect the tconn from a connection + * + * Arguments: + * tconn: the tdi connecton object connected already + * flags: flags & options for disconnecting + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ - if (tconn->kstc_type == kstt_sender) { - ConnObject = tconn->sender.kstc_info.FileObject; - } else { - LASSERT(tconn->kstc_type == kstt_child); - ConnObject = tconn->child.kstc_info.FileObject; - } +int +ks_disconnect_tconn( + ks_tconn_t * tconn, + ulong flags + ) +{ + NTSTATUS status = STATUS_SUCCESS; - DeviceObject = IoGetRelatedDeviceObject(ConnObject); + ks_tconn_info_t * info; - Irp = KsBuildTdiIrp(DeviceObject); + PFILE_OBJECT ConnectionObject; + PDEVICE_OBJECT DeviceObject = NULL; - if (NULL == Irp) { + PIRP Irp = NULL; - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); + KEVENT Event; - Status = STATUS_INSUFFICIENT_RESOURCES; - goto errorout; - } + ks_get_tconn(tconn); - length = KsQueryMdlsSize(mdl); + /* make sure tt's connected already and it + must be a sender or a child ... */ - LASSERT((ULONG)len <= length); + LASSERT(tconn->kstc_state == ksts_connected); + LASSERT( tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); - ks_get_tconn(tconn); + /* reset all the event handlers to NULL */ - TdiBuildSend( - Irp, - DeviceObject, - ConnObject, - KsTcpCompletionRoutine, - context, - (bBuffed ? NewMdl : mdl), - (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags), - (ULONG)len; - ); + if (tconn->kstc_type != kstt_child) { + KsResetHandlers (tconn); + } - Status = IoCallDriver(DeviceObject, Irp); + /* Disconnecting to the remote peer ... 
*/ - if (bBuffed) { - ks_release_mdl(mdl, FALSE); - NewMdl = NULL; + if (tconn->kstc_type == kstt_sender) { + info = &(tconn->sender.kstc_info); + } else { + info = &(tconn->child.kstc_info); } - if (!NT_SUCCESS(Status)) { - cfs_enter_debugger(); - rc = cfs_error_code(Status); - goto errorout; - } + ConnectionObject = info->FileObject; + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); - if (bBuffed) { - Status = STATUS_SUCCESS; - rc = len; - context = NULL; - } else { - if (bIsNonBlock) { - if (InterlockedDecrement(&context->ReferCount) == 0) { - Status = Irp->IoStatus.Status; - } else { - Status = STATUS_PENDING; - context = NULL; - } - } else { - if (STATUS_PENDING == Status) { - Status = KeWaitForSingleObject( - context->Event, - Executive, - KernelMode, - FALSE, - NULL - ); - - if (NT_SUCCESS(Status)) { - Status = Irp->IoStatus.Status; - } - } - } + /* allocate an Irp and setup it */ - if (Status == STATUS_SUCCESS) { - rc = (int)(Irp->IoStatus.Information); + Irp = KsBuildTdiIrp(DeviceObject); - spin_lock(&tconn->kstc_lock); - KsTsduMgr->TotalBytes -= rc; - spin_unlock(&tconn->kstc_lock); + if (NULL == Irp) { - } else { - rc = cfs_error_code(Status); - } + status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; } -errorout: + KeInitializeEvent( + &Event, + SynchronizationEvent, + FALSE + ); - if (bBuffed) { + TdiBuildDisconnect( + Irp, + DeviceObject, + ConnectionObject, + KsDisconectCompletionRoutine, + &Event, + NULL, + flags, + NULL, + NULL + ); - if (NewMdl) { - ks_release_mdl(NewMdl, FALSE); - NewMdl = NULL; - } + /* issue the Irp to the underlying transport + driver to disconnect the connection */ - if (bNewBuff) { - if (!NT_SUCCESS(Status)) { - ExFreePool(Buffer); - Buffer = NULL; - } - } + status = IoCallDriver(DeviceObject, Irp); - } else { + if (STATUS_PENDING == status) { - if (Status != STATUS_PENDING) { + status = KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); - if (Irp) { + 
status = Irp->IoStatus.Status; + } - /* Freeing the Irp ... */ + KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n", + status, KsNtStatusToString(status))); - IoFreeIrp(Irp); - Irp = NULL; - } - } - } + IoFreeIrp(Irp); - if (!NT_SUCCESS(Status)) { + if (info->ConnectionInfo) { - spin_lock(&tconn->kstc_lock); + /* disassociate the association between connection/address objects */ - KsTsduMgr->TotalBytes -= (ULONG)len; + status = KsDisassociateAddress(ConnectionObject); - if (bBuffed) { + if (!NT_SUCCESS(status)) { + cfs_enter_debugger(); + } - /* attach it to the TsduMgr list if the Tsdu is newly created. */ - if (bNewTsdu) { + spin_lock(&(tconn->kstc_lock)); - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; + /* cleanup the tsdumgr Lists */ + KsCleanupTsdu (tconn); - KsPutKsTsdu(KsTsdu); - } else { - if (bNewBuff) { - if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF); - KsTsduBuf->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduBuf->StartOffset = KsTsduBuf->DataLength; - } - } else { - if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength == - (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { - KsTsdu->LastOffset -= KsTsduDat->TotalLength; - KsTsduDat->TsduType = 0; - } else { - cfs_enter_debugger(); - KsTsduDat->StartOffset = KsTsduDat->DataLength; - } - } - } + /* set the state of the tconn */ + if (NT_SUCCESS(status)) { + tconn->kstc_state = ksts_disconnected; + } else { + tconn->kstc_state = ksts_associated; } - spin_unlock(&tconn->kstc_lock); - } + /* free the connection info to system pool*/ + ExFreePool(info->ConnectionInfo); + info->ConnectionInfo = NULL; + info->Remote = NULL; - /* free the context if is not used at all */ - if (context) { - cfs_free(context); + spin_unlock(&(tconn->kstc_lock)); } + status = STATUS_SUCCESS; + +errorout: + ks_put_tconn(tconn); - return rc; + return cfs_error_code(status); } + /* - * ks_recv_mdl - * Receive 
data from the peer for a stream connection + * ks_abort_tconn + * The connection is broken un-expectedly. We need do + * some cleanup. * * Arguments: - * tconn: tdi connection object - * mdl: the mdl chain to contain the incoming data - * len: length of the data - * flags: flags of the receiving + * tconn: the tdi connection * * Return Value: - * ks return code + * N/A * * Notes: * N/A */ -int -ks_recv_mdl( - ksock_tconn_t * tconn, - ksock_mdl_t * mdl, - int size, - int flags +void +ks_abort_tconn( + ks_tconn_t * tconn ) { - NTSTATUS Status = STATUS_SUCCESS; - int rc = 0; - - BOOLEAN bIsNonBlock; - BOOLEAN bIsExpedited; - - PKS_CHAIN KsChain; - PKS_TSDUMGR KsTsduMgr; - PKS_TSDU KsTsdu; - PKS_TSDU_DAT KsTsduDat; - PKS_TSDU_BUF KsTsduBuf; - PKS_TSDU_MDL KsTsduMdl; - - PUCHAR Buffer; - - ULONG BytesRecved = 0; - ULONG RecvedOnce; + PKS_DISCONNECT_WORKITEM WorkItem = NULL; - bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); - bIsExpedited = cfs_is_flag_set(flags, MSG_OOB); + WorkItem = &(tconn->kstc_disconnect); ks_get_tconn(tconn); - -Again: - - RecvedOnce = 0; - spin_lock(&(tconn->kstc_lock)); - if ( tconn->kstc_type != kstt_sender && - tconn->kstc_type != kstt_child) { - - rc = -EINVAL; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - if (tconn->kstc_state != ksts_connected) { - - rc = -ENOTCONN; - spin_unlock(&(tconn->kstc_lock)); - - goto errorout; - } - - if (tconn->kstc_type == kstt_sender) { - KsChain = &(tconn->sender.kstc_recv); - } else { - LASSERT(tconn->kstc_type == kstt_child); - KsChain = &(tconn->child.kstc_recv); - } - - if (bIsExpedited) { - KsTsduMgr = &(KsChain->Expedited); - } else { - KsTsduMgr = &(KsChain->Normal); - } - -NextTsdu: - - if (list_empty(&(KsTsduMgr->TsduList))) { - - // - // It's a notification event. We need reset it to - // un-signaled state in case there no any tsdus. 
- // - - KeResetEvent(&(KsTsduMgr->Event)); - + ks_put_tconn(tconn); } else { - KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); - LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); - - /* remove the KsTsdu from TsduMgr list to release the lock */ - list_del(&(KsTsdu->Link)); - KsTsduMgr->NumOfTsdu--; - - spin_unlock(&(tconn->kstc_lock)); - - while ((ULONG)size > BytesRecved) { + if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { - ULONG BytesCopied = 0; - ULONG BytesToCopy = 0; - ULONG StartOffset = 0; + WorkItem->Flags = TDI_DISCONNECT_ABORT; + WorkItem->tconn = tconn; - KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); - KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); - if ( TSDU_TYPE_DAT == KsTsduDat->TsduType || - TSDU_TYPE_BUF == KsTsduBuf->TsduType ) { + ExQueueWorkItem( + &(WorkItem->WorkItem), + DelayedWorkQueue + ); + } + } + spin_unlock(&(tconn->kstc_lock)); +} - // - // Data Tsdu Unit ... - // - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { +/* + * ks_query_local_ipaddr + * query the local connection ip address + * + * Arguments: + * tconn: the tconn which is connected + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ - if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat); - break; - } +int +ks_query_local_ipaddr( + ks_tconn_t * tconn + ) +{ + PFILE_OBJECT FileObject = NULL; + NTSTATUS status; - Buffer = &KsTsduDat->Data[0]; - StartOffset = KsTsduDat->StartOffset; - if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... 
*/ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset; - } + PTRANSPORT_ADDRESS TdiAddress; + ULONG AddressLength; - } else { + if (tconn->kstc_type == kstt_sender) { + FileObject = tconn->sender.kstc_info.FileObject; + } else if (tconn->kstc_type == kstt_child) { + FileObject = tconn->child.kstc_info.FileObject; + } else { + status = STATUS_INVALID_PARAMETER; + goto errorout; + } - if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { - /* data is not ready yet*/ - KeResetEvent(&(KsTsduMgr->Event)); - DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf); - break; - } + TdiAddress = &(tconn->kstc_addr.Tdi); + AddressLength = MAX_ADDRESS_LENGTH; - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - Buffer = KsTsduBuf->UserBuffer; - StartOffset = KsTsduBuf->StartOffset; + status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength); - if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) { - /* Recvmsg requst could be statisfied ... 
*/ - BytesToCopy = size - BytesRecved; - } else { - BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset; - } - } + if (NT_SUCCESS(status)) { + KsPrint((2, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n", + ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr, + ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port )); + } else { + KsPrint((2, "ks_query_local_ipaddr: Failed to query the connection local ip address.\n")); + } - if (BytesToCopy > 0) { - Status = TdiCopyBufferToMdl( - Buffer, - StartOffset, - BytesToCopy, - mdl, - BytesRecved, - &BytesCopied - ); - - if (NT_SUCCESS(Status)) { - - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } +errorout: - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; + return cfs_error_code(status); +} - } else { +int +KsCalcWhichEngine(ks_tconn_t * tconn) +{ + PTRANSPORT_ADDRESS TdiAddress = &(tconn->kstc_addr.Tdi); + ULONG addr = ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr; + ULONG sum = (addr & 0xFF) + ((addr & 0xFF00) >> 8) + ((addr & 0xFF0000) >> 16); - cfs_enter_debugger(); + return (int)(sum % ks_data.ksnd_engine_nums); +} - if (STATUS_BUFFER_OVERFLOW == Status) { - } - } - } +void +KsQueueTdiEngine(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr) +{ + ks_engine_mgr_t * engm; + ks_engine_slot_t * engs; + + engm = &ks_data.ksnd_engine_mgr[KsCalcWhichEngine(tconn)]; + engs = &TsduMgr->Slot; + + if (!engs->queued) { + spin_lock(&engm->lock); + if (!engs->queued) { + list_add_tail(&engs->link, &engm->list); + engs->queued = TRUE; + engs->tconn = tconn; + engs->emgr = engm; + engs->tsdumgr = TsduMgr; + KeSetEvent(&(engm->start),0, FALSE); + } + spin_unlock(&engm->lock); + KsPrint((4, "KsQueueTdiEngine: TsduMgr=%p is queued to engine %p\n", + TsduMgr, engm)); + } + KeSetEvent(&(engm->start),0, FALSE); +} - if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { +void +KsRemoveTdiEngine(PKS_TSDUMGR TsduMgr) +{ + ks_engine_mgr_t * engm; + ks_engine_slot_t * engs; + + 
engs = &TsduMgr->Slot; + if (engs->queued) { + engm = engs->emgr; + LASSERT(engm != NULL); + spin_lock(&engm->lock); + if (engs->queued) { + list_del(&engs->link); + engs->queued = FALSE; + engs->tconn = NULL; + engs->emgr = NULL; + engs->tsdumgr = NULL; + } + spin_unlock(&engm->lock); + KsPrint((4, "KsQueueTdiEngine: TsduMgr %p is removed from engine %p\n", + TsduMgr, engm)); + } +} - KsTsduDat->StartOffset += BytesCopied; +int +KsDeliveryIrp(ks_tconn_t * tconn, PIRP irp) +{ + PFILE_OBJECT connobj; + PDEVICE_OBJECT devobj; + NTSTATUS status; + int rc = 0; - if (KsTsduDat->StartOffset == KsTsduDat->DataLength) { - KsTsdu->StartOffset += KsTsduDat->TotalLength; - } + /* construct Irp */ + if (tconn->kstc_type == kstt_sender) { + connobj = tconn->sender.kstc_info.FileObject; + } else { + LASSERT(tconn->kstc_type == kstt_child); + connobj = tconn->child.kstc_info.FileObject; + } + devobj = IoGetRelatedDeviceObject(connobj); + + /* send irp to transport layer */ + status = IoCallDriver(devobj, irp); - } else { + /* convert status to linux error code */ + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + } - ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); - KsTsduBuf->StartOffset += BytesCopied; - if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) { - KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); - /* now we need release the buf to system pool */ - ExFreePool(KsTsduBuf->UserBuffer); - } - } + KsPrint((4, "KsDeliveryIrp: tconn=%p irp=%p status=%xh rc=%d.\n", + tconn, irp, status, rc)); + return rc; +} - } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { +PIRP +KsBuildSend(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr, + ks_mdl_t * mdl, ulong flags ) +{ + ks_tdi_tx_t * context; + PIRP irp = NULL; + PFILE_OBJECT connobj; + PDEVICE_OBJECT devobj; + NTSTATUS status; + ULONG length; - // - // MDL Tsdu Unit ... 
- // + int rc = 0; - if (KsTsduMdl->DataLength > size - BytesRecved) { + /* query mdl chain total length */ + length = KsQueryMdlsSize(mdl); - /* Recvmsg requst could be statisfied ... */ + /* we need allocate the ks_tx_t structure from memory pool. */ + context = cfs_alloc(sizeof(ks_tdi_tx_t), 0); + if (!context) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - BytesToCopy = size - BytesRecved; + /* intialize the TcpContext */ + memset(context,0, sizeof(ks_tdi_tx_t)); + context->Magic = KS_TCP_CONTEXT_MAGIC; + context->tconn = tconn; + context->CompletionRoutine = KsTcpSendCompletionRoutine; + context->TsduMgr = TsduMgr; + context->Length = length; - } else { + /* construct Irp */ + if (tconn->kstc_type == kstt_sender) { + connobj = tconn->sender.kstc_info.FileObject; + } else { + LASSERT(tconn->kstc_type == kstt_child); + connobj = tconn->child.kstc_info.FileObject; + } + devobj = IoGetRelatedDeviceObject(connobj); + irp = KsBuildTdiIrp(devobj); + if (NULL == irp) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } - BytesToCopy = KsTsduMdl->DataLength; - } + /* grab tconn reference */ + ks_get_tconn(tconn); - Status = KsCopyMdlChainToMdlChain( - KsTsduMdl->Mdl, - KsTsduMdl->StartOffset, - mdl, - BytesRecved, - BytesToCopy, - &BytesCopied - ); + /* delivery the sending request */ + TdiBuildSend( + irp, + devobj, + connobj, + KsTcpCompletionRoutine, + context, + mdl, + flags, + length + ); - if (NT_SUCCESS(Status)) { + return irp; - if (BytesToCopy != BytesCopied) { - cfs_enter_debugger(); - } +errorout: - KsTsduMdl->StartOffset += BytesCopied; - KsTsduMdl->DataLength -= BytesCopied; + /* free the context if is not used at all */ + if (context) { + ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC); + context->Magic = 'CDAB'; + cfs_free(context); + } - BytesRecved += BytesCopied; - RecvedOnce += BytesCopied; - } else { - cfs_enter_debugger(); - } + /* here need free the Irp. 
*/ + if (irp) { + IoFreeIrp(irp); + irp = NULL; + } - if (0 == KsTsduMdl->DataLength) { + return NULL; +} - // - // Call TdiReturnChainedReceives to release the Tsdu memory - // +int +KsDeliveryTsdus(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr) +{ + int rc = 0; + ulong length = 0; + ulong tflags = 0; + ks_mdl_t * mdl = NULL; + PIRP irp = NULL; + BOOLEAN expedited; - TdiReturnChainedReceives( - &(KsTsduMdl->Descriptor), - 1 ); + LASSERT(tconn->kstc_magic == KS_TCONN_MAGIC); - KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); - } + ks_get_tconn(tconn); + ks_lock_tsdumgr(TsduMgr); - } else { - printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n", - KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength); - printk(" Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x", - KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength); - cfs_enter_debugger(); - } + if ( tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child) { + rc = -EINVAL; + ks_unlock_tsdumgr(TsduMgr); + goto errorout; + } - if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + if (tconn->kstc_state != ksts_connected) { + rc = -ENOTCONN; + ks_unlock_tsdumgr(TsduMgr); + goto errorout; + } - // - // KsTsdu is empty now, we need free it ... 
- // + if (TsduMgr->OOB) { + tflags = TDI_SEND_NON_BLOCKING | TDI_SEND_EXPEDITED; + } else { + tflags = TDI_SEND_NON_BLOCKING; + } + + if (list_empty(&TsduMgr->TsduList)) { + LASSERT(TsduMgr->TotalBytes == 0); + ks_unlock_tsdumgr(TsduMgr); + goto errorout; + } - KsPutKsTsdu(KsTsdu); - KsTsdu = NULL; + /* check whether there's outstanding sending requests */ + if (TsduMgr->Busy) { + rc = -EAGAIN; + ks_unlock_tsdumgr(TsduMgr); + goto errorout; + } - break; - } + /* probe all Tsdus and merge buffers together */ + mdl = KsLockTsdus(tconn, TsduMgr, &tflags, &length); + if (NULL == mdl) { + if (length == 0) { + LASSERT(TsduMgr->TotalBytes == 0); + rc = -EAGAIN; + } else { + rc = -ENOMEM; } + ks_unlock_tsdumgr(TsduMgr); + goto errorout; + } - spin_lock(&(tconn->kstc_lock)); + KsPrint((4, "KsDeliveryTsdus: tconn=%p TsudMgr=%p, length=%xh/%xh\n", + tconn, TsduMgr, length, TsduMgr->TotalBytes)); - /* we need attach the KsTsdu to the list header */ - if (KsTsdu) { - KsTsduMgr->NumOfTsdu++; - list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); - } else if ((ULONG)size > BytesRecved) { - goto NextTsdu; - } + /* build send irp request */ + irp = KsBuildSend(tconn, TsduMgr, mdl, tflags); + if (NULL == irp) { + rc = -ENOMEM; + ks_unlock_tsdumgr(TsduMgr); + goto errorout; } + TsduMgr->Busy = TRUE; + ks_unlock_tsdumgr(TsduMgr); - if (KsTsduMgr->TotalBytes < RecvedOnce) { - cfs_enter_debugger(); - KsTsduMgr->TotalBytes = 0; - } else { - KsTsduMgr->TotalBytes -= RecvedOnce; + /* delivery mdl chain */ + LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + rc = KsDeliveryIrp(tconn, irp); + if (rc < 0) { + goto errorout; } - spin_unlock(&(tconn->kstc_lock)); +errorout: - if (NT_SUCCESS(Status)) { + LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + ks_put_tconn(tconn); + return rc; +} - if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) { +int +KsDeliveryEngineThread(void * context) +{ + ks_engine_mgr_t * engm = context; + ks_engine_slot_t * engs; + struct list_head * list; + ks_tconn_t * 
tconn; - KeWaitForSingleObject( - &(KsTsduMgr->Event), - Executive, - KernelMode, - FALSE, - NULL - ); + cfs_set_thread_priority(31); - goto Again; - } + while (!engm->stop) { - if (bIsNonBlock && (BytesRecved == 0)) { - rc = -EAGAIN; - } else { - rc = BytesRecved; + cfs_wait_event_internal(&engm->start, 0); + + spin_lock(&engm->lock); + if (list_empty(&engm->list)) { + spin_unlock(&engm->lock); + continue; } - } -errorout: + list = engm->list.next; + list_del(list); + engs = list_entry(list, ks_engine_slot_t, link); + LASSERT(engs->emgr == engm); + LASSERT(engs->queued); + engs->emgr = NULL; + engs->queued = FALSE; + spin_unlock(&engm->lock); - ks_put_tconn(tconn); + tconn = engs->tconn; + LASSERT(tconn->kstc_magic == KS_TCONN_MAGIC); - if (rc > 0) { - KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc)); - } else { - KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status)); + KsPrint((4, "KsDeliveryEngineThread: %p active: tconn=%p " + "TsduMgr=%p\n", engm, tconn, engs->tsdumgr)); + KsDeliveryTsdus(tconn, engs->tsdumgr); + + LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); } - /* release the chained mdl */ - ks_release_mdl(mdl, FALSE); + KeSetEvent(&engm->exit, 0, FALSE); - return (rc); + return 0; } - /* * ks_init_tdi_data * initialize the global data in ksockal_data @@ -6019,17 +5856,17 @@ errorout: int ks_init_tdi_data() { - int rc = 0; + int rc = 0, i; /* initialize tconn related globals */ - RtlZeroMemory(&ks_data, sizeof(ks_data_t)); + RtlZeroMemory(&ks_data, sizeof(ks_tdi_data_t)); spin_lock_init(&ks_data.ksnd_tconn_lock); CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns); cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE); ks_data.ksnd_tconn_slab = cfs_mem_cache_create( - "tcon", sizeof(ksock_tconn_t) , 0, 0); + "tcon", sizeof(ks_tconn_t) , 0, 0); if (!ks_data.ksnd_tconn_slab) { rc = -ENOMEM; @@ -6037,7 +5874,6 @@ ks_init_tdi_data() } /* initialize tsdu related globals */ - spin_lock_init(&ks_data.ksnd_tsdu_lock); 
CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus); ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */ @@ -6046,21 +5882,41 @@ ks_init_tdi_data() if (!ks_data.ksnd_tsdu_slab) { rc = -ENOMEM; - cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); - ks_data.ksnd_tconn_slab = NULL; goto errorout; } - /* initialize daemon related globals */ - - spin_lock_init(&ks_data.ksnd_daemon_lock); - CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons); - cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE); + /* initialize engine threads list */ + ks_data.ksnd_engine_nums = num_online_cpus(); + if (ks_data.ksnd_engine_nums < 4) { + ks_data.ksnd_engine_nums = 4; + } + ks_data.ksnd_engine_mgr = cfs_alloc(sizeof(ks_engine_mgr_t) * + ks_data.ksnd_engine_nums,CFS_ALLOC_ZERO); + if (ks_data.ksnd_engine_mgr == NULL) { + rc = -ENOMEM; + goto errorout; + } + for (i = 0; i < ks_data.ksnd_engine_nums; i++) { + spin_lock_init(&ks_data.ksnd_engine_mgr[i].lock); + cfs_init_event(&ks_data.ksnd_engine_mgr[i].start, TRUE, FALSE); + cfs_init_event(&ks_data.ksnd_engine_mgr[i].exit, TRUE, FALSE); + CFS_INIT_LIST_HEAD(&ks_data.ksnd_engine_mgr[i].list); + cfs_kernel_thread(KsDeliveryEngineThread, &ks_data.ksnd_engine_mgr[i], 0); + } + /* register pnp handlers to watch network condition */ KsRegisterPnpHandlers(); errorout: + /* do cleanup in case we get failures */ + if (rc < 0) { + if (ks_data.ksnd_tconn_slab) { + cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); + ks_data.ksnd_tconn_slab = NULL; + } + } + return rc; } @@ -6084,10 +5940,21 @@ ks_fini_tdi_data() { PKS_TSDU KsTsdu = NULL; struct list_head * list = NULL; + int i; /* clean up the pnp handler and address slots */ KsDeregisterPnpHandlers(); + /* stop all tcp sending engines */ + for (i = 0; i < ks_data.ksnd_engine_nums; i++) { + ks_data.ksnd_engine_mgr[i].stop = TRUE; + KeSetEvent(&ks_data.ksnd_engine_mgr[i].start, 0, FALSE); + } + + for (i = 0; i < ks_data.ksnd_engine_nums; i++) { + cfs_wait_event_internal(&ks_data.ksnd_engine_mgr[i].exit, 0); + } + 
/* we need wait until all the tconn are freed */ spin_lock(&(ks_data.ksnd_tconn_lock)); @@ -6097,7 +5964,7 @@ ks_fini_tdi_data() spin_unlock(&(ks_data.ksnd_tconn_lock)); /* now wait on the tconn exit event */ - cfs_wait_event(&ks_data.ksnd_tconn_exit, 0); + cfs_wait_event_internal(&ks_data.ksnd_tconn_exit, 0); /* it's safe to delete the tconn slab ... */ cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); @@ -6135,13 +6002,13 @@ ks_fini_tdi_data() * N/A */ -ksock_tconn_t * +ks_tconn_t * ks_create_child_tconn( - ksock_tconn_t * parent + ks_tconn_t * parent ) { NTSTATUS status; - ksock_tconn_t * backlog; + ks_tconn_t * backlog; /* allocate the tdi connecton object */ backlog = ks_create_tconn(); @@ -6215,12 +6082,12 @@ errorout: void ks_replenish_backlogs( - ksock_tconn_t * parent, - int nbacklog + ks_tconn_t * parent, + int nbacklog ) { - ksock_tconn_t * backlog; - int n = 0; + ks_tconn_t * backlog; + int n = 0; /* calculate how many backlogs needed */ if ( ( parent->listener.kstc_listening.num + @@ -6272,7 +6139,7 @@ ks_replenish_backlogs( */ int -ks_start_listen(ksock_tconn_t *tconn, int nbacklog) +ks_start_listen(ks_tconn_t *tconn, int nbacklog) { int rc = 0; @@ -6280,7 +6147,7 @@ ks_start_listen(ksock_tconn_t *tconn, int nbacklog) ks_replenish_backlogs(tconn, nbacklog); /* set the event callback handlers */ - rc = ks_set_handlers(tconn); + rc = KsSetHandlers(tconn); if (rc < 0) { return rc; @@ -6296,13 +6163,13 @@ ks_start_listen(ksock_tconn_t *tconn, int nbacklog) } void -ks_stop_listen(ksock_tconn_t *tconn) +ks_stop_listen(ks_tconn_t *tconn) { struct list_head * list; - ksock_tconn_t * backlog; + ks_tconn_t * backlog; /* reset all tdi event callbacks to NULL */ - ks_reset_handlers (tconn); + KsResetHandlers (tconn); spin_lock(&tconn->kstc_lock); @@ -6310,7 +6177,7 @@ ks_stop_listen(ksock_tconn_t *tconn) /* cleanup all the listening backlog child connections */ list_for_each (list, &(tconn->listener.kstc_listening.list)) { - backlog = list_entry(list, 
ksock_tconn_t, child.kstc_link); + backlog = list_entry(list, ks_tconn_t, child.kstc_link); /* destory and free it */ ks_put_tconn(backlog); @@ -6343,12 +6210,12 @@ ks_stop_listen(ksock_tconn_t *tconn) int ks_wait_child_tconn( - ksock_tconn_t * parent, - ksock_tconn_t ** child + ks_tconn_t * parent, + ks_tconn_t ** child ) { struct list_head * tmp; - ksock_tconn_t * backlog = NULL; + ks_tconn_t * backlog = NULL; ks_replenish_backlogs(parent, parent->listener.nbacklog); @@ -6364,7 +6231,7 @@ again: /* check the listening queue and try to search the accepted connecton */ list_for_each(tmp, &(parent->listener.kstc_listening.list)) { - backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link); + backlog = list_entry (tmp, ks_tconn_t, child.kstc_link); spin_lock(&(backlog->kstc_lock)); @@ -6415,16 +6282,245 @@ again: } } + KsPrint((2, "ks_wait_child_tconn: connection %p accepted.\n", backlog)); + if (backlog) { /* query the local ip address of the connection */ ks_query_local_ipaddr(backlog); + } else { + return -EINTR; } - *child = backlog; return 0; } +int +ks_query_iovs_length(struct iovec *iov, int niov) +{ + int i; + int total = 0; + + LASSERT(iov != NULL); + LASSERT(niov > 0); + + for (i=0; i < niov; i++) { + total += iov[i].iov_len; + } + + return total; +} + +int +ks_query_kiovs_length(lnet_kiov_t *kiov, int nkiov) +{ + int i; + int total = 0; + + LASSERT(kiov != NULL); + LASSERT(nkiov > 0); + + for (i=0; i < nkiov; i++) { + total += kiov[i].kiov_len; + } + + return total; +} + +int +ks_sock_buf_cb(void *tsdu, int ns, int off, char **buf) +{ + int rc = 0; + + if (off < ns) { + *buf = (char *)tsdu + off; + rc = ns - off; + } + return rc; +} + +int +ks_sock_iov_cb(void *tsdu, int ns, int off, char **buf) +{ + int rc = 0, i; + struct iovec *iov = tsdu; + + for (i=0; i < ns; i++) { + if ((size_t)off >= iov[i].iov_len) { + off -= iov[i].iov_len; + } else { + *buf = (char *)iov[i].iov_base + off; + rc = iov[i].iov_len - off; + break; + } + } + return rc; +} + +int 
+ks_sock_kiov_cb(void *tsdu, int ns, int off, char **buf) +{ + int rc = 0, i; + lnet_kiov_t *kiov = tsdu; + + for (i=0; i < ns; i++) { + if ((size_t)off >= kiov[i].kiov_len) { + off -= kiov[i].kiov_len; + } else { + *buf = (char *)kiov[i].kiov_page->addr + + kiov[i].kiov_offset + off; + rc = kiov[i].kiov_len - off; + break; + } + } + return rc; +} + +typedef int (*ks_tsdu_cb_t)(void *tsdu, int ns, int off, char **buf); + +int +ks_sock_io(ks_tconn_t *tconn, void *tsdu, int ns, int reqlen, + int flags, int timeout, int out, ks_tsdu_cb_t callback) +{ + ULONG tflags; + BOOLEAN expedited; + PKS_TSDUMGR TsduMgr; + + int rc; + int length; + int total = 0; + int64_t remained; + PCHAR buffer; + BOOLEAN async; + + LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + remained = (int64_t)cfs_time_seconds(timeout); + + /* query tsdu manager */ + expedited = cfs_is_flag_set(flags, MSG_OOB); + TsduMgr = KsQueryTsduMgr(tconn, expedited, (BOOLEAN)out); + + /* check whether equest is nonblocking */ + if (async = cfs_is_flag_set(flags, MSG_DONTWAIT)) { + timeout = 0; + } + + ks_get_tconn(tconn); + ks_lock_tsdumgr(TsduMgr); + if ( tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child) { + rc = -EINVAL; + goto errorout; + } + + while (length = callback(tsdu, ns, total, &buffer)) { + + /* check whether socket is stil valid */ + if (tconn->kstc_state != ksts_connected) { + rc = -ENOTCONN; + goto errorout; + } + + if (out) { + tflags = KsTdiSendFlags(flags); + rc = KsWriteTsdus(TsduMgr, buffer, length, tflags); + } else { + tflags = KsTdiRecvFlags(flags); + rc = KsReadTsdus(TsduMgr, buffer, length, tflags); + } + + if (rc > 0) { + total += rc; + } else if (!async && rc == -EAGAIN) { + if (timeout) { + if (remained) { + ks_unlock_tsdumgr(TsduMgr); + remained = cfs_wait_event_internal( + &TsduMgr->Event, + remained ); + } else { + goto errorout; + } + } else { + ks_unlock_tsdumgr(TsduMgr); + cfs_wait_event_internal(&TsduMgr->Event, 0); + } + ks_lock_tsdumgr(TsduMgr); + } else { + 
break; + } + } + +errorout: + + if (!out) { + TsduMgr->Payload = reqlen - total; + } + ks_unlock_tsdumgr(TsduMgr); + + KsPrint((4, "ks_sock_io: tconn=%p tsdumgr=%p %c total=%xh/%xh rc=%d\n", + tconn, TsduMgr, out?'W':'R', total, TsduMgr->TotalBytes, rc)); + + if (total) { + if (out) { + /* signal Tdi sending engine */ + KsQueueTdiEngine(tconn, TsduMgr); + } + rc = total; + } + + ks_put_tconn(tconn); + + LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + return rc; +} + +int ks_send_buf(ks_tconn_t * tconn, char *buf, + int len, int flags, int timeout) +{ + return ks_sock_io(tconn, buf, len, len, flags, + timeout, 1, ks_sock_buf_cb); +} + +int ks_recv_buf(ks_tconn_t * tconn, char *buf, + int len, int flags, int timeout) +{ + return ks_sock_io(tconn, buf, len, len, flags, + timeout, 0, ks_sock_buf_cb); +} + +int ks_send_iovs(ks_tconn_t * tconn, struct iovec *iov, + int niov, int flags, int timeout) +{ + int reqlen = ks_query_iovs_length(iov, niov); + return ks_sock_io(tconn, iov, niov, reqlen, flags, + timeout, TRUE, ks_sock_iov_cb); +} + +int ks_recv_iovs(ks_tconn_t * tconn, struct iovec *iov, + int niov, int flags, int timeout) +{ + int reqlen = ks_query_iovs_length(iov, niov); + return ks_sock_io(tconn, iov, niov, reqlen, flags, + timeout, FALSE, ks_sock_iov_cb); +} + +int ks_send_kiovs(ks_tconn_t * tconn, lnet_kiov_t *kiov, + int nkiov, int flags, int timeout) +{ + int reqlen = ks_query_kiovs_length(kiov, nkiov); + return ks_sock_io(tconn, kiov, nkiov, reqlen, flags, + timeout, TRUE, ks_sock_kiov_cb); +} + +int ks_recv_kiovs(ks_tconn_t * tconn, lnet_kiov_t *kiov, + int nkiov, int flags, int timeout) +{ + int reqlen = ks_query_kiovs_length(kiov, nkiov); + return ks_sock_io(tconn, kiov, nkiov, reqlen, flags, + timeout, FALSE, ks_sock_kiov_cb); +} + int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) { ks_addr_slot_t * slot = NULL; @@ -6488,8 +6584,8 @@ void libcfs_ipif_free_enumeration(char **names, int n) int libcfs_sock_listen(struct socket **sockp, 
__u32 ip, int port, int backlog) { - int rc = 0; - ksock_tconn_t * parent; + int rc = 0; + ks_tconn_t * parent; parent = ks_create_tconn(); if (!parent) { @@ -6565,12 +6661,13 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal, __u32 local_ip, int local_port, __u32 peer_ip, int peer_port) { - ksock_tconn_t * tconn = NULL; + ks_tconn_t * tconn = NULL; int rc = 0; *sockp = NULL; + if (fatal) *fatal = 0; - KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n", + KsPrint((2, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n", peer_ip, peer_port, local_ip, local_port )); /* create the tdi connecion structure */ @@ -6586,7 +6683,7 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal, /* bind the local ip address with the tconn */ rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port); if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n", + KsPrint((1, "libcfs_sock_connect: failed to bind address %x:%d...\n", local_ip, local_port )); ks_free_tconn(tconn); goto errorout; @@ -6595,7 +6692,7 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal, /* connect to the remote peer */ rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port); if (rc < 0) { - KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n", + KsPrint((1, "libcfs_sock_connect: failed to connect %x:%d ...\n", peer_ip, peer_port )); ks_put_tconn(tconn); @@ -6652,61 +6749,47 @@ int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port) int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout) { int rc; - ksock_mdl_t * mdl; - int offset = 0; while (nob > offset) { - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoReadAccess, &mdl ); + rc = ks_send_buf(sock, (char *)buffer + offset, nob - offset, 0, timeout); - if (rc < 0) { - return (rc); - } - - /* send out the whole mdl */ - rc = ks_send_mdl( sock, NULL, mdl, 
nob - offset, 0 ); - - if (rc > 0) { - offset += rc; + if (rc <= 0) { + goto errorout; } else { - return (rc); + offset += rc; + rc = 0; } } - return (0); +errorout: + + KsPrint((4, "libcfs_sock_write: sock: %p %d bytes rc: %d\n", sock, offset, rc)); + return rc; } int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout) { - int rc; - ksock_mdl_t * mdl; - + int rc = 0; int offset = 0; while (nob > offset) { - /* lock the user buffer */ - rc = ks_lock_buffer( (char *)buffer + offset, - FALSE, nob - offset, IoWriteAccess, &mdl ); - - if (rc < 0) { - return (rc); - } - - /* recv the requested buffer */ - rc = ks_recv_mdl( sock, mdl, nob - offset, 0 ); + rc = ks_recv_buf(sock, (char *)buffer + offset, nob - offset, 0, timeout); - if (rc > 0) { - offset += rc; + if (rc <= 0) { + goto errorout; } else { - return (rc); + offset += rc; + rc = 0; } } - return (0); +errorout: + + KsPrint((4, "libcfs_sock_read: sock: %p %d bytes rc: %d\n", sock, offset, rc)); + return rc; } void libcfs_sock_release(struct socket *sock) diff --git a/libcfs/libcfs/winnt/winnt-tracefile.c b/libcfs/libcfs/winnt/winnt-tracefile.c index 4927aa6..6b065d7 100644 --- a/libcfs/libcfs/winnt/winnt-tracefile.c +++ b/libcfs/libcfs/winnt/winnt-tracefile.c @@ -45,52 +45,64 @@ #define put_cpu() do { } while (0) #endif -#define TCD_TYPE_MAX 1 +/* only define one trace_data type for windows */ +enum { + TCD_TYPE_PASSIVE = 0, + TCD_TYPE_DISPATCH, + TCD_TYPE_MAX +}; -event_t tracefile_event; +/* percents to share the total debug memory for each type */ +static unsigned int pages_factor[TCD_TYPE_MAX] = { + 90, /* 90% pages for TCD_TYPE_PASSIVE */ + 10 /* 10% pages for TCD_TYPE_DISPATCH */ +}; -void tracefile_init_arch() +char *trace_console_buffers[NR_CPUS][TCD_TYPE_MAX]; + +struct rw_semaphore tracefile_sem; + +int tracefile_init_arch() { int i; int j; - struct trace_cpu_data *tcd; + struct trace_cpu_data *tcd; - cfs_init_event(&tracefile_event, TRUE, TRUE); - - /* initialize trace_data */ - 
memset(trace_data, 0, sizeof(trace_data)); - for (i = 0; i < TCD_TYPE_MAX; i++) { - trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0); - if (trace_data[i] == NULL) - goto out; - } + init_rwsem(&tracefile_sem); - /* arch related info initialized */ - tcd_for_each(tcd, i, j) { - tcd->tcd_pages_factor = 100; /* Only one type */ - tcd->tcd_cpu = j; - tcd->tcd_type = i; - } + /* initialize trace_data */ + memset(trace_data, 0, sizeof(trace_data)); + for (i = 0; i < TCD_TYPE_MAX; i++) { + trace_data[i]=cfs_alloc(sizeof(union trace_data_union)*NR_CPUS, + GFP_KERNEL); + if (trace_data[i] == NULL) + goto out; + } - memset(trace_console_buffers, 0, sizeof(trace_console_buffers)); + /* arch related info initialized */ + tcd_for_each(tcd, i, j) { + tcd->tcd_pages_factor = (USHORT) pages_factor[i]; + tcd->tcd_type = (USHORT) i; + tcd->tcd_cpu = (USHORT)j; + } - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 1; j++) { + for (i = 0; i < num_possible_cpus(); i++) + for (j = 0; j < TCD_TYPE_MAX; j++) { trace_console_buffers[i][j] = cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE, - CFS_ALLOC_ZERO); + GFP_KERNEL); if (trace_console_buffers[i][j] == NULL) - goto out; + goto out; } - } return 0; out: tracefile_fini_arch(); - KsPrint((0, "lnet: No enough memory\n")); + printk(KERN_ERR "lnet: No enough memory\n"); return -ENOMEM; + } void tracefile_fini_arch() @@ -98,84 +110,104 @@ void tracefile_fini_arch() int i; int j; - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; j < 2; j++) { + for (i = 0; i < num_possible_cpus(); i++) { + for (j = 0; j < TCD_TYPE_MAX; j++) { if (trace_console_buffers[i][j] != NULL) { cfs_free(trace_console_buffers[i][j]); trace_console_buffers[i][j] = NULL; } - } - } + } + } - for (i = 0; trace_data[i] != NULL; i++) { - cfs_free(trace_data[i]); - trace_data[i] = NULL; - } + for (i = 0; trace_data[i] != NULL; i++) { + cfs_free(trace_data[i]); + trace_data[i] = NULL; + } + + fini_rwsem(&tracefile_sem); } void tracefile_read_lock() { - 
cfs_wait_event(&tracefile_event, 0); + down_read(&tracefile_sem); } void tracefile_read_unlock() { - cfs_wake_event(&tracefile_event); + up_read(&tracefile_sem); } void tracefile_write_lock() { - cfs_wait_event(&tracefile_event, 0); + down_write(&tracefile_sem); } void tracefile_write_unlock() { - cfs_wake_event(&tracefile_event); + up_write(&tracefile_sem); } char * trace_get_console_buffer(void) { -#pragma message ("is there possible problem with pre-emption ?") - int cpu = (int) KeGetCurrentProcessorNumber(); - return trace_console_buffers[cpu][0]; + int cpu = get_cpu(); + int type = 0; + + if (KeGetCurrentIrql() >= DISPATCH_LEVEL) + type = TCD_TYPE_DISPATCH; + else + type = TCD_TYPE_PASSIVE; + return trace_console_buffers[cpu][type]; } void trace_put_console_buffer(char *buffer) { + put_cpu(); } struct trace_cpu_data * trace_get_tcd(void) { -#pragma message("todo: return NULL if in interrupt context") - - int cpu = (int) KeGetCurrentProcessorNumber(); - return &(*trace_data[0])[cpu].tcd; + int cpu = get_cpu(); + int type = 0; + + if (KeGetCurrentIrql() >= DISPATCH_LEVEL) + type = TCD_TYPE_DISPATCH; + else + type = TCD_TYPE_PASSIVE; + return &(*trace_data[type])[cpu].tcd; } void -trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags) +trace_put_tcd (struct trace_cpu_data *tcd) { + put_cpu(); } -int -trace_lock_tcd(struct trace_cpu_data *tcd) +int trace_lock_tcd(struct trace_cpu_data *tcd) { - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); - return 1; + __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); + return 1; } -void -trace_unlock_tcd(struct trace_cpu_data *tcd) +void trace_unlock_tcd(struct trace_cpu_data *tcd) +{ + __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); +} + +int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) { - __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + return tcd->tcd_cpu == tage->cpu; } void set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, - const int line, unsigned long stack) + const int line, unsigned long stack) { struct timeval tv; @@ -187,16 +219,16 @@ set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, header->ph_sec = (__u32)tv.tv_sec; header->ph_usec = tv.tv_usec; header->ph_stack = stack; - header->ph_pid = current->pid; + header->ph_pid = (__u32)(ULONG_PTR)current->pid; header->ph_line_num = line; header->ph_extern_pid = 0; return; } void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, - int len, const char *file, const char *fn) + int len, const char *file, const char *fn) { - char *prefix = NULL, *ptype = NULL; + char *prefix = "Lustre", *ptype = NULL; if ((mask & D_EMERG) != 0) { prefix = "LustreError"; @@ -207,25 +239,20 @@ void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, } else if ((mask & D_WARNING) != 0) { prefix = "Lustre"; ptype = KERN_WARNING; - } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { + } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { prefix = "Lustre"; ptype = KERN_INFO; } if ((mask & D_CONSOLE) != 0) { - printk("%s%s: %s", ptype, prefix, buf); + printk("%s%s: %.*s", ptype, prefix, len, buf); } else { - printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf); + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, + hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); } return; } -int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) -{ - return 1; -} - int trace_max_debug_mb(void) { int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT)); @@ -234,7 +261,16 @@ int trace_max_debug_mb(void) } void -trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) +trace_call_on_all_cpus(void (*fn)(void *_arg), void *arg) { -#error "tbd" + int cpu; + KAFFINITY mask 
= cfs_query_thread_affinity(); + + for (cpu = 0; cpu < num_possible_cpus(); cpu++) { + if (cfs_tie_thread_to_cpu(cpu)) { + ASSERT((int)KeGetCurrentProcessorNumber() == cpu); + fn(arg); + cfs_set_thread_affinity(mask); + } + } } diff --git a/libcfs/libcfs/winnt/winnt-usr.c b/libcfs/libcfs/winnt/winnt-usr.c index 8f326a8..3c1a3de 100644 --- a/libcfs/libcfs/winnt/winnt-usr.c +++ b/libcfs/libcfs/winnt/winnt-usr.c @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: * * GPL HEADER START * @@ -36,23 +36,16 @@ #ifndef __KERNEL__ -#include -#include -#include -#include +#define _NTDDK_ #include +#include -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - char *format, ...) { - } - -int cfs_proc_mknod(const char *path, unsigned short mode, unsigned int dev) +void sleep(int time) { - return 0; + DWORD Time = 1000 * time; + Sleep(Time); } - void print_last_error(char* Prefix) { LPVOID lpMsgBuf; @@ -74,33 +67,6 @@ void print_last_error(char* Prefix) LocalFree(lpMsgBuf); } -// -// The following declarations are defined in io.h of VC -// sys/types.h will conflict with io.h, so we need place -// these declartions here. - -#ifdef __cplusplus -extern "C" { -#endif - void - __declspec (naked) __cdecl _chkesp(void) - { -#if _X86_ - __asm { jz exit_chkesp }; - __asm { int 3 }; - exit_chkesp: - __asm { ret }; -#endif - } -#ifdef __cplusplus -} -#endif - -unsigned int sleep (unsigned int seconds) -{ - Sleep(seconds * 1000); - return 0; -} int gethostname(char * name, int namelen) { @@ -117,4 +83,954 @@ int ioctl ( return 0; } -#endif /* __KERNEL__ */ + +/* + * getopt structures & routines + */ + + +/* Data type for reentrant functions. 
*/ +struct _getopt_data +{ + /* These have exactly the same meaning as the corresponding global + variables, except that they are used for the reentrant + versions of getopt. */ + int optind; + int opterr; + int optopt; + char *optarg; + + /* Internal members. */ + + /* True if the internal members have been initialized. */ + int __initialized; + + /* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + char *__nextchar; + + /* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we + scan, so that eventually all the non-options are at the end. + This allows options to be given in any order, even with programs + that were not written to expect this. + + RETURN_IN_ORDER is an option available to programs that were + written to expect options and other ARGV-elements in any order + and that care about the ordering of the two. We describe each + non-option ARGV-element as if it were the argument of an option + with character code 1. Using `-' as the first character of the + list of option characters selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. 
In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ + + enum + { + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER + } __ordering; + + /* If the POSIXLY_CORRECT environment variable is set. */ + int __posixly_correct; + + + /* Handle permutation of arguments. */ + + /* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first + of them; `last_nonopt' is the index after the last of them. */ + + int __first_nonopt; + int __last_nonopt; +}; + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns -1, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Keep a global copy of all internal members of getopt_data. */ + +static struct _getopt_data getopt_data; + + +/* Initialize the internal data when the first call is made. 
*/ + +static const char * +_getopt_initialize (int argc, char *const *argv, const char *optstring, + struct _getopt_data *d) +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + d->__first_nonopt = d->__last_nonopt = d->optind; + + d->__nextchar = NULL; + + d->__posixly_correct = 0; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + d->__ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + d->__ordering = REQUIRE_ORDER; + ++optstring; + } + else if (d->__posixly_correct) + d->__ordering = REQUIRE_ORDER; + else + d->__ordering = PERMUTE; + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns -1. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. 
+ + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. + The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. 
*/ + +#define SWAP_FLAGS(ch1, ch2) + +static void +exchange (char **argv, struct _getopt_data *d) +{ + int bottom = d->__first_nonopt; + int middle = d->__last_nonopt; + int top = d->optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + d->__first_nonopt += (d->optind - d->__last_nonopt); + d->__last_nonopt = d->optind; +} + +int +_getopt_internal_r (int argc, char *const *argv, const char *optstring, + const struct option *longopts, int *longind, + int long_only, struct _getopt_data *d) +{ + int print_errors = d->opterr; + if (optstring[0] == ':') + print_errors = 0; + + if (argc < 1) + return -1; + + d->optarg = NULL; + + if (d->optind == 0 || !d->__initialized) + { + if (d->optind == 0) + d->optind = 1; /* Don't scan ARGV[0], the program name. 
*/ + optstring = _getopt_initialize (argc, argv, optstring, d); + d->__initialized = 1; + } + + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The later information + is only used when the used in the GNU libc. */ + +# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0') + + if (d->__nextchar == NULL || *d->__nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments). */ + if (d->__last_nonopt > d->optind) + d->__last_nonopt = d->optind; + if (d->__first_nonopt > d->optind) + d->__first_nonopt = d->optind; + + if (d->__ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__last_nonopt != d->optind) + d->__first_nonopt = d->optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. */ + + while (d->optind < argc && NONOPTION_P) + d->optind++; + d->__last_nonopt = d->optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. 
*/ + + if (d->optind != argc && !strcmp (argv[d->optind], "--")) + { + d->optind++; + + if (d->__first_nonopt != d->__last_nonopt + && d->__last_nonopt != d->optind) + exchange ((char **) argv, d); + else if (d->__first_nonopt == d->__last_nonopt) + d->__first_nonopt = d->optind; + d->__last_nonopt = argc; + + d->optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (d->optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (d->__first_nonopt != d->__last_nonopt) + d->optind = d->__first_nonopt; + return -1; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if (NONOPTION_P) + { + if (d->__ordering == REQUIRE_ORDER) + return -1; + d->optarg = argv[d->optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + d->__nextchar = (argv[d->optind] + 1 + + (longopts != NULL && argv[d->optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. 
*/ + + if (longopts != NULL + && (argv[d->optind][1] == '-' + || (long_only && (argv[d->optind][2] + || !strchr (optstring, argv[d->optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = -1; + int option_index; + + for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) + == (unsigned int) strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else if (long_only + || pfound->has_arg != p->has_arg + || pfound->flag != p->flag + || pfound->val != p->val) + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (print_errors) + { + fprintf (stderr, "%s: option '%s' is ambiguous\n", + argv[0], argv[d->optind]); + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; + d->optopt = 0; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + d->optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { + + if (argv[d->optind - 1][1] == '-') + { + /* --option */ + fprintf (stderr, "%s: option '--%s' doesn't allow an argument\n", + argv[0], pfound->name); + } + else + { + /* +option or -option */ + fprintf (stderr, "%s: option '%c%s' doesn't allow an argument\n", + argv[0], argv[d->optind - 1][0], + pfound->name); + } + + } + + d->__nextchar += strlen (d->__nextchar); + + d->optopt = pfound->val; + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { + fprintf (stderr, + "%s: option '%s' requires an argument\n", + argv[0], argv[d->optind - 1]); + } + d->__nextchar += strlen (d->__nextchar); + d->optopt = pfound->val; + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[d->optind][1] == '-' + || strchr (optstring, *d->__nextchar) == NULL) + { + if (print_errors) + { + if (argv[d->optind][1] == '-') + { + /* --option */ + + fprintf (stderr, "%s: unrecognized option '--%s'\n", + argv[0], d->__nextchar); + } + else + { + /* +option or -option */ + fprintf (stderr, "%s: unrecognized option '%c%s'\n", + argv[0], argv[d->optind][0], d->__nextchar); + } + + + } + d->__nextchar = (char *) ""; + d->optind++; + d->optopt = 0; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *d->__nextchar++; + char *temp = strchr (optstring, c); + + /* Increment `optind' when we start to process its last character. 
*/ + if (*d->__nextchar == '\0') + ++d->optind; + + if (temp == NULL || c == ':') + { + if (print_errors) + { + fprintf (stderr, "%s: invalid option -- '%c'\n", argv[0], c); + } + d->optopt = c; + return '?'; + } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + fprintf (stderr, + "%s: option requires an argument -- '%c'\n", + argv[0], c); + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `d->optind' once; + increment it again when taking next ARGV-elt as argument. */ + d->optarg = argv[d->optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '='; + nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar)) + { + if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. 
*/ + ambig = 1; + } + if (ambig && !exact) + { + if (print_errors) + { + fprintf (stderr, "%s: option '-W %s' is ambiguous\n", + argv[0], argv[d->optind]); + } + d->__nextchar += strlen (d->__nextchar); + d->optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + d->optarg = nameend + 1; + else + { + if (print_errors) + { + fprintf (stderr, "\ +%s: option '-W %s' doesn't allow an argument\n", + argv[0], pfound->name); + } + + d->__nextchar += strlen (d->__nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (d->optind < argc) + d->optarg = argv[d->optind++]; + else + { + if (print_errors) + { + + fprintf (stderr, + "%s: option '%s' requires an argument\n", + argv[0], argv[d->optind - 1]); + } + d->__nextchar += strlen (d->__nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + d->__nextchar += strlen (d->__nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + d->__nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + d->optind++; + } + else + d->optarg = NULL; + d->__nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*d->__nextchar != '\0') + { + d->optarg = d->__nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. 
*/ + d->optind++; + } + else if (d->optind == argc) + { + if (print_errors) + { + fprintf (stderr, + "%s: option requires an argument -- '%c'\n", + argv[0], c); + } + d->optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + d->optarg = argv[d->optind++]; + d->__nextchar = NULL; + } + } + return c; + } +} + +int +_getopt_internal (int argc, char *const *argv, const char *optstring, + const struct option *longopts, int *longind, int long_only) +{ + int result; + + getopt_data.optind = optind; + getopt_data.opterr = opterr; + + result = _getopt_internal_r (argc, argv, optstring, longopts, + longind, long_only, &getopt_data); + + optind = getopt_data.optind; + optarg = getopt_data.optarg; + optopt = getopt_data.optopt; + + return result; +} + +int +getopt_long (int argc, char *const *argv, const char *options, + const struct option *long_options, int *opt_index) +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +#define TOLOWER(c) tolower(c) +typedef unsigned chartype; + +char * +strcasestr (phaystack, pneedle) + const char *phaystack; + const char *pneedle; +{ + register const unsigned char *haystack, *needle; + register chartype b, c; + + + haystack = (const unsigned char *) phaystack; + needle = (const unsigned char *) pneedle; + + b = TOLOWER (*needle); + if (b != '\0') + { + haystack--; /* possible ANSI violation */ + do + { + c = *++haystack; + if (c == '\0') + goto ret0; + } + while (TOLOWER (c) != (int) b); + + c = TOLOWER (*++needle); + if (c == '\0') + goto foundneedle; + ++needle; + goto jin; + + for (;;) + { + register chartype a; + register const unsigned char *rhaystack, *rneedle; + + do + { + a = *++haystack; + if (a == '\0') + goto ret0; + if (TOLOWER (a) == (int) b) + break; + a = *++haystack; + if (a == '\0') + goto ret0; +shloop: + ; + } + while (TOLOWER (a) != (int) b); + +jin: a = 
*++haystack; + if (a == '\0') + goto ret0; + + if (TOLOWER (a) != (int) c) + goto shloop; + + rhaystack = haystack-- + 1; + rneedle = needle; + a = TOLOWER (*rneedle); + + if (TOLOWER (*rhaystack) == (int) a) + do + { + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = TOLOWER (*++needle); + if (TOLOWER (*rhaystack) != (int) a) + break; + if (a == '\0') + goto foundneedle; + ++rhaystack; + a = TOLOWER (*++needle); + } + while (TOLOWER (*rhaystack) == (int) a); + + needle = rneedle; /* took the register-poor approach */ + + if (a == '\0') + break; + } + } +foundneedle: + return (char*) haystack; +ret0: + return 0; +} + +int glob (const char * __pattern, int __flags, + int (*__errfunc) (const char *, int), + glob_t * __pglob) { + + cfs_enter_debugger(); + return 0; +} + +void globfree(glob_t *__pglog) +{ +} + +int setenv(const char *envname, const char *envval, int overwrite) +{ + int rc = 0; + + if (GetEnvironmentVariable(envname, NULL, 0) == 0) { + overwrite = TRUE; + } + + if (overwrite) { + rc = SetEnvironmentVariable(envname, envval); + rc = rc > 0 ? 
0 : -1; + } else { + rc = -1; + } + return rc; +} + +int uname(struct utsname *uts) +{ + OSVERSIONINFO OsVerInfo; + + /* query computer name */ + memset(uts, 0, sizeof(struct utsname)); + strcpy(uts->sysname, "winnt"); + strcpy(uts->release, "winnt"); + + /* query system version */ + OsVerInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&OsVerInfo); + + if (OsVerInfo.dwPlatformId == VER_PLATFORM_WIN32_NT) { + if (OsVerInfo.dwMajorVersion == 6 && + OsVerInfo.dwBuildNumber > 3790) { + strcpy(uts->release, "Vista"); + } + } else { + /* we got errors */ + return -1; + } + + sprintf(uts->version, "%d.%d", OsVerInfo.dwMajorVersion, + OsVerInfo.dwMinorVersion); + return 0; +} + +struct passwd * getpwuid(uid_t uid) +{ + static struct passwd generic_passwd = {0, "root"}; + return &generic_passwd; +} + +void* pgalloc(size_t factor) +{ + LPVOID page; + + page = VirtualAlloc(NULL, CFS_PAGE_SIZE << factor, + MEM_COMMIT, PAGE_READWRITE); + return page; +} + +void pgfree(void * page) +{ + _ASSERT(page != NULL); + VirtualFree(page, 0, MEM_RELEASE); +} + +#endif /* !__KERNEL__ */ diff --git a/libcfs/libcfs/winnt/winnt-utils.c b/libcfs/libcfs/winnt/winnt-utils.c index 1977da4..e34e744 100644 --- a/libcfs/libcfs/winnt/winnt-utils.c +++ b/libcfs/libcfs/winnt/winnt-utils.c @@ -1,5 +1,5 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: * * GPL HEADER START * @@ -39,15 +39,375 @@ * miscellaneous libcfs stuff */ #define DEBUG_SUBSYSTEM S_LNET -#include +#include +#include /* - * Convert server error code to client format. Error codes are from - * Linux errno.h, so for Linux client---identity. 
+ * IDR support routines + * + * local global id <-> handle context */ -int convert_server_error(__u64 ecode) + +/* idr definitions */ + +#define IDR_BITS 7 +#define IDR_FULL 0xffffffff +#define IDR_SIZE (1 << IDR_BITS) +#define IDR_MASK ((1 << IDR_BITS)-1) +#define MAX_ID_SHIFT (sizeof(int)*8 - 1) +#define MAX_ID_BIT (1U << MAX_ID_SHIFT) +#define MAX_ID_MASK (MAX_ID_BIT - 1) +#define MAX_LEVEL (MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS +#define IDR_FREE_MAX MAX_LEVEL + MAX_LEVEL + +#define idr_set_bit(bit, v) (v) |= (1<<(bit)) +#define idr_clear_bit(bit, v) (v) &= ~(1<<(bit)) +#define idr_test_bit(bit, v) ((v) & (1<<(bit))) + +struct idr_layer { + uint32_t bitmap; + struct idr_layer *ary[IDR_SIZE]; + int count; +}; + +struct idr_context { + struct idr_layer *top; + struct idr_layer *id_free; + int layers; + int id_free_cnt; +}; + + +/* + * id (fd) <-> pointer (HANDLE) + */ + +/********************************************************** + private structures and routines for id implementation +***********************************************************/ + +static struct idr_layer *alloc_layer(struct idr_context *idp) { - return cfs_error_code((NTSTATUS)ecode); + struct idr_layer *p; + + if (!(p = idp->id_free)) + return NULL; + idp->id_free = p->ary[0]; + idp->id_free_cnt--; + p->ary[0] = NULL; + return p; +} + +static int find_next_idrbit(uint32_t bm, int maxid, int n) +{ + while (nary[0] = idp->id_free; + idp->id_free = p; + idp->id_free_cnt++; +} + +static int idr_pre_get(struct idr_context *idp) +{ + while (idp->id_free_cnt < IDR_FREE_MAX) { + struct idr_layer *new; + + new = cfs_alloc(sizeof(struct idr_layer), CFS_ALLOC_ZERO); + if(new == NULL) + return (0); + free_layer(idp, new); + } + return 1; +} + +static int sub_alloc(struct idr_context *idp, void *ptr, int *starting_id) +{ + int n, m, sh; + struct idr_layer *p, *new; + struct idr_layer *pa[MAX_LEVEL]; + int l, id; + uint32_t bm; + + memset(pa, 0, sizeof(pa)); + + id = *starting_id; + p = idp->top; + l = 
idp->layers; + pa[l--] = NULL; + while (1) { + /* + * We run around this while until we reach the leaf node... + */ + n = (id >> (IDR_BITS*l)) & IDR_MASK; + bm = ~p->bitmap; + m = find_next_idrbit(bm, IDR_SIZE, n); + if (m == IDR_SIZE) { + /* no space available go back to previous layer. */ + l++; + id = (id | ((1 << (IDR_BITS*l))-1)) + 1; + if (!(p = pa[l])) { + *starting_id = id; + return -2; + } + continue; + } + if (m != n) { + sh = IDR_BITS*l; + id = ((id >> sh) ^ n ^ m) << sh; + } + if ((id >= MAX_ID_BIT) || (id < 0)) + return -1; + if (l == 0) + break; + /* + * Create the layer below if it is missing. + */ + if (!p->ary[m]) { + if (!(new = alloc_layer(idp))) + return -1; + p->ary[m] = new; + p->count++; + } + pa[l--] = p; + p = p->ary[m]; + } + /* + * We have reached the leaf node, plant the + * users pointer and return the raw id. + */ + p->ary[m] = (struct idr_layer *)ptr; + idr_set_bit(m, p->bitmap); + p->count++; + /* + * If this layer is full mark the bit in the layer above + * to show that this part of the radix tree is full. + * This may complete the layer above and require walking + * up the radix tree. + */ + n = id; + while (p->bitmap == IDR_FULL) { + if (!(p = pa[++l])) + break; + n = n >> IDR_BITS; + idr_set_bit((n & IDR_MASK), p->bitmap); + } + return(id); +} + +static int idr_get_new_above_int(struct idr_context *idp, void *ptr, int starting_id) +{ + struct idr_layer *p, *new; + int layers, v, id; + + idr_pre_get(idp); + + id = starting_id; +build_up: + p = idp->top; + layers = idp->layers; + if (!p) { + if (!(p = alloc_layer(idp))) + return -1; + layers = 1; + } + /* + * Add a new layer to the top of the tree if the requested + * id is larger than the currently allocated space. + */ + while ((layers < MAX_LEVEL) && (id >= (1 << (layers*IDR_BITS)))) { + layers++; + if (!p->count) + continue; + if (!(new = alloc_layer(idp))) { + /* + * The allocation failed. If we built part of + * the structure tear it down. 
+ */ + for (new = p; p && p != idp->top; new = p) { + p = p->ary[0]; + new->ary[0] = NULL; + new->bitmap = new->count = 0; + free_layer(idp, new); + } + return -1; + } + new->ary[0] = p; + new->count = 1; + if (p->bitmap == IDR_FULL) + idr_set_bit(0, new->bitmap); + p = new; + } + idp->top = p; + idp->layers = layers; + v = sub_alloc(idp, ptr, &id); + if (v == -2) + goto build_up; + return(v); +} + +static int sub_remove(struct idr_context *idp, int shift, int id) +{ + struct idr_layer *p = idp->top; + struct idr_layer **pa[MAX_LEVEL]; + struct idr_layer ***paa = &pa[0]; + int n; + + *paa = NULL; + *++paa = &idp->top; + + while ((shift > 0) && p) { + n = (id >> shift) & IDR_MASK; + idr_clear_bit(n, p->bitmap); + *++paa = &p->ary[n]; + p = p->ary[n]; + shift -= IDR_BITS; + } + n = id & IDR_MASK; + if (p != NULL && idr_test_bit(n, p->bitmap)) { + idr_clear_bit(n, p->bitmap); + p->ary[n] = NULL; + while(*paa && ! --((**paa)->count)){ + free_layer(idp, **paa); + **paa-- = NULL; + } + if ( ! *paa ) + idp->layers = 0; + return 0; + } + return -1; +} + +static void *_idr_find(struct idr_context *idp, int id) +{ + int n; + struct idr_layer *p; + + n = idp->layers * IDR_BITS; + p = idp->top; + /* + * This tests to see if bits outside the current tree are + * present. If so, tain't one of ours! + */ + if ((id & ~(~0 << MAX_ID_SHIFT)) >> (n + IDR_BITS)) + return NULL; + + /* Mask off upper bits we don't use for the search. */ + id &= MAX_ID_MASK; + + while (n >= IDR_BITS && p) { + n -= IDR_BITS; + p = p->ary[(id >> n) & IDR_MASK]; + } + return((void *)p); +} + +static int _idr_remove(struct idr_context *idp, int id) +{ + struct idr_layer *p; + + /* Mask off upper bits we don't use for the search. 
*/ + id &= MAX_ID_MASK; + + if (sub_remove(idp, (idp->layers - 1) * IDR_BITS, id) == -1) { + return -1; + } + + if ( idp->top && idp->top->count == 1 && + (idp->layers > 1) && + idp->top->ary[0]) { + /* We can drop a layer */ + p = idp->top->ary[0]; + idp->top->bitmap = idp->top->count = 0; + free_layer(idp, idp->top); + idp->top = p; + --idp->layers; + } + while (idp->id_free_cnt >= IDR_FREE_MAX) { + p = alloc_layer(idp); + cfs_free(p); + } + return 0; +} + +/********************************************************** + publick interfaces of id vs handle conversion +***********************************************************/ + +/** + initialise a idr tree. + */ +struct idr_context *cfs_idr_init() +{ + struct idr_context * idp = NULL; + idp = cfs_alloc(sizeof(struct idr_context), 0); + if (idp) { + memset(idp, 0, sizeof(struct idr_context)); + } + + return idp; +} + +/** + remove an id from the idr tree +*/ +int cfs_idr_remove(struct idr_context *idp, int id) +{ + int ret; + ret = _idr_remove((struct idr_context *)idp, id); + if (ret != 0) { + CWARN("WARNING: attempt to remove unset id %d in idtree\n", id); + } + return ret; +} + +/** + allocate the next available id, and assign 'ptr' into its slot. 
+ you can retrieve later this pointer using idr_find() +*/ +int cfs_idr_get_new(struct idr_context *idp, void *ptr) +{ + int ret = idr_get_new_above_int(idp, ptr, 0); + if (ret > MAX_ID_MASK) { + cfs_idr_remove(idp, ret); + return -1; + } + return ret; +} + +/** + allocate a new id, giving the first available value greater than or + equal to the given starting id +*/ +int cfs_idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id) +{ + int ret = idr_get_new_above_int(idp, ptr, starting_id); + if (ret > MAX_ID_MASK) { + cfs_idr_remove(idp, ret); + return -1; + } + return ret; +} + +/** + find a pointer value previously set with idr_get_new given an id +*/ +void *cfs_idr_find(struct idr_context *idp, int id) +{ + return _idr_find(idp, id); +} + +/** + destroy a idr tree. + */ +void cfs_idr_exit(struct idr_context *idp) +{ + if (idp) { + cfs_free(idp); + } } /* @@ -64,7 +424,6 @@ int convert_client_oflag(int cflag, int *result) return 0; } - int cfs_error_code(NTSTATUS Status) { switch (Status) { @@ -115,12 +474,14 @@ int cfs_error_code(NTSTATUS Status) case STATUS_PORT_UNREACHABLE: return (-ECONNRESET); + case STATUS_INSUFFICIENT_RESOURCES: + return (-ENOMEM); + case STATUS_PAGEFILE_QUOTA: case STATUS_NO_MEMORY: case STATUS_CONFLICTING_ADDRESSES: case STATUS_QUOTA_EXCEEDED: case STATUS_TOO_MANY_PAGING_FILES: - case STATUS_INSUFFICIENT_RESOURCES: case STATUS_WORKING_SET_QUOTA: case STATUS_COMMITMENT_LIMIT: case STATUS_TOO_MANY_ADDRESSES: @@ -157,6 +518,8 @@ int cfs_error_code(NTSTATUS Status) case STATUS_INVALID_BUFFER_SIZE: return (-EMSGSIZE); + case STATUS_ADDRESS_ALREADY_EXISTS: + return (-EADDRINUSE); } if (NT_SUCCESS(Status)) @@ -165,12 +528,307 @@ int cfs_error_code(NTSTATUS Status) return (-EINVAL); } +/* + * Convert server error code to client format. Error codes are from + * Linux errno.h, so for Linux client---identity. 
+ */ +int convert_server_error(__u64 ecode) +{ + return cfs_error_code((NTSTATUS)ecode); +} -void cfs_stack_trace_fill(struct cfs_stack_trace *trace) +char * strsep(char **strp, const char *delim) { + char *begin, *end; + + begin = *strp; + if (begin == NULL) { + return NULL; + } + + if (delim[0] == '\0' || delim[1] == '\0') { + char ch = delim[0]; + if (ch == '\0') { + end = NULL; + } else { + if (*begin == ch) { + end = begin; + } else if (*begin == '\0') { + end = NULL; + } else { + end = strchr (begin + 1, ch); + } + } + } else { + end = strpbrk (begin, delim); + } + + if (end) { + *end++ = '\0'; + *strp = end; + } else { + *strp = NULL; + } + + return begin; } -void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) +/* + * strnchr - Find a character in a length limited string + * @s: The string to be searched + * @count: The number of characters to be searched + * @c: The character to search for + */ + +char *strnchr(const char *s, size_t count, int c) { + for (; count-- && *s != '\0'; ++s) + if (*s == (char) c) + return (char *) s; return NULL; } + +__u64 strtoull(char *nptr, char **endptr,int base) +{ + char *s = nptr; + __u64 acc, cutoff; + int c, neg = 0, any, cutlim; + + /* + * See strtol for comments as to the logic used. + */ + do { + c = *s++; + } while (cfs_isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + cutoff = (__u64)ULONG_LONG_MAX / (__u64)base; + cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base); + for (acc = 0, any = 0;; c = *s++) { + if (cfs_isdigit(c)) + c -= '0'; + else if (cfs_isalpha(c)) + c -= cfs_isupper(c) ? 
'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = ULONG_LONG_MAX; + } else if (neg) + acc = 0 - acc; + if (endptr != 0) + *endptr = (char *) (any ? s - 1 : nptr); + return (acc); +} + +#if __KERNEL__ + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. + An Adler-32 checksum is almost as reliable as a CRC32 but can be computed + much faster. Usage example: + + uLong adler = adler32(0L, NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ULONG zlib_adler32(ULONG adler, + const BYTE *buf, + UINT len) +{ + unsigned long s1 = adler & 0xffff; + unsigned long s2 = (adler >> 16) & 0xffff; + int k; + + if (buf == NULL) return 1L; + + while (len > 0) { + k = len < NMAX ? 
len : NMAX; + len -= k; + while (k >= 16) { + DO16(buf); + buf += 16; + k -= 16; + } + if (k != 0) do { + s1 += *buf++; + s2 += s1; + } while (--k); + s1 %= BASE; + s2 %= BASE; + } + return (s2 << 16) | s1; +} + +#if !defined(NTDDI_VERSION) || NTDDI_VERSION < 0x06000000 +_CRTIMP size_t __cdecl strnlen(const char * _Str, size_t _MaxCount) +{ + size_t len = 0; + while(len < _MaxCount && _Str[len++]); + return len; +} +#endif + +int (__cdecl *_cfs_isalpha)(int); +int (__cdecl *_cfs_isspace)(int); +int (__cdecl *_cfs_isupper)(int); +int (__cdecl *_cfs_isdigit)(int); +int (__cdecl *_cfs_isxdigit)(int); + +int cfs_isalpha(int c) +{ + if (_cfs_isalpha) { + return _cfs_isalpha(c); + } else { + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z')); + } +} + +int cfs_isspace(int c) +{ + if (_cfs_isspace) { + return _cfs_isspace(c); + } else { + return ((c >= 0x09 && c <= 0x0d) || + (c == 0x20)); + } +} + +int cfs_isupper(int c) +{ + if (_cfs_isupper) { + return _cfs_isupper(c); + } else { + return (c >= 'A' && c <= 'Z'); + } +} + +int cfs_isdigit(int c) +{ + if (_cfs_isdigit) { + return _cfs_isdigit(c); + } else { + return (c >= '0' && c <= '9'); + } +} + +int cfs_isxdigit(int c) +{ + if (_cfs_isxdigit) { + return _cfs_isxdigit(c); + } else { + return ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'F')); + } +} + +void cfs_libc_init() +{ + UNICODE_STRING fn; + int i; + + struct {WCHAR * name; PVOID * addr;} funcs[] = { + { L"isspace", (PVOID *)&_cfs_isspace}, + { L"isalpha", (PVOID *)&_cfs_isalpha}, + { L"isupper", (PVOID *)&_cfs_isupper}, + { L"isdigit", (PVOID *)&_cfs_isdigit}, + { L"isxdigit",(PVOID *)&_cfs_isxdigit}, + { NULL, NULL }, + }; + + for (i=0; funcs[i].name != NULL; i++) { + RtlInitUnicodeString(&fn, funcs[i].name); + *(funcs[i].addr) = MmGetSystemRoutineAddress(&fn); + } + +#if DBG + ASSERT(cfs_isspace(0x20) && cfs_isspace(0x09) && + cfs_isspace(0x0a) && cfs_isspace(0x0d) && + !cfs_isspace('a') && !cfs_isspace('0')); + 
ASSERT(cfs_isalpha('a') && cfs_isalpha('Z') && + !cfs_isalpha('0') && !cfs_isalpha('=')); + ASSERT(cfs_isupper('A') && cfs_isupper('Z') && + !cfs_isupper('a') && !cfs_isupper('=')); + ASSERT(cfs_isdigit('0') && cfs_isdigit('9') && + !cfs_isdigit('a') && !cfs_isdigit('#')); + ASSERT(cfs_isxdigit('0') && cfs_isxdigit('9') && + cfs_isxdigit('a') && cfs_isxdigit('A') && + cfs_isxdigit('F') && cfs_isxdigit('f') && + !cfs_isxdigit('G') && !cfs_isxdigit('z')); +#endif +} + +#else + +unsigned int libcfs_subsystem_debug = ~0; + +int cfs_isalpha(int c) +{ + return isalpha(c); +} + +int cfs_isspace(int c) +{ + return isspace(c); +} + +int cfs_isupper(int c) +{ + return isupper(c); +} + +int cfs_isdigit(int c) +{ + return isdigit(c); +} + +int cfs_isxdigit(int c) +{ + return isxdigit(c); +} + +void cfs_libc_init() +{ +} + + +#endif