Whamcloud - gitweb
b=16150
authormattwu <mattwu>
Tue, 7 Oct 2008 04:14:14 +0000 (04:14 +0000)
committermattwu <mattwu>
Tue, 7 Oct 2008 04:14:14 +0000 (04:14 +0000)
i=adilger
i=robert
i=liang
winnt libcfs cleanup

65 files changed:
libcfs/include/libcfs/Makefile.am
libcfs/include/libcfs/darwin/darwin-types.h
libcfs/include/libcfs/darwin/kp30.h
libcfs/include/libcfs/libcfs.h
libcfs/include/libcfs/libcfs_debug.h
libcfs/include/libcfs/libcfs_pack.h [new file with mode: 0644]
libcfs/include/libcfs/libcfs_prim.h
libcfs/include/libcfs/libcfs_private.h
libcfs/include/libcfs/libcfs_time.h
libcfs/include/libcfs/libcfs_unpack.h [new file with mode: 0644]
libcfs/include/libcfs/linux/kp30.h
libcfs/include/libcfs/linux/libcfs.h
libcfs/include/libcfs/linux/linux-lock.h
libcfs/include/libcfs/linux/linux-prim.h
libcfs/include/libcfs/linux/portals_compat25.h
libcfs/include/libcfs/list.h
libcfs/include/libcfs/posix/libcfs.h
libcfs/include/libcfs/posix/posix-types.h
libcfs/include/libcfs/posix/posix-wordsize.h
libcfs/include/libcfs/user-lock.h
libcfs/include/libcfs/user-prim.h
libcfs/include/libcfs/user-tcpip.h
libcfs/include/libcfs/user-time.h
libcfs/include/libcfs/util/platform.h
libcfs/include/libcfs/winnt/kp30.h
libcfs/include/libcfs/winnt/libcfs.h
libcfs/include/libcfs/winnt/portals_compat25.h
libcfs/include/libcfs/winnt/portals_utils.h
libcfs/include/libcfs/winnt/winnt-fs.h
libcfs/include/libcfs/winnt/winnt-lock.h
libcfs/include/libcfs/winnt/winnt-mem.h
libcfs/include/libcfs/winnt/winnt-prim.h
libcfs/include/libcfs/winnt/winnt-tcpip.h
libcfs/include/libcfs/winnt/winnt-time.h
libcfs/include/libcfs/winnt/winnt-types.h
libcfs/libcfs/darwin/darwin-tracefile.c
libcfs/libcfs/debug.c
libcfs/libcfs/linux/linux-tracefile.c
libcfs/libcfs/lwt.c
libcfs/libcfs/module.c
libcfs/libcfs/nidstrings.c
libcfs/libcfs/tracefile.c
libcfs/libcfs/tracefile.h
libcfs/libcfs/user-lock.c
libcfs/libcfs/user-mem.c
libcfs/libcfs/user-prim.c
libcfs/libcfs/user-tcpip.c
libcfs/libcfs/util/l_ioctl.c
libcfs/libcfs/util/parser.c
libcfs/libcfs/watchdog.c
libcfs/libcfs/winnt/winnt-curproc.c
libcfs/libcfs/winnt/winnt-debug.c
libcfs/libcfs/winnt/winnt-fs.c
libcfs/libcfs/winnt/winnt-lock.c
libcfs/libcfs/winnt/winnt-mem.c
libcfs/libcfs/winnt/winnt-module.c
libcfs/libcfs/winnt/winnt-native.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-prim.c
libcfs/libcfs/winnt/winnt-proc.c
libcfs/libcfs/winnt/winnt-strusup.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-sync.c
libcfs/libcfs/winnt/winnt-tcpip.c
libcfs/libcfs/winnt/winnt-tracefile.c
libcfs/libcfs/winnt/winnt-usr.c
libcfs/libcfs/winnt/winnt-utils.c

index dc2b841..c9f9478 100644 (file)
@@ -1,4 +1,3 @@
-
 SUBDIRS := linux posix util
 if DARWIN
 SUBDIRS += darwin
@@ -6,7 +5,8 @@ endif
 DIST_SUBDIRS := $(SUBDIRS)
 
 EXTRA_DIST := curproc.h libcfs_private.h libcfs.h list.h lltrace.h \
-       user-lock.h user-prim.h user-time.h \
-       user-tcpip.h user-bitops.h bitmap.h user-mem.h\
-       libcfs_prim.h libcfs_private.h libcfs_hash.h libcfs_time.h \
-       libcfs_debug.h libcfsutil.h libcfs_ioctl.h
+               user-lock.h user-prim.h user-time.h user-mem.h \
+               user-tcpip.h user-bitops.h bitmap.h \
+               libcfs_prim.h libcfs_time.h libcfs_hash.h \
+               libcfs_debug.h libcfsutil.h libcfs_ioctl.h \
+               libcfs_pack.h libcfs_unpack.h
index 1845ac2..ea9d985 100644 (file)
@@ -61,6 +61,10 @@ typedef int16_t         __s16;
 typedef int32_t         __s32;
 typedef int64_t         __s64;
 
+/* long integer with size equal to pointer */
+typedef unsigned long ulong_ptr_t;
+typedef long long_ptr_t;
+
 #ifdef __KERNEL__
 
 #include <kern/kern_types.h>
index f5e5619..1fc1a1d 100644 (file)
@@ -127,4 +127,16 @@ typedef struct {
 # define LL_POISON ((long)0x5a5a5a5a)
 # define LP_POISON ((void *)(long)0x5a5a5a5a)
 
+/*
+ * long_ptr_t & ulong_ptr_t: same as "long" for gcc
+ */
+# define LPLU "%lu"
+# define LPLD "%ld"
+# define LPLX "%#lx"
+
+/*
+ * pid_t
+ */
+# define LPPID "%d"
+
 #endif
index 5486e0c..08be620 100644 (file)
 #include "curproc.h"
 
 #ifndef offsetof
-# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
+# define offsetof(typ,memb) ((long)(long_ptr_t)((char *)&(((typ *)0)->memb)))
 #endif
 
-/* cardinality of array */
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(a) ((sizeof (a)) / (sizeof ((a)[0])))
 #endif
 /* given a pointer @ptr to the field @member embedded into type (usually
  * struct) @type, return pointer to the embedding instance of @type. */
 #define container_of(ptr, type, member) \
-        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+        ((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
 #endif
 
-#define container_of0(ptr, type, member)                        \
-({                                                              \
-        typeof(ptr) __ptr = (ptr);                              \
-        type       *__res;                                      \
-                                                                \
-        if (unlikely(IS_ERR(__ptr) || __ptr == NULL))           \
-                __res = (type *)__ptr;                          \
-        else                                                    \
-                __res = container_of(__ptr, type, member);      \
-        __res;                                                  \
-})
+static inline int __is_po2(unsigned long long val)
+{
+        return !(val & (val - 1));
+}
 
-/*
- * true iff @i is power-of-2
- */
-#define IS_PO2(i)                               \
-({                                              \
-        typeof(i) __i;                          \
-                                                \
-        __i = (i);                              \
-        !(__i & (__i - 1));                     \
-})
+#define IS_PO2(val) __is_po2((unsigned long long)(val))
 
 #define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
 
@@ -301,6 +284,18 @@ int cfs_univ2oflags(int flags);
 #include <libcfs/libcfs_prim.h>
 #include <libcfs/libcfs_time.h>
 
+/* container_of0 depends on "unlikely", which is defined in libcfs_private.h */
+static inline void *__container_of(void *ptr, unsigned long shift)
+{
+        if (unlikely(IS_ERR(ptr) || ptr == NULL))
+                return ptr;
+        else
+                return (char *)ptr - shift;
+}
+
+#define container_of0(ptr, type, member) \
+        ((type *)__container_of((void *)(ptr), offsetof(type, member)))
+
 #define _LIBCFS_H
 
 #endif /* _LIBCFS_H */
index 87037f0..379bc9c 100644 (file)
@@ -179,23 +179,23 @@ static inline int cdebug_show(unsigned int mask, unsigned int subsystem)
                 ((libcfs_debug & mask) && (libcfs_subsystem_debug & subsystem));
 }
 
-#define __CDEBUG(cdls, mask, format, a...)                              \
+#define __CDEBUG(cdls, mask, format, ...)                               \
 do {                                                                    \
         CHECK_STACK();                                                  \
                                                                         \
         if (cdebug_show(mask, DEBUG_SUBSYSTEM))                         \
                 libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask,           \
                                  __FILE__, __FUNCTION__, __LINE__,      \
-                                 format, ## a);                         \
+                                 format, ## __VA_ARGS__);               \
 } while (0)
 
-#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a)
+#define CDEBUG(mask, format, ...) __CDEBUG(NULL, mask, format, ## __VA_ARGS__)
 
-#define CDEBUG_LIMIT(mask, format, a...)        \
+#define CDEBUG_LIMIT(mask, format, ...)         \
 do {                                            \
         static cfs_debug_limit_state_t cdls;    \
                                                 \
-        __CDEBUG(&cdls, mask, format, ## a);    \
+        __CDEBUG(&cdls, mask, format, ## __VA_ARGS__);\
 } while (0)
 
 #else /* !CDEBUG_ENABLED */
@@ -203,18 +203,18 @@ static inline int cdebug_show(unsigned int mask, unsigned int subsystem)
 {
         return 0;
 }
-#define CDEBUG(mask, format, a...) (void)(0)
-#define CDEBUG_LIMIT(mask, format, a...) (void)(0)
+#define CDEBUG(mask, format, ...) (void)(0)
+#define CDEBUG_LIMIT(mask, format, ...) (void)(0)
 #warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!"
 #endif
 
 #else /* !__KERNEL__ && (!__arch_lib__ || LUSTRE_UTILS) */
 
-#define CDEBUG(mask, format, a...)                                      \
+#define CDEBUG(mask, format, ...)                                       \
 do {                                                                    \
         if (((mask) & D_CANTMASK) != 0)                                 \
                 fprintf(stderr, "(%s:%d:%s()) " format,                 \
-                        __FILE__, __LINE__, __FUNCTION__, ## a);        \
+                        __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__);\
 } while (0)
 
 #define CDEBUG_LIMIT CDEBUG
@@ -222,27 +222,27 @@ do {                                                                    \
 #endif /* !__KERNEL__ ... */
 
 
-#define CWARN(format, a...)          CDEBUG_LIMIT(D_WARNING, format, ## a)
-#define CERROR(format, a...)         CDEBUG_LIMIT(D_ERROR, format, ## a)
-#define CEMERG(format, a...)         CDEBUG_LIMIT(D_EMERG, format, ## a)
+#define CWARN(format, ...)          CDEBUG_LIMIT(D_WARNING, format, ## __VA_ARGS__)
+#define CERROR(format, ...)         CDEBUG_LIMIT(D_ERROR, format, ## __VA_ARGS__)
+#define CEMERG(format, ...)         CDEBUG_LIMIT(D_EMERG, format, ## __VA_ARGS__)
 
-#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a)
-#define LCONSOLE_INFO(format, a...)  CDEBUG_LIMIT(D_CONSOLE, format, ## a)
-#define LCONSOLE_WARN(format, a...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a)
-#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
-                           "%x-%x: " format, errnum, LERRCHKSUM(errnum),  ## a)
-#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a)
+#define LCONSOLE(mask, format, ...) CDEBUG(D_CONSOLE | (mask), format, ## __VA_ARGS__)
+#define LCONSOLE_INFO(format, ...)  CDEBUG_LIMIT(D_CONSOLE, format, ## __VA_ARGS__)
+#define LCONSOLE_WARN(format, ...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## __VA_ARGS__)
+#define LCONSOLE_ERROR_MSG(errnum, format, ...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
+                           "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## __VA_ARGS__)
+#define LCONSOLE_ERROR(format, ...) LCONSOLE_ERROR_MSG(0x00, format, ## __VA_ARGS__)
 
-#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a)
+#define LCONSOLE_EMERG(format, ...) CDEBUG(D_CONSOLE | D_EMERG, format, ## __VA_ARGS__)
 
 #ifdef CDEBUG_ENABLED
 
 #define GOTO(label, rc)                                                 \
 do {                                                                    \
-        long GOTO__ret = (long)(rc);                                    \
-        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
-               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
-               (signed long)GOTO__ret);                                 \
+        long_ptr_t GOTO__ret = (long_ptr_t)(rc);                        \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=" LPLU " : " LPLD    \
+               " : " LPLX ")\n", #label, (ulong_ptr_t)GOTO__ret,        \
+               GOTO__ret, GOTO__ret);                                   \
         goto label;                                                     \
 } while (0)
 #else
@@ -255,6 +255,7 @@ do {                                                                    \
  * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
  * there will be a warning in osx.
  */
+#if defined(__GNUC__)
 #define RETURN(rc)                                                      \
 do {                                                                    \
         typeof(rc) RETURN__ret = (rc);                                  \
@@ -263,6 +264,16 @@ do {                                                                    \
         EXIT_NESTING;                                                   \
         return RETURN__ret;                                             \
 } while (0)
+#elif defined(_MSC_VER)
+#define RETURN(rc)                                                      \
+do {                                                                    \
+        CDEBUG(D_TRACE, "Process leaving.\n");                          \
+        EXIT_NESTING;                                                   \
+        return (rc);                                                    \
+} while (0)
+#else
+# error "Unknown compiler"
+#endif /* __GNUC__ */
 
 #define ENTRY                                                           \
 ENTRY_NESTING;                                                          \
@@ -293,11 +304,11 @@ struct libcfs_debug_msg_data {
 };
 
 #define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \
-        .msg_cdls           = (cdls),       \
-        .msg_subsys         = (subsystem),  \
-        .msg_file           = (file),       \
-        .msg_fn             = (func),       \
-        .msg_line           = (ln)          \
+        /* msg_cdls */          (cdls),       \
+        /* msg_subsys */        (subsystem),  \
+        /* msg_file */          (file),       \
+        /* msg_fn */            (func),       \
+        /* msg_line */          (ln)          \
     }
 
 
@@ -311,8 +322,8 @@ extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
 #define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args)   \
     libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL)
 
-#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...)    \
-    libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a)
+#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, ...)    \
+    libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ## __VA_ARGS__)
 
 #define cdebug_va(cdls, mask, file, func, line, fmt, args)      do {          \
         CHECK_STACK();                                                        \
@@ -322,17 +333,26 @@ extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
                                   (file), (func), (line), fmt, args);         \
 } while(0);
 
-#define cdebug(cdls, mask, file, func, line, fmt, a...) do {                  \
+#define cdebug(cdls, mask, file, func, line, fmt, ...) do {                   \
         CHECK_STACK();                                                        \
                                                                               \
         if (cdebug_show(mask, DEBUG_SUBSYSTEM))                               \
                 libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask),               \
-                                 (file), (func), (line), fmt, ## a);          \
+                                 (file), (func), (line), fmt, ## __VA_ARGS__);\
 } while(0);
 
 extern void libcfs_assertion_failed(const char *expr, const char *file,
                                     const char *fn, const int line);
 
 
+#if defined(HAVE_BGL_SUPPORT)
+#define DEBUG_FILE_PATH_DEFAULT "/bgl/ion/tmp/lustre-log"
+#elif defined(__arch_um__)
+#define DEBUG_FILE_PATH_DEFAULT "/r/tmp/lustre-log"
+#elif defined(__WINNT__)
+#define DEBUG_FILE_PATH_DEFAULT "\\SystemRoot\\temp\\lustre-log"
+#else
+#define DEBUG_FILE_PATH_DEFAULT "/tmp/lustre-log"
+#endif
 
 #endif /* __LIBCFS_DEBUG_H__ */
diff --git a/libcfs/include/libcfs/libcfs_pack.h b/libcfs/include/libcfs/libcfs_pack.h
new file mode 100644 (file)
index 0000000..74ba33c
--- /dev/null
@@ -0,0 +1,5 @@
+#if !defined(__GNUC__) && defined(_MSC_VER) 
+#pragma warning(disable:4103)
+#pragma pack(push, 1)
+#endif
+
index 3885bb7..64938a5 100644 (file)
@@ -69,7 +69,7 @@ int64_t cfs_waitq_timedwait(cfs_waitlink_t *link, cfs_task_state_t state,
 /*
  * Timer
  */
-typedef  void (cfs_timer_func_t)(unsigned long);
+typedef  void (cfs_timer_func_t)(ulong_ptr_t);
 
 void cfs_init_timer(cfs_timer_t *t);
 void cfs_timer_init(cfs_timer_t *t, cfs_timer_func_t *func, void *arg);
index dd04602..9e3fe71 100644 (file)
  *
  * requires -Wall. Unfortunately this rules out use of likely/unlikely.
  */
-#define LASSERT(cond)                                           \
-({                                                              \
-        if (cond)                                               \
-                ;                                               \
-        else                                                    \
-                libcfs_assertion_failed( #cond , __FILE__,      \
-                        __FUNCTION__, __LINE__);                \
-})
-
-#define LASSERTF(cond, fmt, a...)                                       \
-({                                                                      \
+#define LASSERT(cond)                                                   \
+do {                                                                    \
+        if (cond)                                                       \
+                ;                                                       \
+        else                                                            \
+                libcfs_assertion_failed( #cond , __FILE__,              \
+                        __FUNCTION__, __LINE__);                        \
+} while(0)
+
+#define LASSERTF(cond, fmt, ...)                                        \
+do {                                                                    \
          if (cond)                                                      \
                  ;                                                      \
          else {                                                         \
                  libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG,       \
                                   __FILE__, __FUNCTION__,__LINE__,      \
                                   "ASSERTION(" #cond ") failed: " fmt,  \
-                                  ## a);                                \
+                                  ## __VA_ARGS__);                      \
                  LBUG();                                                \
          }                                                              \
-})
-
+} while(0)
 
 #else /* !LASSERT_CHECKED */
 
-#define LASSERT(cond)                                           \
-({                                                              \
-        if (unlikely(!(cond)))                                  \
-                libcfs_assertion_failed(#cond , __FILE__,       \
-                        __FUNCTION__, __LINE__);                \
-})
+#define LASSERT(cond)                                                   \
+do {                                                                    \
+        if (unlikely(!(cond)))                                          \
+                libcfs_assertion_failed(#cond , __FILE__,               \
+                        __FUNCTION__, __LINE__);                        \
+} while(0)
 
-#define LASSERTF(cond, fmt, a...)                                       \
-({                                                                      \
+#define LASSERTF(cond, fmt, ...)                                        \
+do {                                                                    \
         if (unlikely(!(cond))) {                                        \
                 libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG,        \
                                  __FILE__, __FUNCTION__,__LINE__,       \
                                  "ASSERTION(" #cond ") failed: " fmt,   \
-                                 ## a);                                 \
+                                 ## __VA_ARGS__ );                      \
                 LBUG();                                                 \
         }                                                               \
-})
-
+} while(0)
 
 #endif /* !LASSERT_CHECKED */
 #else /* !LIBCFS_DEBUG */
 /* sizeof is to use expression without evaluating it. */
 # define LASSERT(e) ((void)sizeof!!(e))
-# define LASSERTF(cond, fmt...) ((void)sizeof!!(cond))
+# define LASSERTF(cond, ...) ((void)sizeof!!(cond))
 #endif /* !LIBCFS_DEBUG */
 
 #ifdef INVARIANT_CHECK
@@ -237,10 +235,10 @@ void libcfs_debug_set_level(unsigned int debug_level);
 #  undef NDEBUG
 #  include <assert.h>
 #  define LASSERT(e)     assert(e)
-#  define LASSERTF(cond, args...)                                              \
+#  define LASSERTF(cond, ...)                                                  \
 do {                                                                           \
           if (!(cond))                                                         \
-                CERROR(args);                                                  \
+                CERROR(__VA_ARGS__);                                           \
           assert(cond);                                                        \
 } while (0)
 #  define LBUG()   assert(0)
@@ -251,12 +249,12 @@ do {                                                                           \
 #  endif
 # else
 #  define LASSERT(e) ((void)sizeof!!(e))
-#  define LASSERTF(cond, args...) ((void)sizeof!!(cond))
+#  define LASSERTF(cond, ...) ((void)sizeof!!(cond))
 #  define LBUG()   ((void)(0))
 #  define LINVRNT(exp) ((void)sizeof!!(exp))
 # endif /* LIBCFS_DEBUG */
 # define KLASSERT(e) ((void)0)
-# define printk(format, args...) printf (format, ## args)
+# define printk printf
 # ifdef CRAY_XT3                                /* buggy calloc! */
 #  define LIBCFS_ALLOC(ptr, size)               \
    do {                                         \
@@ -277,10 +275,13 @@ int libcfs_debug_cleanup(void);
  * build go below this comment. Actual compiler/compiler version
  * specific implementations come from the above header files
  */
-
+#ifdef __GNUC__
 #define likely(x)      __builtin_expect(!!(x), 1)
 #define unlikely(x)    __builtin_expect(!!(x), 0)
-
+#else
+#define likely(x)      (!!(x))
+#define unlikely(x)    (!!(x))
+#endif
 /* !__KERNEL__ */
 #endif
 
@@ -298,7 +299,7 @@ int libcfs_debug_cleanup(void);
  *       value  after  conversion...
  *
  */
-#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } })
+#define CLASSERT(cond) do {switch(42) {case (cond): case 0: break;}} while (0)
 
 /* support decl needed both by kernel and liblustre */
 int         libcfs_isknown_lnd(int type);
index 94d6862..946c2be 100644 (file)
 
 static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
 {
-        return t + d;
+        return (cfs_time_t)(t + d);
 }
 
 static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
 {
-        return t1 - t2;
+        return (cfs_time_t)(t1 - t2);
 }
 
 static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
diff --git a/libcfs/include/libcfs/libcfs_unpack.h b/libcfs/include/libcfs/libcfs_unpack.h
new file mode 100644 (file)
index 0000000..7ea2205
--- /dev/null
@@ -0,0 +1,4 @@
+#if !defined(__GNUC__) && defined(_MSC_VER) 
+#pragma warning(disable:4103)
+#pragma pack(pop)
+#endif
index 378bfc7..02a45e1 100644 (file)
@@ -354,6 +354,18 @@ extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
 # define LPF64 "l"
 #endif
 
+/*
+ * long_ptr_t & ulong_ptr_t: same as "long" for gcc
+ */
+# define LPLU "%lu"
+# define LPLD "%ld"
+# define LPLX "%#lx"
+
+/*
+ * pid_t
+ */
+# define LPPID "%d"
+
 #ifdef HAVE_SIZE_T_LONG
 # define LPSZ  "%lu"
 #else
index f2b7744..6f0a9e8 100644 (file)
@@ -117,6 +117,10 @@ typedef kernel_cap_t cfs_kernel_cap_t;
 struct cfs_stack_trace {
 };
 
+/* long integer with size equal to pointer */
+typedef unsigned long ulong_ptr_t;
+typedef long long_ptr_t;
+
 #ifndef WITH_WATCHDOG
 #define WITH_WATCHDOG
 #endif
index f294428..f1c442f 100644 (file)
@@ -82,6 +82,7 @@
  * - down_write(x)
  * - up_write(x)
  */
+#define fini_rwsem(s) do {} while(0)
 
 /*
  * rwlock_t (use Linux kernel's primitives)
index 20f3674..3414c9c 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/config.h>
 #endif
 #include <linux/module.h>
+#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/version.h>
 #include <linux/proc_fs.h>
@@ -204,15 +205,15 @@ do {                                                             \
    retval == 0; condition met; we're good.
    retval > 0; timed out.
 */
-#define cfs_waitq_wait_event_timeout(wq, condition, timeout)         \
-({                                                                   \
-       int __ret = 0;                                               \
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret)    \
+do {                                                                 \
+       ret = 0;                                                     \
        if (!(condition))                                            \
-               __wait_event_timeout(wq, condition, timeout, __ret); \
-       __ret;                                                       \
-})
+               __wait_event_timeout(wq, condition, timeout, ret);   \
+} while (0)
 #else
-#define cfs_waitq_wait_event_timeout  wait_event_timeout
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout, ret)    \
+        ret = wait_event_timeout(wq, condition, timeout)
 #endif
 
 #ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */
@@ -251,16 +252,16 @@ do {                                                           \
    retval < 0; interrupted by signal.
    retval > 0; timed out.
 */
-#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout\
-({                                                                \
-       int __ret = 0;                                            \
+#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout, ret)\
+do {                                                              \
+       ret = 0;                                                  \
        if (!(condition))                                         \
                __wait_event_interruptible_timeout(wq, condition, \
-                                               timeout, __ret);  \
-       __ret;                                                    \
-})
+                                               timeout, ret);     \
+} while (0)
 #else
-#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout
+#define cfs_waitq_wait_event_interruptible_timeout(wq, c, timeout, ret) \
+        ret = wait_event_interruptible_timeout(wq, c, timeout)
 #endif
 
 #endif
index 492fd00..38906e9 100644 (file)
 
 #endif
 
+# define cfs_wait_event_interruptible(wq, condition, ret)               \
+         ret = wait_event_interruptible(wq, condition)
+# define cfs_wait_event_interruptible_exclusive(wq, condition, ret)     \
+         ret = wait_event_interruptible(wq, condition)
+
 #if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
 #define UML_PID(tsk) ((tsk)->thread.extern_pid)
 #elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
index ed03bd5..548bdf5 100644 (file)
  * using the generic single-entry routines.
  */
 
-#ifndef __WINNT__
 #define prefetch(a) ((void)a)
-#else
-#define prefetch(a) ((void *)a)
-#endif
 
 struct list_head {
        struct list_head *next, *prev;
@@ -208,7 +204,7 @@ static inline void list_splice_init(struct list_head *list,
  * @member:    the name of the list_struct within the struct.
  */
 #define list_entry(ptr, type, member) \
-       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+       ((type *)((char *)(ptr)-(char *)(&((type *)0)->member)))
 
 /**
  * list_for_each       -       iterate over a list
@@ -253,8 +249,8 @@ struct hlist_node {
 #define NULL_P ((void *)0)
 #endif
 
-#define CFS_HLIST_HEAD_INIT { .first = NULL_P }
-#define CFS_HLIST_HEAD(name) struct hlist_head name = {  .first = NULL_P }
+#define CFS_HLIST_HEAD_INIT { NULL_P }
+#define CFS_HLIST_HEAD(name) struct hlist_head name = { NULL_P }
 #define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P)
 #define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P)
 
@@ -329,11 +325,11 @@ static inline void hlist_add_after(struct hlist_node *n,
 #define hlist_entry(ptr, type, member) container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
-       for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+       for (pos = (head)->first; pos && (prefetch(pos->next), 1); \
             pos = pos->next)
 
 #define hlist_for_each_safe(pos, n, head) \
-       for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+       for (pos = (head)->first; pos && (n = pos->next, 1); \
             pos = n)
 
 /**
@@ -395,7 +391,7 @@ static inline void hlist_add_after(struct hlist_node *n,
  * @head:      the head for your list.
  */
 #define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+       for (pos = (head)->prev, prefetch(pos->prev); pos != (head);     \
                pos = pos->prev, prefetch(pos->prev))
 
 #endif /* list_for_each_prev */
@@ -441,6 +437,7 @@ static inline void hlist_add_after(struct hlist_node *n,
                n = list_entry(pos->member.next, typeof(*pos), member); \
             &pos->member != (head);                                    \
             pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
 #endif /* list_for_each_entry_safe */
 
 #ifndef list_for_each_entry_safe_from
@@ -454,10 +451,44 @@ static inline void hlist_add_after(struct hlist_node *n,
  * Iterate over list of given type from current point, safe against
  * removal of list entry.
  */
-#define list_for_each_entry_safe_from(pos, n, head, member)                 \
-        for (n = list_entry(pos->member.next, typeof(*pos), member);        \
-             &pos->member != (head);                                        \
+#define list_for_each_entry_safe_from(pos, n, head, member)             \
+        for (n = list_entry(pos->member.next, typeof(*pos), member);    \
+             &pos->member != (head);                                    \
              pos = n, n = list_entry(n->member.next, typeof(*n), member))
 #endif /* list_for_each_entry_safe_from */
 
+#define cfs_list_for_each_entry_typed(pos, head, type, member)         \
+        for (pos = list_entry((head)->next, type, member),             \
+                    prefetch(pos->member.next);                        \
+            &pos->member != (head);                                    \
+            pos = list_entry(pos->member.next, type, member),          \
+            prefetch(pos->member.next))
+
+#define cfs_list_for_each_entry_reverse_typed(pos, head, type, member) \
+       for (pos = list_entry((head)->prev, type, member);              \
+            prefetch(pos->member.prev), &pos->member != (head);        \
+            pos = list_entry(pos->member.prev, type, member))
+
+#define cfs_list_for_each_entry_safe_typed(pos, n, head, type, member) \
+    for (pos = list_entry((head)->next, type, member),                 \
+               n = list_entry(pos->member.next, type, member);         \
+            &pos->member != (head);                                    \
+            pos = n, n = list_entry(n->member.next, type, member))
+
+#define cfs_list_for_each_entry_safe_from_typed(pos, n, head, type, member)   \
+        for (n = list_entry(pos->member.next, type, member);            \
+             &pos->member != (head);                                    \
+             pos = n, n = list_entry(n->member.next, type, member))
+#define cfs_hlist_for_each_entry_typed(tpos, pos, head, type, member)   \
+       for (pos = (head)->first;                                       \
+            pos && (prefetch(pos->next), 1) &&                         \
+               (tpos = hlist_entry(pos, type, member), 1);             \
+            pos = pos->next)
+
+#define cfs_hlist_for_each_entry_safe_typed(tpos, pos, n, head, type, member)\
+       for (pos = (head)->first;                                       \
+            pos && (n = pos->next, 1) &&                               \
+               (tpos = hlist_entry(pos, type, member), 1);             \
+            pos = n)
+
 #endif /* __LIBCFS_LUSTRE_LIST_H__ */
index b50554f..95035d2 100644 (file)
@@ -43,6 +43,7 @@
 #ifndef __LIBCFS_POSIX_LIBCFS_H__
 #define __LIBCFS_POSIX_LIBCFS_H__
 
+#include <errno.h>
 #include <sys/errno.h>
 #include <string.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
-#include <unistd.h>
 #include <fcntl.h>
 #include <limits.h>
 #include <assert.h>
+#include <sys/ioctl.h>
 #include <sys/signal.h>
 #include <signal.h>
 #include <sys/time.h>
 #include <time.h>
+#include <getopt.h>
+#include <signal.h>
+#include <pwd.h>
+#include <sys/socket.h>
+#include <sys/utsname.h>
+#include <ctype.h>
+
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 
 #ifdef HAVE_LIBPTHREAD
 #include <pthread.h>
@@ -331,7 +346,8 @@ static inline struct radix_tree_node *radix_tree_lookup0(struct radix_tree_root
         if (list_empty(&root->list))
                 return NULL;
 
-        list_for_each_entry(node, &root->list, _node)
+        cfs_list_for_each_entry_typed(node, &root->list,
+                                      struct radix_tree_node, _node)
                 if (node->index == idx)
                         return node;
 
index 19ef126..392e70e 100644 (file)
@@ -61,4 +61,8 @@ typedef unsigned int __u32;
 typedef __signed__ long long __s64;
 typedef unsigned long long __u64;
 
+/* long integer with size equal to pointer */
+typedef unsigned long ulong_ptr_t;
+typedef long long_ptr_t;
+
 #endif
index 87eb165..62bd289 100644 (file)
@@ -146,4 +146,16 @@ typedef struct {
 # error "No word size defined"
 #endif
 
+/*
+ * long_ptr_t & ulong_ptr_t: same as "long" for gcc
+ */
+# define LPLU "%lu"
+# define LPLD "%ld"
+# define LPLX "%#lx"
+
+/*
+ * pid_t
+ */
+# define LPPID "%d"
+
 #endif
index 2a38463..01c289b 100644 (file)
@@ -149,6 +149,7 @@ struct completion {
 typedef int (*cfs_wait_handler_t) (int timeout);
 void set_completion_wait_handler(cfs_wait_handler_t *handler);
 void init_completion(struct completion *c);
+void init_completion_module(cfs_wait_handler_t handler);
 void complete(struct completion *c);
 void wait_for_completion(struct completion *c);
 int wait_for_completion_interruptible(struct completion *c);
@@ -182,6 +183,7 @@ void down_write(struct rw_semaphore *s);
 int down_write_trylock(struct rw_semaphore *s);
 void up_read(struct rw_semaphore *s);
 void up_write(struct rw_semaphore *s);
+void fini_rwsem(struct rw_semaphore *s);
 
 /*
  * read-write lock : Need to be investigated more!!
@@ -348,7 +350,7 @@ static inline int mutex_is_locked(struct mutex *lock)
  **************************************************************************/
 
 struct lock_class_key {
-        ;
+        int foo;
 };
 
 static inline void lockdep_set_class(void *lock, struct lock_class_key *key)
index e723182..967cf07 100644 (file)
@@ -112,12 +112,11 @@ typedef sigset_t                        cfs_sigset_t;
 /*
  * Timer
  */
-#include <sys/time.h>
 
 typedef struct {
         struct list_head tl_list;
-        void (*function)(unsigned long unused);
-        unsigned long data;
+        void (*function)(ulong_ptr_t unused);
+        ulong_ptr_t data;
         long expires;
 } cfs_timer_t;
 
@@ -169,6 +168,7 @@ struct cfs_stack_trace {
 /*
  * arithmetic
  */
+#ifndef do_div /* gcc only, platform-specific will override */
 #define do_div(a,b)                     \
         ({                              \
                 unsigned long remainder;\
@@ -176,6 +176,34 @@ struct cfs_stack_trace {
                 (a) = (a) / (b);        \
                 (remainder);            \
         })
+#endif
+
+/* utility libcfs init/fini entries */
+#ifdef __WINNT__
+extern int libcfs_arch_init(void);
+extern void libcfs_arch_cleanup(void);
+#else /* !__WINNT__ */
+static inline int libcfs_arch_init(void) {
+        return 0;
+}
+static inline void libcfs_arch_cleanup(void) {
+}
+/* __WINNT__ */
+#endif
+
+/* proc interface wrappers for non-win OS */
+#ifndef __WINNT__
+#define cfs_proc_open   open
+#define cfs_proc_mknod  mknod
+#define cfs_proc_ioctl  ioctl
+#define cfs_proc_close  close
+#define cfs_proc_read   read
+#define cfs_proc_write  write
+#define cfs_proc_fopen  fopen
+#define cfs_proc_fclose fclose
+#define cfs_proc_fgets  fgets
+/* !__WINNT__ */
+#endif
 
 /* !__KERNEL__ */
 #endif
index 94a75d6..fe0464d 100644 (file)
@@ -73,6 +73,7 @@ int libcfs_fcntl_nonblock(int fd);
 int libcfs_sock_set_nagle(int fd, int nagle);
 int libcfs_sock_set_bufsiz(int fd, int bufsiz);
 int libcfs_sock_create(int *fdp);
+void libcfs_sock_release(int fd);
 int libcfs_sock_bind_to_port(int fd, __u16 port);
 int libcfs_sock_connect(int fd, __u32 ip, __u16 port);
 int libcfs_sock_writev(int fd, const struct iovec *vector, int count);
index 60e466a..5a5cc3e 100644 (file)
@@ -97,7 +97,7 @@
 
 typedef time_t cfs_fs_time_t;
 typedef time_t cfs_time_t;
-typedef long cfs_duration_t;
+typedef time_t cfs_duration_t;
 
 /* looks like linux */
 #define time_after(a, b) ((long)(b) - (long)(a) < 0)
@@ -110,7 +110,7 @@ static inline cfs_time_t cfs_time_current(void)
         return time(NULL);
 }
 
-static inline cfs_duration_t cfs_time_seconds(int seconds)
+static inline cfs_duration_t cfs_time_seconds(cfs_time_t seconds)
 {
         return seconds;
 }
index 43319af..137dabe 100644 (file)
@@ -249,8 +249,10 @@ static inline void l_cond_broadcast(l_cond_t *cond)
 #include <errno.h>
 #include <string.h>
 #if HAVE_LIBPTHREAD
+#ifndef __WINNT__
 #include <sys/ipc.h>
 #include <sys/shm.h>
+#endif
 #include <pthread.h>
 
 typedef pthread_mutex_t        l_mutex_t;
index 49adea2..940bf48 100644 (file)
@@ -37,9 +37,6 @@
 #ifndef __LIBCFS_WINNT_KP30_H__
 #define __LIBCFS_WINNT_KP30_H__
 
-#include <libcfs/winnt/portals_compat25.h>
-#include <lnet/types.h>
-
 #ifdef __KERNEL__
 
 /* Module parameter support */
 
 #define CFS_SYSFS_MODULE_PARM    0 /* no sysfs access to module parameters */
 
+#define cond_resched our_cond_resched
+void our_cond_resched();
 
-static inline void our_cond_resched()
-{
-    schedule_timeout(1i64);
-}
-
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
-#else
 #define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
+#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0)
+
+/* winnt panic */
+void libcfs_panic(char *msg);
+#define LIBCFS_PANIC(msg) libcfs_panic(msg)
+void libcfs_register_panic_notifier();
+void libcfs_unregister_panic_notifier();
 
-#error Need a winnt version of panic()
-#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL)
-#error libcfs_register_panic_notifier() missing
-#error libcfs_unregister_panic_notifier() missing
 
 #define cfs_work_struct_t WORK_QUEUE_ITEM
 #define cfs_prepare_work(tq, routine, contex)
@@ -82,6 +75,10 @@ static inline void our_cond_resched()
 
 #define printk                                  DbgPrint
 #define ptintf                                  DbgPrint
+#define printk_ratelimit()                      (FALSE)
+#define vprintk(f, a)                           vDbgPrintEx(DPFLTR_IHVDRIVER_ID, DPFLTR_ERROR_LEVEL, f, a)
+                                                /* vDbgPrintEx only available on xp and later OS */
+#define cfs_assert                              ASSERT
 
 #else  /* !__KERNEL__ */
 
@@ -91,6 +88,9 @@ static inline void our_cond_resched()
 # include <cygwin-ioctl.h>
 #endif
 # include <time.h>
+#include <crtdbg.h>
+
+#define cfs_assert     _ASSERT
 
 #endif /* End of !__KERNEL__ */
 
@@ -104,12 +104,12 @@ typedef struct {
         __s64      lwte_when;
         char       *lwte_where;
         void       *lwte_task;
-        long_ptr        lwte_p1;
-        long_ptr        lwte_p2;
-        long_ptr        lwte_p3;
-        long_ptr        lwte_p4;
+        long_ptr_t        lwte_p1;
+        long_ptr_t        lwte_p2;
+        long_ptr_t        lwte_p3;
+        long_ptr_t        lwte_p4;
 # if BITS_PER_LONG > 32
-        long_ptr        lwte_pad;
+        long_ptr_t        lwte_pad;
 # endif
 } lwt_event_t;
 
@@ -119,7 +119,7 @@ typedef struct {
 
 /* ------------------------------------------------------------------ */
 
-#define IOCTL_LIBCFS_TYPE long_ptr
+#define IOCTL_LIBCFS_TYPE long_ptr_t
 
 #ifdef __CYGWIN__
 # ifndef BITS_PER_LONG
@@ -133,35 +133,34 @@ typedef struct {
 
 #if BITS_PER_LONG > 32
 # define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
-# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a)
-# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long_ptr_t)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr_t)0x5a5a5a5a5a5a5a5a)
 #else
 # define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long_ptr)0x5a5a5a5a)
-# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a)
+# define LL_POISON ((long_ptr_t)0x5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr_t)0x5a5a5a5a)
 #endif
 
-#if defined(__x86_64__)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ  "%lu"
-# define LPSSZ "%ld"
-#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ  "%u"
-# define LPSSZ "%d"
-#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
-# define LPU64 "%I64u"
-# define LPD64 "%I64d"
-# define LPX64 "%I64x"
-# define LPSZ  "%u"
-# define LPSSZ "%d"
-#endif
-#ifndef LPU64
-# error "No word size defined"
+#define LPF64 "%I64d"
+#define LPU64 "%I64u"
+#define LPD64 "%I64d"
+#define LPX64 "%#I64x"
+#define LPSZ  "%lu"
+#define LPSSZ "%ld"
+
+/*
+ * printf formats for long_ptr_t & ulong_ptr_t (pointer-sized integers)
+ * and for pid values (LPPID).
+ *
+ * When _x86_ is set the values are printed as plain int-sized integers;
+ * otherwise they are printed with the "I64" size prefix.
+ */
+#if _x86_
+# define LPLU "%u"
+# define LPLD "%d"
+# define LPLX "%#x"
+# define LPPID "%d"
+#else
+# define LPLU "%I64u"
+# define LPLD "%I64d"
+# define LPLX "%#I64x"
+# define LPPID "%d"
+#endif
 
 #endif
index aaef666..cfff406 100644 (file)
 #endif
 
 /* workaround for VC compiler */
-#ifndef __FUNCTION__
-#define __FUNCTION__ "generic"
+#if _MSC_VER <= 1300
+#define __FUNCTION__ ("generic")
 #endif
 
+#include <config.h>
 #include <libcfs/winnt/winnt-types.h>
-#include <libcfs/portals_utils.h>
+#include <libcfs/list.h>
 #include <libcfs/winnt/winnt-time.h>
 #include <libcfs/winnt/winnt-lock.h>
 #include <libcfs/winnt/winnt-mem.h>
 #include <libcfs/winnt/winnt-tcpip.h>
 #include <libcfs/winnt/kp30.h>
 
-struct ptldebug_header {
-        __u32 ph_len;
-        __u32 ph_flags;
-        __u32 ph_subsys;
-        __u32 ph_mask;
-        __u32 ph_cpu_id;
-        __u32 ph_sec;
-        __u64 ph_usec;
-        __u32 ph_stack;
-        __u32 ph_pid;
-        __u32 ph_extern_pid;
-        __u32 ph_line_num;
-} __attribute__((packed));
-
 #ifdef __KERNEL__
 
 enum {
@@ -85,18 +72,25 @@ static inline __u32 query_stack_size()
 {
     ULONG   LowLimit, HighLimit;
 
-    IoGetStackLimits(&LowLimit, &HighLimit);
+    IoGetStackLimits((PULONG_PTR)&LowLimit, (PULONG_PTR)&HighLimit);
     ASSERT(HighLimit > LowLimit);
 
     return (__u32) (HighLimit - LowLimit);
 }
-#else
+
+/* disable watchdog */
+#undef WITH_WATCHDOG
+
+#else /* !__KERNEL__*/
+
+#include <libcfs/user-bitops.h>
+
 static inline __u32 query_stack_size()
 {
-   return 4096;
+   return PAGE_SIZE; /* using one page in default */
 }
-#endif
 
+#endif /* __KERNEL__*/
 
 #ifndef THREAD_SIZE
 # define THREAD_SIZE query_stack_size()
@@ -105,26 +99,8 @@ static inline __u32 query_stack_size()
 #define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
 
 #ifdef __KERNEL__
-# ifdef  __ia64__
-#  define CDEBUG_STACK() (THREAD_SIZE -                         \
-                          ((ulong_ptr)__builtin_dwarf_cfa() &   \
-                           (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK (IoGetRemainingStackSize())
-#  error "This doesn't seem right; CDEBUG_STACK should grow with the stack"
-# endif /* __ia64__ */
-
-#define CHECK_STACK()                                                   \
-do {                                                                    \
-        unsigned long _stack = CDEBUG_STACK();                          \
-                                                                        \
-        if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) {        \
-                libcfs_stack = _stack;                                  \
-                libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING,      \
-                                 __FILE__, NULL, __LINE__,              \
-                                 "maximum lustre stack %lu\n", _stack); \
-        }                                                               \
-} while (0)
+#define CDEBUG_STACK() (THREAD_SIZE - (__u32)IoGetRemainingStackSize())
+#define CHECK_STACK() do {} while(0)
 #else /* !__KERNEL__ */
 #define CHECK_STACK() do { } while(0)
 #define CDEBUG_STACK() (0L)
@@ -134,8 +110,8 @@ do {                                                                    \
 #define LUSTRE_LNET_PID          12345
 
 #define ENTRY_NESTING_SUPPORT (0)
-#define ENTRY_NESTING   do {;} while (0)
-#define EXIT_NESTING   do {;} while (0)
+#define ENTRY_NESTING   do {} while (0)
+#define EXIT_NESTING   do {} while (0)
 #define __current_nesting_level() (0)
 
 #endif /* _WINNT_LIBCFS_H */
index 17965e6..2353528 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
  *
  * GPL HEADER START
  *
 
 #ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__
 #define __LIBCFS_WINNT_PORTALS_COMPAT_H__
+#ifdef __KERNEL__
+/*
+ * Signal
+ */
 
+#define SIGNAL_MASK_ASSERT()                    do {} while(0)
+#define SIGNAL_MASK_LOCK(task, flags)           do {} while(0)
+#define SIGNAL_MASK_UNLOCK(task, flags)         do {} while(0)
+#define USERMODEHELPER(path, argv, envp)        do {} while(0)
+#define RECALC_SIGPENDING                       do {} while(0)
+#define CLEAR_SIGPENDING                        do {} while(0)
+#define CURRENT_SECONDS                         get_seconds()
+#endif
 
+#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos)        \
+        proc_dointvec(table, write, filp, buffer, lenp)
+#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos)        \
+        proc_dostring(table, write, filp, buffer, lenp)
+#define LL_PROC_PROTO(name)                                             \
+        name(cfs_sysctl_table_t *table, int write, struct file *filp,   \
+             void __user *buffer, size_t *lenp)
+#define DECLARE_LL_PROC_PPOS_DECL  loff_t *ppos = &filp->f_pos
 
 #endif /* _PORTALS_COMPAT_H */
index c9ae107..54bb4ee 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4
  *
  * GPL HEADER START
  *
 #define cfs_clear_flag(x,f)  ((x) &= ~(f))
 #endif
 
-
-static inline __u32 __do_div(__u32 * n, __u32 b) 
+static inline __u32 do_div64(__u64 * n, __u64 b) 
 {
-    __u32   mod;
+    __u64   mod;
 
     mod = *n % b;
     *n  = *n / b;
-    return mod;
+    return (__u32)mod;
 } 
 
-#define do_div(n,base)  __do_div((__u32 *)&(n), (__u32) (base))
-
+/*
+ * Divide n in place by b and yield the remainder. n must be a __u64
+ * lvalue because its address is handed to do_div64(). b is parenthesized
+ * so that the cast applies to the whole argument expression.
+ */
+#define do_div(n, b) do_div64(&(n), (__u64)(b))
 #ifdef __KERNEL__
 
 #include <stdlib.h>
 #include <libcfs/winnt/winnt-types.h>
 
 char * strsep(char **s, const char *ct);
-static inline size_t strnlen(const char * s, size_t count) {
-    size_t len = 0;
-    while(len < count && s[len++]);
-    return len;
-}
-char * ul2dstr(ulong_ptr address, char *buf, int len);
+char * ul2dstr(ulong_ptr_t address, char *buf, int len);
 
 #define simple_strtol(a1, a2, a3)               strtol(a1, a2, a3)
 #define simple_strtoll(a1, a2, a3)              (__s64)strtoull(a1, a2, a3)
@@ -84,25 +77,154 @@ char * ul2dstr(ulong_ptr address, char *buf, int len);
 
 unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base);
 
+/*
+ * Set bit nr in the bitmap at addr, addressed as an array of ULONG words
+ * holding 32 bits each. Returns the first word of the bitmap read back
+ * as an int.
+ */
+static inline int set_bit(int nr, void * addr)
+{
+    volatile ULONG *word = ((volatile ULONG *) addr) + (nr >> 5);
+
+    *word |= (1UL << (nr & 31));
+    return *((int *) addr);
+}
+
 static inline int test_bit(int nr, void * addr)
 {
-    return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0;
+    return (int)(((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0);
 }
 
-static inline void clear_bit(int nr, void * addr)
+static inline int clear_bit(int nr, void * addr)
 {
     (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31)));
+    return *((int *) addr);
 }
 
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+    int rc;
+    unsigned char  mask;
+    volatile unsigned char *ADDR = addr;
 
-static inline void set_bit(int nr, void * addr)
+    ADDR += nr >> 3;
+    mask = 1 << (nr & 0x07);
+    rc = ((mask & *ADDR) != 0);
+    *ADDR |= mask;
+
+    return rc;
+}
+
+#define ext2_set_bit(nr,addr)   (set_bit(nr, addr), 0)
+#define ext2_clear_bit(nr,addr)        (clear_bit(nr, addr), 0)
+#define ext2_test_bit(nr,addr)  test_bit(nr, addr)
+
+/*
+ * ffs - find first (least-significant) bit set
+ *
+ * Returns the 1-based position of the lowest set bit of x, or 0 when x
+ * is 0. The search halves the window (16, 8, 4, 2, 1 bits): whenever the
+ * low window is empty it is shifted out and its width added to the result.
+ */
+static inline int ffs(int x)
+{
+        int pos = 1;
+        int width;
+
+        if (x == 0)
+                return 0;
+
+        for (width = 16; width > 0; width >>= 1) {
+                if ((x & ((1 << width) - 1)) == 0) {
+                        x >>= width;
+                        pos += width;
+                }
+        }
+        return pos;
+}
+
+/*
+ * __ffs - 0-based index of the least-significant set bit of word
+ *
+ * Narrows the search window by halves; each empty low window is shifted
+ * out and its width added to the index. The 32-bit step is compiled in
+ * only when BITS_PER_LONG == 64.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+        int bit = 0;
+        int step;
+
+#if BITS_PER_LONG == 64
+        if ((word & 0xffffffff) == 0) {
+                bit += 32;
+                word >>= 32;
+        }
+#endif
+        for (step = 16; step > 0; step >>= 1) {
+                if ((word & ((1UL << step) - 1)) == 0) {
+                        bit += step;
+                        word >>= step;
+                }
+        }
+        return bit;
+}
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ *
+ * The search shifts an unsigned copy of @x: left-shifting a signed int
+ * into its sign bit is undefined behavior in C, and every positive input
+ * would otherwise end with such a shift.
+ */
+static inline
+int fls(int x)
+{
+        unsigned int v = (unsigned int)x;
+        int r = 32;
+
+        if (!v)
+                return 0;
+        if (!(v & 0xffff0000u)) {
+                v <<= 16;
+                r -= 16;
+        }
+        if (!(v & 0xff000000u)) {
+                v <<= 8;
+                r -= 8;
+        }
+        if (!(v & 0xf0000000u)) {
+                v <<= 4;
+                r -= 4;
+        }
+        if (!(v & 0xc0000000u)) {
+                v <<= 2;
+                r -= 2;
+        }
+        if (!(v & 0x80000000u)) {
+                v <<= 1;
+                r -= 1;
+        }
+        return r;
+}
+
+/*
+ * Return the index of the first set bit in the bitmap at addr, scanning
+ * it one unsigned long at a time. Returns size when none of the first
+ * size bits is set.
+ */
+static inline unsigned find_first_bit(const unsigned long *addr, unsigned size)
+{
+        unsigned x = 0;
+
+        while (x < size) {
+                unsigned long val = *addr++;
+                if (val) {
+                        x += (unsigned)__ffs(val);
+                        break;
+                }
+                x += (sizeof(*addr)<<3);
+        }
+        /*
+         * The scan works on whole words, so x can land past size when the
+         * only set bit lies beyond size in the last word, or when size is
+         * not a multiple of the word width. Clamp so "not found" is size.
+         */
+        return x < size ? x : size;
+}
 
 static inline void read_random(char *buf, int len)
 {
-    ULONG   Seed = (ULONG) buf;
+    ULONG   Seed = (ULONG)(ULONG_PTR) buf;
     Seed = RtlRandom(&Seed);
     while (len >0) {
         if (len > sizeof(ULONG)) {
@@ -116,6 +238,7 @@ static inline void read_random(char *buf, int len)
         } 
     }
 }
+
 #define get_random_bytes(buf, len)  read_random(buf, len)
 
 /* do NOT use function or expression as parameters ... */
@@ -147,12 +270,18 @@ static int copy_from_user(void *to, void *from, int c)
     return 0;
 }
 
-static int copy_to_user(void *to, void *from, int c) 
+static int copy_to_user(void *to, const void *from, int c) 
 {
     memcpy(to, from, c);
     return 0;
 }
 
+/*
+ * Zero n bytes at to.
+ *
+ * Returns the number of bytes that could NOT be cleared. The memset()
+ * here cannot fail, so the result is always 0, the same success value
+ * copy_to_user() above returns.
+ */
+static unsigned long
+clear_user(void __user *to, unsigned long n)
+{
+    memset(to, 0, n);
+    return 0;
+}
 
 #define put_user(x, ptr)        \
 (                               \
@@ -169,12 +298,46 @@ static int copy_to_user(void *to, void *from, int c)
 
 #define num_physpages                  (64 * 1024)
 
-#define snprintf  _snprintf
-#define vsnprintf _vsnprintf
+#else
 
+/* map POSIX function names onto their underscore-prefixed CRT names */
+#define unlink _unlink
+#define close  _close
+#define open   _open
+#define fdopen _fdopen
+#define strdup _strdup
+#define fileno _fileno
+#define isatty _isatty
+#define stat    _stat
 
 #endif /* !__KERNEL__ */
 
 int cfs_error_code(NTSTATUS);
 
+/*
+ * vsnprintf() wrapper over the MSVC runtime.
+ *
+ * Formats fmt/va into buf, which holds cnt bytes. Returns the value
+ * reported by the runtime, except that -1 is mapped to cnt, so callers
+ * can detect truncation with "rc >= cnt".
+ *
+ * _vsnprintf() does not write a terminating NUL when the output fills
+ * or overflows the buffer, so the last byte is forced to NUL in that
+ * case (whenever cnt > 0).
+ */
+static inline int vsnprintf(char *buf, size_t cnt,
+                            const char *fmt, va_list va)
+{
+    int rc;
+
+#ifdef TRUE /* using msvcrt from winddk 3790 */
+    rc = _vsnprintf(buf, cnt, fmt, va);
+#else
+    rc = _vsnprintf_s(buf, cnt, cnt, fmt, va);
+#endif
+    /* output did not fit (or exactly filled buf): terminate it ourselves */
+    if ((rc < 0 || (size_t)rc >= cnt) && cnt > 0)
+        buf[cnt - 1] = '\0';
+    if (rc == -1)
+        return (int)cnt;
+    return rc;
+}
+
+/*
+ * Variadic front end for the vsnprintf() wrapper: packs the trailing
+ * arguments into a va_list and returns whatever vsnprintf() returns.
+ */
+static inline int snprintf(char *buf, size_t cnt, 
+                           const char *fmt, ...)
+{
+    va_list     args;
+    int         written;
+
+    va_start(args, fmt);
+    written = vsnprintf(buf, cnt, fmt, args);
+    va_end(args);
+
+    return written;
+}
+
 #endif
index 7e0c939..e3d52c0 100644 (file)
 #define NODEV          0
 #define MKDEV(ma,mi)   (((ma) << MINORBITS) | (mi))
 
+#define PATH_MAX (260)
 
 #ifdef __KERNEL__
 
+/* linux/fs.h */
+
+#define MAY_EXEC 1
+#define MAY_WRITE 2
+#define MAY_READ 4
+#define MAY_APPEND 8
+
+#define FMODE_READ 1
+#define FMODE_WRITE 2
+
+/* Internal kernel extensions */
+#define FMODE_LSEEK    4
+#define FMODE_PREAD    8
+#define FMODE_PWRITE   FMODE_PREAD     /* These go hand in hand */
+
+/* File is being opened for execution. Primary users of this flag are
+   distributed filesystems that can use it to achieve correct ETXTBUSY
+   behavior for cross-node execution/opening_for_writing of files */
+#define FMODE_EXEC     16
+
+#define RW_MASK         1
+#define RWA_MASK        2
+#define READ 0
+#define WRITE 1
+#define READA 2         /* read-ahead  - don't block if no resources */
+#define SWRITE 3        /* for ll_rw_block() - wait for buffer lock */
+#define SPECIAL 4       /* For non-blockdevice requests in request queue */
+#define READ_SYNC       (READ | (1 << BIO_RW_SYNC))
+#define WRITE_SYNC      (WRITE | (1 << BIO_RW_SYNC))
+#define WRITE_BARRIER   ((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+
 struct file_operations
 {
-    loff_t (*lseek)(struct file * file, loff_t offset, int origin);
+    struct module *owner;
+    loff_t (*llseek)(struct file * file, loff_t offset, int origin);
     ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos);
     ssize_t (*write)(struct file * file, const char * buffer,
         size_t count, loff_t *ppos);
-    int (*ioctl) (struct file *, unsigned int, ulong_ptr);
-    int (*open) (struct file *);
-    int (*release) (struct file *);
+    int (*ioctl) (struct file *, unsigned int, ulong_ptr_t);
+    int (*open) (struct inode*, struct file *);
+    int (*release) (struct inode*, struct file *);
 };
 
 struct file {
@@ -73,23 +106,23 @@ struct file {
     cfs_handle_t            f_handle;
     unsigned int            f_flags;
     mode_t                  f_mode;
-    ulong_ptr           f_count;
-
-    //struct list_head      f_list;
-    //struct dentry *       f_dentry;
-
-    cfs_proc_entry_t *      proc_dentry;
-    cfs_file_operations_t * f_op;
+    __u32                   f_count;
 
     size_t                  f_size;
     loff_t                  f_pos;
     unsigned int            f_uid, f_gid;
     int                     f_error;
 
-    ulong_ptr           f_version;
+    __u32                   f_version;
 
-    void *                  private_data;
+    //struct list_head      f_list;
+    struct dentry *         f_dentry;
+
+    cfs_proc_entry_t *      proc_dentry;
+    cfs_file_operations_t * f_op;
 
+    void *                  private_data;
+    struct inode *          f_inode;
     char                    f_name[1];
 
 };
@@ -105,9 +138,7 @@ int cfs_filp_fsync(cfs_file_t *fp);
 int cfs_get_file(cfs_file_t *fp);
 int cfs_put_file(cfs_file_t *fp);
 int cfs_file_count(cfs_file_t *fp);
-
-
-
+#define cfs_filp_unlink(x, y) (KdBreakPoint(),0) 
 /*
  * CFS_FLOCK routines
  */
@@ -146,41 +177,212 @@ typedef struct file_lock{
 #define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
 #define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
 //#define ATTR_CTIME_SET  0x2000
-#define ATTR_BLOCKS     0x4000
+
+/*
+ * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
+ * ATTR_* attributes (see bug 13828): lustre/include/winnt/lustre_compat25.h
+ */
+/* #define ATTR_BLOCKS     0x4000 */
+#define ATTR_BLOCKS    (1 << 27)
+
 #define ATTR_KILL_SUID  0
 #define ATTR_KILL_SGID  0
 
+
+
 #define in_group_p(x)  (0)
 
-/*
- * proc fs routines
+
+/* VFS structures for windows */
+
+/* 
+ * inode formats
  */
 
-int proc_init_fs();
-void proc_destroy_fs();
+#define S_IFMT   00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK         0120000
+#define S_IFREG  0100000
+#define S_IFBLK  0060000
+#define S_IFDIR  0040000
+#define S_IFCHR  0020000
+#define S_IFIFO  0010000
+#define S_ISUID  0004000
+#define S_ISGID  0002000
+#define S_ISVTX  0001000
+
+/* Inode flags - they have nothing to do with superblock flags now */
+
+#define S_SYNC         1       /* Writes are synced at once */
+#define S_NOATIME      2       /* Do not update access times */
+#define S_APPEND       4       /* Append-only file */
+#define S_IMMUTABLE    8       /* Immutable file */
+#define S_DEAD         16      /* removed, but still open directory */
+#define S_NOQUOTA      32      /* Inode is not counted to quota */
+#define S_DIRSYNC      64      /* Directory modifications are synchronous */
+#define S_NOCMTIME     128     /* Do not update file c/mtime */
+#define S_SWAPFILE     256     /* Do not truncate: swapon got its bmaps */
+#define S_PRIVATE      512     /* Inode is fs-internal */
+
+
+struct inode {
+        __u32           i_mode;
+        __u64           i_size;
+        __u64           i_blocks;
+        struct timespec i_atime;
+        struct timespec i_ctime;
+        struct timespec i_mtime;
+        struct timespec i_dtime;
+        __u32           i_ino;
+        __u32           i_generation;
+        __u32           i_state;
+        __u32           i_blkbits;
+        int             i_uid;
+        int             i_gid;
+        __u32           i_flags;
+        mutex_t         i_sem;
+        void *          i_priv;
+};
 
+#define I_FREEING       0x0001
 
-/*
- *  misc
- */
+struct dentry {
+        atomic_t        d_count;
+        struct {
+            int         len;
+            char *      name;
+        } d_name;
+        struct inode *  d_inode;
+        struct dentry*  d_parent;
+};
 
-static inline void *ERR_PTR(long_ptr error)
+extern struct dentry *dget(struct dentry *de);
+extern void dput(struct dentry *de);
+static __inline struct dentry *lookup_one_len(const char *name, struct dentry *de, int len)
 {
-       return (void *) error;
+    cfs_enter_debugger();
+    return NULL;
 }
 
-static inline long_ptr PTR_ERR(const void *ptr)
+static inline loff_t i_size_read(const struct inode *inode)
 {
-       return (long_ptr) ptr;
+    cfs_enter_debugger();
+    return inode->i_size;
 }
 
-static inline long_ptr IS_ERR(const void *ptr)
+static inline void i_size_write(struct inode *inode, loff_t i_size)
 {
-       return (ulong_ptr)ptr > (ulong_ptr)-1000L;
+    cfs_enter_debugger();
+    inode->i_size = i_size;
 }
 
+struct kstatfs {
+        u64     f_type;
+        long    f_bsize;
+        u64     f_blocks;
+        u64     f_bfree;
+        u64     f_bavail;
+        u64     f_files;
+        u64     f_ffree;
+        __u32   f_fsid;
+        long    f_namelen;
+        long    f_frsize;
+        long    f_spare[5];
+};
+
+struct super_block {
+        void *  s_fs_info;
+};
+
+struct vfsmount {
+        struct dentry * pwd;
+        struct dentry * mnt_root;
+        struct super_block *mnt_sb;
+};
+
+
+/*
+ * quota definitions (linux/quota.h)
+ */
+
+#define MAXQUOTAS 2
+#define USRQUOTA  0            /* element used for user quotas */
+#define GRPQUOTA  1            /* element used for group quotas */
+
+
+/*
+ * proc fs routines
+ */
+
+typedef int (read_proc_t)(char *page, char **start, off_t off,
+                          int count, int *eof, void *data);
+
+struct file; /* forward ref */
+typedef int (write_proc_t)(struct file *file, const char *buffer,
+                           unsigned long count, void *data);
+
+void proc_destory_subtree(cfs_proc_entry_t *entry);
+
+int proc_init_fs();
+void proc_destroy_fs();
+
+/*
+ *  thread affinity
+ */
+
+HANDLE cfs_open_current_thread();
+void cfs_close_thread_handle(HANDLE handle);
+KAFFINITY cfs_query_thread_affinity();
+int cfs_set_thread_affinity(KAFFINITY affinity);
+int cfs_tie_thread_to_cpu(int cpu);
+typedef PVOID mm_segment_t;
+
+/*
+ * thread priority
+ */
+int cfs_set_thread_priority(KPRIORITY priority);
+
+#define MAKE_MM_SEG(s) ((mm_segment_t)(ulong_ptr_t)(s))
+#define KERNEL_DS       MAKE_MM_SEG(0xFFFFFFFFUL)
+#define USER_DS         MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()        (KERNEL_DS)
+#define set_fs(x) do {} while(0)
+#define get_fs() (NULL)
+
+/*
+ * radix tree (linux/radix_tree.h)
+ */
+
+/* radix tree root structure */
+struct radix_tree_root {
+    RTL_GENERIC_TABLE   table;
+};
+
+/* #define RADIX_TREE_INIT(mask) {0}
+
+#define RADIX_TREE(name, mask) \
+       struct radix_tree_root name RADIX_TREE_INIT(mask) */
+
+VOID RadixInitTable(IN PRTL_GENERIC_TABLE Table);
+#define INIT_RADIX_TREE(root, mask)    RadixInitTable(&((root)->table))
+
+/* all radix tree routines should be protected by external locks */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned int max_items);
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index);
+int radix_tree_insert(struct radix_tree_root *root,unsigned long index, void *item);
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index);
+
+struct rcu_head {
+    int     foo;
+};
+
 #else  /* !__KERNEL__ */
 
+#if !defined(_WINDOWS_)
+
 #define CREATE_NEW          1
 #define CREATE_ALWAYS       2
 #define OPEN_EXISTING       3
@@ -222,6 +424,13 @@ CloseHandle(
     );
 
 NTSYSAPI
+DWORD
+NTAPI
+GetLastError(
+   VOID
+   );
+
+NTSYSAPI
 HANDLE
 NTAPI
 CreateFileMappingA(
@@ -259,6 +468,7 @@ NTAPI
 UnmapViewOfFile(
     IN PVOID lpBaseAddress
     );
+#endif
 
 #endif /* __KERNEL__ */
 
@@ -266,5 +476,12 @@ typedef struct {
        void    *d;
 } cfs_dentry_t;
 
+/*
+ *  misc
+ */
+
+/*
+ * Error codes carried in pointer values:
+ *   ERR_PTR() converts an integer error to a pointer via long_ptr_t,
+ *   PTR_ERR() converts such a pointer back to a long,
+ *   IS_ERR()  is non-zero when the pointer's unsigned value is greater
+ *             than (ulong_ptr_t)(-1000L), i.e. it encodes -999..-1.
+ */
+#define ERR_PTR(error) ((void *)(long_ptr_t)(error))
+#define PTR_ERR(ptr)   ((long)(long_ptr_t) (ptr))
+#define IS_ERR(ptr)    ((long)(((ulong_ptr_t) (ptr)) > (ulong_ptr_t)(-1000L)))
 
 #endif /* __LIBCFS_WINNT_CFS_FS_H__*/
index e9b1b57..0b6bac5 100644 (file)
 
 
 /*
- *  nt specific part ...
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized here,
+ * although some of them are initialized in Linux. All locks
+ * declared by CFS_DECL_* should be initialized explicitly.
+ */
+
+/*
+ *  spinlock & event definitions
  */
 
+typedef struct spin_lock spinlock_t;
 
 /* atomic */
 
@@ -73,6 +82,13 @@ void FASTCALL atomic_dec(atomic_t *v);
 int FASTCALL atomic_dec_and_test(atomic_t *v);
 int FASTCALL atomic_inc_and_test(atomic_t *v);
 
+int FASTCALL atomic_add_return(int i, atomic_t *v);
+int FASTCALL atomic_sub_return(int i, atomic_t *v);
+
+#define atomic_inc_return(v)  atomic_add_return(1, v)
+#define atomic_dec_return(v)  atomic_sub_return(1, v)
+
+int FASTCALL atomic_dec_and_lock(atomic_t *v, spinlock_t *lock);
 
 /* event */
 
@@ -107,7 +123,7 @@ static inline void
 }
 
 /*
- * cfs_wait_event
+ * cfs_wait_event_internal
  *   To wait on an event to syncrhonize the process
  *
  * Arguments:
@@ -123,7 +139,7 @@ static inline void
  */
 
 static inline int64_t
-cfs_wait_event(event_t * event, int64_t timeout)
+cfs_wait_event_internal(event_t * event, int64_t timeout)
 {
     NTSTATUS        Status;
     LARGE_INTEGER   TimeOut;
@@ -185,16 +201,6 @@ cfs_clear_event(event_t * event)
     KeResetEvent(event);
 }
 
-
-/*
- * IMPORTANT !!!!!!!!
- *
- * All locks' declaration are not guaranteed to be initialized,
- * Althought some of they are initialized in Linux. All locks
- * declared by CFS_DECL_* should be initialized explicitly.
- */
-
-
 /*
  * spin lock defintions / routines
  */
@@ -209,29 +215,31 @@ cfs_clear_event(event_t * event)
  *
  */
 
-typedef struct spin_lock {
-
+struct spin_lock {
     KSPIN_LOCK lock;
     KIRQL      irql;
-
-} spinlock_t;
-
+};
 
 #define CFS_DECL_SPIN(name)  spinlock_t name;
 #define CFS_DECL_SPIN_EXTERN(name)  extern spinlock_t name;
 
+#define SPIN_LOCK_UNLOCKED {0}
 
 static inline void spin_lock_init(spinlock_t *lock)
 {
     KeInitializeSpinLock(&(lock->lock));
 }
 
-
 static inline void spin_lock(spinlock_t *lock)
 {
     KeAcquireSpinLock(&(lock->lock), &(lock->irql));
 }
 
+/*
+ * spin_lock_nested
+ *   Acquires the spin lock and records the previous IRQL in the
+ *   lock, with the same KeAcquireSpinLock call as spin_lock().
+ *   The subclass argument is accepted but not used.
+ */
+static inline void spin_lock_nested(spinlock_t *lock, unsigned subclass)
+{
+    KSPIN_LOCK *nt_lock  = &lock->lock;
+    KIRQL      *old_irql = &lock->irql;
+
+    KeAcquireSpinLock(nt_lock, old_irql);
+}
+
 static inline void spin_unlock(spinlock_t *lock)
 {
     KIRQL       irql = lock->irql;
@@ -248,7 +256,7 @@ static inline void spin_unlock(spinlock_t *lock)
    no way to identify the system is MP build or UP build
    on the runtime. We just uses a workaround for it. */
 
-extern int MPSystem;
+extern int libcfs_mp_system;
 
 static int spin_trylock(spinlock_t *lock)
 {
@@ -259,8 +267,8 @@ static int spin_trylock(spinlock_t *lock)
 
     KeRaiseIrql(DISPATCH_LEVEL, &Irql);
 
-    if (MPSystem) {
-        if (0 == (ulong_ptr)lock->lock) {
+    if (libcfs_mp_system) {
+        if (0 == (ulong_ptr_t)lock->lock) {
 #if _X86_
             __asm {
                 mov  edx, dword ptr [ebp + 8]
@@ -287,6 +295,16 @@ static int spin_trylock(spinlock_t *lock)
     return rc;
 }
 
+/*
+ * spin_is_locked
+ *   To test whether the spin lock is currently held
+ *
+ * Arguments:
+ *   lock:  the spinlock_t to be tested
+ *
+ * Return Value:
+ *   int:  1 if the lock is held, 0 if it is free
+ *
+ * Notes:
+ *   N/A
+ */
+static int spin_is_locked(spinlock_t *lock)
+{
+#if _WIN32_WINNT >= 0x502
+    /* KeTestSpinLock is only available on 2k3 server or later;
+       it reports TRUE for a free lock, hence the negation */
+    return (!KeTestSpinLock(&lock->lock));
+#else
+    /* compare the whole lock word against zero instead of
+       narrowing it to int, so no set bit is lost in the
+       conversion and both branches return 0 or 1 */
+    return (lock->lock != 0);
+#endif
+}
+
 /* synchronization between cpus: it will disable all DPCs
    kernel task scheduler on the CPU */
 #define spin_lock_bh(x)                    spin_lock(x)
@@ -305,7 +323,7 @@ typedef struct rw_semaphore {
 
 #define CFS_DECL_RWSEM(name) rw_semaphore_t name
 #define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name
-
+#define DECLARE_RWSEM CFS_DECL_RWSEM
 
 /*
  * init_rwsem
@@ -325,7 +343,7 @@ static inline void init_rwsem(rw_semaphore_t *s)
 {
        ExInitializeResourceLite(&s->rwsem);
 }
-
+#define rwsem_init init_rwsem
 
 /*
  * fini_rwsem
@@ -346,6 +364,7 @@ static inline void fini_rwsem(rw_semaphore_t *s)
 {
     ExDeleteResourceLite(&s->rwsem);
 }
+#define rwsem_fini fini_rwsem
 
 /*
  * down_read
@@ -365,6 +384,7 @@ static inline void down_read(struct rw_semaphore *s)
 {
        ExAcquireResourceSharedLite(&s->rwsem, TRUE);
 }
+#define down_read_nested down_read
 
 
 /*
@@ -406,7 +426,7 @@ static inline void down_write(struct rw_semaphore *s)
 {
        ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE);
 }
-
+#define down_write_nested down_write
 
 /*
  * down_write_trylock
@@ -500,6 +520,11 @@ void write_unlock(rwlock_t * rwlock);
 #define read_lock_irqsave(l, f)                do {f=0; read_lock(l);} while(0)
 #define read_unlock_irqrestore(l, f)    do {read_unlock(l);} while(0)
 
+#define write_lock_bh   write_lock
+#define write_unlock_bh write_unlock
+
+struct lock_class_key {int foo;};
+#define lockdep_set_class(lock, class) do {} while(0)
 
 /*
  * Semaphore
@@ -509,9 +534,9 @@ void write_unlock(rwlock_t * rwlock);
  * - __up(x)
  */
 
-typedef struct semaphore {
+struct semaphore{
        KSEMAPHORE sem;
-} mutex_t;
+};
 
 static inline void sema_init(struct semaphore *s, int val)
 {
@@ -524,12 +549,25 @@ static inline void __down(struct semaphore *s)
                           KernelMode, FALSE, NULL );
 
 }
-
 static inline void __up(struct semaphore *s)
 {
        KeReleaseSemaphore(&s->sem, 0, 1, FALSE);
 }
 
+/*
+ * down_trylock
+ *   To try to acquire the semaphore, waiting with a zero timeout
+ *
+ * Arguments:
+ *   s:  the semaphore to be acquired
+ *
+ * Return Value:
+ *   int:  0 if the wait returned STATUS_SUCCESS (semaphore
+ *         acquired), 1 for any other wait status
+ *
+ * Notes:
+ *   N/A
+ */
+static inline int down_trylock(struct semaphore * s)
+{
+    LARGE_INTEGER  zero_wait = {0};
+    NTSTATUS       rc;
+
+    rc = KeWaitForSingleObject(&s->sem, Executive, KernelMode,
+                               FALSE, &zero_wait);
+
+    return (rc == STATUS_SUCCESS) ? 0 : 1;
+}
+
 /*
  * mutex_t:
  *
@@ -539,6 +577,10 @@ static inline void __up(struct semaphore *s)
  * - mutex_down(x)
  */
 
+#define mutex semaphore
+typedef struct semaphore mutex_t;
+
+#define DECLARE_MUTEX(x) mutex_t x
 
 /*
  * init_mutex
@@ -553,13 +595,13 @@ static inline void __up(struct semaphore *s)
  * Notes: 
  *   N/A
  */
-
+#define mutex_init init_mutex
 static inline void init_mutex(mutex_t *mutex)
 {
     sema_init(mutex, 1);
 }
 
-
+#define init_MUTEX init_mutex
 /*
  * mutex_down
  *   To acquire the mutex lock
@@ -579,6 +621,10 @@ static inline void mutex_down(mutex_t *mutex)
     __down(mutex);
 }
 
+#define mutex_lock(m) mutex_down(m)
+#define mutex_trylock(s) down_trylock(s)
+#define mutex_lock_nested(m) mutex_down(m)
+#define down(m)       mutex_down(m)
 
 /*
  * mutex_up
@@ -599,6 +645,8 @@ static inline void mutex_up(mutex_t *mutex)
     __up(mutex);
 }
 
+#define mutex_unlock(m) mutex_up(m)
+#define up(m)           mutex_up(m)
 
 /*
  * init_mutex_locked
@@ -614,12 +662,18 @@ static inline void mutex_up(mutex_t *mutex)
  *   N/A
  */
 
-static inline init_mutex_locked(mutex_t *mutex)
+static inline void init_mutex_locked(mutex_t *mutex)
 {
     init_mutex(mutex);
     mutex_down(mutex);
 }
 
+#define init_MUTEX_LOCKED init_mutex_locked
+
+static inline void mutex_destroy(mutex_t *mutex)
+{
+}
+
 /*
  * completion
  *
@@ -689,14 +743,15 @@ static inline void complete(struct completion *c)
 
 static inline void wait_for_completion(struct completion *c)
 {
-    cfs_wait_event(&(c->event), 0);
+    cfs_wait_event_internal(&(c->event), 0);
 }
 
-/* __KERNEL__ */
-#else
-
-#include "../user-lock.h"
+/*
+ * wait_for_completion_interruptible
+ *   Waits on the completion's event with the same call as
+ *   wait_for_completion(); the result of the wait is discarded.
+ *
+ * Return Value:
+ *   int:  always 0
+ */
+static inline int wait_for_completion_interruptible(struct completion *c)
+{
+    (void) cfs_wait_event_internal(&c->event, 0);
+
+    return 0;
+}
 
-/* __KERNEL__ */
-#endif
+#else  /* !__KERNEL__ */
+#endif /* !__KERNEL__ */
 #endif
index c5cbfa0..211ffef 100644 (file)
 #error Do not #include this file directly. #include <libcfs/libcfs.h> instead
 #endif
 
+#include <libcfs/winnt/portals_utils.h>
+
 #ifdef __KERNEL__
 
+typedef struct cfs_mem_cache cfs_mem_cache_t;
+
+/*
+ * page definitions
+ */
+
 #define CFS_PAGE_SIZE                   PAGE_SIZE
 #define CFS_PAGE_SHIFT                  PAGE_SHIFT
 #define CFS_PAGE_MASK                   (~(PAGE_SIZE - 1))
 typedef struct cfs_page {
     void *      addr;
     atomic_t    count;
+    void *      private;
+    void *      mapping;
+    __u32       index;
+    __u32       flags;
 } cfs_page_t;
 
+#define page cfs_page
+
+#ifndef page_private
+#define page_private(page) ((page)->private)
+#define set_page_private(page, v) ((page)->private = (v))
+#endif
+
+#define page_count(page) (0)
+
+#define PG_locked               0      /* Page is locked. Don't touch. */
+#define PG_error                1
+#define PG_referenced           2
+#define PG_uptodate             3
+
+#define PG_dirty                4
+#define PG_lru                  5
+#define PG_active               6
+#define PG_slab                         7      /* slab debug (Suparna wants this) */
+
+#define PG_owner_priv_1                 8      /* Owner use. If pagecache, fs may use*/
+#define PG_arch_1               9
+#define PG_reserved            10
+#define PG_private             11      /* If pagecache, has fs-private data */
+
+#define PG_writeback           12      /* Page is under writeback */
+#define PG_compound            14      /* Part of a compound page */
+#define PG_swapcache           15      /* Swap page: swp_entry_t in private */
+
+#define PG_mappedtodisk                16      /* Has blocks allocated on-disk */
+#define PG_reclaim             17      /* To be reclaimed asap */
+#define PG_buddy               19      /* Page is free, on buddy lists */
+
+#define PG_virt         31  /* addr is not */
+
+#ifndef arch_set_page_uptodate
+#define arch_set_page_uptodate(page)
+#endif
+
+/* Make it prettier to test the above... */
+#define UnlockPage(page)        unlock_page(page)
+#define Page_Uptodate(page)     test_bit(PG_uptodate, &(page)->flags)
+#define SetPageUptodate(page) \
+       do {                                                            \
+               arch_set_page_uptodate(page);                           \
+               set_bit(PG_uptodate, &(page)->flags);                   \
+       } while (0)
+#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags)
+#define PageDirty(page)         test_bit(PG_dirty, &(page)->flags)
+#define SetPageDirty(page)      set_bit(PG_dirty, &(page)->flags)
+#define ClearPageDirty(page)    clear_bit(PG_dirty, &(page)->flags)
+#define PageLocked(page)        test_bit(PG_locked, &(page)->flags)
+#define LockPage(page)          set_bit(PG_locked, &(page)->flags)
+#define TryLockPage(page)       test_and_set_bit(PG_locked, &(page)->flags)
+/*
+ * NOTE(review): PG_checked and PG_launder do not appear in the PG_* bit
+ * list defined earlier in this header. The six macros below only expand
+ * to valid code if those constants are provided elsewhere -- confirm.
+ */
+#define PageChecked(page)       test_bit(PG_checked, &(page)->flags)
+#define SetPageChecked(page)    set_bit(PG_checked, &(page)->flags)
+#define ClearPageChecked(page)  clear_bit(PG_checked, &(page)->flags)
+#define PageLaunder(page)       test_bit(PG_launder, &(page)->flags)
+#define SetPageLaunder(page)    set_bit(PG_launder, &(page)->flags)
+#define ClearPageLaunder(page)  clear_bit(PG_launder, &(page)->flags)
+#define ClearPageArch1(page)    clear_bit(PG_arch_1, &(page)->flags)
+
+#define PageError(page)                test_bit(PG_error, &(page)->flags)
+#define SetPageError(page)     set_bit(PG_error, &(page)->flags)
+#define ClearPageError(page)   clear_bit(PG_error, &(page)->flags)
+#define PageReferenced(page)    test_bit(PG_referenced, &(page)->flags)
+#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags)
+#define ClearPageReferenced(page)       clear_bit(PG_referenced, &(page)->flags)
+
+#define PageActive(page)        test_bit(PG_active, &(page)->flags)
+#define SetPageActive(page)     set_bit(PG_active, &(page)->flags)
+#define ClearPageActive(page)   clear_bit(PG_active, &(page)->flags)
+
+#define PageWriteback(page)    test_bit(PG_writeback, &(page)->flags)
+#define TestSetPageWriteback(page) test_and_set_bit(PG_writeback,      \
+                                                       &(page)->flags)
+#define TestClearPageWriteback(page) test_and_clear_bit(PG_writeback,  \
+                                                       &(page)->flags)
+
+#define __GFP_FS    (1)
+#define GFP_KERNEL  (2)
+#define GFP_ATOMIC  (4)
 
 cfs_page_t *cfs_alloc_page(int flags);
 void cfs_free_page(cfs_page_t *pg);
+void cfs_release_page(cfs_page_t *pg);
+cfs_page_t * virt_to_page(void * addr);
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
+void __cfs_free_pages(cfs_page_t *page, unsigned int order);
+int cfs_mem_is_in_cache(const void *addr, const cfs_mem_cache_t *kmem);
+
+#define page_cache_get(a) do {} while (0)
+#define page_cache_release(a) do {} while (0)
 
 static inline void *cfs_page_address(cfs_page_t *page)
 {
@@ -90,15 +191,18 @@ static inline int cfs_page_count(cfs_page_t *page)
     return atomic_read(&page->count);
 }
 
+#define cfs_page_index(p)       ((p)->index)
+
 /*
  * Memory allocator
  */
 
 #define CFS_ALLOC_ATOMIC_TRY   (0)
-
 extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
 extern void  cfs_free(void *addr);
 
+#define kmalloc cfs_alloc
+
 extern void *cfs_alloc_large(size_t nr_bytes);
 extern void  cfs_free_large(void *addr);
 
@@ -110,20 +214,36 @@ extern void  cfs_free_large(void *addr);
 
 /* The cache name is limited to 20 chars */
 
-typedef struct cfs_mem_cache {
-
+struct cfs_mem_cache {
     char                    name[20];
-    ulong_ptr           flags;
+    ulong_ptr_t             flags;
     NPAGED_LOOKASIDE_LIST   npll;
-
-} cfs_mem_cache_t;
+};
 
 
-extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr);
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
 extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
 extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
 extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
 
+/*
+ * shrinker 
+ */
+/* Shrink callback: receives a scan count and a gfp mask, returns int. */
+typedef int (*shrink_callback)(int nr_to_scan, gfp_t gfp_mask);
+
+/* One registered shrinker: its callback, its seek cost and bookkeeping. */
+struct shrinker {
+    shrink_callback     cb;     /* callback supplied at registration */
+    int                 seeks;  /* seeks to recreate an obj */
+
+    /* These are for internal use */
+    struct list_head    list;
+    long                nr;     /* objs pending delete */
+};
+
+struct shrinker * set_shrinker(int seeks, shrink_callback cb);
+void remove_shrinker(struct shrinker *s);
+
+int start_shrinker_timer();
+void stop_shrinker_timer();
 
 /*
  * Page allocator slabs 
@@ -142,8 +262,20 @@ extern cfs_mem_cache_t *cfs_page_p_slab;
 #define rmb()   mb()
 #define wmb()   mb()
 
+/*
+ * MM definitions from (linux/mm.h)
+ */
 
-/* __KERNEL__ */
-#endif
+#define DEFAULT_SEEKS 2 /* shrink seek */
+
+#else  /* !__KERNEL__ */
+
+#include "../user-mem.h"
+
+/* page aligned buffer allocation */
+void* pgalloc(size_t factor);
+void  pgfree(void * page);
+
+#endif /* __KERNEL__ */
 
 #endif /* __WINNT_CFS_MEM_H__ */
index 87f905b..6bef4c1 100644 (file)
 #error Do not #include this file directly. #include <libcfs/libcfs.h> instead
 #endif
 
-
 /*
  * libcfs proc device object
  */
 
 
-#define LUSTRE_PROC_DEVICE  L"\\Device\\lproc"      /* proc fs emulator device object */
-#define LUSTRE_PROC_SYMLNK  L"\\DosDevices\\lproc"  /* proc fs user-visible device */
+#define LUSTRE_PROC_DEVICE  L"\\Device\\LNetProcFS"      /* proc fs emulator device object */
+#define LUSTRE_PROC_SYMLNK  L"\\DosDevices\\LNetProcFS"  /* proc fs user-visible device */
 
 
 /*
@@ -61,8 +60,6 @@
 
 #define FILE_DEVICE_LIBCFS      ('LC')
 
-#define FILE_DEVICE_LIBCFS      ('LC')
-
 #define FUNC_LIBCFS_VERSION     0x101  // get version of current libcfs
 #define FUNC_LIBCFS_IOCTL       0x102  // Device i/o control to proc fs
 
      CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL,   METHOD_BUFFERED, FILE_ANY_ACCESS)
 
 #pragma pack(4)
-
 typedef struct _CFS_PROC_IOCTL {
 
     ULONG           cmd;    // ioctl command identifier
     ULONG           len;    // length of data
+    int             rc;     // return code
+    ULONG           usused; // unused 
 
     // UCHAR        data[]; // content of the real ioctl
 
 } CFS_PROC_IOCTL, *PCFS_PROC_IOCTL;
-
 #pragma pack()
 
 #ifdef __KERNEL__
 
-#include <libcfs/list.h>
+void cfs_enter_debugger(void);
+#define __builtin_return_address(x) (0)
 
 /*
  * Symbol functions for libcfs
@@ -110,8 +108,6 @@ extern void *   cfs_symbol_get(const char *);
 extern void     cfs_symbol_put(const char *);
 extern void     cfs_symbol_clean();
 
-
-
 typedef struct file_operations cfs_file_operations_t;
 typedef struct file cfs_file_t;
 
@@ -135,9 +131,9 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev);
  */
 
 typedef int cfs_read_proc_t(char *page, char **start, off_t off,
-                         int count, int *eof, void *data);
+                            int count, int *eof, void *data);
 typedef int cfs_write_proc_t(struct file *file, const char *buffer,
-                          ulong_ptr count, void *data);
+                             unsigned long count, void *data);
 
 #define CFS_PROC_ENTRY_MAGIC 'CPEM'
 
@@ -154,6 +150,8 @@ typedef struct cfs_proc_entry
         PRTL_SPLAY_LINKS    root;
     };
 
+    struct cfs_proc_entry  *parent;
+
     struct _file_entry {                // proc file / leaf entry
            cfs_read_proc_t  *  read_proc;
            cfs_write_proc_t *  write_proc;
@@ -161,10 +159,11 @@ typedef struct cfs_proc_entry
 
     mode_t                  mode;
     unsigned short          nlink;
+    BOOLEAN                 deleted;
 
        
-    struct file_operations * proc_fops;
-       void * data;
+    struct file_operations *proc_fops;
+    void                   *data;
 
     // proc_dir_entry ended.
 
@@ -179,28 +178,32 @@ typedef struct cfs_proc_entry
 } cfs_proc_entry_t, cfs_proc_dir_entry_t;
 
 typedef cfs_proc_entry_t cfs_proc_dir_entry_t;
+#define proc_dir_entry cfs_proc_entry
 
 #define PROC_BLOCK_SIZE    PAGE_SIZE
 
+struct proc_dir_entry *PDE(const struct inode *inode);
+
+
 /*
  * Sysctl register
  */
 
-typedef struct ctl_table                   cfs_sysctl_table_t;
-typedef struct ctl_table_header                cfs_sysctl_table_header_t;
+typedef struct ctl_table                cfs_sysctl_table_t;
+typedef struct ctl_table_header         cfs_sysctl_table_header_t;
 
 
 typedef int ctl_handler (
             cfs_sysctl_table_t *table,
-            int *name,    int nlen,
-                       void *oldval, size_t *oldlenp,
-                       void *newval, size_t newlen, 
-                       void **context );
+            int *name,  int nlen,
+            void *oldval, size_t *oldlenp,
+            void *newval, size_t newlen, 
+            void **context );
 
 typedef int proc_handler (
             cfs_sysctl_table_t *ctl,
             int write, struct file * filp,
-                       void *buffer, size_t *lenp );
+            void *buffer, size_t *lenp );
 
 
 int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
@@ -213,7 +216,6 @@ int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
                  void *oldval, size_t *oldlenp,
                  void *newval, size_t newlen, void **context);
 
-
 /*
  *  System io control definitions
  */
@@ -261,21 +263,107 @@ struct ctl_table_header
        struct list_head        ctl_entry;
 };
 
+/* proc root entries, support routines */
+extern cfs_proc_entry_t *  cfs_proc_root;   /* / */
+extern cfs_proc_entry_t *  cfs_proc_proc;   /* /proc */
+extern cfs_proc_entry_t *  cfs_proc_fs;     /* /proc/fs */
+extern cfs_proc_entry_t *  cfs_proc_sys;    /* /proc/sys */
+extern cfs_proc_entry_t *  cfs_proc_dev;    /* /dev */
 
-cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod,
+cfs_proc_entry_t * create_proc_entry(const char *name, mode_t mod,
                                          cfs_proc_entry_t *parent);
 void proc_free_entry(cfs_proc_entry_t *de);
-void remove_proc_entry(char *name, cfs_proc_entry_t *entry);
-cfs_proc_entry_t * search_proc_entry(char * name,
+void remove_proc_entry(const char *name, cfs_proc_entry_t *entry);
+cfs_proc_entry_t * search_proc_entry(const char * name,
                         cfs_proc_entry_t *  root );
+cfs_proc_entry_t *proc_symlink(const char *name,
+                                      cfs_proc_entry_t *parent,
+                               const char *dest);
+cfs_proc_entry_t *proc_mkdir(const char *name,
+                                    cfs_proc_entry_t *parent);
 
 #define cfs_create_proc_entry create_proc_entry
 #define cfs_free_proc_entry   proc_free_entry
 #define cfs_remove_proc_entry remove_proc_entry
 
-#define register_cfs_sysctl_table(t, a)        register_sysctl_table(t, a)
-#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a)
+struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table,
+                                               int insert_at_head);
+void unregister_sysctl_table(struct ctl_table_header * header);
+
+#define cfs_register_sysctl_table(t, a)   register_sysctl_table(t, a)
+#define cfs_unregister_sysctl_table(t)    unregister_sysctl_table(t)
+
+/*
+ * seq device (linux/seq_file.h)
+ */
+
+
+/*
+ * seq file definitions
+ */
+
+struct dentry;
+struct vfsmount;
+
+struct path {
+        struct vfsmount *mnt;
+        struct dentry *dentry;
+};
+
+struct seq_operations;
+struct file;
+struct inode;
+
+struct seq_file {
+       char *buf;
+       size_t size;
+       size_t from;
+       size_t count;
+       loff_t index;
+       u32    version;
+       mutex_t lock;
+       const struct seq_operations *op;
+       void *private;
+};
+
+struct seq_operations {
+       void * (*start) (struct seq_file *m, loff_t *pos);
+       void (*stop) (struct seq_file *m, void *v);
+       void * (*next) (struct seq_file *m, void *v, loff_t *pos);
+       int (*show) (struct seq_file *m, void *v);
+};
+
+int seq_open(struct file *, const struct seq_operations *);
+ssize_t seq_read(struct file *, char __user *, size_t, loff_t *);
+loff_t seq_lseek(struct file *, loff_t, int);
+int seq_release(struct inode *, struct file *);
+int seq_escape(struct seq_file *, const char *, const char *);
+int seq_putc(struct seq_file *m, char c);
+int seq_puts(struct seq_file *m, const char *s);
+
+int seq_printf(struct seq_file *, const char *, ...)
+       __attribute__ ((format (printf,2,3)));
 
+int seq_path(struct seq_file *, struct path *, char *);
+
+int single_open(struct file *, int (*)(struct seq_file *, void *), void *);
+int single_release(struct inode *, struct file *);
+void *__seq_open_private(struct file *, const struct seq_operations *, int);
+int seq_open_private(struct file *, const struct seq_operations *, int);
+int seq_release_private(struct inode *, struct file *);
+
+#define SEQ_START_TOKEN ((void *)1)
+
+/*
+ * Helpers for iteration over list_head-s in seq_files
+ */
+
+extern struct list_head *seq_list_start(struct list_head *head,
+               loff_t pos);
+extern struct list_head *seq_list_start_head(struct list_head *head,
+               loff_t pos);
+extern struct list_head *seq_list_next(void *v, struct list_head *head,
+               loff_t *ppos);
 
 /*
  *  declaration of proc kernel process routines
@@ -290,7 +378,7 @@ lustre_close_file(cfs_file_t * fh);
 int
 lustre_do_ioctl( cfs_file_t * fh,
                  unsigned long cmd,
-                 ulong_ptr arg );
+                 ulong_ptr_t arg );
 
 int
 lustre_ioctl_file( cfs_file_t * fh,
@@ -298,7 +386,7 @@ lustre_ioctl_file( cfs_file_t * fh,
 
 size_t
 lustre_read_file( cfs_file_t *    fh,
-                  loff_t          off,
+                  loff_t          offl,
                   size_t          size,
                   char *          buf
                   );
@@ -320,7 +408,7 @@ typedef int cfs_task_state_t;
 #define CFS_TASK_INTERRUPTIBLE 0x00000001
 #define CFS_TASK_UNINT         0x00000002
 #define CFS_TASK_RUNNING        0x00000003
-
+#define CFS_TASK_UNINTERRUPTIBLE CFS_TASK_UNINT
 
 #define CFS_WAITQ_MAGIC     'CWQM'
 #define CFS_WAITLINK_MAGIC  'CWLM'
@@ -367,26 +455,6 @@ enum {
 
 #define CFS_DECL_WAITQ(name) cfs_waitq_t name
 
-
-void cfs_waitq_init(struct cfs_waitq *waitq);
-void cfs_waitlink_init(struct cfs_waitlink *link);
-
-void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, 
-                            struct cfs_waitlink *link);
-void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
-int  cfs_waitq_active(struct cfs_waitq *waitq);
-
-void cfs_waitq_signal(struct cfs_waitq *waitq);
-void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
-void cfs_waitq_broadcast(struct cfs_waitq *waitq);
-
-void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
-cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, 
-                                  cfs_task_state_t state, cfs_duration_t timeout);
-
-
-
 /* Kernel thread */
 
 typedef int (*cfs_thread_t) (void *arg);
@@ -397,6 +465,7 @@ typedef struct _cfs_thread_context {
 } cfs_thread_context_t;
 
 int cfs_kernel_thread(int (*func)(void *), void *arg, int flag);
+#define kernel_thread cfs_kernel_thread
 
 /*
  * thread creation flags from Linux, not used in winnt
@@ -417,39 +486,121 @@ int cfs_kernel_thread(int (*func)(void *), void *arg, int flag);
 
 
 /*
- * sigset ...
+ * group_info: linux/sched.h
  */
+#define NGROUPS_SMALL           32
+#define NGROUPS_PER_BLOCK       ((int)(PAGE_SIZE / sizeof(gid_t)))
+struct group_info {
+        int ngroups;
+        atomic_t usage;
+        gid_t small_block[NGROUPS_SMALL];
+        int nblocks;
+        gid_t *blocks[0];
+};
 
-typedef sigset_t cfs_sigset_t;
+#define get_group_info(group_info) do { \
+        atomic_inc(&(group_info)->usage); \
+} while (0)
+
+#define put_group_info(group_info) do { \
+        if (atomic_dec_and_test(&(group_info)->usage)) \
+                groups_free(group_info); \
+} while (0)
+
+/*
+ * groups_alloc
+ *   Placeholder: logs a "NOT implemented" notice, then allocates one
+ *   zero-filled struct group_info with cfs_alloc(). gidsetsize is
+ *   not used.
+ *
+ * Return Value:
+ *   the new group_info, or NULL if cfs_alloc() returned NULL
+ *
+ * Notes:
+ *   The memset leaves every field, including usage, at zero.
+ *   NOTE(review): confirm a zero starting usage is what callers of
+ *   get_group_info/put_group_info expect.
+ */
+static __inline struct group_info *groups_alloc(int gidsetsize)
+{
+    struct group_info *gi;
+
+    KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__));
+
+    gi = (struct group_info *)cfs_alloc(sizeof(*gi), 0);
+    if (gi == NULL) {
+        return NULL;
+    }
+
+    memset(gi, 0, sizeof(*gi));
+    return gi;
+}
+
+/*
+ * groups_free
+ *   Logs a "NOT implemented" notice and passes group_info to cfs_free().
+ */
+static __inline void groups_free(struct group_info *group_info)
+{
+    KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__));
+
+    cfs_free(group_info);
+}
+
+/*
+ * set_current_groups
+ *   Placeholder: logs a "NOT implemented" notice; group_info is not
+ *   used. Always returns 0.
+ */
+static __inline int set_current_groups(struct group_info *group_info)
+{
+    KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__));
+
+    return 0;
+}
+
+/*
+ * groups_search
+ *   Placeholder: logs a "NOT implemented" notice; neither argument is
+ *   used. Always returns 0.
+ */
+static __inline int groups_search(struct group_info *group_info, gid_t grp)
+{
+    KdPrint(("%s(%d): %s NOT implemented.\n", __FILE__, __LINE__, __FUNCTION__));
+
+    return 0;
+}
 
 /*
- * Task struct
+ *   capability issue (linux/capability.h)
  */
 
-#define MAX_SCHEDULE_TIMEOUT    ((long_ptr)(~0UL>>12))
+/* Override resource limits. Set resource limits. */
+/* Override quota limits. */
+/* Override reserved space on ext2 filesystem */
+/* Modify data journaling mode on ext3 filesystem (uses journaling
+   resources) */
+/* NOTE: ext2 honors fsuid when checking for resource overrides, so
+   you can override using fsuid too */
+/* Override size restrictions on IPC message queues */
+/* Allow more than 64hz interrupts from the real-time clock */
+/* Override max number of consoles on console allocation */
+/* Override max number of keymaps */
 
+#define CAP_SYS_RESOURCE     24
 
-#define NGROUPS 1
-#define CFS_CURPROC_COMM_MAX (16)
-typedef struct task_sruct{
-    mode_t umask;
+/*
+ *  capabilities support 
+ */
+
+typedef __u32 cfs_kernel_cap_t;
+
+/* Capability manipulation is stubbed out: each macro expands to an empty
+ * statement and never evaluates its arguments. */
+#define cap_raise(c, flag)  do {} while(0)
+#define cap_lower(c, flag)  do {} while(0)
+/* NOTE(review): as an empty statement, cap_raised cannot be used where a
+ * value is expected (e.g. as an if condition) -- confirm that no caller
+ * tests its result. */
+#define cap_raised(c, flag) do {} while(0)
 
-       pid_t pid;
-       pid_t pgrp;
 
-       uid_t uid,euid,suid,fsuid;
-       gid_t gid,egid,sgid,fsgid;
+/*
+ * Task struct
+ */
+
+#define MAX_SCHEDULE_TIMEOUT    ((long_ptr_t)(~0UL>>12))
+#define schedule_timeout(t)     cfs_schedule_timeout(0, t)
 
-       int ngroups;
-       gid_t   groups[NGROUPS];
-       cfs_kernel_cap_t   cap_effective,
-                       cap_inheritable,
-                       cap_permitted;
+struct vfsmount;
 
-       char comm[CFS_CURPROC_COMM_MAX];
-    void * journal_info;
+#define NGROUPS 1
+#define CFS_CURPROC_COMM_MAX (16)
+typedef struct task_sruct{
+    mode_t           umask;
+    sigset_t         blocked;
+
+    pid_t            pid;
+    pid_t            pgrp;
+
+    uid_t            uid,euid,suid,fsuid;
+    gid_t            gid,egid,sgid,fsgid;
+
+    int              ngroups;
+    int              cgroups;
+    gid_t            groups[NGROUPS];
+    struct group_info *group_info;
+    cfs_kernel_cap_t cap_effective,
+                     cap_inheritable,
+                     cap_permitted;
+
+    char             comm[CFS_CURPROC_COMM_MAX];
+    void            *journal_info;
+    struct vfsmount *fs;
 }  cfs_task_t;
 
+static inline void task_lock(cfs_task_t *t)
+{
+}
+
+static inline void task_unlock(cfs_task_t *t)
+{
+}
 
 /*
  *  linux task struct emulator ...
@@ -498,49 +649,80 @@ typedef struct _TASK_SLOT {
 #define current                 cfs_current()
 #define set_current_state(s)   do {;} while (0)
 
-#define wait_event(wq, condition)                           \
-do {                                                        \
-    cfs_waitlink_t __wait;                                     \
-                                                            \
-    cfs_waitlink_init(&__wait);                                    \
-       while (TRUE) {                                          \
-               cfs_waitq_add(&wq, &__wait);                        \
-               if (condition)  {                                           \
-                       break;                                                  \
-        }                                                   \
-               cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);        \
-               cfs_waitq_del(&wq, &__wait);                        \
-       }                                                                           \
-       cfs_waitq_del(&wq, &__wait);                                \
+#define wait_event(wq, condition)                               \
+do {                                                            \
+        cfs_waitlink_t __wait;                                  \
+                                                                \
+        cfs_waitlink_init(&__wait);                             \
+        while (TRUE) {                                          \
+            cfs_waitq_add(&wq, &__wait);                        \
+            if (condition) {                                    \
+                break;                                          \
+            }                                                   \
+            cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);    \
+            cfs_waitq_del(&wq, &__wait);                       \
+        }                                                      \
+        cfs_waitq_del(&wq, &__wait);                           \
 } while(0)
 
-#define wait_event_interruptible(wq, condition, __ret)      \
-do {                                                        \
-    cfs_waitlink_t __wait;                                     \
-                                                            \
-    __ret = 0;                                              \
-    cfs_waitlink_init(&__wait);                                    \
-       while (TRUE) {                                          \
-               cfs_waitq_add(&wq, &__wait);                        \
-               if (condition)  {                                           \
-                       break;                                                  \
-        }                                                   \
-               cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);    \
-               cfs_waitq_del(&wq, &__wait);                        \
-       }                                                                           \
-       cfs_waitq_del(&wq, &__wait);                                \
+#define cfs_wait_event_interruptible(wq, condition, __ret)      \
+do {                                                            \
+        cfs_waitlink_t __wait;                                 \
+                                                                \
+        __ret = 0;                                              \
+        cfs_waitlink_init(&__wait);                             \
+        while (TRUE) {                                          \
+            cfs_waitq_add(&wq, &__wait);                       \
+            if (condition) {                                    \
+                break;                                          \
+            }                                                   \
+            cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);    \
+            cfs_waitq_del(&wq, &__wait);                       \
+        }                                                       \
+        cfs_waitq_del(&wq, &__wait);                            \
 } while(0)
 
+# define cfs_wait_event_interruptible_exclusive(wq, condition, rc)  \
+         cfs_wait_event_interruptible(wq, condition, rc)
+
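+/*
+   The result is stored in rc:
+   rc == 0; condition met; we're good.
+   rc < 0;  interrupted by signal (the macro below never assigns a
+            negative value).
+   rc > 0;  timed out (rc is set to TRUE when cfs_waitq_timedwait()
+            returns 0).
+*/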
+
+#define cfs_waitq_wait_event_interruptible_timeout(             \
+                        wq, condition, timeout, rc)             \
+do {                                                            \
+        cfs_waitlink_t __wait;                                  \
+                                                                \
+        rc = 0;                                                 \
+        cfs_waitlink_init(&__wait);                            \
+        while (TRUE) {                                          \
+            cfs_waitq_add(&wq, &__wait);                        \
+            if (condition) {                                    \
+                break;                                          \
+            }                                                   \
+            if (cfs_waitq_timedwait(&__wait,                    \
+                CFS_TASK_INTERRUPTIBLE, timeout) == 0) {        \
+                rc = TRUE;                                      \
+                break;                                          \
+            }                                                   \
+            cfs_waitq_del(&wq, &__wait);                       \
+        }                                                      \
+        cfs_waitq_del(&wq, &__wait);                           \
+} while(0)
+
+
+#define cfs_waitq_wait_event_timeout                            \
+        cfs_waitq_wait_event_interruptible_timeout
 
 int     init_task_manager();
 void    cleanup_task_manager();
 cfs_task_t * cfs_current();
-int     schedule_timeout(int64_t time);
-int     schedule();
 int     wake_up_process(cfs_task_t * task);
-#define cfs_schedule_timeout(state, time)  schedule_timeout(time)
 void sleep_on(cfs_waitq_t *waitq);
-
+#define might_sleep() do {} while(0)
 #define CFS_DECL_JOURNAL_DATA  
 #define CFS_PUSH_JOURNAL           do {;} while(0)
 #define CFS_POP_JOURNAL                    do {;} while(0)
@@ -555,8 +737,14 @@ void sleep_on(cfs_waitq_t *waitq);
 #define __init
 #endif
 
-#define request_module(x) (0)
+struct module {
+    const char *name;
+};
+
+extern struct module libcfs_global_module;
+#define THIS_MODULE  &libcfs_global_module
 
+#define request_module(x) (0)
 #define EXPORT_SYMBOL(s)
 #define MODULE_AUTHOR(s)
 #define MODULE_DESCRIPTION(s)
@@ -581,20 +769,42 @@ void sleep_on(cfs_waitq_t *waitq);
 #define cfs_module(name, version, init, fini) \
 module_init(init);                            \
 module_exit(fini)
+#define module_refcount(x) (1)
 
+/*
+ * typecheck
+ */
+
+#define typecheck(a, b) do {} while(0)
 
 /*
- *  Linux kernel version definition
+ * linux/crypto.h
  */
 
-#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c)
-#define LINUX_VERSION_CODE (2*100+6*10+7)
+#define CRYPTO_MAX_ALG_NAME            64
 
+#define CRYPTO_TFM_MODE_ECB            0x00000001
+#define CRYPTO_TFM_MODE_CBC            0x00000002
+#define CRYPTO_TFM_MODE_CFB            0x00000004
+#define CRYPTO_TFM_MODE_CTR            0x00000008
+#define CRYPTO_TFM_MODE_EME            0x00000010
 
 /*
- * Signal
+ * hash
  */
-#define SIGNAL_MASK_ASSERT()
+/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
+#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
+
+#if 0 /* defined in libcfs/libcfs_hash.h */
+static inline u32 hash_long(u32 val, unsigned int bits)
+{
+       /* On some cpus multiply is faster, on others gcc will do shifts */
+       u32 hash = val * GOLDEN_RATIO_PRIME_32;
+
+       /* High bits are more random, so use them. */
+       return hash >> (32 - bits);
+}
+#endif
 
 /*
  * Timer
@@ -614,40 +824,11 @@ typedef struct cfs_timer {
 
     cfs_time_t      deadline;
 
-    void (*proc)(ulong_ptr);
+    void (*proc)(ulong_ptr_t);
     void *          arg;
 
 } cfs_timer_t;
 
-
-typedef  void (*timer_func_t)(ulong_ptr);
-
-#define cfs_init_timer(t)
-
-void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg);
-void cfs_timer_done(cfs_timer_t *t);
-void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline);
-void cfs_timer_disarm(cfs_timer_t *t);
-int  cfs_timer_is_armed(cfs_timer_t *t);
-cfs_time_t cfs_timer_deadline(cfs_timer_t *t);
-
-
-/* deschedule for a bit... */
-static inline void cfs_pause(cfs_duration_t ticks)
-{
-    cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks);
-}
-
-
-static inline void cfs_enter_debugger(void)
-{
-#if _X86_
-    __asm int 3;
-#else
-    KdBreakPoint();
-#endif
-}
-
 /*
  *  libcfs globals initialization/cleanup
  */
@@ -659,23 +840,33 @@ void
 libcfs_arch_cleanup(void);
 
 /*
+ *  cache alignment size
+ */
+
+#define L1_CACHE_ALIGN(x) (x)
+
+#define __cacheline_aligned
+
+/*
  * SMP ...
  */
 
+
 #define SMP_CACHE_BYTES             128
-#define __cacheline_aligned
-#define NR_CPUS                                            (2)
-#define smp_processor_id()                 KeGetCurrentProcessorNumber()
-#define smp_num_cpus                NR_CPUS
-#define num_online_cpus() smp_num_cpus
+#define NR_CPUS                     (32)
+#define smp_num_cpus                ((CCHAR)KeNumberProcessors)
+#define num_possible_cpus()         smp_num_cpus
+#define num_online_cpus()           smp_num_cpus
+#define smp_processor_id()                 ((USHORT)KeGetCurrentProcessorNumber())
 #define smp_call_function(f, a, n, w)          do {} while(0)
+#define smp_rmb()                   do {} while(0)
 
 /*
  *  Irp related
  */
 
-#define NR_IRQS                                    512
-#define in_interrupt()                 (0)
+#define NR_IRQS                                        512
+#define in_interrupt()                     (0)
 
 /*
  *  printk flags
@@ -694,7 +885,6 @@ libcfs_arch_cleanup(void);
  * Misc
  */
 
-
 #define inter_module_get(n)                    cfs_symbol_get(n)
 #define inter_module_put(n)                    cfs_symbol_put(n)
 
@@ -708,65 +898,79 @@ libcfs_arch_cleanup(void);
 #define lock_kernel()               do {} while(0)
 #define unlock_kernel()             do {} while(0)
 
-#define USERMODEHELPER(path, argv, envp)       (0)
-
-
 #define local_irq_save(x)
 #define local_irq_restore(x)
 
-#define cfs_assert                      ASSERT
-
 #define THREAD_NAME
 
-#else   /* !__KERNEL__ */
+#define va_copy(_d, _s)                 ((_d) = (_s))   /* plain assignment of the va_list value */
 
-#define PAGE_CACHE_SIZE PAGE_SIZE
-#define PAGE_CACHE_MASK PAGE_MASK
+char *strnchr(const char *s, size_t count, int c);
 
-#define getpagesize()   (PAGE_SIZE)
+#define adler32(a,b,l) zlib_adler32(a,b,l)
+ULONG zlib_adler32(ULONG adler, const BYTE *buf, UINT len);
 
+typedef ssize_t (*read_actor_t)();
 
-typedef struct {
-    int foo;
-} pthread_mutex_t;
+#if DBG
+/*
+ *  winnt debug routines
+ */
 
-typedef struct {
-    int foo;
-} pthread_cond_t;
+VOID
+KsPrintf(
+    LONG  DebugPrintLevel,
+    PCHAR DebugMessage,
+    ...
+    );
 
-#define pthread_mutex_init(x, y)    do {} while(0)
-#define pthread_cond_init(x, y)     do {} while(0)
+PUCHAR
+KsNtStatusToString (IN NTSTATUS Status);
+#endif
 
-#define pthread_mutex_lock(x)       do {} while(0)
-#define pthread_mutex_unlock(x)     do {} while(0)
+#else   /* !__KERNEL__ */
 
-#define pthread_cond_wait(x,y)      do {} while(0)
-#define pthread_cond_broadcast(x)   do {} while(0)
+void cfs_enter_debugger(void);  /* user-mode (!__KERNEL__) prototype; takes no arguments */
 
-typedef struct file {
-    int foo;
-} cfs_file_t;
+/*
+ *  PAGE_SIZE ...
+ */
 
-typedef struct cfs_proc_dir_entry{
-       void            *data;
-}cfs_proc_dir_entry_t;
+#ifndef PAGE_SIZE
+#define PAGE_SIZE       (4096)
+#endif
 
+#define getpagesize()   (4096)
 
+#define PAGE_CACHE_SIZE PAGE_SIZE
+#define PAGE_CACHE_MASK PAGE_MASK
 
-#include "../user-prim.h"
+#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t) -1)
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t) -2)
+#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t) -3)
 
+typedef struct file {
+    int foo;
+} cfs_file_t;
+
+#include "../user-prim.h"
+#include "../user-lock.h"
 #include <sys/stat.h>
 #include <sys/types.h>
 
 #define strcasecmp  strcmp
 #define strncasecmp strncmp
-#define snprintf   _snprintf
 #define getpid()   (0)
 
-
-#define getpwuid(x) (NULL)
+#define getuid()    (0)
 #define getgrgid(x) (NULL)
 
+struct passwd {
+        uid_t pw_uid;
+        char  pw_name[64];
+};
+struct passwd * getpwuid(uid_t uid);
+
 int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev);
 
 int gethostname(char * name, int namelen);
@@ -774,195 +978,191 @@ int gethostname(char * name, int namelen);
 #define setlinebuf(x) do {} while(0)
 
 
-NTSYSAPI VOID NTAPI DebugBreak();
+/* Maximum EA Information Length */
+#define EA_MAX_LENGTH  (sizeof(FILE_FULL_EA_INFORMATION) + 15)
 
+/*
+ *  proc user mode routines
+ */
 
-static inline void cfs_enter_debugger(void)
-{
-#if _X86_
-    __asm int 3;
+int cfs_proc_open (char * filename, int oflag);
+int cfs_proc_close(int fd);
+int cfs_proc_read(int fd, void *buffer, unsigned int count);
+int cfs_proc_write(int fd, void *buffer, unsigned int count);
+int cfs_proc_ioctl(int fd, int cmd, void *buffer);
+FILE *cfs_proc_fopen(char *path, char * mode);
+char *cfs_proc_fgets(char * buf, int len, FILE *fp);
+int cfs_proc_fclose(FILE *fp);
+
+/* Bits set in the FLAGS argument to `glob'.  */
+#define        GLOB_ERR        (1 << 0)/* Return on read errors.  */
+#define        GLOB_MARK       (1 << 1)/* Append a slash to each name.  */
+#define        GLOB_NOSORT     (1 << 2)/* Don't sort the names.  */
+#define        GLOB_DOOFFS     (1 << 3)/* Insert PGLOB->gl_offs NULLs.  */
+#define        GLOB_NOCHECK    (1 << 4)/* If nothing matches, return the pattern.  */
+#define        GLOB_APPEND     (1 << 5)/* Append to results of a previous call.  */
+#define        GLOB_NOESCAPE   (1 << 6)/* Backslashes don't quote metacharacters.  */
+#define        GLOB_PERIOD     (1 << 7)/* Leading `.' can be matched by metachars.  */
+
+#if !defined __USE_POSIX2 || defined __USE_BSD || defined __USE_GNU
+# define GLOB_MAGCHAR   (1 << 8)/* Set in gl_flags if any metachars seen.  */
+# define GLOB_ALTDIRFUNC (1 << 9)/* Use gl_opendir et al functions.  */
+# define GLOB_BRACE     (1 << 10)/* Expand "{a,b}" to "a" "b".  */
+# define GLOB_NOMAGIC   (1 << 11)/* If no magic chars, return the pattern.  */
+# define GLOB_TILDE     (1 << 12)/* Expand ~user and ~ to home directories. */
+# define GLOB_ONLYDIR   (1 << 13)/* Match only directories.  */
+# define GLOB_TILDE_CHECK (1 << 14)/* Like GLOB_TILDE but return an error
+                                     if the user name is not available.  */
+# define __GLOB_FLAGS  (GLOB_ERR|GLOB_MARK|GLOB_NOSORT|GLOB_DOOFFS| \
+                        GLOB_NOESCAPE|GLOB_NOCHECK|GLOB_APPEND|     \
+                        GLOB_PERIOD|GLOB_ALTDIRFUNC|GLOB_BRACE|     \
+                        GLOB_NOMAGIC|GLOB_TILDE|GLOB_ONLYDIR|GLOB_TILDE_CHECK)
 #else
-    DebugBreak();
+# define __GLOB_FLAGS  (GLOB_ERR|GLOB_MARK|GLOB_NOSORT|GLOB_DOOFFS| \
+                        GLOB_NOESCAPE|GLOB_NOCHECK|GLOB_APPEND|     \
+                        GLOB_PERIOD)
 #endif
-}
 
-/* Maximum EA Information Length */
-#define EA_MAX_LENGTH  (sizeof(FILE_FULL_EA_INFORMATION) + 15)
+/* Error returns from `glob'.  */
+#define        GLOB_NOSPACE    1       /* Ran out of memory.  */
+#define        GLOB_ABORTED    2       /* Read error.  */
+#define        GLOB_NOMATCH    3       /* No matches found.  */
+#define GLOB_NOSYS     4       /* Not implemented.  */
+#ifdef __USE_GNU
+/* Previous versions of this file defined GLOB_ABEND instead of
+   GLOB_ABORTED.  Provide a compatibility definition here.  */
+# define GLOB_ABEND GLOB_ABORTED
+#endif
+
+/* Structure describing a globbing run.  */
+#ifdef __USE_GNU
+struct stat;
+#endif
+typedef struct
+  {
+    size_t gl_pathc;           /* Count of paths matched by the pattern.  */
+    char **gl_pathv;           /* List of matched pathnames.  */
+    size_t gl_offs;            /* Slots to reserve in `gl_pathv'.  */
+    int gl_flags;              /* Set to FLAGS, maybe | GLOB_MAGCHAR.  */
+
+    /* If the GLOB_ALTDIRFUNC flag is set, the following functions
+       are used instead of the normal file access functions.  */
+    void (*gl_closedir) (void *);
+#ifdef __USE_GNU
+    struct dirent *(*gl_readdir) (void *);
+#else
+    void *(*gl_readdir) (void *);
+#endif
+    void *(*gl_opendir) (const char *);
+#ifdef __USE_GNU
+    int (*gl_lstat) (const char *__restrict, struct stat *__restrict);
+    int (*gl_stat) (const char *__restrict, struct stat *__restrict);
+#else
+    int (*gl_lstat) (const char *__restrict, void *__restrict);
+    int (*gl_stat) (const char *__restrict, void *__restrict);
+#endif
+  } glob_t;
+
+#ifdef __USE_LARGEFILE64
+# ifdef __USE_GNU
+struct stat64;
+# endif
+typedef struct
+  {
+    __size_t gl_pathc;
+    char **gl_pathv;
+    __size_t gl_offs;
+    int gl_flags;
+
+    /* If the GLOB_ALTDIRFUNC flag is set, the following functions
+       are used instead of the normal file access functions.  */
+    void (*gl_closedir) (void *);
+# ifdef __USE_GNU
+    struct dirent64 *(*gl_readdir) (void *);
+# else
+    void *(*gl_readdir) (void *);
+# endif
+    void *(*gl_opendir) (__const char *);
+# ifdef __USE_GNU
+    int (*gl_lstat) (__const char *__restrict, struct stat64 *__restrict);
+    int (*gl_stat) (__const char *__restrict, struct stat64 *__restrict);
+# else
+    int (*gl_lstat) (__const char *__restrict, void *__restrict);
+    int (*gl_stat) (__const char *__restrict, void *__restrict);
+# endif
+  } glob64_t;
+#endif
+
+int glob (const char * __pattern, int __flags,
+                int (*__errfunc) (const char *, int),
+                glob_t * __pglob);
+void globfree(glob_t *__pglog);
 
+#endif /* !__KERNEL__ */
 
 /*
- *  proc user mode routines
+ *  module routines
  */
 
-HANDLE cfs_proc_open (char * filename, int oflag);
-int cfs_proc_close(HANDLE handle);
-int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count);
-int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count);
-int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer);
+static inline void __module_get(struct module *module)
+{   /* no-op stub: the argument is ignored and nothing is counted */
+}
 
+static inline int try_module_get(struct module *module)
+{   /* stub: the argument is ignored */
+    return 1;   /* always returns 1 */
+}
+
+static inline void module_put(struct module *module)
+{   /* no-op stub: the argument is ignored */
+}
 
 /*
- * Native API definitions
+ *  sigset_t routines 
  */
 
-//
-//  Disk I/O Routines
-//
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtReadFile(HANDLE FileHandle,
-    HANDLE Event OPTIONAL,
-    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
-    PVOID ApcContext OPTIONAL,
-    PIO_STATUS_BLOCK IoStatusBlock,
-    PVOID Buffer,
-    ULONG Length,
-    PLARGE_INTEGER ByteOffset OPTIONAL,
-    PULONG Key OPTIONAL);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtWriteFile(HANDLE FileHandle,
-    HANDLE Event OPTIONAL,
-    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
-    PVOID ApcContext OPTIONAL,
-    PIO_STATUS_BLOCK IoStatusBlock,
-    PVOID Buffer,
-    ULONG Length,
-    PLARGE_INTEGER ByteOffset OPTIONAL,
-    PULONG Key OPTIONAL);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtClose(HANDLE Handle);
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtCreateFile(PHANDLE FileHandle,
-    ACCESS_MASK DesiredAccess,
-    POBJECT_ATTRIBUTES ObjectAttributes,
-    PIO_STATUS_BLOCK IoStatusBlock,
-    PLARGE_INTEGER AllocationSize OPTIONAL,
-    ULONG FileAttributes,
-    ULONG ShareAccess,
-    ULONG CreateDisposition,
-    ULONG CreateOptions,
-    PVOID EaBuffer OPTIONAL,
-    ULONG EaLength);
-
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtDeviceIoControlFile(
-    IN HANDLE  FileHandle,
-    IN HANDLE  Event,
-    IN PIO_APC_ROUTINE  ApcRoutine,
-    IN PVOID  ApcContext,
-    OUT PIO_STATUS_BLOCK  IoStatusBlock,
-    IN ULONG  IoControlCode,
-    IN PVOID  InputBuffer,
-    IN ULONG  InputBufferLength,
-    OUT PVOID  OutputBuffer,
-    OUT ULONG  OutputBufferLength
-    ); 
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtFsControlFile(
-    IN HANDLE FileHandle,
-    IN HANDLE Event OPTIONAL,
-    IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
-    IN PVOID ApcContext OPTIONAL,
-    OUT PIO_STATUS_BLOCK IoStatusBlock,
-    IN ULONG FsControlCode,
-    IN PVOID InputBuffer OPTIONAL,
-    IN ULONG InputBufferLength,
-    OUT PVOID OutputBuffer OPTIONAL,
-    IN ULONG OutputBufferLength
-);
-
-
-NTSYSAPI
-NTSTATUS
-NTAPI
-NtQueryInformationFile(
-    IN HANDLE  FileHandle,
-    OUT PIO_STATUS_BLOCK  IoStatusBlock,
-    OUT PVOID  FileInformation,
-    IN ULONG  Length,
-    IN FILE_INFORMATION_CLASS  FileInformationClass
-    );
+typedef sigset_t cfs_sigset_t;  /* the macros below treat *what as a plain integer bitmask */
+#define sigaddset(what,sig) (*(what) |= (1<<(sig)), 0)  /* set bit sig; expression yields 0 */
+#define sigdelset(what,sig) (*(what) &= ~(1<<(sig)), 0) /* clear bit sig; expression yields 0 */
+#define sigemptyset(what)   (*(what) = 0, 0)            /* clear every bit; expression yields 0 */
+#define sigfillset(what)    (*(what) = ~(0), 0)         /* set every bit; expression yields 0 */
+#define sigismember(what,sig) (((*(what)) & (1<<(sig))) != 0) /* 1 if bit sig is set, else 0; NOTE(review): 1<<(sig) is an int shift, so sig must stay below 31 */
+
+static __inline int
+sigprocmask(int sig, cfs_sigset_t *w1, cfs_sigset_t *w2) { /* stub: all three arguments are ignored */
+    return 0;   /* always returns 0 */
+}
+static __inline int
+sigpending(cfs_sigset_t *what) {    /* stub: *what is neither read nor written */
+    return 0;   /* always returns 0 */
+}
+
+/*
+ * common inode flags (user & kernel)
+ */
 
-//
-// Random routines ...
-//
-
-NTSYSAPI
-ULONG
-NTAPI
-RtlRandom(
-    IN OUT PULONG  Seed
-    ); 
-
-#endif /* __KERNEL__ */
-
-
-//
-// Inode flags (Linux uses octad number, but why ? strange!!!)
-//
-
-#undef S_IFMT
-#undef S_IFDIR
-#undef S_IFCHR
-#undef S_IFREG
-#undef S_IREAD
-#undef S_IWRITE
-#undef S_IEXEC
-
-#define S_IFMT   0x0F000            /* 017 0000 */
-#define S_IFSOCK 0x0C000            /* 014 0000 */
-#define S_IFLNK  0x0A000            /* 012 0000 */
-#define S_IFREG  0x08000            /* 010 0000 */
-#define S_IFBLK  0x06000            /* 006 0000 */
-#define S_IFDIR  0x04000            /* 004 0000 */
-#define S_IFCHR  0x02000            /* 002 0000 */
-#define S_IFIFO  0x01000            /* 001 0000 */
-#define S_ISUID  0x00800            /* 000 4000 */
-#define S_ISGID  0x00400            /* 000 2000 */
-#define S_ISVTX  0x00200            /* 000 1000 */
-
-#define S_ISREG(m)      (((m) & S_IFMT) == S_IFREG)
-#define S_ISSOCK(m)     (((m) & S_IFMT) == S_IFSOCK)
-#define S_ISLNK(m)      (((m) & S_IFMT) == S_IFLNK)
-#define S_ISFIL(m)      (((m) & S_IFMT) == S_IFFIL)
-#define S_ISBLK(m)      (((m) & S_IFMT) == S_IFBLK)
-#define S_ISDIR(m)      (((m) & S_IFMT) == S_IFDIR)
-#define S_ISCHR(m)      (((m) & S_IFMT) == S_IFCHR)
-#define S_ISFIFO(m)     (((m) & S_IFMT) == S_IFIFO)
-
-#define S_IPERMISSION_MASK 0x1FF /*  */
-
-#define S_IRWXU  0x1C0              /* 0 0700 */
-#define S_IRUSR  0x100              /* 0 0400 */
-#define S_IWUSR  0x080              /* 0 0200 */
-#define S_IXUSR  0x040              /* 0 0100 */
-
-#define S_IRWXG  0x038              /* 0 0070 */
-#define S_IRGRP  0x020              /* 0 0040 */
-#define S_IWGRP  0x010              /* 0 0020 */
-#define S_IXGRP  0x008              /* 0 0010 */
-
-#define S_IRWXO  0x007              /* 0 0007 */
-#define S_IROTH  0x004              /* 0 0004 */
-#define S_IWOTH  0x002              /* 0 0002 */
-#define S_IXOTH  0x001              /* 0 0001 */
+#define S_ISLNK(m)     (((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m)     (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)     (((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m)     (((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m)    (((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m)    (((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
 
 #define S_IRWXUGO   (S_IRWXU|S_IRWXG|S_IRWXO)
 #define S_IALLUGO   (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
@@ -970,6 +1170,13 @@ RtlRandom(
 #define S_IWUGO     (S_IWUSR|S_IWGRP|S_IWOTH)
 #define S_IXUGO     (S_IXUSR|S_IXGRP|S_IXOTH)
 
+
+/*
+ *  Linux kernel version definition
+ */
+
+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+(c))  /* decimal packing a*100 + b*10 + c; every argument parenthesized */
+
 /*
  *  linux ioctl coding definitions
  */
@@ -979,7 +1186,7 @@ RtlRandom(
 #define _IOC_SIZEBITS   14
 #define _IOC_DIRBITS    2
 
-#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_NRMASK     ((1 << _IOC_NRBITS)-1)
 #define _IOC_TYPEMASK   ((1 << _IOC_TYPEBITS)-1)
 #define _IOC_SIZEMASK   ((1 << _IOC_SIZEBITS)-1)
 #define _IOC_DIRMASK    ((1 << _IOC_DIRBITS)-1)
@@ -1014,16 +1221,26 @@ RtlRandom(
 #define _IOC_NR(nr)         (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
 #define _IOC_SIZE(nr)       (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
 
-/*
- * Io vector ...  
- */
-
-struct iovec
-{
+/* i/o vector structure ... */
+struct iovec {
     void *iov_base;
     size_t iov_len;
 };
 
+/* idr support routines (prototypes only; presumably modelled on the Linux idr API -- confirm) */
+struct idr_context *cfs_idr_init(void);
+int cfs_idr_remove(struct idr_context *idp, int id);
+int cfs_idr_get_new(struct idr_context *idp, void *ptr);
+int cfs_idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id);
+void *cfs_idr_find(struct idr_context *idp, int id);
+void cfs_idr_exit(struct idr_context *idp);
+
+/* runtime ctype-style routines for both kernel and user mode */
+extern int cfs_isalpha(int);
+extern int cfs_isspace(int);
+extern int cfs_isupper(int);
+extern int cfs_isdigit(int);
+extern int cfs_isxdigit(int);
 
 #define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF))
 /*
@@ -1032,61 +1249,86 @@ struct iovec
  * Ignores `locale' stuff.  Assumes that the upper and lower case
  * alphabets and digits are each contiguous.
  */
-static inline __u64
-strtoull(
-       char *nptr,
-       char **endptr,
-       int base)
+__u64 strtoull(char *nptr, char **endptr,int base);
+
+/*
+ *  getopt routines
+ */
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+
+struct option
 {
-       char *s = nptr;
-       __u64 acc, cutoff;
-       int c, neg = 0, any, cutlim;
-
-       /*
-        * See strtol for comments as to the logic used.
-        */
-       do {
-               c = *s++;
-       } while (isspace(c));
-       if (c == '-') {
-               neg = 1;
-               c = *s++;
-       } else if (c == '+')
-               c = *s++;
-       if ((base == 0 || base == 16) &&
-           c == '0' && (*s == 'x' || *s == 'X')) {
-               c = s[1];
-               s += 2;
-               base = 16;
-       }
-       if (base == 0)
-               base = c == '0' ? 8 : 10;
-       cutoff = (__u64)ULONG_LONG_MAX / (__u64)base;
-       cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base);
-       for (acc = 0, any = 0;; c = *s++) {
-               if (isdigit(c))
-                       c -= '0';
-               else if (isalpha(c))
-                       c -= isupper(c) ? 'A' - 10 : 'a' - 10;
-               else
-                       break;
-               if (c >= base)
-                       break;
-               if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
-                       any = -1;
-               else {
-                       any = 1;
-                       acc *= base;
-                       acc += c;
-               }
-       }
-       if (any < 0) {
-               acc = ULONG_LONG_MAX;
-       } else if (neg)
-               acc = 0 - acc;
-       if (endptr != 0)
-               *endptr = (char *) (any ? s - 1 : nptr);
-       return (acc);
-}
+  const char *name;
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;
+  int *flag;
+  int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+# define no_argument           0
+# define required_argument     1
+# define optional_argument     2
+
+extern int getopt(int ___argc, char *const *___argv, const char *__shortopts);
+extern int getopt_long (int ___argc, char *const *___argv,
+                       const char *__shortopts,
+                       const struct option *__longopts, int *__longind);
+extern int getopt_long_only (int ___argc, char *const *___argv,
+                            const char *__shortopts,
+                            const struct option *__longopts, int *__longind);
+
+extern char *strcasestr (const char *phaystack, const char *pneedle);
+
+/*
+ * global environment runtime routine
+ */
+
+static __inline char * __cdecl cfs_getenv(const char *ENV) {return NULL;}
+static __inline void   __cdecl set_getenv(const char *ENV, const char *value, int overwrite) {}
+
+int setenv(const char *envname, const char *envval, int overwrite);
+
+struct utsname {
+         char sysname[64];
+         char nodename[64];
+         char release[128];
+         char version[128];
+         char machine[64];
+};
+
+int uname(struct utsname *uts);
 
 #endif
index 179b36b..ebaa4f9 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
  *
  * GPL HEADER START
  *
 // iovec is defined in libcfs: winnt_prim.h 
 // lnetkiov_t is defined in lnet/types.h
 
-typedef struct socket ksock_tconn_t;
-typedef struct socket cfs_socket_t;
+typedef struct socket ks_tconn_t, cfs_socket_t;
 
 // completion notification callback routine
 
-typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr);
+typedef VOID (*ks_schedule_cb)(struct socket*, int);
 
-/* completion routine to update tx structure for async sending */
-typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
+#define SOCK_ERROR(s)          (((s)->kstc_state >= ksts_disconnected) ? ECONNRESET : 0)  /* ECONNRESET once the state is ksts_disconnected or later, else 0 */
+#define SOCK_TEST_NOSPACE(s)   (1)
 
 //
 // tdinal definitions
 //
 
 
-#if TDI_LIBCFS_DBG
+#if DBG
 #define KsPrint(X)     KsPrintf X
 #else
 #define KsPrint(X)
@@ -77,17 +76,17 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
 // Socket Addresses Related ...
 //
 
-#define            INADDR_ANY              (ULONG)0x00000000
+#define     INADDR_ANY          (ULONG)0x00000000
 #define     INADDR_LOOPBACK     (ULONG)0x7f000001
-#define            INADDR_BROADCAST    (ULONG)0xffffffff
-#define            INADDR_NONE             (ULONG)0xffffffff
+#define     INADDR_BROADCAST    (ULONG)0xffffffff
+#define     INADDR_NONE         (ULONG)0xffffffff
 
 /*
  *  TCP / IP options
  */
 
 #define     SOL_TCP             6
-#define     SOL_UDP                    17
+#define     SOL_UDP             17      /* UDP (IP protocol number 17) */
 
 
 #define TL_INSTANCE             0
@@ -104,7 +103,7 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
    Added those for 1003.1g not all are supported yet
  */
  
-#define MSG_OOB            1
+#define MSG_OOB         1
 #define MSG_PEEK        2
 #define MSG_DONTROUTE   4
 #define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
@@ -168,15 +167,14 @@ typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
 
 typedef struct _KS_TSDU {
 
-    ULONG               Magic;
-    ULONG               Flags;
+    ULONG               Magic;          /* magic */
+    ULONG               Flags;          /* flags */
 
-    struct list_head    Link;
+    struct list_head    Link;           /* link list */
 
-    ULONG               TotalLength;    // Total size of KS_TSDU
-
-    ULONG               StartOffset;    // Start offset of the first Tsdu unit
-    ULONG               LastOffset;     // End offset of the last Tsdu unit
+    ULONG               TotalLength;    /* total size of KS_TSDU */
+    ULONG               StartOffset;    /* offset of the first Tsdu unit */
+    ULONG               LastOffset;     /* end offset of the last Tsdu unit */
 
 /*
     union {
@@ -192,7 +190,8 @@ typedef struct _KS_TSDU {
 #define TSDU_TYPE_DAT   ((USHORT)0x5402)
 #define TSDU_TYPE_MDL   ((USHORT)0x5403)
 
-#define KS_TSDU_BUF_RECEIVING       0x0001
+#define KS_TSDU_COMM_PARTIAL         0x0001
+
 typedef struct _KS_TSDU_BUF {
 
     USHORT              TsduType;
@@ -202,11 +201,9 @@ typedef struct _KS_TSDU_BUF {
     ULONG               StartOffset;
 
     PVOID               UserBuffer;
-
+    PMDL                Mdl;         /* mdl */
 } KS_TSDU_BUF, *PKS_TSDU_BUF;
 
-#define KS_TSDU_DAT_RECEIVING       0x0001
-
 typedef struct _KS_TSDU_DAT {
 
     USHORT              TsduType;
@@ -216,48 +213,65 @@ typedef struct _KS_TSDU_DAT {
     ULONG               StartOffset;
 
     ULONG               TotalLength;
+    PMDL                Mdl;        /* mdl */
 
-    UCHAR               Data[1];
+    UCHAR               Data[0];
 
 } KS_TSDU_DAT, *PKS_TSDU_DAT;
 
-#define KS_DWORD_ALIGN(x)      (((x) + 0x03) & (~(0x03)))
-#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data)))
+#define KS_QWORD_ALIGN(x)      (((x) + 0x07) & ~0x07)  /* round x up to a multiple of 8; the mask widens with x instead of truncating to 32 bits */
+#define KS_TSDU_STRU_SIZE(Len) (KS_QWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data[0])))
 
 typedef struct _KS_TSDU_MDL {
+    USHORT              TsduType;      /* TSDU_TYPE_MDL */
+    USHORT              TsduFlags;     /* */
 
-    USHORT              TsduType;
-    USHORT              TsduFlags;
-
-    ULONG               DataLength;
-    ULONG               StartOffset;    
+    ULONG               DataLength;    /* total valid data length */
+    ULONG               BaseOffset;    /* payload offset in Tsdu */
+    ULONG               StartOffset;   /* offset in payload */
 
+    PVOID               Descriptor;    /* tdi descriptor for receiving */
     PMDL                Mdl;
-    PVOID               Descriptor;
-
 } KS_TSDU_MDL, *PKS_TSDU_MDL;
 
+typedef struct ks_engine_mgr {
+    spinlock_t              lock;
+    int                     stop;
+    event_t                 exit;
+    event_t                 start;
+    struct list_head        list;
+} ks_engine_mgr_t;
+
+typedef struct ks_engine_slot {
+    ks_tconn_t *            tconn;
+    void *                  tsdumgr;
+    struct list_head        link;
+    int                     queued;
+    ks_engine_mgr_t *       emgr;
+} ks_engine_slot_t;
 
 typedef struct _KS_TSDUMGR {
-
-    struct list_head    TsduList;
-    ULONG               NumOfTsdu;
-    ULONG               TotalBytes;
-    KEVENT              Event;
-
+    struct list_head        TsduList;
+    ULONG                   NumOfTsdu;
+    ULONG                   TotalBytes;
+    KEVENT                  Event;
+    spinlock_t              Lock;
+    ks_engine_slot_t        Slot;
+    ULONG                   Payload;
+    int                     Busy:1;
+    int                     OOB:1;
 } KS_TSDUMGR, *PKS_TSDUMGR;
 
+#define ks_lock_tsdumgr(mgr)   spin_lock(&((mgr)->Lock))
+#define ks_unlock_tsdumgr(mgr) spin_unlock(&((mgr)->Lock))
 
 typedef struct _KS_CHAIN {
-
-    KS_TSDUMGR          Normal;
-    KS_TSDUMGR          Expedited;
-
+    KS_TSDUMGR          Normal;      /* normal queue */
+    KS_TSDUMGR          Expedited;   /* OOB/expedited queue */
 } KS_CHAIN, *PKS_CHAIN;
 
 
-#define TDINAL_SCHED_FACTOR (1)
-#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR))
+#define KS_CAN_SCHED(TM) ((TM)->TotalBytes >= ((TM)->Payload >> 2))
 
 //
 // Handler Settings Indictor 
@@ -299,7 +313,7 @@ typedef struct _KS_ADDRESS {
 typedef struct _KS_DISCONNECT_WORKITEM {
 
     WORK_QUEUE_ITEM         WorkItem;       // Workitem to perform disconnection
-    ksock_tconn_t *         tconn;          // tdi connecton
+    ks_tconn_t *            tconn;          // tdi connecton
     ULONG                   Flags;          // connection broken/discnnection flags
     KEVENT                  Event;          // sync event
 
@@ -323,45 +337,38 @@ typedef struct _KS_CONNECTION {
 // type definitions
 //
 
-typedef MDL                         ksock_mdl_t;
-typedef UNICODE_STRING              ksock_unicode_name_t;
-typedef WORK_QUEUE_ITEM             ksock_workitem_t;
+typedef MDL                         ks_mdl_t;
+typedef UNICODE_STRING              ks_unicode_name_t;
+typedef WORK_QUEUE_ITEM             ks_workitem_t;
 
 
-typedef KS_CHAIN                    ksock_chain_t;
-typedef KS_ADDRESS                  ksock_tdi_addr_t;
-typedef KS_CONNECTION               ksock_tconn_info_t;
-typedef KS_DISCONNECT_WORKITEM      ksock_disconnect_workitem_t;
+typedef KS_CHAIN                    ks_chain_t;
+typedef KS_ADDRESS                  ks_tdi_addr_t;
+typedef KS_CONNECTION               ks_tconn_info_t;
+typedef KS_DISCONNECT_WORKITEM      ks_disconnect_t;
 
 
 //
 // Structures for transmission done Workitem
 //
 
-typedef struct _KS_TCPX_FINILIZE {
-    ksock_workitem_t        item;
-    void *                  tx;
-} ksock_tcpx_fini_t;
-
-
-typedef struct ksock_backlogs {
+typedef struct ks_backlogs {
 
         struct list_head    list;   /* list to link the backlog connections */
         int                 num;    /* number of backlogs in the list */
 
-} ksock_backlogs_t;
+} ks_backlogs_t;
 
 
-typedef struct ksock_daemon {
+typedef struct ks_daemon {
 
-    ksock_tconn_t *         tconn;         /* the listener connection object */
+    ks_tconn_t *            tconn;         /* the listener connection object */
     unsigned short          nbacklogs;     /* number of listening backlog conns */
     unsigned short          port;          /* listening port number */ 
     int                     shutdown;      /* daemon threads is to exit */
-    struct list_head        list;          /* to be attached into ksock_nal_data_t*/
-
-} ksock_daemon_t ;
+    struct list_head        list;          /* to be attached into ks_nal_data_t */
 
+} ks_daemon_t;
 
 typedef enum {
 
@@ -373,8 +380,10 @@ typedef enum {
                         // or refuse the connecting request from remote peers.
 
     kstt_child,         // accepted child connection type, it's parent must be Listener
+
     kstt_lasttype
-} ksock_tconn_type;
+
+} ks_tconn_type_t;
 
 typedef enum {
 
@@ -401,7 +410,8 @@ typedef enum {
     ksts_aborted,       // un-exptected broken status
 
     ksts_last           // total number of tconn statuses
-} ksock_tconn_state;
+
+} ks_tconn_state_t;
 
 #define KS_TCONN_MAGIC              'KSTM'
 
@@ -411,23 +421,22 @@ typedef enum {
 
 #define KS_TCONN_DAEMON_STARTED     0x00100000  // indict the daemon is started,
                                                 // only valid for listener
-
 struct socket {
 
-        ulong_ptr                   kstc_magic;      /* Magic & Flags */
-        ulong_ptr                   kstc_flags;
+        ulong                       kstc_magic;      /* Magic & Flags */
+        ulong                       kstc_flags;
 
         spinlock_t                  kstc_lock;       /* serialise lock*/
-        void *                      kstc_conn;       /* ksock_conn_t */
+        void *                      kstc_conn;       /* ks_conn_t */
 
-        ksock_tconn_type            kstc_type;          /* tdi connection Type */
-        ksock_tconn_state           kstc_state;      /* tdi connection state flag */
+        ks_tconn_type_t             kstc_type;          /* tdi connection Type */
+        ks_tconn_state_t            kstc_state;      /* tdi connection state flag */
 
-        ksock_unicode_name_t        kstc_dev;        /* tcp transport device name */
+        ks_unicode_name_t           kstc_dev;        /* tcp transport device name */
 
-        ksock_tdi_addr_t            kstc_addr;       /* local address handlers / Objects */
+        ks_tdi_addr_t               kstc_addr;       /* local address handlers / Objects */
 
-        atomic_t                    kstc_refcount;   /* reference count of ksock_tconn */
+        atomic_t                    kstc_refcount;   /* reference count of ks_tconn_t */
 
         struct list_head            kstc_list;       /* linked to global ksocknal_data */
 
@@ -435,17 +444,17 @@ struct socket {
 
             struct {
                 int                 nbacklog;         /* total number of backlog tdi connections */
-                ksock_backlogs_t    kstc_listening;   /* listeing backlog child connections */
-                ksock_backlogs_t    kstc_accepted;    /* connected backlog child connections */
+                ks_backlogs_t       kstc_listening;   /* listening backlog child connections */
+                ks_backlogs_t       kstc_accepted;    /* connected backlog child connections */
                 event_t             kstc_accept_event;   /* Signaled by AcceptedHander, 
                                                             ksocknal_wait_accpeted_conns waits on */
                 event_t             kstc_destroy_event;  /* Signaled when accepted child is released */
             } listener; 
 
             struct  {
-                ksock_tconn_info_t  kstc_info;      /* Connection Info if Connected */
-                ksock_chain_t       kstc_recv;      /* tsdu engine for data receiving */
-                ksock_chain_t       kstc_send;      /* tsdu engine for data sending */
+                ks_tconn_info_t     kstc_info;      /* Connection Info if Connected */
+                ks_chain_t          kstc_recv;      /* tsdu engine for data receiving */
+                ks_chain_t          kstc_send;      /* tsdu engine for data sending */
 
                 int                 kstc_queued;    /* Attached to Parent->ChildList ... */
                 int                 kstc_queueno;   /* 0: Attached to Listening list 
@@ -455,30 +464,28 @@ struct socket {
                 int                 kstc_accepted;  /* the connection is built ready ? */
 
                 struct list_head    kstc_link;      /* linked to parent tdi connection */
-                ksock_tconn_t   *   kstc_parent;    /* pointers to it's listener parent */
+                ks_tconn_t   *      kstc_parent;    /* pointer to its listener parent */
             } child;
 
             struct {
-                ksock_tconn_info_t  kstc_info;      /* Connection Info if Connected */
-                ksock_chain_t       kstc_recv;      /* tsdu engine for data receiving */
-                ksock_chain_t       kstc_send;      /* tsdu engine for data sending */
+                ks_tconn_info_t     kstc_info;      /* Connection Info if Connected */
+                ks_chain_t          kstc_recv;      /* tsdu engine for data receiving */
+                ks_chain_t          kstc_send;      /* tsdu engine for data sending */
             } sender; 
         };
 
-        ulong_ptr                   kstc_snd_wnd;   /* Sending window size */
-        ulong_ptr                   kstc_rcv_wnd;   /* Recving window size */
+        ulong                       kstc_snd_wnd;   /* Sending window size */
+        ulong                       kstc_rcv_wnd;   /* Recving window size */
 
-        ksock_workitem_t            kstc_destroy;    /* tconn destruction workitem */
-        ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */
+        ks_workitem_t               kstc_destroy;    /* tconn destruction workitem */
+        ks_disconnect_t             kstc_disconnect; /* connection disconnect workitem */
 
-        ksock_schedule_cb           kstc_sched_cb;   /* notification callback routine of completion */
-        ksock_update_tx             kstc_update_tx;  /* aync sending callback to update tx */
+        ks_schedule_cb              kstc_sched_cb;   /* notification callback routine of completion */
 };
 
 #define SOCK_WMEM_QUEUED(sock) (0)
-
 #define TDINAL_WINDOW_DEFAULT_SIZE  (0x100000)
-
+#define TDINAL_MAX_TSDU_QUEUE_SIZE  (0x200000)
 
 struct _KS_UDP_COMPLETION_CONTEXT;
 struct _KS_TCP_COMPLETION_CONTEXT;
@@ -510,7 +517,7 @@ typedef struct _KS_UDP_COMPLETION_CONTEXT {
     PKEVENT                             Event;
     union {
         PFILE_OBJECT                    AddressObject;
-        ksock_tconn_t *                 tconn;
+        ks_tconn_t *                    tconn;
     };
 
     PKS_UDP_COMPLETION_ROUTINE          CompletionRoutine;
@@ -523,29 +530,20 @@ typedef struct _KS_UDP_COMPLETION_CONTEXT {
 // Tcp Irp Completion Context (used by tcp data recv/send)
 //
 
-typedef struct _KS_TCP_COMPLETION_CONTEXT {
+#define KS_TCP_CONTEXT_MAGIC 'CCTK'
 
+typedef struct _KS_TCP_COMPLETION_CONTEXT {
     PKEVENT                             Event;      // Event to be waited on by Irp caller ...
-
-    ksock_tconn_t *                     tconn;      // the tdi connection
-
+    ks_tconn_t *                        tconn;      // the tdi connection
     PKS_TCP_COMPLETION_ROUTINE          CompletionRoutine;
     PVOID                               CompletionContext;
-    PVOID                               CompletionContext2;
-
-    PKS_TSDUMGR                         KsTsduMgr;  // Tsdu buffer manager
-
-    //
-    // These tow new members are for NON_BLOCKING transmission
-    //
-
-    BOOLEAN                                                        bCounted;    // To indict needing refcount to
-                                                     // execute CompetionRoutine
-    ULONG                               ReferCount;  // Refer count of this structure
-
+    PKS_TSDUMGR                         TsduMgr;    // Tsdu buffer manager
+    ULONG                               Length;     // Payload length in KsTsdu queue
+    PCHAR                               Buffer;     // User allocated buffer
+    ULONG                               Magic;      // Magic key
 } KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT;
 
-typedef KS_TCP_COMPLETION_CONTEXT  ksock_tdi_tx_t, ksock_tdi_rx_t;
+typedef KS_TCP_COMPLETION_CONTEXT  ks_tdi_tx_t, ks_tdi_rx_t;
 
 
 /*
@@ -596,7 +594,6 @@ typedef KS_TCP_COMPLETION_CONTEXT  ksock_tdi_tx_t, ksock_tdi_rx_t;
         Irp->UserBuffer = OutBuffer;                            \
     }
 
-
 typedef struct ks_addr_slot {
     LIST_ENTRY      link;
     int             up;
@@ -626,30 +623,28 @@ typedef struct {
 
     int               ksnd_init;            /* initialisation state */
 
-    TDI_PROVIDER_INFO ksnd_provider;    /* tdi tcp/ip provider's information */
+    TDI_PROVIDER_INFO ksnd_provider;        /* tdi tcp/ip provider's information */
 
     spinlock_t        ksnd_tconn_lock;      /* tdi connections access serialise */
 
     int               ksnd_ntconns;         /* number of tconns attached in list */
     struct list_head  ksnd_tconns;          /* tdi connections list */
-    cfs_mem_cache_t * ksnd_tconn_slab;      /* slabs for ksock_tconn_t allocations */
+    cfs_mem_cache_t * ksnd_tconn_slab;      /* slabs for ks_tconn_t allocations */
     event_t           ksnd_tconn_exit;      /* exit event to be signaled by the last tconn */
 
     spinlock_t        ksnd_tsdu_lock;       /* tsdu access serialise */
         
     int               ksnd_ntsdus;          /* number of tsdu buffers allocated */
-    ulong_ptr     ksnd_tsdu_size;       /* the size of a signel tsdu buffer */
+    ulong             ksnd_tsdu_size;       /* the size of a single tsdu buffer */
     cfs_mem_cache_t * ksnd_tsdu_slab;       /* slab cache for tsdu buffer allocation */
 
     int               ksnd_nfreetsdus;      /* number of tsdu buffers in the freed list */
-    struct list_head  ksnd_freetsdus;          /* List of the freed Tsdu buffer. */
+    struct list_head  ksnd_freetsdus;       /* List of the freed Tsdu buffer. */
 
-    spinlock_t        ksnd_daemon_lock;     /* stabilize daemon ops */
-    int               ksnd_ndaemons;        /* number of listening daemons */
-    struct list_head  ksnd_daemons;         /* listening daemon list */
-    event_t           ksnd_daemon_exit;     /* the last daemon quiting should singal it */
+    int               ksnd_engine_nums;     /* number of tcp sending engine threads */
+    ks_engine_mgr_t * ksnd_engine_mgr;      /* tcp sending engine structure */
 
-} ks_data_t;
+} ks_tdi_data_t;
 
 int
 ks_init_tdi_data();
@@ -658,6 +653,71 @@ void
 ks_fini_tdi_data();
 
 
+int
+ks_query_local_ipaddr(
+    ks_tconn_t *     tconn
+    );
+
+void
+ks_get_tconn(
+    ks_tconn_t * tconn
+    );
+
+void
+ks_put_tconn(
+    ks_tconn_t * tconn
+    );
+
+void
+ks_abort_tconn(
+  ks_tconn_t *     tconn
+    );
+int
+ks_disconnect_tconn(
+    ks_tconn_t *    tconn,
+    ulong           flags
+    );
+
+void
+ks_destroy_tconn(
+    ks_tconn_t *     tconn
+    );
+
+NTSTATUS
+KsLockUserBuffer (
+    IN PVOID            UserBuffer,
+    IN BOOLEAN          bPaged,
+    IN ULONG            Length,
+    IN LOCK_OPERATION   Operation,
+    OUT PMDL *          pMdl
+    );
+
+VOID
+KsReleaseMdl (IN PMDL   Mdl,
+              IN int    Paged );
+
+void
+KsQueueTdiEngine(ks_tconn_t * tconn, PKS_TSDUMGR);
+
+void
+KsRemoveTdiEngine(PKS_TSDUMGR);
+
+NTSTATUS
+ks_set_tcp_option (
+    ks_tconn_t *    tconn,
+    ULONG           ID,
+    PVOID           OptionValue,
+    ULONG           Length
+    );
+
+int
+ks_get_tcp_option (
+    ks_tconn_t *        tconn,
+    ULONG               ID,
+    PVOID               OptionValue,
+    PULONG              Length
+    );
+
 #endif /* __KERNEL__ */
 #endif /* __LIBCFS_WINNT_TCPIP_H__ */
 
index 35c6526..3c46828 100644 (file)
  *
  */
 
-#define ONE_BILLION ((u_int64_t)1000000000)
-#define ONE_MILLION ((u_int64_t)   1000000)
-
-#define HZ (100)
-
 struct timeval {
-       time_t          tv_sec;         /* seconds */
-       suseconds_t     tv_usec;        /* microseconds */
+    time_t      tv_sec;   /* seconds */
+    suseconds_t tv_usec;  /* microseconds */
 };
 
-struct timespec {
-    ulong_ptr tv_sec;
-    ulong_ptr tv_nsec;
-};
+typedef time_t cfs_time_t;
+typedef time_t cfs_duration_t;
 
 #ifdef __KERNEL__
 
 #include <libcfs/winnt/portals_compat25.h>
 
+#define HZ (100)
+
+struct timespec {
+    __u32   tv_sec;
+    __u32   tv_nsec;
+};
+typedef struct timeval cfs_fs_time_t;
+
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t)   1000000)
+
 /*
  * Generic kernel stuff
  */
 
-typedef struct timeval cfs_fs_time_t;
-
-typedef u_int64_t cfs_time_t;
-typedef int64_t cfs_duration_t;
+#define jiffies     (ULONG_PTR)JIFFIES()
+#define cfs_jiffies (ULONG_PTR)JIFFIES()
 
 static inline void do_gettimeofday(struct timeval *tv)
 {
@@ -118,11 +121,11 @@ static inline void do_gettimeofday(struct timeval *tv)
 
     KeQuerySystemTime(&Time);
 
-    tv->tv_sec  = (long_ptr) (Time.QuadPart / 10000000);
-    tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
+    tv->tv_sec  = (time_t) (Time.QuadPart / 10000000);
+    tv->tv_usec = (suseconds_t) (Time.QuadPart % 10000000) / 10;
 }
 
-static inline cfs_time_t JIFFIES()
+static inline LONGLONG JIFFIES()
 {
     LARGE_INTEGER Tick;
     LARGE_INTEGER Elapse;
@@ -137,33 +140,16 @@ static inline cfs_time_t JIFFIES()
 
 static inline cfs_time_t cfs_time_current(void)
 {
-    return JIFFIES();
+    return (cfs_time_t)JIFFIES();
 }
 
-static inline cfs_time_t cfs_time_current_sec(void)
+static inline time_t cfs_time_current_sec(void)
 {
-    return (JIFFIES() / HZ);
+    return (time_t)(JIFFIES() / HZ);
 }
 
-static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
-{
-    return (t + d);
-}
-
-static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
-{
-    return (t1 - t2);
-}
-
-static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
-{
-    return ((int64_t)t1 - (int64_t)t2) < 0; 
-}
-
-static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
-{
-    return ((int64_t)t1 - (int64_t)t2) <= 0;
-}
+#define time_before(t1, t2) (((signed)(t1) - (signed)(t2)) < 0) 
+#define time_before_eq(t1, t2) (((signed)(t1) - (signed)(t2)) <= 0) 
 
 static inline void cfs_fs_time_current(cfs_fs_time_t *t)
 {
@@ -178,14 +164,21 @@ static inline void cfs_fs_time_current(cfs_fs_time_t *t)
     t->tv_usec = (Sys.LowPart % 10000000) / 10;
 }
 
+static inline unsigned long get_seconds(void)
+{
+    cfs_fs_time_t t;
+    cfs_fs_time_current(&t);
+    return (unsigned long) t.tv_sec;
+}
+
 static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t)
 {
-    return t->tv_sec;
+    return (cfs_time_t)t->tv_sec;
 }
 
-static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
+static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t)
 {
-    return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec;
+    return (unsigned long)(t->tv_sec) * ONE_MILLION + t->tv_usec;
 }
 
 static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
@@ -198,126 +191,95 @@ static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
     return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2));
 }
 
-static inline cfs_duration_t cfs_time_seconds(int seconds)
+static inline cfs_duration_t cfs_time_seconds(cfs_duration_t seconds)
 {
-    return (cfs_duration_t)seconds * HZ;
+    return  (cfs_duration_t)(seconds * HZ);
 }
 
-static inline cfs_time_t cfs_duration_sec(cfs_duration_t d)
+static inline time_t cfs_duration_sec(cfs_duration_t d)
 {
-        return d / HZ;
+    return (time_t)(d / HZ);
 }
 
 static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
 {
-        s->tv_sec = (suseconds_t) (d / HZ);
-        s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) *
+    s->tv_sec = (__u32)(d / HZ);
+    s->tv_usec = (__u32)((d - (cfs_duration_t)s->tv_sec * HZ) *
                               ONE_MILLION / HZ);
 }
 
 static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
 {
-        s->tv_sec = (suseconds_t) (d / HZ);
-        s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) *
-                              ONE_BILLION / HZ);
+    s->tv_sec = (__u32) (d / HZ);
+    s->tv_nsec = (__u32)((d - (cfs_duration_t)s->tv_sec * HZ) *
+                           ONE_BILLION / HZ);
 }
 
 static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
 {
-        *v = *t;
+    *v = *t;
 }
 
 static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
 {
-        s->tv_sec  = t->tv_sec;
-        s->tv_nsec = t->tv_usec * 1000;
+    s->tv_sec  = (__u32) t->tv_sec;
+    s->tv_nsec = (__u32) t->tv_usec * 1000;
 }
 
-#define cfs_time_current_64 cfs_time_current
-#define cfs_time_add_64     cfs_time_add
-#define cfs_time_shift_64   cfs_time_shift
-#define cfs_time_before_64  cfs_time_before
-#define cfs_time_beforeq_64 cfs_time_beforeq
-
-/*
- * One jiffy
- */
-#define CFS_TICK                (1)
-
-#define LTIME_S(t)                     (t)
-
-#define CFS_TIME_T              "%I64u"
-#define CFS_DURATION_T          "%I64d"
-
-#else   /* !__KERNEL__ */
-
-/*
- * Liblustre. time(2) based implementation.
- */
-#include <libcfs/user-time.h>
-
-
-//
-// Time routines ...
-//
-
-NTSYSAPI
-CCHAR
-NTAPI
-NtQuerySystemTime(
-    OUT PLARGE_INTEGER  CurrentTime
-    );
-
 
-NTSYSAPI
-BOOLEAN
-NTAPI
-RtlTimeToSecondsSince1970(
-    IN PLARGE_INTEGER  Time,
-    OUT PULONG  ElapsedSeconds
-    );
+#define cfs_time_current_64 JIFFIES
 
+static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
+{
+    return t + d;
+}
 
-NTSYSAPI
-VOID
-NTAPI
-RtlSecondsSince1970ToTime(
-    IN ULONG  ElapsedSeconds,
-    OUT PLARGE_INTEGER  Time
-    );
-
-NTSYSAPI
-VOID
-NTAPI
-Sleep(
-  DWORD dwMilliseconds   // sleep time in milliseconds
-);
+static inline __u64 cfs_time_shift_64(cfs_duration_t seconds)
+{
+    return cfs_time_add_64(cfs_time_current_64(),
+                           cfs_time_seconds(seconds));
+}
 
+static inline int cfs_time_before_64(__u64 t1, __u64 t2)
+{
+    return (__s64)t2 - (__s64)t1 > 0;
+}
 
-static inline void sleep(int time)
+static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
 {
-    DWORD Time = 1000 * time;
-    Sleep(Time);
+    return (__s64)t2 - (__s64)t1 >= 0;
 }
 
+/*
+ * One jiffy
+ */
+#define CFS_TICK                (1)
+#define LTIME_S(t)                     *((__u64 *)&(t))
 
-static inline void do_gettimeofday(struct timeval *tv)
-{
-    LARGE_INTEGER Time;
+#define CFS_TIME_T              "%u"
+#define CFS_DURATION_T          "%d"
 
-    NtQuerySystemTime(&Time);
+#else   /* !__KERNEL__ */
 
-    tv->tv_sec  = (long_ptr) (Time.QuadPart / 10000000);
-    tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10;
-}
+#include <time.h>
+#ifdef HAVE_LIBPTHREAD
+#include <pthread.h>
+#else
+struct timespec {
+    unsigned long tv_sec;
+    unsigned long tv_nsec;
+};
+#endif /* HAVE_LIBPTHREAD */
 
-static inline int gettimeofday(struct timeval *tv, void * tz)
-{
-    do_gettimeofday(tv);
-    return 0;
-}
+#include "../user-time.h"
+
+/* liblustre. time(2) based implementation. */
+int nanosleep(const struct timespec *rqtp, struct timespec *rmtp);
+void sleep(int time);
+void do_gettimeofday(struct timeval *tv);
+int gettimeofday(struct timeval *tv, void * tz);
 
-#endif /* __KERNEL__ */
+#endif /* !__KERNEL__ */
 
 /* __LIBCFS_LINUX_LINUX_TIME_H__ */
 #endif
index a87ec81..95660e1 100644 (file)
 #ifdef __KERNEL__
 
 #include <ntifs.h>
+#include <basetsd.h>
 #include <windef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
-
 #include <tdi.h>
 #include <tdikrnl.h>
 #include <tdiinfo.h>
 
 #else
 
-#include <ntddk.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <ntddk.h>
 #include <stdarg.h>
-#include <time.h>
 #include <io.h>
+#include <time.h>
 #include <string.h>
 #include <assert.h>
-
 #endif
 
 
 #define __LITTLE_ENDIAN
+#define __user
 
 #define inline     __inline
 #define __inline__ __inline
 
-typedef unsigned __int8     __u8;
-typedef signed   __int8     __s8;
-
-typedef signed   __int64    __s64;
-typedef unsigned __int64    __u64;
+typedef unsigned __int8         __u8;
+typedef signed   __int8         __s8;
 
 typedef        signed   __int16        __s16;
 typedef        unsigned __int16        __u16;
@@ -87,19 +84,7 @@ typedef      unsigned __int32        __u32;
 typedef        signed   __int64        __s64;
 typedef        unsigned __int64        __u64;
 
-typedef unsigned long       ULONG;
-
-
-#if defined(_WIN64)
-    #define long_ptr        __int64
-    #define ulong_ptr       unsigned __int64
-    #define BITS_PER_LONG   (64)
-#else
-    #define long_ptr        long
-    #define ulong_ptr       unsigned long
-    #define BITS_PER_LONG   (32)
-
-#endif
+typedef unsigned long           ULONG;
 
 /* bsd */
 typedef unsigned char          u_char;
@@ -123,6 +108,11 @@ typedef            __s16           int16_t;
 typedef                __u32           u_int32_t;
 typedef                __s32           int32_t;
 
+#define    u8           __u8
+#define    u16          __u16
+#define    u32          __u32
+#define    u64          __u64
+
 #endif /* !(__BIT_TYPES_DEFINED__) */
 
 typedef                __u8            uint8_t;
@@ -133,72 +123,99 @@ typedef           __u64           uint64_t;
 typedef                __u64           u_int64_t;
 typedef                __s64           int64_t;
 
-typedef long            ssize_t;
+typedef     long        ssize_t;
 
-typedef __u32           suseconds_t;
+typedef     __u32       suseconds_t;
 
-typedef __u32           pid_t, tid_t;
+typedef     __u16       uid_t, gid_t;
 
-typedef __u16           uid_t, gid_t;
+typedef     __u16       mode_t;
+typedef     __u16       umode_t;
 
-typedef __u16           mode_t;
-typedef __u16           umode_t;
+typedef     __u32       sigset_t;
 
-typedef ulong_ptr       sigset_t;
-
-typedef uint64_t        loff_t;
-typedef HANDLE          cfs_handle_t;
+typedef int64_t         loff_t;
+typedef void *          cfs_handle_t;
 typedef uint64_t        cycles_t;
 
 #ifndef INVALID_HANDLE_VALUE
 #define INVALID_HANDLE_VALUE ((HANDLE)-1)
 #endif
 
+# define BITS_PER_LONG   (32)
+
+#if defined(_WIN64)
+typedef  __int64  long_ptr_t;
+typedef  unsigned __int64 ulong_ptr_t;
+#else
+typedef long long_ptr_t;
+typedef unsigned long ulong_ptr_t;
+#endif
 
 #ifdef __KERNEL__ /* kernel */
 
 typedef __u32           off_t;
-typedef __u32           time_t;
 
 typedef unsigned short  kdev_t;
 
-#else  /* !__KERNEL__ */
+typedef __u32           pid_t;
+typedef __u32           tid_t;
 
-typedef int             BOOL;
-typedef __u8            BYTE;
-typedef __u16           WORD;
-typedef __u32           DWORD;
+typedef __u32           ino_t;
 
-#endif /* __KERNEL__ */
+#define dma_addr_t      PVOID
+#define gfp_t           __u32
 
 /*
- * Conastants suffix
+ *  Byte order
  */
 
-#define ULL i64
-#define ull i64
+//
+// Byte order swapping routines
+//
 
-/*
- * Winnt kernel has no capabilities.
- */
+#if 0 && NTDDI_VERSION < 0x06000000
+
+USHORT
+FASTCALL
+RtlUshortByteSwap(
+    IN USHORT Source
+    );
+
+ULONG
+FASTCALL
+RtlUlongByteSwap(
+    IN ULONG Source
+    );
+
+ULONGLONG
+FASTCALL
+RtlUlonglongByteSwap(
+    IN ULONGLONG Source
+    );
+#endif
 
-typedef __u32 cfs_kernel_cap_t;
+#else  /* !__KERNEL__ */
 
-#define INT_MAX         ((int)(~0U>>1))
-#define INT_MIN         (-INT_MAX - 1)
-#define UINT_MAX        (~0U)
+typedef int             BOOL;
 
-#endif /* _WINNT_TYPES_H */
+#ifndef _WINDOWS_
+typedef __u8            BYTE;
+typedef __u16           WORD;
+typedef __u32           DWORD;
+#endif
+
+#define __WORDSIZE 32
+typedef long            off_t;
 
+#endif /* __KERNEL__ */
 
 /*
- *  Bytes order 
+ * Constant suffixes
  */
 
-//
-// Byte order swapping routines
-//
-
+#define ULL i64
+#define ull i64
 
 #define ___swab16(x) RtlUshortByteSwap(x)
 #define ___swab32(x) RtlUlongByteSwap(x)
@@ -218,14 +235,14 @@ typedef __u32 cfs_kernel_cap_t;
 
 #define ___constant_swab64(x) \
        ((__u64)( \
-               (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \
-               (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \
-               (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \
-               (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) <<  8) | \
-               (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >>  8) | \
-               (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \
-               (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \
-               (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) ))
+               (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUi64) << 56) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000000000ff00Ui64) << 40) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000Ui64) << 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00000000ff000000Ui64) <<  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000ff00000000Ui64) >>  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000Ui64) >> 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00ff000000000000Ui64) >> 40) | \
+               (__u64)(((__u64)(x) & (__u64)0xff00000000000000Ui64) >> 56) ))
 
 
 #define __swab16(x)  ___constant_swab16(x)
@@ -347,15 +364,23 @@ typedef __u32 cfs_kernel_cap_t;
 #define htons(x)           ntohs(x)
 
 
+/*
+ *  ARRAY_SIZE must be applied to a real array, not a pointer
+ */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#endif /* _WINNT_TYPES_H */
 
 #ifndef _I386_ERRNO_H
 #define _I386_ERRNO_H
 
+#include <errno.h>
+
 #define        EPERM            1      /* Operation not permitted */
 #define        ENOENT           2      /* No such file or directory */
 #define        ESRCH            3      /* No such process */
 #define        EINTR            4      /* Interrupted system call */
-#define        EIO                  5  /* I/O error */
+#define        EIO                      5      /* I/O error */
 #define        ENXIO            6      /* No such device or address */
 #define        E2BIG            7      /* Arg list too long */
 #define        ENOEXEC          8      /* Exec format error */
@@ -385,16 +410,6 @@ typedef __u32 cfs_kernel_cap_t;
 #define        EPIPE           32      /* Broken pipe */
 #define        EDOM            33      /* Math argument out of domain of func */
 #define        ERANGE          34      /* Math result not representable */
-#undef EDEADLK
-#define        EDEADLK         35      /* Resource deadlock would occur */
-#undef ENAMETOOLONG
-#define        ENAMETOOLONG    36      /* File name too long */
-#undef ENOLCK
-#define        ENOLCK          37      /* No record locks available */
-#undef ENOSYS
-#define        ENOSYS          38      /* Function not implemented */
-#undef ENOTEMPTY
-#define        ENOTEMPTY       39      /* Directory not empty */
 #define        ELOOP           40      /* Too many symbolic links encountered */
 #define        EWOULDBLOCK     EAGAIN  /* Operation would block */
 #define        ENOMSG          42      /* No message of desired type */
@@ -441,8 +456,6 @@ typedef __u32 cfs_kernel_cap_t;
 #define        ELIBSCN         81      /* .lib section in a.out corrupted */
 #define        ELIBMAX         82      /* Attempting to link in too many shared libraries */
 #define        ELIBEXEC        83      /* Cannot exec a shared library directly */
-#undef EILSEQ
-#define        EILSEQ          84      /* Illegal byte sequence */
 #define        ERESTART        85      /* Interrupted system call should be restarted */
 #define        ESTRPIPE        86      /* Streams pipe error */
 #define        EUSERS          87      /* Too many users */
@@ -501,8 +514,6 @@ typedef __u32 cfs_kernel_cap_t;
 #define EBADTYPE       527     /* Type not supported by server */
 #define EJUKEBOX       528     /* Request initiated, but will not complete before timeout */
 
-
-
 /* open/fcntl - O_SYNC is only implemented on blocks devices and on files
    located on an ext2 file system */
 #define O_ACCMODE         0003
@@ -578,6 +589,7 @@ typedef __u32 cfs_kernel_cap_t;
  *  signal values ...
  */
 
+#ifdef __KERNEL__
 #define SIGHUP          1
 #define SIGINT          2
 #define SIGQUIT                 3
@@ -620,6 +632,8 @@ typedef __u32 cfs_kernel_cap_t;
 #define SIGRTMIN       32
 #define SIGRTMAX       (_NSIG-1)
 
+#endif
+
 /*
  * SA_FLAGS values:
  *
index 7c0095d..8cf3c80 100644 (file)
@@ -68,6 +68,7 @@ int tracefile_init_arch() {
 }
 
 void tracefile_fini_arch() {
+    fini_rwsem(&tracefile_sem);
 }
 
 void tracefile_read_lock() {
index 12ec7a9..23b1b78 100644 (file)
@@ -112,13 +112,8 @@ EXPORT_SYMBOL(libcfs_kmemory);
 
 static cfs_waitq_t debug_ctlwq;
 
-#ifdef HAVE_BGL_SUPPORT
-char debug_file_path_arr[1024] = "/bgl/ion/tmp/lustre-log";
-#elif defined(__arch_um__)
-char debug_file_path_arr[1024] = "/r/tmp/lustre-log";
-#else
-char debug_file_path_arr[1024] = "/tmp/lustre-log";
-#endif
+char debug_file_path_arr[1024] = DEBUG_FILE_PATH_DEFAULT;
+
 /* We need to pass a pointer here, but elsewhere this must be a const */
 static char *debug_file_path = &debug_file_path_arr[0];
 CFS_MODULE_PARM(debug_file_path, "s", charp, 0644,
@@ -429,8 +424,8 @@ void libcfs_debug_dumplog_internal(void *arg)
 
         if (strncmp(debug_file_path_arr, "NONE", 4) != 0) {
                 snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-                         "%s.%ld.%ld", debug_file_path_arr,
-                         cfs_time_current_sec(), (long)arg);
+                         "%s.%ld." LPLD, debug_file_path_arr,
+                         cfs_time_current_sec(), (long_ptr_t)arg);
                 printk(KERN_ALERT "LustreError: dumping log to %s\n",
                        debug_file_name);
                 tracefile_dump_all_pages(debug_file_name);
@@ -461,7 +456,7 @@ void libcfs_debug_dumplog(void)
         cfs_waitq_add(&debug_ctlwq, &wait);
 
         rc = cfs_kernel_thread(libcfs_debug_dumplog_thread,
-                               (void *)(long)cfs_curproc_pid(),
+                               (void *)(long_ptr_t)cfs_curproc_pid(),
                                CLONE_VM | CLONE_FS | CLONE_FILES);
         if (rc < 0)
                 printk(KERN_ERR "LustreError: cannot start log dump thread: "
index 6029352..6111060 100644 (file)
@@ -124,6 +124,8 @@ void tracefile_fini_arch()
                kfree(trace_data[i]);
                trace_data[i] = NULL;
        }
+
+       fini_rwsem(&tracefile_sem);
 }
 
 void tracefile_read_lock()
index 325785b..c8ca37b 100644 (file)
 # define EXPORT_SYMTAB
 #endif
 
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/kernel.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
 #define DEBUG_SUBSYSTEM S_LNET
 
 #include <libcfs/libcfs.h>
index 5918560..2fff7fb 100644 (file)
@@ -319,7 +319,8 @@ static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *a
                 struct libcfs_ioctl_handler *hand;
                 err = -EINVAL;
                 down_read(&ioctl_list_sem);
-                list_for_each_entry(hand, &ioctl_list, item) {
+                cfs_list_for_each_entry_typed(hand, &ioctl_list,
+                        struct libcfs_ioctl_handler, item) {
                         err = hand->handle_ioctl(cmd, data);
                         if (err != -EINVAL) {
                                 if (err == 0)
@@ -432,6 +433,10 @@ static void exit_libcfs_module(void)
         rc = libcfs_debug_cleanup();
         if (rc)
                 printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc);
+
+        fini_rwsem(&ioctl_list_sem);
+        fini_rwsem(&tracefile_sem);
+
         libcfs_arch_cleanup();
 }
 
index 9cf58b4..df02584 100644 (file)
@@ -479,7 +479,7 @@ libcfs_str2nid(const char *str)
                 LASSERT (nf != NULL);
         }
 
-        if (!nf->nf_str2addr(str, sep - str, &addr))
+        if (!nf->nf_str2addr(str, (int)(sep - str), &addr))
                 return LNET_NID_ANY;
 
         return LNET_MKNID(net, addr);
index 15f70de..5b9543e 100644 (file)
@@ -195,7 +195,8 @@ static void tcd_shrink(struct trace_cpu_data *tcd)
         CFS_INIT_LIST_HEAD(&pc.pc_pages);
         spin_lock_init(&pc.pc_lock);
 
-        list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages, 
+                                           struct trace_page, linkage) {
                 if (pgcount-- == 0)
                         break;
 
@@ -537,7 +538,8 @@ static void put_pages_back_on_cpu(void *info)
         tcd_for_each_type_lock(tcd, i) {
                 cur_head = tcd->tcd_pages.next;
 
-                list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
+                cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages,
+                                                   struct trace_page, linkage) {
 
                         __LASSERT_TAGE_INVARIANT(tage);
 
@@ -568,7 +570,8 @@ static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
         struct trace_page *tmp;
 
         spin_lock(&pc->pc_lock);
-        list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &pc->pc_pages,
+                                           struct trace_page, linkage) {
 
                 __LASSERT_TAGE_INVARIANT(tage);
 
@@ -619,7 +622,8 @@ void trace_debug_print(void)
 
         pc.pc_want_daemon_pages = 1;
         collect_pages(&pc);
-        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
+                                           struct trace_page, linkage) {
                 char *p, *file, *fn;
                 cfs_page_t *page;
 
@@ -636,7 +640,7 @@ void trace_debug_print(void)
                         p += strlen(file) + 1;
                         fn = p;
                         p += strlen(fn) + 1;
-                        len = hdr->ph_len - (p - (char *)hdr);
+                        len = hdr->ph_len - (int)(p - (char *)hdr);
 
                         print_to_console(hdr, D_EMERG, p, len, file, fn);
 
@@ -680,7 +684,8 @@ int tracefile_dump_all_pages(char *filename)
         /* ok, for now, just write the pages.  in the future we'll be building
          * iobufs with the pages and calling generic_direct_IO */
         CFS_MMSPACE_OPEN;
-        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
+                                           struct trace_page, linkage) {
 
                 __LASSERT_TAGE_INVARIANT(tage);
 
@@ -717,7 +722,8 @@ void trace_flush_pages(void)
 
         pc.pc_want_daemon_pages = 1;
         collect_pages(&pc);
-        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+        cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
+                                           struct trace_page, linkage) {
 
                 __LASSERT_TAGE_INVARIANT(tage);
 
@@ -997,14 +1003,15 @@ static int tracefiled(void *arg)
                 hdr = cfs_page_address(tage->page);
                 hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
 
-                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+                cfs_list_for_each_entry_safe_typed(tage, tmp, &pc.pc_pages,
+                                                   struct trace_page, linkage) {
                         static loff_t f_pos;
 
                         __LASSERT_TAGE_INVARIANT(tage);
 
                         if (f_pos >= (off_t)tracefile_size)
                                 f_pos = 0;
-                        else if (f_pos > cfs_filp_size(filp))
+                        else if (f_pos > (off_t)cfs_filp_size(filp))
                                 f_pos = cfs_filp_size(filp);
 
                         rc = cfs_filp_write(filp, cfs_page_address(tage->page),
@@ -1105,7 +1112,8 @@ static void trace_cleanup_on_cpu(void *info)
         tcd_for_each_type_lock(tcd, i) {
                 tcd->tcd_shutting_down = 1;
 
-                list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) {
+                cfs_list_for_each_entry_safe_typed(tage, tmp, &tcd->tcd_pages,
+                                                   struct trace_page, linkage) {
                         __LASSERT_TAGE_INVARIANT(tage);
 
                         list_del(&tage->linkage);
index dd25327..7faf803 100644 (file)
@@ -265,21 +265,21 @@ extern void trace_assertion_failed(const char *str, const char *fn,
                                   const char *file, int line);
 
 /* ASSERTION that is safe to use within the debug system */
-#define __LASSERT(cond)                                                                \
-({                                                                             \
-       if (unlikely(!(cond))) {                                                \
-                trace_assertion_failed("ASSERTION("#cond") failed",            \
-                                      __FUNCTION__, __FILE__, __LINE__);       \
-       }                                                                       \
-})
-
-#define __LASSERT_TAGE_INVARIANT(tage)                 \
-({                                                     \
-        __LASSERT(tage != NULL);                       \
-        __LASSERT(tage->page != NULL);                 \
-        __LASSERT(tage->used <= CFS_PAGE_SIZE);                \
-        __LASSERT(cfs_page_count(tage->page) > 0);     \
-})
+#define __LASSERT(cond)                                                 \
+    do {                                                                \
+        if (unlikely(!(cond))) {                                        \
+                trace_assertion_failed("ASSERTION("#cond") failed",     \
+                                 __FUNCTION__, __FILE__, __LINE__);     \
+        }                                                               \
+    } while (0)
+
+#define __LASSERT_TAGE_INVARIANT(tage)                                  \
+    do {                                                                \
+        __LASSERT(tage != NULL);                                        \
+        __LASSERT(tage->page != NULL);                                  \
+        __LASSERT(tage->used <= CFS_PAGE_SIZE);                         \
+        __LASSERT(cfs_page_count(tage->page) > 0);                      \
+    } while (0)
 
 #endif /* LUSTRE_TRACEFILE_PRIVATE */
 
index 53ab2c4..cee36d7 100644 (file)
@@ -246,6 +246,12 @@ void up_write(struct rw_semaphore *s)
         (void)s;
 }
 
+void fini_rwsem(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
 #ifdef HAVE_LIBPTHREAD
 
 /*
index 7c2cc4d..1140159 100644 (file)
@@ -63,6 +63,8 @@ cfs_page_t *cfs_alloc_pages(int mask, unsigned long order)
         pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
 #elif defined (__DARWIN__)
         pg->addr = valloc(CFS_PAGE_SIZE << order);
+#elif defined (__WINNT__)
+        pg->addr = pgalloc(order);
 #else
         pg->addr = memalign(CFS_PAGE_SIZE, CFS_PAGE_SIZE << order);
 #endif
@@ -78,6 +80,8 @@ void cfs_free_pages(cfs_page_t *pg, int what)
 {
 #if 0 //#ifdef MAP_ANONYMOUS
         munmap(pg->addr, PAGE_SIZE);
+#elif defined (__WINNT__)
+        pgfree(pg->addr);
 #else
         free(pg->addr);
 #endif
index 07678e0..ee381d6 100644 (file)
@@ -156,22 +156,13 @@ void cfs_timer_init(cfs_timer_t *l, cfs_timer_func_t *func, void *arg)
 {
         CFS_INIT_LIST_HEAD(&l->tl_list);
         l->function = func;
-        l->data = (unsigned long)arg;
+        l->data = (ulong_ptr_t)arg;
         return;
 }
 
-#define cfs_jiffies                             \
-({                                              \
-        unsigned long _ret = 0;                 \
-        struct timeval tv;                      \
-        if (gettimeofday(&tv, NULL) == 0)       \
-                _ret = tv.tv_sec;               \
-        _ret;                                   \
-})
-
 int cfs_timer_is_armed(cfs_timer_t *l)
 {
-        if (cfs_time_before(cfs_jiffies, l->expires))
+        if (cfs_time_before(cfs_time_current(), l->expires))
                 return 1;
         else
                 return 0;
@@ -185,8 +176,7 @@ void cfs_timer_arm(cfs_timer_t *l, cfs_time_t deadline)
 void cfs_timer_disarm(cfs_timer_t *l)
 {
 }
-
-long cfs_timer_deadline(cfs_timer_t *l)
+cfs_time_t cfs_timer_deadline(cfs_timer_t *l)
 {
         return l->expires;
 }
@@ -247,7 +237,7 @@ int cfs_parse_int_tunable(int *value, char *name)
         if (env == NULL)
                 return 0;
 
-        *value = strtoull(env, &end, 0);
+        *value = (int)strtoull(env, &end, 0);
         if (*end == 0)
                 return 0;
 
index a78ddde..9dcf4b5 100644 (file)
@@ -528,6 +528,11 @@ libcfs_sock_create(int *fdp)
         return 0;
 }
 
+void libcfs_sock_release(int fd)
+{
+        close(fd);
+}
+
 int
 libcfs_sock_bind_to_port(int fd, __u16 port)
 {
index 59dc6d8..bc7e6a1 100644 (file)
@@ -23,7 +23,6 @@
 #define __USE_FILE_OFFSET64
 
 #include <libcfs/libcfsutil.h>
-
 #include <lnet/api-support.h>
 #include <lnet/lnetctl.h>
 
@@ -77,15 +76,15 @@ open_ioc_dev(int dev_id)
         }
 
         if (ioc_dev_list[dev_id].dev_fd < 0) {
-                int fd = open(dev_name, O_RDWR);
+                int fd = cfs_proc_open((char *)dev_name, O_RDWR);
 
                 /* Make the /dev/ node if we need to */
                 if (fd < 0 && errno == ENOENT) {
-                        if (mknod(dev_name, 
+                        if (cfs_proc_mknod(dev_name, 
                                   S_IFCHR|S_IWUSR|S_IRUSR,
                                   MKDEV(ioc_dev_list[dev_id].dev_major,
                                         ioc_dev_list[dev_id].dev_minor)) == 0)
-                                fd = open(dev_name, O_RDWR);
+                                fd = cfs_proc_open((char *)dev_name, O_RDWR);
                         else
                                 fprintf(stderr, "mknod %s failed: %s\n",
                                         dev_name, strerror(errno));
@@ -113,7 +112,7 @@ do_ioctl(int dev_id, unsigned int opc, void *buf)
         if (fd < 0) 
                 return fd;
 
-        rc = ioctl(fd, opc, buf);
+        rc = cfs_proc_ioctl(fd, opc, buf);
         return rc;
         
 }
@@ -197,7 +196,7 @@ unregister_ioc_dev(int dev_id)
                 return;
         if (ioc_dev_list[dev_id].dev_name != NULL &&
             ioc_dev_list[dev_id].dev_fd >= 0) 
-                close(ioc_dev_list[dev_id].dev_fd);
+                cfs_proc_close(ioc_dev_list[dev_id].dev_fd);
 
         ioc_dev_list[dev_id].dev_name = NULL;
         ioc_dev_list[dev_id].dev_fd = -1;
@@ -237,16 +236,53 @@ int
 parse_dump(char * dump_file, ioc_handler_t ioc_func)
 {
         int line =0;
-        struct stat st;
         char *start, *buf, *end;
-#ifndef __CYGWIN__
-        int fd;
-#else
+
+#if defined(__CYGWIN__) || defined(__WINNT__)
+
         HANDLE fd, hmap;
         DWORD size;
-#endif
-        
-#ifndef __CYGWIN__
+
+        fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL,
+                        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+        if (fd == INVALID_HANDLE_VALUE) {
+                fprintf(stderr, "couldn't open %s (error code: %lu)\n",
+                                dump_file, (unsigned long)GetLastError()); /* cast so the argument matches %lu */
+                exit(1);
+        }
+        size = GetFileSize(fd, NULL);
+        if (size < 1 || size == 0xFFFFFFFF) {
+                fprintf(stderr, "KML is empty\n");
+                CloseHandle(fd);
+                exit(1);
+        }
+
+        hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL);
+        if (hmap == NULL) {
+                fprintf(stderr, "can't create file mapping\n");
+                CloseHandle(fd);
+                exit(1);
+        }
+        start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0);
+        if (start == NULL) {
+                fprintf(stderr, "can't map file content\n");
+                CloseHandle(hmap);
+                CloseHandle(fd);
+                exit(1);
+        }
+        end = buf + size;
+        CloseHandle(fd);
+        if (start == NULL) { /* NOTE(review): unreachable - start was already tested for NULL right after MapViewOfFile() */
+                fprintf(stderr, "can't create file mapping\n");
+                UnmapViewOfFile(start);
+                CloseHandle(hmap);
+                exit(1);
+        }
+#else
+
+        struct stat st;
+        int fd;
+
         fd = open(dump_file, O_RDONLY);
         if (fd < 0) {
                 fprintf(stderr, "couldn't open %s: %s\n", dump_file, 
@@ -271,24 +307,7 @@ parse_dump(char * dump_file, ioc_handler_t ioc_func)
                 fprintf(stderr, "can't create file mapping\n");
                 exit(1);
         }
-#else
-        fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL,
-                        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-        size = GetFileSize(fd, NULL);
-        if (size < 1) {
-                fprintf(stderr, "KML is empty\n");
-                exit(1);
-        }
-
-        hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL);
-        start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0);
-        end = buf + size;
-        CloseHandle(fd);
-        if (start == NULL) {
-                fprintf(stderr, "can't create file mapping\n");
-                exit(1);
-        }
-#endif /* __CYGWIN__ */
+#endif
 
         while (buf < end) {
                 struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
@@ -323,11 +342,11 @@ parse_dump(char * dump_file, ioc_handler_t ioc_func)
                 buf += data->ioc_len + sizeof(*dump_hdr);
         }
 
-#ifndef __CYGWIN__
-        munmap(start, end - start);
-#else
+#if defined(__CYGWIN__) || defined(__WINNT__)
         UnmapViewOfFile(start);
         CloseHandle(hmap);
+#else
+        munmap(start, end - start);
 #endif
 
         return 0;
index ce2a99e..dbb3e92 100644 (file)
  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  *
  */
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <stddef.h>
-#include <unistd.h>
-#include <sys/param.h>
-#include <assert.h>
 
 #include <libcfs/libcfsutil.h>
 
@@ -137,7 +129,7 @@ static command_t * find_cmd(char * name, command_t cmds[], char ** next)
            this with strtok*/
         name = skipwhitespace(name);
         *next = skiptowhitespace(name);
-        len = *next - name;
+        len = (int)(*next - name);
         if (len == 0)
                 return NULL;
 
@@ -343,7 +335,7 @@ char * readline(char * prompt)
                 if ((c = fgetc(stdin)) != EOF) {
                         if (c == '\n')
                                 goto out;
-                        *ptr++ = c;
+                        *ptr++ = (char)c;
 
                         if (ptr - line >= size - 1) {
                                 char *tmp;
index 7cfbe1e..6c90188 100644 (file)
@@ -70,7 +70,7 @@ struct lc_watchdog {
  */
 static struct completion lcw_start_completion;
 static struct completion lcw_stop_completion;
-static wait_queue_head_t lcw_event_waitq;
+static cfs_waitq_t lcw_event_waitq;
 
 /*
  * Set this and wake lcw_event_waitq to stop the dispatcher.
@@ -94,7 +94,7 @@ static DECLARE_MUTEX(lcw_refcount_sem);
  */
 static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! */
 static struct list_head lcw_pending_timers = \
-        LIST_HEAD_INIT(lcw_pending_timers);
+        CFS_LIST_HEAD_INIT(lcw_pending_timers);
 
 #ifdef HAVE_TASKLIST_LOCK
 static void
@@ -107,11 +107,11 @@ lcw_dump(struct lc_watchdog *lcw)
         tsk = find_task_by_pid(lcw->lcw_pid);
 
         if (tsk == NULL) {
-                CWARN("Process %d was not found in the task list; "
-                      "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
+                CWARN("Process " LPPID " was not found in the task list; "
+                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
         } else if (tsk != lcw->lcw_task) {
-                CWARN("The current process %d did not set the watchdog; "
-                      "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
+                CWARN("The current process " LPPID " did not set the watchdog; "
+                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
         } else {
                 libcfs_debug_dumpstack(tsk);
         }
@@ -127,7 +127,7 @@ lcw_dump(struct lc_watchdog *lcw)
 }
 #endif
 
-static void lcw_cb(unsigned long data)
+static void lcw_cb(ulong_ptr_t data)
 {
         struct lc_watchdog *lcw = (struct lc_watchdog *)data;
 
@@ -143,15 +143,15 @@ static void lcw_cb(unsigned long data)
         /* NB this warning should appear on the console, but may not get into
          * the logs since we're running in a softirq handler */
 
-        CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n",
-              (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
+        CWARN("Watchdog triggered for pid " LPPID ": it was inactive for %lds\n",
+              lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
         lcw_dump(lcw);
 
         spin_lock_bh(&lcw_pending_timers_lock);
 
         if (list_empty(&lcw->lcw_list)) {
                 list_add(&lcw->lcw_list, &lcw_pending_timers);
-                wake_up(&lcw_event_waitq);
+                cfs_waitq_signal(&lcw_event_waitq);
         }
 
         spin_unlock_bh(&lcw_pending_timers_lock);
@@ -190,7 +190,7 @@ static int lcw_dispatch_main(void *data)
         complete(&lcw_start_completion);
 
         while (1) {
-                wait_event_interruptible(lcw_event_waitq, is_watchdog_fired());
+                cfs_wait_event_interruptible(lcw_event_waitq, is_watchdog_fired(), rc);
                 CDEBUG(D_INFO, "Watchdog got woken up...\n");
                 if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
                         CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
@@ -214,9 +214,8 @@ static int lcw_dispatch_main(void *data)
                         list_del_init(&lcw->lcw_list);
                         spin_unlock_bh(&lcw_pending_timers_lock);
 
-                        CDEBUG(D_INFO, "found lcw for pid %d: inactive for "
-                               "%lds\n", (int)lcw->lcw_pid,
-                               cfs_duration_sec(lcw->lcw_time));
+                        CDEBUG(D_INFO, "found lcw for pid " LPPID ": inactive for "
+                               "%lds\n", lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
 
                         if (lcw->lcw_state != LC_WATCHDOG_DISABLED)
                                 lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
@@ -240,7 +239,7 @@ static void lcw_dispatch_start(void)
 
         init_completion(&lcw_stop_completion);
         init_completion(&lcw_start_completion);
-        init_waitqueue_head(&lcw_event_waitq);
+        cfs_waitq_init(&lcw_event_waitq);
 
         CDEBUG(D_INFO, "starting dispatch thread\n");
         rc = kernel_thread(lcw_dispatch_main, NULL, 0);
@@ -263,7 +262,7 @@ static void lcw_dispatch_stop(void)
         CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
 
         set_bit(LCW_FLAG_STOP, &lcw_flags);
-        wake_up(&lcw_event_waitq);
+        cfs_waitq_signal(&lcw_event_waitq);
 
         wait_for_completion(&lcw_stop_completion);
 
@@ -292,12 +291,8 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms,
         lcw->lcw_data     = data;
         lcw->lcw_state    = LC_WATCHDOG_DISABLED;
 
-        INIT_LIST_HEAD(&lcw->lcw_list);
-
-        lcw->lcw_timer.function = lcw_cb;
-        lcw->lcw_timer.data = (unsigned long)lcw;
-        lcw->lcw_timer.expires = jiffies + lcw->lcw_time;
-        init_timer(&lcw->lcw_timer);
+        CFS_INIT_LIST_HEAD(&lcw->lcw_list);
+        cfs_timer_init(&lcw->lcw_timer, lcw_cb, lcw);
 
         down(&lcw_refcount_sem);
         if (++lcw_refcount == 1)
@@ -307,7 +302,8 @@ struct lc_watchdog *lc_watchdog_add(int timeout_ms,
         /* Keep this working in case we enable them by default */
         if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
                 do_gettimeofday(&lcw->lcw_last_touched);
-                add_timer(&lcw->lcw_timer);
+                cfs_timer_arm(&lcw->lcw_timer, lcw->lcw_time + 
+                              cfs_time_current());
         }
 
         RETURN(lcw);
@@ -322,7 +318,7 @@ static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
         do_gettimeofday(&newtime);
         if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
                 cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff);
-                CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n",
+                CWARN("Expired watchdog for pid " LPPID " %s after %lu.%.4lus\n",
                       lcw->lcw_pid,
                       message,
                       timediff.tv_sec,
@@ -343,8 +339,8 @@ void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
         lcw_update_time(lcw, "touched");
         lcw->lcw_state = LC_WATCHDOG_ENABLED;
 
-        mod_timer(&lcw->lcw_timer, jiffies +
-                  cfs_time_seconds(timeout_ms) / 1000);
+        cfs_timer_arm(&lcw->lcw_timer, cfs_time_current() +
+                      cfs_time_seconds(timeout_ms) / 1000);
 
         EXIT;
 }
@@ -353,7 +349,7 @@ EXPORT_SYMBOL(lc_watchdog_touch_ms);
 /* deprecated - use above instead */
 void lc_watchdog_touch(struct lc_watchdog *lcw)
 {
-        lc_watchdog_touch_ms(lcw, cfs_duration_sec(lcw->lcw_time) * 1000);
+        lc_watchdog_touch_ms(lcw, (int)cfs_duration_sec(lcw->lcw_time) * 1000);
 }
 EXPORT_SYMBOL(lc_watchdog_touch);
 
@@ -379,7 +375,7 @@ void lc_watchdog_delete(struct lc_watchdog *lcw)
         ENTRY;
         LASSERT(lcw != NULL);
 
-        del_timer(&lcw->lcw_timer);
+        cfs_timer_disarm(&lcw->lcw_timer);
 
         lcw_update_time(lcw, "deleted");
 
@@ -405,7 +401,7 @@ EXPORT_SYMBOL(lc_watchdog_delete);
 
 void lc_watchdog_dumplog(pid_t pid, void *data)
 {
-        libcfs_debug_dumplog_internal((void *)((unsigned long)pid));
+        libcfs_debug_dumplog_internal((void *)((long_ptr_t)pid));
 }
 EXPORT_SYMBOL(lc_watchdog_dumplog);
 
index 7e6f394..93d3af6 100644 (file)
 
 #include <libcfs/libcfs.h>
 
-
 /*
  * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
  * for Linux kernel.
  */
 
-cfs_task_t this_task = 
-    { 0, 0, 0, 0, 0, 0, 0, 
-      0, 0, 0, 0,  1, 0,  0, 0, 0,
-      "sysetm\0" };
+cfs_task_t this_task =
+    { /* umask */ 0,/* blocked*/0, /* pid */ 0, /* pgrp */ 0,
+      /* uid,euid,suid,fsuid */  0, 0, 0, 0, 
+      /* gid_t gid,egid,sgid,fsgid */ 0, 0, 0, 0,
+      /* ngroups*/ 1, /*cgroups*/ 0, /*groups*/ 0,
+      /* group_info */ NULL,
+      /* cap_effective, cap_inheritable, cap_permitted */  0, 0, 0,
+      /* comm */"sysetm\0",
+      /* journal_info */ NULL
+    };
 
 
 uid_t  cfs_curproc_uid(void)
@@ -141,8 +146,10 @@ int cfs_capable(cfs_cap_t cap)
 
 /* global of the task manager structure */
 
-TASK_MAN TaskMan;
+TASK_MAN cfs_win_task_manger;
 
+/* global idr context */
+struct idr_context * cfs_win_task_slot_idp = NULL;
 
 /*
  *  task slot routiens
@@ -153,8 +160,8 @@ alloc_task_slot()
 {
     PTASK_SLOT task = NULL;
 
-    if (TaskMan.slab) {
-        task = cfs_mem_cache_alloc(TaskMan.slab, 0);
+    if (cfs_win_task_manger.slab) {
+        task = cfs_mem_cache_alloc(cfs_win_task_manger.slab, 0);
     } else {
         task = cfs_alloc(sizeof(TASK_SLOT), 0);
     }
@@ -168,16 +175,18 @@ init_task_slot(PTASK_SLOT task)
     memset(task, 0, sizeof(TASK_SLOT));
     task->Magic = TASKSLT_MAGIC;
     task->task  = this_task;
-    task->task.pid = (pid_t)PsGetCurrentThreadId();
     cfs_init_event(&task->Event, TRUE, FALSE);
 }
 
-
 void
 cleanup_task_slot(PTASK_SLOT task)
 {
-    if (TaskMan.slab) {
-        cfs_mem_cache_free(TaskMan.slab, task);
+    if (task->task.pid) {
+        cfs_idr_remove(cfs_win_task_slot_idp, task->task.pid);
+    }
+
+    if (cfs_win_task_manger.slab) {
+        cfs_mem_cache_free(cfs_win_task_manger.slab, task);
     } else {
         cfs_free(task);
     }
@@ -197,25 +206,19 @@ task_manager_notify(
     PLIST_ENTRY ListEntry = NULL; 
     PTASK_SLOT  TaskSlot  = NULL;
 
-    spin_lock(&(TaskMan.Lock));
+    spin_lock(&(cfs_win_task_manger.Lock));
 
-    ListEntry = TaskMan.TaskList.Flink;
-
-    while (ListEntry != (&(TaskMan.TaskList))) {
+    ListEntry = cfs_win_task_manger.TaskList.Flink;
+    while (ListEntry != (&(cfs_win_task_manger.TaskList))) {
 
         TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
 
         if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) {
 
-            if (Create) {
-/*
-                DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n",
-                         ProcessId, ThreadId, TaskSlot->Tet);
-*/
-            } else {
+            if (!Create) {
                 /* remove the taskslot */
                 RemoveEntryList(&(TaskSlot->Link));
-                TaskMan.NumOfTasks--;
+                cfs_win_task_manger.NumOfTasks--;
 
                 /* now free the task slot */
                 cleanup_task_slot(TaskSlot);
@@ -225,7 +228,7 @@ task_manager_notify(
         ListEntry = ListEntry->Flink;
     }
 
-    spin_unlock(&(TaskMan.Lock));
+    spin_unlock(&(cfs_win_task_manger.Lock));
 }
 
 int
@@ -234,24 +237,35 @@ init_task_manager()
     NTSTATUS    status;
 
     /* initialize the content and magic */
-    memset(&TaskMan, 0, sizeof(TASK_MAN));
-    TaskMan.Magic = TASKMAN_MAGIC;
+    memset(&cfs_win_task_manger, 0, sizeof(TASK_MAN));
+    cfs_win_task_manger.Magic = TASKMAN_MAGIC;
 
     /* initialize the spinlock protection */
-    spin_lock_init(&TaskMan.Lock);
+    spin_lock_init(&cfs_win_task_manger.Lock);
 
     /* create slab memory cache */
-    TaskMan.slab = cfs_mem_cache_create(
+    cfs_win_task_manger.slab = cfs_mem_cache_create(
         "TSLT", sizeof(TASK_SLOT), 0, 0);
 
     /* intialize the list header */
-    InitializeListHead(&(TaskMan.TaskList));
+    InitializeListHead(&(cfs_win_task_manger.TaskList));
+
+    cfs_win_task_slot_idp = cfs_idr_init();
+    if (!cfs_win_task_slot_idp) {
+        return -ENOMEM;
+    }
 
     /* set the thread creation/destruction notify routine */
     status = PsSetCreateThreadNotifyRoutine(task_manager_notify);
 
     if (!NT_SUCCESS(status)) {
         cfs_enter_debugger();
+        /* remove idr context */
+        if (cfs_win_task_slot_idp) {
+            cfs_idr_exit(cfs_win_task_slot_idp);
+            cfs_win_task_slot_idp = NULL;
+        }
+        return cfs_error_code(status);
     }
 
     return 0;
@@ -263,28 +277,32 @@ cleanup_task_manager()
     PLIST_ENTRY ListEntry = NULL; 
     PTASK_SLOT  TaskSlot  = NULL;
 
-    /* we must stay in system since we succeed to register the
-       CreateThreadNotifyRoutine: task_manager_notify */
-    cfs_enter_debugger();
+    /* remove ThreadNotifyRoutine: task_manager_notify */
+    PsRemoveCreateThreadNotifyRoutine(task_manager_notify);
 
 
     /* cleanup all the taskslots attached to the list */
-    spin_lock(&(TaskMan.Lock));
+    spin_lock(&(cfs_win_task_manger.Lock));
 
-    while (!IsListEmpty(&(TaskMan.TaskList))) {
+    while (!IsListEmpty(&(cfs_win_task_manger.TaskList))) {
 
-        ListEntry = TaskMan.TaskList.Flink;
+        ListEntry = cfs_win_task_manger.TaskList.Flink;
         TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
 
         RemoveEntryList(ListEntry);
         cleanup_task_slot(TaskSlot);
     }
 
-    spin_unlock(&TaskMan.Lock);
+    spin_unlock(&cfs_win_task_manger.Lock);
+    /* remove idr context last: cleanup_task_slot() above still passes it to cfs_idr_remove() */
+    if (cfs_win_task_slot_idp) {
+        cfs_idr_exit(cfs_win_task_slot_idp);
+        cfs_win_task_slot_idp = NULL;
+    }
 
     /* destroy the taskslot cache slab */
-    cfs_mem_cache_destroy(TaskMan.slab);
-    memset(&TaskMan, 0, sizeof(TASK_MAN));
+    cfs_mem_cache_destroy(cfs_win_task_manger.slab);
+    memset(&cfs_win_task_manger, 0, sizeof(TASK_MAN));
 }
 
 
@@ -303,21 +321,15 @@ cfs_current()
     PLIST_ENTRY ListEntry = NULL; 
     PTASK_SLOT  TaskSlot  = NULL;
 
-    spin_lock(&(TaskMan.Lock));
+    spin_lock(&(cfs_win_task_manger.Lock));
 
-    ListEntry = TaskMan.TaskList.Flink;
-
-    while (ListEntry != (&(TaskMan.TaskList))) {
+    ListEntry = cfs_win_task_manger.TaskList.Flink;
+    while (ListEntry != (&(cfs_win_task_manger.TaskList))) {
 
         TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
-
         if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) {
             if (TaskSlot->Tet != Tet) {
 
-/*
-                DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n",
-                         Pid, Tid, Tet, TaskSlot->Tet);
-*/
                 //
                 // The old thread was already exit. This must be a
                 // new thread which get the same Tid to the previous.
@@ -329,16 +341,15 @@ cfs_current()
 
         } else {
 
-            if ((ULONG)TaskSlot->Pid > (ULONG)Pid) {
+            if (TaskSlot->Pid > Pid) {
                 TaskSlot = NULL;
                 break;
-            } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) {
-                if ((ULONG)TaskSlot->Tid > (ULONG)Tid) {
+            } else if (TaskSlot->Pid == Pid) {
+                if (TaskSlot->Tid > Tid) {
                     TaskSlot = NULL;
                     break;
                 }
             }
-
             TaskSlot =  NULL;
         }
 
@@ -347,24 +358,25 @@ cfs_current()
 
     if (!TaskSlot) {
 
+        /* allocate new task slot */
         TaskSlot = alloc_task_slot();
-
         if (!TaskSlot) {
             cfs_enter_debugger();
             goto errorout;
         }
 
+        /* set task slot IDs */
         init_task_slot(TaskSlot);
-
         TaskSlot->Pid = Pid;
         TaskSlot->Tid = Tid;
         TaskSlot->Tet = Tet;
+        TaskSlot->task.pid = (pid_t)cfs_idr_get_new(cfs_win_task_slot_idp, Tet);
 
-        if (ListEntry == (&(TaskMan.TaskList))) {
+        if (ListEntry == (&(cfs_win_task_manger.TaskList))) {
             //
             // Empty case or the biggest case, put it to the tail.
             //
-            InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link));
+            InsertTailList(&(cfs_win_task_manger.TaskList), &(TaskSlot->Link));
         } else {
             //
             // Get a slot and smaller than it's tid, put it just before.
@@ -372,7 +384,7 @@ cfs_current()
             InsertHeadList(ListEntry->Blink, &(TaskSlot->Link));
         }
 
-        TaskMan.NumOfTasks++;
+        cfs_win_task_manger.NumOfTasks++;
     }
 
     //
@@ -382,18 +394,18 @@ cfs_current()
     {
         PTASK_SLOT  Prev = NULL, Curr = NULL;
         
-        ListEntry = TaskMan.TaskList.Flink;
+        ListEntry = cfs_win_task_manger.TaskList.Flink;
 
-        while (ListEntry != (&(TaskMan.TaskList))) {
+        while (ListEntry != (&(cfs_win_task_manger.TaskList))) {
 
             Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
             ListEntry = ListEntry->Flink;
 
             if (Prev) {
-                if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) {
+                if (Prev->Pid > Curr->Pid) {
                     cfs_enter_debugger();
-                } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) {
-                    if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) {
+                } else if (Prev->Pid == Curr->Pid) {
+                    if (Prev->Tid > Curr->Tid) {
                         cfs_enter_debugger();
                     }
                 }
@@ -405,7 +417,7 @@ cfs_current()
 
 errorout:
 
-    spin_unlock(&(TaskMan.Lock));
+    spin_unlock(&(cfs_win_task_manger.Lock));
 
     if (!TaskSlot) {
         cfs_enter_debugger();
@@ -415,15 +427,28 @@ errorout:
     return (&(TaskSlot->task));
 }
 
-int
-schedule_timeout(int64_t time)
+/*
+ * cfs_pause
+ *   Deschedule the calling thread for a bit.
+ *
+ * Arguments:
+ *   ticks:  timeout value, handed unchanged to cfs_schedule_timeout()
+ *
+ * Notes:
+ *   The wait is requested in CFS_TASK_UNINTERRUPTIBLE state.
+ */
+void
+cfs_pause(cfs_duration_t ticks)
+{
+    cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, ticks);
+}
+
+/*
+ * our_cond_resched
+ *   Reschedule point: performs an uninterruptible cfs_schedule_timeout()
+ *   with a timeout of 1 (1i64 is a 64-bit integer literal).
+ *   NOTE(review): the unit of the timeout is whatever
+ *   cfs_schedule_timeout() expects - confirm against its definition.
+ */
+void
+our_cond_resched()
+{
+    cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, 1i64);
+}
+
+void
+cfs_schedule_timeout(cfs_task_state_t state, int64_t time)
 {
     cfs_task_t * task = cfs_current();
     PTASK_SLOT   slot = NULL;
 
     if (!task) {
         cfs_enter_debugger();
-        return 0;
+        return;
     }
 
     slot = CONTAINING_RECORD(task, TASK_SLOT, task);
@@ -433,13 +458,13 @@ schedule_timeout(int64_t time)
         time = 0;
     }
 
-    return (cfs_wait_event(&(slot->Event), time) != 0);
+    cfs_wait_event_internal(&(slot->Event), time);
 }
 
-int
-schedule()
+void
+cfs_schedule()
 {
-    return schedule_timeout(0);
+    cfs_schedule_timeout(CFS_TASK_UNINTERRUPTIBLE, 0);
 }
 
 int
@@ -463,9 +488,7 @@ wake_up_process(
 }
 
 void
-sleep_on(
-    cfs_waitq_t *waitq
-    )
+sleep_on(cfs_waitq_t *waitq)
 {
        cfs_waitlink_t link;
        
index e226608..16657c8 100644 (file)
 #include <libcfs/libcfs.h>
 #include "tracefile.h"
 
-void lnet_debug_dumpstack(cfs_task_t *tsk)
+void libcfs_debug_dumpstack(cfs_task_t *tsk)
 {
        return;
 }
 
-cfs_task_t *lnet_current(void)
+void libcfs_run_debug_log_upcall(char *file)
 {
-       return cfs_current();
 }
 
-int lnet_arch_debug_init(unsigned long bufsize)
+cfs_task_t *libcfs_current(void)
 {
-       return 0;
-}
-
-int lnet_arch_debug_cleanup(void)
-{
-       return 0;
+       return cfs_current();
 }
 
 void libcfs_run_lbug_upcall(const char *file, const char *fn, const int line)
 {
 }
 
-void libcfs_debug_dumplog(void)
-{
-}
-
 void lbug_with_loc(const char *file, const char *func, const int line)
 {
         libcfs_catastrophe = 1;
         CEMERG("LBUG: pid: %u thread: %#x\n",
-               (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread());
+               cfs_curproc_pid(), PsGetCurrentThread());
+        cfs_enter_debugger();
         libcfs_debug_dumplog();
         libcfs_run_lbug_upcall(file, func, line);
 }
 
-#if TDI_LIBCFS_DBG
+/*
+ * cfs_enter_debugger
+ *   Break into the debugger: an inline "int 3" when _X86_ is set,
+ *   KdBreakPoint() on every other architecture.
+ */
+void cfs_enter_debugger(void)
+{
+# if _X86_
+    __asm int 3;
+# else
+    KdBreakPoint();
+# endif
+}
+
+#if DBG
 
 /*
  * Definitions
  */
 
-LONG  KsDebugLevel = 0x5;
+LONG  KsDebugLevel = 1;
 
 
 /*
@@ -1058,21 +1058,66 @@ KsPrintf(
     ...
     )
 {
-    va_list  ap;
+    LARGE_INTEGER tick;
+    va_list       ap;
 
     va_start(ap, DebugMessage);
-
-    if (DebugPrintLevel <= KsDebugLevel)
-    {
+    if (DebugPrintLevel <= KsDebugLevel) {
         CHAR buffer[0x200];
-
+        KeQueryTickCount(&tick);
         vsprintf(buffer, DebugMessage, ap);
-
-        KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer));
+        KdPrint(("%8.8X cpu:%d:%d tid:%p %s",
+                 tick.LowPart,
+                 KeGetCurrentProcessorNumber(),
+                 KeGetCurrentIrql(), 
+                 PsGetCurrentThread(), buffer));
     }
-
     va_end(ap);
 
 } // KsPrint()
 
 #endif
+
+
+/*
+ * libcfs_panic
+ *   Print the panic message with DbgPrint and break into the debugger.
+ *
+ * Arguments:
+ *   msg:  message text; printed through "%s" so it is never treated
+ *         as a format string
+ */
+void libcfs_panic(char *msg)
+{
+    DbgPrint("%s", msg);
+    cfs_enter_debugger();
+}
+
+/* BUGCHECK callback record */
+static int libcfs_bugcheck_inited = 0;
+KBUGCHECK_CALLBACK_RECORD libcfs_bugcheck_record;
+
+/*
+ * libcfs_bugcheck_callback
+ *   Bugcheck callback routine handed to KeRegisterBugCheckCallback.
+ *   Both arguments are ignored; it only breaks into the debugger.
+ *
+ * Arguments:
+ *   Buffer:  buffer supplied at registration time (unused)
+ *   Length:  length of that buffer (unused)
+ */
+void
+libcfs_bugcheck_callback(
+    IN PVOID Buffer, 
+    IN ULONG Length 
+    )
+{
+    cfs_enter_debugger();
+}
+
+
+/*
+ * libcfs_register_panic_notifier
+ *   Register libcfs_bugcheck_callback as a bugcheck callback, once.
+ *
+ * Notes:
+ *   libcfs_bugcheck_inited records a successful registration so that
+ *   a second call is a no-op and libcfs_unregister_panic_notifier()
+ *   knows there is something to deregister.
+ */
+void libcfs_register_panic_notifier(void)
+{
+    if (libcfs_bugcheck_inited) {
+        return;
+    }
+
+    KeInitializeCallbackRecord(&libcfs_bugcheck_record);
+
+    /* KeRegisterBugCheckCallback returns FALSE when the record could
+       not be added; only remember the registration on success */
+    if (KeRegisterBugCheckCallback(&libcfs_bugcheck_record,
+                                    libcfs_bugcheck_callback,
+                                   &libcfs_bugcheck_record,
+                                    sizeof(KBUGCHECK_CALLBACK_RECORD),
+                                    "Lustre")) {
+        libcfs_bugcheck_inited = 1;
+    }
+}
+
+/*
+ * libcfs_unregister_panic_notifier
+ *   Remove the bugcheck callback registered by
+ *   libcfs_register_panic_notifier(); does nothing if none is
+ *   registered.
+ */
+void libcfs_unregister_panic_notifier(void)
+{
+    if (!libcfs_bugcheck_inited) {
+        return;
+    }
+
+    KeDeregisterBugCheckCallback(&libcfs_bugcheck_record);
+
+    /* allow a later re-registration and prevent a double deregister */
+    libcfs_bugcheck_inited = 0;
+}
index 2270771..515eee5 100644 (file)
@@ -38,7 +38,9 @@
 
 #include <libcfs/libcfs.h>
 
-const CHAR *dos_file_prefix = "\\??\\";
+const CHAR *dos_file_prefix[] = {
+            "\\??\\", "\\DosDevices\\",
+            "\\SystemRoot\\", NULL};
 
 /*
  * cfs_filp_open
@@ -57,6 +59,9 @@ const CHAR *dos_file_prefix = "\\??\\";
  *   N/A
  */
 
+#define is_drv_letter_valid(x) (((x) >= 0 && (x) <= 9) || \
+                ( ((x)|0x20) <= 'z' && ((x)|0x20) >= 'a'))
+
 cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
 {
     cfs_file_t *        fp = NULL;
@@ -81,7 +86,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
     PUCHAR              AnsiString = NULL;
 
     /* Analyze the flags settings */
-
     if (cfs_is_flag_set(flags, O_WRONLY)) {
         DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
         ShareAccess = 0;
@@ -126,11 +130,28 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
     }
 
     /* Initialize the unicode path name for the specified file */
-
     NameLength = (USHORT)strlen(name);
 
+    /* Check file & path name */
     if (name[0] != '\\') {
-        PrefixLength = (USHORT)strlen(dos_file_prefix);
+        if (NameLength < 1 || name[1] != ':' || !is_drv_letter_valid(name[0])) {
+            /* invalid file path name */
+            if (err) *err = -EINVAL;
+            return NULL;
+        }
+        PrefixLength = (USHORT)strlen(dos_file_prefix[0]);
+    } else {
+        int i, j;
+        for (i=0; i < 3 && dos_file_prefix[i] != NULL; i++) {
+            j = strlen(dos_file_prefix[i]);
+            if (NameLength > j && _strnicmp(dos_file_prefix[i], name, j) == 0) {
+                break;
+            }
+        }
+        if (i >= 3) {
+            if (err) *err = -EINVAL;
+            return NULL;
+        }
     }
 
     AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1),
@@ -142,7 +163,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
 
     UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1),
                                CFS_ALLOC_ZERO);
-
     if (NULL == UnicodeString) {
         if (err) *err = -ENOMEM;
         cfs_free(AnsiString);
@@ -150,7 +170,7 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
     }
 
     if (PrefixLength) {
-        RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength);
+        RtlCopyMemory(&AnsiString[0], dos_file_prefix[0], PrefixLength);
     }
 
     RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength);
@@ -167,7 +187,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
     RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE);
 
     /* Setup the object attributes structure for the file. */
-
     InitializeObjectAttributes(
             &ObjectAttributes,
             &UnicodeName,
@@ -177,7 +196,6 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
             NULL );
 
     /* Now to open or create the file now */
-
     Status = ZwCreateFile(
             &FileHandle,
             DesiredAccess,
@@ -192,22 +210,24 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
             0 );
 
     /* Check the returned status of IoStatus... */
-
     if (!NT_SUCCESS(IoStatus.Status)) {
-        *err = cfs_error_code(IoStatus.Status);
+        if (err) {
+            *err = cfs_error_code(IoStatus.Status);
+        }
         cfs_free(UnicodeString);
         cfs_free(AnsiString);
         return NULL;
     }
 
     /* Allocate the cfs_file_t: libcfs file object */
-
     fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO);
 
     if (NULL == fp) {
         Status = ZwClose(FileHandle);
         ASSERT(NT_SUCCESS(Status));
-        *err = -ENOMEM;
+        if (err) {
+            *err = -ENOMEM;
+        }
         cfs_free(UnicodeString);
         cfs_free(AnsiString);
         return NULL;
@@ -218,7 +238,9 @@ cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
     fp->f_flags = flags;
     fp->f_mode  = (mode_t)mode;
     fp->f_count = 1;
-    *err = 0;
+    if (err) {
+        *err = 0;
+    }
 
     /* free the memory of temporary name strings */
     cfs_free(UnicodeString);
@@ -260,6 +282,164 @@ int cfs_filp_close(cfs_file_t *fp)
 }
 
 
+/*
+ * CompletionRoutine
+ *   I/O completion routine for the Irps this file allocates itself.
+ *
+ * Arguments:
+ *   DeviceObject:  not used
+ *   Irp:           the completed Irp; it is freed here
+ *   Context:       the KEVENT the issuer is waiting on
+ *
+ * Return Value:
+ *   Always STATUS_MORE_PROCESSING_REQUIRED, since the Irp is already
+ *   freed here and must not be processed any further.
+ */
+NTSTATUS CompletionRoutine(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID Context)
+{
+    /* copy the IoStatus result to the issuer's status block, if any */
+    if (Irp->UserIosb)
+        *Irp->UserIosb = Irp->IoStatus;
+    
+    /* signal the event the issuer passed in as Context */
+    KeSetEvent((PKEVENT) Context, 0, FALSE);
+   
+    /* free the Irp we allocated */
+    IoFreeIrp(Irp);
+    
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * cfs_nt_filp_io
+ *     To read from or write to an opened file with a self-built Irp
+ *
+ * Arguments:
+ *   Handle:   handle of the opened file
+ *   Writing:  TRUE to write Buffer to the file, FALSE to read into it
+ *   Offset:   byte offset in the file where the transfer starts
+ *   Length:   number of bytes to transfer
+ *   Buffer:   buffer holding (write) or receiving (read) the data;
+ *             it is probed and locked with KernelMode
+ *   Bytes:    receives the number of bytes transferred, 0 on failure
+ *
+ * Return Value:
+ *   NTSTATUS of the operation
+ *
+ * Notes:
+ *   The Irp itself is freed by CompletionRoutine. That routine does
+ *   not touch the MDL, so an MDL built for direct I/O is unlocked and
+ *   freed here once the request has completed.
+ */
+NTSTATUS cfs_nt_filp_io(HANDLE Handle, BOOLEAN Writing, PLARGE_INTEGER Offset,
+                        ULONG Length,  PUCHAR Buffer,   PULONG Bytes)
+{
+    NTSTATUS                status;
+    IO_STATUS_BLOCK         iosb;
+
+    PIRP                    irp = NULL;
+    PIO_STACK_LOCATION      irpSp = NULL;
+    PMDL                    mdl = NULL;
+
+    PFILE_OBJECT            fileObject = NULL;
+    PDEVICE_OBJECT          deviceObject;
+
+    KEVENT                  event;
+
+    /* nothing is transferred until the request completes successfully */
+    *Bytes = 0;
+
+    KeInitializeEvent(&event, SynchronizationEvent, FALSE);
+
+    status = ObReferenceObjectByHandle( Handle,
+                                        Writing ? FILE_WRITE_DATA : 
+                                                  FILE_READ_DATA,
+                                        *IoFileObjectType,
+                                        KernelMode,
+                                        (PVOID *) &fileObject,
+                                        NULL );
+    if (!NT_SUCCESS(status)) {
+        goto errorout;
+    }
+
+    /* query the DeviceObject in case no input */
+    deviceObject = IoGetBaseFileSystemDeviceObject(fileObject);
+
+
+    /* allocate our own irp */
+    irp = IoAllocateIrp(deviceObject->StackSize, FALSE);
+    if (NULL == irp) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    irp->Tail.Overlay.OriginalFileObject = fileObject;
+    irp->Tail.Overlay.Thread = PsGetCurrentThread();
+    irp->Tail.Overlay.AuxiliaryBuffer = (PVOID) NULL;
+    irp->PendingReturned = FALSE;
+    irp->Cancel = FALSE;
+    irp->CancelRoutine = (PDRIVER_CANCEL) NULL;
+    irp->RequestorMode = KernelMode;
+    irp->UserIosb = &iosb;
+
+    /* set up the next I/O stack location. */
+    irpSp = (PIO_STACK_LOCATION)IoGetNextIrpStackLocation(irp);
+    irpSp->MajorFunction = Writing ? IRP_MJ_WRITE : IRP_MJ_READ;
+    irpSp->FileObject = fileObject;
+    irpSp->DeviceObject = deviceObject;
+
+    if (deviceObject->Flags & DO_BUFFERED_IO) {
+        irp->AssociatedIrp.SystemBuffer = Buffer;
+        irp->UserBuffer = Buffer;
+        irp->Flags |= (ULONG) (IRP_BUFFERED_IO |
+                               IRP_INPUT_OPERATION);
+    } else if (deviceObject->Flags & DO_DIRECT_IO) {
+
+        mdl = IoAllocateMdl(Buffer, Length, FALSE, TRUE, irp);
+        if (mdl == NULL) {
+            KsPrint((0, "cfs_nt_filp_io: failed to allocate MDL for %wZ .\n",
+                        &fileObject->FileName));
+            status = STATUS_INSUFFICIENT_RESOURCES;
+            goto errorout;
+        }
+
+        /* writing to the file only reads from the buffer and vice versa */
+        __try {
+            MmProbeAndLockPages(mdl, KernelMode, Writing ? IoReadAccess : IoWriteAccess );
+        } __except(EXCEPTION_EXECUTE_HANDLER) {
+            KsPrint((0, "cfs_nt_filp_io: failed to lock buffer %p for %wZ .\n",
+                        Buffer, &fileObject->FileName));
+            IoFreeMdl(mdl);
+            irp->MdlAddress = NULL;
+            mdl = NULL;
+            status = STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        /* the buffer could not be locked: never issue the request */
+        if (mdl == NULL) {
+            goto errorout;
+        }
+    } else {
+        irp->UserBuffer = Buffer;
+        irp->Flags = 0;
+    }
+
+    if (Writing) {
+        irp->Flags |= IRP_WRITE_OPERATION | IRP_DEFER_IO_COMPLETION;
+        irpSp->Parameters.Write.Length = Length;
+        irpSp->Parameters.Write.ByteOffset = *Offset;
+    } else {
+        irp->Flags |= IRP_READ_OPERATION | IRP_DEFER_IO_COMPLETION;
+        irpSp->Parameters.Read.Length = Length;
+        irpSp->Parameters.Read.ByteOffset = *Offset;
+    }
+
+    /* set the Irp completion routine */
+    IoSetCompletionRoutine( irp, CompletionRoutine, 
+                            &event, TRUE, TRUE, TRUE);
+
+
+    /* issue the irp to the lower layer device */
+    status = IoCallDriver(deviceObject, irp);
+
+    /* Irp is to be cleaned up in the completion routine */
+    irp = NULL;
+
+    if (status == STATUS_PENDING) {
+
+        /* we need wait until operation is completed, then we can
+           get the returned status and information length */
+
+        status = KeWaitForSingleObject(
+                    &event,
+                    Executive,
+                    KernelMode,
+                    FALSE,
+                    NULL
+                    );
+        if (NT_SUCCESS(status)) {
+            status = iosb.Status;
+        }
+    }
+
+    /* the request is completed by now; CompletionRoutine only frees
+       the Irp, so release the pages locked for direct I/O here */
+    if (mdl) {
+        MmUnlockPages(mdl);
+        IoFreeMdl(mdl);
+        mdl = NULL;
+    }
+
+    if (NT_SUCCESS(status)) {
+        *Bytes = (ULONG)iosb.Information;
+    } else {
+        *Bytes = 0;
+    }
+
+errorout:
+
+    if (fileObject) {
+        ObDereferenceObject(fileObject);
+    }
+
+    /* free the Irp in error case */
+    if (irp) {
+        IoFreeIrp(irp);
+    }
+
+    return status;
+}
+
 /*
  * cfs_filp_read
  *     To read data from the opened file
@@ -281,45 +461,33 @@ int cfs_filp_close(cfs_file_t *fp)
 
 int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
 {
-    LARGE_INTEGER   address;
-    NTSTATUS        Status;
-    IO_STATUS_BLOCK IoStatus;
-
+    LARGE_INTEGER   offset;
+    NTSTATUS        status;
     int             rc = 0;
 
     /* Read data from the file into the specified buffer */
-
     if (pos != NULL) {
-        address.QuadPart = *pos;
+        offset.QuadPart = *pos;
     } else {
-        address.QuadPart = fp->f_pos;
+        offset.QuadPart = fp->f_pos;
     }
 
-    Status = ZwReadFile( fp->f_handle,
-                         0,
-                         NULL,
-                         NULL,
-                         &IoStatus,
-                         buf,
-                         nbytes,
-                         &address,
-                         NULL );
+    status = cfs_nt_filp_io(fp->f_handle, 0, &offset,
+                            nbytes, buf, &rc);
 
-    if (!NT_SUCCESS(IoStatus.Status)) {
-        rc = cfs_error_code(IoStatus.Status);
-    } else {
-        rc = (int)IoStatus.Information;
-        fp->f_pos = address.QuadPart + rc;
-        if (pos != NULL) {
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+    }
+
+    if (rc > 0) {
+        fp->f_pos = offset.QuadPart + rc;
+        if (pos != NULL)
             *pos = fp->f_pos;
-        }   
     }
 
-    return rc;     
+    return rc;
 }
 
-
 /*
  * cfs_filp_write
  *     To write specified data to the opened file
@@ -341,63 +509,33 @@ int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
 
 int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
 {
-    LARGE_INTEGER   address;
-    NTSTATUS        Status;
-    IO_STATUS_BLOCK IoStatus;
+    LARGE_INTEGER   offset;
+    NTSTATUS        status;
     int             rc = 0;
 
-    /* Write user specified data into the file */
-
+    /* Write user specified data into the file */
     if (pos != NULL) {
-        address.QuadPart = *pos;
+        offset.QuadPart = *pos;
     } else {
-        address.QuadPart = fp->f_pos;
+        offset.QuadPart = fp->f_pos;
     }
 
-    Status = ZwWriteFile( fp->f_handle,
-                         0,
-                         NULL,
-                         NULL,
-                         &IoStatus,
-                         buf,
-                         nbytes,
-                         &address,
-                         NULL );
+    status = cfs_nt_filp_io(fp->f_handle, 1, &offset,
+                            nbytes, buf, &rc);
 
-    if (!NT_SUCCESS(Status)) {
-        rc =  cfs_error_code(Status);
-    } else {
-        rc = (int)IoStatus.Information;
-        fp->f_pos = address.QuadPart + rc;
-        if (pos != NULL) {
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+    }
+
+    if (rc > 0) {
+        fp->f_pos = offset.QuadPart + rc;
+        if (pos != NULL)
             *pos = fp->f_pos;
-        }   
     }
 
     return rc;
 }
 
-
-NTSTATUS
-CompletionRoutine(
-    PDEVICE_OBJECT DeviceObject,
-    PIRP Irp,
-    PVOID Context)
-{
-    /* copy the IoStatus result */
-    *Irp->UserIosb = Irp->IoStatus;
-    
-    /* singal the event we set */
-    KeSetEvent(Irp->UserEvent, 0, FALSE);
-   
-    /* free the Irp we allocated */
-    IoFreeIrp(Irp);
-    
-    return STATUS_MORE_PROCESSING_REQUIRED;
-}
-
-
 /*
  * cfs_filp_fsync
  *     To sync the dirty data of the file to disk
@@ -428,7 +566,6 @@ int cfs_filp_fsync(cfs_file_t *fp)
     PIO_STACK_LOCATION      IrpSp;
 
     /* get the FileObject and the DeviceObject */
-
     Status = ObReferenceObjectByHandle(
                 fp->f_handle,
                 FILE_WRITE_DATA,
@@ -444,11 +581,8 @@ int cfs_filp_fsync(cfs_file_t *fp)
     DeviceObject = IoGetRelatedDeviceObject(FileObject);
 
     /* allocate a new Irp */
-
     Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
-
     if (!Irp) {
-
         ObDereferenceObject(FileObject);
         return -ENOMEM;
     }
@@ -457,7 +591,6 @@ int cfs_filp_fsync(cfs_file_t *fp)
     KeInitializeEvent(&Event, SynchronizationEvent, FALSE);
 
     /* setup the Irp */
-    Irp->UserEvent = &Event;
     Irp->UserIosb = &IoSb;
     Irp->RequestorMode = KernelMode;
 
@@ -471,7 +604,8 @@ int cfs_filp_fsync(cfs_file_t *fp)
     IrpSp->DeviceObject = DeviceObject;
     IrpSp->FileObject = FileObject;
 
-    IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE);
+    IoSetCompletionRoutine( Irp, CompletionRoutine,
+                            &Event, TRUE, TRUE, TRUE);
 
 
     /* issue the Irp to the underlying file system driver */
@@ -553,3 +687,21 @@ int cfs_file_count(cfs_file_t *fp)
 {
     return (int)(fp->f_count);
 }
+
+/*
+ * dget
+ *   Take one more reference on a dentry.
+ *
+ * Arguments:
+ *   de:  the dentry, may be NULL
+ *
+ * Return Value:
+ *   The same pointer that was passed in (NULL stays NULL).
+ */
+struct dentry *dget(struct dentry *de)
+{
+    if (NULL == de) {
+        return NULL;
+    }
+
+    atomic_inc(&de->d_count);
+    return de;
+}
+
+/*
+ * dput
+ *   Drop one reference on a dentry and free it with cfs_free() when
+ *   the count reaches zero.
+ *
+ * Arguments:
+ *   de:  the dentry, may be NULL
+ *
+ * Notes:
+ *   A dentry whose count already reads zero is left untouched.
+ */
+void dput(struct dentry *de)
+{
+    if (NULL == de) {
+        return;
+    }
+
+    if (0 == atomic_read(&de->d_count)) {
+        return;
+    }
+
+    if (!atomic_dec_and_test(&de->d_count)) {
+        return;
+    }
+
+    cfs_free(de);
+}
index 2b57e7f..7b20b53 100644 (file)
@@ -40,7 +40,7 @@
 #include <libcfs/libcfs.h>
 
 
-#if _X86_
+#if defined(_X86_)
 
 void __declspec (naked) FASTCALL
 atomic_add(
@@ -148,7 +148,7 @@ atomic_dec_and_test(
     }
 }
 
-#else
+#elif defined(_AMD64_)
 
 void FASTCALL
 atomic_add(
@@ -235,7 +235,7 @@ atomic_dec_and_test(
     do {
 
         counter = v->counter;
-        result = counter + 1;
+        result = counter - 1;
 
     } while ( InterlockedCompareExchange(
                 &(v->counter),
@@ -245,8 +245,62 @@ atomic_dec_and_test(
     return (result == 0);
 }
 
+#else
+
+#error CPU arch type isn't specified.
+
 #endif
 
+/**
+ * atomic_add_return - add integer and return the new value
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v with a compare-exchange retry loop and
+ * returns the value that was stored.
+ */
+int FASTCALL atomic_add_return(int i, atomic_t *v)
+{
+    int old_val, new_val;
+
+    for (;;) {
+
+        old_val = v->counter;
+        new_val = old_val + i;
+
+        /* done once nobody changed the counter behind our back */
+        if (InterlockedCompareExchange(
+                &(v->counter),
+                new_val,
+                old_val) == old_val) {
+            break;
+        }
+    }
+
+    return new_val;
+}
+
+/**
+ * atomic_sub_return - subtract integer and return the new value
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v by adding its negation and returns
+ * the resulting value.
+ */
+int FASTCALL atomic_sub_return(int i, atomic_t *v)
+{
+    int delta = -i;
+
+    return atomic_add_return(delta, v);
+}
+
+/**
+ * atomic_dec_and_lock - decrement and take the lock on reaching zero
+ * @v: pointer of type atomic_t
+ * @lock: spinlock to take when the counter drops to zero
+ *
+ * Atomically decrements @v. Returns 1 with @lock held when the
+ * counter reached zero, otherwise returns 0 with @lock not held.
+ */
+int FASTCALL atomic_dec_and_lock(atomic_t *v, spinlock_t *lock)
+{
+    int counter;
+
+    /* fast path: as long as the decrement cannot reach zero, do it
+       without the lock (the reference must still be dropped) */
+    for (;;) {
+
+        counter = atomic_read(v);
+        if (counter == 1) {
+            break;
+        }
+
+        if (InterlockedCompareExchange(
+                &(v->counter),
+                counter - 1,
+                counter) == counter) {
+            return 0;
+        }
+    }
+
+    /* slow path: the counter may hit zero, decide under the lock */
+    spin_lock(lock);
+    if (atomic_dec_and_test(v)) {
+        return 1;
+    }
+    spin_unlock(lock);
+    return 0;
+}
+
 
 /*
  * rw spinlock
index 782d6d7..6a8689e 100644 (file)
 cfs_mem_cache_t *cfs_page_t_slab = NULL;
 cfs_mem_cache_t *cfs_page_p_slab = NULL;
 
+/*
+ * virt_to_page
+ *   To build a cfs_page_t descriptor around an existing address
+ *
+ * Arguments:
+ *   addr:  the address to wrap
+ *
+ * Return Value:
+ *   The new descriptor, or NULL if it could not be allocated.
+ *
+ * Notes:
+ *   Only the descriptor is allocated here; the page is marked PG_virt.
+ */
+cfs_page_t * virt_to_page(void * addr)
+{
+    cfs_page_t *pg;
+    pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0);
+    
+    if (NULL == pg) {
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    memset(pg, 0, sizeof(cfs_page_t));
+    /* addr rounded down to a PAGE_SIZE boundary */
+    pg->addr = (void *)((__u64)addr & (~((__u64)PAGE_SIZE-1)));
+    /* the caller's original, unrounded address */
+    pg->mapping = addr;
+    atomic_set(&pg->count, 1);
+    set_bit(PG_virt, &(pg->flags));
+    /* NOTE(review): the debugger is entered on the success path too;
+       presumably a trap to catch any use of this routine - confirm */
+    cfs_enter_debugger();
+    return pg;
+}
+
 /*
  * cfs_alloc_page
  *   To allocate the cfs_page_t and also 1 page of memory
@@ -57,6 +76,8 @@ cfs_mem_cache_t *cfs_page_p_slab = NULL;
  *   N/A
  */
 
+atomic_t libcfs_total_pages;
+
 cfs_page_t * cfs_alloc_page(int flags)
 {
     cfs_page_t *pg;
@@ -75,6 +96,7 @@ cfs_page_t * cfs_alloc_page(int flags)
         if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) {
             memset(pg->addr, 0, CFS_PAGE_SIZE);
         }
+        atomic_inc(&libcfs_total_pages);
     } else {
         cfs_enter_debugger();
         cfs_mem_cache_free(cfs_page_t_slab, pg);
@@ -103,10 +125,60 @@ void cfs_free_page(cfs_page_t *pg)
     ASSERT(pg->addr  != NULL);
     ASSERT(atomic_read(&pg->count) <= 1);
 
-    cfs_mem_cache_free(cfs_page_p_slab, pg->addr);
+    if (!test_bit(PG_virt, &pg->flags)) {
+        cfs_mem_cache_free(cfs_page_p_slab, pg->addr);
+        atomic_dec(&libcfs_total_pages);
+    } else {
+        cfs_enter_debugger();
+    }
     cfs_mem_cache_free(cfs_page_t_slab, pg);
 }
 
+/*
+ * cfs_alloc_pages
+ *   To allocate a cfs_page_t and (CFS_PAGE_SIZE << order) bytes of memory
+ *
+ * Arguments:
+ *   flags:  CFS_ALLOC_ZERO requests zeroed memory
+ *   order:  the memory size is CFS_PAGE_SIZE shifted left by order
+ *
+ * Return Value:
+ *   The descriptor with a count of 1, or NULL on allocation failure.
+ *
+ * Notes:
+ *   libcfs_total_pages grows by (1 << order) on success.
+ */
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order)
+{
+    cfs_page_t *page = cfs_mem_cache_alloc(cfs_page_t_slab, 0);
+
+    if (NULL == page) {
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    memset(page, 0, sizeof(cfs_page_t));
+    page->addr = cfs_alloc((CFS_PAGE_SIZE << order),0);
+    atomic_set(&page->count, 1);
+
+    /* no memory behind the descriptor: give the descriptor back */
+    if (!page->addr) {
+        cfs_enter_debugger();
+        cfs_mem_cache_free(cfs_page_t_slab, page);
+        return NULL;
+    }
+
+    if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) {
+        memset(page->addr, 0, CFS_PAGE_SIZE << order);
+    }
+    atomic_add(1 << order, &libcfs_total_pages);
+
+    return page;
+}
+
+/*
+ * __cfs_free_pages
+ *   To free the memory and the descriptor of a cfs_page_t
+ *
+ * Arguments:
+ *   pg:     the descriptor; must not be NULL and must own memory
+ *   order:  libcfs_total_pages is reduced by (1 << order)
+ *
+ * Notes:
+ *   Asserts that the reference count is at most 1.
+ */
+void __cfs_free_pages(cfs_page_t *pg, unsigned int order)
+{
+    ASSERT(pg != NULL);
+    ASSERT(pg->addr  != NULL);
+    ASSERT(atomic_read(&pg->count) <= 1);
+
+    atomic_sub(1 << order, &libcfs_total_pages);
+    /* release the memory first, then the descriptor itself */
+    cfs_free(pg->addr);
+    cfs_mem_cache_free(cfs_page_t_slab, pg);
+}
+
+/*
+ * cfs_mem_is_in_cache
+ *   Placeholder: no check is performed on addr or kmem.
+ *
+ * Return Value:
+ *   Always 1.
+ *
+ * Notes:
+ *   Prints a "not implemented" message on every call.
+ *   NOTE(review): the two string pieces join as "achain" (no space).
+ */
+int cfs_mem_is_in_cache(const void *addr, const cfs_mem_cache_t *kmem)
+{
+    KdPrint(("cfs_mem_is_in_cache: not implemented. (should maintain a"
+              "chain to keep all allocations traced.)\n"));
+    return 1;
+}
 
 /*
  * cfs_alloc
@@ -127,21 +199,19 @@ void cfs_free_page(cfs_page_t *pg)
 void *
 cfs_alloc(size_t nr_bytes, u_int32_t flags)
 {
-       void *ptr;
+    void *ptr;
 
     /* Ignore the flags: always allocate from NonPagedPool */
-
-       ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs');
-
-       if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) {
-               memset(ptr, 0, nr_bytes);
+    ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs');
+    if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) {
+        memset(ptr, 0, nr_bytes);
     }
 
     if (!ptr) {
         cfs_enter_debugger();
     }
 
-       return ptr;
+    return ptr;
 }
 
 /*
@@ -161,7 +231,7 @@ cfs_alloc(size_t nr_bytes, u_int32_t flags)
 void
 cfs_free(void *addr)
 {
-       ExFreePool(addr);
+    ExFreePool(addr);
 }
 
 /*
@@ -182,7 +252,7 @@ cfs_free(void *addr)
 void *
 cfs_alloc_large(size_t nr_bytes)
 {
-       return cfs_alloc(nr_bytes, 0);
+    return cfs_alloc(nr_bytes, 0);
 }
 
 /*
@@ -202,7 +272,7 @@ cfs_alloc_large(size_t nr_bytes)
 void
 cfs_free_large(void *addr)
 {
-       cfs_free(addr);
+    cfs_free(addr);
 }
 
 
@@ -252,7 +322,6 @@ cfs_mem_cache_create(
     }
 
     memset(kmc, 0, sizeof(cfs_mem_cache_t));
-
     kmc->flags = flags;
 
     if (name) {
@@ -345,3 +414,74 @@ void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf)
 {
     ExFreeToNPagedLookasideList(&(kmc->npll), buf);
 }
+
+spinlock_t  shrinker_guard = {0};
+CFS_LIST_HEAD(shrinker_hdr);
+cfs_timer_t shrinker_timer = {0};
+
+/*
+ * set_shrinker
+ *   To allocate a shrinker and put it onto the shrinker list
+ *
+ * Arguments:
+ *   seeks:  stored in the shrinker's seeks field
+ *   cb:     callback stored in the shrinker's cb field
+ *
+ * Return Value:
+ *   The new shrinker, or NULL if the allocation failed.
+ *
+ * Notes:
+ *   The list is modified under shrinker_guard.
+ */
+struct shrinker * set_shrinker(int seeks, shrink_callback cb)
+{
+    struct shrinker *entry;
+
+    entry = (struct shrinker *)
+        cfs_alloc(sizeof(struct shrinker), CFS_ALLOC_ZERO);
+    if (!entry) {
+        return entry;
+    }
+
+    entry->cb    = cb;
+    entry->seeks = seeks;
+    entry->nr    = 2;
+
+    spin_lock(&shrinker_guard);
+    list_add(&entry->list, &shrinker_hdr);
+    spin_unlock(&shrinker_guard);
+
+    return entry;
+}
+
+/*
+ * remove_shrinker
+ *   To take a shrinker off the shrinker list and free it
+ *
+ * Arguments:
+ *   s:  the shrinker to remove
+ *
+ * Notes:
+ *   The list is searched under shrinker_guard and s is unlinked only
+ *   if it is found there; s is freed in either case.
+ */
+void remove_shrinker(struct shrinker *s)
+{
+    struct shrinker *tmp;
+    spin_lock(&shrinker_guard);
+#if TRUE
+    /* unlink s only when it really is on the list */
+    cfs_list_for_each_entry_typed(tmp, &shrinker_hdr,
+                            struct shrinker, list) {
+        if (tmp == s) {
+            list_del(&tmp->list);
+            break;
+        } 
+    }
+#else
+    list_del(&s->list);
+#endif
+    spin_unlock(&shrinker_guard);
+    cfs_free(s);
+}
+
+/*
+ * shrinker_timer_proc
+ *   Timer procedure of shrinker_timer: calls the callback of every
+ *   registered shrinker as cb(nr, __GFP_FS), then re-arms the timer.
+ *
+ * Arguments:
+ *   arg:  not used
+ *
+ * Notes:
+ *   The callbacks run with shrinker_guard held.
+ */
+void shrinker_timer_proc(ulong_ptr_t arg)
+{
+    struct shrinker *s;
+    spin_lock(&shrinker_guard);
+
+    cfs_list_for_each_entry_typed(s, &shrinker_hdr,
+                            struct shrinker, list) {
+        s->cb(s->nr, __GFP_FS);
+    }
+    spin_unlock(&shrinker_guard);
+    /* re-arm with the same value start_shrinker_timer() uses */
+    cfs_timer_arm(&shrinker_timer, 300);
+}
+
+/*
+ * start_shrinker_timer
+ *   To initialize shrinker_timer and arm it for the first time
+ *
+ * Return Value:
+ *   Always 0.
+ */
+int start_shrinker_timer()
+{
+    /* initialize shrinker timer */
+    cfs_timer_init(&shrinker_timer, shrinker_timer_proc, NULL);
+
+    /* start the timer with a value of 300, meant as 5 minutes
+       (NOTE(review): confirm the unit cfs_timer_arm expects) */
+    cfs_timer_arm(&shrinker_timer, 300);
+
+    return 0;
+}
+
+/*
+ * stop_shrinker_timer
+ *   To disarm shrinker_timer and then tear it down
+ */
+void stop_shrinker_timer()
+{
+    /* cancel the timer */
+    cfs_timer_disarm(&shrinker_timer);
+    /* release the timer */
+    cfs_timer_done(&shrinker_timer);
+}
index ac255ba..3febc7f 100644 (file)
@@ -35,7 +35,7 @@
  */
 
 
-#define DEBUG_SUBSYSTEM S_LIBCFS
+#define DEBUG_SUBSYSTEM S_LNET
 
 #include <libcfs/libcfs.h>
 
@@ -56,17 +56,17 @@ int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
                 RETURN(err);
 
         if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
-                CERROR(("LIBCFS: version mismatch kernel vs application\n"));
+                CERROR("LIBCFS: version mismatch kernel vs application\n");
                 RETURN(-EINVAL);
         }
 
         if (hdr->ioc_len + buf >= end) {
-                CERROR(("LIBCFS: user buffer exceeds kernel buffer\n"));
+                CERROR("LIBCFS: user buffer exceeds kernel buffer\n");
                 RETURN(-EINVAL);
         }
 
         if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
-                CERROR(("LIBCFS: user buffer too small for ioctl\n"));
+                CERROR("LIBCFS: user buffer too small for ioctl\n");
                 RETURN(-EINVAL);
         }
 
@@ -75,7 +75,7 @@ int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
                 RETURN(err);
 
         if (libcfs_ioctl_is_invalid(data)) {
-                CERROR(("LIBCFS: ioctl not correctly formatted\n"));
+                CERROR("LIBCFS: ioctl not correctly formatted\n");
                 RETURN(-EINVAL);
         }
 
@@ -88,11 +88,18 @@ int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
 
         RETURN(0);
 }
-                                                                                                                                                                        
+
+/*
+ * libcfs_ioctl_popdata
+ *   To copy ioctl result data back to the caller's buffer
+ *
+ * Arguments:
+ *   arg:   destination buffer
+ *   data:  source data
+ *   size:  number of bytes to copy
+ *
+ * Return Value:
+ *   0 on success, -EFAULT if copy_to_user reports a failure.
+ */
+int libcfs_ioctl_popdata(void *arg, void *data, int size)
+{
+       return copy_to_user((char *)arg, data, size) ? -EFAULT : 0;
+}
+                                                                                                                                                                       
 extern struct cfs_psdev_ops          libcfs_psdev_ops;
 
 static int 
-libcfs_psdev_open(cfs_file_t * file)
+libcfs_psdev_open(struct inode *in, cfs_file_t * file)
 { 
        struct libcfs_device_userstate **pdu = NULL;
        int    rc = 0;
@@ -107,7 +114,7 @@ libcfs_psdev_open(cfs_file_t * file)
 
 /* called when closing /dev/device */
 static int 
-libcfs_psdev_release(cfs_file_t * file)
+libcfs_psdev_release(struct inode *in, cfs_file_t * file)
 {
        struct libcfss_device_userstate *pdu;
        int    rc = 0;
@@ -121,7 +128,7 @@ libcfs_psdev_release(cfs_file_t * file)
 }
 
 static int 
-libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
+libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr_t arg)
 { 
        struct cfs_psdev_file    pfile;
        int    rc = 0;
@@ -129,8 +136,8 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
        if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || 
             _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  || 
             _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { 
-               CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n", 
-                      _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)))
+               CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", 
+                      _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); 
                return (-EINVAL); 
        } 
        
@@ -139,8 +146,8 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
        case IOC_LIBCFS_PANIC: 
                if (!cfs_capable(CFS_CAP_SYS_BOOT)) 
                        return (-EPERM); 
-               CERROR(("debugctl-invoked panic"));
-        KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL);
+               CERROR("debugctl-invoked panic");
+               KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL);
 
                return (0);
        case IOC_LIBCFS_MEMHOG:
@@ -160,6 +167,7 @@ libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
 }
 
 static struct file_operations libcfs_fops = {
+    /* owner */   THIS_MODULE,
     /* lseek: */  NULL,
     /* read: */   NULL,
     /* write: */  NULL,
diff --git a/libcfs/libcfs/winnt/winnt-native.c b/libcfs/libcfs/winnt/winnt-native.c
new file mode 100644 (file)
index 0000000..634024a
--- /dev/null
@@ -0,0 +1,709 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#ifndef __KERNEL__
+
+#include <ntddk.h>
+#include <libcfs/libcfs.h>
+#include <libcfs/user-bitops.h>
+#include <lustre_lib.h>
+
+/*
+ * Native API definitions
+ */
+
+//
+//  Disk I/O Routines
+//
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtReadFile(HANDLE FileHandle,
+    HANDLE Event OPTIONAL,
+    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    PVOID ApcContext OPTIONAL,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PVOID Buffer,
+    ULONG Length,
+    PLARGE_INTEGER ByteOffset OPTIONAL,
+    PULONG Key OPTIONAL);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtWriteFile(HANDLE FileHandle,
+    HANDLE Event OPTIONAL,
+    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    PVOID ApcContext OPTIONAL,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PVOID Buffer,
+    ULONG Length,
+    PLARGE_INTEGER ByteOffset OPTIONAL,
+    PULONG Key OPTIONAL);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtClose(HANDLE Handle);
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtCreateFile(PHANDLE FileHandle,
+    ACCESS_MASK DesiredAccess,
+    POBJECT_ATTRIBUTES ObjectAttributes,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PLARGE_INTEGER AllocationSize OPTIONAL,
+    ULONG FileAttributes,
+    ULONG ShareAccess,
+    ULONG CreateDisposition,
+    ULONG CreateOptions,
+    PVOID EaBuffer OPTIONAL,
+    ULONG EaLength);
+
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtDeviceIoControlFile(
+    IN HANDLE  FileHandle,
+    IN HANDLE  Event,
+    IN PIO_APC_ROUTINE  ApcRoutine,
+    IN PVOID  ApcContext,
+    OUT PIO_STATUS_BLOCK  IoStatusBlock,
+    IN ULONG  IoControlCode,
+    IN PVOID  InputBuffer,
+    IN ULONG  InputBufferLength,
+    OUT PVOID  OutputBuffer,
+    OUT ULONG  OutputBufferLength
+    ); 
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtFsControlFile(
+    IN HANDLE FileHandle,
+    IN HANDLE Event OPTIONAL,
+    IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    IN PVOID ApcContext OPTIONAL,
+    OUT PIO_STATUS_BLOCK IoStatusBlock,
+    IN ULONG FsControlCode,
+    IN PVOID InputBuffer OPTIONAL,
+    IN ULONG InputBufferLength,
+    OUT PVOID OutputBuffer OPTIONAL,
+    IN ULONG OutputBufferLength
+);
+
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtQueryInformationFile(
+    IN HANDLE  FileHandle,
+    OUT PIO_STATUS_BLOCK  IoStatusBlock,
+    OUT PVOID  FileInformation,
+    IN ULONG  Length,
+    IN FILE_INFORMATION_CLASS  FileInformationClass
+    );
+
+//
+// Random routines ...
+//
+
+NTSYSAPI
+ULONG
+NTAPI
+RtlRandom(
+    IN OUT PULONG  Seed
+    ); 
+
+/*
+ * Time routines ...
+ */
+
+NTSYSAPI
+CCHAR
+NTAPI
+NtQuerySystemTime(
+    OUT PLARGE_INTEGER  CurrentTime
+    );
+
+
+NTSYSAPI
+BOOLEAN
+NTAPI
+RtlTimeToSecondsSince1970(
+    IN PLARGE_INTEGER  Time,
+    OUT PULONG  ElapsedSeconds
+    );
+
+
+NTSYSAPI
+VOID
+NTAPI
+RtlSecondsSince1970ToTime(
+    IN ULONG  ElapsedSeconds,
+    OUT PLARGE_INTEGER  Time
+    );
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+ZwDelayExecution(
+    IN BOOLEAN Alertable,
+    IN PLARGE_INTEGER Interval
+);
+
+
+/*
+ * nanosleep
+ *   Suspend the calling thread for the interval described by rqtp
+ *
+ * Arguments:
+ *   rqtp:  requested delay (seconds plus nanoseconds)
+ *   rmtp:  optional; always set to zero, the unslept remainder of an
+ *          interrupted wait is not computed
+ *
+ * Return Value:
+ *   0 when the delay completed, -1 when the wait ended with
+ *   STATUS_ALERTED or STATUS_USER_APC
+ */
+int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
+{
+    NTSTATUS status;
+    LARGE_INTEGER Interval;
+
+    /* The interval is counted in 100ns units. It has to be negative to
+       request a relative delay: a positive value is taken as an absolute
+       system time, which lies in the past for any realistic request and
+       makes the call return at once. tv_sec is widened before scaling
+       so the multiplication cannot overflow a 32-bit long. */
+    Interval.QuadPart = -((LONGLONG)rqtp->tv_sec * 10000000 +
+                          (LONGLONG)rqtp->tv_nsec / 100);
+    status = ZwDelayExecution(TRUE, &Interval);
+    if (rmtp) {
+        rmtp->tv_sec = 0;
+        rmtp->tv_nsec = 0;
+    }
+    if (status == STATUS_ALERTED || status == STATUS_USER_APC) {
+       return -1;
+    }
+    return 0;
+}
+
+
+/*
+ * do_gettimeofday
+ *   Split the value reported by NtQuerySystemTime() into the seconds
+ *   and microseconds fields of tv. The raw value counts units of
+ *   1/10000000 second.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+    LARGE_INTEGER now;
+
+    NtQuerySystemTime(&now);
+
+    /* remainder below one second, scaled from 100ns units to usec */
+    tv->tv_usec = (suseconds_t) (now.QuadPart % 10000000) / 10;
+    /* whole seconds */
+    tv->tv_sec  = (long_ptr_t)  (now.QuadPart / 10000000);
+}
+
+/*
+ * gettimeofday
+ *   Wrapper around do_gettimeofday(). The tz argument is never read
+ *   and the function always returns 0.
+ */
+int gettimeofday(struct timeval *tv, void * tz)
+{
+    do_gettimeofday(tv);
+    return 0;
+}
+
+/*
+ * proc process routines of user space
+ */
+
+struct idr_context *cfs_proc_idp = NULL;
+
+/*
+ * cfs_proc_open
+ *   Open a node of the emulated /proc or /dev tree from user space
+ *
+ * Arguments:
+ *   filename:  path of the node, must start with "/proc/" or "/dev/"
+ *   oflag:     O_* open flags
+ *
+ * Return Value:
+ *   the id returned by cfs_idr_get_new() for the opened handle on
+ *   success, otherwise a negative error code
+ *
+ * Notes:
+ *   The object opened is always LUSTRE_PROC_SYMLNK; the requested path
+ *   is passed to NtCreateFile() as the name of an extended attribute.
+ *   O_CREAT combined with O_EXCL is rejected with -EINVAL.
+ */
+int cfs_proc_open (char * filename, int oflag)
+{
+    NTSTATUS            status;
+    IO_STATUS_BLOCK     iosb;
+    int                 rc = 0;
+
+    HANDLE              Handle = INVALID_HANDLE_VALUE;
+    OBJECT_ATTRIBUTES   ObjectAttributes;
+    ACCESS_MASK         DesiredAccess;
+    ULONG               CreateDisposition;
+    ULONG               ShareAccess;
+    ULONG               CreateOptions;
+    UNICODE_STRING      UnicodeName;
+    USHORT              NameLength;
+    size_t              PathLength;
+
+    PFILE_FULL_EA_INFORMATION Ea = NULL;
+    ULONG               EaLength;
+    PUCHAR              EaBuffer = NULL;
+
+    /* The name length is stored in the one-byte EaNameLength field, so
+       a longer path cannot be described and must be refused instead of
+       being silently truncated */
+    PathLength = strlen(filename);
+    if (PathLength > 0xFF) {
+        rc = -EINVAL;
+        goto errorout;
+    }
+    NameLength = (USHORT)PathLength;
+
+    /* Check the filename: should start with "/proc" or "/dev" */
+    if (NameLength > 0x05) {
+        if (_strnicmp(filename, "/proc/", 6) == 0) {
+            if (NameLength <= 6) {
+                rc = -EINVAL;
+                goto errorout;
+            }
+        } else if (_strnicmp(filename, "/dev/", 5) == 0) {
+        } else {
+            rc = -EINVAL;
+            goto errorout;
+        }
+    } else {
+        rc = -EINVAL;
+        goto errorout;
+    }
+
+    /* Analyze the flags settings */
+
+    if (cfs_is_flag_set(oflag, O_WRONLY)) {
+        DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = 0;
+    }  else if (cfs_is_flag_set(oflag, O_RDWR)) {
+        DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
+    } else {
+        DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ;
+    }
+
+    if (cfs_is_flag_set(oflag, O_CREAT)) {
+        if (cfs_is_flag_set(oflag, O_EXCL)) {
+            /* exclusive creation is not supported */
+            rc = -EINVAL;
+            goto errorout;
+        } else {
+            CreateDisposition = FILE_OPEN_IF;
+        }
+    } else {
+        CreateDisposition = FILE_OPEN;
+    }
+
+    if (cfs_is_flag_set(oflag, O_TRUNC)) {
+        if (cfs_is_flag_set(oflag, O_EXCL)) {
+            CreateDisposition = FILE_OVERWRITE;
+        } else {
+            CreateDisposition = FILE_OVERWRITE_IF;
+        }
+    }
+
+    CreateOptions = 0;
+
+    if (cfs_is_flag_set(oflag, O_DIRECTORY)) {
+        cfs_set_flag(CreateOptions,  FILE_DIRECTORY_FILE);
+    }
+
+    if (cfs_is_flag_set(oflag, O_SYNC)) {
+         cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
+    }
+
+    if (cfs_is_flag_set(oflag, O_DIRECT)) {
+         cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
+    }
+
+    /* Initialize the unicode path name for the specified file */
+    RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK);
+
+    /* Setup the object attributes structure for the file. */
+    InitializeObjectAttributes(
+            &ObjectAttributes,
+            &UnicodeName,
+            OBJ_CASE_INSENSITIVE,
+            NULL,
+            NULL );
+
+    /* building EA for the proc entry ...  */
+    EaBuffer = malloc(NameLength + sizeof(FILE_FULL_EA_INFORMATION));
+    if (!EaBuffer) {
+        rc = -ENOMEM;
+        goto errorout;
+    }
+    memset(EaBuffer, 0, NameLength + sizeof(FILE_FULL_EA_INFORMATION));
+    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = (UCHAR)NameLength;
+    Ea->EaValueLength = 0;
+    /* copy the path including its terminating NUL */
+    RtlCopyMemory(
+        &(Ea->EaName),
+        filename,
+        NameLength + 1
+        );
+    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+                               Ea->EaNameLength + 1;
+
+    /* Now to open or create the file now */
+    status = NtCreateFile(
+                &Handle,
+                DesiredAccess,
+                &ObjectAttributes,
+                &iosb,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                ShareAccess,
+                CreateDisposition,
+                CreateOptions,
+                Ea,
+                EaLength );
+
+    /* Check the returned status of Iosb ... */
+
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        goto errorout;
+    }
+
+    /* Register the handle only after a successful open. Doing it on the
+       shared exit path would also register INVALID_HANDLE_VALUE and
+       replace the error code whenever an earlier step failed. */
+    rc = cfs_idr_get_new(cfs_proc_idp, Handle);
+    if (rc < 0) {
+        NtClose(Handle);
+    }
+
+errorout:
+
+    if (EaBuffer) {
+        free(EaBuffer);
+    }
+
+    return rc;
+}
+
+/*
+ * cfs_proc_close
+ *   Close the handle registered under fd (if there is one) and remove
+ *   fd from cfs_proc_idp. Always returns 0.
+ */
+int cfs_proc_close(int fd)
+{
+    HANDLE h;
+
+    h = cfs_idr_find(cfs_proc_idp, fd);
+    if (h != NULL) {
+        NtClose(h);
+    }
+
+    /* the id is dropped even when no handle was found for it */
+    cfs_idr_remove(cfs_proc_idp, fd);
+    return 0;
+}
+
+/*
+ * cfs_proc_read_internal
+ *   Read from the handle registered under fd at an explicit offset
+ *
+ * Arguments:
+ *   fd:       id looked up in cfs_proc_idp
+ *   buffer:   destination buffer
+ *   count:    number of bytes requested
+ *   offlow:   low part of the byte offset
+ *   offhigh:  high part of the byte offset
+ *
+ * Return Value:
+ *   iosb.Information cast to int on success, otherwise the value of
+ *   cfs_error_code() for the failing status
+ */
+int cfs_proc_read_internal(
+    int fd, void *buffer,
+    unsigned int count,
+    unsigned int offlow,
+    unsigned int offhigh
+    )
+{
+    IO_STATUS_BLOCK     io;
+    LARGE_INTEGER       pos;
+    NTSTATUS            rc;
+    HANDLE              h;
+
+    h = cfs_idr_find(cfs_proc_idp, fd);
+    pos.HighPart = offhigh;
+    pos.LowPart  = offlow;
+
+    rc = NtReadFile(h, 0, NULL, NULL, &io, buffer, count, &pos, NULL);
+    if (!NT_SUCCESS(rc)) {
+        printf("NtReadFile request failed with status: 0x%0x\n", rc);
+        return cfs_error_code(rc);
+    }
+
+    return (int)(io.Information);
+}
+
+/*
+ * cfs_proc_read
+ *   Read count bytes through cfs_proc_read_internal() with both offset
+ *   parts set to 0; returns whatever that call returns.
+ */
+int cfs_proc_read(
+    int fd, void *buffer,
+    unsigned int count
+    )
+{
+    return cfs_proc_read_internal(fd, buffer, count, 0, 0);
+}
+
+/*
+ * cfs_proc_write_internal
+ *   Write to the handle registered under fd at an explicit offset
+ *
+ * Arguments:
+ *   fd:       id looked up in cfs_proc_idp
+ *   buffer:   data to be written
+ *   count:    number of bytes to write
+ *   offlow:   low part of the byte offset
+ *   offhigh:  high part of the byte offset
+ *
+ * Return Value:
+ *   iosb.Information cast to int on success, otherwise the value of
+ *   cfs_error_code() for the failing status
+ */
+int cfs_proc_write_internal(
+    int fd, void *buffer,
+    unsigned int count,
+    unsigned int offlow,
+    unsigned int offhigh
+    )
+{
+    IO_STATUS_BLOCK     io;
+    LARGE_INTEGER       pos;
+    NTSTATUS            rc;
+    HANDLE              h;
+
+    h = cfs_idr_find(cfs_proc_idp, fd);
+    pos.HighPart = offhigh;
+    pos.LowPart = offlow;
+
+    rc = NtWriteFile(h, 0, NULL, NULL, &io, buffer, count, &pos, NULL);
+    if (!NT_SUCCESS(rc)) {
+        printf("NtWriteFile request failed 0x%0x\n", rc);
+        return cfs_error_code(rc);
+    }
+
+    return (int)(io.Information);
+}
+
+/*
+ * cfs_proc_write
+ *   Write count bytes through cfs_proc_write_internal() with both
+ *   offset parts set to 0; returns whatever that call returns.
+ */
+int cfs_proc_write(
+    int fd, void *buffer,
+    unsigned int count
+    )
+{
+    return cfs_proc_write_internal(fd, buffer, count, 0, 0);
+}
+
+/*
+ * cfs_proc_ioctl
+ *   Forward an ioctl to the handle registered under fd by means of
+ *   IOCTL_LIBCFS_ENTRY
+ *
+ * Arguments:
+ *   fd:      id looked up in cfs_proc_idp
+ *   cmd:     ioctl command; its _IOC_TYPE selects how buffer is sized
+ *   buffer:  caller's ioctl data, read as libcfs_ioctl_data or
+ *            obd_ioctl_data depending on the command type
+ *
+ * Return Value:
+ *   the rc field of the CFS_PROC_IOCTL header at the start of the
+ *   exchange buffer when status is STATUS_SUCCESS, otherwise the value
+ *   of cfs_error_code(status)
+ *
+ * Notes:
+ *   The request is marshalled into one allocation laid out as
+ *   [CFS_PROC_IOCTL header][copy of buffer][pbuf1][pbuf2]; the same
+ *   allocation is passed as input and output buffer.
+ */
+int cfs_proc_ioctl(int fd, int cmd, void *buffer)
+{
+    PUCHAR          procdat = NULL;
+    CFS_PROC_IOCTL  procctl;
+    ULONG           length = 0;
+    ULONG           extra = 0;
+    int             rc = 0;
+
+    NTSTATUS        status = STATUS_UNSUCCESSFUL;
+    IO_STATUS_BLOCK iosb;
+
+    /* two typed views of the caller's buffer */
+    struct libcfs_ioctl_data * portal = buffer;
+    struct obd_ioctl_data * obd = buffer;
+    struct obd_ioctl_data * data;
+
+    HANDLE handle = cfs_idr_find(cfs_proc_idp, fd);
+#if defined(_X86_)
+    CLASSERT(sizeof(struct obd_ioctl_data) == 528);
+#else
+    CLASSERT(sizeof(struct obd_ioctl_data) == 576);
+#endif
+    memset(&procctl, 0, sizeof(CFS_PROC_IOCTL));
+    procctl.cmd = cmd;
+
+    /* length: bytes copied from buffer; extra: room for the out-of-line
+       pbuf1/pbuf2 payloads (only the 'f' type has any) */
+    if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) {
+        length = portal->ioc_len;
+    } else if (_IOC_TYPE(cmd) == 'f') {
+        length = obd->ioc_len;
+        extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2);
+    } else if(_IOC_TYPE(cmd) == 'u') {
+        length = 4;
+        extra  = 0;
+    } else if(_IOC_TYPE(cmd) == 'i') {
+        length = obd->ioc_len;
+        extra  = 0;
+    } else {
+        printf("cfs_proc_ioctl: un-supported ioctl type ...\n");
+        cfs_enter_debugger();
+        status = STATUS_INVALID_PARAMETER;
+        goto errorout;
+    }
+
+    procctl.len = length + extra;
+    procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL));
+
+    if (NULL == procdat) {
+        printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n");
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        cfs_enter_debugger();
+        goto errorout;
+    }
+    memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL));
+    memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL));
+    memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length);
+    /* from here on length is the offset of the next free byte */
+    length += sizeof(CFS_PROC_IOCTL);
+
+    if (_IOC_TYPE(cmd) == 'f') {
+
+        data  = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL));
+        /* the BRW read/write commands keep their pbuf pointers as they
+           are; for every other command the payloads are appended */
+        if ( cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
+             cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+            if (obd->ioc_pbuf1 && data->ioc_plen1) {
+                data->ioc_pbuf1 = &procdat[length];
+                memcpy(data->ioc_pbuf1, obd->ioc_pbuf1, obd->ioc_plen1); 
+                length += size_round(obd->ioc_plen1);
+            } else {
+                data->ioc_plen1 = 0;
+                data->ioc_pbuf1 = NULL;
+            }
+
+            if (obd->ioc_pbuf2 && obd->ioc_plen2) {
+                data->ioc_pbuf2 = &procdat[length];
+                memcpy(data->ioc_pbuf2, obd->ioc_pbuf2, obd->ioc_plen2);
+                length += size_round(obd->ioc_plen2);
+            } else {
+                data->ioc_plen2 = 0;
+                data->ioc_pbuf2 = NULL;
+            }
+               } else {
+             extra = 0;
+        }
+
+        ASSERT(length == extra + sizeof(CFS_PROC_IOCTL) + data->ioc_len);
+        if (obd_ioctl_is_invalid(obd)) {
+            cfs_enter_debugger();
+        }
+    }
+
+    /* procdat serves as both the input and the output buffer */
+    status = NtDeviceIoControlFile(
+                handle, NULL, NULL,
+                NULL, &iosb,
+                IOCTL_LIBCFS_ENTRY,
+                procdat, length,
+                procdat, length );
+
+
+    /* NOTE(review): the copy-back below runs whether or not the request
+       succeeded - confirm that this is intended */
+    if (_IOC_TYPE(cmd) == 'f') {
+
+        length = sizeof(CFS_PROC_IOCTL);
+        ASSERT(data  == (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)));
+               if ( cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
+             cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+            if (obd->ioc_pbuf1) {
+                ASSERT(obd->ioc_plen1 == data->ioc_plen1);
+                data->ioc_pbuf1 = &procdat[length];
+                memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, obd->ioc_plen1);
+                length += size_round(obd->ioc_plen1);
+            }
+            if (obd->ioc_pbuf2) {
+                ASSERT(obd->ioc_plen2 == data->ioc_plen2);
+                data->ioc_pbuf2 = &procdat[length];
+                memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, obd->ioc_plen2);
+                length += size_round(obd->ioc_plen2);
+            }
+        }
+        /* put the caller's own pointers back before the structure is
+           copied over the caller's buffer */
+        data->ioc_inlbuf1 = obd->ioc_inlbuf1;
+        data->ioc_inlbuf2 = obd->ioc_inlbuf2;
+        data->ioc_inlbuf3 = obd->ioc_inlbuf3;
+        data->ioc_inlbuf4 = obd->ioc_inlbuf4;
+        data->ioc_pbuf1   = obd->ioc_pbuf1;
+        data->ioc_pbuf2   = obd->ioc_pbuf2;
+        memcpy(obd, data, obd->ioc_len);
+
+    } else {
+
+        memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); 
+    }
+
+errorout:
+
+    /* procdat is only dereferenced when status is STATUS_SUCCESS; both
+       early exits above set a failure status first */
+    if (STATUS_SUCCESS == status) {
+        rc = ((CFS_PROC_IOCTL *)procdat)->rc;
+    } else {
+        rc = cfs_error_code(status);
+    }
+
+    if (procdat) {
+        free(procdat);
+    }
+
+    return rc;
+}
+
+
+/*
+ * cfs_proc_mknod
+ *   Stub: no argument is used and 0 is always returned.
+ */
+int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev)
+{
+    return 0;
+}
+
+/*
+ * cfs_proc_fopen
+ *   Open path with cfs_proc_open(path, O_RDWR) and return the resulting
+ *   id disguised as a FILE pointer. The mode argument is not consulted.
+ *
+ * Return Value:
+ *   the id cast to FILE * when it is greater than 0, otherwise NULL
+ */
+FILE *cfs_proc_fopen(char *path, char * mode)
+{
+    int id;
+
+    id = cfs_proc_open(path, O_RDWR);
+    if (id <= 0) {
+        return NULL;
+    }
+
+    return (FILE *)(LONG_PTR)id;
+}
+
+/*
+ * cfs_proc_fgets
+ *   Read up to len bytes into buf from the id carried in fp
+ *
+ * Return Value:
+ *   buf when cfs_proc_read_internal() returned a positive count,
+ *   NULL when fp is NULL or the read returned 0 or an error
+ */
+char *cfs_proc_fgets(char * buf, int len, FILE *fp)
+{
+    int nread;
+
+    if (fp == NULL) {
+        return NULL;
+    }
+
+    /* NOTE(review): the offset pair (low -1, high 1) is presumably a
+       marker interpreted by the driver - confirm */
+    nread = cfs_proc_read_internal((int)(LONG_PTR)fp, buf, len, -1, 1);
+
+    return (nread > 0) ? buf : NULL;
+}
+
+/*
+ * cfs_proc_fclose
+ *   Close the id carried in fp through cfs_proc_close().
+ *   Returns -1 for a NULL fp, otherwise the result of cfs_proc_close().
+ */
+int cfs_proc_fclose(FILE *fp)
+{
+    if (fp != NULL) {
+        return cfs_proc_close((int)(LONG_PTR)fp);
+    }
+
+    return -1;
+}
+
+void cfs_libc_init();
+
+/*
+ * libcfs_arch_init
+ *   User space initialization: run cfs_libc_init() and create the id
+ *   table kept in cfs_proc_idp.
+ *
+ * Return Value:
+ *   0 when cfs_idr_init() returned a table, -ENOMEM otherwise
+ */
+int
+libcfs_arch_init(void)
+{
+    cfs_libc_init();
+
+    cfs_proc_idp = cfs_idr_init();
+    if (cfs_proc_idp == NULL) {
+        return -ENOMEM;
+    }
+
+    return 0;
+}
+
+/*
+ * libcfs_arch_cleanup
+ *   Release the id table created by libcfs_arch_init(), if any, and
+ *   reset cfs_proc_idp to NULL.
+ */
+void
+libcfs_arch_cleanup(void)
+{
+    if (cfs_proc_idp == NULL) {
+        return;
+    }
+
+    cfs_idr_exit(cfs_proc_idp);
+    cfs_proc_idp = NULL;
+}
+
+#endif /* __KERNEL__ */
index e77ac73..e62a39e 100644 (file)
@@ -151,7 +151,7 @@ int cfs_kernel_thread(int (*func)(void *), void *arg, int flag)
 static CFS_DECL_RWSEM(cfs_symbol_lock);
 CFS_LIST_HEAD(cfs_symbol_list);
 
-int MPSystem = FALSE;
+int libcfs_is_mp_system = FALSE;
 
 /*
  * cfs_symbol_get
@@ -364,7 +364,12 @@ cfs_timer_dpc_proc (
     KeReleaseSpinLock(&(timer->Lock), Irql);
 
     /* call the user specified timer procedure */
-    timer->proc((unsigned long)(timer->arg));
+    timer->proc((long_ptr_t)timer->arg);
+}
+
+void cfs_init_timer(cfs_timer_t *timer)
+{
+    memset(timer, 0, sizeof(cfs_timer_t));
 }
 
 /*
@@ -383,7 +388,7 @@ cfs_timer_dpc_proc (
  *   N/A
  */
 
-void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg)
+void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr_t), void *arg)
 {
     memset(timer, 0, sizeof(cfs_timer_t));
 
@@ -441,7 +446,7 @@ void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline)
 
         timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline;
 
-        if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) {
+        if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc)) {
             cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
         }
 
@@ -533,6 +538,11 @@ void cfs_daemonize(char *str)
     return;
 }
 
+/* Variant of cfs_daemonize() with an int result: calls cfs_daemonize(str)
+   and always returns 0. */
+int cfs_daemonize_ctxt(char *str) {
+    cfs_daemonize(str);
+    return 0;
+}
+
 /*
  *  routine related with sigals
  */
@@ -566,10 +576,177 @@ void cfs_clear_sigpending(void)
     return;
 }
 
+/*
+ *  thread cpu affinity routines
+ */ 
+
+/* Output record used with the ThreadBasicInformation query (see
+   cfs_query_thread_affinity); only AffinityMask is consumed in this
+   file. */
+typedef struct _THREAD_BASIC_INFORMATION {
+    NTSTATUS ExitStatus;
+    PVOID TebBaseAddress;
+    CLIENT_ID ClientId;
+    ULONG_PTR AffinityMask;
+    KPRIORITY Priority;
+    LONG BasePriority;
+} THREAD_BASIC_INFORMATION;
+
+typedef THREAD_BASIC_INFORMATION *PTHREAD_BASIC_INFORMATION;
+
+#define THREAD_QUERY_INFORMATION       (0x0040)
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+ZwOpenThread (
+    __out PHANDLE ThreadHandle,
+    __in ACCESS_MASK DesiredAccess,
+    __in POBJECT_ATTRIBUTES ObjectAttributes,
+    __in_opt PCLIENT_ID ClientId
+    );
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+ZwQueryInformationThread (
+    __in HANDLE ThreadHandle,
+    __in THREADINFOCLASS ThreadInformationClass,
+    __out_bcount(ThreadInformationLength) PVOID ThreadInformation,
+    __in ULONG ThreadInformationLength,
+    __out_opt PULONG ReturnLength
+    );
+
+NTSYSAPI
+NTSTATUS
+NTAPI
+ZwSetInformationThread (
+    __in HANDLE ThreadHandle,
+    __in THREADINFOCLASS ThreadInformationClass,
+    __in_bcount(ThreadInformationLength) PVOID ThreadInformation,
+    __in ULONG ThreadInformationLength
+    );
+
+/*
+ * cfs_open_current_thread
+ *   Open a kernel handle to the calling thread with query and set
+ *   information access
+ *
+ * Return Value:
+ *   the handle, or NULL when ZwOpenThread() failed; a valid handle is
+ *   released with cfs_close_thread_handle()
+ */
+HANDLE
+cfs_open_current_thread()
+{
+    OBJECT_ATTRIBUTES attrs;
+    CLIENT_ID         self;
+    HANDLE            thread = NULL;
+    NTSTATUS          rc;
+
+    /* the thread is identified by client id, not by name */
+    InitializeObjectAttributes(&attrs, NULL,
+                               OBJ_KERNEL_HANDLE | OBJ_CASE_INSENSITIVE,
+                               NULL, NULL);
+
+    self.UniqueProcess = PsGetCurrentProcessId();
+    self.UniqueThread  = PsGetCurrentThreadId();
+
+    rc = ZwOpenThread(&thread,
+                      THREAD_QUERY_INFORMATION | THREAD_SET_INFORMATION,
+                      &attrs, &self);
+
+    return NT_SUCCESS(rc) ? thread : NULL;
+}
+
+/* Close a handle obtained from cfs_open_current_thread(); a NULL handle
+   is ignored. */
+void
+cfs_close_thread_handle(HANDLE handle)
+{
+    if (handle)
+        ZwClose(handle);
+}
+
+/*
+ * cfs_query_thread_affinity
+ *   Query the basic information of the calling thread and return its
+ *   AffinityMask field
+ *
+ * Return Value:
+ *   the AffinityMask of the zero-initialized record as it stands after
+ *   the query; 0 when the thread could not be opened
+ */
+KAFFINITY
+cfs_query_thread_affinity()
+{
+    THREAD_BASIC_INFORMATION info = {0};
+    DWORD                    returned;
+    HANDLE                   thread;
+
+    thread = cfs_open_current_thread();
+    if (thread) {
+        /* the status is not examined: the record is returned as the
+           call left it */
+        ZwQueryInformationThread(thread, ThreadBasicInformation, &info,
+                                 sizeof(THREAD_BASIC_INFORMATION),
+                                 &returned);
+    }
+
+    cfs_close_thread_handle(thread);
+    return info.AffinityMask;
+}
+
+/*
+ * cfs_set_thread_affinity
+ *   Set the processor affinity mask of the calling thread
+ *
+ * Arguments:
+ *   affinity:  mask handed to ZwSetInformationThread()
+ *
+ * Return Value:
+ *   non-zero on success, 0 when the thread could not be opened or the
+ *   request failed
+ */
+int
+cfs_set_thread_affinity(KAFFINITY affinity)
+{
+    /* start from a failure status so that the result is well defined
+       when the thread handle cannot be opened */
+    NTSTATUS         status = STATUS_UNSUCCESSFUL;
+    HANDLE           handle = NULL;
+
+    /* open current thread */
+    handle = cfs_open_current_thread();
+    if (!handle) {
+        goto errorout;
+    }
+
+    /* set thread cpu affinity */
+    status = ZwSetInformationThread(handle, ThreadAffinityMask,
+                                    &affinity, sizeof(KAFFINITY));
+
+errorout:
+
+    cfs_close_thread_handle(handle);
+    return NT_SUCCESS(status);
+}
+
+/*
+ * cfs_tie_thread_to_cpu
+ *   Restrict the calling thread to the single processor numbered cpu
+ *
+ * Return Value:
+ *   result of cfs_set_thread_affinity(); 0 when cpu cannot be expressed
+ *   as a bit of KAFFINITY
+ */
+int
+cfs_tie_thread_to_cpu(int cpu)
+{
+    /* a shift count outside the width of the mask is undefined */
+    if (cpu < 0 || cpu >= (int)(sizeof(KAFFINITY) * 8)) {
+        return 0;
+    }
+
+    /* shift in the width of KAFFINITY rather than in int, so that bits
+       above 30 are reachable where the mask is 64 bits wide */
+    return cfs_set_thread_affinity(((KAFFINITY) 1) << cpu);
+}
+
+/*
+ * cfs_set_thread_priority
+ *   Set the priority of the calling thread
+ *
+ * Arguments:
+ *   priority:  value handed to ZwSetInformationThread()
+ *
+ * Return Value:
+ *   non-zero on success, 0 when the thread could not be opened or the
+ *   request failed
+ */
+int
+cfs_set_thread_priority(KPRIORITY priority)
+{
+    /* start from a failure status so that the result is well defined
+       when the thread handle cannot be opened */
+    NTSTATUS         status = STATUS_UNSUCCESSFUL;
+    HANDLE           handle = NULL;
+
+    /* open current thread */
+    handle = cfs_open_current_thread();
+    if (!handle) {
+        goto errorout;
+    }
+
+    /* set thread priority */
+    status = ZwSetInformationThread(handle, ThreadPriority,
+                                    &priority, sizeof(KPRIORITY));
+    if (!NT_SUCCESS(status)) {
+        KdPrint(("set_thread_priority failed: %xh\n", status));
+    }
+
+errorout:
+
+    cfs_close_thread_handle(handle);
+    return NT_SUCCESS(status);
+}
+
 /**
  **  Initialize routines 
  **/
 
+void cfs_libc_init();
+
 int
 libcfs_arch_init(void)
 { 
@@ -579,11 +756,15 @@ libcfs_arch_init(void)
     /* Workground to check the system is MP build or UP build */
     spin_lock_init(&lock);
     spin_lock(&lock);
-    MPSystem = (int)lock.lock;
+    libcfs_is_mp_system = (int)lock.lock;
     /* MP build system: it's a real spin, for UP build system, it
        only raises the IRQL to DISPATCH_LEVEL */
     spin_unlock(&lock);
 
+    /* initialize libc routines (there is a conflict between libcnptr.lib
+       and the kernel's ntoskrnl.lib) */
+    cfs_libc_init();
+
     /* create slab memory caches for page alloctors */
     cfs_page_t_slab = cfs_mem_cache_create(
         "CPGT", sizeof(cfs_page_t), 0, 0 );
@@ -598,7 +779,6 @@ libcfs_arch_init(void)
     }    
 
     rc = init_task_manager();
-
     if (rc != 0) {
         cfs_enter_debugger();
         KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n"));
@@ -607,7 +787,6 @@ libcfs_arch_init(void)
 
     /* initialize the proc file system */
     rc = proc_init_fs();
-
     if (rc != 0) {
         cfs_enter_debugger();
         KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n"));
@@ -617,15 +796,16 @@ libcfs_arch_init(void)
 
     /* initialize the tdi data */
     rc = ks_init_tdi_data();
-
     if (rc != 0) {
         cfs_enter_debugger();
-        KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n"));
+        KdPrint(("winnt-prim.c:libcfs_arch_init: failed to initialize tdi.\n"));
         proc_destroy_fs();
         cleanup_task_manager();
         goto errorout;
     }
 
+    rc = start_shrinker_timer();
+
 errorout:
 
     if (rc != 0) {
@@ -644,12 +824,18 @@ errorout:
 void
 libcfs_arch_cleanup(void)
 {
+    /* stop shrinker timer */
+    stop_shrinker_timer();
+
     /* finialize the tdi data */
     ks_fini_tdi_data();
 
     /* detroy the whole proc fs tree and nodes */
     proc_destroy_fs();
 
+    /* cleanup context of task manager */
+    cleanup_task_manager();
+
     /* destroy the taskslot cache slab */
     if (cfs_page_t_slab) {
         cfs_mem_cache_destroy(cfs_page_t_slab);
@@ -659,7 +845,7 @@ libcfs_arch_cleanup(void)
         cfs_mem_cache_destroy(cfs_page_p_slab);
     }
 
-       return; 
+    return; 
 }
 
 EXPORT_SYMBOL(libcfs_arch_init);
index dd1e0d2..bf41886 100644 (file)
@@ -43,6 +43,7 @@
 
 #include <libcfs/libcfs.h>
 #include "tracefile.h"
+#include <lustre_lib.h>
 
 #ifdef __KERNEL__
 
  *  /proc emulator routines ...
  */
 
+/* The root node of the proc fs emulation: / */
+cfs_proc_entry_t *              cfs_proc_root = NULL;
+
 /* The root node of the proc fs emulation: /proc */
-cfs_proc_entry_t *              proc_fs_root = NULL;
+cfs_proc_entry_t *              cfs_proc_proc = NULL;
 
+/* The fs sys directory: /proc/fs */
+cfs_proc_entry_t *              cfs_proc_fs = NULL;
 
 /* The sys root: /proc/sys */
-cfs_proc_entry_t *              proc_sys_root = NULL;
-
+cfs_proc_entry_t *              cfs_proc_sys = NULL;
 
 /* The sys root: /proc/dev | to implement misc device */
-
-cfs_proc_entry_t *              proc_dev_root = NULL;
+cfs_proc_entry_t *              cfs_proc_dev = NULL;
 
 
 /* SLAB object for cfs_proc_entry_t allocation */
-
 cfs_mem_cache_t *               proc_entry_cache = NULL;
 
 /* root node for sysctl table */
-
 cfs_sysctl_table_header_t       root_table_header;
 
 /* The global lock to protect all the access */
@@ -101,7 +103,7 @@ proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos
     char    *start;
     cfs_proc_entry_t * dp;
 
-    dp = (cfs_proc_entry_t  *) file->private_data;
+    dp = (cfs_proc_entry_t  *) file->f_inode->i_priv;
     if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0)))
         return -ENOMEM;
 
@@ -158,7 +160,7 @@ proc_file_write(struct file * file, const char * buffer,
 {
     cfs_proc_entry_t  * dp;
     
-    dp = (cfs_proc_entry_t *) file->private_data;
+    dp = (cfs_proc_entry_t *) file->f_inode->i_priv;
 
     if (!dp->write_proc)
         return -EIO;
@@ -168,6 +170,7 @@ proc_file_write(struct file * file, const char * buffer,
 }
 
 struct file_operations proc_file_operations = {
+    /*owner*/       THIS_MODULE,
     /*lseek:*/      NULL, //proc_file_lseek,
     /*read:*/       proc_file_read,
     /*write:*/      proc_file_write,
@@ -212,7 +215,7 @@ proc_free_entry(cfs_proc_entry_t * entry)
 
 void
 proc_dissect_name(
-    char *path,
+    const char *path,
     char **first,
     int  *first_len,
     char **remain
@@ -229,12 +232,12 @@ proc_dissect_name(
 
     if (i < len) {
 
-        *first = path + i;
+        *first = (char *)path + i;
         while (i < len && (path[i] != '/')) i++;
-        *first_len = (path + i - *first);
+        *first_len = (int)(path + i - *first);
 
         if (i + 1 < len) {
-            *remain = path + i + 1;
+            *remain = (char *)path + i + 1;
         }
     }
 }
@@ -282,7 +285,6 @@ proc_search_splay (
 
             /*  The prefix is less than the full name
                 so we go down the right child      */
-            //
 
             link = RtlRightChild(link);
 
@@ -362,6 +364,7 @@ proc_insert_splay (
 
     cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED);
     parent->nlink++;
+    child->parent = parent;
 
     return TRUE;
 }
@@ -381,6 +384,7 @@ proc_remove_splay (
     ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
     ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
     ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED));
+    ASSERT(child->parent == parent);
 
     entry = proc_search_splay(parent, child->name);
 
@@ -401,7 +405,7 @@ proc_remove_splay (
 
 cfs_proc_entry_t *
 proc_search_entry(
-    char *              name,
+    const char *        name,
     cfs_proc_entry_t *  root
     )
 {
@@ -463,7 +467,7 @@ errorout:
 
 cfs_proc_entry_t *
 proc_insert_entry(
-    char *              name,
+    const char *        name,
     cfs_proc_entry_t *  root
     )
 {
@@ -523,7 +527,7 @@ again:
 
 void
 proc_remove_entry(
-    char *              name,
+    const char *        name,
     cfs_proc_entry_t *  root
     )
 {
@@ -564,12 +568,11 @@ proc_remove_entry(
 
 cfs_proc_entry_t *
 create_proc_entry (
-    char *              name,
+    const char *        name,
     mode_t              mode,
-    cfs_proc_entry_t *  root
+    cfs_proc_entry_t *  parent
     )
 {
-    cfs_proc_entry_t *parent = root;
     cfs_proc_entry_t *entry  = NULL;
 
     if (S_ISDIR(mode)) {
@@ -583,11 +586,15 @@ create_proc_entry (
     }
 
     LOCK_PROCFS();
-
-    ASSERT(NULL != proc_fs_root);
+    ASSERT(NULL != cfs_proc_root);
 
     if (!parent) {
-        parent = proc_fs_root;
+        if (name[0] == '/') {
+            parent = cfs_proc_root;
+        } else {
+            ASSERT(NULL != cfs_proc_proc);
+            parent = cfs_proc_proc;
+        }
     }
 
     entry = proc_search_entry(name, parent);
@@ -619,15 +626,21 @@ errorout:
 
 cfs_proc_entry_t *
 search_proc_entry(
-    char *              name,
+    const char *        name,
     cfs_proc_entry_t *  root
     )
 {
     cfs_proc_entry_t * entry;
 
     LOCK_PROCFS();
+    ASSERT(cfs_proc_root != NULL);
     if (root == NULL) {
-        root = proc_fs_root;
+        if (name[0] == '/') {
+            root = cfs_proc_root;
+        } else {
+            ASSERT(cfs_proc_proc != NULL);
+            root = cfs_proc_proc;
+        }
     }
     entry = proc_search_entry(name, root);
     UNLOCK_PROCFS();
@@ -639,13 +652,19 @@ search_proc_entry(
 
 void
 remove_proc_entry(
-    char *              name,
+    const char *        name,
     cfs_proc_entry_t *  parent
     )
 {
     LOCK_PROCFS();
+    ASSERT(cfs_proc_root != NULL);
     if (parent == NULL) {
-        parent = proc_fs_root;
+        if (name[0] == '/') {
+            parent = cfs_proc_root;
+        } else {
+            ASSERT(cfs_proc_proc != NULL);
+            parent = cfs_proc_proc;
+        }
     }
     proc_remove_entry(name, parent);
     UNLOCK_PROCFS();
@@ -668,6 +687,30 @@ void proc_destroy_splay(cfs_proc_entry_t * entry)
     proc_free_entry(entry);
 }
 
+/* Not implemented: breaks into the debugger through cfs_enter_debugger()
+   and returns NULL; none of the arguments is used. */
+cfs_proc_entry_t *proc_symlink(
+    const char *name,
+       cfs_proc_entry_t *parent,
+    const char *dest
+    )
+{
+    cfs_enter_debugger();
+    return NULL;
+}
+
+/*
+ * proc_mkdir
+ *   Create a directory entry called name below parent
+ *
+ * Return Value:
+ *   whatever create_proc_entry() returns for mode S_IFDIR
+ */
+cfs_proc_entry_t *proc_mkdir(
+    const char *name,
+       cfs_proc_entry_t *parent)
+{
+    /* create_proc_entry() takes a const name, no cast is needed */
+    return create_proc_entry(name, S_IFDIR, parent);
+}
+
+/*
+ * proc_destory_subtree
+ *   With the procfs lock held, clear entry->root and pass entry to
+ *   proc_destroy_splay().
+ *
+ *   NOTE(review): clearing root first presumably stops
+ *   proc_destroy_splay() from walking the children of entry - confirm
+ *   that they are expected to be released elsewhere.
+ */
+void proc_destory_subtree(cfs_proc_entry_t *entry)
+{
+    LOCK_PROCFS();
+    entry->root = NULL;
+    proc_destroy_splay(entry);
+    UNLOCK_PROCFS();
+}
 
 /* destory the whole proc fs tree */
 
@@ -675,8 +718,8 @@ void proc_destroy_fs()
 {
     LOCK_PROCFS();
 
-    if (proc_fs_root) {
-        proc_destroy_splay(proc_fs_root);
+    if (cfs_proc_root) {
+        proc_destroy_splay(cfs_proc_root);
     }
 
     if (proc_entry_cache) {
@@ -686,14 +729,77 @@ void proc_destroy_fs()
     UNLOCK_PROCFS();
 }
 
-/* initilaize / build the proc fs tree */
+static char proc_item_path[512];
+
 
+void proc_show_tree(cfs_proc_entry_t * node);
+/* Print one entry (directories get a trailing '/'), then descend into it. */
+void proc_print_node(cfs_proc_entry_t * node)
+{
+    int is_dir = S_ISDIR(node->mode);
+
+    if (node == cfs_proc_root) {
+         printk("%s\n", node->name);
+    } else if (is_dir) {
+        printk("%s/%s/\n", proc_item_path, node->name);
+    } else {
+        printk("%s/%s\n", proc_item_path, node->name);
+    }
+    if (is_dir) {
+        proc_show_tree(node);
+    }
+}
+
+/*
+ * In-order walk of the splay subtree rooted at link: left subtree first,
+ * then the entry embedding link (through s_link), then the right subtree.
+ */
+void proc_show_child(PRTL_SPLAY_LINKS link)
+{
+    if (link != NULL) {
+        cfs_proc_entry_t * node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link);
+        proc_show_child(link->LeftChild);
+        proc_print_node(node);
+        proc_show_child(link->RightChild);
+    }
+}
+
+/* Print all children of directory node, extending proc_item_path with the
+ * node's name for the duration of the walk and truncating it back after. */
+void proc_show_tree(cfs_proc_entry_t * node)
+{
+    size_t saved = strlen(proc_item_path);
+
+    ASSERT(S_ISDIR(node->mode));
+    /* leave the prefix unextended rather than overrun the fixed buffer */
+    if (node != cfs_proc_root &&
+        saved + 1 + strlen(node->name) < sizeof(proc_item_path)) {
+        strcat(proc_item_path, "/");
+        strcat(proc_item_path, node->name);
+    }
+    proc_show_child(node->root);
+    proc_item_path[saved] = 0;
+}
+
+void proc_print_splay()
+{
+    printk("=================================================\n");
+    printk("Lustre virtual proc entries:\n");
+    printk("-------------------------------------------------\n");
+    LOCK_PROCFS();
+    proc_show_tree(cfs_proc_root);
+    UNLOCK_PROCFS();
+    printk("=================================================\n");
+}
+
+
+/* initialize / build the proc fs tree */
 int proc_init_fs()
 {
     cfs_proc_entry_t * root = NULL;
 
     memset(&(root_table_header), 0, sizeof(struct ctl_table_header));
-    INIT_LIST_HEAD(&(root_table_header.ctl_entry));
+    CFS_INIT_LIST_HEAD(&(root_table_header.ctl_entry));
 
     INIT_PROCFS_LOCK();
     proc_entry_cache = cfs_mem_cache_create(
@@ -708,49 +814,49 @@ int proc_init_fs()
     }
 
     root = proc_alloc_entry();
-
     if (!root) {
         proc_destroy_fs();
         return (-ENOMEM);
     }
-
     root->magic = CFS_PROC_ENTRY_MAGIC;
     root->flags = CFS_PROC_FLAG_DIRECTORY;
     root->mode  = S_IFDIR | S_IRUGO | S_IXUGO;
     root->nlink = 3; // root should never be deleted.
+    root->name[0]='/';
+    root->name[1]= 0;
+    cfs_proc_root = root;
 
-    root->name[0]='p';
-    root->name[1]='r';
-    root->name[2]='o';
-    root->name[3]='c';
-
-    proc_fs_root = root;
-
-    proc_sys_root = create_proc_entry("sys", S_IFDIR, root);
-
-    if (!proc_sys_root) {
-        proc_free_entry(root);
-        proc_fs_root = NULL;
-        proc_destroy_fs();
-        return (-ENOMEM);
+    cfs_proc_dev = create_proc_entry("dev", S_IFDIR, root);
+    if (!cfs_proc_dev) {
+        goto errorout;
     }
+    cfs_proc_dev->nlink = 1;
 
-    proc_sys_root->nlink = 1;
+    cfs_proc_proc  = create_proc_entry("proc", S_IFDIR, root);
+    if (!cfs_proc_proc) {
+        goto errorout;
+    }
+    cfs_proc_proc->nlink = 1;
 
-    proc_dev_root = create_proc_entry("dev", S_IFDIR, root);
+    cfs_proc_fs = create_proc_entry("fs",  S_IFDIR, cfs_proc_proc);
+    if (!cfs_proc_fs) {
+        goto errorout;
+    }
+    cfs_proc_fs->nlink = 1;
 
-    if (!proc_dev_root) {
-        proc_free_entry(proc_sys_root);
-        proc_sys_root = NULL;
-        proc_free_entry(proc_fs_root);
-        proc_fs_root = NULL;
-        proc_destroy_fs();
-        return (-ENOMEM);
+    cfs_proc_sys = create_proc_entry("sys",  S_IFDIR, cfs_proc_proc);
+    if (!cfs_proc_sys) {
+        goto errorout;
     }
+    cfs_proc_sys->nlink = 1;
 
-    proc_dev_root->nlink = 1;
-   
+  
     return 0;
+
+errorout:
+
+    proc_destroy_fs();
+    return (-ENOMEM);
 }
 
 
@@ -772,9 +878,6 @@ static ssize_t do_rw_proc(int write, struct file * file, char * buf,
         return -ENOTDIR;
     op = (write ? 002 : 004);
 
-//  if (ctl_perm(table, op))
-//      return -EPERM;
-    
     res = count;
 
     /*
@@ -801,6 +904,7 @@ static ssize_t proc_writesys(struct file * file, const char * buf,
 
 
 struct file_operations proc_sys_file_operations = {
+    /*owner*/       THIS_MODULE,
     /*lseek:*/      NULL,
     /*read:*/       proc_readsys,
     /*write:*/      proc_writesys,
@@ -943,14 +1047,14 @@ unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
         if (*cp == '0') {
             base = 8;
             cp++;
-            if ((*cp == 'x') && isxdigit(cp[1])) {
+            if ((*cp == 'x') && cfs_isxdigit(cp[1])) {
                 cp++;
                 base = 16;
             }
         }
     }
-    while (isxdigit(*cp) &&
-           (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
+    while (cfs_isxdigit(*cp) &&
+           (value = cfs_isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) {
         result = result*base + value;
         cp++;
     }
@@ -1353,13 +1457,13 @@ struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table,
         return NULL;
     tmp->ctl_table = table;
 
-    INIT_LIST_HEAD(&tmp->ctl_entry);
+    CFS_INIT_LIST_HEAD(&tmp->ctl_entry);
     if (insert_at_head)
         list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
     else
         list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
 #ifdef CONFIG_PROC_FS
-    register_proc_table(table, proc_sys_root);
+    register_proc_table(table, cfs_proc_sys);
 #endif
     return tmp;
 }
@@ -1375,7 +1479,7 @@ void unregister_sysctl_table(struct ctl_table_header * header)
 {
     list_del(&header->ctl_entry);
 #ifdef CONFIG_PROC_FS
-    unregister_proc_table(header->ctl_table, proc_sys_root);
+    unregister_proc_table(header->ctl_table, cfs_proc_sys);
 #endif
     cfs_free(header);
 }
@@ -1388,7 +1492,7 @@ int cfs_psdev_register(cfs_psdev_t * psdev)
     entry = create_proc_entry (
                 (char *)psdev->name,
                 S_IFREG,
-                proc_dev_root
+                cfs_proc_dev
             );
 
     if (!entry) {
@@ -1409,7 +1513,7 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev)
 
     entry = search_proc_entry (
                 (char *)psdev->name,
-                proc_dev_root
+                cfs_proc_dev
             );
 
     if (entry) {
@@ -1419,15 +1523,13 @@ int cfs_psdev_deregister(cfs_psdev_t * psdev)
 
         remove_proc_entry(
             (char *)psdev->name,
-            proc_dev_root
+            cfs_proc_dev
             );
     }
 
     return 0;
 }
 
-extern char debug_file_path[1024];
-
 #define PSDEV_LNET  (0x100)
 enum {
         PSDEV_DEBUG = 1,          /* control debugging */
@@ -1446,10 +1548,8 @@ static struct ctl_table lnet_table[] = {
          sizeof(int), 0644, NULL, &proc_dointvec},
         {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL,
          &proc_dointvec},
-        {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, 
+        {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit,
          sizeof(int), 0644, NULL, &proc_dointvec},
-        {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
-         sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
 /*
         {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
          sizeof(portals_upcall), 0644, NULL, &proc_dostring,
@@ -1469,7 +1569,7 @@ static struct ctl_table top_table[2] = {
 int trace_write_dump_kernel(struct file *file, const char *buffer,
                              unsigned long count, void *data)
 {
-        int rc = trace_dump_debug_buffer_usrstr(buffer, count);
+        int rc = trace_dump_debug_buffer_usrstr((void *)buffer, count);
         
         return (rc < 0) ? rc : count;
 }
@@ -1477,7 +1577,7 @@ int trace_write_dump_kernel(struct file *file, const char *buffer,
 int trace_write_daemon_file(struct file *file, const char *buffer,
                             unsigned long count, void *data)
 {
-        int rc = trace_daemon_command_usrstr(buffer, count);
+        int rc = trace_daemon_command_usrstr((void *)buffer, count);
 
         return (rc < 0) ? rc : count;
 }
@@ -1485,21 +1585,17 @@ int trace_write_daemon_file(struct file *file, const char *buffer,
 int trace_read_daemon_file(char *page, char **start, off_t off, int count,
                            int *eof, void *data)
 {
-       int rc;
-
-       tracefile_read_lock();
-
+        int rc;
+        tracefile_read_lock();
         rc = trace_copyout_string(page, count, tracefile, "\n");
-
         tracefile_read_unlock();
-
-       return rc;
+        return rc;
 }
 
 int trace_write_debug_mb(struct file *file, const char *buffer,
                          unsigned long count, void *data)
 {
-        int rc = trace_set_debug_mb_userstr(buffer, count);
+        int rc = 0; /*trace_set_debug_mb_userstr((void *)buffer, count);*/
         
         return (rc < 0) ? rc : count;
 }
@@ -1520,14 +1616,14 @@ int insert_proc(void)
 
         ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL);
         if (ent == NULL) {
-                CERROR(("couldn't register dump_kernel\n"));
+                CERROR("couldn't register dump_kernel\n");
                 return -1;
         }
         ent->write_proc = trace_write_dump_kernel;
 
         ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL);
         if (ent == NULL) {
-                CERROR(("couldn't register daemon_file\n"));
+                CERROR("couldn't register daemon_file\n");
                 return -1;
         }
         ent->write_proc = trace_write_daemon_file;
@@ -1535,7 +1631,7 @@ int insert_proc(void)
 
         ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL);
         if (ent == NULL) {
-                CERROR(("couldn't register debug_mb\n"));
+                CERROR("couldn't register debug_mb\n");
                 return -1;
         }
         ent->write_proc = trace_write_debug_mb;
@@ -1546,15 +1642,9 @@ int insert_proc(void)
 
 void remove_proc(void)
 {
-        remove_proc_entry("sys/portals/dump_kernel", NULL);
-        remove_proc_entry("sys/portals/daemon_file", NULL);
-        remove_proc_entry("sys/portals/debug_mb", NULL);
-
-#ifdef CONFIG_SYSCTL
-        if (portals_table_header)
-                unregister_sysctl_table(portals_table_header);
-        portals_table_header = NULL;
-#endif
+        remove_proc_entry("sys/lnet/dump_kernel", NULL);
+        remove_proc_entry("sys/lnet/daemon_file", NULL);
+        remove_proc_entry("sys/lnet/debug_mb", NULL);
 }
 
 
@@ -1569,30 +1659,33 @@ lustre_open_file(char * filename)
     cfs_file_t * fh = NULL;
     cfs_proc_entry_t * fp = NULL;
 
-    fp = search_proc_entry(filename, proc_fs_root);
-
+    fp = search_proc_entry(filename, cfs_proc_root);
     if (!fp) {
-        rc =  -ENOENT;
         return NULL;
     }
 
     fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO);
-
     if (!fh) {
-        rc =  -ENOMEM;
         return NULL;
     }
 
-    fh->private_data = (void *)fp;
+    fh->f_inode = cfs_alloc(sizeof(struct inode), CFS_ALLOC_ZERO);
+    if (!fh->f_inode) {
+        cfs_free(fh);
+        return NULL;
+    }
+
+    fh->f_inode->i_priv = (void *)fp;
     fh->f_op = fp->proc_fops;
 
     if (fh->f_op->open) {
-        rc = (fh->f_op->open)(fh);
+        rc = (fh->f_op->open)(fh->f_inode, fh);
     } else {
         fp->nlink++;
     }
 
     if (0 != rc) {
+        cfs_free(fh->f_inode);
         cfs_free(fh);
         return NULL;
     }
@@ -1606,14 +1699,14 @@ lustre_close_file(cfs_file_t * fh)
     int rc = 0;
     cfs_proc_entry_t * fp = NULL;
 
-    fp = (cfs_proc_entry_t *) fh->private_data;
-
+    fp = (cfs_proc_entry_t *) fh->f_inode->i_priv;
     if (fh->f_op->release) {
-        rc = (fh->f_op->release)(fh);
+        rc = (fh->f_op->release)(fh->f_inode, fh);
     } else {
         fp->nlink--;
     }
 
+    cfs_free(fh->f_inode);
     cfs_free(fh);
 
     return rc;
@@ -1622,7 +1715,7 @@ lustre_close_file(cfs_file_t * fh)
 int
 lustre_do_ioctl( cfs_file_t * fh,
                  unsigned long cmd,
-                 ulong_ptr arg )
+                 ulong_ptr_t arg )
 {
     int rc = 0;
 
@@ -1630,11 +1723,6 @@ lustre_do_ioctl( cfs_file_t * fh,
         rc = (fh->f_op->ioctl)(fh, cmd, arg);
     }
 
-    if (rc != 0) {
-        printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n",
-                cmd, arg, rc);
-    }
-
     return rc;
 }
     
@@ -1642,13 +1730,18 @@ int
 lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl)
 {
     int         rc = 0;
-    ulong_ptr   data;
+    ulong_ptr_t data;
 
-    data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL);
+    data = (ulong_ptr_t)devctl + sizeof(CFS_PROC_IOCTL);
+#if defined(_X86_)    
+    CLASSERT(sizeof(struct obd_ioctl_data) == 528);
+#else
+    CLASSERT(sizeof(struct obd_ioctl_data) == 576);
+#endif
 
     /* obd ioctl code */
     if (_IOC_TYPE(devctl->cmd) == 'f') {
-#if 0
+
         struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data;
 
         if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
@@ -1656,17 +1749,21 @@ lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl)
 
             unsigned long off = obd->ioc_len;
 
-            if (obd->ioc_pbuf1) {
+            if (obd->ioc_plen1) {
                 obd->ioc_pbuf1 = (char *)(data + off);
                 off += size_round(obd->ioc_plen1);
+            } else {
+                obd->ioc_pbuf1 = NULL;
             }
 
-            if (obd->ioc_pbuf2) {
+            if (obd->ioc_plen2) {
                 obd->ioc_pbuf2 = (char *)(data + off);
+                off += size_round(obd->ioc_plen2);
+            } else {
+                obd->ioc_pbuf2 = NULL;
             }
         }
- #endif
-   }
+    }
 
     rc = lustre_do_ioctl(fh, devctl->cmd, data);
 
@@ -1682,12 +1779,20 @@ lustre_read_file(
     char *          buf
     )
 {
-    size_t rc = 0;
+    size_t  rc = 0;
+    off_t   low, high;
+
+    low = (off_t) size;
+    high = (off_t)(off >> 32);
 
     if (fh->f_op->read) {
         rc = (fh->f_op->read) (fh, buf, size, &off);
     }
 
+    if (rc) {
+        fh->f_pos = off + rc;
+    }
+
     return rc;
 }
  
@@ -1701,7 +1806,7 @@ lustre_write_file(
     )
 {
     size_t rc = 0;
-
+    off = 0;
     if (fh->f_op->write) {
         rc = (fh->f_op->write)(fh, buf, size, &off);
     }
@@ -1709,347 +1814,528 @@ lustre_write_file(
     return rc;
 }  
 
-#else /* !__KERNEL__ */
-
-#include <lnet/api-support.h>
-#include <liblustre.h>
-#include <lustre_lib.h>
 
 /*
- * proc process routines of user space
+ *  seq file routines
  */
 
-HANDLE cfs_proc_open (char * filename, int oflag)
-{
-    NTSTATUS            status;
-    IO_STATUS_BLOCK     iosb;
-    int                 rc;
-
-    HANDLE              FileHandle = INVALID_HANDLE_VALUE;
-    OBJECT_ATTRIBUTES   ObjectAttributes;
-    ACCESS_MASK         DesiredAccess;
-    ULONG               CreateDisposition;
-    ULONG               ShareAccess;
-    ULONG               CreateOptions;
-    UNICODE_STRING      UnicodeName;
-    USHORT              NameLength;
-
-    PFILE_FULL_EA_INFORMATION Ea = NULL;
-    ULONG               EaLength;
-    UCHAR               EaBuffer[EA_MAX_LENGTH];
-
-    /* Check the filename: should start with "/proc" or "/dev" */
-    NameLength = (USHORT)strlen(filename);
-    if (NameLength > 0x05) {
-        if (_strnicmp(filename, "/proc/", 6) == 0) {
-            filename += 6;
-            NameLength -=6;
-            if (NameLength <= 0) {
-                rc = -EINVAL;
-                goto errorout;
-            }
-        } else if (_strnicmp(filename, "/dev/", 5) == 0) {
-        } else {
-            rc = -EINVAL;
-            goto errorout;
-        }
-    } else {
-        rc = -EINVAL;
-        goto errorout;
-    }
-
-    /* Analyze the flags settings */
-
-    if (cfs_is_flag_set(oflag, O_WRONLY)) {
-        DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
-        ShareAccess = 0;
-    }  else if (cfs_is_flag_set(oflag, O_RDWR)) {
-        DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
-        ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
-    } else {
-        DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
-        ShareAccess = FILE_SHARE_READ;
-    }
-
-    if (cfs_is_flag_set(oflag, O_CREAT)) {
-        if (cfs_is_flag_set(oflag, O_EXCL)) {
-            CreateDisposition = FILE_CREATE;
-            rc = -EINVAL;
-            goto errorout;
-        } else {
-            CreateDisposition = FILE_OPEN_IF;
-        }
-    } else {
-        CreateDisposition = FILE_OPEN;
-    }
-
-    if (cfs_is_flag_set(oflag, O_TRUNC)) {
-        if (cfs_is_flag_set(oflag, O_EXCL)) {
-            CreateDisposition = FILE_OVERWRITE;
-        } else {
-            CreateDisposition = FILE_OVERWRITE_IF;
-        }
-    }
-
-    CreateOptions = 0;
-
-    if (cfs_is_flag_set(oflag, O_DIRECTORY)) {
-        cfs_set_flag(CreateOptions,  FILE_DIRECTORY_FILE);
-    }
-
-    if (cfs_is_flag_set(oflag, O_SYNC)) {
-         cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
-    }
-
-    if (cfs_is_flag_set(oflag, O_DIRECT)) {
-         cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
-    }
-
-    /* Initialize the unicode path name for the specified file */
-    RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK);
-
-    /* Setup the object attributes structure for the file. */
-    InitializeObjectAttributes(
-            &ObjectAttributes,
-            &UnicodeName,
-            OBJ_CASE_INSENSITIVE,
-            NULL,
-            NULL );
-
-    /* building EA for the proc entry ...  */
-    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
-    Ea->NextEntryOffset = 0;
-    Ea->Flags = 0;
-    Ea->EaNameLength = (UCHAR)NameLength;
-    Ea->EaValueLength = 0;
-    RtlCopyMemory(
-        &(Ea->EaName),
-        filename,
-        NameLength + 1
-        );
-    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
-                               Ea->EaNameLength + 1;
-
-    /* Now to open or create the file now */
-    status = ZwCreateFile(
-                &FileHandle,
-                DesiredAccess,
-                &ObjectAttributes,
-                &iosb,
-                0,
-                FILE_ATTRIBUTE_NORMAL,
-                ShareAccess,
-                CreateDisposition,
-                CreateOptions,
-                Ea,
-                EaLength );
-
-    /* Check the returned status of Iosb ... */
-
-    if (!NT_SUCCESS(status)) {
-        rc = cfs_error_code(status);
-        goto errorout;
-    }
-
-errorout:
-
-    return FileHandle;
+/**
+ *     seq_open -      initialize sequential file
+ *     @file: file we initialize
+ *     @op: method table describing the sequence
+ *
+ *     seq_open() sets @file, associating it with a sequence described
+ *     by @op.  @op->start() sets the iterator up and returns the first
+ *     element of sequence. @op->stop() shuts it down.  @op->next()
+ *     returns the next element of sequence.  @op->show() prints element
+ *     into the buffer.  In case of error ->start() and ->next() return
+ *     ERR_PTR(error).  In the end of sequence they return %NULL. ->show()
+ *     returns 0 in case of success and negative number in case of error.
+ */
+int seq_open(struct file *file, const struct seq_operations *op)
+{
+       struct seq_file *p = file->private_data;
+
+       if (!p) {
+               p = kmalloc(sizeof(*p), GFP_KERNEL);
+               if (!p)
+                       return -ENOMEM;
+               file->private_data = p;
+       }
+       memset(p, 0, sizeof(*p));
+       mutex_init(&p->lock);
+       p->op = op;
+
+       /*
+        * Wrappers around seq_open(e.g. swaps_open) need to be
+        * aware of this. If they set f_version themselves, they
+        * should call seq_open first and then set f_version.
+        */
+       file->f_version = 0;
+
+       /* SEQ files support lseek, but not pread/pwrite */
+       file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
+       return 0;
 }
+EXPORT_SYMBOL(seq_open);
 
-int cfs_proc_close(HANDLE handle)
+/**
+ *     seq_read -      ->read() method for sequential files.
+ *     @file: the file to read from
+ *     @buf: the buffer to read to
+ *     @size: the maximum number of bytes to read
+ *     @ppos: the current position in the file
+ *
+ *     Ready-made ->f_op->read()
+ */
+ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
 {
-    if (handle) {
-        NtClose((HANDLE)handle);
-    }
-
-    return 0;
+       struct seq_file *m = (struct seq_file *)file->private_data;
+       size_t copied = 0;
+       loff_t pos;
+       size_t n;
+       void *p;
+       int err = 0;
+
+       mutex_lock(&m->lock);
+       /*
+        * seq_file->op->..m_start/m_stop/m_next may do special actions
+        * or optimisations based on the file->f_version, so we want to
+        * pass the file->f_version to those methods.
+        *
+        * seq_file->version is just copy of f_version, and seq_file
+        * methods can treat it simply as file version.
+        * It is copied in first and copied out after all operations.
+        * It is convenient to have it as  part of structure to avoid the
+        * need of passing another argument to all the seq_file methods.
+        */
+       m->version = file->f_version;
+       /* grab buffer if we didn't have one */
+       if (!m->buf) {
+               m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+               if (!m->buf)
+                       goto Enomem;
+       }
+       /* if not empty - flush it first */
+       if (m->count) {
+               n = min(m->count, size);
+               err = copy_to_user(buf, m->buf + m->from, n);
+               if (err)
+                       goto Efault;
+               m->count -= n;
+               m->from += n;
+               size -= n;
+               buf += n;
+               copied += n;
+               if (!m->count)
+                       m->index++;
+               if (!size)
+                       goto Done;
+       }
+       /* we need at least one record in buffer */
+       while (1) {
+               pos = m->index;
+               p = m->op->start(m, &pos);
+               err = PTR_ERR(p);
+               if (!p || IS_ERR(p))
+                       break;
+               err = m->op->show(m, p);
+               if (err)
+                       break;
+               if (m->count < m->size)
+                       goto Fill;
+               m->op->stop(m, p);
+               cfs_free(m->buf);
+               m->buf = kmalloc(m->size <<= 1, GFP_KERNEL);
+               if (!m->buf)
+                       goto Enomem;
+               m->count = 0;
+               m->version = 0;
+       }
+       m->op->stop(m, p);
+       m->count = 0;
+       goto Done;
+Fill:
+       /* they want more? let's try to get some more */
+       while (m->count < size) {
+               size_t offs = m->count;
+               loff_t next = pos;
+               p = m->op->next(m, p, &next);
+               if (!p || IS_ERR(p)) {
+                       err = PTR_ERR(p);
+                       break;
+               }
+               err = m->op->show(m, p);
+               if (err || m->count == m->size) {
+                       m->count = offs;
+                       break;
+               }
+               pos = next;
+       }
+       m->op->stop(m, p);
+       n = min(m->count, size);
+       err = copy_to_user(buf, m->buf, n);
+       if (err)
+               goto Efault;
+       copied += n;
+       m->count -= n;
+       if (m->count)
+               m->from = n;
+       else
+               pos++;
+       m->index = pos;
+Done:
+       if (!copied)
+               copied = err;
+       else
+               *ppos += copied;
+       file->f_version = m->version;
+       mutex_unlock(&m->lock);
+       return copied;
+Enomem:
+       err = -ENOMEM;
+       goto Done;
+Efault:
+       err = -EFAULT;
+       goto Done;
 }
+EXPORT_SYMBOL(seq_read);
 
-int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count)
+static int traverse(struct seq_file *m, loff_t offset)
 {
-    NTSTATUS            status;
-    IO_STATUS_BLOCK     iosb;
-    LARGE_INTEGER       offset;
-
-
-    offset.QuadPart = 0;
+       loff_t pos = 0, index;
+       int error = 0;
+       void *p;
+
+       m->version = 0;
+       index = 0;
+       m->count = m->from = 0;
+       if (!offset) {
+               m->index = index;
+               return 0;
+       }
+       if (!m->buf) {
+               m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
+               if (!m->buf)
+                       return -ENOMEM;
+       }
+       p = m->op->start(m, &index);
+       while (p) {
+               error = PTR_ERR(p);
+               if (IS_ERR(p))
+                       break;
+               error = m->op->show(m, p);
+               if (error)
+                       break;
+               if (m->count == m->size)
+                       goto Eoverflow;
+               if (pos + (loff_t)(m->count) > offset) {
+                       m->from = (size_t)(offset - pos);
+                       m->count -= m->from;
+                       m->index = index;
+                       break;
+               }
+               pos += m->count;
+               m->count = 0;
+               if (pos == offset) {
+                       index++;
+                       m->index = index;
+                       break;
+               }
+               p = m->op->next(m, p, &index);
+       }
+       m->op->stop(m, p);
+       return error;
+
+Eoverflow:
+       m->op->stop(m, p);
+       cfs_free(m->buf);
+       m->buf = cfs_alloc(m->size <<= 1, GFP_KERNEL | CFS_ALLOC_ZERO);
+       return !m->buf ? -ENOMEM : -EAGAIN;
+}
 
-    /* read file data */
-    status = NtReadFile(
-                (HANDLE)handle,
-                0,
-                NULL,
-                NULL,
-                &iosb,
-                buffer,
-                count,
-                &offset,
-                NULL);                     
+/**
+ *     seq_lseek -     ->llseek() method for sequential files.
+ *     @file: the file in question
+ *     @offset: new position
+ *     @origin: 0 for absolute, 1 for relative position
+ *
+ *     Ready-made ->f_op->llseek()
+ */
+loff_t seq_lseek(struct file *file, loff_t offset, int origin)
+{
+       struct seq_file *m = (struct seq_file *)file->private_data;
+       long long retval = -EINVAL;
+
+       mutex_lock(&m->lock);
+       m->version = file->f_version;
+       switch (origin) {
+               case 1:
+                       offset += file->f_pos;
+               case 0:
+                       if (offset < 0)
+                               break;
+                       retval = offset;
+                       if (offset != file->f_pos) {
+                               while ((retval=traverse(m, offset)) == -EAGAIN)
+                                       ;
+                               if (retval) {
+                                       /* with extreme prejudice... */
+                                       file->f_pos = 0;
+                                       m->version = 0;
+                                       m->index = 0;
+                                       m->count = 0;
+                               } else {
+                                       retval = file->f_pos = offset;
+                               }
+                       }
+       }
+       file->f_version = m->version;
+       mutex_unlock(&m->lock);
+       return retval;
+}
+EXPORT_SYMBOL(seq_lseek);
 
-    /* check the return status */
-    if (!NT_SUCCESS(status)) {
-        printf("NtReadFile request failed 0x%0x\n", status);
-        goto errorout;
+/**
+ *     seq_release -   free the structures associated with sequential file.
+ *     @file: file in question
+ *     @inode: file->f_path.dentry->d_inode
+ *
+ *     Frees the structures associated with sequential file; can be used
+ *     as ->f_op->release() if you don't have private data to destroy.
+ */
+int seq_release(struct inode *inode, struct file *file)
+{
+       struct seq_file *m = (struct seq_file *)file->private_data;
+    if (m) {
+        if (m->buf)
+               cfs_free(m->buf);
+           cfs_free(m);
     }
+       return 0;
+}
+EXPORT_SYMBOL(seq_release);
 
-errorout:
+/**
+ *     seq_escape -    print string into buffer, escaping some characters
+ *     @m:     target buffer
+ *     @s:     string
+ *     @esc:   set of characters that need escaping
+ *
+ *     Puts string into buffer, replacing each occurrence of character from
+ *     @esc with usual octal escape.  Returns 0 in case of success, -1 - in
+ *     case of overflow.
+ */
+int seq_escape(struct seq_file *m, const char *s, const char *esc)
+{
+       char *end = m->buf + m->size;
+        char *p;
+       char c;
+
+        for (p = m->buf + m->count; (c = *s) != '\0' && p < end; s++) {
+               if (!strchr(esc, c)) {
+                       *p++ = c;
+                       continue;
+               }
+               if (p + 3 < end) {
+                       *p++ = '\\';
+                       *p++ = '0' + ((c & 0300) >> 6);
+                       *p++ = '0' + ((c & 070) >> 3);
+                       *p++ = '0' + (c & 07);
+                       continue;
+               }
+               m->count = m->size;
+               return -1;
+        }
+       m->count = p - m->buf;
+        return 0;
+}
+EXPORT_SYMBOL(seq_escape);
 
-    if (NT_SUCCESS(status)) {
-        return iosb.Information;
-    }
+/* Append formatted text to m->buf; -1 (buffer marked full) on overflow/error. */
+int seq_printf(struct seq_file *m, const char *f, ...)
+{
+       va_list args;
+       int len = -1;
+       if (m->count < m->size) {
+               va_start(args, f);
+               len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
+               va_end(args);
+       }
+       if (len >= 0 && m->count + len < m->size) { /* never add a negative len */
+               m->count += len;
+               return 0;
+       }
+       m->count = m->size;
+       return -1;
+}
+EXPORT_SYMBOL(seq_printf);
 
-    return cfs_error_code(status);
+char *d_path(struct path *p, char *buffer, int buflen)
+{
+       cfs_enter_debugger();
+       return ERR_PTR(-ENAMETOOLONG);
 }
 
+int seq_path(struct seq_file *m, struct path *path, char *esc)
+{
+       if (m->count < m->size) {
+               char *s = m->buf + m->count;
+               char *p = d_path(path, s, m->size - m->count);
+               if (!IS_ERR(p)) {
+                       while (s <= p) {
+                               char c = *p++;
+                               if (!c) {
+                                       p = m->buf + m->count;
+                                       m->count = s - m->buf;
+                                       return (int)(s - p);
+                               } else if (!strchr(esc, c)) {
+                                       *s++ = c;
+                               } else if (s + 4 > p) {
+                                       break;
+                               } else {
+                                       *s++ = '\\';
+                                       *s++ = '0' + ((c & 0300) >> 6);
+                                       *s++ = '0' + ((c & 070) >> 3);
+                                       *s++ = '0' + (c & 07);
+                               }
+                       }
+               }
+       }
+       m->count = m->size;
+       return -1;
+}
+EXPORT_SYMBOL(seq_path);
 
-int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count)
+static void *single_start(struct seq_file *p, loff_t *pos)
 {
-    NTSTATUS            status;
-    IO_STATUS_BLOCK     iosb;
-    LARGE_INTEGER       offset;
+       return (void *) (INT_PTR) (*pos == 0);
+}
 
-    offset.QuadPart = -1;
+static void *single_next(struct seq_file *p, void *v, loff_t *pos)
+{
+       ++*pos;
+       return NULL;
+}
 
-    /* write buffer to the opened file */
-    status = NtWriteFile(
-                (HANDLE)handle,
-                0,
-                NULL,
-                NULL,
-                &iosb,
-                buffer,
-                count,
-                &offset,
-                NULL);                     
+static void single_stop(struct seq_file *p, void *v)
+{
+}
 
-    /* check the return status */
-    if (!NT_SUCCESS(status)) {
-        printf("NtWriteFile request failed 0x%0x\n", status);
-        goto errorout;
-    }
+int single_open(struct file *file, int (*show)(struct seq_file *, void *),
+               void *data)
+{
+       struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL);
+       int res = -ENOMEM;
+
+       if (op) {
+               op->start = single_start;
+               op->next = single_next;
+               op->stop = single_stop;
+               op->show = show;
+               res = seq_open(file, op);
+               if (!res)
+                       ((struct seq_file *)file->private_data)->private = data;
+               else
+                       cfs_free(op);
+       }
+       return res;
+}
+EXPORT_SYMBOL(single_open);
 
-errorout:
+int single_release(struct inode *inode, struct file *file)
+{
+       const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
+       int res = seq_release(inode, file);
+       cfs_free((void *)op);
+       return res;
+}
+EXPORT_SYMBOL(single_release);
 
-    if (NT_SUCCESS(status)) {
-        return iosb.Information;
-    }
+int seq_release_private(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq = file->private_data;
 
-    return cfs_error_code(status);
+       cfs_free(seq->private);
+       seq->private = NULL;
+       return seq_release(inode, file);
 }
+EXPORT_SYMBOL(seq_release_private);
 
-int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer)
+void *__seq_open_private(struct file *f, const struct seq_operations *ops,
+               int psize)
 {
-    PUCHAR          procdat = NULL;
-    CFS_PROC_IOCTL  procctl;
-    ULONG           length = 0;
-    ULONG           extra = 0;
-
-    NTSTATUS        status;
-    IO_STATUS_BLOCK iosb;
+       int rc;
+       void *private;
+       struct seq_file *seq;
 
-    procctl.cmd = cmd;
+       private = cfs_alloc(psize, GFP_KERNEL | CFS_ALLOC_ZERO);
+       if (private == NULL)
+               goto out;
 
-    if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) {
-        struct libcfs_ioctl_data * portal;
-        portal = (struct libcfs_ioctl_data *) buffer;
-        length = portal->ioc_len;
-    } else if (_IOC_TYPE(cmd) == 'f') {
-        struct obd_ioctl_data * obd;
-        obd = (struct obd_ioctl_data *) buffer;
-        length = obd->ioc_len;
-        extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2);
-    } else if(_IOC_TYPE(cmd) == 'u') {
-        length = 4;
-        extra  = 0;
-    } else {
-        printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n");
-        cfs_enter_debugger();
-        status = STATUS_INVALID_PARAMETER;
-        goto errorout;
-    }
+       rc = seq_open(f, ops);
+       if (rc < 0)
+               goto out_free;
 
-    procctl.len = length + extra;
-    procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL));
+       seq = f->private_data;
+       seq->private = private;
+       return private;
 
-    if (NULL == procdat) {
-        printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n");
-        status = STATUS_INSUFFICIENT_RESOURCES;
-        cfs_enter_debugger();
-        goto errorout;
-    }
-    memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL));
-    memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL));
-    memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length);
-    length += sizeof(CFS_PROC_IOCTL);
+out_free:
+       cfs_free(private);
+out:
+       return NULL;
+}
+EXPORT_SYMBOL(__seq_open_private);
 
-    if (_IOC_TYPE(cmd) == 'f') {
+int seq_open_private(struct file *filp, const struct seq_operations *ops,
+               int psize)
+{
+       return __seq_open_private(filp, ops, psize) ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(seq_open_private);
 
-        char *ptr;
-        struct obd_ioctl_data * data;
-        struct obd_ioctl_data * obd;
+int seq_putc(struct seq_file *m, char c)
+{
+       if (m->count < m->size) {
+               m->buf[m->count++] = c;
+               return 0;
+       }
+       return -1;
+}
+EXPORT_SYMBOL(seq_putc);
 
-        data = (struct obd_ioctl_data *) buffer;
-        obd  = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL));
-        ptr = obd->ioc_bulk;
+int seq_puts(struct seq_file *m, const char *s)
+{
+       int len = strlen(s);
+       if (m->count + len < m->size) {
+               memcpy(m->buf + m->count, s, len);
+               m->count += len;
+               return 0;
+       }
+       m->count = m->size;
+       return -1;
+}
+EXPORT_SYMBOL(seq_puts);
 
-        if (data->ioc_inlbuf1) {
-                obd->ioc_inlbuf1 = ptr;
-                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
-        }
+struct list_head *seq_list_start(struct list_head *head, loff_t pos)
+{
+       struct list_head *lh;
 
-        if (data->ioc_inlbuf2) {
-                obd->ioc_inlbuf2 = ptr;
-                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
-        }
-        if (data->ioc_inlbuf3) {
-                obd->ioc_inlbuf3 = ptr;
-                LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
-        }
-        if (data->ioc_inlbuf4) {
-                obd->ioc_inlbuf4 = ptr;
-                LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
-        }
-    
-        if ( cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
-             cmd != (ULONG)OBD_IOC_BRW_READ ) {
+       list_for_each(lh, head)
+               if (pos-- == 0)
+                       return lh;
 
-            if (data->ioc_pbuf1 && data->ioc_plen1) {
-                obd->ioc_pbuf1 = &procdat[length];
-                memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1); 
-                length += size_round(data->ioc_plen1);
-            }
+       return NULL;
+}
 
-            if (data->ioc_pbuf2 && data->ioc_plen2) {
-                obd->ioc_pbuf2 = &procdat[length];
-                memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2);
-                length += size_round(data->ioc_plen2);
-            }
-        }
+EXPORT_SYMBOL(seq_list_start);
 
-        if (obd_ioctl_is_invalid(obd)) {
-            cfs_enter_debugger();
-        }
-    }
+struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
+{
+       if (!pos)
+               return head;
 
-    status = NtDeviceIoControlFile(
-                (HANDLE)handle,
-                NULL, NULL, NULL, &iosb,
-                IOCTL_LIBCFS_ENTRY,
-                procdat, length,
-                procdat, length );
+       return seq_list_start(head, pos - 1);
+}
 
+EXPORT_SYMBOL(seq_list_start_head);
 
-    if (NT_SUCCESS(status)) {
-        memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); 
-    }
+struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
+{
+       struct list_head *lh;
 
-errorout:
+       lh = ((struct list_head *)v)->next;
+       ++*ppos;
+       return lh == head ? NULL : lh;
+}
 
-    if (procdat) {
-        free(procdat);
-    }
+EXPORT_SYMBOL(seq_list_next);
 
-    return cfs_error_code(status);
+struct proc_dir_entry *PDE(const struct inode *inode)
+{
+       return (struct proc_dir_entry *)inode->i_priv;
 }
 
+
 #endif /* __KERNEL__ */
diff --git a/libcfs/libcfs/winnt/winnt-strusup.c b/libcfs/libcfs/winnt/winnt-strusup.c
new file mode 100644 (file)
index 0000000..45fac94
--- /dev/null
@@ -0,0 +1,250 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+/*
+ * Windows generic table support routines
+ */
+
+#define TAG_RADIX_TABLE 'XIDR'
+typedef struct _RADIX_TABLE_ELEMENT {
+    ULONG       Key;
+    PVOID       Value;
+} RADIX_TABLE_ELEMENT, *PRADIX_TABLE_ELEMENT;
+
+
+RTL_GENERIC_COMPARE_RESULTS
+RadixCompareElement (
+    IN PRTL_GENERIC_TABLE   Table,
+    IN PVOID                Index1,
+    IN PVOID                Index2
+    )
+{
+    ULONG   Key1, Key2;
+
+    Key1 = *((ULONG UNALIGNED *) Index1);
+    Key2 = *((ULONG UNALIGNED *) Index2);
+
+    if (Key1 < Key2) {
+        return GenericLessThan;
+    } else if (Key1 > Key2) {
+        return GenericGreaterThan;
+    }
+
+    return GenericEqual;
+}
+
+PVOID
+RadixAllocateElement (
+    IN PRTL_GENERIC_TABLE   Table,
+    IN CLONG                Size
+    )
+{
+    return FsRtlAllocatePoolWithTag(NonPagedPool,Size, TAG_RADIX_TABLE);
+}
+
+VOID
+RadixDestroyElement (
+    IN PRTL_GENERIC_TABLE   Table,
+    IN PVOID                Buffer
+    )
+{
+    ExFreePoolWithTag(Buffer, TAG_RADIX_TABLE);
+}
+
+
+PVOID
+RadixInsertElement(
+    IN PRTL_GENERIC_TABLE   Table,
+    IN ULONG                Key,
+    IN PVOID                Value
+    )
+{
+    RADIX_TABLE_ELEMENT element;
+    element.Key = Key;
+    element.Value = Value;
+    return RtlInsertElementGenericTable( Table, &element, 
+                      sizeof(RADIX_TABLE_ELEMENT), NULL );
+}
+
+BOOLEAN
+RadixDeleteElement(
+    IN PRTL_GENERIC_TABLE   Table,
+    IN ULONG                Key
+    )
+{
+    RADIX_TABLE_ELEMENT element;
+    element.Key = Key;
+    return RtlDeleteElementGenericTable(Table, &element);
+}
+
+
+PRADIX_TABLE_ELEMENT
+RadixLookupElement (
+    IN PRTL_GENERIC_TABLE   Table,
+    IN ULONG                Key
+    )
+{
+    RADIX_TABLE_ELEMENT     element;
+
+    element.Key = Key;
+    return (PRADIX_TABLE_ELEMENT) 
+            RtlLookupElementGenericTable(Table, &element);
+}
+
+PRADIX_TABLE_ELEMENT
+RadixGetNextElement (
+    IN PRTL_GENERIC_TABLE   Table,
+    IN PVOID *               Restart
+    )
+{
+    return (PRADIX_TABLE_ELEMENT)
+            RtlEnumerateGenericTableWithoutSplaying(Table, Restart);
+}
+
+
+
+VOID
+RadixInitTable(
+    IN PRTL_GENERIC_TABLE   Table
+    )
+{
+    
+    /*  initialize radix generic table. */
+
+    RtlInitializeGenericTable(
+        Table,
+        RadixCompareElement,
+        RadixAllocateElement,
+        RadixDestroyElement,
+        NULL
+        );
+}
+
+VOID
+RadixDestroyTable(
+    IN PRTL_GENERIC_TABLE   Table
+    )
+{
+    PRADIX_TABLE_ELEMENT element;
+    PVOID                restart;
+
+    /* Restart from the first node on every pass: deleting an element frees
+       the node that a saved restart key would otherwise still refer to. */
+    do {
+        restart = NULL;
+        element = RadixGetNextElement(Table, &restart);
+    } while (element && RadixDeleteElement(Table, element->Key));
+}
+
+/*
+ *  Radix Tree Support Routines
+ * 
+ */
+
+/**
+ *     radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ *     @root:          radix tree root
+ *     @results:       where the results of the lookup are placed
+ *     @first_index:   start the lookup from this key
+ *     @max_items:     place up to this many items at *results
+ *
+ *     Performs an index-ascending scan of the tree for present items.  Places
+ *     them at *@results and returns the number of items which were placed at
+ *     *@results.
+ *
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+                       unsigned long first_index, unsigned int max_items)
+{
+    PRADIX_TABLE_ELEMENT element;
+    PVOID                restart = NULL;   /* opaque key: NULL = first node */
+    unsigned int         i = 0;
+
+    /* walk in ascending key order, skipping keys below first_index; the
+       restart key belongs to the enumerator and starts out as NULL */
+    while (i < max_items &&
+           (element = RadixGetNextElement(&root->table, &restart)) != NULL) {
+        if (element->Key >= first_index)
+            results[i++] = element->Value;
+    }
+    return i;
+}
+
+
+/**
+ *     radix_tree_lookup    -    perform lookup operation on a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *
+ *     Lookup the item at the position @index in the radix tree @root.
+ *
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+    PRADIX_TABLE_ELEMENT element;
+    int                  i = 0;
+
+    element = RadixLookupElement(&root->table, index);
+    if (element) {
+        return element->Value;
+    }
+
+    return NULL;
+}
+
+/**
+ *     radix_tree_insert    -    insert into a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *     @item:          item to insert
+ *
+ *     Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+                       unsigned long index, void *item)
+{
+    if (RadixInsertElement(&root->table, index, item)) {
+        return 0;
+    }
+
+    return -ENOMEM;
+}
+
+/**
+ *     radix_tree_delete    -    delete an item from a radix tree
+ *     @root:          radix tree root
+ *     @index:         index key
+ *
+ *     Remove the item at @index from the radix tree rooted at @root.
+ *     The item is looked up first so that its address can be handed back.
+ *     Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+    void *item = radix_tree_lookup(root, index);
+    return RadixDeleteElement(&root->table, index) ? item : NULL;
+}
\ No newline at end of file
index be2cb17..81b40df 100644 (file)
@@ -34,7 +34,7 @@
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
-#define DEBUG_SUBSYSTEM S_LIBCFS
+#define DEBUG_SUBSYSTEM S_LNET
 
 #include <libcfs/libcfs.h>
 
@@ -61,7 +61,7 @@ void cfs_waitq_init(cfs_waitq_t *waitq)
 {
     waitq->magic = CFS_WAITQ_MAGIC;
     waitq->flags = 0;
-    INIT_LIST_HEAD(&(waitq->waiters));
+    CFS_INIT_LIST_HEAD(&(waitq->waiters));
     spin_lock_init(&(waitq->guard));
 }
 
@@ -103,8 +103,8 @@ void cfs_waitlink_init(cfs_waitlink_t *link)
 
     atomic_inc(&slot->count);
 
-    INIT_LIST_HEAD(&(link->waitq[0].link));
-    INIT_LIST_HEAD(&(link->waitq[1].link));
+    CFS_INIT_LIST_HEAD(&(link->waitq[0].link));
+    CFS_INIT_LIST_HEAD(&(link->waitq[1].link));
 
     link->waitq[0].waitl = link->waitq[1].waitl = link;
 }
@@ -322,8 +322,9 @@ void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr)
     LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
 
     spin_lock(&waitq->guard);
-
-    list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) {
+    cfs_list_for_each_entry_typed(scan, &waitq->waiters, 
+                            cfs_waitlink_channel_t,
+                            link) {
 
         cfs_waitlink_t *waitl = scan->waitl;
 
@@ -407,7 +408,7 @@ void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state)
         atomic_dec(link->hits);
         LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
     } else {
-        cfs_wait_event(link->event, 0);
+        cfs_wait_event_internal(link->event, 0);
     }
 }
 
@@ -428,16 +429,16 @@ void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state)
  *   What if it happens to be woken up at the just timeout time !?
  */
 
-cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link,
-                                    cfs_task_state_t state,
-                                    cfs_duration_t timeout)
+int64_t cfs_waitq_timedwait( cfs_waitlink_t *link,
+                             cfs_task_state_t state,
+                             int64_t timeout)
 { 
 
     if (atomic_read(link->hits) > 0) {
         atomic_dec(link->hits);
         LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
-        return TRUE;
+        return (int64_t)TRUE;
     }
 
-    return (cfs_duration_t)cfs_wait_event(link->event, timeout);
+    return (int64_t)cfs_wait_event_internal(link->event, timeout);
 }
index 0ad20d9..ee3a5f0 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
  *
  * GPL HEADER START
  *
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
-#define DEBUG_SUBSYSTEM S_LIBCFS
+#define DEBUG_SUBSYSTEM S_LNET
 
 #include <libcfs/libcfs.h>
 #include <lnet/lnet.h>
 
-#define TDILND_MODULE_NAME L"Tdilnd"
+#define TDILND_MODULE_NAME L"tdilnd"
 
-ks_data_t ks_data;
+ks_tdi_data_t ks_data;
 
-ULONG
-ks_tdi_send_flags(ULONG SockFlags)
+VOID
+KsDumpPrint(PCHAR buffer, ULONG length)
 {
-    ULONG   TdiFlags = 0;
-
-    if (cfs_is_flag_set(SockFlags, MSG_OOB)) {
-        cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED);
-    }
-
-    if (cfs_is_flag_set(SockFlags, MSG_MORE)) {
-        cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL);
-    }
-
-    if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) {
-        cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING);
+    ULONG i;
+    for (i=0; i < length; i++) {
+        if (((i+1) % 31) == 0) 
+            printk("\n");
+        printk("%2.2x ", (UCHAR)buffer[i]);
     }
-
-    return TdiFlags;
+    printk("\n");
 }
 
-NTSTATUS
-KsIrpCompletionRoutine(
-    IN PDEVICE_OBJECT    DeviceObject,
-    IN PIRP              Irp,
-    IN PVOID             Context
-    )
-{
-    if (NULL != Context) {
-        KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE);
-    }
-
-    return STATUS_MORE_PROCESSING_REQUIRED;
+PVOID
+KsMapMdlBuffer (PMDL    Mdl);
 
-    UNREFERENCED_PARAMETER(DeviceObject);
-    UNREFERENCED_PARAMETER(Irp);
+VOID
+KsDumpMdlChain(PMDL Mdl, ULONG length)
+{
+    PMDL mdl = Mdl;
+    PCHAR buffer = NULL;
+    ULONG len = 0;
+    int i = 0;
+
+    while (mdl) {
+        printk("mdl %d:\n", i);
+        buffer = KsMapMdlBuffer(mdl);
+        KsDumpPrint(buffer, mdl->ByteCount);
+        len += mdl->ByteCount;
+        mdl = mdl->Next;
+    }
+    ASSERT(len == length);
 }
 
-
 /*
- * KsBuildTdiIrp
- *   Allocate a new IRP and initialize it to be issued to tdi
+ * KsLockUserBuffer
+ *   Allocate MDL for the buffer and lock the pages into
+ *   nonpaged pool
  *
  * Arguments:
- *   DeviceObject:  device object created by the underlying
- *                  TDI transport driver
+ *   UserBuffer:  the user buffer to be locked
+ *   Length:      length in bytes of the buffer
+ *   Operation:   read or write access
+ *   pMdl:        the result of the created mdl
  *
  * Return Value:
- *   PRIP:   the allocated Irp in success or NULL in failure.
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
  * NOTES:
  *   N/A
  */
 
-PIRP
-KsBuildTdiIrp(
-    IN PDEVICE_OBJECT    DeviceObject
+NTSTATUS
+KsLockUserBuffer (
+    IN PVOID            UserBuffer,
+    IN BOOLEAN          bPaged,
+    IN ULONG            Length,
+    IN LOCK_OPERATION   Operation,
+    OUT PMDL *          pMdl
     )
 {
-    PIRP                Irp;
-    PIO_STACK_LOCATION  IrpSp;
+    NTSTATUS    Status;
+    PMDL        Mdl = NULL;
 
-    //
-    // Allocating the IRP ...
-    //
+    LASSERT(UserBuffer != NULL);
 
-    Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
+    *pMdl = NULL;
 
-    if (NULL != Irp) {
+    Mdl = IoAllocateMdl(
+                UserBuffer,
+                Length,
+                FALSE,
+                FALSE,
+                NULL
+                );
 
-        //
-        // Getting the Next Stack Location ...
-        //
+    if (Mdl == NULL) {
 
-        IrpSp = IoGetNextIrpStackLocation(Irp);
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
-        //
-        // Initializing Irp ...
-        //
+    } else {
 
-        IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL;
-        IrpSp->Parameters.DeviceIoControl.IoControlCode = 0;
+        __try {
+
+            if (bPaged) {
+                MmProbeAndLockPages(
+                    Mdl,
+                    KernelMode,
+                    Operation
+                    );
+            } else {
+                MmBuildMdlForNonPagedPool(
+                    Mdl
+                    );
+            }
+
+            Status = STATUS_SUCCESS;
+
+            *pMdl = Mdl;
+
+        } __except (EXCEPTION_EXECUTE_HANDLER) {
+
+            IoFreeMdl(Mdl);
+
+            Mdl = NULL;
+
+            cfs_enter_debugger();
+
+            Status = STATUS_INVALID_USER_BUFFER;
+        }
     }
 
-    return Irp;
+    return Status;
 }
 
 /*
- * KsSubmitTdiIrp
- *   Issue the Irp to the underlying tdi driver
+ * KsMapMdlBuffer
+ *   Map the mdl into a buffer in kernel space
  *
  * Arguments:
- *   DeviceObject:  the device object created by TDI driver
- *   Irp:           the I/O request packet to be processed
- *   bSynchronous:  synchronous or not. If true, we need wait
- *                  until the process is finished.
- *   Information:   returned info
+ *   Mdl:  the mdl to be mapped
  *
  * Return Value:
- *   NTSTATUS:      kernel status code
+ *   PVOID: the buffer mapped or NULL in failure
  *
  * NOTES:
  *   N/A
  */
 
-NTSTATUS
-KsSubmitTdiIrp(
-    IN PDEVICE_OBJECT   DeviceObject,
-    IN PIRP             Irp,
-    IN BOOLEAN          bSynchronous,
-    OUT PULONG          Information
-    )
+PVOID
+KsMapMdlBuffer (PMDL    Mdl)
 {
-    NTSTATUS            Status;
-    KEVENT              Event;
-
-    if (bSynchronous) {
-
-        KeInitializeEvent(
-            &Event,
-            SynchronizationEvent,
-            FALSE
-            );
+    LASSERT(Mdl != NULL);
 
+    return MmGetSystemAddressForMdlSafe(
+                Mdl,
+                NormalPagePriority
+                );
+}
 
-        IoSetCompletionRoutine(
-            Irp,
-            KsIrpCompletionRoutine,
-            &Event,
-            TRUE,
-            TRUE,
-            TRUE
-            );
-    }
 
-    Status = IoCallDriver(DeviceObject, Irp);
+/*
+ * KsReleaseMdl
+ *   Unlock all the pages in the mdl
+ *
+ * Arguments:
+ *   Mdl:  memory descriptor list to be released
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    if (bSynchronous) {
+VOID
+KsReleaseMdl (IN PMDL   Mdl,
+              IN int    Paged )
+{
+    LASSERT(Mdl != NULL);
 
-        if (STATUS_PENDING == Status) {
+    while (Mdl) {
 
-            Status = KeWaitForSingleObject(
-                        &Event,
-                        Executive,
-                        KernelMode,
-                        FALSE,
-                        NULL
-                        );
-        }
+        PMDL    Next;
 
-        Status = Irp->IoStatus.Status;
+        Next = Mdl->Next;
 
-        if (Information) {
-            *Information = (ULONG)(Irp->IoStatus.Information);
+        if (Paged) {
+            MmUnlockPages(Mdl);
         }
 
-        Irp->MdlAddress = NULL;
-        IoFreeIrp(Irp);
-    }
-
-    if (!NT_SUCCESS(Status)) {
+        IoFreeMdl(Mdl);
 
-        KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n",
-                    Status, KsNtStatusToString(Status)));
+        Mdl = Next;
     }
-
-    return (Status);
 }
 
-
-
 /*
- * KsOpenControl
- *   Open the Control Channel Object ...
+ * KsQueryMdlsSize
+ *   Query the whole size of a MDL (may be chained)
  *
  * Arguments:
- *   DeviceName:   the device name to be opened
- *   Handle:       opened handle in success case
- *   FileObject:   the fileobject of the device
+ *   Mdl:  the Mdl to be queried
  *
  * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
+ *   ULONG: the total size of the mdl
  *
- * Notes:
+ * NOTES:
  *   N/A
  */
 
-NTSTATUS
-KsOpenControl(
-    IN PUNICODE_STRING      DeviceName,
-    OUT HANDLE *            Handle,
-    OUT PFILE_OBJECT *      FileObject
-   )
+ULONG
+KsQueryMdlsSize (PMDL Mdl)
 {
-    NTSTATUS          Status = STATUS_SUCCESS;
-
-    OBJECT_ATTRIBUTES ObjectAttributes;
-    IO_STATUS_BLOCK   IoStatus;
-
+    PMDL    Next = Mdl;
+    ULONG   Length = 0;
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
     //
-    // Initializing ...
+    // Walking the MDL Chain ...
     //
 
-    InitializeObjectAttributes(
-        &ObjectAttributes,
-        DeviceName,
-        OBJ_CASE_INSENSITIVE |
-        OBJ_KERNEL_HANDLE,
-        NULL,
-        NULL
-        );
-
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+    while (Next) {
+        Length += MmGetMdlByteCount(Next);
+        Next = Next->Next;
+    }
 
-    //
-    // Creating the Transport Address Object ...
-    //
-
-    Status = ZwCreateFile(
-                Handle,
-                FILE_READ_DATA | FILE_WRITE_DATA,
-                &ObjectAttributes,
-                &IoStatus,
-                0,
-                FILE_ATTRIBUTE_NORMAL,
-                FILE_SHARE_READ | FILE_SHARE_WRITE,
-                FILE_OPEN,
-                0,
-                NULL,
-                0
-                );
+    return (Length);
+}
 
+/*
+ * KsCopyMdlToBuffer
+ *   Copy payload from  Mdl to buffer
+ *
+ * Arguments:
+ *   SourceMdl: the source mdl
+ *   SourceOffset: start offset of the source
+ *   DestinationBuffer: the dst buffer
+ *   DestinationOffset: the offset where data are to be copied.
+ *   BytesTobeCopied: the expected number of bytes to be copied
+ *
+ * Return Value:
+ *   Length of data copied from MDL to user buffer
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    if (NT_SUCCESS(Status)) {
+ULONG
+KsCopyMdlToBuffer(
+    IN PMDL     SourceMdl,
+    IN ULONG    SourceOffset,
+    IN PVOID    DestinationBuffer,
+    IN ULONG    DestinationOffset,
+    IN ULONG    BytesTobeCopied
+    )
+{
+    PUCHAR      SourceBuffer = NULL;
+    PUCHAR      TargetBuffer = DestinationBuffer;
+    ULONG       BytesCopied = 0;
 
-        //
-        // Now Obtaining the FileObject of the Transport Address ...
-        //
+    if (MmGetMdlByteCount(SourceMdl) <= SourceOffset) {
+        return 0;
+    }
 
-        Status = ObReferenceObjectByHandle(
-                    *Handle,
-                    FILE_ANY_ACCESS,
-                    NULL,
-                    KernelMode,
-                    FileObject,
-                    NULL
-                    );
+    BytesCopied = MmGetMdlByteCount(SourceMdl) - SourceOffset;
+    if (BytesCopied > BytesTobeCopied) {
+        BytesCopied = BytesTobeCopied;
+    }
 
-        if (!NT_SUCCESS(Status)) {
+    SourceBuffer = (PUCHAR)KsMapMdlBuffer(SourceMdl);
 
-            cfs_enter_debugger();
-            ZwClose(*Handle);
-        }
+    RtlMoveMemory(TargetBuffer + DestinationOffset,
+                  SourceBuffer + SourceOffset, BytesCopied);
 
-    } else {
+   return BytesCopied;
+}
 
-        cfs_enter_debugger();
-    }
+/*
+ * KsInitializeKsTsdu
+ *   Initialize the Tsdu buffer header
+ *
+ * Arguments:
+ *   KsTsdu: the Tsdu to be initialized
+ *   Length: the total length of the Tsdu
+ *
+ * Return Value:
+ *   VOID
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    return (Status);
+VOID
+KsInitializeKsTsdu(
+    PKS_TSDU    KsTsdu,
+    ULONG       Length
+    )
+{
+    KsTsdu->Magic = KS_TSDU_MAGIC;
+    KsTsdu->TotalLength = Length;
+    KsTsdu->StartOffset = KsTsdu->LastOffset =
+    KS_QWORD_ALIGN(sizeof(KS_TSDU));
 }
 
-
 /*
- * KsCloseControl
- *   Release the Control Channel Handle and FileObject
+ * KsAllocateKsTsdu
+ *   Reuse a Tsdu from the freelist or allocate a new Tsdu
+ *   from the LookAsideList table or the NonPagedPool
  *
  * Arguments:
- *   Handle:       the channel handle to be released
- *   FileObject:   the fileobject to be released
+ *   N/A
  *
  * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
+ *   PKS_TSDU: the new Tsdu or NULL if it fails
  *
  * Notes:
  *   N/A
  */
 
-NTSTATUS
-KsCloseControl(
-    IN HANDLE             Handle,
-    IN PFILE_OBJECT       FileObject
-   )
+PKS_TSDU
+KsAllocateKsTsdu()
 {
-    NTSTATUS  Status = STATUS_SUCCESS;
+    PKS_TSDU    KsTsdu = NULL;
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+    spin_lock(&(ks_data.ksnd_tsdu_lock));
 
-    if (FileObject) {
+    if (!list_empty (&(ks_data.ksnd_freetsdus))) {
 
-        ObDereferenceObject(FileObject);
-    }
+        LASSERT(ks_data.ksnd_nfreetsdus > 0);
 
-    if (Handle) {
+        KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link);
+        list_del(&(KsTsdu->Link));
+        ks_data.ksnd_nfreetsdus--;
 
-        Status = ZwClose(Handle);
+    } else {
+
+        KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc(
+                        ks_data.ksnd_tsdu_slab, 0);
     }
 
-    ASSERT(NT_SUCCESS(Status));
+    spin_unlock(&(ks_data.ksnd_tsdu_lock));
 
-    return (Status);
+    if (NULL != KsTsdu) {
+        RtlZeroMemory(KsTsdu, ks_data.ksnd_tsdu_size);
+        KsInitializeKsTsdu(KsTsdu, (ULONG)ks_data.ksnd_tsdu_size);
+    }
+
+    return (KsTsdu);
 }
 
+/*
+ * KsFreeKsTsdu
+ *   Release a Tsdu: uninitialize then free it.
+ *
+ * Arguments:
+ *   KsTsdu: Tsdu to be freed.
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
+
+VOID
+KsFreeKsTsdu(
+    PKS_TSDU  KsTsdu
+    )
+{
+    cfs_mem_cache_free(
+            ks_data.ksnd_tsdu_slab,
+            KsTsdu );
+}
 
 /*
- * KsOpenAddress
- *   Open the tdi address object
+ * KsPutKsTsdu
+ *   Move the Tsdu to the free tsdu list in ks_data.
  *
  * Arguments:
- *   DeviceName:   device name of the address object
- *   pAddress:     tdi address of the address object
- *   AddressLength: length in bytes of the tdi address
- *   Handle:       the newly opened handle
- *   FileObject:   the newly opened fileobject
+ *   KsTsdu: Tsdu to be moved.
  *
  * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
+ *   N/A
  *
  * Notes:
  *   N/A
  */
 
-NTSTATUS
-KsOpenAddress(
-    IN PUNICODE_STRING      DeviceName,
-    IN PTRANSPORT_ADDRESS   pAddress,
-    IN ULONG                AddressLength,
-    OUT HANDLE *            Handle,
-    OUT PFILE_OBJECT *      FileObject
-   )
+VOID
+KsPutKsTsdu(
+    PKS_TSDU  KsTsdu
+    )
 {
-    NTSTATUS          Status = STATUS_SUCCESS;
+    spin_lock(&(ks_data.ksnd_tsdu_lock));
+    if (ks_data.ksnd_nfreetsdus > 128) {
+        KsFreeKsTsdu(KsTsdu);
+    } else {
+        list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus));
+        ks_data.ksnd_nfreetsdus++;
+    }
+    spin_unlock(&(ks_data.ksnd_tsdu_lock));
+}
 
-    PFILE_FULL_EA_INFORMATION Ea = NULL;
-    ULONG             EaLength;
-    UCHAR             EaBuffer[EA_MAX_LENGTH];
+/*
+ * KsLockTsdus (called with tconn lock acquired)
+ *
+ *   Walks every Tsdu queued on TsduMgr->TsduList and every record stored
+ *   between its StartOffset and LastOffset, and links one mdl per record
+ *   into a single chain through the mdl Next field:
+ *     - DAT/BUF records: the mdl cached in the record is reused, else
+ *       one is requested from KsLockUserBuffer and cached in the record;
+ *     - MDL records: the mdl already stored in the record is used.
+ *
+ *   *Length receives the sum of the DataLength of all chained records.
+ *   Returns the head of the chain (NULL when nothing is queued). If any
+ *   record ends up without an mdl, *Length is reset to 0 and NULL is
+ *   returned; mdls cached in earlier records are left in place.
+ *
+ *   tconn is not referenced in the body, and Flags is never written
+ *   (the only assignment is compiled out with #if 0).
+ */
+ks_mdl_t *
+KsLockTsdus(
+    ks_tconn_t *    tconn,
+    PKS_TSDUMGR     TsduMgr,
+    PULONG          Flags,
+    PULONG          Length
+    )
+{
+    ks_mdl_t *      mdl = NULL;
+    ks_mdl_t *      tail = NULL;
 
-    OBJECT_ATTRIBUTES ObjectAttributes;
-    IO_STATUS_BLOCK   IoStatus;
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
 
-    //
-    // Building EA for the Address Object to be Opened ...
-    //
+    *Length  = 0;
 
-    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
-    Ea->NextEntryOffset = 0;
-    Ea->Flags = 0;
-    Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
-    Ea->EaValueLength = (USHORT)AddressLength;
-    RtlCopyMemory(
-        &(Ea->EaName),
-        TdiTransportAddress,
-        Ea->EaNameLength + 1
-        );
-    RtlMoveMemory(
-        &(Ea->EaName[Ea->EaNameLength + 1]),
-        pAddress,
-        AddressLength
-        );
-    EaLength =  sizeof(FILE_FULL_EA_INFORMATION) +
-                Ea->EaNameLength + AddressLength;
+    cfs_list_for_each_entry_typed(KsTsdu,
+            &TsduMgr->TsduList,KS_TSDU, Link) {
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+        ULONG   start = 0;
 
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+        start = KsTsdu->StartOffset;
 
-    //
-    // Initializing ...
-    //
+        while (start < KsTsdu->LastOffset) {
 
-    InitializeObjectAttributes(
-        &ObjectAttributes,
-        DeviceName,
-        OBJ_CASE_INSENSITIVE |
-        OBJ_KERNEL_HANDLE,
-        NULL,
-        NULL
-        );
+            ks_mdl_t *  iov = NULL;
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + start); /* the same address viewed as each record type; TsduType decides which view applies */
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + start);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + start);
+            LASSERT(KsTsduDat->TsduType == TSDU_TYPE_DAT ||
+                    KsTsduBuf->TsduType == TSDU_TYPE_BUF ||
+                    KsTsduMdl->TsduType == TSDU_TYPE_MDL);
 
-    //
-    // Creating the Transport Address Object ...
-    //
+            if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
 
-    Status = ZwCreateFile(
-                Handle,
-                FILE_READ_DATA | FILE_WRITE_DATA,
-                &ObjectAttributes,
-                &IoStatus,
-                0,
-                FILE_ATTRIBUTE_NORMAL,
-                FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */
-                FILE_OPEN,
-                0,
-                Ea,
-                EaLength
-                );
+                ASSERT(KsTsdu->LastOffset >= start + KsTsduDat->TotalLength);
+                if (KsTsduDat->Mdl) {
+                    iov = KsTsduDat->Mdl;
+                } else {
+                    KsLockUserBuffer(
+                        &KsTsduDat->Data[KsTsduDat->StartOffset],
+                        FALSE,
+                        KsTsduDat->DataLength,
+                        IoReadAccess,
+                        &iov );
+                    KsTsduDat->Mdl = iov; /* cache the mdl in the record; a later call reuses it */
+                }
+/*
+                printk("KsLockTsdus: %u\n", KsTsduDat->DataLength);
+                KsDumpPrint(
+                        &KsTsduDat->Data[KsTsduDat->StartOffset],
+                        KsTsduDat->DataLength);
+*/                        
+                *Length += KsTsduDat->DataLength;
+                start += KsTsduDat->TotalLength; /* inline records are variable sized: step over the whole record */
 
+            } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
 
-    if (NT_SUCCESS(Status)) {
+                ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_BUF));
+                if (KsTsduBuf->Mdl) {
+                    iov = KsTsduBuf->Mdl;
+                } else {
+                    KsLockUserBuffer(
+                        (PUCHAR)KsTsduBuf->UserBuffer + 
+                                 KsTsduBuf->StartOffset,
+                        FALSE,
+                        KsTsduBuf->DataLength,
+                        IoReadAccess,
+                        &iov );
+                    KsTsduBuf->Mdl = iov; /* cache the mdl in the record; a later call reuses it */
+                }
 
-        //
-        // Now Obtaining the FileObject of the Transport Address ...
-        //
+                *Length += KsTsduBuf->DataLength;
+                start += sizeof(KS_TSDU_BUF);
 
-        Status = ObReferenceObjectByHandle(
-                    *Handle,
-                    FILE_ANY_ACCESS,
-                    NULL,
-                    KernelMode,
-                    FileObject,
-                    NULL
-                    );
-
-        if (!NT_SUCCESS(Status)) {
+            } else {
 
-            cfs_enter_debugger();
-            ZwClose(*Handle);
-        }
+                LASSERT(TSDU_TYPE_MDL == KsTsduMdl->TsduType);
+                ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_MDL));
+                iov = KsTsduMdl->Mdl; 
+                *Length += KsTsduMdl->DataLength;
+                start += sizeof(KS_TSDU_MDL);
+            }
 
-    } else {
+            if (!iov) { /* no mdl could be obtained for this record: give up on the whole chain */
+                cfs_enter_debugger();
+                goto cleanup;
+            }
 
-        cfs_enter_debugger();
+            if (tail) {
+                tail->Next = iov;
+            } else {
+                mdl = iov;
+            }
+            tail = iov;
+            tail->Next = NULL; /* the newest mdl always terminates the chain */
+/*
+            printk("KsLockTsdus: mdl %d\n", tail->ByteCount);
+            KsDumpMdlChain(tail, tail->ByteCount);
+*/
+        }
+    }
+#if 0
+    if (Flags) {
+        *Flags = TsduFlags;
     }
+#endif
+    return mdl;
 
-    return (Status);
+cleanup:
+    
+    *Length = 0;
+    return NULL;
 }
 
-/*
- * KsCloseAddress
- *   Release the Hanlde and FileObject of an opened tdi
- *   address object
- *
- * Arguments:
- *   Handle:       the handle to be released
- *   FileObject:   the fileobject to be released
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
-
-NTSTATUS
-KsCloseAddress(
-    IN HANDLE             Handle,
-    IN PFILE_OBJECT       FileObject
-)
+ks_mdl_t *
+KsSplitMdl(
+    IN ks_mdl_t *   master,
+    IN ULONG        offset,
+    IN ULONG        length
+    )
 {
-    NTSTATUS  Status = STATUS_SUCCESS;
-
-    if (FileObject) {
+    ks_mdl_t *  mdl = NULL;
+    char *      ptr = NULL;
 
-        ObDereferenceObject(FileObject);
-    }
+    /* KsSplitMdl: describe `length` bytes of master, beginning `offset`
+       bytes into its mapped buffer, with a freshly allocated mdl.
+       Returns that mdl, or NULL when IoAllocateMdl fails.
+
+       First calculate the start virtual address.
+       NOTE(review): the pointer returned by KsMapMdlBuffer is used
+       without a NULL check - confirm it cannot fail here. */
+    ptr = (char *)KsMapMdlBuffer(master) + offset;
 
-    if (Handle) {
+    /* allocate new mdl for new memory range */
+    mdl = IoAllocateMdl(ptr, length, FALSE, FALSE, NULL);
 
-        Status = ZwClose(Handle);
+    if (!mdl) {
+        return NULL;
     }
+            
+    /* initialize the new mdl as a partial mdl of master that covers
+       `length` bytes starting at ptr */
+    IoBuildPartialMdl(master, mdl, (PVOID)ptr, length);
 
-    ASSERT(NT_SUCCESS(Status));
-
-    return (Status);
+    return mdl;
 }
 
+/*
+ * KsReleaseTsdus (called with tconn lock acquired)
+ *
+ *   Drops `length` bytes from the front of TsduMgr->TsduList, record by
+ *   record. A record that is consumed completely is stepped over; a
+ *   record that is only partly consumed stays in place with its
+ *   StartOffset advanced and its DataLength reduced. The mdl attached
+ *   to a touched DAT/BUF record is detached and passed to KsReleaseMdl,
+ *   and the UserBuffer of a fully consumed BUF record is freed with
+ *   ExFreePool. For a partly consumed MDL record the remainder is
+ *   re-described through KsSplitMdl and the old mdl is released; when
+ *   the split fails the old mdl is kept with its ByteOffset advanced.
+ *
+ *   Tsdus whose StartOffset reaches LastOffset are unlinked and handed
+ *   to KsPutKsTsdu. TsduMgr->TotalBytes shrinks by the bytes dropped.
+ *   The caller must not ask for more than TsduMgr->TotalBytes (asserted).
+ *
+ *   tconn is not referenced in the body.
+ */
+VOID
+KsReleaseTsdus(
+    ks_tconn_t *        tconn,
+    PKS_TSDUMGR         TsduMgr,
+    ULONG               length
+    )
+{
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
+#if DBG    
+    ULONG           total = TsduMgr->TotalBytes;
+    ULONG           size = length;
+#endif
 
-/*
- * KsOpenConnection
- *   Open a tdi connection object
- *
- * Arguments:
- *   DeviceName:   device name of the connection object
- *   ConnectionContext: the connection context
- *   Handle:       the newly opened handle
- *   FileObject:   the newly opened fileobject
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
+    LASSERT(TsduMgr->TotalBytes >= length);
 
-NTSTATUS
-KsOpenConnection(
-    IN PUNICODE_STRING      DeviceName,
-    IN CONNECTION_CONTEXT   ConnectionContext,
-    OUT HANDLE *            Handle,
-    OUT PFILE_OBJECT *      FileObject
-   )
-{
-    NTSTATUS            Status = STATUS_SUCCESS;
+    while (!list_empty(&TsduMgr->TsduList)) {
 
-    PFILE_FULL_EA_INFORMATION Ea = NULL;
-    ULONG               EaLength;
-    UCHAR               EaBuffer[EA_MAX_LENGTH];
+        ULONG   start = 0;
 
-    OBJECT_ATTRIBUTES   ObjectAttributes;
-    IO_STATUS_BLOCK     IoStatus;
+        KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+        start = KsTsdu->StartOffset;
 
-    //
-    // Building EA for the Address Object to be Opened ...
-    //
+        while (length > 0 && start < KsTsdu->LastOffset) {
 
-    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
-    Ea->NextEntryOffset = 0;
-    Ea->Flags = 0;
-    Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH;
-    Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT);
-    RtlCopyMemory(
-        &(Ea->EaName),
-        TdiConnectionContext,
-        Ea->EaNameLength + 1
-        );
-    RtlMoveMemory(
-        &(Ea->EaName[Ea->EaNameLength + 1]),
-        &ConnectionContext,
-        sizeof(CONNECTION_CONTEXT)
-        );
-    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
-                               Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT);
+            ULONG           size = 0; /* bytes dropped from this record; shadows the DBG-only outer `size` */
+            ks_mdl_t *      mdl = NULL; /* mdl to release once the record has been updated, if any */
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + start);
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + start);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + start);
+            LASSERT(KsTsduDat->TsduType == TSDU_TYPE_DAT ||
+                    KsTsduBuf->TsduType == TSDU_TYPE_BUF ||
+                    KsTsduMdl->TsduType == TSDU_TYPE_MDL);
 
+            if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
 
-    //
-    // Initializing ...
-    //
+                ASSERT(KsTsdu->LastOffset >= start + KsTsduDat->DataLength);
+                if (length >= KsTsduDat->DataLength) {
+                    /* whole tsdu is sent out */
+                    size = KsTsduDat->DataLength;
+                    start += KsTsduDat->TotalLength;
+                } else {
+                    size = length;
+                    KsTsduDat->StartOffset += size; /* partial: keep the record and skip the bytes already dropped */
+                }
 
-    InitializeObjectAttributes(
-        &ObjectAttributes,
-        DeviceName,
-        OBJ_CASE_INSENSITIVE |
-        OBJ_KERNEL_HANDLE,
-        NULL,
-        NULL
-        );
+                if (KsTsduDat->Mdl) {
+                    mdl = KsTsduDat->Mdl;
+                    KsTsduDat->Mdl = NULL;
+                }
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+                KsTsduDat->DataLength -= size;
 
-    //
-    // Creating the Connection Object ...
-    //
+            } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
 
-    Status = ZwCreateFile(
-                Handle,
-                FILE_READ_DATA | FILE_WRITE_DATA,
-                &ObjectAttributes,
-                &IoStatus,
-                NULL,
-                FILE_ATTRIBUTE_NORMAL,
-                0,
-                FILE_OPEN,
-                0,
-                Ea,
-                EaLength
-                );
+                ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_BUF));
+                if (length >= KsTsduBuf->DataLength) {
+                    /* whole tsdu is sent out */
+                    size = KsTsduBuf->DataLength;
+                    start += sizeof(KS_TSDU_BUF);
+                    LASSERT(KsTsduBuf->UserBuffer);
+                    ExFreePool(KsTsduBuf->UserBuffer); /* NOTE(review): the buffer is freed before the mdl detached below is released, while KsReadTsdu releases the mdl first - confirm this order is safe */
+                    KsTsduBuf->UserBuffer = NULL;
+                } else {
+                    KsTsduBuf->StartOffset += length;
+                    size = length;
+                }
 
+                if (KsTsduBuf->Mdl) {
+                    mdl = KsTsduBuf->Mdl;
+                    KsTsduBuf->Mdl = NULL;
+                }
 
-    if (NT_SUCCESS(Status)) {
+                KsTsduBuf->DataLength -= size;
+                
+            } else {
 
-        //
-        // Now Obtaining the FileObject of the Transport Address ...
-        //
+                LASSERT(TSDU_TYPE_MDL == KsTsduMdl->TsduType);
+                ASSERT(KsTsdu->LastOffset >= start + sizeof(KS_TSDU_MDL));
+                mdl = KsTsduMdl->Mdl;
+                if (length >= KsTsduMdl->DataLength) {
+                    /* whole mdl is sent out */
+                    size = KsTsduMdl->DataLength;
+                    start += sizeof(KS_TSDU_MDL);
+                    KsTsduMdl->Mdl = NULL;
+                } else {
+                    /* now split the remaining data out into its own mdl */
+                    ks_mdl_t * mdl1 = KsSplitMdl(mdl, length,
+                                  KsTsduMdl->DataLength - length);
+                    if (NULL == mdl1) {
+                        mdl->ByteOffset += length; /* split failed: keep the original mdl and only advance its ByteOffset */
+                        mdl = NULL; /* the record still uses it, so it must not be released below */
+                    } else {
+                        KsTsduMdl->Mdl = mdl1; /* the record now refers to the remainder; the old mdl is released below */
+                    }
+                    size = length;
+                    KsTsduMdl->StartOffset += size;
+                }
 
-        Status = ObReferenceObjectByHandle(
-                    *Handle,
-                    FILE_ANY_ACCESS,
-                    NULL,
-                    KernelMode,
-                    FileObject,
-                    NULL
-                    );
+                KsTsduMdl->DataLength -= size;
+            }
 
-        if (!NT_SUCCESS(Status)) {
+            length -= size;
+            TsduMgr->TotalBytes -= size;
 
-            cfs_enter_debugger();
-            ZwClose(*Handle);
+            if (mdl) {
+                mdl->Next = NULL; /* detach it from any chain before handing it to KsReleaseMdl */
+                KsReleaseMdl(mdl, FALSE);
+            }
+
+            KsTsdu->StartOffset = start; /* persist how far this Tsdu has been consumed */
         }
 
-    } else {
+        if (KsTsdu->StartOffset >= KsTsdu->LastOffset) {
 
-        cfs_enter_debugger();
+            /* remove KsTsdu from list */
+            list_del(&KsTsdu->Link);
+            TsduMgr->NumOfTsdu--;
+            KsPutKsTsdu(KsTsdu);
+        }
+
+        if (length == 0) {
+            break;
+        }
     }
 
-    return (Status);
+    LASSERT(length == 0);
+#if DBG
+    LASSERT(total - size == TsduMgr->TotalBytes);
+    KsPrint((4, "KsReleaseTsdus: TsduMgr=%p Remained=%xh (%xh)\n",
+                TsduMgr, TsduMgr->TotalBytes, size ));
+#endif
 }
 
-/*
- * KsCloseConnection
- *   Release the Hanlde and FileObject of an opened tdi
- *   connection object
- *
- * Arguments:
- *   Handle:       the handle to be released
- *   FileObject:   the fileobject to be released
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
-
-NTSTATUS
-KsCloseConnection(
-    IN HANDLE             Handle,
-    IN PFILE_OBJECT       FileObject
+PKS_TSDUMGR
+KsQueryTsduMgr(
+    ks_tconn_t *    tconn,
+    BOOLEAN         expedited,
+    BOOLEAN         sending
     )
 {
-    NTSTATUS  Status = STATUS_SUCCESS;
-
-    if (FileObject) {
 
-        ObDereferenceObject(FileObject);
-    }
+    PKS_CHAIN           KsChain;
+    PKS_TSDUMGR         TsduMgr;
 
-    if (Handle) {
+    /* KsQueryTsduMgr: pick the TsduMgr of tconn that matches the
+       request. `sending` selects the send or the receive chain of the
+       connection (sender or child; any other kstc_type trips the
+       LASSERT), `expedited` selects the Expedited or the Normal
+       manager inside that chain. The returned pointer refers into
+       tconn itself and is never NULL. */
 
-        Status = ZwClose(Handle);
+    if (sending) {
+        if (tconn->kstc_type == kstt_sender) {
+            KsChain = &(tconn->sender.kstc_send);
+        } else {
+            LASSERT(tconn->kstc_type == kstt_child);
+            KsChain = &(tconn->child.kstc_send);
+        }
+    } else {
+        if (tconn->kstc_type == kstt_sender) {
+            KsChain = &(tconn->sender.kstc_recv);
+        } else {
+            LASSERT(tconn->kstc_type == kstt_child);
+            KsChain = &(tconn->child.kstc_recv);
+        }
     }
 
-    ASSERT(NT_SUCCESS(Status));
+    if (expedited) {
+        TsduMgr = &(KsChain->Expedited);
+    } else {
+        TsduMgr = &(KsChain->Normal);
+    }
 
-    return (Status);
+    return TsduMgr;
 }
 
-
-/*
- * KsAssociateAddress
- *   Associate an address object with a connection object
- *
- * Arguments:
- *   AddressHandle:  the handle of the address object
- *   ConnectionObject:  the FileObject of the connection
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
-
-NTSTATUS
-KsAssociateAddress(
-    IN HANDLE           AddressHandle,
-    IN PFILE_OBJECT     ConnectionObject
-    )
+PKS_TSDU
+KsGetTsdu(PKS_TSDUMGR TsduMgr, ULONG Length)
 {
-    NTSTATUS            Status;
-    PDEVICE_OBJECT      DeviceObject;
-    PIRP                Irp;
+    PKS_TSDU KsTsdu = NULL;
 
-    //
-    // Getting the DeviceObject from Connection FileObject
-    //
+    /* KsGetTsdu: return a Tsdu of TsduMgr with room for a record of
+       `Length` bytes, or NULL if a new Tsdu is needed but cannot be
+       allocated.
+
+       retrieve the latest Tsdu buffer from the TsduMgr
+       list if the list is not empty. */
 
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
-    //
-    // Building Tdi Internal Irp ...
-    //
-
-    Irp = KsBuildTdiIrp(DeviceObject);
-
-    if (NULL == Irp) {
+    if (list_empty(&(TsduMgr->TsduList))) {
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
+        LASSERT(TsduMgr->NumOfTsdu == 0);
+        KsTsdu = NULL;
 
     } else {
 
-        //
-        // Assocating the Address Object with the Connection Object
-        //
+        LASSERT(TsduMgr->NumOfTsdu > 0);
+        KsTsdu = list_entry(TsduMgr->TsduList.prev, KS_TSDU, Link); /* .prev of the list head is the tail, i.e. the Tsdu appended last */
 
-        TdiBuildAssociateAddress(
-            Irp,
-            DeviceObject,
-            ConnectionObject,
-            NULL,
-            NULL,
-            AddressHandle
-            );
+        /* if this Tsdu does not have enough space left, we need to
+           allocate a new Tsdu and queue it behind this one. */
 
-        //
-        // Calling the Transprot Driver with the Prepared Irp
-        //
+        if (KsTsdu->LastOffset + Length > KsTsdu->TotalLength) {
+            KsTsdu = NULL;
+        }
+    }
 
-        Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+    /* allocate a new Tsdu in case the tail one does not satisfy the
+       request; on success it becomes the new tail of the list. */
+    if (NULL == KsTsdu) {
+        KsTsdu = KsAllocateKsTsdu();
+        if (NULL != KsTsdu) {
+            list_add_tail(&(KsTsdu->Link), &(TsduMgr->TsduList));
+            TsduMgr->NumOfTsdu++;
+        }
     }
 
-    return (Status);
+    return KsTsdu;
 }
 
+/*
+ * KsWriteTsduDat
+ *   Queue a copy of `length` bytes from `buffer` on TsduMgr
+ *
+ *   Data whose KS_TSDU_DAT record would not fit into an empty Tsdu is
+ *   copied into a separate NonPagedPool buffer that is referenced by a
+ *   KS_TSDU_BUF record. If that allocation fails, the request is cut
+ *   down to the largest inline KS_TSDU_DAT record instead. In all other
+ *   cases the data is stored inline in a KS_TSDU_DAT record, truncated
+ *   to the room left in the Tsdu when necessary.
+ *
+ * Arguments:
+ *   TsduMgr: the Tsdu queue to append to
+ *   buffer:  source data to be copied
+ *   length:  number of bytes requested
+ *   flags:   TDI_SEND_PARTIAL marks the record KS_TSDU_COMM_PARTIAL
+ *
+ * Return Value:
+ *   number of bytes actually queued (may be less than length),
+ *   or 0 if no Tsdu could be obtained
+ *
+ * Notes:
+ *   N/A
+ */
+ULONG
+KsWriteTsduDat(
+    PKS_TSDUMGR TsduMgr,
+    PCHAR       buffer,
+    ULONG       length,
+    ULONG       flags
+    )
+{
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_DAT        KsTsduDat;
+    PKS_TSDU_BUF        KsTsduBuf;
+
+    BOOLEAN             bNewBuff = FALSE;
+    PCHAR               Buffer = NULL;
 
 /*
- * KsDisassociateAddress
- *   Disassociate the connection object (the relationship will
- *   the corresponding address object will be dismissed. )
- *
- * Arguments:
- *   ConnectionObject:  the FileObject of the connection
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
+    printk("KsWriteTsduDat: %u\n", length);
+    KsDumpPrint(buffer, length);
+*/
+    /* if the Tsdu is even larger than the biggest Tsdu, we have
+       to allocate new buffer and use TSDU_TYPE_BUF to store it */
 
-NTSTATUS
-KsDisassociateAddress(
-    IN PFILE_OBJECT     ConnectionObject
-    )
-{
-    NTSTATUS            Status;
-    PDEVICE_OBJECT      DeviceObject;
-    PIRP                   Irp;
+    if ( KS_TSDU_STRU_SIZE(length) > ks_data.ksnd_tsdu_size -
+         KS_QWORD_ALIGN(sizeof(KS_TSDU))) {
+        bNewBuff = TRUE;
+    }
 
-    //
-    // Getting the DeviceObject from Connection FileObject
-    //
+    /* allocating the buffer for TSDU_TYPE_BUF */
+    if (bNewBuff) {
+        Buffer = ExAllocatePool(NonPagedPool, length);
+        if (NULL == Buffer) {
+            /* there is not enough memory for us. We just try to
+               receive the maximum number of bytes with a new Tsdu */
+            bNewBuff = FALSE;
+            length = ks_data.ksnd_tsdu_size - KS_TSDU_STRU_SIZE(0) - 
+                     KS_QWORD_ALIGN(sizeof(KS_TSDU));
+        }
+    }
 
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+    /* get empty Tsdu from TsduMgr */
+    KsTsdu = KsGetTsdu(TsduMgr, bNewBuff ? sizeof(KS_TSDU_BUF) :
+                                KS_TSDU_STRU_SIZE(length) );
 
-    //
-    // Building Tdi Internal Irp ...
-    //
+    /* bail out if no Tsdu could be obtained */
+    if (NULL == KsTsdu) {
+        goto errorout;
+    }
 
-    Irp = KsBuildTdiIrp(DeviceObject);
+    KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
 
-    if (NULL == Irp) {
+    if (bNewBuff) {
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
+        /* setup up the KS_TSDU_BUF record */
+        KsTsduBuf->TsduType     = TSDU_TYPE_BUF;
+        KsTsduBuf->TsduFlags    = 0;
+        KsTsduBuf->StartOffset  = 0;
+        KsTsduBuf->UserBuffer   = Buffer;
+        KsTsduBuf->DataLength   = length;
+        KsTsduBuf->Mdl          = NULL;
+        if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) {
+            KsTsduBuf->TsduFlags |= KS_TSDU_COMM_PARTIAL;
+        }
+
+        KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
 
     } else {
 
-        //
-        // Disassocating the Address Object with the Connection Object
-        //
+        /* setup the KS_TSDU_DATA to contain all the messages */
 
-        TdiBuildDisassociateAddress(
-            Irp,
-            DeviceObject,
-            ConnectionObject,
-            NULL,
-            NULL
-            );
+        KsTsduDat->TsduType     =  TSDU_TYPE_DAT;
+        KsTsduDat->TsduFlags    = 0;
 
-        //
-        // Calling the Transprot Driver with the Prepared Irp
-        //
+        /* truncate the payload if the record would run past the
+           end of the Tsdu */
+        if ( KsTsdu->TotalLength - KsTsdu->LastOffset < 
+            KS_TSDU_STRU_SIZE(length) ) {
+            length = KsTsdu->TotalLength - KsTsdu->LastOffset -
+                     FIELD_OFFSET(KS_TSDU_DAT, Data);
+        }
+        KsTsduDat->DataLength   =  length;
+        KsTsduDat->TotalLength  =  KS_TSDU_STRU_SIZE(length);
+        KsTsduDat->StartOffset  = 0;
+        KsTsduDat->Mdl = NULL;
+        if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) {
+            KsTsduDat->TsduFlags |= KS_TSDU_COMM_PARTIAL;
+        }
 
-        Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+        Buffer = &KsTsduDat->Data[0];
+        KsTsdu->LastOffset += KsTsduDat->TotalLength;
     }
 
-    return (Status);
-}
-
+    /* Buffer is either the pool buffer of the BUF record or the
+       inline Data area of the DAT record */
+    RtlMoveMemory(Buffer, buffer, length);
+    TsduMgr->TotalBytes += length;
 
-/*
+    KsPrint((4, "KsWriteTsduDat: TsduMgr=%p bytes in queue:%xh (%xh)\n",
+                TsduMgr, TsduMgr->TotalBytes, length));
+    return length;
 
-//
-// Connection Control Event Callbacks
-//
+errorout:
 
-TDI_EVENT_CONNECT
-TDI_EVENT_DISCONNECT
-TDI_EVENT_ERROR
+    /* no record was queued, so nothing else refers to the pool
+       buffer allocated above: free it here instead of leaking it.
+       bNewBuff is only still TRUE when that allocation succeeded. */
+    if (bNewBuff) {
+        ExFreePool(Buffer);
+    }
+    return 0;
+}
 
-//
-// Tcp Event Callbacks
-//
+ULONG
+KsWriteTsduBuf(
+    PKS_TSDUMGR TsduMgr,
+    PCHAR       buffer,
+    ULONG       length,
+    ULONG       flags
+    )
+{
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_BUF        KsTsduBuf;
 
-TDI_EVENT_RECEIVE
-TDI_EVENT_RECEIVE_EXPEDITED
-TDI_EVENT_CHAINED_RECEIVE
-TDI_EVENT_CHAINED_RECEIVE_EXPEDITED
+    /* KsWriteTsduBuf: append a KS_TSDU_BUF record that refers to
+       `buffer` (`length` bytes) to TsduMgr. Returns `length`, or 0 if
+       no Tsdu could be obtained.
+
+       get empty Tsdu from TsduMgr */
+    KsTsdu = KsGetTsdu(TsduMgr, sizeof(KS_TSDU_BUF));
 
-//
-// Udp Event Callbacks
-//
+    /* bail out if no Tsdu could be obtained */
+    if (NULL == KsTsdu) {
+        goto errorout;
+    }
 
-TDI_EVENT_RECEIVE_DATAGRAM
-TDI_EVENT_CHAINED_RECEIVE_DATAGRAM
+    /* setup up the KS_TSDU_BUF record */
+    KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    KsTsduBuf->TsduType     = TSDU_TYPE_BUF;
+    KsTsduBuf->TsduFlags    = 0;
+    KsTsduBuf->StartOffset  = 0;
+    KsTsduBuf->UserBuffer   = buffer; /* the pointer itself is queued, no copy is made; KsReleaseTsdus and KsReadTsdu later pass UserBuffer to ExFreePool, so presumably the caller hands over a pool allocation - confirm */
+    KsTsduBuf->DataLength   = length;
+    KsTsduBuf->Mdl          = NULL;
+    KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+    if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) {
+        KsTsduBuf->TsduFlags |= KS_TSDU_COMM_PARTIAL;
+    }
 
-*/
+    TsduMgr->TotalBytes  +=  length;
+    KsPrint((4, "KsWriteTsduBuf: TsduMgr=%p bytes in queue:%xh (%xh)\n",
+                TsduMgr, TsduMgr->TotalBytes, length));
 
+    return length;
 
-/*
- * KsSetEventHandlers
- *   Set the tdi event callbacks with an address object
- *
- * Arguments:
- *   AddressObject: the FileObject of the address object
- *   EventContext:  the parameter for the callbacks
- *   Handlers:      the handlers indictor array
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * NOTES:
- *   N/A
- */
+errorout:
+    return 0;
+}
 
-NTSTATUS
-KsSetEventHandlers(
-    IN PFILE_OBJECT                         AddressObject,  // Address File Object
-    IN PVOID                                EventContext,   // Context for Handlers
-    IN PKS_EVENT_HANDLERS                   Handlers        // Handlers Indictor
-   )
+ULONG
+KsWriteTsduMdl(
+    PKS_TSDUMGR     TsduMgr,
+    ks_mdl_t *      mdl, 
+    PVOID           desc,
+    ULONG           offset,
+    ULONG           length,
+    ULONG           flags
+    )
 {
-    NTSTATUS             Status = STATUS_SUCCESS;
-    PDEVICE_OBJECT       DeviceObject;
-    USHORT               i = 0;
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_MDL        KsTsduMdl;
 
-    DeviceObject = IoGetRelatedDeviceObject(AddressObject);
+    /* KsWriteTsduMdl: append a KS_TSDU_MDL record to TsduMgr that
+       refers to `length` bytes of `mdl`, starting at `offset`. `desc`
+       is stored unchanged in the record's Descriptor field. Returns
+       `length`, or 0 if no Tsdu could be obtained.
+
+       get empty Tsdu from TsduMgr */
+    KsTsdu = KsGetTsdu(TsduMgr, sizeof(KS_TSDU_MDL));
 
-    for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) {
+    /* bail out if no Tsdu could be obtained */
+    if (NULL == KsTsdu) {
+        goto errorout;
+    }
 
-        //
-        // Setup the tdi event callback handler if requested.
-        //
+    /* setup up the KS_TSDU_MDL record */
+    KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    KsTsduMdl->TsduType     = TSDU_TYPE_MDL;
+    KsTsduMdl->TsduFlags    = 0;
+    KsTsduMdl->StartOffset  = 0;
+    KsTsduMdl->BaseOffset   = offset;
+    KsTsduMdl->DataLength   = length;
+    KsTsduMdl->Mdl          = mdl; /* the mdl is referenced by the record, its data is not copied */
+    KsTsduMdl->Descriptor   = desc;
+    KsTsdu->LastOffset += sizeof(KS_TSDU_MDL);
+    if (cfs_is_flag_set(flags, TDI_SEND_PARTIAL)) {
+        KsTsduMdl->TsduFlags |= KS_TSDU_COMM_PARTIAL;
+    }
 
-        if (Handlers->IsActive[i]) {
+    TsduMgr->TotalBytes  +=  length;
+    KsPrint((4, "KsWriteTsduMdl: TsduMgr=%p bytes in queue:%xh (%xh)\n",
+                TsduMgr, TsduMgr->TotalBytes, length));
 
-            PIRP            Irp;
+    return length;
 
-            //
-            // Building Tdi Internal Irp ...
-            //
+errorout:
+    return 0;
+}
 
-            Irp = KsBuildTdiIrp(DeviceObject);
+ULONG
+KsReadTsdu (
+    PKS_TSDUMGR     TsduMgr,
+    PCHAR           buffer,
+    ULONG           length,
+    ULONG           flags
+    )
+{
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
 
-            if (NULL == Irp) {
+    PUCHAR          Buffer;
+    ULONG           BytesRecved = 0;
+#if DBG
+    ULONG           TotalBytes = TsduMgr->TotalBytes;
+#endif    
 
-                Status = STATUS_INSUFFICIENT_RESOURCES;
+    KsPrint((4, "KsReadTsdu: TsduMgr=%p request=%xh total=%xh\n",
+                TsduMgr, length, TsduMgr->TotalBytes ));
+NextTsdu:
 
-            } else {
+    if (TsduMgr->TotalBytes == 0) {
 
-                //
-                // Building the Irp to set the Event Handler ...
-                //
+        /* It's a notification event. We need reset it to
+           un-signaled state in case there no any tsdus. */
 
-                TdiBuildSetEventHandler(
-                    Irp,
-                    DeviceObject,
-                    AddressObject,
-                    NULL,
-                    NULL,
-                    i,                      /* tdi event type */
-                    Handlers->Handler[i],   /* tdi event handler */
-                    EventContext            /* context for the handler */
-                    );
+        KeResetEvent(&(TsduMgr->Event));
 
-                //
-                // Calling the Transprot Driver with the Prepared Irp
-                //
+    } else {
 
-                Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+        KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
 
-                //
-                // tcp/ip tdi does not support these two event callbacks
-                //
+        /* remove the KsTsdu from TsduMgr list to release the lock */
+        list_del(&(KsTsdu->Link));
+        TsduMgr->NumOfTsdu--;
 
-                if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE ||
-                     i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) {
-                    cfs_enter_debugger();
-                    Status = STATUS_SUCCESS;
-                }
-            }
+        while (length > BytesRecved) {
 
-            if (!NT_SUCCESS(Status)) {
-                cfs_enter_debugger();
-                goto errorout;
+            ULONG BytesToCopy = 0;
+            ULONG StartOffset = 0;
+            ULONG BytesCopied = 0;
+
+            if (KsTsdu->StartOffset >= KsTsdu->LastOffset) {
+                /* KsTsdu is empty now, we need free it ... */
+                KsPutKsTsdu(KsTsdu);
+                KsTsdu = NULL;
+                break;
             }
-        }
-    }
 
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
 
-errorout:
+            if ( TSDU_TYPE_DAT == KsTsduDat->TsduType ||
+                 TSDU_TYPE_BUF == KsTsduBuf->TsduType ) {
 
-    if (!NT_SUCCESS(Status)) {
+                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
 
-        KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n",
-                    Status, KsNtStatusToString(Status) ));
-    }
+                    /* Data Tsdu Unit ... */
+                    Buffer = &KsTsduDat->Data[0];
+                    StartOffset = KsTsduDat->StartOffset;
+                    if (KsTsduDat->DataLength - KsTsduDat->StartOffset > length - BytesRecved) {
+                        /* Recvmsg requst could be statisfied ... */
+                        BytesToCopy = length - BytesRecved;
+                    } else {
+                        BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset;
+                    }
 
-    return (Status);
-}
+                } else {
 
+                    /* Buffer Tsdu Unit */
+                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+                    Buffer = KsTsduBuf->UserBuffer;
+                    StartOffset = KsTsduBuf->StartOffset;
 
+                    if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > length - BytesRecved) {
+                        /* Recvmsg requst could be statisfied ... */
+                        BytesToCopy = length - BytesRecved;
+                    } else {
+                        BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset;
+                    }
+                }
 
-/*
- * KsQueryAddressInfo
- *   Query the address of the FileObject specified
- *
- * Arguments:
- *   FileObject:  the FileObject to be queried
- *   AddressInfo: buffer to contain the address info
- *   AddressSize: length of the AddressInfo buffer
- *
- * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
- *
- * Notes:
- *   N/A
- */
+                if (BytesToCopy > 0) {
+                    RtlMoveMemory(buffer + BytesRecved, Buffer + StartOffset, BytesToCopy);
+                }
+                BytesCopied = BytesToCopy;
+                BytesRecved += BytesCopied;
+                LASSERT(TsduMgr->TotalBytes >= BytesCopied);
+                TsduMgr->TotalBytes -= BytesCopied;
+                KsPrint((4, "KsReadTsdu: TsduMgr=%p copied=%xh recved=%xh\n",
+                            TsduMgr, BytesCopied, BytesRecved ));
 
-NTSTATUS
-KsQueryAddressInfo(
-    PFILE_OBJECT            FileObject,
-    PTDI_ADDRESS_INFO       AddressInfo,
-    PULONG                  AddressSize
-   )
-{
-    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
-    PIRP              Irp = NULL;
-    PMDL              Mdl;
-    PDEVICE_OBJECT    DeviceObject;
+                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
 
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+                    KsTsduDat->StartOffset += BytesCopied;
+                    if (KsTsduDat->StartOffset == KsTsduDat->DataLength) {
+                        if (KsTsduDat->Mdl) {
+                            KsTsduDat->Mdl->Next = NULL;
+                            KsReleaseMdl(KsTsduDat->Mdl, FALSE);
+                        }
+                        KsTsdu->StartOffset += KsTsduDat->TotalLength;
+                    }
 
-    DeviceObject = IoGetRelatedDeviceObject(FileObject);
+                } else {
 
-    RtlZeroMemory(AddressInfo, *(AddressSize));
+                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+                    KsTsduBuf->StartOffset += BytesCopied;
+                    if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) {
+                        KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+                        /* now we need release the buf to system pool */
+                        if (KsTsduBuf->Mdl) {
+                            KsTsduBuf->Mdl->Next = NULL;
+                            KsReleaseMdl(KsTsduBuf->Mdl, FALSE);
+                        }
+                        ExFreePool(KsTsduBuf->UserBuffer);
+                    }
+                }
 
-    //
-    // Allocating the Tdi Setting Irp ...
-    //
+            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
 
-    Irp = KsBuildTdiIrp(DeviceObject);
+                /* MDL Tsdu Unit ... */
+                if (KsTsduMdl->DataLength > length - BytesRecved) {
+                    /* Recvmsg request could be satisfied ... */
+                    BytesToCopy = length - BytesRecved;
+                } else {
+                    BytesToCopy = KsTsduMdl->DataLength;
+                }
 
-    if (NULL == Irp) {
+                BytesCopied = 
+                    KsCopyMdlToBuffer(
+                            KsTsduMdl->Mdl,
+                            KsTsduMdl->StartOffset + 
+                            KsTsduMdl->BaseOffset,
+                            buffer,
+                            BytesRecved,
+                            BytesToCopy
+                            );
+                KsPrint((4, "KsReadTsdu: TsduMgr=%p mdl=%p dec=%p copied=%xh "
+                            "recved=%xh\n",
+                            TsduMgr, KsTsduMdl->Mdl, KsTsduMdl->Descriptor,
+                            BytesCopied, BytesRecved + BytesCopied));
+                if (BytesCopied == 0) {
+                    cfs_enter_debugger();
+                    break;
+                }
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
+                KsTsduMdl->StartOffset += BytesCopied;
+                KsTsduMdl->DataLength  -= BytesCopied;
+                BytesRecved += BytesCopied;
+                LASSERT(TsduMgr->TotalBytes >= BytesCopied);
+                TsduMgr->TotalBytes -= BytesCopied;
 
-    } else {
+                if (0 == KsTsduMdl->DataLength) {
 
-        //
-        // Locking the User Buffer / Allocating a MDL for it
-        //
+                    /* Call TdiReturnChainedReceives to release the Tsdu memory */
+                    LASSERT(KsTsduMdl->Descriptor != NULL);
+                    if (KsTsduMdl->Descriptor) {
+                        TdiReturnChainedReceives(
+                            &(KsTsduMdl->Descriptor),
+                            1 );
+                    }
 
-        Status = KsLockUserBuffer(
-                    AddressInfo,
-                    FALSE,
-                    *(AddressSize),
-                    IoModifyAccess,
-                    &Mdl
-                    );
+                    KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+                }
 
-        if (!NT_SUCCESS(Status)) {
+            } else {
+                KsPrint((1, "KsReadTsdu: unknown tsdu slot: slot = %x type = %x Start= %x Length=%x\n",
+                        KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength));
+                KsPrint((1, "            Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x\n",
+                        KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength));
+                cfs_enter_debugger();
+            }
+        }
 
-            IoFreeIrp(Irp);
-            Irp = NULL;
+        /* we need attach the KsTsdu to the list header */
+        if (KsTsdu) {
+            if (KsTsdu->StartOffset >= KsTsdu->LastOffset) {
+                KsPutKsTsdu(KsTsdu);
+                KsTsdu = NULL;
+            } else {
+                TsduMgr->NumOfTsdu++;
+                list_add(&(KsTsdu->Link), &(TsduMgr->TsduList));
+            }
+        }
+        
+        if (length > BytesRecved) {
+            goto NextTsdu;
         }
     }
 
-    if (Irp) {
+#if DBG
+    LASSERT(TotalBytes == TsduMgr->TotalBytes + BytesRecved);
+#endif
+    KsPrint((4, "KsReadTsdu: TsduMgr=%p recved=%xh (%xh) remained=%xh\n",
+                TsduMgr, BytesRecved, length, TsduMgr->TotalBytes ));
 
-        LASSERT(NT_SUCCESS(Status));
+    return BytesRecved;
+}
 
-        TdiBuildQueryInformation(
-                    Irp,
-                    DeviceObject,
-                    FileObject,
-                    NULL,
-                    NULL,
-                    TDI_QUERY_ADDRESS_INFO,
-                    Mdl
-                    );
 
-        Status = KsSubmitTdiIrp(
-                    DeviceObject,
-                    Irp,
-                    TRUE,
-                    AddressSize
-                    );
+/*
+ * KsTdiSendFlags
+ *   Translate socket-style send flags into TDI send flags
+ *
+ * Arguments:
+ *   SockFlags: MSG_* flags; only MSG_OOB, MSG_MORE and
+ *              MSG_DONTWAIT are looked at
+ *
+ * Return Value:
+ *   ULONG: TDI_SEND_EXPEDITED, TDI_SEND_PARTIAL and/or
+ *          TDI_SEND_NON_BLOCKING, or 0 when none applies
+ *
+ * NOTES:
+ *   N/A
+ */
+
+ULONG
+KsTdiSendFlags(int SockFlags)
+{
+    ULONG   TdiFlags = 0;
 
-        KsReleaseMdl(Mdl, FALSE);
+    if (cfs_is_flag_set(SockFlags, MSG_OOB)) {
+        cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED);
     }
 
-    if (!NT_SUCCESS(Status)) {
+    if (cfs_is_flag_set(SockFlags, MSG_MORE)) {
+        cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL);
+    }
 
-        cfs_enter_debugger();
-        //TDI_BUFFER_OVERFLOW
+    if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) {
+        cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING);
     }
 
-    return (Status);
+    return TdiFlags;
+}
+
+/*
+ * KsTdiRecvFlags
+ *   Translate socket-style receive flags into TDI flags
+ *
+ * Arguments:
+ *   SockFlags: MSG_* flags; only MSG_OOB, MSG_MORE and
+ *              MSG_DONTWAIT are looked at
+ *
+ * Return Value:
+ *   ULONG: the TDI flags selected by SockFlags, or 0
+ *
+ * NOTES:
+ *   MSG_DONTWAIT is mapped to TDI_SEND_NON_BLOCKING, a send-side
+ *   constant, exactly as KsTdiSendFlags does.
+ *   NOTE(review): confirm this is the flag intended for receives.
+ */
+
+ULONG
+KsTdiRecvFlags(int SockFlags)
+{
+    static const struct {
+        int     Sock;   /* socket flag to test */
+        ULONG   Tdi;    /* TDI flag to raise   */
+    } Map[] = {
+        { MSG_OOB,      TDI_RECEIVE_EXPEDITED },
+        { MSG_MORE,     TDI_RECEIVE_PARTIAL   },
+        { MSG_DONTWAIT, TDI_SEND_NON_BLOCKING }
+    };
+
+    ULONG   Result = 0;
+    int     i;
+
+    for (i = 0; i < (int)(sizeof(Map) / sizeof(Map[0])); i++) {
+        if (cfs_is_flag_set(SockFlags, Map[i].Sock)) {
+            cfs_set_flag(Result, Map[i].Tdi);
+        }
+    }
+
+    return Result;
+}
+
+/*
+ * KsWriteTsdus
+ *   Queue data into the TsduMgr unless it already holds more
+ *   than TDINAL_MAX_TSDU_QUEUE_SIZE bytes
+ *
+ * Arguments:
+ *   TsduMgr: the Tsdu manager to write to
+ *   buffer, length, flags: handed unchanged to KsWriteTsduDat
+ *
+ * Return Value:
+ *   int: the positive value returned by KsWriteTsduDat, or
+ *        -EAGAIN when nothing was queued
+ *
+ * NOTES:
+ *   N/A
+ */
+
+int
+KsWriteTsdus(PKS_TSDUMGR TsduMgr, char * buffer, int length, int flags)
+{
+    int queued;
+
+    /* over the queue limit: refuse without calling the writer */
+    if (TsduMgr->TotalBytes > TDINAL_MAX_TSDU_QUEUE_SIZE) {
+        return -EAGAIN;
+    }
+
+    queued = KsWriteTsduDat(TsduMgr, buffer, length, flags);
+
+    return (queued > 0) ? queued : -EAGAIN;
+}
+
+/*
+ * KsReadTsdus
+ *   Read queued data out of the TsduMgr via KsReadTsdu
+ *
+ * Arguments:
+ *   TsduMgr: the Tsdu manager to read from
+ *   buffer, length, flags: handed unchanged to KsReadTsdu
+ *
+ * Return Value:
+ *   int: the positive value returned by KsReadTsdu, or
+ *        -EAGAIN when it returned nothing
+ *
+ * NOTES:
+ *   N/A
+ */
+
+int
+KsReadTsdus(PKS_TSDUMGR TsduMgr, char * buffer, int length, int flags)
+{
+    int nread;
+
+    nread = KsReadTsdu(TsduMgr, buffer, length, flags);
+
+    /* zero or negative results are all reported as "try again" */
+    return (nread > 0) ? nread : -EAGAIN;
 }
 
 /*
- * KsQueryProviderInfo
- *   Query the underlying transport device's information
+ * KsInitializeKsTsduMgr
+ *   Initialize the management structure of
+ *   Tsdu buffers
  *
  * Arguments:
- *   TdiDeviceName:  the transport device's name string
- *   ProviderInfo:   TDI_PROVIDER_INFO struncture
+ *   TsduMgr: the TsduMgr to be initialized
  *
  * Return Value:
- *   NTSTATUS:       Nt system status code
 *
+ *   VOID
+ *
  * NOTES:
  *   N/A
  */
 
-NTSTATUS
-KsQueryProviderInfo(
-    PWSTR               TdiDeviceName,
-    PTDI_PROVIDER_INFO  ProviderInfo
-   )
+VOID
+KsInitializeKsTsduMgr(
+    PKS_TSDUMGR     TsduMgr
+    )
 {
-    NTSTATUS            Status = STATUS_SUCCESS;
-
-    PIRP                Irp = NULL;
-    PMDL                Mdl = NULL;
-
-    UNICODE_STRING      ControlName;
+    /* notification event, created in the non-signaled state */
+    KeInitializeEvent(
+            &(TsduMgr->Event),
+            NotificationEvent,
+            FALSE
+            );
 
-    HANDLE              Handle;
-    PFILE_OBJECT        FileObject;
-    PDEVICE_OBJECT      DeviceObject;
+    /* start with an empty Tsdu list ... */
+    CFS_INIT_LIST_HEAD(
+            &(TsduMgr->TsduList)
+            );
 
-    ULONG               ProviderSize = 0;
+    /* ... and matching zeroed counters */
+    TsduMgr->NumOfTsdu  = 0;
+    TsduMgr->TotalBytes = 0;
 
-    RtlInitUnicodeString(&ControlName, TdiDeviceName);
+    /* the OOB field is not touched here; see KsInitializeKsChain */
+    spin_lock_init(&TsduMgr->Lock);
+}
 
-    //
-    // Open the Tdi Control Channel
-    //
 
-    Status = KsOpenControl(
-                &ControlName,
-                &Handle,
-                &FileObject
-                );
+/*
+ * KsInitializeKsChain
+ *   Initialize the chain structure for receiving
+ *   or transmitting
+ *
+ * Arguments:
+ *   KsChain: the KsChain to be initialized
+ *
+ * Return Value:
+ *   VOID
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    if (!NT_SUCCESS(Status)) {
+VOID
+KsInitializeKsChain(
+    PKS_CHAIN       KsChain
+    )
+{
+    PKS_TSDUMGR     Normal    = &(KsChain->Normal);
+    PKS_TSDUMGR     Expedited = &(KsChain->Expedited);
+
+    /* both managers get the same basic setup ... */
+    KsInitializeKsTsduMgr(Normal);
+    KsInitializeKsTsduMgr(Expedited);
+
+    /* ... and only the expedited one is marked with OOB = TRUE */
+    Expedited->OOB = TRUE;
+}
 
-        KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n"));
-        return (Status);
-    }
 
-    //
-    // Obtain The Related Device Object
-    //
+/*
+ * KsCleanupTsduMgr
+ *   Clean up all the Tsdus in the TsduMgr list
+ *
+ * Arguments:
+ *   TsduMgr: the Tsdu list manager
+ *
+ * Return Value:
+ *   NTSTATUS:  nt status code
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    DeviceObject = IoGetRelatedDeviceObject(FileObject);
+NTSTATUS
+KsCleanupTsduMgr(
+    PKS_TSDUMGR     TsduMgr
+    )
+{
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
 
-    ProviderSize = sizeof(TDI_PROVIDER_INFO);
-    RtlZeroMemory(ProviderInfo, ProviderSize);
+    LASSERT(NULL != TsduMgr);
 
-    //
-    // Allocating the Tdi Setting Irp ...
-    //
+    KsRemoveTdiEngine(TsduMgr);
+    /* signal the manager event so that a waiter on it is released */
+    KeSetEvent(&(TsduMgr->Event), 0, FALSE);
 
-    Irp = KsBuildTdiIrp(DeviceObject);
+    //
+    // Each pass either frees the head Tsdu (when it is used up)
+    // or consumes exactly one slot of it, so the loop always
+    // makes progress.
+    //
+    while (!list_empty(&TsduMgr->TsduList)) {
 
-    if (NULL == Irp) {
+        KsTsdu = list_entry(TsduMgr->TsduList.next, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
+        /* ">=" as in KsReadTsdu: an overshot offset also means "used up" */
+        if (KsTsdu->StartOffset >= KsTsdu->LastOffset) {
 
-    } else {
+            //
+            // KsTsdu is empty now, we need to free it ...
+            //
 
-        //
-        // Locking the User Buffer / Allocating a MDL for it
-        //
+            list_del(&(KsTsdu->Link));
+            TsduMgr->NumOfTsdu--;
 
-        Status = KsLockUserBuffer(
-                    ProviderInfo,
-                    FALSE,
-                    ProviderSize,
-                    IoModifyAccess,
-                    &Mdl
-                    );
+            KsFreeKsTsdu(KsTsdu);
 
-        if (!NT_SUCCESS(Status)) {
+        } else {
 
-            IoFreeIrp(Irp);
-            Irp = NULL;
-        }
-    }
+            /* three typed views of the same slot; TsduType picks the valid one */
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
 
-    if (Irp) {
+            if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
 
-        LASSERT(NT_SUCCESS(Status));
+                //
+                // NOTE(review): KsReadTsdu releases KsTsduDat->Mdl when
+                // a DAT slot is drained; nothing is released here.
+                // Confirm no Mdl can be outstanding at cleanup time.
+                //
+                KsTsdu->StartOffset += KsTsduDat->TotalLength;
 
-        TdiBuildQueryInformation(
-                    Irp,
-                    DeviceObject,
-                    FileObject,
-                    NULL,
-                    NULL,
-                    TDI_QUERY_PROVIDER_INFO,
-                    Mdl
-                    );
+            } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
 
-        Status = KsSubmitTdiIrp(
-                    DeviceObject,
-                    Irp,
-                    TRUE,
-                    &ProviderSize
-                    );
+                ASSERT(KsTsduBuf->UserBuffer != NULL);
 
-        KsReleaseMdl(Mdl, FALSE);
-    }
+                if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) {
+                    if (KsTsduBuf->Mdl) {
+                        KsTsduBuf->Mdl->Next = NULL;
+                        KsReleaseMdl(KsTsduBuf->Mdl, FALSE);
+                    }
+                    ExFreePool(KsTsduBuf->UserBuffer);
+                } else {
+                    /* a fully consumed BUF slot should not still be queued */
+                    cfs_enter_debugger();
+                }
 
-    if (!NT_SUCCESS(Status)) {
+                KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
 
-        cfs_enter_debugger();
-        //TDI_BUFFER_OVERFLOW
-    }
+            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
 
-    KsCloseControl(Handle, FileObject);
+                //
+                // MDL Tsdu Unit ...
+                //
+                if (KsTsduMdl->Descriptor) {
+                    TdiReturnChainedReceives(
+                        &(KsTsduMdl->Descriptor),
+                        1 );
+                } else if (KsTsduMdl->Mdl) {
+                    KsTsduMdl->Mdl->Next = NULL;
+                    KsReleaseMdl(KsTsduMdl->Mdl, FALSE);
+                }
 
-    return (Status);
+                KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+
+            } else {
+
+                //
+                // Unknown slot type: its size cannot be told, so
+                // give up on the rest of this Tsdu instead of
+                // looping on the same slot forever. The Tsdu is
+                // freed by the next pass.
+                //
+                cfs_enter_debugger();
+                KsTsdu->StartOffset = KsTsdu->LastOffset;
+            }
+        }
+    }
+
+    return STATUS_SUCCESS;
 }
 
+
 /*
- * KsQueryConnectionInfo
- *   Query the connection info of the FileObject specified
- *   (some statics data of the traffic)
+ * KsCleanupKsChain
+ *   Clean up the TsduMgrs of the KsChain
  *
  * Arguments:
- *   FileObject:     the FileObject to be queried
- *   ConnectionInfo: buffer to contain the connection info
- *   ConnectionSize: length of the ConnectionInfo buffer
+ *   KsChain: the chain managing TsduMgr
  *
  * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
+ *   NTSTATUS:  nt status code
  *
  * NOTES:
  *   N/A
  */
 
 NTSTATUS
-KsQueryConnectionInfo(
-    PFILE_OBJECT            ConnectionObject,
-    PTDI_CONNECTION_INFO    ConnectionInfo,
-    PULONG                  ConnectionSize
-   )
+KsCleanupKsChain(
+    PKS_CHAIN   KsChain
+    )
 {
-    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
-    PIRP              Irp = NULL;
-    PMDL              Mdl;
-    PDEVICE_OBJECT    DeviceObject;
-
-    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
-
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
-    RtlZeroMemory(ConnectionInfo, *(ConnectionSize));
-
-    //
-    // Allocating the Tdi Query Irp ...
-    //
-
-    Irp = KsBuildTdiIrp(DeviceObject);
-
-    if (NULL == Irp) {
-
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-
-    } else {
-
-        //
-        // Locking the User Buffer / Allocating a MDL for it
-        //
+    NTSTATUS    Status;
 
-        Status = KsLockUserBuffer(
-                    ConnectionInfo,
-                    FALSE,
-                    *(ConnectionSize),
-                    IoModifyAccess,
-                    &Mdl
-                    );
+    LASSERT(NULL != KsChain);
 
-        if (NT_SUCCESS(Status)) {
+    /* the normal manager is cleaned first, then the expedited one */
+    Status = KsCleanupTsduMgr(
+                &(KsChain->Normal)
+                );
 
-            IoFreeIrp(Irp);
-            Irp = NULL;
-        }
+    /* a failure here leaves the expedited manager untouched */
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        goto errorout;
     }
 
-    if (Irp) {
-
-        LASSERT(NT_SUCCESS(Status));
-
-        TdiBuildQueryInformation(
-                    Irp,
-                    DeviceObject,
-                    ConnectionObject,
-                    NULL,
-                    NULL,
-                    TDI_QUERY_CONNECTION_INFO,
-                    Mdl
-                    );
-
-        Status = KsSubmitTdiIrp(
-                    DeviceObject,
-                    Irp,
-                    TRUE,
-                    ConnectionSize
-                    );
+    Status = KsCleanupTsduMgr(
+                &(KsChain->Expedited)
+                );
 
-        KsReleaseMdl(Mdl, FALSE);
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        goto errorout;
     }
 
-    return (Status);
+    /* both the success and the failure paths end up here */
+errorout:
+
+    return Status;
 }
 
 
 /*
- * KsInitializeTdiAddress
- *   Initialize the tdi addresss
+ * KsCleanupTsdu
+ *   Clean up all the Tsdus of a tdi connected object
  *
  * Arguments:
- *   pTransportAddress: tdi address to be initialized
- *   IpAddress:         the ip address of object
- *   IpPort:            the ip port of the object
+ *   tconn: the tdi connection which is connected already.
  *
  * Return Value:
- *   ULONG: the total size of the tdi address
+ *   Nt status code
  *
  * NOTES:
  *   N/A
  */
 
-ULONG
-KsInitializeTdiAddress(
-    IN OUT PTA_IP_ADDRESS   pTransportAddress,
-    IN ULONG                IpAddress,
-    IN USHORT               IpPort
+NTSTATUS
+KsCleanupTsdu(
+    ks_tconn_t * tconn
     )
 {
-    pTransportAddress->TAAddressCount = 1;
-    pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP;
-    pTransportAddress->Address[ 0 ].AddressType   = TDI_ADDRESS_TYPE_IP;
-    pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort;
-    pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr  = IpAddress;
+    NTSTATUS        Status = STATUS_SUCCESS;
 
-    return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP);
+
+    /*
+     * Only sender and child connections are cleaned up; for any
+     * other type nothing is done and STATUS_SUCCESS is returned.
+     */
+    if (tconn->kstc_type != kstt_sender &&
+        tconn->kstc_type != kstt_child ) {
+
+        goto errorout;
+    }
+
+    /*
+     * The receive chain is cleaned before the send chain; the
+     * first failure stops the cleanup and is returned as is.
+     */
+    if (tconn->kstc_type == kstt_sender) {
+
+        Status = KsCleanupKsChain(
+                    &(tconn->sender.kstc_recv)
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+        Status = KsCleanupKsChain(
+                    &(tconn->sender.kstc_send)
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+    } else {
+
+        /* kstt_child: same steps on the child's own chains */
+        Status = KsCleanupKsChain(
+                    &(tconn->child.kstc_recv)
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+        Status = KsCleanupKsChain(
+                    &(tconn->child.kstc_send)
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+    }
+
+errorout:
+
+    return (Status);
+}
+
+/*
+ * KsIrpCompletionRoutine
+ *   Irp completion routine (installed e.g. by KsSubmitTdiIrp for
+ *   synchronous requests): signals the optional event and stops
+ *   further completion processing of the Irp
+ *
+ * Arguments:
+ *   DeviceObject:  not used
+ *   Irp:           not used
+ *   Context:       NULL, or the KEVENT to be signaled
+ *
+ * Return Value:
+ *   NTSTATUS:      always STATUS_MORE_PROCESSING_REQUIRED, so the
+ *                  Irp is left to whoever issued it
+ *
+ * NOTES:
+ *   N/A
+ */
+
+NTSTATUS
+KsIrpCompletionRoutine(
+    IN PDEVICE_OBJECT    DeviceObject,
+    IN PIRP              Irp,
+    IN PVOID             Context
+    )
+{
+    /* kept ahead of the return so that they are not dead code */
+    UNREFERENCED_PARAMETER(DeviceObject);
+    UNREFERENCED_PARAMETER(Irp);
+
+    if (NULL != Context) {
+        KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE);
+    }
+
+    return STATUS_MORE_PROCESSING_REQUIRED;
 }
 
+
 /*
- * KsQueryTdiAddressLength
- *   Query the total size of the tdi address
+ * KsBuildTdiIrp
+ *   Allocate a new IRP and initialize it to be issued to tdi
  *
  * Arguments:
- *   pTransportAddress: tdi address to be queried
+ *   DeviceObject:  device object created by the underlying
+ *                  TDI transport driver
  *
  * Return Value:
- *   ULONG: the total size of the tdi address
+ *   PIRP:   the allocated Irp on success, or NULL on failure.
  *
  * NOTES:
  *   N/A
  */
 
-ULONG
-KsQueryTdiAddressLength(
-    PTRANSPORT_ADDRESS      pTransportAddress
+PIRP
+KsBuildTdiIrp(
+    IN PDEVICE_OBJECT    DeviceObject
     )
 {
-    ULONG                   TotalLength = 0;
-    LONG                    i;
+    PIRP                Irp;
+    PIO_STACK_LOCATION  IrpSp;
 
-    PTA_ADDRESS UNALIGNED   pTaAddress = NULL;
+    //
+    // Allocating the IRP (DeviceObject->StackSize locations, no quota charged) ...
+    //
 
-    ASSERT (NULL != pTransportAddress);
+    Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
 
-    TotalLength  = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) +
-                   FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount;
+    if (NULL != Irp) {
 
-    pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address;
+        //
+        // Getting the next stack location, i.e. the one the target driver will see ...
+        //
 
-    for (i = 0; i < pTransportAddress->TAAddressCount; i++)
-    {
-        TotalLength += pTaAddress->AddressLength;
-        pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress +
-                                           FIELD_OFFSET(TA_ADDRESS,Address) +
-                                           pTaAddress->AddressLength );
+        IrpSp = IoGetNextIrpStackLocation(Irp);
+
+        //
+        // Presetting it as an internal device control request with a zero control code ...
+        //
+
+        IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL;
+        IrpSp->Parameters.DeviceIoControl.IoControlCode = 0;
     }
 
-    return (TotalLength);
+    return Irp;
 }
 
-
 /*
- * KsQueryIpAddress
- *   Query the ip address of the tdi object
+ * KsSubmitTdiIrp
+ *   Issue the Irp to the underlying tdi driver
  *
  * Arguments:
- *   FileObject: tdi object to be queried
- *   TdiAddress: TdiAddress buffer, to store the queried
- *               tdi ip address
- *   AddressLength: buffer length of the TdiAddress
+ *   DeviceObject:  the device object created by TDI driver
+ *   Irp:           the I/O request packet to be processed
+ *   bSynchronous:  synchronous or not. If true, we need wait
+ *                  until the process is finished.
+ *   Information:   returned info
  *
  * Return Value:
- *   ULONG: the total size of the tdi ip address
+ *   NTSTATUS:      kernel status code
  *
  * NOTES:
  *   N/A
  */
 
 NTSTATUS
-KsQueryIpAddress(
-    PFILE_OBJECT    FileObject,
-    PVOID           TdiAddress,
-    ULONG*          AddressLength
+KsSubmitTdiIrp(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN BOOLEAN          bSynchronous,
+    OUT PULONG          Information
     )
 {
-    NTSTATUS        Status;
-
-    PTDI_ADDRESS_INFO   TdiAddressInfo;
-    ULONG               Length;
-
-
-    //
-    // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS
-    //
+    NTSTATUS            Status;
+    KEVENT              Event;
 
-    Length = MAX_ADDRESS_LENGTH;
+    if (bSynchronous) {
 
-    TdiAddressInfo = (PTDI_ADDRESS_INFO)
-                        ExAllocatePoolWithTag(
-                            NonPagedPool,
-                            Length,
-                            'KSAI' );
+        //
+        // KsIrpCompletionRoutine signals Event and returns
+        // STATUS_MORE_PROCESSING_REQUIRED, so the Irp stays
+        // valid until it is freed at the end of this function
+        //
+        KeInitializeEvent(
+            &Event,
+            SynchronizationEvent,
+            FALSE
+            );
 
-    if (NULL == TdiAddressInfo) {
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
+        IoSetCompletionRoutine(
+            Irp,
+            KsIrpCompletionRoutine,
+            &Event,
+            TRUE,
+            TRUE,
+            TRUE
+            );
     }
 
+    //
+    // Asynchronous callers get IoCallDriver's status back as is:
+    // Information is not written and the Irp is not freed here
+    //
+    Status = IoCallDriver(DeviceObject, Irp);
 
-    Status = KsQueryAddressInfo(
-        FileObject,
-        TdiAddressInfo,
-        &Length
-        );
-
-errorout:
-
-    if (NT_SUCCESS(Status))
-    {
-        if (*AddressLength < Length) {
+    if (bSynchronous) {
 
-            Status = STATUS_BUFFER_TOO_SMALL;
+        if (STATUS_PENDING == Status) {
 
-        } else {
+            Status = KeWaitForSingleObject(
+                        &Event,
+                        Executive,
+                        KernelMode,
+                        FALSE,
+                        NULL
+                        );
+        }
 
-            *AddressLength = Length;
-            RtlCopyMemory(
-                TdiAddress,
-                &(TdiAddressInfo->Address),
-                Length
-                );
+        // the wait result is discarded: the Irp's final status is what counts
+        Status = Irp->IoStatus.Status;
 
-            Status = STATUS_SUCCESS;
+        if (Information) {
+            *Information = (ULONG)(Irp->IoStatus.Information);
         }
 
-    } else {
-
+        // last use of the Irp: the caller must not touch it afterwards
+        IoFreeIrp(Irp);
     }
 
+    if (!NT_SUCCESS(Status)) {
 
-    if (NULL != TdiAddressInfo) {
-
-        ExFreePool(TdiAddressInfo);
+        KsPrint((1, "KsSubmitTdiIrp: Error when submitting the Irp: "
+                    "Status = %xh (%s)\n", Status, KsNtStatusToString(Status)));
     }
 
-    return Status;
+    return (Status);
 }
 
 
+
 /*
- * KsErrorEventHandler
- *   the common error event handler callback
+ * KsOpenControl
+ *   Open the Control Channel Object ...
  *
  * Arguments:
- *   TdiEventContext: should be the socket
- *   Status: the error code
+ *   DeviceName:   the device name to be opened
+ *   Handle:       opened handle in success case
+ *   FileObject:   the fileobject of the device
  *
  * Return Value:
- *   Status: STATS_SUCCESS
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
- * NOTES:
- *   We need not do anything in such a severe
- *   error case. System will process it for us.
+ * Notes:
+ *   N/A
  */
 
 NTSTATUS
-KsErrorEventHandler(
-    IN PVOID        TdiEventContext,
-    IN NTSTATUS     Status
+KsOpenControl(
+    IN PUNICODE_STRING      DeviceName,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
    )
 {
-    KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n",
-                KeGetCurrentIrql()));
+    NTSTATUS          Status = STATUS_SUCCESS;
 
-    cfs_enter_debugger();
+    OBJECT_ATTRIBUTES ObjectAttributes;
+    IO_STATUS_BLOCK   IoStatus;
 
-    return (STATUS_SUCCESS);
-}
 
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-/*
- * ks_set_handlers
- *   setup all the event handler callbacks
- *
- * Arguments:
- *   tconn: the tdi connecton object
- *
- * Return Value:
- *   int: ks error code
- *
- * NOTES:
- *   N/A
- */
+    //
+    // Initializing the object attributes (case-insensitive name, kernel handle) ...
+    //
 
-int
-ks_set_handlers(
-    ksock_tconn_t *     tconn
-    )
-{
-    NTSTATUS            status = STATUS_SUCCESS;
-    KS_EVENT_HANDLERS   handlers;
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE |
+        OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
 
-    /* to make sure the address object is opened already */
-    if (tconn->kstc_addr.FileObject == NULL) {
-        goto errorout;
-    }
+    LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-    /* initialize the handlers indictor array. for sender and listenr,
-       there are different set of callbacks. for child, we just return. */
+    //
+    // Opening the control channel (no EA buffer is passed) ...
+    //
 
-    memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                FILE_SHARE_READ | FILE_SHARE_WRITE,
+                FILE_OPEN,
+                0,
+                NULL,
+                0
+                );
 
-    SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler);
-    SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler);
-    SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
-    SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler);
-    SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler);
 
-    // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler);
+    if (NT_SUCCESS(Status)) {
 
-    if (tconn->kstc_type == kstt_listener) {
-        SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler);
-    } else if (tconn->kstc_type == kstt_child) {
-        goto errorout;
-    }
+        //
+        // Now obtaining the FileObject of the control channel ...
+        //
 
-    /* set all the event callbacks */
-    status = KsSetEventHandlers(
-                tconn->kstc_addr.FileObject, /* Address File Object  */
-                tconn,                       /* Event Context */
-                &handlers                    /* Event callback handlers */
-                );
+        Status = ObReferenceObjectByHandle(
+                    *Handle,
+                    FILE_ANY_ACCESS,
+                    NULL,
+                    KernelMode,
+                    FileObject,
+                    NULL
+                    );
 
-errorout:
+        if (!NT_SUCCESS(Status)) {
 
-    return cfs_error_code(status);
+            //
+            // The handle is closed again, but *Handle keeps its
+            // stale value: on failure the caller must not use
+            // either output
+            //
+            cfs_enter_debugger();
+            ZwClose(*Handle);
+        }
+
+    } else {
+
+        cfs_enter_debugger();
+    }
+
+    return (Status);
 }
 
 
 /*
- * ks_reset_handlers
- *   disable all the event handler callbacks (set to NULL)
+ * KsCloseControl
+ *   Release the Control Channel Handle and FileObject
  *
  * Arguments:
- *   tconn: the tdi connecton object
+ *   Handle:       the channel handle to be released
+ *   FileObject:   the fileobject to be released
  *
  * Return Value:
- *   int: ks error code
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
- * NOTES:
+ * Notes:
  *   N/A
  */
 
-int
-ks_reset_handlers(
-    ksock_tconn_t *     tconn
-    )
+NTSTATUS
+KsCloseControl(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+   )
 {
-    NTSTATUS            status = STATUS_SUCCESS;
-    KS_EVENT_HANDLERS   handlers;
+    NTSTATUS  Status = STATUS_SUCCESS;
 
-    /* to make sure the address object is opened already */
-    if (tconn->kstc_addr.FileObject == NULL) {
-        goto errorout;
-    }
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-    /* initialize the handlers indictor array. for sender and listenr,
-       there are different set of callbacks. for child, we just return. */
+    /* drop the object reference (e.g. the one taken by KsOpenControl) */
+    if (FileObject) {
 
-    memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
+        ObDereferenceObject(FileObject);
+    }
 
-    SetEventHandler(handlers, TDI_EVENT_ERROR, NULL);
-    SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL);
-    SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL);
-    SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL);
-    SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL);
-    // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL);
+    /* a NULL handle is skipped; Status then stays STATUS_SUCCESS */
+    if (Handle) {
 
-    if (tconn->kstc_type == kstt_listener) {
-        SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL);
-    } else if (tconn->kstc_type == kstt_child) {
-        goto errorout;
+        Status = ZwClose(Handle);
     }
 
-    /* set all the event callbacks */
-    status = KsSetEventHandlers(
-                tconn->kstc_addr.FileObject, /* Address File Object  */
-                tconn,                       /* Event Context */
-                &handlers                    /* Event callback handlers */
-                );
-
-errorout:
+    /* a failing ZwClose is only asserted on; its status is still returned */
+    ASSERT(NT_SUCCESS(Status));
 
-    return cfs_error_code(status);
+    return (Status);
 }
 
 
 /*
- * KsAcceptCompletionRoutine
- *   Irp completion routine for TdiBuildAccept (KsConnectEventHandler)
- *
- *   Here system gives us a chance to check the conneciton is built
- *   ready or not.
+ * KsOpenAddress
+ *   Open the tdi address object
  *
  * Arguments:
- *   DeviceObject:  the device object of the transport driver
- *   Irp:           the Irp is being completed.
- *   Context:       the context we specified when issuing the Irp
+ *   DeviceName:   device name of the address object
+ *   pAddress:     tdi address of the address object
+ *   AddressLength: length in bytes of the tdi address
+ *   Handle:       the newly opened handle
+ *   FileObject:   the newly opened fileobject
  *
  * Return Value:
- *   Nt status code
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
  * Notes:
  *   N/A
  */
 
 NTSTATUS
-KsAcceptCompletionRoutine(
-    IN PDEVICE_OBJECT   DeviceObject,
-    IN PIRP             Irp,
-    IN PVOID            Context
-    )
+KsOpenAddress(
+    IN PUNICODE_STRING      DeviceName,
+    IN PTRANSPORT_ADDRESS   pAddress,
+    IN ULONG                AddressLength,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
+   )
 {
-    ksock_tconn_t * child = (ksock_tconn_t *) Context;
-    ksock_tconn_t * parent = child->child.kstc_parent;
-
-    KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n",
-                KeGetCurrentIrql() ));
+    NTSTATUS          Status = STATUS_SUCCESS;
 
-    KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n",
-                 Context, Irp->IoStatus.Status));
+    PFILE_FULL_EA_INFORMATION Ea = NULL;
+    ULONG             EaLength;
+    UCHAR             EaBuffer[EA_MAX_LENGTH];
 
-    LASSERT(child->kstc_type == kstt_child);
+    OBJECT_ATTRIBUTES ObjectAttributes;
+    IO_STATUS_BLOCK   IoStatus;
 
-    spin_lock(&(child->kstc_lock));
+    //
+    // Building EA for the Address Object to be Opened ...
+    //
 
-    LASSERT(parent->kstc_state == ksts_listening);
-    LASSERT(child->kstc_state == ksts_connecting);
+    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
+    Ea->EaValueLength = (USHORT)AddressLength;
+    RtlCopyMemory(
+        &(Ea->EaName),
+        TdiTransportAddress,
+        Ea->EaNameLength + 1
+        );
+    RtlMoveMemory(
+        &(Ea->EaName[Ea->EaNameLength + 1]),
+        pAddress,
+        AddressLength
+        );
+    EaLength =  sizeof(FILE_FULL_EA_INFORMATION) +
+                Ea->EaNameLength + AddressLength;
 
-    if (NT_SUCCESS(Irp->IoStatus.Status)) {
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-        child->child.kstc_accepted = TRUE;
 
-        child->kstc_state = ksts_connected;
+    //
+    // Initializing ...
+    //
 
-        /* wake up the daemon thread which waits on this event */
-        KeSetEvent(
-            &(parent->listener.kstc_accept_event),
-            0,
-            FALSE
-            );
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE |
+        OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
 
-        spin_unlock(&(child->kstc_lock));
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-        KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent));
+    //
+    // Creating the Transport Address Object ...
+    //
 
-    } else {
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */
+                FILE_OPEN,
+                0,
+                Ea,
+                EaLength
+                );
 
-        /* re-use this child connecton  */
-        child->child.kstc_accepted = FALSE;
-        child->child.kstc_busy = FALSE;
-        child->kstc_state = ksts_associated;
 
-        spin_unlock(&(child->kstc_lock));
-    }
+    if (NT_SUCCESS(Status)) {
 
-    /* now free the Irp */
-    IoFreeIrp(Irp);
+        //
+        // Now Obtaining the FileObject of the Transport Address ...
+        //
 
-    /* drop the refer count of the child */
-    ks_put_tconn(child);
-
-    return (STATUS_MORE_PROCESSING_REQUIRED);
-}
-
-
-/*
- * ks_get_vacancy_backlog
- *   Get a vacancy listeing child from the backlog list
- *
- * Arguments:
- *   parent: the listener daemon connection
- *
- * Return Value:
- *   the child listening connection or NULL in failure
- *
- * Notes
- *   Parent's lock should be acquired before calling.
- */
-
-ksock_tconn_t *
-ks_get_vacancy_backlog(
-    ksock_tconn_t *  parent
-    )
-{
-    ksock_tconn_t * child;
-
-    LASSERT(parent->kstc_type == kstt_listener);
-    LASSERT(parent->kstc_state == ksts_listening);
-
-    if (list_empty(&(parent->listener.kstc_listening.list))) {
-
-        child = NULL;
-
-    } else {
-
-        struct list_head * tmp;
-
-        /* check the listening queue and try to get a free connecton */
-
-        list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
-            child = list_entry (tmp, ksock_tconn_t, child.kstc_link);
-            spin_lock(&(child->kstc_lock));
-
-            if (!child->child.kstc_busy) {
-                LASSERT(child->kstc_state == ksts_associated);
-                child->child.kstc_busy = TRUE;
-                spin_unlock(&(child->kstc_lock));
-                break;
-            } else {
-                spin_unlock(&(child->kstc_lock));
-                child = NULL;
-            }
-        }
-    }
-
-    return child;
-}
-
-ks_addr_slot_t *
-KsSearchIpAddress(PUNICODE_STRING  DeviceName)
-{
-    ks_addr_slot_t * slot = NULL;
-    PLIST_ENTRY      list = NULL;
-
-    spin_lock(&ks_data.ksnd_addrs_lock);
-
-    list = ks_data.ksnd_addrs_list.Flink;
-    while (list != &ks_data.ksnd_addrs_list) {
-        slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
-        if (RtlCompareUnicodeString(
-                    DeviceName,
-                    &slot->devname,
-                    TRUE) == 0) {
-            break;
-        }
-        list = list->Flink;
-        slot = NULL;
-    }
-
-    spin_unlock(&ks_data.ksnd_addrs_lock);
-
-    return slot;
-}
-
-void
-KsCleanupIpAddresses()
-{
-    spin_lock(&ks_data.ksnd_addrs_lock);
-
-    while (!IsListEmpty(&ks_data.ksnd_addrs_list)) {
-
-        ks_addr_slot_t * slot = NULL;
-        PLIST_ENTRY      list = NULL;
-
-        list = RemoveHeadList(&ks_data.ksnd_addrs_list);
-        slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
-        cfs_free(slot);
-        ks_data.ksnd_naddrs--;
-    }
-
-    cfs_assert(ks_data.ksnd_naddrs == 0);
-    spin_unlock(&ks_data.ksnd_addrs_lock);
-}
-
-VOID
-KsAddAddressHandler(
-    IN  PTA_ADDRESS      Address,
-    IN  PUNICODE_STRING  DeviceName,
-    IN  PTDI_PNP_CONTEXT Context
-    )
-{
-    PTDI_ADDRESS_IP IpAddress = NULL;
-
-    if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
-         Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
-
-        ks_addr_slot_t * slot = NULL;
-
-        IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
-        KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
-                  DeviceName, Context, IpAddress->in_addr,
-                   (IpAddress->in_addr & 0xFF000000) >> 24,
-                   (IpAddress->in_addr & 0x00FF0000) >> 16,
-                   (IpAddress->in_addr & 0x0000FF00) >> 8,
-                   (IpAddress->in_addr & 0x000000FF) >> 0 ));
-
-        slot = KsSearchIpAddress(DeviceName);
-
-        if (slot != NULL) {
-            slot->up = TRUE;
-            slot->ip_addr = ntohl(IpAddress->in_addr);
-        } else {
-            slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO);
-            if (slot != NULL) {
-                spin_lock(&ks_data.ksnd_addrs_lock);
-                InsertTailList(&ks_data.ksnd_addrs_list, &slot->link);
-                sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++);
-                slot->ip_addr = ntohl(IpAddress->in_addr);
-                slot->up = TRUE;
-                RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length);
-                slot->devname.Length = DeviceName->Length;
-                slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR);
-                slot->devname.Buffer = slot->buffer;
-                spin_unlock(&ks_data.ksnd_addrs_lock);
-            }
-        }
-    }
-}
-
-VOID
-KsDelAddressHandler(
-    IN  PTA_ADDRESS      Address,
-    IN  PUNICODE_STRING  DeviceName,
-    IN  PTDI_PNP_CONTEXT Context
-    )
-{
-    PTDI_ADDRESS_IP IpAddress = NULL;
-
-    if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
-         Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
-
-        ks_addr_slot_t * slot = NULL;
-
-        slot = KsSearchIpAddress(DeviceName);
-
-        if (slot != NULL) {
-            slot->up = FALSE;
-        }
-
-        IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
-        KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
-                  DeviceName, Context, IpAddress->in_addr,
-                   (IpAddress->in_addr & 0xFF000000) >> 24,
-                   (IpAddress->in_addr & 0x00FF0000) >> 16,
-                   (IpAddress->in_addr & 0x0000FF00) >> 8,
-                   (IpAddress->in_addr & 0x000000FF) >> 0 ));
-    }
-}
-
-NTSTATUS
-KsRegisterPnpHandlers()
-{
-    TDI20_CLIENT_INTERFACE_INFO ClientInfo;
-
-    /* initialize the global ks_data members */
-    RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME);
-    spin_lock_init(&ks_data.ksnd_addrs_lock);
-    InitializeListHead(&ks_data.ksnd_addrs_list);
-
-    /* register the pnp handlers */
-    RtlZeroMemory(&ClientInfo, sizeof(ClientInfo));
-    ClientInfo.TdiVersion = TDI_CURRENT_VERSION;
-
-    ClientInfo.ClientName = &ks_data.ksnd_client_name;
-    ClientInfo.AddAddressHandlerV2 =  KsAddAddressHandler;
-    ClientInfo.DelAddressHandlerV2 =  KsDelAddressHandler;
-
-    return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo),
-                                  &ks_data.ksnd_pnp_handle);
-}
-
-VOID
-KsDeregisterPnpHandlers()
-{
-    if (ks_data.ksnd_pnp_handle) {
-
-        /* De-register the pnp handlers */
-
-        TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle);
-        ks_data.ksnd_pnp_handle = NULL;
-
-        /* cleanup all the ip address slots */
-        KsCleanupIpAddresses();
-    }
-}
-
-/*
- * KsConnectEventHandler
- *   Connect event handler event handler, called by the underlying TDI
- *   transport in response to an incoming request to the listening daemon.
- *
- *   it will grab a vacancy backlog from the children tconn list, and
- *   build an acception Irp with it, then transfer the Irp to TDI driver.
- *
- * Arguments:
- *   TdiEventContext:  the tdi connnection object of the listening daemon
- *   ......
- *
- * Return Value:
- *   Nt kernel status code
- *
- * Notes:
- *   N/A
- */
-
-NTSTATUS
-KsConnectEventHandler(
-    IN PVOID                    TdiEventContext,
-    IN LONG                     RemoteAddressLength,
-    IN PVOID                    RemoteAddress,
-    IN LONG                     UserDataLength,
-    IN PVOID                    UserData,
-    IN LONG                     OptionsLength,
-    IN PVOID                    Options,
-    OUT CONNECTION_CONTEXT *    ConnectionContext,
-    OUT PIRP *                  AcceptIrp
-    )
-{
-    ksock_tconn_t *             parent;
-    ksock_tconn_t *             child;
-
-    PFILE_OBJECT                FileObject;
-    PDEVICE_OBJECT              DeviceObject;
-    NTSTATUS                    Status;
-
-    PIRP                        Irp = NULL;
-    PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
-
-    KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql()));
-    parent = (ksock_tconn_t *) TdiEventContext;
-
-    LASSERT(parent->kstc_type == kstt_listener);
-
-    spin_lock(&(parent->kstc_lock));
-
-    if (parent->kstc_state == ksts_listening) {
-
-        /* allocate a new ConnectionInfo to backup the peer's info */
-
-        ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
-                NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) +
-                RemoteAddressLength, 'iCsK' );
-
-        if (NULL == ConnectionInfo) {
-
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            cfs_enter_debugger();
-            goto errorout;
-        }
-
-        /* initializing ConnectionInfo structure ... */
-
-        ConnectionInfo->UserDataLength = UserDataLength;
-        ConnectionInfo->UserData = UserData;
-        ConnectionInfo->OptionsLength = OptionsLength;
-        ConnectionInfo->Options = Options;
-        ConnectionInfo->RemoteAddressLength = RemoteAddressLength;
-        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
-
-        RtlCopyMemory(
-                ConnectionInfo->RemoteAddress,
-                RemoteAddress,
-                RemoteAddressLength
-                );
-
-        /* get the vacancy listening child tdi connections */
-
-        child = ks_get_vacancy_backlog(parent);
-
-        if (child) {
-
-            spin_lock(&(child->kstc_lock));
-            child->child.kstc_info.ConnectionInfo = ConnectionInfo;
-            child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress;
-            child->kstc_state = ksts_connecting;
-            spin_unlock(&(child->kstc_lock));
-
-        } else {
-
-            KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent));
-
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-
-            goto errorout;
-        }
-
-        FileObject = child->child.kstc_info.FileObject;
-        DeviceObject = IoGetRelatedDeviceObject (FileObject);
-
-        Irp = KsBuildTdiIrp(DeviceObject);
-
-        TdiBuildAccept(
-                Irp,
-                DeviceObject,
-                FileObject,
-                KsAcceptCompletionRoutine,
-                child,
-                NULL,
-                NULL
-                );
-
-        IoSetNextIrpStackLocation(Irp);
-
-        /* grap the refer of the child tdi connection */
-        ks_get_tconn(child);
-
-        Status = STATUS_MORE_PROCESSING_REQUIRED;
-
-        *AcceptIrp = Irp;
-        *ConnectionContext = child;
-
-    } else {
-
-        Status = STATUS_CONNECTION_REFUSED;
-        goto errorout;
-    }
-
-    spin_unlock(&(parent->kstc_lock));
-
-    return Status;
-
-errorout:
-
-    spin_unlock(&(parent->kstc_lock));
-
-    {
-        *AcceptIrp = NULL;
-        *ConnectionContext = NULL;
-
-        if (ConnectionInfo) {
-
-            ExFreePool(ConnectionInfo);
-        }
-
-        if (Irp) {
-
-            IoFreeIrp (Irp);
-        }
-    }
-
-    return Status;
-}
-
-/*
- * KsDisconnectCompletionRoutine
- *   the Irp completion routine for TdiBuildDisconect
- *
- *   We just signal the event and return MORE_PRO... to
- *   let the caller take the responsibility of the Irp.
- *
- * Arguments:
- *   DeviceObject:  the device object of the transport
- *   Irp:           the Irp is being completed.
- *   Context:       the event specified by the caller
- *
- * Return Value:
- *   Nt status code
- *
- * Notes:
- *   N/A
- */
-
-NTSTATUS
-KsDisconectCompletionRoutine (
-    IN PDEVICE_OBJECT   DeviceObject,
-    IN PIRP             Irp,
-    IN PVOID            Context
-    )
-{
-
-    KeSetEvent((PKEVENT) Context, 0, FALSE);
-
-    return STATUS_MORE_PROCESSING_REQUIRED;
-
-    UNREFERENCED_PARAMETER(DeviceObject);
-}
-
-
-/*
- * KsDisconnectHelper
- *   the routine to be executed in the WorkItem procedure
- *   this routine is to disconnect a tdi connection
- *
- * Arguments:
- *   Workitem:  the context transferred to the workitem
- *
- * Return Value:
- *   N/A
- *
- * Notes:
- *   tconn is already referred in abort_connecton ...
- */
-
-VOID
-KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem)
-{
-    ksock_tconn_t * tconn = WorkItem->tconn;
-
-    DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn);
-    ks_disconnect_tconn(tconn, WorkItem->Flags);
-
-    KeSetEvent(&(WorkItem->Event), 0, FALSE);
-
-    spin_lock(&(tconn->kstc_lock));
-    cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
-    spin_unlock(&(tconn->kstc_lock));
-    ks_put_tconn(tconn);
-}
-
-
-/*
- * KsDisconnectEventHandler
- *   Disconnect event handler event handler, called by the underlying TDI transport
- *   in response to an incoming disconnection notification from a remote node.
- *
- * Arguments:
- *   ConnectionContext:  tdi connnection object
- *   DisconnectFlags:    specifies the nature of the disconnection
- *   ......
- *
- * Return Value:
- *   Nt kernel status code
- *
- * Notes:
- *   N/A
- */
-
-
-NTSTATUS
-KsDisconnectEventHandler(
-    IN PVOID                TdiEventContext,
-    IN CONNECTION_CONTEXT   ConnectionContext,
-    IN LONG                 DisconnectDataLength,
-    IN PVOID                DisconnectData,
-    IN LONG                 DisconnectInformationLength,
-    IN PVOID                DisconnectInformation,
-    IN ULONG                DisconnectFlags
-    )
-{
-    ksock_tconn_t *         tconn;
-    NTSTATUS                Status;
-    PKS_DISCONNECT_WORKITEM WorkItem;
-
-    tconn = (ksock_tconn_t *)ConnectionContext;
-
-    KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n",
-                KeGetCurrentIrql() ));
-
-    KsPrint((2, "tconn = %x DisconnectFlags= %xh\n",
-                 tconn, DisconnectFlags));
-
-    ks_get_tconn(tconn);
-    spin_lock(&(tconn->kstc_lock));
-
-    WorkItem = &(tconn->kstc_disconnect);
-
-    if (tconn->kstc_state != ksts_connected) {
-
-        Status = STATUS_SUCCESS;
-
-    } else {
-
-        if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) {
-
-            Status = STATUS_REMOTE_DISCONNECT;
-
-        } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) {
-
-            Status = STATUS_GRACEFUL_DISCONNECT;
-        }
-
-        if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
-
-            ks_get_tconn(tconn);
-
-            WorkItem->Flags = DisconnectFlags;
-            WorkItem->tconn = tconn;
-
-            cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
-
-            /* queue the workitem to call */
-            ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
-        }
-    }
-
-    spin_unlock(&(tconn->kstc_lock));
-    ks_put_tconn(tconn);
-
-    return  (Status);
-}
-
-NTSTATUS
-KsTcpReceiveCompletionRoutine(
-    IN PIRP                         Irp,
-    IN PKS_TCP_COMPLETION_CONTEXT   Context
-    )
-{
-    NTSTATUS Status = Irp->IoStatus.Status;
-
-    if (NT_SUCCESS(Status)) {
-
-        ksock_tconn_t *tconn = Context->tconn;
-
-        PKS_TSDU_DAT  KsTsduDat = Context->CompletionContext;
-        PKS_TSDU_BUF  KsTsduBuf = Context->CompletionContext;
-
-        KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n",
-                   Context->KsTsduMgr->TotalBytes ));
-
-        spin_lock(&(tconn->kstc_lock));
-
-        if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
-            if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
-                cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
-            } else {
-                cfs_enter_debugger();
-            }
-        } else {
-            ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
-            if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
-                cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
-            } else {
-                cfs_enter_debugger();
-            }
-        }
-
-        spin_unlock(&(tconn->kstc_lock));
+        Status = ObReferenceObjectByHandle(
+                    *Handle,
+                    FILE_ANY_ACCESS,
+                    NULL,
+                    KernelMode,
+                    FileObject,
+                    NULL
+                    );
 
-        /* wake up the thread waiting for the completion of this Irp */
-        KeSetEvent(Context->Event, 0, FALSE);
+        if (!NT_SUCCESS(Status)) {
 
-        /* re-active the ks connection and wake up the scheduler */
-        if (tconn->kstc_conn && tconn->kstc_sched_cb) {
-            tconn->kstc_sched_cb( tconn, FALSE, NULL,
-                                  Context->KsTsduMgr->TotalBytes );
+            cfs_enter_debugger();
+            ZwClose(*Handle);
         }
 
     } else {
 
-        /* un-expected errors occur, we must abort the connection */
-        ks_abort_tconn(Context->tconn);
-    }
-
-    if (Context) {
-
-        /* Freeing the Context structure... */
-        ExFreePool(Context);
-        Context = NULL;
-    }
-
-
-    /* free the Irp */
-    if (Irp) {
-        IoFreeIrp(Irp);
+        cfs_enter_debugger();
     }
 
     return (Status);
 }
 
-
 /*
- * KsTcpCompletionRoutine
- *   the Irp completion routine for TdiBuildSend and TdiBuildReceive ...
- *   We need call the use's own CompletionRoutine if specified. Or
- *   it's a synchronous case, we need signal the event.
+ * KsCloseAddress
+ *   Release the Handle and FileObject of an opened tdi
+ *   address object
  *
  * Arguments:
- *   DeviceObject:  the device object of the transport
- *   Irp:           the Irp is being completed.
- *   Context:       the context we specified when issuing the Irp
+ *   Handle:       the handle to be released
+ *   FileObject:   the fileobject to be released
  *
  * Return Value:
- *   Nt status code
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
  * Notes:
  *   N/A
  */
 
 NTSTATUS
-KsTcpCompletionRoutine(
-    IN PDEVICE_OBJECT   DeviceObject,
-    IN PIRP             Irp,
-    IN PVOID            Context
-    )
+KsCloseAddress(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+)
 {
-    if (Context) {
-
-        PKS_TCP_COMPLETION_CONTEXT  CompletionContext = NULL;
-        ksock_tconn_t * tconn = NULL;
-
-        CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context;
-        tconn = CompletionContext->tconn;
-
-        /* release the chained mdl */
-        KsReleaseMdl(Irp->MdlAddress, FALSE);
-        Irp->MdlAddress = NULL;
-
-        if (CompletionContext->CompletionRoutine) {
-
-            if ( CompletionContext->bCounted &&
-                 InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) {
-                    goto errorout;
-            }
-
-            //
-            // Giving control to user specified CompletionRoutine ...
-            //
-
-            CompletionContext->CompletionRoutine(
-                    Irp,
-                    CompletionContext
-                    );
-
-        } else {
-
-            //
-            // Signaling  the Event ...
-            //
+    NTSTATUS  Status = STATUS_SUCCESS;
 
-            KeSetEvent(CompletionContext->Event, 0, FALSE);
-        }
+    if (FileObject) {
 
-        /* drop the reference count of the tconn object */
-        ks_put_tconn(tconn);
+        ObDereferenceObject(FileObject);
+    }
 
-    } else {
+    if (Handle) {
 
-        cfs_enter_debugger();
+        Status = ZwClose(Handle);
     }
 
-errorout:
+    ASSERT(NT_SUCCESS(Status));
 
-    return STATUS_MORE_PROCESSING_REQUIRED;
+    return (Status);
 }
 
+
 /*
- * KsTcpSendCompletionRoutine
- *   the user specified Irp completion routine for asynchronous
- *   data transmission requests.
- *
- *   It will do th cleanup job of the ksock_tx_t and wake up the
- *   ks scheduler thread
+ * KsOpenConnection
+ *   Open a tdi connection object
  *
  * Arguments:
- *   Irp:           the Irp is being completed.
- *   Context:       the context we specified when issuing the Irp
+ *   DeviceName:   device name of the connection object
+ *   ConnectionContext: the connection context
+ *   Handle:       the newly opened handle
+ *   FileObject:   the newly opened fileobject
  *
  * Return Value:
- *   Nt status code
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
  * Notes:
  *   N/A
  */
 
 NTSTATUS
-KsTcpSendCompletionRoutine(
-    IN PIRP                         Irp,
-    IN PKS_TCP_COMPLETION_CONTEXT   Context
-    )
+KsOpenConnection(
+    IN PUNICODE_STRING      DeviceName,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
+   )
 {
-    NTSTATUS        Status = Irp->IoStatus.Status;
-    ULONG           rc = Irp->IoStatus.Information;
-    ksock_tconn_t * tconn = Context->tconn;
-    PKS_TSDUMGR     KsTsduMgr = Context->KsTsduMgr;
+    NTSTATUS            Status = STATUS_SUCCESS;
 
-    ENTRY;
+    PFILE_FULL_EA_INFORMATION Ea = NULL;
+    ULONG               EaLength;
+    UCHAR               EaBuffer[EA_MAX_LENGTH];
 
-    LASSERT(tconn) ;
+    OBJECT_ATTRIBUTES   ObjectAttributes;
+    IO_STATUS_BLOCK     IoStatus;
 
-    if (NT_SUCCESS(Status)) {
+    //
+    // Building EA for the Connection Object to be Opened ...
+    //
 
-        if (Context->bCounted) {
-            PVOID   tx = Context->CompletionContext;
+    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH;
+    Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT);
+    RtlCopyMemory(
+        &(Ea->EaName),
+        TdiConnectionContext,
+        Ea->EaNameLength + 1
+        );
+    RtlMoveMemory(
+        &(Ea->EaName[Ea->EaNameLength + 1]),
+        &ConnectionContext,
+        sizeof(CONNECTION_CONTEXT)
+        );
+    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+                               Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT);
 
-            ASSERT(tconn->kstc_update_tx != NULL);
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-            /* update the tx, rebasing the kiov or iov pointers */
-            tx = tconn->kstc_update_tx(tconn, tx, rc);
 
-            /* update the KsTsudMgr total bytes */
-            spin_lock(&tconn->kstc_lock);
-            KsTsduMgr->TotalBytes -= rc;
-            spin_unlock(&tconn->kstc_lock);
+    //
+    // Initializing ...
+    //
 
-            /*
-             * now it's time to re-queue the conns into the
-             * scheduler queue and wake the scheduler thread.
-             */
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE |
+        OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
 
-            if (tconn->kstc_conn && tconn->kstc_sched_cb) {
-                tconn->kstc_sched_cb( tconn, TRUE, tx, 0);
-            }
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-        } else {
+    //
+    // Creating the Connection Object ...
+    //
 
-            PKS_TSDU            KsTsdu = Context->CompletionContext;
-            PKS_TSDU_BUF        KsTsduBuf = Context->CompletionContext2;
-            PKS_TSDU_DAT        KsTsduDat = Context->CompletionContext2;
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                NULL,
+                FILE_ATTRIBUTE_NORMAL,
+                FILE_SHARE_READ | FILE_SHARE_WRITE,
+                FILE_OPEN,
+                0,
+                Ea,
+                EaLength
+                );
 
-            spin_lock(&tconn->kstc_lock);
-            /* This is bufferred sending ... */
-            ASSERT(KsTsduBuf->StartOffset == 0);
 
-            if (KsTsduBuf->DataLength > Irp->IoStatus.Information) {
-                /* not fully sent .... we have to abort the connection */
-                spin_unlock(&tconn->kstc_lock);
-                ks_abort_tconn(tconn);
-                goto errorout;
-            }
+    if (NT_SUCCESS(Status)) {
 
-            if (KsTsduBuf->TsduType  == TSDU_TYPE_BUF) {
-                /* free the buffer */
-                ExFreePool(KsTsduBuf->UserBuffer);
-                KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength;
-                KsTsdu->StartOffset   += sizeof(KS_TSDU_BUF);
-            } else if (KsTsduDat->TsduType  == TSDU_TYPE_DAT) {
-                KsTsduMgr->TotalBytes -= KsTsduDat->DataLength;
-                KsTsdu->StartOffset   += KsTsduDat->TotalLength;
-            } else {
-                cfs_enter_debugger(); /* shoult not get here */
-            }
+        //
+        // Now Obtaining the FileObject of the Connection Object ...
+        //
 
-            if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+        Status = ObReferenceObjectByHandle(
+                    *Handle,
+                    FILE_ANY_ACCESS,
+                    NULL,
+                    KernelMode,
+                    FileObject,
+                    NULL
+                    );
 
-                list_del(&KsTsdu->Link);
-                KsTsduMgr->NumOfTsdu--;
-                KsPutKsTsdu(KsTsdu);
-            }
+        if (!NT_SUCCESS(Status)) {
 
-            spin_unlock(&tconn->kstc_lock);
+            cfs_enter_debugger();
+            ZwClose(*Handle);
         }
 
     } else {
 
-        /* cfs_enter_debugger(); */
+        cfs_enter_debugger();
+    }
 
-        /*
-         *  for the case that the transmission is ussuccessful,
-         *  we need abort the tdi connection, but not destroy it.
-         *  the socknal conn will drop the refer count, then the
-         *  tdi connection will be freed.
-         */
+    return (Status);
+}
 
-        ks_abort_tconn(tconn);
-    }
+/*
+ * KsCloseConnection
+ *   Release the Handle and FileObject of an opened tdi
+ *   connection object
+ *
+ * Arguments:
+ *   Handle:       the handle to be released
+ *   FileObject:   the fileobject to be released
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   N/A
+ */
 
-errorout:
+NTSTATUS
+KsCloseConnection(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+    )
+{
+    NTSTATUS  Status = STATUS_SUCCESS;
 
-    /* freeing the Context structure... */
+    if (FileObject) {
 
-    if (Context) {
-        ExFreePool(Context);
-        Context = NULL;
+        ObDereferenceObject(FileObject);
     }
 
-    /* it's our duty to free the Irp. */
+    if (Handle) {
 
-    if (Irp) {
-        IoFreeIrp(Irp);
-        Irp = NULL;
+        Status = ZwClose(Handle);
     }
 
-    EXIT;
+    ASSERT(NT_SUCCESS(Status));
 
-    return Status;
+    return (Status);
 }
 
+
 /*
- *  Normal receive event handler
+ * KsAssociateAddress
+ *   Associate an address object with a connection object
  *
- *  It will move data from system Tsdu to our TsduList
+ * Arguments:
+ *   AddressHandle:  the handle of the address object
+ *   ConnectionObject:  the FileObject of the connection
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   N/A
  */
 
 NTSTATUS
-KsTcpReceiveEventHandler(
-    IN PVOID                TdiEventContext,
-    IN CONNECTION_CONTEXT   ConnectionContext,
-    IN ULONG                ReceiveFlags,
-    IN ULONG                BytesIndicated,
-    IN ULONG                BytesAvailable,
-    OUT ULONG *             BytesTaken,
-    IN PVOID                Tsdu,
-    OUT PIRP *              IoRequestPacket
-   )
+KsAssociateAddress(
+    IN HANDLE           AddressHandle,
+    IN PFILE_OBJECT     ConnectionObject
+    )
 {
     NTSTATUS            Status;
-
-    ksock_tconn_t *     tconn;
-
-    PKS_CHAIN           KsChain;
-    PKS_TSDUMGR         KsTsduMgr;
-    PKS_TSDU            KsTsdu;
-    PKS_TSDU_DAT        KsTsduDat;
-    PKS_TSDU_BUF        KsTsduBuf;
-
-    BOOLEAN             bIsExpedited;
-    BOOLEAN             bIsCompleteTsdu;
-
-    BOOLEAN             bNewTsdu = FALSE;
-    BOOLEAN             bNewBuff = FALSE;
-
-    PCHAR               Buffer = NULL;
-
-    PIRP                Irp = NULL;
-    PMDL                Mdl = NULL;
-    PFILE_OBJECT        FileObject;
     PDEVICE_OBJECT      DeviceObject;
+    PIRP                Irp;
 
-    ULONG               BytesReceived = 0;
-
-    PKS_TCP_COMPLETION_CONTEXT context = NULL;
-
-
-    tconn = (ksock_tconn_t *) ConnectionContext;
-
-    ks_get_tconn(tconn);
-
-    /* check whether the whole body of payload is received or not */
-    if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) &&
-         (BytesIndicated == BytesAvailable) ) {
-        bIsCompleteTsdu = TRUE;
-    } else {
-        bIsCompleteTsdu = FALSE;
-    }
-
-    bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
-
-    KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable));
-    KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited ));
-
-    spin_lock(&(tconn->kstc_lock));
+    //
+    // Getting the DeviceObject from Connection FileObject
+    //
 
-    /*  check whether we are conntected or not listener Â¡Â­*/
-    if ( !((tconn->kstc_state == ksts_connected) &&
-           (tconn->kstc_type == kstt_sender ||
-            tconn->kstc_type == kstt_child))) {
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-        *BytesTaken = BytesIndicated;
+    //
+    // Building Tdi Internal Irp ...
+    //
 
-        spin_unlock(&(tconn->kstc_lock));
-        ks_put_tconn(tconn);
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-        return (STATUS_SUCCESS);
-    }
+    if (NULL == Irp) {
 
-    if (tconn->kstc_type == kstt_sender) {
-        KsChain = &(tconn->sender.kstc_recv);
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        KsChain = &(tconn->child.kstc_recv);
-    }
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
-    if (bIsExpedited) {
-        KsTsduMgr = &(KsChain->Expedited);
     } else {
-        KsTsduMgr = &(KsChain->Normal);
-    }
-
-    /* if the Tsdu is even larger than the biggest Tsdu, we have
-       to allocate new buffer and use TSDU_TYOE_BUF to store it */
-
-    if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size -
-         KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
-        bNewBuff = TRUE;
-    }
 
-    /* retrieve the latest Tsdu buffer form TsduMgr
-       list if the list is not empty. */
-
-    if (list_empty(&(KsTsduMgr->TsduList))) {
-
-        LASSERT(KsTsduMgr->NumOfTsdu == 0);
-        KsTsdu = NULL;
-
-    } else {
+        //
+        // Associating the Address Object with the Connection Object
+        //
 
-        LASSERT(KsTsduMgr->NumOfTsdu > 0);
-        KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+        TdiBuildAssociateAddress(
+            Irp,
+            DeviceObject,
+            ConnectionObject,
+            NULL,
+            NULL,
+            AddressHandle
+            );
 
-        /* if this Tsdu does not contain enough space, we need
-           allocate a new Tsdu queue. */
+        //
+        // Calling the Transport Driver with the Prepared Irp
+        //
 
-        if (bNewBuff) {
-            if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) >
-                 KsTsdu->TotalLength )  {
-                KsTsdu = NULL;
-            }
-        } else {
-            if ( KS_TSDU_STRU_SIZE(BytesAvailable) >
-                 KsTsdu->TotalLength - KsTsdu->LastOffset ) {
-                KsTsdu = NULL;
-            }
-        }
+        Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
     }
 
-    /* allocating the buffer for TSDU_TYPE_BUF */
-    if (bNewBuff) {
-        Buffer = ExAllocatePool(NonPagedPool, BytesAvailable);
-        if (NULL == Buffer) {
-            /* there's no enough memory for us. We just try to
-               receive maximum bytes with a new Tsdu */
-            bNewBuff = FALSE;
-            KsTsdu = NULL;
-        }
-    }
+    return (Status);
+}
 
-    /* allocate a new Tsdu in case we are not statisfied. */
 
-    if (NULL == KsTsdu) {
+/*
+ * KsDisassociateAddress
+ *   Disassociate the connection object (the relationship with
+ *   the corresponding address object will be dismissed.)
+ *
+ * Arguments:
+ *   ConnectionObject:  the FileObject of the connection
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   N/A
+ */
 
-        KsTsdu = KsAllocateKsTsdu();
+NTSTATUS
+KsDisassociateAddress(
+    IN PFILE_OBJECT     ConnectionObject
+    )
+{
+    NTSTATUS            Status;
+    PDEVICE_OBJECT      DeviceObject;
+    PIRP                   Irp;
 
-        if (NULL == KsTsdu) {
-            goto errorout;
-        } else {
-            bNewTsdu = TRUE;
-        }
-    }
+    //
+    // Getting the DeviceObject from Connection FileObject
+    //
 
-    KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-    KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-    if (bNewBuff) {
+    //
+    // Building Tdi Internal Irp ...
+    //
 
-        /* setup up the KS_TSDU_BUF record */
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-        KsTsduBuf->TsduType     = TSDU_TYPE_BUF;
-        KsTsduBuf->TsduFlags    = 0;
-        KsTsduBuf->StartOffset  = 0;
-        KsTsduBuf->UserBuffer   = Buffer;
-        KsTsduBuf->DataLength   = BytesReceived = BytesAvailable;
+    if (NULL == Irp) {
 
-        KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
     } else {
 
-        /* setup the KS_TSDU_DATA to contain all the messages */
-
-        KsTsduDat->TsduType     =  TSDU_TYPE_DAT;
-        KsTsduDat->TsduFlags    = 0;
+        //
+        // Disassociating the Address Object from the Connection Object
+        //
 
-        if ( KsTsdu->TotalLength - KsTsdu->LastOffset >=
-            KS_TSDU_STRU_SIZE(BytesAvailable) ) {
-            BytesReceived = BytesAvailable;
-        } else {
-            BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset -
-                            FIELD_OFFSET(KS_TSDU_DAT, Data);
-            BytesReceived &= (~((ULONG)3));
-        }
-        KsTsduDat->DataLength   =  BytesReceived;
-        KsTsduDat->TotalLength  =  KS_TSDU_STRU_SIZE(BytesReceived);
-        KsTsduDat->StartOffset  = 0;
+        TdiBuildDisassociateAddress(
+            Irp,
+            DeviceObject,
+            ConnectionObject,
+            NULL,
+            NULL
+            );
 
-        Buffer = &KsTsduDat->Data[0];
+        //
+        // Calling the Transport Driver with the Prepared Irp
+        //
 
-        KsTsdu->LastOffset += KsTsduDat->TotalLength;
+        Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
     }
 
-    KsTsduMgr->TotalBytes  +=  BytesReceived;
+    return (Status);
+}
 
-    if (bIsCompleteTsdu) {
 
-        /* It's a complete receive, we just move all
-           the data from system to our Tsdu */
+/*
 
-        RtlMoveMemory(
-            Buffer,
-            Tsdu,
-            BytesReceived
-            );
+//
+// Connection Control Event Callbacks
+//
 
-        *BytesTaken = BytesReceived;
-        Status = STATUS_SUCCESS;
+TDI_EVENT_CONNECT
+TDI_EVENT_DISCONNECT
+TDI_EVENT_ERROR
 
-        if (bNewTsdu) {
-            list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
-            KsTsduMgr->NumOfTsdu++;
-        }
+//
+// Tcp Event Callbacks
+//
 
-        KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+TDI_EVENT_RECEIVE
+TDI_EVENT_RECEIVE_EXPEDITED
+TDI_EVENT_CHAINED_RECEIVE
+TDI_EVENT_CHAINED_RECEIVE_EXPEDITED
 
-        /* re-active the ks connection and wake up the scheduler */
-        if (tconn->kstc_conn && tconn->kstc_sched_cb) {
-            tconn->kstc_sched_cb( tconn, FALSE, NULL,
-                                  KsTsduMgr->TotalBytes );
-        }
+//
+// Udp Event Callbacks
+//
 
-    } else {
+TDI_EVENT_RECEIVE_DATAGRAM
+TDI_EVENT_CHAINED_RECEIVE_DATAGRAM
 
-        /* there's still data in tdi internal queue, we need issue a new
-           Irp to receive all of them. first allocate the tcp context */
+*/
 
-        context = ExAllocatePoolWithTag(
-                        NonPagedPool,
-                        sizeof(KS_TCP_COMPLETION_CONTEXT),
-                        'cTsK');
 
-        if (!context) {
+/*
+ * KsSetEventHandlers
+ *   Set the tdi event callbacks with an address object
+ *
+ * Arguments:
+ *   AddressObject: the FileObject of the address object
+ *   EventContext:  the parameter for the callbacks
+ *   Handlers:      the handlers indicator array
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * NOTES:
+ *   N/A
+ */
 
-            Status = STATUS_INSUFFICIENT_RESOURCES;
-            goto errorout;
-        }
+NTSTATUS
+KsSetEventHandlers(
+    IN PFILE_OBJECT                         AddressObject,  // Address File Object
+    IN PVOID                                EventContext,   // Context for Handlers
+    IN PKS_EVENT_HANDLERS                   Handlers        // Handlers Indictor
+   )
+{
+    NTSTATUS             Status = STATUS_SUCCESS;
+    PDEVICE_OBJECT       DeviceObject;
+    USHORT               i = 0;
 
-        /* setup the context */
-        RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT));
+    DeviceObject = IoGetRelatedDeviceObject(AddressObject);
 
-        context->tconn             = tconn;
-        context->CompletionRoutine = KsTcpReceiveCompletionRoutine;
-        context->CompletionContext = KsTsdu;
-        context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat;
-        context->KsTsduMgr         = KsTsduMgr;
-        context->Event             = &(KsTsduMgr->Event);
+    for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) {
 
-        if (tconn->kstc_type == kstt_sender) {
-            FileObject = tconn->sender.kstc_info.FileObject;
-        } else {
-            FileObject = tconn->child.kstc_info.FileObject;
-        }
+        //
+        // Setup the tdi event callback handler if requested.
+        //
 
-        DeviceObject = IoGetRelatedDeviceObject(FileObject);
+        if (Handlers->IsActive[i]) {
 
-        /* build new tdi Irp and setup it. */
-        Irp = KsBuildTdiIrp(DeviceObject);
+            PIRP            Irp;
 
-        if (NULL == Irp) {
-            goto errorout;
-        }
+            //
+            // Building Tdi Internal Irp ...
+            //
 
-        Status = KsLockUserBuffer(
-                    Buffer,
-                    FALSE,
-                    BytesReceived,
-                    IoModifyAccess,
-                    &Mdl
-                    );
+            Irp = KsBuildTdiIrp(DeviceObject);
 
-        if (!NT_SUCCESS(Status)) {
-            goto errorout;
-        }
+            if (NULL == Irp) {
 
-        TdiBuildReceive(
-            Irp,
-            DeviceObject,
-            FileObject,
-            KsTcpCompletionRoutine,
-            context,
-            Mdl,
-            ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED),
-            BytesReceived
-          );
+                Status = STATUS_INSUFFICIENT_RESOURCES;
 
-        IoSetNextIrpStackLocation(Irp);
+            } else {
 
-        /* return the newly built Irp to transport driver,
-           it will process it to receive all the data */
+                //
+                // Building the Irp to set the Event Handler ...
+                //
 
-        *IoRequestPacket = Irp;
-        *BytesTaken = 0;
+                TdiBuildSetEventHandler(
+                    Irp,
+                    DeviceObject,
+                    AddressObject,
+                    NULL,
+                    NULL,
+                    i,                      /* tdi event type */
+                    Handlers->Handler[i],   /* tdi event handler */
+                    EventContext            /* context for the handler */
+                    );
 
-        if (bNewTsdu) {
+                //
+                // Calling the Transport Driver with the Prepared Irp
+                //
 
-            list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
-            KsTsduMgr->NumOfTsdu++;
-        }
+                Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
 
-        if (bNewBuff) {
-            cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
-        } else {
-            cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
+                //
+                // tcp/ip tdi does not support these two event callbacks
+                //
+
+                if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE ||
+                     i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) {
+                    cfs_enter_debugger();
+                    Status = STATUS_SUCCESS;
+                }
+            }
+
+            if (!NT_SUCCESS(Status)) {
+                cfs_enter_debugger();
+                goto errorout;
+            }
         }
-        ks_get_tconn(tconn);
-        Status = STATUS_MORE_PROCESSING_REQUIRED;
     }
 
-    spin_unlock(&(tconn->kstc_lock));
-    ks_put_tconn(tconn);
-
-    return (Status);
 
 errorout:
 
-    spin_unlock(&(tconn->kstc_lock));
-
-    if (bNewTsdu && (KsTsdu != NULL)) {
-        KsFreeKsTsdu(KsTsdu);
-    }
-
-    if (Mdl) {
-        KsReleaseMdl(Mdl, FALSE);
-    }
-
-    if (Irp) {
-        IoFreeIrp(Irp);
-    }
+    if (!NT_SUCCESS(Status)) {
 
-    if (context) {
-        ExFreePool(context);
+        KsPrint((1, "KsSetEventHandlers: Error Status = %xh (%s)\n",
+                    Status, KsNtStatusToString(Status) ));
     }
 
-    ks_abort_tconn(tconn);
-    ks_put_tconn(tconn);
-
-    *BytesTaken = BytesAvailable;
-    Status = STATUS_SUCCESS;
-
     return (Status);
 }
 
-/*
- *  Expedited receive event handler
- */
-
-NTSTATUS
-KsTcpReceiveExpeditedEventHandler(
-    IN PVOID                TdiEventContext,
-    IN CONNECTION_CONTEXT   ConnectionContext,
-    IN ULONG                ReceiveFlags,
-    IN ULONG                BytesIndicated,
-    IN ULONG                BytesAvailable,
-    OUT ULONG *             BytesTaken,
-    IN PVOID                Tsdu,
-    OUT PIRP *              IoRequestPacket
-    )
-{
-    return KsTcpReceiveEventHandler(
-                TdiEventContext,
-                ConnectionContext,
-                ReceiveFlags | TDI_RECEIVE_EXPEDITED,
-                BytesIndicated,
-                BytesAvailable,
-                BytesTaken,
-                Tsdu,
-                IoRequestPacket
-                );
-}
 
 
 /*
- *  Bulk receive event handler
+ * KsQueryAddressInfo
+ *   Query the address of the FileObject specified
  *
- *  It will queue all the system Tsdus to our TsduList.
- *  Then later ks_recv_mdl will release them.
- */
-
-NTSTATUS
-KsTcpChainedReceiveEventHandler (
-    IN PVOID TdiEventContext,       // the event context
-    IN CONNECTION_CONTEXT ConnectionContext,
-    IN ULONG ReceiveFlags,
-    IN ULONG ReceiveLength,
-    IN ULONG StartingOffset,        // offset of start of client data in TSDU
-    IN PMDL  Tsdu,                  // TSDU data chain
-    IN PVOID TsduDescriptor         // for call to TdiReturnChainedReceives
-    )
-{
-
-    NTSTATUS            Status;
-
-    ksock_tconn_t *     tconn;
-
-    PKS_CHAIN           KsChain;
-    PKS_TSDUMGR         KsTsduMgr;
-    PKS_TSDU            KsTsdu;
-    PKS_TSDU_MDL        KsTsduMdl;
-
-    BOOLEAN             bIsExpedited;
-    BOOLEAN             bNewTsdu = FALSE;
-
-    tconn = (ksock_tconn_t *) ConnectionContext;
-
-    bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
-
-    KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited));
-
-    ks_get_tconn(tconn);
-    spin_lock(&(tconn->kstc_lock));
+ * Arguments:
+ *   FileObject:  the FileObject to be queried
+ *   AddressInfo: buffer to contain the address info
+ *   AddressSize: length of the AddressInfo buffer
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   N/A
+ */
 
-    /* check whether we are conntected or not listener Â¡Â­*/
-    if ( !((tconn->kstc_state == ksts_connected) &&
-         (tconn->kstc_type == kstt_sender ||
-          tconn->kstc_type == kstt_child))) {
+NTSTATUS
+KsQueryAddressInfo(
+    PFILE_OBJECT            FileObject,
+    PTDI_ADDRESS_INFO       AddressInfo,
+    PULONG                  AddressSize
+   )
+{
+    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
+    PIRP              Irp = NULL;
+    PMDL              Mdl;
+    PDEVICE_OBJECT    DeviceObject;
 
-        spin_unlock(&(tconn->kstc_lock));
-        ks_put_tconn(tconn);
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-        return (STATUS_SUCCESS);
-    }
+    DeviceObject = IoGetRelatedDeviceObject(FileObject);
 
-    /* get the latest Tsdu buffer form TsduMgr list.
-       just set NULL if the list is empty. */
+    RtlZeroMemory(AddressInfo, *(AddressSize));
 
-    if (tconn->kstc_type == kstt_sender) {
-        KsChain = &(tconn->sender.kstc_recv);
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        KsChain = &(tconn->child.kstc_recv);
-    }
+    //
+    // Allocating the Tdi Setting Irp ...
+    //
 
-    if (bIsExpedited) {
-        KsTsduMgr = &(KsChain->Expedited);
-    } else {
-        KsTsduMgr = &(KsChain->Normal);
-    }
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-    if (list_empty(&(KsTsduMgr->TsduList))) {
+    if (NULL == Irp) {
 
-        LASSERT(KsTsduMgr->NumOfTsdu == 0);
-        KsTsdu = NULL;
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
     } else {
 
-        LASSERT(KsTsduMgr->NumOfTsdu > 0);
-        KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
-        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
-        if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) {
-            KsTsdu = NULL;
-        }
-    }
-
-    /* if there's no Tsdu or the free size is not enough for this
-       KS_TSDU_MDL structure. We need re-allocate a new Tsdu.  */
+        //
+        // Locking the User Buffer / Allocating a MDL for it
+        //
 
-    if (NULL == KsTsdu) {
+        Status = KsLockUserBuffer(
+                    AddressInfo,
+                    FALSE,
+                    *(AddressSize),
+                    IoModifyAccess,
+                    &Mdl
+                    );
 
-        KsTsdu = KsAllocateKsTsdu();
+        if (!NT_SUCCESS(Status)) {
 
-        if (NULL == KsTsdu) {
-            goto errorout;
-        } else {
-            bNewTsdu = TRUE;
+            IoFreeIrp(Irp);
+            Irp = NULL;
         }
     }
 
-    /* just queue the KS_TSDU_MDL to the Tsdu buffer */
-
-    KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
-
-    KsTsduMdl->TsduType     =  TSDU_TYPE_MDL;
-    KsTsduMdl->DataLength   =  ReceiveLength;
-    KsTsduMdl->StartOffset  =  StartingOffset;
-    KsTsduMdl->Mdl          =  Tsdu;
-    KsTsduMdl->Descriptor   =  TsduDescriptor;
-
-    KsTsdu->LastOffset     += sizeof(KS_TSDU_MDL);
-    KsTsduMgr->TotalBytes  += ReceiveLength;
+    if (Irp) {
 
-    KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n",
-                KsTsduMgr->TotalBytes ));
+        LASSERT(NT_SUCCESS(Status));
 
-    Status = STATUS_PENDING;
+        TdiBuildQueryInformation(
+                    Irp,
+                    DeviceObject,
+                    FileObject,
+                    NULL,
+                    NULL,
+                    TDI_QUERY_ADDRESS_INFO,
+                    Mdl
+                    );
 
-    /* attach it to the TsduMgr list if the Tsdu is newly created. */
-    if (bNewTsdu) {
+        Status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    AddressSize
+                    );
 
-        list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
-        KsTsduMgr->NumOfTsdu++;
+        KsReleaseMdl(Mdl, FALSE);
     }
 
-    spin_unlock(&(tconn->kstc_lock));
-
-    /* wake up the threads waiing in ks_recv_mdl */
-    KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+    if (!NT_SUCCESS(Status)) {
 
-    if (tconn->kstc_conn && tconn->kstc_sched_cb) {
-        tconn->kstc_sched_cb( tconn, FALSE, NULL,
-                              KsTsduMgr->TotalBytes );
+        cfs_enter_debugger();
+        //TDI_BUFFER_OVERFLOW
     }
 
-    ks_put_tconn(tconn);
+    return (Status);
+}
 
-    /* Return STATUS_PENDING to system because we are still
-       owning the MDL resources. ks_recv_mdl is expected
-       to free the MDL resources. */
+/*
+ * KsQueryProviderInfo
+ *   Query the underlying transport device's information
+ *
+ * Arguments:
+ *   TdiDeviceName:  the transport device's name string
+ *   ProviderInfo:   TDI_PROVIDER_INFO structure
+ *
+ * Return Value:
+ *   NTSTATUS:       Nt system status code
+  *
+ * NOTES:
+ *   N/A
+ */
 
-    return (Status);
+NTSTATUS
+KsQueryProviderInfo(
+    PWSTR               TdiDeviceName,
+    PTDI_PROVIDER_INFO  ProviderInfo
+   )
+{
+    NTSTATUS            Status = STATUS_SUCCESS;
 
-errorout:
+    PIRP                Irp = NULL;
+    PMDL                Mdl = NULL;
 
-    spin_unlock(&(tconn->kstc_lock));
+    UNICODE_STRING      ControlName;
 
-    if (bNewTsdu && (KsTsdu != NULL)) {
-        KsFreeKsTsdu(KsTsdu);
-    }
+    HANDLE              Handle;
+    PFILE_OBJECT        FileObject;
+    PDEVICE_OBJECT      DeviceObject;
 
-    /* abort the tdi connection */
-    ks_abort_tconn(tconn);
-    ks_put_tconn(tconn);
+    ULONG               ProviderSize = 0;
 
+    RtlInitUnicodeString(&ControlName, TdiDeviceName);
 
-    Status = STATUS_SUCCESS;
+    //
+    // Open the Tdi Control Channel
+    //
 
-    return (Status);
-}
+    Status = KsOpenControl(
+                &ControlName,
+                &Handle,
+                &FileObject
+                );
 
+    if (!NT_SUCCESS(Status)) {
 
-/*
- *  Expedited & Bulk receive event handler
- */
+        KsPrint((1, "KsQueryProviderInfo: Fail to open the tdi control channel.\n"));
+        return (Status);
+    }
 
-NTSTATUS
-KsTcpChainedReceiveExpeditedEventHandler (
-    IN PVOID                TdiEventContext,       // the event context
-    IN CONNECTION_CONTEXT   ConnectionContext,
-    IN ULONG                ReceiveFlags,
-    IN ULONG                ReceiveLength,
-    IN ULONG                StartingOffset,        // offset of start of client data in TSDU
-    IN PMDL                 Tsdu,                  // TSDU data chain
-    IN PVOID                TsduDescriptor         // for call to TdiReturnChainedReceives
-    )
-{
-    return KsTcpChainedReceiveEventHandler(
-                TdiEventContext,
-                ConnectionContext,
-                ReceiveFlags | TDI_RECEIVE_EXPEDITED,
-                ReceiveLength,
-                StartingOffset,
-                Tsdu,
-                TsduDescriptor );
-}
+    //
+    // Obtain The Related Device Object
+    //
 
+    DeviceObject = IoGetRelatedDeviceObject(FileObject);
 
-VOID
-KsPrintProviderInfo(
-   PWSTR DeviceName,
-   PTDI_PROVIDER_INFO ProviderInfo
-   )
-{
-    KsPrint((2, "%ws ProviderInfo:\n", DeviceName));
+    ProviderSize = sizeof(TDI_PROVIDER_INFO);
+    RtlZeroMemory(ProviderInfo, ProviderSize);
 
-    KsPrint((2, "  Version              : 0x%4.4X\n", ProviderInfo->Version ));
-    KsPrint((2, "  MaxSendSize          : %d\n", ProviderInfo->MaxSendSize ));
-    KsPrint((2, "  MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData ));
-    KsPrint((2, "  MaxDatagramSize      : %d\n", ProviderInfo->MaxDatagramSize ));
-    KsPrint((2, "  ServiceFlags         : 0x%8.8X\n", ProviderInfo->ServiceFlags ));
+    //
+    // Allocating the Tdi Setting Irp ...
+    //
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) {
-        KsPrint((2, "  CONNECTION_MODE\n"));
-    }
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) {
-        KsPrint((2, "  ORDERLY_RELEASE\n"));
-    }
+    if (NULL == Irp) {
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) {
-        KsPrint((2, "  CONNECTIONLESS_MODE\n"));
-    }
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) {
-        KsPrint((2, "  ERROR_FREE_DELIVERY\n"));
-    }
+    } else {
 
-    if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) {
-        KsPrint((2, "  SECURITY_LEVEL\n"));
-    }
+        //
+        // Locking the User Buffer / Allocating a MDL for it
+        //
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) {
-        KsPrint((2, "  BROADCAST_SUPPORTED\n"));
-    }
+        Status = KsLockUserBuffer(
+                    ProviderInfo,
+                    FALSE,
+                    ProviderSize,
+                    IoModifyAccess,
+                    &Mdl
+                    );
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) {
-        KsPrint((2, "  MULTICAST_SUPPORTED\n"));
-    }
+        if (!NT_SUCCESS(Status)) {
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) {
-        KsPrint((2, "  DELAYED_ACCEPTANCE\n"));
+            IoFreeIrp(Irp);
+            Irp = NULL;
+        }
     }
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) {
-        KsPrint((2, "  EXPEDITED_DATA\n"));
-    }
+    if (Irp) {
 
-    if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) {
-        KsPrint((2, "  INTERNAL_BUFFERING\n"));
-    }
+        LASSERT(NT_SUCCESS(Status));
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) {
-        KsPrint((2, "  ROUTE_DIRECTED\n"));
-    }
+        TdiBuildQueryInformation(
+                    Irp,
+                    DeviceObject,
+                    FileObject,
+                    NULL,
+                    NULL,
+                    TDI_QUERY_PROVIDER_INFO,
+                    Mdl
+                    );
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) {
-        KsPrint((2, "  NO_ZERO_LENGTH\n"));
-    }
+        Status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    &ProviderSize
+                    );
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) {
-        KsPrint((2, "  POINT_TO_POINT\n"));
+        KsReleaseMdl(Mdl, FALSE);
     }
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) {
-        KsPrint((2, "  MESSAGE_MODE\n"));
-    }
+    if (!NT_SUCCESS(Status)) {
 
-    if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) {
-        KsPrint((2, "  HALF_DUPLEX\n"));
+        cfs_enter_debugger();
+        //TDI_BUFFER_OVERFLOW
     }
 
-    KsPrint((2, "  MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData ));
-    KsPrint((2, "  MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData ));
-    KsPrint((2, "  NumberOfResources    : %d\n", ProviderInfo->NumberOfResources ));
-}
+    KsCloseControl(Handle, FileObject);
 
+    return (Status);
+}
 
 /*
- * KsAllocateKsTsdu
- *   Reuse a Tsdu from the freelist or allocate a new Tsdu
- *   from the LookAsideList table or the NonPagedPool
+ * KsQueryConnectionInfo
+ *   Query the connection info of the FileObject specified
+ *   (some statistical data of the traffic)
  *
  * Arguments:
- *   N/A
+ *   FileObject:     the FileObject to be queried
+ *   ConnectionInfo: buffer to contain the connection info
+ *   ConnectionSize: length of the ConnectionInfo buffer
  *
  * Return Value:
- *   PKS_Tsdu: the new Tsdu or NULL if it fails
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
  *
- * Notes:
+ * NOTES:
  *   N/A
  */
 
-PKS_TSDU
-KsAllocateKsTsdu()
+NTSTATUS
+KsQueryConnectionInfo(
+    PFILE_OBJECT            ConnectionObject,
+    PTDI_CONNECTION_INFO    ConnectionInfo,
+    PULONG                  ConnectionSize
+   )
 {
-    PKS_TSDU    KsTsdu = NULL;
+    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
+    PIRP              Irp = NULL;
+    PMDL              Mdl;
+    PDEVICE_OBJECT    DeviceObject;
 
-    spin_lock(&(ks_data.ksnd_tsdu_lock));
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
 
-    if (!list_empty (&(ks_data.ksnd_freetsdus))) {
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-        LASSERT(ks_data.ksnd_nfreetsdus > 0);
+    RtlZeroMemory(ConnectionInfo, *(ConnectionSize));
 
-        KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link);
-        list_del(&(KsTsdu->Link));
-        ks_data.ksnd_nfreetsdus--;
+    //
+    // Allocating the Tdi Query Irp ...
+    //
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+
+        Status = STATUS_INSUFFICIENT_RESOURCES;
 
     } else {
 
-        KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc(
-                        ks_data.ksnd_tsdu_slab, 0);
-    }
+        //
+        // Locking the User Buffer / Allocating a MDL for it
+        //
 
-    spin_unlock(&(ks_data.ksnd_tsdu_lock));
+        Status = KsLockUserBuffer(
+                    ConnectionInfo,
+                    FALSE,
+                    *(ConnectionSize),
+                    IoModifyAccess,
+                    &Mdl
+                    );
 
-    if (NULL != KsTsdu) {
-        KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size);
+        if (!NT_SUCCESS(Status)) {
+            // Locking failed: drop the Irp so the query below is skipped
+            IoFreeIrp(Irp);
+            Irp = NULL;
+        }
     }
 
-    return (KsTsdu);
-}
+    if (Irp) {
 
+        LASSERT(NT_SUCCESS(Status));
 
-/*
- * KsPutKsTsdu
- *   Move the Tsdu to the free tsdu list in ks_data.
- *
- * Arguments:
- *   KsTsdu: Tsdu to be moved.
- *
- * Return Value:
- *   N/A
- *
- * Notes:
- *   N/A
- */
+        TdiBuildQueryInformation(
+                    Irp,
+                    DeviceObject,
+                    ConnectionObject,
+                    NULL,
+                    NULL,
+                    TDI_QUERY_CONNECTION_INFO,
+                    Mdl
+                    );
 
-VOID
-KsPutKsTsdu(
-    PKS_TSDU  KsTsdu
-    )
-{
-    spin_lock(&(ks_data.ksnd_tsdu_lock));
+        Status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    ConnectionSize
+                    );
 
-    list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus));
-    ks_data.ksnd_nfreetsdus++;
+        KsReleaseMdl(Mdl, FALSE);
+    }
 
-    spin_unlock(&(ks_data.ksnd_tsdu_lock));
+    return (Status);
 }
 
 
 /*
- * KsFreeKsTsdu
- *   Release a Tsdu: uninitialize then free it.
+ * KsInitializeTdiAddress
+ *   Initialize the tdi address
  *
  * Arguments:
- *   KsTsdu: Tsdu to be freed.
+ *   pTransportAddress: tdi address to be initialized
+ *   IpAddress:         the ip address of object
+ *   IpPort:            the ip port of the object
  *
  * Return Value:
- *   N/A
+ *   ULONG: the total size of the tdi address
  *
- * Notes:
+ * NOTES:
  *   N/A
  */
 
-VOID
-KsFreeKsTsdu(
-    PKS_TSDU  KsTsdu
+ULONG
+KsInitializeTdiAddress(
+    IN OUT PTA_IP_ADDRESS   pTransportAddress,
+    IN ULONG                IpAddress,
+    IN USHORT               IpPort
     )
 {
-    cfs_mem_cache_free(
-            ks_data.ksnd_tsdu_slab,
-            KsTsdu );
-}
+    pTransportAddress->TAAddressCount = 1;
+    pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP;
+    pTransportAddress->Address[ 0 ].AddressType   = TDI_ADDRESS_TYPE_IP;
+    pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort;
+    pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr  = IpAddress;
 
+    return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP);
+}
 
 /*
- * KsInitializeKsTsdu
- *   Initialize the Tsdu buffer header
+ * KsQueryTdiAddressLength
+ *   Query the total size of the tdi address
  *
  * Arguments:
- *   KsTsdu: the Tsdu to be initialized
- *   Length: the total length of the Tsdu
+ *   pTransportAddress: tdi address to be queried
  *
  * Return Value:
- *   VOID
+ *   ULONG: the total size of the tdi address
  *
  * NOTES:
  *   N/A
  */
 
-VOID
-KsInitializeKsTsdu(
-    PKS_TSDU    KsTsdu,
-    ULONG       Length
+ULONG
+KsQueryTdiAddressLength(
+    PTRANSPORT_ADDRESS      pTransportAddress
     )
 {
-    RtlZeroMemory(KsTsdu, Length);
-    KsTsdu->Magic = KS_TSDU_MAGIC;
-    KsTsdu->TotalLength = Length;
-    KsTsdu->StartOffset = KsTsdu->LastOffset =
-    KS_DWORD_ALIGN(sizeof(KS_TSDU));
+    ULONG                   TotalLength = 0;
+    LONG                    i;
+
+    PTA_ADDRESS             pTaAddress = NULL;
+
+    ASSERT (NULL != pTransportAddress);
+
+    TotalLength  = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) +
+                   FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount;
+
+    pTaAddress = (PTA_ADDRESS)pTransportAddress->Address;
+
+    for (i = 0; i < pTransportAddress->TAAddressCount; i++)
+    {
+        TotalLength += pTaAddress->AddressLength;
+        pTaAddress = (PTA_ADDRESS)((PCHAR)pTaAddress +
+                                           FIELD_OFFSET(TA_ADDRESS,Address) +
+                                           pTaAddress->AddressLength );
+    }
+
+    return (TotalLength);
 }
 
 
 /*
- * KsInitializeKsTsduMgr
- *   Initialize the management structure of
- *   Tsdu buffers
+ * KsQueryIpAddress
+ *   Query the ip address of the tdi object
  *
  * Arguments:
- *   TsduMgr: the TsduMgr to be initialized
+ *   FileObject: tdi object to be queried
+ *   TdiAddress: TdiAddress buffer, to store the queried
+ *               tdi ip address
+ *   AddressLength: buffer length of the TdiAddress
  *
  * Return Value:
- *   VOID
+ *   NTSTATUS: nt status code (STATUS_BUFFER_TOO_SMALL when
+ *             *AddressLength is smaller than the queried length)
  *
  * NOTES:
  *   N/A
  */
 
-VOID
-KsInitializeKsTsduMgr(
-    PKS_TSDUMGR     TsduMgr
+NTSTATUS
+KsQueryIpAddress(
+    PFILE_OBJECT    FileObject,
+    PVOID           TdiAddress,
+    ULONG*          AddressLength
     )
 {
-    KeInitializeEvent(
-            &(TsduMgr->Event),
-            NotificationEvent,
-            FALSE
-            );
+    NTSTATUS        Status;
+
+    PTDI_ADDRESS_INFO   TdiAddressInfo;
+    ULONG               Length;
+
+
+    //
+    // Maximum length of TDI_ADDRESS_INFO with one TRANSPORT_ADDRESS
+    //
+
+    Length = MAX_ADDRESS_LENGTH;
+
+    TdiAddressInfo = (PTDI_ADDRESS_INFO)
+                        ExAllocatePoolWithTag(
+                            NonPagedPool,
+                            Length,
+                            'KSAI' );
+
+    if (NULL == TdiAddressInfo) {
+
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+
+    Status = KsQueryAddressInfo(
+                FileObject,
+                TdiAddressInfo,
+                &Length
+                );
+
+errorout:
+
+    if (NT_SUCCESS(Status)) {
+
+        if (*AddressLength < Length) {
+            Status = STATUS_BUFFER_TOO_SMALL;
+        } else {
+            *AddressLength = Length;
+            RtlCopyMemory(
+                TdiAddress,
+                &(TdiAddressInfo->Address),
+                Length
+                );
+            Status = STATUS_SUCCESS;
+        }
+    }
 
-    CFS_INIT_LIST_HEAD(
-            &(TsduMgr->TsduList)
-            );
+    if (NULL != TdiAddressInfo) {
+        ExFreePool(TdiAddressInfo);
+    }
 
-    TsduMgr->NumOfTsdu  = 0;
-    TsduMgr->TotalBytes = 0;
+    return Status;
 }
 
 
 /*
- * KsInitializeKsChain
- *   Initialize the China structure for receiving
- *   or transmitting
+ * KsErrorEventHandler
+ *   the common error event handler callback
  *
  * Arguments:
- *   KsChain: the KsChain to be initialized
+ *   TdiEventContext: should be the socket
+ *   Status: the error code
  *
  * Return Value:
- *   VOID
+ *   Status: STATUS_SUCCESS
  *
  * NOTES:
- *   N/A
+ *   We need not do anything in such a severe
+ *   error case. System will process it for us.
  */
 
-VOID
-KsInitializeKsChain(
-    PKS_CHAIN       KsChain
-    )
+NTSTATUS
+KsErrorEventHandler(
+    IN PVOID        TdiEventContext,
+    IN NTSTATUS     Status
+   )
 {
-    KsInitializeKsTsduMgr(&(KsChain->Normal));
-    KsInitializeKsTsduMgr(&(KsChain->Expedited));
-}
+    KsPrint((1, "KsErrorEventHandler called at Irql = %xh ...\n",
+                KeGetCurrentIrql()));
+
+    cfs_enter_debugger();
 
+    return (STATUS_SUCCESS);
+}
 
 /*
- * KsCleanupTsduMgr
- *   Clean up all the Tsdus in the TsduMgr list
+ * KsAcceptCompletionRoutine
+ *   Irp completion routine for TdiBuildAccept (KsConnectEventHandler)
+ *
+ *   Here the system gives us a chance to check whether the connection
+ *   has been established or not.
  *
  * Arguments:
- *   KsTsduMgr: the Tsdu list manager
+ *   DeviceObject:  the device object of the transport driver
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
  *
  * Return Value:
- *   NTSTATUS:  nt status code
+ *   Nt status code
  *
- * NOTES:
+ * Notes:
  *   N/A
  */
 
 NTSTATUS
-KsCleanupTsduMgr(
-    PKS_TSDUMGR     KsTsduMgr
+KsAcceptCompletionRoutine(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
     )
 {
-    PKS_TSDU        KsTsdu;
-    PKS_TSDU_DAT    KsTsduDat;
-    PKS_TSDU_BUF    KsTsduBuf;
-    PKS_TSDU_MDL    KsTsduMdl;
+    ks_tconn_t * child = (ks_tconn_t *) Context;
+    ks_tconn_t * parent = child->child.kstc_parent;
 
-    LASSERT(NULL != KsTsduMgr);
+    KsPrint((2, "KsAcceptCompletionRoutine at Irql: %xh child: %p status: %p\n",
+                 KeGetCurrentIrql(), child, Irp->IoStatus.Status));
 
-    KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+    LASSERT(child->kstc_type == kstt_child);
 
-    while (!list_empty(&KsTsduMgr->TsduList)) {
+    spin_lock(&(child->kstc_lock));
 
-        KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
-        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+    LASSERT(parent->kstc_state == ksts_listening);
+    LASSERT(child->kstc_state == ksts_connecting);
 
-        if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+    if (NT_SUCCESS(Irp->IoStatus.Status)) {
 
-            //
-            // KsTsdu is empty now, we need free it ...
-            //
+        child->child.kstc_accepted = TRUE;
 
-            list_del(&(KsTsdu->Link));
-            KsTsduMgr->NumOfTsdu--;
+        child->kstc_state = ksts_connected;
 
-            KsFreeKsTsdu(KsTsdu);
+        /* wake up the daemon thread which waits on this event */
+        KeSetEvent(
+            &(parent->listener.kstc_accept_event),
+            0,
+            FALSE
+            );
 
-        } else {
+        spin_unlock(&(child->kstc_lock));
 
-            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+        KsPrint((2, "KsAcceptCompletionRoutine: singal parent: %p (child: %p)\n",
+                    parent, child));
 
-            if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+    } else {
 
-                KsTsdu->StartOffset += KsTsduDat->TotalLength;
+        /* re-use this child connection */
+        child->child.kstc_accepted = FALSE;
+        child->child.kstc_busy = FALSE;
+        child->kstc_state = ksts_associated;
 
-            } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
+        spin_unlock(&(child->kstc_lock));
+    }
 
-                ASSERT(KsTsduBuf->UserBuffer != NULL);
+    /* now free the Irp */
+    IoFreeIrp(Irp);
 
-                if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) {
-                    ExFreePool(KsTsduBuf->UserBuffer);
-                } else {
-                    cfs_enter_debugger();
-                }
+    /* drop the refer count of the child */
+    ks_put_tconn(child);
 
-                KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+    return (STATUS_MORE_PROCESSING_REQUIRED);
+}
 
-            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+ks_addr_slot_t *
+KsSearchIpAddress(PUNICODE_STRING  DeviceName)
+{
+    ks_addr_slot_t * slot = NULL;
+    PLIST_ENTRY      list = NULL;
 
-                //
-                // MDL Tsdu Unit ...
-                //
+    spin_lock(&ks_data.ksnd_addrs_lock);
 
-                TdiReturnChainedReceives(
-                    &(KsTsduMdl->Descriptor),
-                    1 );
+    list = ks_data.ksnd_addrs_list.Flink;
+    while (list != &ks_data.ksnd_addrs_list) {
+        slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
+        if (RtlCompareUnicodeString(
+                    DeviceName,
+                    &slot->devname,
+                    TRUE) == 0) {
+            break;
+        }
+        list = list->Flink;
+        slot = NULL;
+    }
 
-                KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+
+    return slot;
+}
+
+void
+KsCleanupIpAddresses()
+{
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    while (!IsListEmpty(&ks_data.ksnd_addrs_list)) {
+
+        ks_addr_slot_t * slot = NULL;
+        PLIST_ENTRY      list = NULL;
+
+        list = RemoveHeadList(&ks_data.ksnd_addrs_list);
+        slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
+        cfs_free(slot);
+        ks_data.ksnd_naddrs--;
+    }
+
+    cfs_assert(ks_data.ksnd_naddrs == 0);
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+}
+
+VOID
+KsAddAddressHandler(
+    IN  PTA_ADDRESS      Address,
+    IN  PUNICODE_STRING  DeviceName,
+    IN  PTDI_PNP_CONTEXT Context
+    )
+{
+    PTDI_ADDRESS_IP IpAddress = NULL;
+
+    if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
+         Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
+
+        ks_addr_slot_t * slot = NULL;
+
+        IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
+        KsPrint((2, "KsAddAddressHandle: Device=%wZ Context=%xh "
+                     "IpAddress=%xh(%d.%d.%d.%d)\n",
+                     DeviceName, Context, IpAddress->in_addr,
+                     (IpAddress->in_addr & 0x000000FF) >> 0,
+                     (IpAddress->in_addr & 0x0000FF00) >> 8,
+                     (IpAddress->in_addr & 0x00FF0000) >> 16,
+                     (IpAddress->in_addr & 0xFF000000) >> 24
+               ));
+
+        slot = KsSearchIpAddress(DeviceName);
+
+        if (slot != NULL) {
+            slot->up = TRUE;
+            slot->ip_addr = ntohl(IpAddress->in_addr);
+        } else {
+
+            /* Matt: only add 192.168.10/5/92.xxx for temporary test */
+            if ((IpAddress->in_addr & 0x00FFFFFF) != 0x000aa8c0 &&
+                (IpAddress->in_addr & 0x00FFFFFF) != 0x0092a8c0 &&
+                (IpAddress->in_addr & 0x00FFFFFF) != 0x0005a8c0 ) {
+                return;
+            }
+
+            slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO);
+            if (slot != NULL) {
+                spin_lock(&ks_data.ksnd_addrs_lock);
+                InsertTailList(&ks_data.ksnd_addrs_list, &slot->link);
+                sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++);
+                slot->ip_addr = ntohl(IpAddress->in_addr);
+                slot->netmask = 0x00FFFFFF; /* Matt: hardcode*/
+                slot->up = TRUE;
+                RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length);
+                slot->devname.Length = DeviceName->Length;
+                slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR);
+                slot->devname.Buffer = slot->buffer;
+                spin_unlock(&ks_data.ksnd_addrs_lock);
+
+                KsPrint((0, "KsAddAddressHandle: %s added: ip=%xh(%d.%d.%d.%d)\n",
+                            slot->iface, IpAddress->in_addr,
+                            (IpAddress->in_addr & 0x000000FF) >> 0,
+                            (IpAddress->in_addr & 0x0000FF00) >> 8,
+                            (IpAddress->in_addr & 0x00FF0000) >> 16,
+                            (IpAddress->in_addr & 0xFF000000) >> 24
+                       ));
             }
         }
     }
-
-    return STATUS_SUCCESS;
 }
 
+VOID
+KsDelAddressHandler(
+    IN  PTA_ADDRESS      Address,
+    IN  PUNICODE_STRING  DeviceName,
+    IN  PTDI_PNP_CONTEXT Context
+    )
+{
+    PTDI_ADDRESS_IP IpAddress = NULL;
 
-/*
- * KsCleanupKsChain
- *   Clean up the TsduMgrs of the KsChain
- *
- * Arguments:
- *   KsChain: the chain managing TsduMgr
- *
- * Return Value:
- *   NTSTATUS:  nt status code
- *
- * NOTES:
- *   N/A
- */
+    if ( Address->AddressType == TDI_ADDRESS_TYPE_IP &&
+         Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) {
+
+        ks_addr_slot_t * slot = NULL;
+
+        slot = KsSearchIpAddress(DeviceName);
+
+        if (slot != NULL) {
+            slot->up = FALSE;
+        }
+
+        IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0];
+        KsPrint((2, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
+                  DeviceName, Context, IpAddress->in_addr,
+                   (IpAddress->in_addr & 0xFF000000) >> 24,
+                   (IpAddress->in_addr & 0x00FF0000) >> 16,
+                   (IpAddress->in_addr & 0x0000FF00) >> 8,
+                   (IpAddress->in_addr & 0x000000FF) >> 0 ));
+    }
+}
 
 NTSTATUS
-KsCleanupKsChain(
-    PKS_CHAIN   KsChain
-    )
+KsRegisterPnpHandlers()
 {
-    NTSTATUS    Status;
+    TDI20_CLIENT_INTERFACE_INFO ClientInfo;
+
+    /* initialize the global ks_data members */
+    RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME);
+    spin_lock_init(&ks_data.ksnd_addrs_lock);
+    InitializeListHead(&ks_data.ksnd_addrs_list);
 
-    LASSERT(NULL != KsChain);
+    /* register the pnp handlers */
+    RtlZeroMemory(&ClientInfo, sizeof(ClientInfo));
+    ClientInfo.TdiVersion = TDI_CURRENT_VERSION;
 
-    Status = KsCleanupTsduMgr(
-                &(KsChain->Normal)
-                );
+    ClientInfo.ClientName = &ks_data.ksnd_client_name;
+    ClientInfo.AddAddressHandlerV2 =  KsAddAddressHandler;
+    ClientInfo.DelAddressHandlerV2 =  KsDelAddressHandler;
 
-    if (!NT_SUCCESS(Status)) {
-        cfs_enter_debugger();
-        goto errorout;
-    }
+    return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo),
+                                  &ks_data.ksnd_pnp_handle);
+}
 
-    Status = KsCleanupTsduMgr(
-                &(KsChain->Expedited)
-                );
+VOID
+KsDeregisterPnpHandlers()
+{
+    if (ks_data.ksnd_pnp_handle) {
 
-    if (!NT_SUCCESS(Status)) {
-        cfs_enter_debugger();
-        goto errorout;
-    }
+        /* De-register the pnp handlers */
 
-errorout:
+        TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle);
+        ks_data.ksnd_pnp_handle = NULL;
 
-    return Status;
+        /* cleanup all the ip address slots */
+        KsCleanupIpAddresses();
+    }
 }
 
 
 /*
- * KsCleanupTsdu
- *   Clean up all the Tsdus of a tdi connected object
+ * KsGetVacancyBacklog
+ *   Get a vacant listening child from the backlog list
  *
  * Arguments:
- *   tconn: the tdi connection which is connected already.
+ *   parent: the listener daemon connection
  *
  * Return Value:
- *   Nt status code
+ *   the child listening connection or NULL on failure
  *
- * NOTES:
- *   N/A
+ * Notes
+ *   Parent's lock should be acquired before calling.
  */
 
-NTSTATUS
-KsCleanupTsdu(
-    ksock_tconn_t * tconn
+ks_tconn_t *
+KsGetVacancyBacklog(
+    ks_tconn_t *  parent
     )
 {
-    NTSTATUS        Status = STATUS_SUCCESS;
-
-
-    if (tconn->kstc_type != kstt_sender &&
-        tconn->kstc_type != kstt_child ) {
-
-        goto errorout;
-    }
-
-    if (tconn->kstc_type == kstt_sender) {
-
-        Status = KsCleanupKsChain(
-                    &(tconn->sender.kstc_recv)
-                    );
+    ks_tconn_t * child;
 
-        if (!NT_SUCCESS(Status)) {
-            cfs_enter_debugger();
-            goto errorout;
-        }
+    LASSERT(parent->kstc_type == kstt_listener);
+    LASSERT(parent->kstc_state == ksts_listening);
 
-        Status = KsCleanupKsChain(
-                    &(tconn->sender.kstc_send)
-                    );
+    if (list_empty(&(parent->listener.kstc_listening.list))) {
 
-        if (!NT_SUCCESS(Status)) {
-            cfs_enter_debugger();
-            goto errorout;
-        }
+        child = NULL;
 
     } else {
 
-        Status = KsCleanupKsChain(
-                    &(tconn->child.kstc_recv)
-                    );
+        struct list_head * tmp;
 
-        if (!NT_SUCCESS(Status)) {
-            cfs_enter_debugger();
-            goto errorout;
-        }
+        /* check the listening queue and try to get a free connection */
 
-        Status = KsCleanupKsChain(
-                    &(tconn->child.kstc_send)
-                    );
+        list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
+            child = list_entry (tmp, ks_tconn_t, child.kstc_link);
+            spin_lock(&(child->kstc_lock));
 
-        if (!NT_SUCCESS(Status)) {
-            cfs_enter_debugger();
-            goto errorout;
+            if (!child->child.kstc_busy) {
+                LASSERT(child->kstc_state == ksts_associated);
+                child->child.kstc_busy = TRUE;
+                spin_unlock(&(child->kstc_lock));
+                break;
+            } else {
+                spin_unlock(&(child->kstc_lock));
+                child = NULL;
+            }
         }
-
     }
 
-errorout:
-
-    return (Status);
+    return child;
 }
 
-
 /*
- * KsCopyMdlChainToMdlChain
- *   Copy data from  a [chained] Mdl to anther [chained] Mdl.
- *   Tdi library does not provide this function. We have to
- *   realize it ourselives.
+ * KsConnectEventHandler
+ *   Connect event handler, called by the underlying TDI transport
+ *   in response to an incoming request to the listening daemon.
+ *
+ *   It grabs a vacant backlog child from the children tconn list,
+ *   builds an accept Irp with it, then hands the Irp to the TDI driver.
  *
  * Arguments:
- *   SourceMdlChain: the source mdl
- *   SourceOffset:   start offset of the source
- *   DestinationMdlChain: the dst mdl
- *   DestinationOffset: the offset where data are to be copied.
- *   BytesTobecopied:   the expteced bytes to be copied
- *   BytesCopied:    to store the really copied data length
+ *   TdiEventContext:  the tdi connection object of the listening daemon
+ *   ......
  *
  * Return Value:
- *   NTSTATUS: STATUS_SUCCESS or other error code
+ *   Nt kernel status code
  *
- * NOTES:
- *   The length of source mdl must be >= SourceOffset + BytesTobecopied
+ * Notes:
+ *   N/A
  */
 
 NTSTATUS
-KsCopyMdlChainToMdlChain(
-    IN PMDL     SourceMdlChain,
-    IN ULONG    SourceOffset,
-    IN PMDL     DestinationMdlChain,
-    IN ULONG    DestinationOffset,
-    IN ULONG    BytesTobecopied,
-    OUT PULONG  BytesCopied
+KsConnectEventHandler(
+    IN PVOID                    TdiEventContext,
+    IN LONG                     RemoteAddressLength,
+    IN PVOID                    RemoteAddress,
+    IN LONG                     UserDataLength,
+    IN PVOID                    UserData,
+    IN LONG                     OptionsLength,
+    IN PVOID                    Options,
+    OUT CONNECTION_CONTEXT *    ConnectionContext,
+    OUT PIRP *                  AcceptIrp
     )
 {
-    PMDL        SrcMdl = SourceMdlChain;
-    PMDL        DstMdl = DestinationMdlChain;
+    ks_tconn_t *                parent;
+    ks_tconn_t *                child;
 
-    PUCHAR      SrcBuf = NULL;
-    PUCHAR      DstBuf = NULL;
-
-    ULONG       dwBytes = 0;
+    PFILE_OBJECT                FileObject;
+    PDEVICE_OBJECT              DeviceObject;
+    NTSTATUS                    Status;
 
-    NTSTATUS    Status = STATUS_SUCCESS;
+    PIRP                        Irp = NULL;
+    PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
 
+    KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql()));
+    parent = (ks_tconn_t *) TdiEventContext;
 
-    while (dwBytes < BytesTobecopied) {
+    LASSERT(parent->kstc_type == kstt_listener);
 
-        ULONG   Length = 0;
+    spin_lock(&(parent->kstc_lock));
 
-        while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) {
+    if (parent->kstc_state == ksts_listening) {
 
-            SourceOffset -= MmGetMdlByteCount(SrcMdl);
+        /* allocate a new ConnectionInfo to backup the peer's info */
 
-            SrcMdl = SrcMdl->Next;
+        ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+                NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) +
+                RemoteAddressLength, 'iCsK' );
 
-            if (NULL == SrcMdl) {
+        if (NULL == ConnectionInfo) {
 
-                Status = STATUS_INVALID_PARAMETER;
-                goto errorout;
-            }
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            cfs_enter_debugger();
+            goto errorout;
         }
 
-        while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) {
+        /* initializing ConnectionInfo structure ... */
+
+        ConnectionInfo->UserDataLength = UserDataLength;
+        ConnectionInfo->UserData = UserData;
+        ConnectionInfo->OptionsLength = OptionsLength;
+        ConnectionInfo->Options = Options;
+        ConnectionInfo->RemoteAddressLength = RemoteAddressLength;
+        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
+
+        RtlCopyMemory(
+                ConnectionInfo->RemoteAddress,
+                RemoteAddress,
+                RemoteAddressLength
+                );
 
-            DestinationOffset -= MmGetMdlByteCount(DstMdl);
+        /* get the vacancy listening child tdi connections */
 
-            DstMdl = DstMdl->Next;
+        child = KsGetVacancyBacklog(parent);
 
-            if (NULL == DstMdl) {
+        if (child) {
 
-                Status = STATUS_INVALID_PARAMETER;
-                goto errorout;
-            }
-        }
+            spin_lock(&(child->kstc_lock));
+            child->child.kstc_info.ConnectionInfo = ConnectionInfo;
+            child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress;
+            child->kstc_state = ksts_connecting;
+            spin_unlock(&(child->kstc_lock));
 
-        DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl);
+        } else {
 
-        if ((NULL == DstBuf)) {
+            KsPrint((1, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent));
             Status = STATUS_INSUFFICIENT_RESOURCES;
             goto errorout;
         }
 
-        //
-        // Here we need skip the OVERFLOW case via RtlCopyMemory :-(
-        //
+        FileObject = child->child.kstc_info.FileObject;
+        DeviceObject = IoGetRelatedDeviceObject (FileObject);
+
+        Irp = KsBuildTdiIrp(DeviceObject);
+
+        TdiBuildAccept(
+                Irp,
+                DeviceObject,
+                FileObject,
+                KsAcceptCompletionRoutine,
+                child,
+                NULL,
+                NULL
+                );
 
-        if ( KsQueryMdlsSize(SrcMdl) - SourceOffset >
-             MmGetMdlByteCount(DstMdl) - DestinationOffset ) {
+        IoSetNextIrpStackLocation(Irp);
 
-            Length = BytesTobecopied - dwBytes;
+        /* grab a reference on the child tdi connection */
+        ks_get_tconn(child);
 
-            if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) {
-                Length = KsQueryMdlsSize(SrcMdl) - SourceOffset;
-            }
+        Status = STATUS_MORE_PROCESSING_REQUIRED;
+        *AcceptIrp = Irp;
+        *ConnectionContext = child;
 
-            if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) {
-                Length = MmGetMdlByteCount(DstMdl) - DestinationOffset;
-            }
+    } else {
 
-            SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl);
+        Status = STATUS_CONNECTION_REFUSED;
+        goto errorout;
+    }
 
-            if ((NULL == DstBuf)) {
-                Status = STATUS_INSUFFICIENT_RESOURCES;
-                goto errorout;
-            }
+    spin_unlock(&(parent->kstc_lock));
 
-            RtlCopyMemory(
-                DstBuf + DestinationOffset,
-                SrcBuf + SourceOffset,
-                Length
-                );
+    return Status;
 
-        } else {
+errorout:
 
-            Status = TdiCopyMdlToBuffer(
-                        SrcMdl,
-                        SourceOffset,
-                        DstBuf,
-                        DestinationOffset,
-                        MmGetMdlByteCount(DstMdl),
-                        &Length
-                        );
+    spin_unlock(&(parent->kstc_lock));
 
-            if (STATUS_BUFFER_OVERFLOW == Status) {
-                cfs_enter_debugger();
-            } else if (!NT_SUCCESS(Status)) {
-                cfs_enter_debugger();
-                goto errorout;
-            }
-        }
+    *AcceptIrp = NULL;
+    *ConnectionContext = NULL;
+
+    if (ConnectionInfo) {
+        ExFreePool(ConnectionInfo);
+    }
 
-        SourceOffset += Length;
-        DestinationOffset += Length;
-        dwBytes += Length;
+    if (Irp) {
+        IoFreeIrp (Irp);
     }
 
-errorout:
+    return Status;
+}
+
+/*
+ * KsDisconnectCompletionRoutine
+ *   the Irp completion routine for TdiBuildDisconnect
+ *
+ *   We just signal the event and return STATUS_MORE_PROCESSING_REQUIRED
+ *   to let the caller take the responsibility of the Irp.
+ *
+ * Arguments:
+ *   DeviceObject:  the device object of the transport
+ *   Irp:           the Irp is being completed.
+ *   Context:       the event specified by the caller
+ *
+ * Return Value:
+ *   Nt status code
+ *
+ * Notes:
+ *   N/A
+ */
+
+NTSTATUS
+KsDisconectCompletionRoutine (
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
+    )
+{
+
+    KeSetEvent((PKEVENT) Context, 0, FALSE);
 
-    if (NT_SUCCESS(Status)) {
-        *BytesCopied = dwBytes;
-    } else {
-        *BytesCopied = 0;
-    }
+    return STATUS_MORE_PROCESSING_REQUIRED;
 
-    return Status;
+    UNREFERENCED_PARAMETER(DeviceObject);
 }
 
 
-
 /*
- * KsQueryMdlSize
- *   Query the whole size of a MDL (may be chained)
+ * KsDisconnectHelper
+ *   the routine to be executed in the WorkItem procedure
+ *   this routine is to disconnect a tdi connection
  *
  * Arguments:
- *   Mdl:  the Mdl to be queried
+ *   Workitem:  the context transferred to the workitem
  *
  * Return Value:
- *   ULONG: the total size of the mdl
- *
- * NOTES:
  *   N/A
+ *
+ * Notes:
+ *   tconn is already referred in abort_connecton ...
  */
 
-ULONG
-KsQueryMdlsSize (PMDL Mdl)
+VOID
+KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem)
 {
-    PMDL    Next = Mdl;
-    ULONG   Length = 0;
+    ks_tconn_t * tconn = WorkItem->tconn;
 
+    KsPrint((1, "KsDisconnectHelper: disconnecting tconn=%p\n", tconn));
+    ks_disconnect_tconn(tconn, WorkItem->Flags);
 
-    //
-    // Walking the MDL Chain ...
-    //
-
-    while (Next) {
-        Length += MmGetMdlByteCount(Next);
-        Next = Next->Next;
-    }
+    KeSetEvent(&(WorkItem->Event), 0, FALSE);
 
-    return (Length);
+    spin_lock(&(tconn->kstc_lock));
+    cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+    spin_unlock(&(tconn->kstc_lock));
+    ks_put_tconn(tconn);
 }
 
 
 /*
- * KsLockUserBuffer
- *   Allocate MDL for the buffer and lock the pages into
- *   nonpaged pool
+ * KsDisconnectEventHandler
+ *   Disconnect event handler, called by the underlying TDI transport
+ *   in response to an incoming disconnection notification from a remote node.
  *
  * Arguments:
- *   UserBuffer:  the user buffer to be locked
- *   Length:      length in bytes of the buffer
- *   Operation:   read or write access
- *   pMdl:        the result of the created mdl
+ *   ConnectionContext:  tdi connection object
+ *   DisconnectFlags:    specifies the nature of the disconnection
+ *   ......
  *
  * Return Value:
- *   NTSTATUS:     kernel status code (STATUS_SUCCESS
- *                 or other error code)
+ *   Nt kernel status code
  *
- * NOTES:
+ * Notes:
  *   N/A
  */
 
+
 NTSTATUS
-KsLockUserBuffer (
-    IN PVOID            UserBuffer,
-    IN BOOLEAN          bPaged,
-    IN ULONG            Length,
-    IN LOCK_OPERATION   Operation,
-    OUT PMDL *          pMdl
+KsDisconnectEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN LONG                 DisconnectDataLength,
+    IN PVOID                DisconnectData,
+    IN LONG                 DisconnectInformationLength,
+    IN PVOID                DisconnectInformation,
+    IN ULONG                DisconnectFlags
     )
 {
-    NTSTATUS    Status;
-    PMDL        Mdl = NULL;
+    ks_tconn_t *            tconn;
+    NTSTATUS                Status;
+    PKS_DISCONNECT_WORKITEM WorkItem;
 
-    LASSERT(UserBuffer != NULL);
+    tconn = (ks_tconn_t *)ConnectionContext;
 
-    *pMdl = NULL;
+    KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n",
+                KeGetCurrentIrql() ));
 
-    Mdl = IoAllocateMdl(
-                UserBuffer,
-                Length,
-                FALSE,
-                FALSE,
-                NULL
-                );
+    KsPrint((2, "tconn = %x DisconnectFlags= %xh\n",
+                 tconn, DisconnectFlags));
 
-    if (Mdl == NULL) {
+    ks_get_tconn(tconn);
+    spin_lock(&(tconn->kstc_lock));
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
+    WorkItem = &(tconn->kstc_disconnect);
+
+    if (tconn->kstc_state != ksts_connected) {
+
+        Status = STATUS_SUCCESS;
 
     } else {
 
-        __try {
+        if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) {
 
-            if (bPaged) {
-                MmProbeAndLockPages(
-                    Mdl,
-                    KernelMode,
-                    Operation
-                    );
-            } else {
-                MmBuildMdlForNonPagedPool(
-                    Mdl
-                    );
-            }
+            Status = STATUS_REMOTE_DISCONNECT;
 
-            Status = STATUS_SUCCESS;
+        } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) {
 
-            *pMdl = Mdl;
+            Status = STATUS_GRACEFUL_DISCONNECT;
+        }
 
-        } __except (EXCEPTION_EXECUTE_HANDLER) {
+        if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
 
-            IoFreeMdl(Mdl);
+            ks_get_tconn(tconn);
 
-            Mdl = NULL;
+            WorkItem->Flags = DisconnectFlags;
+            WorkItem->tconn = tconn;
 
-            cfs_enter_debugger();
+            cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
 
-            Status = STATUS_INVALID_USER_BUFFER;
+            /* queue the workitem to call */
+            ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
         }
     }
 
-    return Status;
-}
+    spin_unlock(&(tconn->kstc_lock));
+    ks_put_tconn(tconn);
 
-/*
- * KsMapMdlBuffer
- *   Map the mdl into a buffer in kernel space
- *
- * Arguments:
- *   Mdl:  the mdl to be mapped
- *
- * Return Value:
- *   PVOID: the buffer mapped or NULL in failure
- *
- * NOTES:
- *   N/A
- */
+    return  (Status);
+}
 
-PVOID
-KsMapMdlBuffer (PMDL    Mdl)
+/*
+ * KsTcpReceiveCompletionRoutine
+ *   the completion routine of the receive Irp built by
+ *   KsTcpReceiveEventHandler; it is called by KsTcpCompletionRoutine
+ *   through context->CompletionRoutine.
+ *
+ *   On success the received buffer is handed to the TsduMgr, the
+ *   TsduMgr event is signaled and the scheduler callback is called.
+ *   On failure the tdi connection is aborted. In both cases the tconn
+ *   reference is dropped and the context, the mdl chain and the Irp
+ *   are released.
+ *
+ * Arguments:
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
+ *
+ * Return Value:
+ *   Nt status code: the completion status stored in the Irp
+ *
+ * Notes:
+ *   NOTE(review): Context->Buffer is only passed to KsWriteTsduBuf on
+ *   success and is not freed on the failure path here - confirm who
+ *   owns the buffer in that case.
+ */
+
+NTSTATUS
+KsTcpReceiveCompletionRoutine(
+    IN PIRP                         Irp,
+    IN PKS_TCP_COMPLETION_CONTEXT   Context
+    )
 {
-    LASSERT(Mdl != NULL);
+    ks_tconn_t *tconn;
+    NTSTATUS    status;
+    ULONG       length;
 
-    return MmGetSystemAddressForMdlSafe(
-                Mdl,
-                NormalPagePriority
-                );
-}
+    /* validate the context before any of its fields is read */
+    LASSERT(Context != NULL);
+
+    tconn  = Context->tconn;
+    status = Irp->IoStatus.Status;
+    length = (ULONG)Irp->IoStatus.Information;
 
+    if (NT_SUCCESS(status)) {
 
-/*
- * KsReleaseMdl
- *   Unlock all the pages in the mdl
- *
- * Arguments:
- *   Mdl:  memory description list to be released
- *
- * Return Value:
- *   N/A
- *
- * NOTES:
- *   N/A
- */
+        PKS_TSDUMGR  TsduMgr = Context->TsduMgr;
+        PCHAR        Buffer = Context->Buffer;
 
-VOID
-KsReleaseMdl (IN PMDL   Mdl,
-              IN int    Paged )
-{
-    LASSERT(Mdl != NULL);
+        KsPrint((4, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n",
+                    TsduMgr->TotalBytes ));
 
-    while (Mdl) {
+        /* queue the received data and signal the event under the lock */
+        ks_lock_tsdumgr(TsduMgr);
+        KsWriteTsduBuf(TsduMgr, Buffer, length, 0);
+        /* signal TsduMgr event */
+        KeSetEvent(&(TsduMgr->Event), 0, FALSE);
+        ks_unlock_tsdumgr(TsduMgr);
 
-        PMDL    Next;
+        /* re-activate the ks connection and wake up the scheduler */
+        if (KS_CAN_SCHED(TsduMgr)) {
+            if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+                tconn->kstc_sched_cb(tconn, FALSE);
+            }
+        }
 
-        Next = Mdl->Next;
+        ks_put_tconn(tconn);
 
-        if (Paged) {
-            MmUnlockPages(Mdl);
-        }
+    } else {
 
-        IoFreeMdl(Mdl);
+        /* an unexpected error occurred, we must abort the connection.
+           abort while our reference is still held, then drop it */
+        ks_abort_tconn(tconn);
+        ks_put_tconn(tconn);
+    }
 
-        Mdl = Next;
+
+    if (Context) {
+
+        /* free the Context structure... */
+        ASSERT(Context->Magic == KS_TCP_CONTEXT_MAGIC);
+        Context->Magic = 'CDAB';
+        cfs_free(Context);
+    }
+
+    /* free the Irp */
+    if (Irp) {
+
+        /* release mdl chain */
+        if (Irp->MdlAddress) {
+            KsReleaseMdl(Irp->MdlAddress, FALSE);
+        }
+
+        /* free irp packet */
+        IoFreeIrp(Irp);
     }
+
+    return (status);
 }
 
 
 /*
- * ks_lock_buffer
- *   allocate MDL for the user spepcified buffer and lock (paging-in)
- *   all the pages of the buffer into system memory
+ * KsTcpCompletionRoutine
+ *   the Irp completion routine for TdiBuildSend and TdiBuildReceive ...
+ *   We need to call the user's own CompletionRoutine if specified.
+ *   Otherwise it's the synchronous case: we signal the event and
+ *   drop the reference of the tconn. Nothing is done without Context.
  *
  * Arguments:
- *   buffer:  the user buffer to be locked
- *   length:  length in bytes of the buffer
- *   access:  read or write access
- *   mdl:     the result of the created mdl
+ *   DeviceObject:  the device object of the transport (not used here)
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
  *
  * Return Value:
- *   int:     the ks error code: 0: success / -x: failture
+ *   Nt status code: always STATUS_MORE_PROCESSING_REQUIRED
  *
  * Notes:
  *   N/A
  */
 
-int
-ks_lock_buffer (
-    void *            buffer,
-    int               paged,
-    int               length,
-    LOCK_OPERATION    access,
-    ksock_mdl_t **    kmdl
+NTSTATUS
+KsTcpCompletionRoutine(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
     )
 {
-    NTSTATUS        status;
+    if (Context) {
 
-    status = KsLockUserBuffer(
-                    buffer,
-                    paged !=0,
-                    length,
-                    access,
-                    kmdl
-                    );
+        PKS_TCP_COMPLETION_CONTEXT  context = NULL;
+        ks_tconn_t * tconn = NULL;
 
-    return cfs_error_code(status);
-}
+        context = (PKS_TCP_COMPLETION_CONTEXT) Context;
+        ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC);
+        tconn = context->tconn;
 
+        if (context->CompletionRoutine) {
 
-/*
- * ks_map_mdl
- *   Map the mdl pages into kernel space
- *
- * Arguments:
- *   mdl:  the mdl to be mapped
- *
- * Return Value:
- *   void *: the buffer mapped or NULL in failure
- *
- * Notes:
- *   N/A
- */
+            //
+            // Giving control to user specified CompletionRoutine; its return value is ignored
+            //
+
+            context->CompletionRoutine(Irp, context);
+
+        } else {
+
+            //
+            // Signaling the Event: no CompletionRoutine was specified
+            //
+            LASSERT(NULL != context->Event);
+            KeSetEvent(context->Event, 0, FALSE);
+
+            /* drop the reference count of the tconn object */
+            ks_put_tconn(tconn);
+        }
 
-void *
-ks_map_mdl (ksock_mdl_t * mdl)
-{
-    LASSERT(mdl != NULL);
+    } else {
+
+        /* cfs_enter_debugger(); */
+    }
 
-    return KsMapMdlBuffer(mdl);
+    return STATUS_MORE_PROCESSING_REQUIRED;
 }
 
 /*
- *  ks_release_mdl
- *   Unlock all the pages in the mdl and release the mdl
+ * KsTcpSendCompletionRoutine
+ *   the user specified Irp completion routine for asynchronous
+ *   data transmission requests.
+ *
+ *   It will do th cleanup job of the ks_tx_t and wake up the
+ *   ks scheduler thread
  *
  * Arguments:
- *   mdl:  memory description list to be released
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
  *
  * Return Value:
- *   N/A
+ *   Nt status code
  *
  * Notes:
  *   N/A
  */
 
-void
-ks_release_mdl (ksock_mdl_t *mdl, int paged)
+NTSTATUS
+KsTcpSendCompletionRoutine(
+    IN PIRP                         Irp,
+    IN PKS_TCP_COMPLETION_CONTEXT   context
+    )
 {
-    LASSERT(mdl != NULL);
+    NTSTATUS          status = Irp->IoStatus.Status;
+    ULONG             rc = (ULONG)(ULONG_PTR)Irp->IoStatus.Information;
+    ks_tconn_t *      tconn = context->tconn;
 
-    KsReleaseMdl(mdl, paged);
-}
+    PKS_TSDUMGR       TsduMgr = context->TsduMgr;
+    PKEVENT           Event = context->Event;
 
+    LASSERT(tconn != NULL && tconn->kstc_magic == KS_TCONN_MAGIC);
+    LASSERT(context && context->Magic == KS_TCP_CONTEXT_MAGIC);
 
-/*
- * ks_create_tconn
- *   allocate a new tconn structure from the SLAB cache or
- *   NonPaged sysetm pool
- *
- * Arguments:
- *   N/A
- *
- * Return Value:
- *   ksock_tconn_t *: the address of tconn or NULL if it fails
- *
- * NOTES:
- *   N/A
- */
+    KsPrint((4, "KsTcpSendCompltionRoutine: tconn = %p TsduMgr = %p "
+                "status = %xh bytes = %xh/%x\n", tconn, TsduMgr, status,
+                Irp->IoStatus.Information, TsduMgr->TotalBytes));
 
-ksock_tconn_t *
-ks_create_tconn()
-{
-    ksock_tconn_t * tconn = NULL;
+    ks_lock_tsdumgr(TsduMgr);
 
-    /* allocate ksoc_tconn_t from the slab cache memory */
+    if (NT_SUCCESS(status)) {
 
-    tconn = (ksock_tconn_t *)cfs_mem_cache_alloc(
-                ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO);
+        /* cleanup processed TsduMgr queue */
+        KsReleaseTsdus(tconn, TsduMgr, rc);
 
-    if (tconn) {
+        /* queue to delivery engine if there's still remained data */
+        TsduMgr->Busy = FALSE;
+        if (TsduMgr->TotalBytes > 0) {
+            KsQueueTdiEngine(tconn, TsduMgr);
+        }
+        /* signal TsduMgr event */
+        KeSetEvent(&(TsduMgr->Event), 0, FALSE);
+        ks_unlock_tsdumgr(TsduMgr);
 
-        /* zero tconn elements */
-        memset(tconn, 0, sizeof(ksock_tconn_t));
+        /*
+         * now it's time to re-queue the conns into the
+         * scheduler queue and wake the scheduler thread.
+         */
 
-        /* initialize the tconn ... */
-        tconn->kstc_magic = KS_TCONN_MAGIC;
+        if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+            tconn->kstc_sched_cb(tconn, TRUE);
+        }
 
-        ExInitializeWorkItem(
-            &(tconn->kstc_disconnect.WorkItem),
-            KsDisconnectHelper,
-            &(tconn->kstc_disconnect)
-            );
+    } else {
 
-        KeInitializeEvent(
-                &(tconn->kstc_disconnect.Event),
-                SynchronizationEvent,
-                FALSE );
+        ks_unlock_tsdumgr(TsduMgr);
 
-        ExInitializeWorkItem(
-            &(tconn->kstc_destroy),
-            ks_destroy_tconn,
-            tconn
-            );
+        KsPrint((1, "KsTcpSendCompltionRoutine: failed tconn: %p "
+                    "TsduMgr: %p status: %xh\n", tconn, TsduMgr, status));
 
-        spin_lock_init(&(tconn->kstc_lock));
+        /* cfs_enter_debugger(); */
 
-        ks_get_tconn(tconn);
+        /*
+         *  for the case that the transmission is unsuccessful,
+         *  we need abort the tdi connection, but not destroy it.
+         *  the socknal conn will drop the refer count, then the
+         *  tdi connection will be freed.
+         */
 
-        spin_lock(&(ks_data.ksnd_tconn_lock));
+        ks_abort_tconn(tconn);
+    }
 
-        /* attach it into global list in ks_data */
+    /* drop tconn reference */
+    ks_put_tconn(tconn);
 
-        list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns));
-        ks_data.ksnd_ntconns++;
-        spin_unlock(&(ks_data.ksnd_tconn_lock));
+    /* freeing the context structure */
+    if (context) {
+        ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC);
+        context->Magic = 'CDAB';
+        cfs_free(context);
+    }
 
-        tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000;
+    /* free the Irp structure */
+    if (Irp) {
+        /* mdl chain was released by KsReleaseTsdus*/
+        Irp->MdlAddress = NULL;
+        IoFreeIrp(Irp);
+        Irp = NULL;
     }
 
-    return (tconn);
+    return status;
 }
 
-
 /*
- * ks_free_tconn
- *   free the tconn structure to the SLAB cache or NonPaged
- *   sysetm pool
- *
- * Arguments:
- *   tconn:  the tcon is to be freed
- *
- * Return Value:
- *   N/A
+ *  Normal receive event handler
  *
- * Notes:
- *   N/A
+ *  It will move data from system Tsdu to our TsduList
+ *
+ *  If the whole payload is indicated (TDI_RECEIVE_ENTIRE_MESSAGE is
+ *  set and BytesIndicated == BytesAvailable) the data is written to
+ *  the TsduMgr directly and STATUS_SUCCESS is returned. Otherwise a
+ *  new receive Irp for BytesAvailable bytes is built and handed back
+ *  through IoRequestPacket with STATUS_MORE_PROCESSING_REQUIRED;
+ *  KsTcpReceiveCompletionRoutine is set as its completion routine.
+ *
+ *  When the tconn is not a connected sender or child, the indicated
+ *  data is dropped (BytesTaken = BytesIndicated). When building the
+ *  Irp fails, the connection is aborted and STATUS_SUCCESS is
+ *  returned with BytesTaken = BytesAvailable.
+ *
+ *  NOTE(review): errorout reports BytesAvailable while the early
+ *  exit reports BytesIndicated; the two counts may differ on the
+ *  errorout path - confirm which one is intended.
+ *  NOTE(review): status is not assigned when KsBuildTdiIrp fails;
+ *  this is harmless only because errorout returns STATUS_SUCCESS.
  */
 
-void
-ks_free_tconn(ksock_tconn_t * tconn)
+NTSTATUS
+KsTcpReceiveEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                BytesIndicated,
+    IN ULONG                BytesAvailable,
+    OUT ULONG *             BytesTaken,
+    IN PVOID                Tsdu,
+    OUT PIRP *              IoRequestPacket
+   )
 {
-    LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0);
+    NTSTATUS            status;
 
-    spin_lock(&(ks_data.ksnd_tconn_lock));
+    ks_tconn_t *        tconn;
 
-    /* remove it from the global list */
-    list_del(&tconn->kstc_list);
-    ks_data.ksnd_ntconns--;
+    BOOLEAN             bIsExpedited;
+    BOOLEAN             bIsCompleteTsdu;
 
-    /* if this is the last tconn, it would be safe for
-       ks_tdi_fini_data to quit ... */
-    if (ks_data.ksnd_ntconns == 0) {
-        cfs_wake_event(&ks_data.ksnd_tconn_exit);
+    PCHAR               Buffer = NULL;
+    PIRP                Irp = NULL;
+    PMDL                Mdl = NULL;
+    PFILE_OBJECT        FileObject;
+    PDEVICE_OBJECT      DeviceObject;
+    PKS_TSDUMGR         TsduMgr;
+
+    PKS_TCP_COMPLETION_CONTEXT context = NULL;
+
+    tconn = (ks_tconn_t *) ConnectionContext;
+    ks_get_tconn(tconn);
+
+    /* check expedited flag */
+    bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
+
+    /* check whether the whole body of payload is received or not */
+    if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) &&
+         (BytesIndicated == BytesAvailable) ) {
+        bIsCompleteTsdu = TRUE;
+    } else {
+        bIsCompleteTsdu = FALSE;
     }
-    spin_unlock(&(ks_data.ksnd_tconn_lock));
 
-    /* free the structure memory */
-    cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn);
-}
+    KsPrint((4, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n",
+                BytesIndicated, BytesAvailable));
+    KsPrint((4, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited ));
 
+    /* only a connected sender or child tconn accepts data; otherwise drop it */
+    if ( !((tconn->kstc_state == ksts_connected) &&
+           (tconn->kstc_type == kstt_sender ||
+            tconn->kstc_type == kstt_child))) {
 
-/*
- * ks_init_listener
- *   Initialize the tconn as a listener (daemon)
- *
- * Arguments:
- *   tconn: the listener tconn
- *
- * Return Value:
- *   N/A
- *
- * Notes:
- *   N/A
- */
+        *BytesTaken = BytesIndicated;
+        ks_put_tconn(tconn);
+        return (STATUS_SUCCESS);
+    }
 
-void
-ks_init_listener(
-    ksock_tconn_t * tconn
-    )
-{
-    /* preparation: intialize the tconn members */
+    /* query tsdu mgr */
+    TsduMgr = KsQueryTsduMgr(tconn, bIsExpedited, FALSE);
 
-    tconn->kstc_type = kstt_listener;
+    ks_lock_tsdumgr(TsduMgr);
+    if (bIsCompleteTsdu) {
 
-    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+        *BytesTaken = KsWriteTsduDat(TsduMgr, Tsdu, BytesAvailable, 0);
+        status = STATUS_SUCCESS;
 
-    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list));
-    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list));
+        /* signal TsduMgr event */
+        KeSetEvent(&(TsduMgr->Event), 0, FALSE);
+        ks_unlock_tsdumgr(TsduMgr);
 
-    cfs_init_event( &(tconn->listener.kstc_accept_event),
-                    TRUE,
-                    FALSE );
+        /* re-activate the ks connection and wake up the scheduler */
+        if (KS_CAN_SCHED(TsduMgr)) {
+            if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+                tconn->kstc_sched_cb(tconn, FALSE);
+            }
+        }
 
-    cfs_init_event( &(tconn->listener.kstc_destroy_event),
-                    TRUE,
-                    FALSE );
+    } else {
 
-    tconn->kstc_state = ksts_inited;
-}
+        ks_unlock_tsdumgr(TsduMgr);
 
+        /* allocate buffer for further data in tsdu queue */
+        Buffer = ExAllocatePool(NonPagedPool, BytesAvailable);
+        if (NULL == Buffer) {
+            status = STATUS_INSUFFICIENT_RESOURCES;
+            goto errorout;
+        }
 
-/*
- * ks_init_sender
- *   Initialize the tconn as a sender
- *
- * Arguments:
- *   tconn: the sender tconn
- *
- * Return Value:
- *   N/A
- *
- * Notes:
- *   N/A
- */
+        /* there's still data in tdi internal queue, we need issue a new
+           Irp to receive all of them. first allocate the tcp context */
+        context = cfs_alloc(sizeof(KS_TCP_COMPLETION_CONTEXT), 0);
+        if (!context) {
+            status = STATUS_INSUFFICIENT_RESOURCES;
+            goto errorout;
+        }
+
+        /* setup the context */
+        RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT));
+        context->Magic             = KS_TCP_CONTEXT_MAGIC;
+        context->tconn             = tconn;
+        context->CompletionRoutine = KsTcpReceiveCompletionRoutine;
+        context->CompletionContext = Buffer;
+        context->TsduMgr           = TsduMgr;
+        context->Buffer            = Buffer;
+        context->Event             = &(TsduMgr->Event);
+
+        if (tconn->kstc_type == kstt_sender) {
+            FileObject = tconn->sender.kstc_info.FileObject;
+        } else {
+            FileObject = tconn->child.kstc_info.FileObject;
+        }
+        DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+        /* build new tdi Irp and setup it. */
+        Irp = KsBuildTdiIrp(DeviceObject);
+        if (NULL == Irp) {
+            goto errorout;
+        }
+
+        status = KsLockUserBuffer(
+                    Buffer,
+                    FALSE,
+                    BytesAvailable,
+                    IoModifyAccess,
+                    &Mdl
+                    );
+
+        if (!NT_SUCCESS(status)) {
+            goto errorout;
+        }
+
+        TdiBuildReceive(
+            Irp,
+            DeviceObject,
+            FileObject,
+            KsTcpCompletionRoutine,
+            context,
+            Mdl,
+            ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED),
+            BytesAvailable
+          );
+
+        IoSetNextIrpStackLocation(Irp);
+
+        /* return the newly built Irp to transport driver,
+           it will process it to receive all the data */
+
+        *IoRequestPacket = Irp;
+        *BytesTaken = 0;
+
+        ks_get_tconn(tconn);  /* reference for the pending Irp; dropped by
+                                 KsTcpReceiveCompletionRoutine */
+        status = STATUS_MORE_PROCESSING_REQUIRED;
+    }
 
-void
-ks_init_sender(
-    ksock_tconn_t * tconn
-    )
-{
-    tconn->kstc_type = kstt_sender;
-    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+    ks_put_tconn(tconn);
 
-    KsInitializeKsChain(&(tconn->sender.kstc_recv));
-    KsInitializeKsChain(&(tconn->sender.kstc_send));
+    return (status);
 
-    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
-    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+errorout:
 
-    tconn->kstc_state = ksts_inited;
-}
+    if (Mdl) {
+        KsReleaseMdl(Mdl, FALSE);
+    }
 
-/*
- * ks_init_child
- *   Initialize the tconn as a child
- *
- * Arguments:
- *   tconn: the child tconn
- *
- * Return Value:
- *   N/A
- *
- * NOTES:
- *   N/A
- */
+    if (Buffer) {
+        ExFreePool(Buffer);
+    }
 
-void
-ks_init_child(
-    ksock_tconn_t * tconn
-    )
-{
-    tconn->kstc_type = kstt_child;
-    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+    if (Irp) {
+        IoFreeIrp(Irp);
+    }
 
-    KsInitializeKsChain(&(tconn->child.kstc_recv));
-    KsInitializeKsChain(&(tconn->child.kstc_send));
+    if (context) {
+        ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC);
+        context->Magic = 'CDAB';
+        cfs_free(context);
+    }
 
-    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
-    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+    ks_abort_tconn(tconn);
+    ks_put_tconn(tconn);
 
-    tconn->kstc_state = ksts_inited;
+    *BytesTaken = BytesAvailable;
+
+    return STATUS_SUCCESS;
 }
 
 /*
- * ks_get_tconn
- *   increase the reference count of the tconn with 1
- *
- * Arguments:
- *   tconn: the tdi connection to be referred
- *
- * Return Value:
- *   N/A
- *
- * NOTES:
- *   N/A
+ *  Expedited receive event handler
+ *
+ *  It forwards all of its arguments to KsTcpReceiveEventHandler with
+ *  TDI_RECEIVE_EXPEDITED forced into ReceiveFlags and returns that
+ *  handler's status.
  */
 
-void
-ks_get_tconn(
-    ksock_tconn_t * tconn
+NTSTATUS
+KsTcpReceiveExpeditedEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                BytesIndicated,
+    IN ULONG                BytesAvailable,
+    OUT ULONG *             BytesTaken,
+    IN PVOID                Tsdu,
+    OUT PIRP *              IoRequestPacket
     )
 {
-    atomic_inc(&(tconn->kstc_refcount));
+    return KsTcpReceiveEventHandler(
+                TdiEventContext,
+                ConnectionContext,
+                ReceiveFlags | TDI_RECEIVE_EXPEDITED,
+                BytesIndicated,
+                BytesAvailable,
+                BytesTaken,
+                Tsdu,
+                IoRequestPacket
+                );
 }
 
 /*
- * ks_put_tconn
- *   decrease the reference count of the tconn and destroy
- *   it if the refercount becomes 0.
- *
- * Arguments:
- *   tconn: the tdi connection to be dereferred
- *
- * Return Value:
- *   N/A
+ *  Bulk receive event handler
  *
- * NOTES:
- *   N/A
+ *  It will queue all the system Tsdus to our TsduList.
+ *  Then later ks_recv_mdl will release them.
  */
 
-void
-ks_put_tconn(
-    ksock_tconn_t *tconn
+NTSTATUS
+KsTcpChainedReceiveEventHandler (
+    IN PVOID TdiEventContext,       // the event context
+    IN CONNECTION_CONTEXT ConnectionContext,
+    IN ULONG ReceiveFlags,
+    IN ULONG ReceiveLength,
+    IN ULONG StartingOffset,        // offset of start of client data in TSDU
+    IN PMDL  Tsdu,                  // TSDU data chain
+    IN PVOID TsduDescriptor         // for call to TdiReturnChainedReceives
     )
 {
-    if (atomic_dec_and_test(&(tconn->kstc_refcount))) {
 
-        spin_lock(&(tconn->kstc_lock));
+    NTSTATUS            status;
+    ks_tconn_t *        tconn;
 
-        if ( ( tconn->kstc_type == kstt_child ||
-               tconn->kstc_type == kstt_sender ) &&
-             ( tconn->kstc_state == ksts_connected ) ) {
+    PKS_TSDUMGR         TsduMgr;
 
-            spin_unlock(&(tconn->kstc_lock));
+    BOOLEAN             expedited;
 
-            ks_abort_tconn(tconn);
+    tconn = (ks_tconn_t *) ConnectionContext;
+    expedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
 
-        } else {
+    KsPrint((4, "KsTcpChainedReceive: sock: %p conn: %p ReceiveLength: %xh "
+                "bIsExpedited: %d Tsdu=%p TsduDesc=%p data=%xh\n",
+                 tconn, tconn->kstc_conn, ReceiveLength, expedited,
+                 Tsdu, TsduDescriptor, *((PULONG)KsMapMdlBuffer(Tsdu))));
 
-            if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) {
-                cfs_enter_debugger();
-            } else {
-                ExQueueWorkItem(
-                        &(tconn->kstc_destroy),
-                        DelayedWorkQueue
-                        );
+    ks_get_tconn(tconn);
 
-                cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY);
-            }
+    /* check whether we are conntected or not listener */
+    if ( !((tconn->kstc_state == ksts_connected) &&
+         (tconn->kstc_type == kstt_sender ||
+          tconn->kstc_type == kstt_child))) {
 
-            spin_unlock(&(tconn->kstc_lock));
+        ks_put_tconn(tconn);
+        return (STATUS_SUCCESS);
+    }
+
+    if (Tsdu) {
+
+        TsduMgr = KsQueryTsduMgr(tconn, expedited, FALSE);
+        ks_lock_tsdumgr(TsduMgr);
+#if FALSE
+        KsWriteTsduMdl(TsduMgr, Tsdu,  TsduDescriptor,
+                       StartingOffset, ReceiveLength, 0);
+        status = STATUS_PENDING;
+#else
+        KsWriteTsduDat(TsduMgr, (PCHAR)KsMapMdlBuffer(Tsdu) + 
+                       StartingOffset, ReceiveLength, 0);
+        status = STATUS_SUCCESS;
+#endif
+        KeSetEvent(&(TsduMgr->Event), 0, FALSE);
+        ks_unlock_tsdumgr(TsduMgr);
+
+        /* re-active the ks connection and wake up the scheduler */
+        if (KS_CAN_SCHED(TsduMgr)) {
+            if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+                tconn->kstc_sched_cb(tconn, FALSE);
+            }
         }
+
+    } else {
+
+        ks_abort_tconn(tconn);
+        status = STATUS_CONNECTION_ABORTED;
     }
+
+    ks_put_tconn(tconn);
+
+    /* Return STATUS_PENDING to system because we are still
+       owning the MDL resources. ks_recv_mdl is expected
+       to free the MDL resources. */
+
+    return (status);
 }
 
+
 /*
- * ks_destroy_tconn
- *   cleanup the tdi connection and free it
+ *  Expedited & Bulk receive event handler
+ *
+ *  It forwards all of its arguments to
+ *  KsTcpChainedReceiveEventHandler with TDI_RECEIVE_EXPEDITED forced
+ *  into ReceiveFlags and returns that handler's status.
+ */
+
+NTSTATUS
+KsTcpChainedReceiveExpeditedEventHandler (
+    IN PVOID                TdiEventContext,       // the event context
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                ReceiveLength,
+    IN ULONG                StartingOffset,        // offset of start of client data in TSDU
+    IN PMDL                 Tsdu,                  // TSDU data chain
+    IN PVOID                TsduDescriptor         // for call to TdiReturnChainedReceives
+    )
+{
+    return KsTcpChainedReceiveEventHandler(
+                TdiEventContext,
+                ConnectionContext,
+                ReceiveFlags | TDI_RECEIVE_EXPEDITED,
+                ReceiveLength,
+                StartingOffset,
+                Tsdu,
+                TsduDescriptor );
+}
+
+
+/*
+ * KsSetHandlers
+ *   setup all the event handler callbacks
  *
  * Arguments:
- *   tconn: the tdi connection to be cleaned.
+ *   tconn: the tdi connection object
  *
  * Return Value:
- *   N/A
+ *   int: ks error code, cfs_error_code() of the KsSetEventHandlers
+ *        status. Nothing is registered and the code of
+ *        STATUS_SUCCESS is returned when the address object is not
+ *        opened or when tconn is a child.
  *
  * NOTES:
  *   N/A
  */
 
-void
-ks_destroy_tconn(
-    ksock_tconn_t *     tconn
+int
+KsSetHandlers(
+    ks_tconn_t *     tconn
     )
 {
-    LASSERT(tconn->kstc_refcount.counter == 0);
-
-    if (tconn->kstc_type == kstt_listener) {
-
-        ks_reset_handlers(tconn);
-
-        /* for listener, we just need to close the address object */
-        KsCloseAddress(
-                tconn->kstc_addr.Handle,
-                tconn->kstc_addr.FileObject
-                );
-
-        tconn->kstc_state = ksts_inited;
+    NTSTATUS            status = STATUS_SUCCESS;
+    KS_EVENT_HANDLERS   handlers;
 
-    } else if (tconn->kstc_type == kstt_child) {
+    /* to make sure the address object is opened already */
+    if (tconn->kstc_addr.FileObject == NULL) {
+        goto errorout;
+    }
 
-        /* for child tdi conections */
+    /* initialize the handlers indicator array. for sender and listener,
+       there are different set of callbacks. for child, we just return. */
 
-        /* disassociate the relation between it's connection object
-           and the address object */
+    memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
 
-        if (tconn->kstc_state == ksts_associated) {
-            KsDisassociateAddress(
-                tconn->child.kstc_info.FileObject
-                );
-        }
+    SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler);
+    SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler);
+    SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
+    SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler);
+    SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler);
 
-        /* release the connection object */
+    // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler);
 
-        KsCloseConnection(
-                tconn->child.kstc_info.Handle,
-                tconn->child.kstc_info.FileObject
-                );
+    if (tconn->kstc_type == kstt_listener) {
+        SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler);
+    } else if (tconn->kstc_type == kstt_child) {
+        goto errorout;
+    }
 
-        /* release it's refer of it's parent's address object */
-        KsCloseAddress(
-                NULL,
-                tconn->kstc_addr.FileObject
+    /* set all the event callbacks */
+    status = KsSetEventHandlers(
+                tconn->kstc_addr.FileObject, /* Address File Object  */
+                tconn,                       /* Event Context */
+                &handlers                    /* Event callback handlers */
                 );
 
-        spin_lock(&tconn->child.kstc_parent->kstc_lock);
-        spin_lock(&tconn->kstc_lock);
-
-        tconn->kstc_state = ksts_inited;
-
-        /* remove it frome it's parent's queues */
+errorout:
 
-        if (tconn->child.kstc_queued) {
+    return cfs_error_code(status);
+}
 
-            list_del(&(tconn->child.kstc_link));
 
-            if (tconn->child.kstc_queueno) {
+/*
+ * KsResetHandlers
+ *   disable all the event handler callbacks (set to NULL)
+ *
+ * Arguments:
+ *   tconn: the tdi connection object
+ *
+ * Return Value:
+ *   int: ks error code, cfs_error_code() of the KsSetEventHandlers
+ *        status. Nothing is changed and the code of STATUS_SUCCESS
+ *        is returned when the address object is not opened or when
+ *        tconn is a child.
+ *
+ * NOTES:
+ *   N/A
+ */
 
-                LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0);
-                tconn->child.kstc_parent->listener.kstc_accepted.num -= 1;
+int
+KsResetHandlers(
+    ks_tconn_t *     tconn
+    )
+{
+    NTSTATUS            status = STATUS_SUCCESS;
+    KS_EVENT_HANDLERS   handlers;
 
-            } else {
+    /* to make sure the address object is opened already */
+    if (tconn->kstc_addr.FileObject == NULL) {
+        goto errorout;
+    }
 
-                LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0);
-                tconn->child.kstc_parent->listener.kstc_listening.num -= 1;
-            }
+    /* initialize the handlers indicator array. for sender and listener,
+       there are different set of callbacks. for child, we just return. */
 
-            tconn->child.kstc_queued = FALSE;
-        }
+    memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS));
 
-        spin_unlock(&tconn->kstc_lock);
-        spin_unlock(&tconn->child.kstc_parent->kstc_lock);
+    SetEventHandler(handlers, TDI_EVENT_ERROR, NULL);
+    SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL);
+    SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL);
+    SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL);
+    SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL);
+    // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL);
 
-        /* drop the reference of the parent tconn */
-        ks_put_tconn(tconn->child.kstc_parent);
+    if (tconn->kstc_type == kstt_listener) {
+        SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL);
+    } else if (tconn->kstc_type == kstt_child) {
+        goto errorout;
+    }
 
-    } else if (tconn->kstc_type == kstt_sender) {
+    /* set all the event callbacks */
+    status = KsSetEventHandlers(
+                tconn->kstc_addr.FileObject, /* Address File Object  */
+                tconn,                       /* Event Context */
+                &handlers                    /* Event callback handlers */
+                );
 
-        ks_reset_handlers(tconn);
+errorout:
 
-        /* release the connection object */
+    return cfs_error_code(status);
+}
 
-        KsCloseConnection(
-                tconn->sender.kstc_info.Handle,
-                tconn->sender.kstc_info.FileObject
-                );
+/*
+ * KsPrintProviderInfo
+ *   print the fields of a TDI_PROVIDER_INFO through KsPrint (level 2),
+ *   including one line for each service flag that is set
+ *
+ * Arguments:
+ *   DeviceName:    the name printed in the heading line
+ *   ProviderInfo:  the provider information to be printed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
+
+VOID
+KsPrintProviderInfo(
+   PWSTR DeviceName,
+   PTDI_PROVIDER_INFO ProviderInfo
+   )
+{
+    KsPrint((2, "%ws ProviderInfo:\n", DeviceName));
 
-        /* release it's refer of it's parent's address object */
-        KsCloseAddress(
-                tconn->kstc_addr.Handle,
-                tconn->kstc_addr.FileObject
-                );
+    KsPrint((2, "  Version              : 0x%4.4X\n", ProviderInfo->Version ));
+    KsPrint((2, "  MaxSendSize          : %d\n", ProviderInfo->MaxSendSize ));
+    KsPrint((2, "  MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData ));
+    KsPrint((2, "  MaxDatagramSize      : %d\n", ProviderInfo->MaxDatagramSize ));
+    KsPrint((2, "  ServiceFlags         : 0x%8.8X\n", ProviderInfo->ServiceFlags ));
 
-        tconn->kstc_state = ksts_inited;
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) {
+        KsPrint((2, "  CONNECTION_MODE\n"));
+    }
 
-    } else {
-        cfs_enter_debugger();
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) {
+        KsPrint((2, "  ORDERLY_RELEASE\n"));
     }
 
-    /* free the tconn structure ... */
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) {
+        KsPrint((2, "  CONNECTIONLESS_MODE\n"));
+    }
 
-    ks_free_tconn(tconn);
-}
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) {
+        KsPrint((2, "  ERROR_FREE_DELIVERY\n"));
+    }
 
-int
-ks_query_data(
-    ksock_tconn_t * tconn,
-    size_t *        size,
-    int             bIsExpedited )
-{
-    int             rc = 0;
+    if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) {
+        KsPrint((2, "  SECURITY_LEVEL\n"));
+    }
 
-    PKS_CHAIN       KsChain;
-    PKS_TSDUMGR     KsTsduMgr;
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) {
+        KsPrint((2, "  BROADCAST_SUPPORTED\n"));
+    }
 
-    *size = 0;
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) {
+        KsPrint((2, "  MULTICAST_SUPPORTED\n"));
+    }
 
-    ks_get_tconn(tconn);
-    spin_lock(&(tconn->kstc_lock));
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) {
+        KsPrint((2, "  DELAYED_ACCEPTANCE\n"));
+    }
 
-    if ( tconn->kstc_type != kstt_sender &&
-         tconn->kstc_type != kstt_child) {
-        rc = -EINVAL;
-        spin_unlock(&(tconn->kstc_lock));
-        goto errorout;
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) {
+        KsPrint((2, "  EXPEDITED_DATA\n"));
     }
 
-    if (tconn->kstc_state != ksts_connected) {
-        rc = -ENOTCONN;
-        spin_unlock(&(tconn->kstc_lock));
-        goto errorout;
+    if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) {
+        KsPrint((2, "  INTERNAL_BUFFERING\n"));
     }
 
-    if (tconn->kstc_type == kstt_sender) {
-        KsChain = &(tconn->sender.kstc_recv);
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        KsChain = &(tconn->child.kstc_recv);
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) {
+        KsPrint((2, "  ROUTE_DIRECTED\n"));
     }
 
-    if (bIsExpedited) {
-        KsTsduMgr = &(KsChain->Expedited);
-    } else {
-        KsTsduMgr = &(KsChain->Normal);
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) {
+        KsPrint((2, "  NO_ZERO_LENGTH\n"));
     }
 
-    *size = KsTsduMgr->TotalBytes;
-    spin_unlock(&(tconn->kstc_lock));
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) {
+        KsPrint((2, "  POINT_TO_POINT\n"));
+    }
 
-errorout:
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) {
+        KsPrint((2, "  MESSAGE_MODE\n"));
+    }
 
-    ks_put_tconn(tconn);
+    if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) {
+        KsPrint((2, "  HALF_DUPLEX\n"));
+    }
 
-    return (rc);
+    KsPrint((2, "  MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData ));
+    KsPrint((2, "  MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData ));
+    KsPrint((2, "  NumberOfResources    : %d\n", ProviderInfo->NumberOfResources ));
 }
 
+
 /*
- * ks_get_tcp_option
- *   Query the the options of the tcp stream connnection
+ * ks_create_tconn
+ *   allocate a new tconn structure from the SLAB cache or
+ *   NonPaged system pool
  *
  * Arguments:
- *   tconn:         the tdi connection
- *   ID:            option id
- *   OptionValue:   buffer to store the option value
- *   Length:        the length of the value, to be returned
+ *   N/A
  *
  * Return Value:
- *   int:           ks return code
+ *   ks_tconn_t *: the address of tconn or NULL if it fails
  *
  * NOTES:
  *   N/A
  */
 
-int
-ks_get_tcp_option (
-    ksock_tconn_t *     tconn,
-    ULONG               ID,
-    PVOID               OptionValue,
-    PULONG              Length
-    )
+ks_tconn_t *
+ks_create_tconn()
 {
-    NTSTATUS            Status = STATUS_SUCCESS;
-
-    IO_STATUS_BLOCK     IoStatus;
+    ks_tconn_t * tconn = NULL;
 
-    TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx;
-
-    PFILE_OBJECT        ConnectionObject;
-    PDEVICE_OBJECT      DeviceObject = NULL;
-
-    PIRP                Irp = NULL;
-    PIO_STACK_LOCATION  IrpSp = NULL;
-
-    KEVENT              Event;
-
-    /* make sure the tdi connection is connected ? */
-
-    ks_get_tconn(tconn);
+    /* allocate ks_tconn_t from the slab cache memory */
+    tconn = (ks_tconn_t *)cfs_mem_cache_alloc(
+                ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO);
 
-    if (tconn->kstc_state != ksts_connected) {
-        Status = STATUS_INVALID_PARAMETER;
-        goto errorout;
-    }
+    if (tconn) {
 
-    LASSERT(tconn->kstc_type == kstt_sender ||
-           tconn->kstc_type == kstt_child);
+        /* zero tconn elements */
+        memset(tconn, 0, sizeof(ks_tconn_t));
 
-    if (tconn->kstc_type == kstt_sender) {
-        ConnectionObject = tconn->sender.kstc_info.FileObject;
-    } else {
-        ConnectionObject = tconn->child.kstc_info.FileObject;
-    }
+        /* initialize the tconn ... */
+        tconn->kstc_magic = KS_TCONN_MAGIC;
 
-    QueryInfoEx.ID.toi_id = ID;
-    QueryInfoEx.ID.toi_type   = INFO_TYPE_CONNECTION;
-    QueryInfoEx.ID.toi_class  = INFO_CLASS_PROTOCOL;
-    QueryInfoEx.ID.toi_entity.tei_entity   = CO_TL_ENTITY;
-    QueryInfoEx.ID.toi_entity.tei_instance = 0;
+        ExInitializeWorkItem(
+                &(tconn->kstc_disconnect.WorkItem),
+                KsDisconnectHelper,
+                &(tconn->kstc_disconnect)
+                );
 
-    RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE);
+        KeInitializeEvent(
+                &(tconn->kstc_disconnect.Event),
+                SynchronizationEvent,
+                FALSE );
 
-    KeInitializeEvent(&Event, NotificationEvent, FALSE);
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+        ExInitializeWorkItem(
+                &(tconn->kstc_destroy),
+                ks_destroy_tconn,
+                tconn
+            );
 
-    Irp = IoBuildDeviceIoControlRequest(
-                IOCTL_TCP_QUERY_INFORMATION_EX,
-                DeviceObject,
-                &QueryInfoEx,
-                sizeof(TCP_REQUEST_QUERY_INFORMATION_EX),
-                OptionValue,
-                *Length,
-                FALSE,
-                &Event,
-                &IoStatus
-                );
+        spin_lock_init(&(tconn->kstc_lock));
 
-    if (Irp == NULL) {
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
-    }
+        ks_get_tconn(tconn);
+        spin_lock(&(ks_data.ksnd_tconn_lock));
 
-    IrpSp = IoGetNextIrpStackLocation(Irp);
+        /* attach it into global list in ks_data */
 
-    if (IrpSp == NULL) {
+        list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns));
+        ks_data.ksnd_ntconns++;
+        spin_unlock(&(ks_data.ksnd_tconn_lock));
 
-        IoFreeIrp(Irp);
-        Irp = NULL;
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
+        tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000;
     }
+    KsPrint((3, "ks_create_tconn: new connection: %p\n", tconn));
+    return (tconn);
+}
 
-    IrpSp->FileObject = ConnectionObject;
-    IrpSp->DeviceObject = DeviceObject;
-
-    Status = IoCallDriver(DeviceObject, Irp);
-
-    if (Status == STATUS_PENDING) {
+/*
+ * ks_free_tconn
+ *   free the tconn structure to the SLAB cache or NonPaged
+ *   system pool
+ *
+ * Arguments:
+ *   tconn:  the tconn to be freed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
 
-        KeWaitForSingleObject(
-                &Event,
-                Executive,
-                KernelMode,
-                FALSE,
-                NULL
-                );
+void
+ks_free_tconn(ks_tconn_t * tconn)
+{
+    LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0);
 
-        Status = IoStatus.Status;
-    }
+    spin_lock(&(ks_data.ksnd_tconn_lock));
 
+    /* remove it from the global list */
+    list_del(&tconn->kstc_list);
+    ks_data.ksnd_ntconns--;
 
-    if (NT_SUCCESS(Status)) {
-        *Length = IoStatus.Information;
-    } else {
-        cfs_enter_debugger();
-        memset(OptionValue, 0, *Length);
-        Status = STATUS_SUCCESS;
+    /* if this is the last tconn, it would be safe for
+       ks_tdi_fini_data to quit ... */
+    if (ks_data.ksnd_ntconns == 0) {
+        cfs_wake_event(&ks_data.ksnd_tconn_exit);
     }
+    spin_unlock(&(ks_data.ksnd_tconn_lock));
 
-errorout:
-
-    ks_put_tconn(tconn);
+    /* free the structure memory */
+    cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn);
 
-    return cfs_error_code(Status);
+    KsPrint((3, "ks_free_tconn: tconn %p is freed.\n", tconn));
 }
 
+
 /*
- * ks_set_tcp_option
- *   Set the the options for the tcp stream connnection
+ * ks_init_listener
+ *   Initialize the tconn as a listener (daemon)
  *
  * Arguments:
- *   tconn:     the tdi connection
- *   ID:        option id
- *   OptionValue: buffer containing the new option value
- *   Length:    the length of the value
+ *   tconn: the listener tconn
  *
  * Return Value:
- *   int:       ks return code
+ *   N/A
  *
- * NOTES:
+ * Notes:
  *   N/A
  */
 
-NTSTATUS
-ks_set_tcp_option (
-    ksock_tconn_t * tconn,
-    ULONG           ID,
-    PVOID           OptionValue,
-    ULONG           Length
+void
+ks_init_listener(
+    ks_tconn_t * tconn
     )
 {
-    NTSTATUS            Status = STATUS_SUCCESS;
-
-    IO_STATUS_BLOCK     IoStatus;
-
-    ULONG               SetInfoExLength;
-    PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL;
-
-    PFILE_OBJECT        ConnectionObject;
-    PDEVICE_OBJECT      DeviceObject = NULL;
+    /* preparation: initialize the tconn members */
 
-    PIRP                Irp = NULL;
-    PIO_STACK_LOCATION  IrpSp = NULL;
+    tconn->kstc_type = kstt_listener;
 
-    PKEVENT             Event;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
 
-    /* make sure the tdi connection is connected ? */
+    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list));
+    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list));
 
-    ks_get_tconn(tconn);
+    cfs_init_event( &(tconn->listener.kstc_accept_event),
+                    TRUE,
+                    FALSE );
 
-    if (tconn->kstc_state != ksts_connected) {
-        Status = STATUS_INVALID_PARAMETER;
-        goto errorout;
-    }
+    cfs_init_event( &(tconn->listener.kstc_destroy_event),
+                    TRUE,
+                    FALSE );
 
-    LASSERT(tconn->kstc_type == kstt_sender ||
-           tconn->kstc_type == kstt_child);
+    tconn->kstc_state = ksts_inited;
+}
 
-    if (tconn->kstc_type == kstt_sender) {
-        ConnectionObject = tconn->sender.kstc_info.FileObject;
-    } else {
-        ConnectionObject = tconn->child.kstc_info.FileObject;
-    }
 
-    SetInfoExLength =  sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT);
+/*
+ * ks_init_sender
+ *   Initialize the tconn as a sender
+ *
+ * Arguments:
+ *   tconn: the sender tconn
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
 
-    SetInfoEx = ExAllocatePoolWithTag(
-                    NonPagedPool,
-                    SetInfoExLength,
-                    'TSSK'
-                    );
+void
+ks_init_sender(
+    ks_tconn_t * tconn
+    )
+{
+    tconn->kstc_type = kstt_sender;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
 
-    if (SetInfoEx == NULL) {
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
-    }
+    KsInitializeKsChain(&(tconn->sender.kstc_recv));
+    KsInitializeKsChain(&(tconn->sender.kstc_send));
 
-    SetInfoEx->ID.toi_id = ID;
+    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
 
-    SetInfoEx->ID.toi_type  = INFO_TYPE_CONNECTION;
-    SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL;
-    SetInfoEx->ID.toi_entity.tei_entity   = CO_TL_ENTITY;
-    SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE;
+    tconn->kstc_state = ksts_inited;
+}
 
-    SetInfoEx->BufferSize = Length;
-    RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length);
+/*
+ * ks_init_child
+ *   Initialize the tconn as a child
+ *
+ * Arguments:
+ *   tconn: the child tconn
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    Event = (PKEVENT)(&(SetInfoEx->Buffer[Length]));
-    KeInitializeEvent(Event, NotificationEvent, FALSE);
+void
+ks_init_child(
+    ks_tconn_t * tconn
+    )
+{
+    tconn->kstc_type = kstt_child;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
 
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+    KsInitializeKsChain(&(tconn->child.kstc_recv));
+    KsInitializeKsChain(&(tconn->child.kstc_send));
 
-    Irp = IoBuildDeviceIoControlRequest(
-                IOCTL_TCP_SET_INFORMATION_EX,
-                DeviceObject,
-                SetInfoEx,
-                SetInfoExLength,
-                NULL,
-                0,
-                FALSE,
-                Event,
-                &IoStatus
-                );
+    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
 
-    if (Irp == NULL) {
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
-    }
+    tconn->kstc_state = ksts_inited;
+}
 
-    IrpSp = IoGetNextIrpStackLocation(Irp);
+/*
+ * ks_get_tconn
+ *   increase the reference count of the tconn by 1
+ *
+ * Arguments:
+ *   tconn: the tdi connection to be referenced
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    if (IrpSp == NULL) {
-        IoFreeIrp(Irp);
-        Irp = NULL;
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
-    }
+void
+ks_get_tconn(
+    ks_tconn_t * tconn
+    )
+{
+    atomic_inc(&(tconn->kstc_refcount));
+}
 
-    IrpSp->FileObject = ConnectionObject;
-    IrpSp->DeviceObject = DeviceObject;
+/*
+ * ks_put_tconn
+ *   decrease the reference count of the tconn and destroy
+ *   it if the reference count becomes 0.
+ *
+ * Arguments:
+ *   tconn: the tdi connection to be dereferenced
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
 
-    Status = IoCallDriver(DeviceObject, Irp);
+void
+ks_put_tconn(
+    ks_tconn_t *tconn
+    )
+{
+    if (atomic_dec_and_test(&(tconn->kstc_refcount))) {
 
-    if (Status == STATUS_PENDING) {
+        spin_lock(&(tconn->kstc_lock));
 
-        KeWaitForSingleObject(
-                Event,
-                Executive,
-                KernelMode,
-                FALSE,
-                NULL
-                );
+        if ( ( tconn->kstc_type == kstt_child ||
+               tconn->kstc_type == kstt_sender ) &&
+             ( tconn->kstc_state == ksts_connected ) ) {
 
-        Status = IoStatus.Status;
-    }
+            spin_unlock(&(tconn->kstc_lock));
 
-errorout:
+            ks_abort_tconn(tconn);
 
-    if (SetInfoEx) {
-        ExFreePool(SetInfoEx);
-    }
+        } else {
 
-    if (!NT_SUCCESS(Status)) {
-        printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n",
-               ID, Status);
-        Status = STATUS_SUCCESS;
-    }
+            if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) {
+                cfs_enter_debugger();
+            } else {
+                ExQueueWorkItem(
+                        &(tconn->kstc_destroy),
+                        DelayedWorkQueue
+                        );
 
-    ks_put_tconn(tconn);
+                cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY);
+            }
 
-    return cfs_error_code(Status);
+            spin_unlock(&(tconn->kstc_lock));
+        }
+    }
 }
 
 /*
- * ks_bind_tconn
- *   bind the tdi connection object with an address
+ * ks_destroy_tconn
+ *   cleanup the tdi connection and free it
  *
  * Arguments:
- *   tconn:    tconn to be bound
- *   parent:   the parent tconn object
- *   ipaddr:   the ip address
- *   port:     the port number
+ *   tconn: the tdi connection to be cleaned.
  *
  * Return Value:
- *   int:   0 for success or ks error codes.
+ *   N/A
  *
  * NOTES:
  *   N/A
  */
 
-int
-ks_bind_tconn (
-    ksock_tconn_t * tconn,
-    ksock_tconn_t * parent,
-    ulong_ptr   addr,
-    unsigned short  port
+void
+ks_destroy_tconn(
+    ks_tconn_t *     tconn
     )
 {
-    NTSTATUS            status;
-    int                 rc = 0;
-
-    ksock_tdi_addr_t    taddr;
+    LASSERT(tconn->kstc_refcount.counter == 0);
 
-    memset(&taddr, 0, sizeof(ksock_tdi_addr_t));
+    if (tconn->kstc_type == kstt_listener) {
 
-    if (tconn->kstc_state != ksts_inited) {
+        KsResetHandlers(tconn);
 
-        status = STATUS_INVALID_PARAMETER;
-        rc = cfs_error_code(status);
+        /* for listener, we just need to close the address object */
+        KsCloseAddress(
+                tconn->kstc_addr.Handle,
+                tconn->kstc_addr.FileObject
+                );
 
-        goto errorout;
+        tconn->kstc_state = ksts_inited;
 
     } else if (tconn->kstc_type == kstt_child) {
 
-        if (NULL == parent) {
-            status = STATUS_INVALID_PARAMETER;
-            rc = cfs_error_code(status);
-
-            goto errorout;
-        }
+        /* for child tdi connections */
 
-        /* refer it's parent's address object */
+        /* disassociate the relation between its connection object
+           and the address object */
 
-        taddr = parent->kstc_addr;
-        ObReferenceObject(taddr.FileObject);
+        if (tconn->kstc_state == ksts_associated) {
+            KsDisassociateAddress(
+                tconn->child.kstc_info.FileObject
+                );
+        }
 
-        ks_get_tconn(parent);
+        /* release the connection object */
 
-    } else {
+        KsCloseConnection(
+                tconn->child.kstc_info.Handle,
+                tconn->child.kstc_info.FileObject
+                );
 
-        PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi);
-        ULONG              AddrLen = 0;
+        /* release its reference to its parent's address object */
+        KsCloseAddress(
+                NULL,
+                tconn->kstc_addr.FileObject
+                );
 
-        /* intialize the tdi address*/
+        spin_lock(&tconn->child.kstc_parent->kstc_lock);
+        spin_lock(&tconn->kstc_lock);
 
-        TdiAddress->TAAddressCount = 1;
-        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
-        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
+        tconn->kstc_state = ksts_inited;
 
-        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
-        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+        /* remove it from its parent's queues */
 
-        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+        if (tconn->child.kstc_queued) {
 
+            list_del(&(tconn->child.kstc_link));
 
-        /* open the transport address object */
+            if (tconn->child.kstc_queueno) {
 
-        AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +
-                  TDI_ADDRESS_LENGTH_IP;
+                LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0);
+                tconn->child.kstc_parent->listener.kstc_accepted.num -= 1;
 
-        status = KsOpenAddress(
-                    &(tconn->kstc_dev),
-                    &(taddr.Tdi),
-                    AddrLen,
-                    &(taddr.Handle),
-                    &(taddr.FileObject)
-                    );
+            } else {
 
-        if (!NT_SUCCESS(status)) {
+                LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0);
+                tconn->child.kstc_parent->listener.kstc_listening.num -= 1;
+            }
 
-            KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n",
-                        addr, port,  status ));
-            rc = cfs_error_code(status);
-            goto errorout;
+            tconn->child.kstc_queued = FALSE;
         }
-    }
-
-    if (tconn->kstc_type == kstt_child) {
-        tconn->child.kstc_parent = parent;
-    }
-
-    tconn->kstc_state = ksts_bind;
-    tconn->kstc_addr  = taddr;
-
-errorout:
-
-    return (rc);
-}
-
-/*
- * ks_build_tconn
- *  build tcp/streaming connection to remote peer
- *
- * Arguments:
- *   tconn:    tconn to be connected to the peer
- *   addr:     the peer's ip address
- *   port:     the peer's port number
- *
- * Return Value:
- *   int:   0 for success or ks error codes.
- *
- * Notes:
- *   N/A
- */
-
-int
-ks_build_tconn(
-    ksock_tconn_t *                 tconn,
-    ulong_ptr                       addr,
-    unsigned short                  port
-    )
-{
-    int                             rc = 0;
-    NTSTATUS                        status = STATUS_SUCCESS;
-
-
-    PFILE_OBJECT                    ConnectionObject = NULL;
-    PDEVICE_OBJECT                  DeviceObject = NULL;
 
-    PTDI_CONNECTION_INFORMATION     ConnectionInfo = NULL;
-    ULONG                           AddrLength;
-
-    PIRP                            Irp = NULL;
+        spin_unlock(&tconn->kstc_lock);
+        spin_unlock(&tconn->child.kstc_parent->kstc_lock);
 
-    LASSERT(tconn->kstc_type == kstt_sender);
-    LASSERT(tconn->kstc_state == ksts_bind);
+        /* drop the reference of the parent tconn */
+        ks_put_tconn(tconn->child.kstc_parent);
 
-    ks_get_tconn(tconn);
+    } else if (tconn->kstc_type == kstt_sender) {
 
-    {
-        /* set the event callbacks */
-        rc = ks_set_handlers(tconn);
+        KsResetHandlers(tconn);
 
-        if (rc < 0) {
-            cfs_enter_debugger();
-            goto errorout;
-        }
-    }
+        /* release the connection object */
 
-    /* create the connection file handle / object  */
-    status = KsOpenConnection(
-                &(tconn->kstc_dev),
-                (CONNECTION_CONTEXT)tconn,
-                &(tconn->sender.kstc_info.Handle),
-                &(tconn->sender.kstc_info.FileObject)
+        KsCloseConnection(
+                tconn->sender.kstc_info.Handle,
+                tconn->sender.kstc_info.FileObject
                 );
 
-    if (!NT_SUCCESS(status)) {
-        rc = cfs_error_code(status);
-        cfs_enter_debugger();
-        goto errorout;
-    }
-
-    /* associdate the the connection with the adress object of the tconn */
-
-    status = KsAssociateAddress(
+        /* close the sender's own address object */
+        KsCloseAddress(
                 tconn->kstc_addr.Handle,
-                tconn->sender.kstc_info.FileObject
+                tconn->kstc_addr.FileObject
                 );
 
-    if (!NT_SUCCESS(status)) {
-        rc = cfs_error_code(status);
-        cfs_enter_debugger();
-        goto errorout;
-    }
-
-    tconn->kstc_state = ksts_associated;
-
-    /* Allocating Connection Info Together with the Address */
-    AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
-                 + TDI_ADDRESS_LENGTH_IP;
-
-    ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
-    NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK');
-
-    if (NULL == ConnectionInfo) {
+        tconn->kstc_state = ksts_inited;
 
-        status = STATUS_INSUFFICIENT_RESOURCES;
-        rc = cfs_error_code(status);
+    } else {
         cfs_enter_debugger();
-        goto errorout;
     }
 
-    /* Initializing ConnectionInfo ... */
-    {
-        PTRANSPORT_ADDRESS TdiAddress;
-
-        /* ConnectionInfo settings */
-
-        ConnectionInfo->UserDataLength = 0;
-        ConnectionInfo->UserData = NULL;
-        ConnectionInfo->OptionsLength = 0;
-        ConnectionInfo->Options = NULL;
-        ConnectionInfo->RemoteAddressLength = AddrLength;
-        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
-
+    /* free the tconn structure ... */
 
-        /* intialize the tdi address*/
+    ks_free_tconn(tconn);
+}
 
-        TdiAddress = ConnectionInfo->RemoteAddress;
+/*
+ * ks_get_tcp_option
+ *   Query the options of the tcp stream connection
+ *
+ * Arguments:
+ *   tconn:         the tdi connection
+ *   ID:            option id
+ *   OptionValue:   buffer to store the option value
+ *   Length:        the length of the value, to be returned
+ *
+ * Return Value:
+ *   int:           ks return code
+ *
+ * NOTES:
+ *   N/A
+ */
 
-        TdiAddress->TAAddressCount = 1;
-        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
-        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
+int
+ks_get_tcp_option (
+    ks_tconn_t *        tconn,
+    ULONG               ID,
+    PVOID               OptionValue,
+    PULONG              Length
+    )
+{
+    NTSTATUS            Status = STATUS_SUCCESS;
 
-        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
-        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+    IO_STATUS_BLOCK     IoStatus;
 
-        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
-    }
+    TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx;
 
-    /* Now prepare to connect the remote peer ... */
+    PFILE_OBJECT        ConnectionObject;
+    PDEVICE_OBJECT      DeviceObject = NULL;
 
-    ConnectionObject = tconn->sender.kstc_info.FileObject;
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+    PIRP                Irp = NULL;
+    PIO_STACK_LOCATION  IrpSp = NULL;
 
-    /* allocate a new Irp */
+    KEVENT              Event;
 
-    Irp = KsBuildTdiIrp(DeviceObject);
+    /* make sure the tdi connection is connected ? */
 
-    if (NULL == Irp) {
+    ks_get_tconn(tconn);
 
-        status = STATUS_INSUFFICIENT_RESOURCES;
-        rc = cfs_error_code(status);
-        cfs_enter_debugger();
+    if (tconn->kstc_state != ksts_connected) {
+        Status = STATUS_INVALID_PARAMETER;
         goto errorout;
     }
 
-    /* setup the Irp */
-
-    TdiBuildConnect(
-            Irp,
-            DeviceObject,
-            ConnectionObject,
-            NULL,
-            NULL,
-            NULL,
-            ConnectionInfo,
-            NULL
-            );
-
-
-    /* sumbit the Irp to the underlying transport driver */
-    status = KsSubmitTdiIrp(
-                    DeviceObject,
-                    Irp,
-                    TRUE,
-                    NULL
-                    );
-
-    spin_lock(&(tconn->kstc_lock));
+    LASSERT(tconn->kstc_type == kstt_sender ||
+           tconn->kstc_type == kstt_child);
 
-    if (NT_SUCCESS(status)) {
+    if (tconn->kstc_type == kstt_sender) {
+        ConnectionObject = tconn->sender.kstc_info.FileObject;
+    } else {
+        ConnectionObject = tconn->child.kstc_info.FileObject;
+    }
 
-        /* Connected! the conneciton is built successfully. */
+    QueryInfoEx.ID.toi_id = ID;
+    QueryInfoEx.ID.toi_type   = INFO_TYPE_CONNECTION;
+    QueryInfoEx.ID.toi_class  = INFO_CLASS_PROTOCOL;
+    QueryInfoEx.ID.toi_entity.tei_entity   = CO_TL_ENTITY;
+    QueryInfoEx.ID.toi_entity.tei_instance = 0;
 
-        tconn->kstc_state = ksts_connected;
+    RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE);
 
-        tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo;
-        tconn->sender.kstc_info.Remote         = ConnectionInfo->RemoteAddress;
+    KeInitializeEvent(&Event, NotificationEvent, FALSE);
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-        spin_unlock(&(tconn->kstc_lock));
+    Irp = IoBuildDeviceIoControlRequest(
+                IOCTL_TCP_QUERY_INFORMATION_EX,
+                DeviceObject,
+                &QueryInfoEx,
+                sizeof(TCP_REQUEST_QUERY_INFORMATION_EX),
+                OptionValue,
+                *Length,
+                FALSE,
+                &Event,
+                &IoStatus
+                );
 
-    } else {
+    if (Irp == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-        /* Not connected! Abort it ... */
+    IrpSp = IoGetNextIrpStackLocation(Irp);
 
-        if (rc != 0) {
-            cfs_enter_debugger();
-        }
+    if (IrpSp == NULL) {
 
+        IoFreeIrp(Irp);
         Irp = NULL;
-        rc = cfs_error_code(status);
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-        tconn->kstc_state = ksts_associated;
-        spin_unlock(&(tconn->kstc_lock));
+    IrpSp->FileObject = ConnectionObject;
+    IrpSp->DeviceObject = DeviceObject;
 
-        /* disassocidate the connection and the address object,
-           after cleanup,  it's safe to set the state to abort ... */
+    Status = IoCallDriver(DeviceObject, Irp);
 
-        if ( NT_SUCCESS(KsDisassociateAddress(
-                        tconn->sender.kstc_info.FileObject))) {
-            tconn->kstc_state = ksts_aborted;
-        }
+    if (Status == STATUS_PENDING) {
 
-        /* reset the event callbacks */
-        rc = ks_reset_handlers(tconn);
+        KeWaitForSingleObject(
+                &Event,
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
 
-        goto errorout;
+        Status = IoStatus.Status;
     }
 
-errorout:
-
-    if (NT_SUCCESS(status)) {
-
-        ks_query_local_ipaddr(tconn);
 
+    if (NT_SUCCESS(Status)) {
+        *Length = (ULONG)(ULONG_PTR)IoStatus.Information;
     } else {
-
-        if (ConnectionInfo) {
-            ExFreePool(ConnectionInfo);
-        }
-        if (Irp) {
-            IoFreeIrp(Irp);
-        }
+        cfs_enter_debugger();
+        memset(OptionValue, 0, *Length);
+        Status = STATUS_SUCCESS;
     }
 
+errorout:
+
     ks_put_tconn(tconn);
 
-    return (rc);
+    return cfs_error_code(Status);
 }
 
-
 /*
- * ks_disconnect_tconn
- *   disconnect the tconn from a connection
+ * ks_set_tcp_option
+ *   Set the options for the tcp stream connection
  *
  * Arguments:
- *   tconn: the tdi connecton object connected already
- *   flags: flags & options for disconnecting
+ *   tconn:     the tdi connection
+ *   ID:        option id
+ *   OptionValue: buffer containing the new option value
+ *   Length:    the length of the value
  *
  * Return Value:
- *   int: ks error code
+ *   int:       ks return code
  *
- * Notes:
+ * NOTES:
  *   N/A
  */
 
-int
-ks_disconnect_tconn(
-    ksock_tconn_t *     tconn,
-    ulong_ptr       flags
+NTSTATUS
+ks_set_tcp_option (
+    ks_tconn_t *    tconn,
+    ULONG           ID,
+    PVOID           OptionValue,
+    ULONG           Length
     )
 {
-    NTSTATUS            status = STATUS_SUCCESS;
+    NTSTATUS            Status = STATUS_SUCCESS;
 
-    ksock_tconn_info_t * info;
+    IO_STATUS_BLOCK     IoStatus;
+
+    ULONG               SetInfoExLength;
+    PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL;
 
     PFILE_OBJECT        ConnectionObject;
     PDEVICE_OBJECT      DeviceObject = NULL;
 
     PIRP                Irp = NULL;
+    PIO_STACK_LOCATION  IrpSp = NULL;
 
-    KEVENT              Event;
-
-    ks_get_tconn(tconn);
-
-    /* make sure tt's connected already and it
-       must be a sender or a child ...       */
+    PKEVENT             Event;
 
-    LASSERT(tconn->kstc_state == ksts_connected);
-    LASSERT( tconn->kstc_type == kstt_sender ||
-            tconn->kstc_type == kstt_child);
+    /* make sure the tdi connection is connected ? */
 
-    /* reset all the event handlers to NULL */
+    ks_get_tconn(tconn);
 
-    if (tconn->kstc_type != kstt_child) {
-        ks_reset_handlers (tconn);
+    if (tconn->kstc_state != ksts_connected) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto errorout;
     }
 
-    /* Disconnecting to the remote peer ... */
+    LASSERT(tconn->kstc_type == kstt_sender ||
+           tconn->kstc_type == kstt_child);
 
     if (tconn->kstc_type == kstt_sender) {
-        info = &(tconn->sender.kstc_info);
+        ConnectionObject = tconn->sender.kstc_info.FileObject;
     } else {
-        info = &(tconn->child.kstc_info);
+        ConnectionObject = tconn->child.kstc_info.FileObject;
     }
 
-    ConnectionObject = info->FileObject;
-    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
-
-    /* allocate an Irp and setup it */
-
-    Irp = KsBuildTdiIrp(DeviceObject);
+    SetInfoExLength =  sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT);
 
-    if (NULL == Irp) {
+    SetInfoEx = ExAllocatePoolWithTag(
+                    NonPagedPool,
+                    SetInfoExLength,
+                    'TSSK'
+                    );
 
-        status = STATUS_INSUFFICIENT_RESOURCES;
-        cfs_enter_debugger();
+    if (SetInfoEx == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
         goto errorout;
     }
 
-    KeInitializeEvent(
-            &Event,
-            SynchronizationEvent,
-            FALSE
-            );
-
-    TdiBuildDisconnect(
-            Irp,
-            DeviceObject,
-            ConnectionObject,
-            KsDisconectCompletionRoutine,
-            &Event,
-            NULL,
-            flags,
-            NULL,
-            NULL
-            );
-
-    /* issue the Irp to the underlying transport
-       driver to disconnect the connection    */
-
-    status = IoCallDriver(DeviceObject, Irp);
-
-    if (STATUS_PENDING == status) {
+    SetInfoEx->ID.toi_id = ID;
 
-        status = KeWaitForSingleObject(
-                     &Event,
-                     Executive,
-                     KernelMode,
-                     FALSE,
-                     NULL
-                     );
+    SetInfoEx->ID.toi_type  = INFO_TYPE_CONNECTION;
+    SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL;
+    SetInfoEx->ID.toi_entity.tei_entity   = CO_TL_ENTITY;
+    SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE;
 
-        status = Irp->IoStatus.Status;
-    }
+    SetInfoEx->BufferSize = Length;
+    RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length);
 
-    KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n",
-                status, KsNtStatusToString(status)));
+    Event = (PKEVENT)(&(SetInfoEx->Buffer[Length]));
+    KeInitializeEvent(Event, NotificationEvent, FALSE);
 
-    IoFreeIrp(Irp);
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-    if (info->ConnectionInfo) {
+    Irp = IoBuildDeviceIoControlRequest(
+                IOCTL_TCP_SET_INFORMATION_EX,
+                DeviceObject,
+                SetInfoEx,
+                SetInfoExLength,
+                NULL,
+                0,
+                FALSE,
+                Event,
+                &IoStatus
+                );
 
-        /* disassociate the association between connection/address objects */
+    if (Irp == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-        status = KsDisassociateAddress(ConnectionObject);
+    IrpSp = IoGetNextIrpStackLocation(Irp);
 
-        if (!NT_SUCCESS(status)) {
-            cfs_enter_debugger();
-        }
+    if (IrpSp == NULL) {
+        IoFreeIrp(Irp);
+        Irp = NULL;
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-        spin_lock(&(tconn->kstc_lock));
+    IrpSp->FileObject = ConnectionObject;
+    IrpSp->DeviceObject = DeviceObject;
 
-        /* cleanup the tsdumgr Lists */
-        KsCleanupTsdu (tconn);
+    Status = IoCallDriver(DeviceObject, Irp);
 
-        /* set the state of the tconn */
-        if (NT_SUCCESS(status)) {
-            tconn->kstc_state = ksts_disconnected;
-        } else {
-            tconn->kstc_state = ksts_associated;
-        }
+    if (Status == STATUS_PENDING) {
 
-        /* free  the connection info to system pool*/
-        ExFreePool(info->ConnectionInfo);
-        info->ConnectionInfo = NULL;
-        info->Remote = NULL;
+        KeWaitForSingleObject(
+                Event,
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
 
-        spin_unlock(&(tconn->kstc_lock));
+        Status = IoStatus.Status;
     }
 
-    status = STATUS_SUCCESS;
-
 errorout:
 
+    if (SetInfoEx) {
+        ExFreePool(SetInfoEx);
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        KsPrint((0, "ks_set_tcp_option: error setup tcp option: "
+                    "ID (%d) Status = %xh\n", ID, Status));
+        Status = STATUS_SUCCESS;
+    }
+
     ks_put_tconn(tconn);
 
-    return cfs_error_code(status);
+    return cfs_error_code(Status);
 }
 
-
 /*
- * ks_abort_tconn
- *   The connection is broken un-expectedly. We need do
- *   some cleanup.
+ * ks_bind_tconn
+ *   bind the tdi connection object with an address
  *
  * Arguments:
- *   tconn: the tdi connection
+ *   tconn:    tconn to be bound
+ *   parent:   the parent tconn object
+ *   addr:     the ip address
+ *   port:     the port number
  *
  * Return Value:
- *   N/A
+ *   int:   0 for success or ks error codes.
  *
- * Notes:
+ * NOTES:
  *   N/A
  */
 
-void
-ks_abort_tconn(
-    ksock_tconn_t *     tconn
+int
+ks_bind_tconn (
+    ks_tconn_t *    tconn,
+    ks_tconn_t *    parent,
+    ulong           addr,
+    unsigned short  port
     )
 {
-    PKS_DISCONNECT_WORKITEM WorkItem = NULL;
+    NTSTATUS            status;
+    int                 rc = 0;
 
-    WorkItem = &(tconn->kstc_disconnect);
+    ks_tdi_addr_t    taddr;
 
-    ks_get_tconn(tconn);
-    spin_lock(&(tconn->kstc_lock));
+    memset(&taddr, 0, sizeof(ks_tdi_addr_t));
 
-    if (tconn->kstc_state != ksts_connected) {
-        ks_put_tconn(tconn);
-    } else {
+    if (tconn->kstc_state != ksts_inited) {
 
-        if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
+        status = STATUS_INVALID_PARAMETER;
+        rc = cfs_error_code(status);
+        goto errorout;
 
-            WorkItem->Flags = TDI_DISCONNECT_ABORT;
-            WorkItem->tconn = tconn;
+    } else if (tconn->kstc_type == kstt_child) {
 
-            cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+        if (NULL == parent) {
+            status = STATUS_INVALID_PARAMETER;
+            rc = cfs_error_code(status);
 
-            ExQueueWorkItem(
-                    &(WorkItem->WorkItem),
-                    DelayedWorkQueue
-                    );
+            goto errorout;
         }
-    }
 
-    spin_unlock(&(tconn->kstc_lock));
-}
+        /* reference its parent's address object */
 
+        taddr = parent->kstc_addr;
+        ObReferenceObject(taddr.FileObject);
 
-/*
- * ks_query_local_ipaddr
- *   query the local connection ip address
- *
- * Arguments:
- *   tconn:  the tconn which is connected
- *
- * Return Value:
- *   int: ks error code
- *
- * Notes:
- *   N/A
- */
+        ks_get_tconn(parent);
 
-int
-ks_query_local_ipaddr(
-    ksock_tconn_t *     tconn
-    )
-{
-    PFILE_OBJECT    FileObject = NULL;
-    NTSTATUS        status;
+    } else {
 
-    PTRANSPORT_ADDRESS TdiAddress;
-    ULONG              AddressLength;
+        PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi);
+        ULONG              AddrLen = 0;
 
-    if (tconn->kstc_type == kstt_sender) {
-        FileObject = tconn->sender.kstc_info.FileObject;
-    } else if (tconn->kstc_type == kstt_child) {
-        FileObject = tconn->child.kstc_info.FileObject;
-    } else {
-        status = STATUS_INVALID_PARAMETER;
-        goto errorout;
-    }
+        /* initialize the tdi address */
 
-    TdiAddress = &(tconn->kstc_addr.Tdi);
-    AddressLength = MAX_ADDRESS_LENGTH;
+        TdiAddress->TAAddressCount = 1;
+        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
 
-    status =  KsQueryIpAddress(FileObject, TdiAddress, &AddressLength);
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = (ULONG)htonl(addr);
 
-    if (NT_SUCCESS(status)) {
+        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
 
-        KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n",
-                ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr,
-                ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port ));
-    } else {
-        KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n"));
+
+        /* open the transport address object */
+
+        AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +
+                  TDI_ADDRESS_LENGTH_IP;
+
+        status = KsOpenAddress(
+                    &(tconn->kstc_dev),
+                    &(taddr.Tdi),
+                    AddrLen,
+                    &(taddr.Handle),
+                    &(taddr.FileObject)
+                    );
+
+        if (!NT_SUCCESS(status)) {
+
+            KsPrint((1, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n",
+                        addr, port,  status ));
+            rc = cfs_error_code(status);
+            goto errorout;
+        }
     }
 
+    if (tconn->kstc_type == kstt_child) {
+        tconn->child.kstc_parent = parent;
+    }
+
+    tconn->kstc_state = ksts_bind;
+    tconn->kstc_addr  = taddr;
+
 errorout:
 
-    return cfs_error_code(status);
+    return (rc);
 }
 
 /*
- * ks_send_mdl
- *   send MDL chain to the peer for a stream connection
+ * ks_build_tconn
+ *  build tcp/streaming connection to remote peer
  *
  * Arguments:
- *   tconn: tdi connection object
- *   tx:    the transmit context
- *   mdl:   the mdl chain containing the data
- *   len:   length of the data
- *   flags: flags of the transmission
+ *   tconn:    tconn to be connected to the peer
+ *   addr:     the peer's ip address
+ *   port:     the peer's port number
  *
  * Return Value:
- *   ks return code
+ *   int:   0 for success or ks error codes.
  *
  * Notes:
  *   N/A
  */
 
 int
-ks_send_mdl(
-    ksock_tconn_t * tconn,
-    void *          tx,
-    ksock_mdl_t *   mdl,
-    int             len,
-    int             flags
+ks_build_tconn(
+    ks_tconn_t *                    tconn,
+    ulong                           addr,
+    unsigned short                  port
     )
 {
-    NTSTATUS            Status;
-    int                 rc = 0;
-    ulong_ptr       length;
-    ulong_ptr       tflags;
-    ksock_tdi_tx_t *    context;
-
-    PKS_CHAIN           KsChain;
-    PKS_TSDUMGR         KsTsduMgr;
-    PKS_TSDU            KsTsdu;
-    PKS_TSDU_BUF        KsTsduBuf;
-    PKS_TSDU_DAT        KsTsduDat;
+    int                             rc = 0;
+    NTSTATUS                        status = STATUS_SUCCESS;
 
-    BOOLEAN             bNewTsdu = FALSE;   /* newly allocated */
-    BOOLEAN             bNewBuff = FALSE;   /* newly allocated */
 
-    BOOLEAN             bBuffed;            /* bufferred sending */
+    PFILE_OBJECT                    ConnectionObject = NULL;
+    PDEVICE_OBJECT                  DeviceObject = NULL;
 
-    PUCHAR              Buffer = NULL;
-    ksock_mdl_t *       NewMdl = NULL;
+    PTDI_CONNECTION_INFORMATION     ConnectionInfo = NULL;
+    ULONG                           AddrLength;
 
-    PIRP                Irp = NULL;
-    PFILE_OBJECT        ConnObject;
-    PDEVICE_OBJECT      DeviceObject;
+    PIRP                            Irp = NULL;
 
-    BOOLEAN             bIsNonBlock;
+    LASSERT(tconn->kstc_type == kstt_sender);
+    LASSERT(tconn->kstc_state == ksts_bind);
 
     ks_get_tconn(tconn);
 
-    tflags = ks_tdi_send_flags(flags);
-    bIsNonBlock  = cfs_is_flag_set(flags, MSG_DONTWAIT);
-
-    spin_lock(&tconn->kstc_lock);
-
-    LASSERT( tconn->kstc_type == kstt_sender ||
-             tconn->kstc_type == kstt_child );
+    {
+        /* set the event callbacks */
+        rc = KsSetHandlers(tconn);
 
-    if (tconn->kstc_state != ksts_connected) {
-        spin_unlock(&tconn->kstc_lock);
-        ks_put_tconn(tconn);
-        return -ENOTCONN;
+        if (rc < 0) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
     }
 
-    /* get the latest Tsdu buffer form TsduMgr list.
-       just set NULL if the list is empty. */
+    /* create the connection file handle / object  */
+    status = KsOpenConnection(
+                &(tconn->kstc_dev),
+                (CONNECTION_CONTEXT)tconn,
+                &(tconn->sender.kstc_info.Handle),
+                &(tconn->sender.kstc_info.FileObject)
+                );
 
-    if (tconn->kstc_type == kstt_sender) {
-        KsChain = &(tconn->sender.kstc_send);
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        KsChain = &(tconn->child.kstc_send);
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
     }
 
-    if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) {
-        KsTsduMgr = &(KsChain->Expedited);
-    } else {
-        KsTsduMgr = &(KsChain->Normal);
-    }
+    /* associate the connection with the address object of the tconn */
 
-    if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) {
-        bBuffed = TRUE;
-    } else {
-        bBuffed = FALSE;
+    status = KsAssociateAddress(
+                tconn->kstc_addr.Handle,
+                tconn->sender.kstc_info.FileObject
+                );
+
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
     }
 
-    /* do the preparation work for bufferred sending */
+    tconn->kstc_state = ksts_associated;
 
-    if (bBuffed) {
+    /* Allocating Connection Info Together with the Address */
+    AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
+                 + TDI_ADDRESS_LENGTH_IP;
 
-        /* if the data is even larger than the biggest Tsdu, we have
-           to allocate new buffer and use TSDU_TYOE_BUF to store it */
+    ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+    NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK');
 
-        if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size
-             - KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
-            bNewBuff = TRUE;
-        }
+    if (NULL == ConnectionInfo) {
 
-        if (list_empty(&(KsTsduMgr->TsduList))) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
 
-            LASSERT(KsTsduMgr->NumOfTsdu == 0);
-            KsTsdu = NULL;
+    /* Initializing ConnectionInfo ... */
+    {
+        PTRANSPORT_ADDRESS TdiAddress;
 
-        } else {
+        /* ConnectionInfo settings */
 
-            LASSERT(KsTsduMgr->NumOfTsdu > 0);
-            KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
-            LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+        ConnectionInfo->UserDataLength = 0;
+        ConnectionInfo->UserData = NULL;
+        ConnectionInfo->OptionsLength = 0;
+        ConnectionInfo->Options = NULL;
+        ConnectionInfo->RemoteAddressLength = AddrLength;
+        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
 
 
-            /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */
-            if (bNewBuff) {
-                if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) {
-                    KsTsdu = NULL;
-                }
-            } else {
-                if ( KS_TSDU_STRU_SIZE((ULONG)len) >
-                     KsTsdu->TotalLength - KsTsdu->LastOffset ) {
-                    KsTsdu = NULL;
-                }
-            }
-        }
+        /* initialize the tdi address */
 
-        /* if there's no Tsdu or the free size is not enough for the
-           KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu.  */
+        TdiAddress = ConnectionInfo->RemoteAddress;
 
-        if (NULL == KsTsdu) {
+        TdiAddress->TAAddressCount = 1;
+        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
 
-            KsTsdu = KsAllocateKsTsdu();
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = (ULONG)htonl(addr);
 
-            if (NULL == KsTsdu) {
-                bBuffed = FALSE;
-                bNewBuff = FALSE;
-            } else {
-                bNewTsdu = TRUE;
-            }
-        }
+        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+    }
 
-        /* process the case that a new buffer is to be allocated from system memory */
-        if (bNewBuff) {
+    /* Now prepare to connect the remote peer ... */
 
-            /* now allocating internal buffer to contain the payload */
-            Buffer = ExAllocatePool(NonPagedPool, len);
+    ConnectionObject = tconn->sender.kstc_info.FileObject;
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-            if (NULL == Buffer) {
-                bBuffed = FALSE;
-            }
-        }
-    }
+    /* allocate a new Irp */
 
-    if (bBuffed) {
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-        if (bNewBuff) {
+    if (NULL == Irp) {
 
-            /* queue a new KS_TSDU_BUF to the Tsdu buffer */
-            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
 
-            KsTsduBuf->TsduFlags    =  0;
-            KsTsduBuf->DataLength   =  (ULONG)len;
-            KsTsduBuf->StartOffset  =  0;
-            KsTsduBuf->UserBuffer   =  Buffer;
-        } else {
-            /* queue a new KS_TSDU_BUF to the Tsdu buffer */
-            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    /* setup the Irp */
 
-            KsTsduDat->TsduFlags    =  0;
-            KsTsduDat->DataLength   =  (ULONG)len;
-            KsTsduDat->StartOffset  =  0;
-            KsTsduDat->TotalLength  = KS_TSDU_STRU_SIZE((ULONG)len);
+    TdiBuildConnect(
+            Irp,
+            DeviceObject,
+            ConnectionObject,
+            NULL,
+            NULL,
+            NULL,
+            ConnectionInfo,
+            NULL
+            );
 
-            Buffer = &KsTsduDat->Data[0];
-        }
 
-        /* now locking the Buffer and copy user payload into the buffer */
-        ASSERT(Buffer != NULL);
+    /* submit the Irp to the underlying transport driver */
+    status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    NULL
+                    );
 
-        rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl);
-        if (rc != 0) {
-            printk("ks_send_mdl: bufferred: error allocating mdl.\n");
-            bBuffed = FALSE;
-        } else {
-            ULONG BytesCopied = 0;
-            TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied);
-            if (BytesCopied != (ULONG) len) {
-                bBuffed = FALSE;
-            }
-        }
+    spin_lock(&(tconn->kstc_lock));
 
-        /* Do the finializing job if we succeed to to lock the buffer and move
-           user data. Or we need do cleaning up ... */
-        if (bBuffed) {
+    if (NT_SUCCESS(status)) {
 
-            if (bNewBuff) {
-                KsTsduBuf->TsduType     =  TSDU_TYPE_BUF;
-                KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+        /* Connected! the connection is built successfully. */
 
-            } else {
-                KsTsduDat->TsduType     =  TSDU_TYPE_DAT;
-                KsTsdu->LastOffset += KsTsduDat->TotalLength;
-            }
+        tconn->kstc_state = ksts_connected;
 
-            /* attach it to the TsduMgr list if the Tsdu is newly created. */
-            if (bNewTsdu) {
+        tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo;
+        tconn->sender.kstc_info.Remote         = ConnectionInfo->RemoteAddress;
 
-                list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
-                KsTsduMgr->NumOfTsdu++;
-            }
+        spin_unlock(&(tconn->kstc_lock));
 
-        } else {
+    } else {
 
-            if (NewMdl) {
-                ks_release_mdl(NewMdl, FALSE);
-                NewMdl = NULL;
-            }
+        /* Not connected! Abort it ... */
 
-            if (bNewBuff) {
-                ExFreePool(Buffer);
-                Buffer = NULL;
-                bNewBuff = FALSE;
-            }
+        if (rc != 0) {
+            cfs_enter_debugger();
         }
-    }
 
-    /* update the TotalBytes being in sending */
-    KsTsduMgr->TotalBytes += (ULONG)len;
+        Irp = NULL;
+        rc = cfs_error_code(status);
 
-    spin_unlock(&tconn->kstc_lock);
+        tconn->kstc_state = ksts_associated;
+        spin_unlock(&(tconn->kstc_lock));
 
-    /* cleanup the Tsdu if not successful */
-    if (!bBuffed && bNewTsdu) {
-        KsPutKsTsdu(KsTsdu);
-        bNewTsdu = FALSE;
-        KsTsdu = NULL;
-    }
+        /* disassociate the connection and the address object,
+           after cleanup,  it's safe to set the state to abort ... */
 
-    /* we need allocate the ksock_tx_t structure from memory pool. */
+        if ( NT_SUCCESS(KsDisassociateAddress(
+                        tconn->sender.kstc_info.FileObject))) {
+            tconn->kstc_state = ksts_aborted;
+        }
 
-    context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0);
-    if (!context) {
-        /* release the chained mdl */
-        ks_release_mdl(mdl, FALSE);
+        /* reset the event callbacks */
+        rc = KsResetHandlers(tconn);
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
         goto errorout;
     }
 
-    /* intialize the TcpContext */
-
-    memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT));
+errorout:
 
-    context->tconn = tconn;
-    context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t));
+    if (NT_SUCCESS(status)) {
 
-    KeInitializeEvent(context->Event, SynchronizationEvent, FALSE);
+        ks_query_local_ipaddr(tconn);
 
-    if (bBuffed) {
+    } else {
 
-         /* for bufferred transmission, we need set
-            the internal completion routine.  */
+        if (ConnectionInfo) {
+            ExFreePool(ConnectionInfo);
+        }
+        if (Irp) {
+            IoFreeIrp(Irp);
+        }
+    }
 
-        context->CompletionRoutine  = KsTcpSendCompletionRoutine;
-        context->KsTsduMgr          = KsTsduMgr;
-        context->CompletionContext  = KsTsdu;
-        context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat);
-        context->bCounted = FALSE;
+    ks_put_tconn(tconn);
 
-    } else if (bIsNonBlock) {
+    return (rc);
+}
 
-         /* for non-blocking transmission, we need set
-            the internal completion routine too.  */
 
-        context->CompletionRoutine = KsTcpSendCompletionRoutine;
-        context->CompletionContext = tx;
-        context->KsTsduMgr         = KsTsduMgr;
-        context->bCounted = TRUE;
-        context->ReferCount = 2;
-    }
+/*
+ * ks_disconnect_tconn
+ *   disconnect the tconn from a connection
+ *
+ * Arguments:
+ *   tconn: the tdi connection object connected already
+ *   flags: flags & options for disconnecting
+ *
+ * Return Value:
+ *   int: ks error code
+ *
+ * Notes:
+ *   N/A
+ */
 
-    if (tconn->kstc_type == kstt_sender) {
-        ConnObject = tconn->sender.kstc_info.FileObject;
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        ConnObject = tconn->child.kstc_info.FileObject;
-    }
+int
+ks_disconnect_tconn(
+    ks_tconn_t *    tconn,
+    ulong           flags
+    )
+{
+    NTSTATUS            status = STATUS_SUCCESS;
 
-    DeviceObject = IoGetRelatedDeviceObject(ConnObject);
+    ks_tconn_info_t *   info;
 
-    Irp = KsBuildTdiIrp(DeviceObject);
+    PFILE_OBJECT        ConnectionObject;
+    PDEVICE_OBJECT      DeviceObject = NULL;
 
-    if (NULL == Irp) {
+    PIRP                Irp = NULL;
 
-        /* release the chained mdl */
-        ks_release_mdl(mdl, FALSE);
+    KEVENT              Event;
 
-        Status = STATUS_INSUFFICIENT_RESOURCES;
-        goto errorout;
-    }
+    ks_get_tconn(tconn);
 
-    length = KsQueryMdlsSize(mdl);
+    /* make sure it's connected already and it
+       must be a sender or a child ...       */
 
-    LASSERT((ULONG)len <= length);
+    LASSERT(tconn->kstc_state == ksts_connected);
+    LASSERT( tconn->kstc_type == kstt_sender ||
+            tconn->kstc_type == kstt_child);
 
-    ks_get_tconn(tconn);
+    /* reset all the event handlers to NULL */
 
-    TdiBuildSend(
-        Irp,
-        DeviceObject,
-        ConnObject,
-        KsTcpCompletionRoutine,
-        context,
-        (bBuffed ? NewMdl : mdl),
-        (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags),
-        (ULONG)len;
-      );
+    if (tconn->kstc_type != kstt_child) {
+        KsResetHandlers (tconn);
+    }
 
-    Status = IoCallDriver(DeviceObject, Irp);
+    /* Disconnecting to the remote peer ... */
 
-    if (bBuffed) {
-        ks_release_mdl(mdl, FALSE);
-        NewMdl = NULL;
+    if (tconn->kstc_type == kstt_sender) {
+        info = &(tconn->sender.kstc_info);
+    } else {
+        info = &(tconn->child.kstc_info);
     }
 
-    if (!NT_SUCCESS(Status)) {
-        cfs_enter_debugger();
-        rc = cfs_error_code(Status);
-        goto errorout;
-    }
+    ConnectionObject = info->FileObject;
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
 
-    if (bBuffed) {
-        Status = STATUS_SUCCESS;
-        rc  = len;
-        context = NULL;
-    } else {
-        if (bIsNonBlock) {
-            if (InterlockedDecrement(&context->ReferCount) == 0) {
-                Status = Irp->IoStatus.Status;
-            } else {
-                Status = STATUS_PENDING;
-                context = NULL;
-            }
-        } else {
-            if (STATUS_PENDING == Status) {
-                Status = KeWaitForSingleObject(
-                         context->Event,
-                         Executive,
-                         KernelMode,
-                         FALSE,
-                         NULL
-                         );
-
-                if (NT_SUCCESS(Status)) {
-                    Status = Irp->IoStatus.Status;
-                }
-            }
-        }
+    /* allocate an Irp and setup it */
 
-        if (Status == STATUS_SUCCESS) {
-            rc = (int)(Irp->IoStatus.Information);
+    Irp = KsBuildTdiIrp(DeviceObject);
 
-            spin_lock(&tconn->kstc_lock);
-            KsTsduMgr->TotalBytes -= rc;
-            spin_unlock(&tconn->kstc_lock);
+    if (NULL == Irp) {
 
-        } else {
-            rc = cfs_error_code(Status);
-        }
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        cfs_enter_debugger();
+        goto errorout;
     }
 
-errorout:
+    KeInitializeEvent(
+            &Event,
+            SynchronizationEvent,
+            FALSE
+            );
 
-    if (bBuffed) {
+    TdiBuildDisconnect(
+            Irp,
+            DeviceObject,
+            ConnectionObject,
+            KsDisconectCompletionRoutine,
+            &Event,
+            NULL,
+            flags,
+            NULL,
+            NULL
+            );
 
-        if (NewMdl) {
-            ks_release_mdl(NewMdl, FALSE);
-            NewMdl = NULL;
-        }
+    /* issue the Irp to the underlying transport
+       driver to disconnect the connection    */
 
-        if (bNewBuff) {
-            if (!NT_SUCCESS(Status)) {
-                ExFreePool(Buffer);
-                Buffer = NULL;
-            }
-        }
+    status = IoCallDriver(DeviceObject, Irp);
 
-    } else {
+    if (STATUS_PENDING == status) {
 
-        if (Status != STATUS_PENDING) {
+        status = KeWaitForSingleObject(
+                     &Event,
+                     Executive,
+                     KernelMode,
+                     FALSE,
+                     NULL
+                     );
 
-            if (Irp) {
+        status = Irp->IoStatus.Status;
+    }
 
-                /* Freeing the Irp ... */
+    KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n",
+                status, KsNtStatusToString(status)));
 
-                IoFreeIrp(Irp);
-                Irp = NULL;
-            }
-        }
-    }
+    IoFreeIrp(Irp);
 
-    if (!NT_SUCCESS(Status)) {
+    if (info->ConnectionInfo) {
 
-        spin_lock(&tconn->kstc_lock);
+        /* disassociate the association between connection/address objects */
 
-        KsTsduMgr->TotalBytes -= (ULONG)len;
+        status = KsDisassociateAddress(ConnectionObject);
 
-        if (bBuffed) {
+        if (!NT_SUCCESS(status)) {
+            cfs_enter_debugger();
+        }
 
-            /* attach it to the TsduMgr list if the Tsdu is newly created. */
-            if (bNewTsdu) {
+        spin_lock(&(tconn->kstc_lock));
 
-                list_del(&(KsTsdu->Link));
-                KsTsduMgr->NumOfTsdu--;
+        /* cleanup the tsdumgr Lists */
+        KsCleanupTsdu (tconn);
 
-                KsPutKsTsdu(KsTsdu);
-            } else {
-                if (bNewBuff) {
-                    if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) ==
-                         (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
-                        KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF);
-                        KsTsduBuf->TsduType = 0;
-                    } else {
-                        cfs_enter_debugger();
-                        KsTsduBuf->StartOffset = KsTsduBuf->DataLength;
-                    }
-                } else {
-                    if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength ==
-                         (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
-                        KsTsdu->LastOffset -= KsTsduDat->TotalLength;
-                        KsTsduDat->TsduType = 0;
-                    } else {
-                        cfs_enter_debugger();
-                        KsTsduDat->StartOffset = KsTsduDat->DataLength;
-                    }
-                }
-            }
+        /* set the state of the tconn */
+        if (NT_SUCCESS(status)) {
+            tconn->kstc_state = ksts_disconnected;
+        } else {
+            tconn->kstc_state = ksts_associated;
         }
 
-        spin_unlock(&tconn->kstc_lock);
-    }
+        /* free  the connection info to system pool*/
+        ExFreePool(info->ConnectionInfo);
+        info->ConnectionInfo = NULL;
+        info->Remote = NULL;
 
-    /* free the context if is not used at all */
-    if (context) {
-        cfs_free(context);
+        spin_unlock(&(tconn->kstc_lock));
     }
 
+    status = STATUS_SUCCESS;
+
+errorout:
+
     ks_put_tconn(tconn);
 
-    return rc;
+    return cfs_error_code(status);
 }
 
+
 /*
- * ks_recv_mdl
- *   Receive data from the peer for a stream connection
+ * ks_abort_tconn
+ *   The connection is broken unexpectedly. We need to do
+ *   some cleanup.
  *
  * Arguments:
- *   tconn: tdi connection object
- *   mdl:   the mdl chain to contain the incoming data
- *   len:   length of the data
- *   flags: flags of the receiving
+ *   tconn: the tdi connection
  *
  * Return Value:
- *   ks return code
+ *   N/A
  *
  * Notes:
  *   N/A
  */
 
-int
-ks_recv_mdl(
-    ksock_tconn_t * tconn,
-    ksock_mdl_t *   mdl,
-    int             size,
-    int             flags
+void
+ks_abort_tconn(
+    ks_tconn_t *     tconn
     )
 {
-    NTSTATUS        Status = STATUS_SUCCESS;
-    int             rc = 0;
-
-    BOOLEAN         bIsNonBlock;
-    BOOLEAN         bIsExpedited;
-
-    PKS_CHAIN       KsChain;
-    PKS_TSDUMGR     KsTsduMgr;
-    PKS_TSDU        KsTsdu;
-    PKS_TSDU_DAT    KsTsduDat;
-    PKS_TSDU_BUF    KsTsduBuf;
-    PKS_TSDU_MDL    KsTsduMdl;
-
-    PUCHAR          Buffer;
-
-    ULONG           BytesRecved = 0;
-    ULONG           RecvedOnce;
+    PKS_DISCONNECT_WORKITEM WorkItem = NULL;
 
-    bIsNonBlock  = cfs_is_flag_set(flags, MSG_DONTWAIT);
-    bIsExpedited = cfs_is_flag_set(flags, MSG_OOB);
+    WorkItem = &(tconn->kstc_disconnect);
 
     ks_get_tconn(tconn);
-
-Again:
-
-    RecvedOnce = 0;
-
     spin_lock(&(tconn->kstc_lock));
 
-    if ( tconn->kstc_type != kstt_sender &&
-         tconn->kstc_type != kstt_child) {
-
-        rc = -EINVAL;
-        spin_unlock(&(tconn->kstc_lock));
-
-        goto errorout;
-    }
-
     if (tconn->kstc_state != ksts_connected) {
-
-        rc = -ENOTCONN;
-        spin_unlock(&(tconn->kstc_lock));
-
-        goto errorout;
-    }
-
-    if (tconn->kstc_type == kstt_sender) {
-        KsChain = &(tconn->sender.kstc_recv);
-    } else {
-        LASSERT(tconn->kstc_type == kstt_child);
-        KsChain = &(tconn->child.kstc_recv);
-    }
-
-    if (bIsExpedited) {
-        KsTsduMgr = &(KsChain->Expedited);
-    } else {
-        KsTsduMgr = &(KsChain->Normal);
-    }
-
-NextTsdu:
-
-    if (list_empty(&(KsTsduMgr->TsduList))) {
-
-        //
-        // It's a notification event. We need reset it to
-        // un-signaled state in case there no any tsdus.
-        //
-
-        KeResetEvent(&(KsTsduMgr->Event));
-
+        ks_put_tconn(tconn);
     } else {
 
-        KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
-        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
-
-        /* remove the KsTsdu from TsduMgr list to release the lock */
-        list_del(&(KsTsdu->Link));
-        KsTsduMgr->NumOfTsdu--;
-
-        spin_unlock(&(tconn->kstc_lock));
-
-        while ((ULONG)size > BytesRecved) {
+        if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
 
-            ULONG BytesCopied = 0;
-            ULONG BytesToCopy = 0;
-            ULONG StartOffset = 0;
+            WorkItem->Flags = TDI_DISCONNECT_ABORT;
+            WorkItem->tconn = tconn;
 
-            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
-            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
 
-            if ( TSDU_TYPE_DAT == KsTsduDat->TsduType ||
-                 TSDU_TYPE_BUF == KsTsduBuf->TsduType ) {
+            ExQueueWorkItem(
+                    &(WorkItem->WorkItem),
+                    DelayedWorkQueue
+                    );
+        }
+    }
 
+    spin_unlock(&(tconn->kstc_lock));
+}
 
-                //
-                // Data Tsdu Unit ...
-                //
 
-                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+/*
+ * ks_query_local_ipaddr
+ *   query the local ip address of the connection into tconn->kstc_addr.Tdi
+ *
+ * Arguments:
+ *   tconn:  the tconn which is connected (a sender or a child tconn)
+ *
+ * Return Value:
+ *   int: cfs_error_code() of the status of the query
+ *
+ * Notes:
+ *   any other tconn type is refused with STATUS_INVALID_PARAMETER
+ */
 
-                    if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
-                        /* data is not ready yet*/
-                        KeResetEvent(&(KsTsduMgr->Event));
-                        printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat);
-                        break;
-                    }
+int
+ks_query_local_ipaddr(
+    ks_tconn_t *     tconn
+    )
+{
+    PFILE_OBJECT    FileObject = NULL;
+    NTSTATUS        status;
 
-                    Buffer = &KsTsduDat->Data[0];
-                    StartOffset = KsTsduDat->StartOffset;
-                    if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) {
-                        /* Recvmsg requst could be statisfied ... */
-                        BytesToCopy = size - BytesRecved;
-                    } else {
-                        BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset;
-                    }
+    PTRANSPORT_ADDRESS TdiAddress;
+    ULONG              AddressLength;
 
-                } else {
+    if (tconn->kstc_type == kstt_sender) {
+        FileObject = tconn->sender.kstc_info.FileObject;
+    } else if (tconn->kstc_type == kstt_child) {
+        FileObject = tconn->child.kstc_info.FileObject;
+    } else {
+        status = STATUS_INVALID_PARAMETER;
+        goto errorout;
+    }
 
-                    if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
-                        /* data is not ready yet*/
-                        KeResetEvent(&(KsTsduMgr->Event));
-                        DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf);
-                        break;
-                    }
+    TdiAddress = &(tconn->kstc_addr.Tdi);   /* the queried address lands in the tconn itself */
+    AddressLength = MAX_ADDRESS_LENGTH;     /* passed by address; presumably the buffer size on input - TODO confirm */
 
-                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
-                    Buffer = KsTsduBuf->UserBuffer;
-                    StartOffset = KsTsduBuf->StartOffset;
+    status =  KsQueryIpAddress(FileObject, TdiAddress, &AddressLength);
 
-                    if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) {
-                        /* Recvmsg requst could be statisfied ... */
-                        BytesToCopy = size - BytesRecved;
-                    } else {
-                        BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset;
-                    }
-                }
+    if (NT_SUCCESS(status)) {
+        KsPrint((2, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n",
+                ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr,
+                ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port ));
+    } else {
+        KsPrint((2, "ks_query_local_ipaddr: Failed to query the connection local ip address.\n"));
+    }
 
-                if (BytesToCopy > 0) {
-                    Status = TdiCopyBufferToMdl(
-                                    Buffer,
-                                    StartOffset,
-                                    BytesToCopy,
-                                    mdl,
-                                    BytesRecved,
-                                    &BytesCopied
-                                    );
-
-                    if (NT_SUCCESS(Status)) {
-
-                        if (BytesToCopy != BytesCopied) {
-                            cfs_enter_debugger();
-                        }
+errorout:   /* reached on success as well as on the invalid-type path */
 
-                        BytesRecved += BytesCopied;
-                        RecvedOnce  += BytesCopied;
+    return cfs_error_code(status);
+}
 
-                    } else {
+int
+KsCalcWhichEngine(ks_tconn_t * tconn)   /* index into ks_data.ksnd_engine_mgr[] derived from the tconn address */
+{
+    PTRANSPORT_ADDRESS TdiAddress = &(tconn->kstc_addr.Tdi);
+    ULONG addr = ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr;
+    ULONG sum = (addr & 0xFF) + ((addr & 0xFF00) >> 8) + ((addr & 0xFF0000) >> 16);   /* adds the three low-order bytes; bits 24-31 are not used */
 
-                        cfs_enter_debugger();
+    return (int)(sum % ks_data.ksnd_engine_nums);   /* always below ksnd_engine_nums, which must be non-zero */
+}
 
-                        if (STATUS_BUFFER_OVERFLOW == Status) {
-                        }
-                    }
-                }
+/*
+ * KsQueueTdiEngine
+ *   put the engine slot of TsduMgr on the list of the engine manager
+ *   selected by KsCalcWhichEngine() and signal that manager's start event
+ *
+ * Arguments:
+ *   tconn:   the tconn recorded in the slot
+ *   TsduMgr: the tsdu manager whose slot is queued
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   the start event is signalled even when the slot was already queued
+ */
+void
+KsQueueTdiEngine(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr)
+{
+    ks_engine_mgr_t *   mgr  = &ks_data.ksnd_engine_mgr[KsCalcWhichEngine(tconn)];
+    ks_engine_slot_t *  slot = &TsduMgr->Slot;
+
+    /* slot is already marked queued: nothing to link, just signal */
+    if (slot->queued) {
+        KeSetEvent(&(mgr->start),0, FALSE);
+        return;
+    }
+
+    spin_lock(&mgr->lock);
+    /* the flag is tested a second time now that the lock is held */
+    if (!slot->queued) {
+        list_add_tail(&slot->link, &mgr->list);
+        slot->queued = TRUE;
+        slot->tconn = tconn;
+        slot->emgr = mgr;
+        slot->tsdumgr = TsduMgr;
+        KeSetEvent(&(mgr->start),0, FALSE);
+    }
+    spin_unlock(&mgr->lock);
+
+    KsPrint((4, "KsQueueTdiEngine: TsduMgr=%p is queued to engine %p\n",
+                TsduMgr, mgr));
+    KeSetEvent(&(mgr->start),0, FALSE);
+}
 
-                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+/*
+ * KsRemoveTdiEngine
+ *   take the engine slot of TsduMgr off the list of the engine manager
+ *   it is queued on and clear the slot's tconn/emgr/tsdumgr pointers
+ *
+ * Arguments:
+ *   TsduMgr: the tsdu manager whose slot is removed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   nothing happens when the slot is not marked queued
+ */
+void
+KsRemoveTdiEngine(PKS_TSDUMGR TsduMgr)
+{
+    ks_engine_mgr_t *   engm;
+    ks_engine_slot_t *  engs;
+
+    engs = &TsduMgr->Slot;
+    if (engs->queued) {
+        /*
+         * NOTE(review): emgr is read before the lock is taken, while
+         * KsDeliveryEngineThread clears emgr and then queued under the
+         * lock; confirm the two can not run concurrently for one slot.
+         */
+        engm = engs->emgr;
+        LASSERT(engm != NULL);
+        spin_lock(&engm->lock);
+        /* queued is tested again with the manager lock held */
+        if (engs->queued) {
+            list_del(&engs->link);
+            engs->queued = FALSE;
+            engs->tconn = NULL;
+            engs->emgr = NULL;
+            engs->tsdumgr = NULL;
+        }
+        spin_unlock(&engm->lock);
+        /* the message used to carry the name of KsQueueTdiEngine */
+        KsPrint((4, "KsRemoveTdiEngine: TsduMgr %p is removed from engine %p\n",
+                    TsduMgr, engm));
+    }
+}
 
-                    KsTsduDat->StartOffset += BytesCopied;
+int
+KsDeliveryIrp(ks_tconn_t * tconn, PIRP irp)   /* pass irp to the driver behind the tconn; returns 0 or cfs_error_code(status) */
+{
+    PFILE_OBJECT        connobj;
+    PDEVICE_OBJECT      devobj;
+    NTSTATUS            status;
+    int                 rc = 0;
 
-                    if (KsTsduDat->StartOffset == KsTsduDat->DataLength) {
-                        KsTsdu->StartOffset += KsTsduDat->TotalLength;
-                    }
+    /* look up the connection file object that matches the tconn type */
+    if (tconn->kstc_type == kstt_sender) {
+        connobj = tconn->sender.kstc_info.FileObject;
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        connobj = tconn->child.kstc_info.FileObject;
+    }
+    devobj = IoGetRelatedDeviceObject(connobj);
+    
+    /* send irp to transport layer */
+    status = IoCallDriver(devobj, irp);
 
-                } else {
+    /* convert a failure status to an error code; rc stays 0 for every NT_SUCCESS status */
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+    }
 
-                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
-                    KsTsduBuf->StartOffset += BytesCopied;
-                    if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) {
-                        KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
-                        /* now we need release the buf to system pool */
-                        ExFreePool(KsTsduBuf->UserBuffer);
-                    }
-                }
+    KsPrint((4, "KsDeliveryIrp: tconn=%p irp=%p status=%xh rc=%d.\n",
+                 tconn, irp, status, rc));
+    return rc;
+}
 
-            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+PIRP
+KsBuildSend(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr,
+            ks_mdl_t * mdl, ulong flags )   /* build, but do not submit, a TDI send irp for mdl; NULL on failure */
+{
+    ks_tdi_tx_t *       context;
+    PIRP                irp = NULL;
+    PFILE_OBJECT        connobj;
+    PDEVICE_OBJECT      devobj;
+    NTSTATUS            status;   /* only written on the error paths, never read */
+    ULONG               length;
 
-                //
-                // MDL Tsdu Unit ...
-                //
+    int                 rc = 0;   /* not used by this function */
 
-                if (KsTsduMdl->DataLength > size - BytesRecved) {
+    /* query mdl chain total length */
+    length = KsQueryMdlsSize(mdl);
 
-                    /* Recvmsg requst could be statisfied ... */
+    /* allocate the ks_tdi_tx_t send context */
+    context = cfs_alloc(sizeof(ks_tdi_tx_t), 0);
+    if (!context) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-                    BytesToCopy = size - BytesRecved;
+    /* initialize the send context */
+    memset(context,0, sizeof(ks_tdi_tx_t));
+    context->Magic = KS_TCP_CONTEXT_MAGIC;
+    context->tconn = tconn;
+    context->CompletionRoutine = KsTcpSendCompletionRoutine;
+    context->TsduMgr = TsduMgr;
+    context->Length = length;
 
-                } else {
+    /* find the connection and device objects, then allocate the irp */
+    if (tconn->kstc_type == kstt_sender) {
+        connobj = tconn->sender.kstc_info.FileObject;
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        connobj = tconn->child.kstc_info.FileObject;
+    }
+    devobj = IoGetRelatedDeviceObject(connobj);
+    irp = KsBuildTdiIrp(devobj);
+    if (NULL == irp) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
 
-                    BytesToCopy = KsTsduMdl->DataLength;
-                }
+    /* grab tconn reference (not dropped here; presumably released on completion - TODO confirm) */
+    ks_get_tconn(tconn);
 
-                Status = KsCopyMdlChainToMdlChain(
-                            KsTsduMdl->Mdl,
-                            KsTsduMdl->StartOffset,
-                            mdl,
-                            BytesRecved,
-                            BytesToCopy,
-                            &BytesCopied
-                            );
+    /* fill in the send request; the irp goes back to the caller unsent */
+    TdiBuildSend(
+        irp,
+        devobj,
+        connobj,
+        KsTcpCompletionRoutine,
+        context,
+        mdl,
+        flags,
+        length
+      );
 
-                if (NT_SUCCESS(Status)) {
+      return irp;
 
-                    if (BytesToCopy != BytesCopied) {
-                        cfs_enter_debugger();
-                    }
+errorout:
 
-                    KsTsduMdl->StartOffset += BytesCopied;
-                    KsTsduMdl->DataLength  -= BytesCopied;
+    /* free the context if it was allocated but never handed to an irp */
+    if (context) {
+        ASSERT(context->Magic == KS_TCP_CONTEXT_MAGIC);
+        context->Magic = 'CDAB';   /* overwrite the magic before the memory is freed */
+        cfs_free(context);
+    }
 
-                    BytesRecved += BytesCopied;
-                    RecvedOnce  += BytesCopied;
-                } else {
-                    cfs_enter_debugger();
-                }
+    /* free the irp if one was allocated */
+    if (irp) {
+        IoFreeIrp(irp);
+        irp = NULL;
+    }
 
-                if (0 == KsTsduMdl->DataLength) {
+    return NULL;
+}
 
-                    //
-                    // Call TdiReturnChainedReceives to release the Tsdu memory
-                    //
+int
+KsDeliveryTsdus(ks_tconn_t * tconn, PKS_TSDUMGR TsduMgr)   /* submit what is queued in TsduMgr as one send irp; returns 0 or a negative error code */
+{
+    int                 rc = 0;
+    ulong               length = 0;
+    ulong               tflags = 0;
+    ks_mdl_t *          mdl = NULL;
+    PIRP                irp = NULL;
+    BOOLEAN             expedited;   /* not used by this function */
 
-                    TdiReturnChainedReceives(
-                        &(KsTsduMdl->Descriptor),
-                        1 );
+    LASSERT(tconn->kstc_magic == KS_TCONN_MAGIC);
 
-                    KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
-                }
+    ks_get_tconn(tconn);   /* dropped again at errorout, on every path */
+    ks_lock_tsdumgr(TsduMgr);
 
-            } else {
-                printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n",
-                        KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength);
-                printk("        Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x",
-                        KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength);
-                cfs_enter_debugger();
-            }
+    if ( tconn->kstc_type != kstt_sender &&
+         tconn->kstc_type != kstt_child) {
+        rc = -EINVAL;
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
+    }
 
-            if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+    if (tconn->kstc_state != ksts_connected) {
+        rc = -ENOTCONN;
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
+    }
 
-                //
-                // KsTsdu is empty now, we need free it ...
-                //
+    if (TsduMgr->OOB) {
+        tflags = TDI_SEND_NON_BLOCKING | TDI_SEND_EXPEDITED;
+    } else {
+        tflags = TDI_SEND_NON_BLOCKING;
+    }
+   
+    if (list_empty(&TsduMgr->TsduList)) {   /* nothing queued: leave with rc still 0 */
+        LASSERT(TsduMgr->TotalBytes == 0);
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
+    }
 
-                KsPutKsTsdu(KsTsdu);
-                KsTsdu = NULL;
+    /* check whether there's outstanding sending requests */
+    if (TsduMgr->Busy) {
+        rc = -EAGAIN;
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
+    }
 
-                break;
-            }
+    /* probe all Tsdus and merge buffers together */
+    mdl = KsLockTsdus(tconn, TsduMgr, &tflags, &length);
+    if (NULL == mdl) {
+         if (length == 0) {
+            LASSERT(TsduMgr->TotalBytes == 0);
+            rc = -EAGAIN;
+        } else {
+            rc = -ENOMEM;
         }
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
+    }
 
-        spin_lock(&(tconn->kstc_lock));
+    KsPrint((4, "KsDeliveryTsdus: tconn=%p TsudMgr=%p, length=%xh/%xh\n",
+                tconn, TsduMgr, length, TsduMgr->TotalBytes));
 
-        /* we need attach the KsTsdu to the list header */
-        if (KsTsdu) {
-            KsTsduMgr->NumOfTsdu++;
-            list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
-        } else if ((ULONG)size > BytesRecved) {
-            goto NextTsdu;
-        }
+    /* build send irp request */
+    irp = KsBuildSend(tconn, TsduMgr, mdl, tflags);
+    if (NULL == irp) {
+        rc = -ENOMEM;   /* NOTE(review): mdl from KsLockTsdus is not released on this path - confirm who owns it */
+        ks_unlock_tsdumgr(TsduMgr);
+        goto errorout;
     }
+    TsduMgr->Busy = TRUE;   /* set before the lock is dropped; cleared elsewhere, presumably on send completion - TODO confirm */
+    ks_unlock_tsdumgr(TsduMgr);
 
-    if (KsTsduMgr->TotalBytes < RecvedOnce) {
-        cfs_enter_debugger();
-        KsTsduMgr->TotalBytes = 0;
-    } else {
-        KsTsduMgr->TotalBytes -= RecvedOnce;
+    /* delivery mdl chain */
+    LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
+    rc = KsDeliveryIrp(tconn, irp);
+    if (rc < 0) {   /* success falls through to the same label */
+        goto errorout;
     }
 
-    spin_unlock(&(tconn->kstc_lock));
+errorout:
 
-    if (NT_SUCCESS(Status)) {
+    LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
+    ks_put_tconn(tconn);
+    return rc;
+}
 
-        if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) {
+int
+KsDeliveryEngineThread(void * context)   /* thread body of one sending engine; context is its ks_engine_mgr_t */
+{
+    ks_engine_mgr_t *   engm = context;
+    ks_engine_slot_t *  engs;
+    struct list_head *  list;
+    ks_tconn_t *        tconn;
 
-            KeWaitForSingleObject(
-                &(KsTsduMgr->Event),
-                Executive,
-                KernelMode,
-                FALSE,
-                NULL
-                );
+    cfs_set_thread_priority(31);
 
-            goto Again;
-        }
+    while (!engm->stop) {   /* stop is set by ks_fini_tdi_data, which then signals start */
 
-        if (bIsNonBlock && (BytesRecved == 0)) {
-            rc = -EAGAIN;
-        } else {
-            rc = BytesRecved;
+        cfs_wait_event_internal(&engm->start, 0);   /* presumably blocks with no time limit - TODO confirm */
+
+        spin_lock(&engm->lock);
+        if (list_empty(&engm->list)) {
+            spin_unlock(&engm->lock);
+            continue;
         }
-    }
 
-errorout:
+        list = engm->list.next;   /* only the first queued slot is taken per wake-up */
+        list_del(list);
+        engs = list_entry(list, ks_engine_slot_t, link);
+        LASSERT(engs->emgr == engm);
+        LASSERT(engs->queued);
+        engs->emgr = NULL;
+        engs->queued = FALSE;
+        spin_unlock(&engm->lock);
 
-    ks_put_tconn(tconn);
+        tconn = engs->tconn;   /* read after the lock was released */
+        LASSERT(tconn->kstc_magic == KS_TCONN_MAGIC);
 
-    if (rc > 0) {
-        KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc));
-    } else {
-        KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status));
+        KsPrint((4, "KsDeliveryEngineThread: %p active: tconn=%p "
+                    "TsduMgr=%p\n", engm, tconn, engs->tsdumgr));
+        KsDeliveryTsdus(tconn, engs->tsdumgr);   /* its return code is not examined */
+
+        LASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
     }
 
-    /* release the chained mdl */
-    ks_release_mdl(mdl, FALSE);
+    KeSetEvent(&engm->exit, 0, FALSE);   /* ks_fini_tdi_data waits on this event */
 
-    return (rc);
+    return 0;
 }
 
-
 /*
  * ks_init_tdi_data
  *   initialize the global data in ksockal_data
@@ -6019,17 +5856,17 @@ errorout:
 int
 ks_init_tdi_data()
 {
-    int rc = 0;
+    int rc = 0, i;
 
     /* initialize tconn related globals */
-    RtlZeroMemory(&ks_data, sizeof(ks_data_t));
+    RtlZeroMemory(&ks_data, sizeof(ks_tdi_data_t));
 
     spin_lock_init(&ks_data.ksnd_tconn_lock);
     CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns);
     cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE);
 
     ks_data.ksnd_tconn_slab = cfs_mem_cache_create(
-        "tcon", sizeof(ksock_tconn_t) , 0, 0);
+        "tcon", sizeof(ks_tconn_t) , 0, 0);
 
     if (!ks_data.ksnd_tconn_slab) {
         rc = -ENOMEM;
@@ -6037,7 +5874,6 @@ ks_init_tdi_data()
     }
 
     /* initialize tsdu related globals */
-
     spin_lock_init(&ks_data.ksnd_tsdu_lock);
     CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus);
     ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */
@@ -6046,21 +5882,41 @@ ks_init_tdi_data()
 
     if (!ks_data.ksnd_tsdu_slab) {
         rc = -ENOMEM;
-        cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
-        ks_data.ksnd_tconn_slab = NULL;
         goto errorout;
     }
 
-    /* initialize daemon related globals */
-
-    spin_lock_init(&ks_data.ksnd_daemon_lock);
-    CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons);
-    cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE);
+    /* initialize engine threads list */
+    ks_data.ksnd_engine_nums = num_online_cpus();
+    if (ks_data.ksnd_engine_nums < 4) {
+        ks_data.ksnd_engine_nums = 4;
+    }
+    ks_data.ksnd_engine_mgr = cfs_alloc(sizeof(ks_engine_mgr_t) * 
+                         ks_data.ksnd_engine_nums,CFS_ALLOC_ZERO);
+    if (ks_data.ksnd_engine_mgr == NULL) {
+        rc = -ENOMEM;
+        goto errorout;
+    }
+    for (i = 0; i < ks_data.ksnd_engine_nums; i++) {
+        spin_lock_init(&ks_data.ksnd_engine_mgr[i].lock);
+        cfs_init_event(&ks_data.ksnd_engine_mgr[i].start, TRUE, FALSE);
+        cfs_init_event(&ks_data.ksnd_engine_mgr[i].exit, TRUE, FALSE);
+        CFS_INIT_LIST_HEAD(&ks_data.ksnd_engine_mgr[i].list);
+        cfs_kernel_thread(KsDeliveryEngineThread, &ks_data.ksnd_engine_mgr[i], 0);
+    }
 
+    /* register pnp handlers to watch network condition */
     KsRegisterPnpHandlers();
 
 errorout:
 
+    /* do cleanup in case we get failures */
+    if (rc < 0) {
+        if (ks_data.ksnd_tconn_slab) {
+            cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
+            ks_data.ksnd_tconn_slab = NULL;
+        }
+    }
+
     return rc;
 }
 
@@ -6084,10 +5940,21 @@ ks_fini_tdi_data()
 {
     PKS_TSDU            KsTsdu = NULL;
     struct list_head *  list   = NULL;
+    int i;
 
     /* clean up the pnp handler and address slots */
     KsDeregisterPnpHandlers();
 
+    /* stop all tcp sending engines */
+    for (i = 0; i < ks_data.ksnd_engine_nums; i++) {
+        ks_data.ksnd_engine_mgr[i].stop = TRUE;
+        KeSetEvent(&ks_data.ksnd_engine_mgr[i].start, 0, FALSE);
+    }
+
+    for (i = 0; i < ks_data.ksnd_engine_nums; i++) {
+        cfs_wait_event_internal(&ks_data.ksnd_engine_mgr[i].exit, 0);
+    }
+
     /* we need wait until all the tconn are freed */
     spin_lock(&(ks_data.ksnd_tconn_lock));
 
@@ -6097,7 +5964,7 @@ ks_fini_tdi_data()
     spin_unlock(&(ks_data.ksnd_tconn_lock));
 
     /* now wait on the tconn exit event */
-    cfs_wait_event(&ks_data.ksnd_tconn_exit, 0);
+    cfs_wait_event_internal(&ks_data.ksnd_tconn_exit, 0);
 
     /* it's safe to delete the tconn slab ... */
     cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
@@ -6135,13 +6002,13 @@ ks_fini_tdi_data()
  *   N/A
  */
 
-ksock_tconn_t *
+ks_tconn_t *
 ks_create_child_tconn(
-    ksock_tconn_t * parent
+    ks_tconn_t * parent
     )
 {
     NTSTATUS            status;
-    ksock_tconn_t *     backlog;
+    ks_tconn_t *        backlog;
 
     /* allocate the tdi connecton object */
     backlog = ks_create_tconn();
@@ -6215,12 +6082,12 @@ errorout:
 
 void
 ks_replenish_backlogs(
-    ksock_tconn_t * parent,
-    int     nbacklog
+    ks_tconn_t *    parent,
+    int             nbacklog
     )
 {
-    ksock_tconn_t * backlog;
-    int            n = 0;
+    ks_tconn_t *    backlog;
+    int             n = 0;
 
     /* calculate how many backlogs needed */
     if ( ( parent->listener.kstc_listening.num +
@@ -6272,7 +6139,7 @@ ks_replenish_backlogs(
  */
 
 int
-ks_start_listen(ksock_tconn_t *tconn, int nbacklog)
+ks_start_listen(ks_tconn_t *tconn, int nbacklog)
 {
     int rc = 0;
 
@@ -6280,7 +6147,7 @@ ks_start_listen(ksock_tconn_t *tconn, int nbacklog)
     ks_replenish_backlogs(tconn, nbacklog);
 
     /* set the event callback handlers */
-    rc = ks_set_handlers(tconn);
+    rc = KsSetHandlers(tconn);
 
     if (rc < 0) {
         return rc;
@@ -6296,13 +6163,13 @@ ks_start_listen(ksock_tconn_t *tconn, int nbacklog)
 }
 
 void
-ks_stop_listen(ksock_tconn_t *tconn)
+ks_stop_listen(ks_tconn_t *tconn)
 {
     struct list_head *      list;
-    ksock_tconn_t *         backlog;
+    ks_tconn_t *            backlog;
 
     /* reset all tdi event callbacks to NULL */
-    ks_reset_handlers (tconn);
+    KsResetHandlers (tconn);
 
     spin_lock(&tconn->kstc_lock);
 
@@ -6310,7 +6177,7 @@ ks_stop_listen(ksock_tconn_t *tconn)
 
     /* cleanup all the listening backlog child connections */
     list_for_each (list, &(tconn->listener.kstc_listening.list)) {
-        backlog = list_entry(list, ksock_tconn_t, child.kstc_link);
+        backlog = list_entry(list, ks_tconn_t, child.kstc_link);
 
         /* destory and free it */
         ks_put_tconn(backlog);
@@ -6343,12 +6210,12 @@ ks_stop_listen(ksock_tconn_t *tconn)
 
 int
 ks_wait_child_tconn(
-    ksock_tconn_t *  parent,
-    ksock_tconn_t ** child
+    ks_tconn_t *    parent,
+    ks_tconn_t **   child
     )
 {
     struct list_head * tmp;
-    ksock_tconn_t * backlog = NULL;
+    ks_tconn_t * backlog = NULL;
 
     ks_replenish_backlogs(parent, parent->listener.nbacklog);
 
@@ -6364,7 +6231,7 @@ again:
     /* check the listening queue and try to search the accepted connecton */
 
     list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
-        backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link);
+        backlog = list_entry (tmp, ks_tconn_t, child.kstc_link);
 
         spin_lock(&(backlog->kstc_lock));
 
@@ -6415,16 +6282,245 @@ again:
         }
     }
 
+    KsPrint((2, "ks_wait_child_tconn: connection %p accepted.\n", backlog));
+
     if (backlog) {
         /* query the local ip address of the connection */
         ks_query_local_ipaddr(backlog);
+    } else {
+        return -EINTR;
     }
-
     *child = backlog;
 
     return 0;
 }
 
+/*
+ * ks_query_iovs_length
+ *   add up the iov_len fields of an iovec array
+ *
+ * Arguments:
+ *   iov:   the array, must not be NULL
+ *   niov:  number of entries, must be positive
+ *
+ * Return Value:
+ *   int: the sum of all iov_len values
+ */
+int
+ks_query_iovs_length(struct iovec  *iov, int niov)
+{
+    int             sum = 0;
+    int             idx = 0;
+
+    LASSERT(iov != NULL);
+    LASSERT(niov > 0);
+
+    while (idx < niov) {
+        sum += iov[idx].iov_len;
+        idx++;
+    }
+
+    return sum;
+}
+
+/*
+ * ks_query_kiovs_length
+ *   add up the kiov_len fields of a lnet_kiov_t array
+ *
+ * Arguments:
+ *   kiov:   the array, must not be NULL
+ *   nkiov:  number of entries, must be positive
+ *
+ * Return Value:
+ *   int: the sum of all kiov_len values
+ */
+int
+ks_query_kiovs_length(lnet_kiov_t *kiov, int nkiov)
+{
+    int             sum = 0;
+    int             idx = 0;
+
+    LASSERT(kiov != NULL);
+    LASSERT(nkiov > 0);
+
+    while (idx < nkiov) {
+        sum += kiov[idx].kiov_len;
+        idx++;
+    }
+
+    return sum;
+}
+
+/*
+ * ks_sock_buf_cb
+ *   ks_tsdu_cb_t callback for a flat buffer of ns bytes
+ *
+ * Arguments:
+ *   tsdu:  start of the buffer
+ *   ns:    size of the buffer in bytes
+ *   off:   byte offset into the buffer
+ *   buf:   receives tsdu + off when off is inside the buffer
+ *
+ * Return Value:
+ *   int: bytes left from off to the end, or 0 (with *buf untouched)
+ *        when off is not below ns
+ */
+int
+ks_sock_buf_cb(void *tsdu, int ns, int off, char **buf)
+{
+    if (off >= ns) {
+        return 0;
+    }
+
+    *buf = (char *)tsdu + off;
+    return ns - off;
+}
+
+/*
+ * ks_sock_iov_cb
+ *   ks_tsdu_cb_t callback for an iovec array of ns entries
+ *
+ * Arguments:
+ *   tsdu:  the iovec array
+ *   ns:    number of entries in the array
+ *   off:   byte offset counted across all entries
+ *   buf:   receives the address of the byte at that offset
+ *
+ * Return Value:
+ *   int: bytes left in the entry that holds the offset, or 0 (with
+ *        *buf untouched) when the offset is past the last entry
+ */
+int
+ks_sock_iov_cb(void *tsdu, int ns, int off, char **buf)
+{
+    struct iovec *vec = tsdu;
+    int idx;
+
+    for (idx = 0; idx < ns; idx++) {
+        if ((size_t)off < vec[idx].iov_len) {
+            /* the offset falls inside this entry */
+            *buf = (char *)vec[idx].iov_base + off;
+            return vec[idx].iov_len - off;
+        }
+        /* skip the whole entry */
+        off -= vec[idx].iov_len;
+    }
+
+    return 0;
+}
+
+/*
+ * ks_sock_kiov_cb
+ *   ks_tsdu_cb_t callback for a lnet_kiov_t array of ns entries
+ *
+ * Arguments:
+ *   tsdu:  the lnet_kiov_t array
+ *   ns:    number of entries in the array
+ *   off:   byte offset counted across all entries
+ *   buf:   receives kiov_page->addr + kiov_offset + the remaining
+ *          offset of the entry that holds the byte
+ *
+ * Return Value:
+ *   int: bytes left in that entry, or 0 (with *buf untouched) when
+ *        the offset is past the last entry
+ */
+int
+ks_sock_kiov_cb(void *tsdu, int ns, int off, char **buf)
+{
+    lnet_kiov_t *vec = tsdu;
+    int idx;
+
+    for (idx = 0; idx < ns; idx++) {
+        if ((size_t)off < vec[idx].kiov_len) {
+            /* the offset falls inside this entry */
+            *buf = (char *)vec[idx].kiov_page->addr +
+                    vec[idx].kiov_offset + off;
+            return vec[idx].kiov_len - off;
+        }
+        /* skip the whole entry */
+        off -= vec[idx].kiov_len;
+    }
+
+    return 0;
+}
+
+/*
+ * ks_tsdu_cb_t
+ *   callback that finds the contiguous piece of a buffer description
+ *   at byte offset "off": it stores the address in *buf and returns
+ *   the length of the piece, or returns 0 when nothing is left
+ */
+typedef int (*ks_tsdu_cb_t)(void *tsdu, int ns, int off, char **buf);
+
+/*
+ * ks_sock_io
+ *   common path of the buffered send and receive routines: walks the
+ *   caller's buffers through "callback" and moves each piece with
+ *   KsWriteTsdus() (out != 0) or KsReadTsdus() (out == 0)
+ *
+ * Arguments:
+ *   tconn:    the connection, a sender or a child tconn
+ *   tsdu:     buffer description passed through to callback
+ *   ns:       size/count argument passed through to callback
+ *   reqlen:   total bytes requested; only used to set
+ *             TsduMgr->Payload on the receive path
+ *   flags:    MSG_OOB selects the expedited tsdu manager,
+ *             MSG_DONTWAIT makes the call non-blocking
+ *   timeout:  wait limit, converted with cfs_time_seconds(); when it
+ *             is 0 the wait is issued with a 0 limit (presumably
+ *             unlimited - TODO confirm)
+ *   out:      non-zero to send, zero to receive
+ *   callback: see ks_tsdu_cb_t
+ *
+ * Return Value:
+ *   int: the number of bytes moved when that is non-zero, otherwise
+ *        the last status (0 when callback reports nothing to move)
+ *
+ * Notes:
+ *   on the send path the Tdi engine is queued once any byte is written
+ */
+int
+ks_sock_io(ks_tconn_t *tconn, void *tsdu, int ns, int reqlen,
+           int flags, int timeout, int out, ks_tsdu_cb_t callback)
+{
+    ULONG       tflags;
+    BOOLEAN     expedited;
+    PKS_TSDUMGR TsduMgr;
+
+    /* start defined: the loop never assigns rc for an empty request */
+    int         rc = 0;
+    int         length;
+    int         total = 0;
+    int64_t     remained;
+    PCHAR       buffer;
+    BOOLEAN     async;
+
+    LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
+    remained = (int64_t)cfs_time_seconds(timeout);
+
+    /* query tsdu manager */
+    expedited = cfs_is_flag_set(flags, MSG_OOB);
+    TsduMgr = KsQueryTsduMgr(tconn, expedited, (BOOLEAN)out);
+
+    /* check whether request is nonblocking */
+    async = cfs_is_flag_set(flags, MSG_DONTWAIT);
+    if (async) {
+        timeout = 0;
+    }
+
+    ks_get_tconn(tconn);
+    ks_lock_tsdumgr(TsduMgr);
+    if ( tconn->kstc_type != kstt_sender &&
+         tconn->kstc_type != kstt_child) {
+        rc = -EINVAL;
+        goto errorout;
+    }
+
+    /* "total" doubles as the offset of the next piece to move */
+    while ((length = callback(tsdu, ns, total, &buffer)) != 0) {
+
+        /* check whether socket is still valid */
+        if (tconn->kstc_state != ksts_connected) {
+            rc = -ENOTCONN;
+            goto errorout;
+        }
+
+        if (out) {
+            tflags = KsTdiSendFlags(flags);
+            rc = KsWriteTsdus(TsduMgr, buffer, length, tflags);
+        } else {
+            tflags = KsTdiRecvFlags(flags);
+            rc = KsReadTsdus(TsduMgr, buffer, length, tflags);
+        }
+
+        if (rc > 0) {
+            total += rc;
+        } else if (!async && rc == -EAGAIN) {
+            /* blocking caller: wait on the manager event unlocked */
+            if (timeout) {
+                if (remained) {
+                    ks_unlock_tsdumgr(TsduMgr);
+                    remained = cfs_wait_event_internal(
+                                    &TsduMgr->Event,
+                                    remained );
+                } else {
+                    /* time is used up; the lock is still held here */
+                    goto errorout;
+                }
+            } else {
+                ks_unlock_tsdumgr(TsduMgr);
+                cfs_wait_event_internal(&TsduMgr->Event, 0);
+            }
+            ks_lock_tsdumgr(TsduMgr);
+        } else {
+            break;
+        }
+    }
+
+errorout:
+
+    /* every path arrives here with the tsdu manager locked */
+    if (!out) {
+        TsduMgr->Payload = reqlen - total;
+    }
+    ks_unlock_tsdumgr(TsduMgr);
+
+    KsPrint((4, "ks_sock_io: tconn=%p tsdumgr=%p %c total=%xh/%xh rc=%d\n",
+                tconn, TsduMgr, out?'W':'R', total, TsduMgr->TotalBytes, rc));
+
+    if (total) {
+        if (out) {
+            /* signal Tdi sending engine */
+            KsQueueTdiEngine(tconn, TsduMgr);
+        }
+        rc = total;
+    }
+
+    ks_put_tconn(tconn);
+
+    LASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
+    return rc;
+}
+
+/*
+ * ks_send_buf
+ *   send len bytes of a flat buffer: ks_sock_io() in the out
+ *   direction with ks_sock_buf_cb; returns what ks_sock_io() returns
+ */
+int ks_send_buf(ks_tconn_t * tconn, char *buf,
+                int len, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, buf, len, len, flags,
+                    timeout, 1, ks_sock_buf_cb);
+    return rc;
+}
+
+/*
+ * ks_recv_buf
+ *   receive up to len bytes into a flat buffer: ks_sock_io() in the
+ *   in direction with ks_sock_buf_cb; returns what ks_sock_io() returns
+ */
+int ks_recv_buf(ks_tconn_t * tconn, char *buf,
+                int len, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, buf, len, len, flags,
+                    timeout, 0, ks_sock_buf_cb);
+    return rc;
+}
+
+/*
+ * ks_send_iovs
+ *   send the niov entries of an iovec array: ks_sock_io() in the out
+ *   direction with ks_sock_iov_cb; the request length is the sum of
+ *   all iov_len values
+ */
+int ks_send_iovs(ks_tconn_t * tconn, struct iovec *iov,
+                 int niov, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, iov, niov, ks_query_iovs_length(iov, niov),
+                    flags, timeout, TRUE, ks_sock_iov_cb);
+    return rc;
+}
+
+/*
+ * ks_recv_iovs
+ *   receive into the niov entries of an iovec array: ks_sock_io() in
+ *   the in direction with ks_sock_iov_cb; the request length is the
+ *   sum of all iov_len values
+ */
+int ks_recv_iovs(ks_tconn_t * tconn, struct iovec *iov,
+                 int niov, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, iov, niov, ks_query_iovs_length(iov, niov),
+                    flags, timeout, FALSE, ks_sock_iov_cb);
+    return rc;
+}
+
+/*
+ * ks_send_kiovs
+ *   send the nkiov entries of a lnet_kiov_t array: ks_sock_io() in
+ *   the out direction with ks_sock_kiov_cb; the request length is the
+ *   sum of all kiov_len values
+ */
+int ks_send_kiovs(ks_tconn_t * tconn, lnet_kiov_t *kiov,
+                  int nkiov, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, kiov, nkiov, ks_query_kiovs_length(kiov, nkiov),
+                    flags, timeout, TRUE, ks_sock_kiov_cb);
+    return rc;
+}
+
+/*
+ * ks_recv_kiovs
+ *   receive into the nkiov entries of a lnet_kiov_t array:
+ *   ks_sock_io() in the in direction with ks_sock_kiov_cb; the
+ *   request length is the sum of all kiov_len values
+ */
+int ks_recv_kiovs(ks_tconn_t * tconn, lnet_kiov_t *kiov,
+                  int nkiov, int flags, int timeout)
+{
+    int rc;
+
+    rc = ks_sock_io(tconn, kiov, nkiov, ks_query_kiovs_length(kiov, nkiov),
+                    flags, timeout, FALSE, ks_sock_kiov_cb);
+    return rc;
+}
+
 int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
 {
     ks_addr_slot_t * slot = NULL;
@@ -6488,8 +6584,8 @@ void libcfs_ipif_free_enumeration(char **names, int n)
 
 int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog)
 {
-    int                     rc = 0;
-    ksock_tconn_t *         parent;
+    int                 rc = 0;
+    ks_tconn_t *        parent;
 
     parent = ks_create_tconn();
     if (!parent) {
@@ -6565,12 +6661,13 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal,
                         __u32 local_ip, int local_port,
                         __u32 peer_ip, int peer_port)
 {
-    ksock_tconn_t * tconn = NULL;
+    ks_tconn_t *    tconn = NULL;
     int             rc = 0;
 
     *sockp = NULL;
+    if (fatal) *fatal = 0;
 
-    KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n",
+    KsPrint((2, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n",
                 peer_ip, peer_port, local_ip, local_port ));
 
     /* create the tdi connecion structure */
@@ -6586,7 +6683,7 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal,
     /* bind the local ip address with the tconn */
     rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port);
     if (rc < 0) {
-        KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n",
+        KsPrint((1, "libcfs_sock_connect: failed to bind address %x:%d...\n",
                     local_ip, local_port ));
         ks_free_tconn(tconn);
         goto errorout;
@@ -6595,7 +6692,7 @@ int libcfs_sock_connect(struct socket **sockp, int *fatal,
     /* connect to the remote peer */
     rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port);
     if (rc < 0) {
-        KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n",
+        KsPrint((1, "libcfs_sock_connect: failed to connect %x:%d ...\n",
                     peer_ip, peer_port ));
 
         ks_put_tconn(tconn);
@@ -6652,61 +6749,47 @@ int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port)
 int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
 {
     int           rc;
-    ksock_mdl_t * mdl;
-
     int           offset = 0;
 
     while (nob > offset) {
 
-        /* lock the user buffer */
-        rc = ks_lock_buffer( (char *)buffer + offset,
-                        FALSE, nob - offset, IoReadAccess, &mdl );
+        rc = ks_send_buf(sock, (char *)buffer + offset, nob - offset, 0, timeout);
 
-        if (rc < 0) {
-            return (rc);
-        }
-
-        /* send out the whole mdl */
-        rc = ks_send_mdl( sock, NULL, mdl, nob - offset, 0 );
-
-        if (rc > 0) {
-            offset += rc;
+        if (rc <= 0) {
+            goto errorout;
         } else {
-            return (rc);
+            offset += rc;
+            rc = 0;
         }
     }
 
-    return (0);
+errorout:
+
+    KsPrint((4, "libcfs_sock_write: sock: %p %d bytes rc: %d\n", sock, offset, rc));
+    return rc;
 }
 
 int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
 {
-    int           rc;
-    ksock_mdl_t * mdl;
-
+    int           rc = 0;
     int           offset = 0;
 
     while (nob > offset) {
 
-        /* lock the user buffer */
-        rc = ks_lock_buffer( (char *)buffer + offset,
-                               FALSE, nob - offset, IoWriteAccess, &mdl );
-
-        if (rc < 0) {
-            return (rc);
-        }
-
-        /* recv the requested buffer */
-        rc = ks_recv_mdl( sock, mdl, nob - offset, 0 );
+        rc = ks_recv_buf(sock, (char *)buffer + offset, nob - offset, 0, timeout);
 
-        if (rc > 0) {
-            offset += rc;
+        if (rc <= 0) {
+            goto errorout;
         } else {
-            return (rc);
+            offset += rc;
+            rc = 0;
         }
     }
 
-    return (0);
+errorout:
+
+    KsPrint((4, "libcfs_sock_read: sock: %p %d bytes rc: %d\n", sock, offset, rc));
+    return rc;
 }
 
 void libcfs_sock_release(struct socket *sock)
index 4927aa6..6b065d7 100644 (file)
 #define put_cpu() do { } while (0)
 #endif
 
-#define TCD_TYPE_MAX        1
+/* trace_data types for windows: one per IRQL class (passive / dispatch) */
+enum {
+        TCD_TYPE_PASSIVE = 0,
+        TCD_TYPE_DISPATCH,
+        TCD_TYPE_MAX
+};
 
-event_t     tracefile_event;
+/* percentage of the total debug memory given to each trace_data type */
+static unsigned int pages_factor[TCD_TYPE_MAX] = {
+        90,  /* 90% pages for TCD_TYPE_PASSIVE */
+        10   /* 10% pages for TCD_TYPE_DISPATCH */
+};
 
-void tracefile_init_arch()
+char *trace_console_buffers[NR_CPUS][TCD_TYPE_MAX];
+
+struct rw_semaphore tracefile_sem;
+
+int tracefile_init_arch()
 {
        int    i;
        int    j;
-    struct trace_cpu_data *tcd;
+       struct trace_cpu_data *tcd;
 
-    cfs_init_event(&tracefile_event, TRUE, TRUE);
-
-    /* initialize trace_data */
-    memset(trace_data, 0, sizeof(trace_data));
-    for (i = 0; i < TCD_TYPE_MAX; i++) {
-        trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0);
-        if (trace_data[i] == NULL)
-            goto out;
-    }
+       init_rwsem(&tracefile_sem);
 
-    /* arch related info initialized */
-    tcd_for_each(tcd, i, j) {
-        tcd->tcd_pages_factor = 100; /* Only one type */
-        tcd->tcd_cpu = j;
-        tcd->tcd_type = i;
-    }
+       /* initialize trace_data */
+       memset(trace_data, 0, sizeof(trace_data));
+       for (i = 0; i < TCD_TYPE_MAX; i++) {
+               trace_data[i]=cfs_alloc(sizeof(union trace_data_union)*NR_CPUS,
+                                                         GFP_KERNEL);
+               if (trace_data[i] == NULL)
+                       goto out;
+       }
 
-    memset(trace_console_buffers, 0, sizeof(trace_console_buffers));
+       /* arch related info initialized */
+       tcd_for_each(tcd, i, j) {
+               tcd->tcd_pages_factor = (USHORT) pages_factor[i];
+               tcd->tcd_type = (USHORT) i;
+               tcd->tcd_cpu = (USHORT)j;
+       }
 
-       for (i = 0; i < NR_CPUS; i++) {
-               for (j = 0; j < 1; j++) {
+       for (i = 0; i < num_possible_cpus(); i++)
+               for (j = 0; j < TCD_TYPE_MAX; j++) {
                        trace_console_buffers[i][j] =
                                cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE,
-                                       CFS_ALLOC_ZERO);
+                                          GFP_KERNEL);
 
                        if (trace_console_buffers[i][j] == NULL)
-                goto out;
+                               goto out;
                }
-    }
 
        return 0;
 
 out:
        tracefile_fini_arch();
-       KsPrint((0, "lnet: No enough memory\n"));
+       printk(KERN_ERR "lnet: No enough memory\n");
        return -ENOMEM;
+
 }
 
 void tracefile_fini_arch()
@@ -98,84 +110,104 @@ void tracefile_fini_arch()
        int    i;
        int    j;
 
-       for (i = 0; i < NR_CPUS; i++) {
-               for (j = 0; j < 2; j++) {
+       for (i = 0; i < num_possible_cpus(); i++) {
+               for (j = 0; j < TCD_TYPE_MAX; j++) {
                        if (trace_console_buffers[i][j] != NULL) {
                                cfs_free(trace_console_buffers[i][j]);
                                trace_console_buffers[i][j] = NULL;
                        }
-        }
-    }
+               }
+       }
 
-    for (i = 0; trace_data[i] != NULL; i++) {
-        cfs_free(trace_data[i]);
-        trace_data[i] = NULL;
-    }
+       for (i = 0; trace_data[i] != NULL; i++) {
+               cfs_free(trace_data[i]);
+               trace_data[i] = NULL;
+       }
+
+       fini_rwsem(&tracefile_sem);
 }
 
 void tracefile_read_lock()
 {
-    cfs_wait_event(&tracefile_event, 0);
+       down_read(&tracefile_sem);
 }
 
 void tracefile_read_unlock()
 {
-    cfs_wake_event(&tracefile_event);
+       up_read(&tracefile_sem);
 }
 
 void tracefile_write_lock()
 {
-    cfs_wait_event(&tracefile_event, 0);
+       down_write(&tracefile_sem);
 }
 
 void tracefile_write_unlock()
 {
-    cfs_wake_event(&tracefile_event);
+       up_write(&tracefile_sem);
 }
 
 char *
 trace_get_console_buffer(void)
 {
-#pragma message ("is there possible problem with pre-emption ?")
-    int cpu = (int) KeGetCurrentProcessorNumber();
-    return trace_console_buffers[cpu][0];
+        int cpu  = get_cpu();
+        int type = 0;
+        
+        if (KeGetCurrentIrql() >= DISPATCH_LEVEL)
+                type = TCD_TYPE_DISPATCH;
+        else
+                type = TCD_TYPE_PASSIVE;
+       return trace_console_buffers[cpu][type];
 }
 
 void
 trace_put_console_buffer(char *buffer)
 {
+       put_cpu();
 }
 
 struct trace_cpu_data *
 trace_get_tcd(void)
 {
-#pragma message("todo: return NULL if in interrupt context")
-
-       int cpu = (int) KeGetCurrentProcessorNumber();
-       return &(*trace_data[0])[cpu].tcd;
+        int cpu  = get_cpu();
+        int type = 0;
+        
+        if (KeGetCurrentIrql() >= DISPATCH_LEVEL)
+                type = TCD_TYPE_DISPATCH;
+        else
+                type = TCD_TYPE_PASSIVE;
+       return &(*trace_data[type])[cpu].tcd;
 }
 
 void
-trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+trace_put_tcd (struct trace_cpu_data *tcd)
 {
+       put_cpu();
 }
 
-int 
-trace_lock_tcd(struct trace_cpu_data *tcd)
+int trace_lock_tcd(struct trace_cpu_data *tcd)
 {
-    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
-    return 1;
+       __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+       return 1;
 }
 
-void
-trace_unlock_tcd(struct trace_cpu_data *tcd)
+void trace_unlock_tcd(struct trace_cpu_data *tcd)
+{
+       __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+}
+
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
 {
-    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       return tcd->tcd_cpu == tage->cpu;
 }
 
 void
 set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
-                    const int line, unsigned long stack)
+                   const int line, unsigned long stack)
 {
        struct timeval tv;
 
@@ -187,16 +219,16 @@ set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
        header->ph_sec = (__u32)tv.tv_sec;
        header->ph_usec = tv.tv_usec;
        header->ph_stack = stack;
-       header->ph_pid = current->pid;
+       header->ph_pid = (__u32)(ULONG_PTR)current->pid;
        header->ph_line_num = line;
        header->ph_extern_pid = 0;
        return;
 }
 
 void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
-                                 int len, const char *file, const char *fn)
+                            int len, const char *file, const char *fn)
 {
-       char *prefix = NULL, *ptype = NULL;
+       char *prefix = "Lustre", *ptype = NULL;
 
        if ((mask & D_EMERG) != 0) {
                prefix = "LustreError";
@@ -207,25 +239,20 @@ void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
        } else if ((mask & D_WARNING) != 0) {
                prefix = "Lustre";
                ptype = KERN_WARNING;
-       } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) {
+       } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
                prefix = "Lustre";
                ptype = KERN_INFO;
        }
 
        if ((mask & D_CONSOLE) != 0) {
-               printk("%s%s: %s", ptype, prefix, buf);
+               printk("%s%s: %.*s", ptype, prefix, len, buf);
        } else {
-               printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid,
-                      hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf);
+               printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
+                      hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
        }
        return;
 }
 
-int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
-{
-       return 1;
-}
-
 int trace_max_debug_mb(void)
 {
        int  total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
@@ -234,7 +261,16 @@ int trace_max_debug_mb(void)
 }
 
 void
-trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
+trace_call_on_all_cpus(void (*fn)(void *_arg), void *arg)
 {
-#error "tbd"
+    int         cpu;
+    KAFFINITY   mask = cfs_query_thread_affinity();
+
+    for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+        if (cfs_tie_thread_to_cpu(cpu)) {
+            ASSERT((int)KeGetCurrentProcessorNumber() == cpu);
+                   fn(arg);
+            cfs_set_thread_affinity(mask);
+        }
+    }
 }
index 8f326a8..3c1a3de 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
  *
  * GPL HEADER START
  *
 
 #ifndef __KERNEL__
 
-#include <stdio.h>
-#include <stdlib.h>
-#include <io.h>
-#include <time.h>
+#define _NTDDK_
 #include <windows.h>
+#include <libcfs/libcfs.h>
 
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-                              const int line, unsigned long stack,
-                              char *format, ...) {
-    }
-
-int cfs_proc_mknod(const char *path, unsigned short  mode,  unsigned int dev)
+void sleep(int time)
 {
-    return 0;
+    DWORD Time = 1000 * time;
+    Sleep(Time);
 }
 
-
 void print_last_error(char* Prefix)
 {
     LPVOID lpMsgBuf;
@@ -74,33 +67,6 @@ void print_last_error(char* Prefix)
     LocalFree(lpMsgBuf);
 }
 
-//
-// The following declarations are defined in io.h of VC
-// sys/types.h will conflict with io.h, so we need place
-// these declartions here.
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-    void
-    __declspec (naked) __cdecl _chkesp(void)
-    {
-#if _X86_
-        __asm {  jz      exit_chkesp     };
-        __asm {  int     3               };
-    exit_chkesp:
-        __asm {  ret                     };
-#endif
-    }
-#ifdef __cplusplus
-}
-#endif
-
-unsigned int sleep (unsigned int seconds)
-{
-    Sleep(seconds * 1000);
-    return 0;
-}
 
 int gethostname(char * name, int namelen)
 {
@@ -117,4 +83,954 @@ int ioctl (
     return 0;
 }
 
-#endif /* __KERNEL__ */
+
+/*
+ * getopt structures & routines
+ */
+
+
+/* Data type for reentrant functions.  */
+struct _getopt_data
+{
+  /* These have exactly the same meaning as the corresponding global
+     variables, except that they are used for the reentrant
+     versions of getopt.  */
+  int optind;
+  int opterr;
+  int optopt;
+  char *optarg;
+
+  /* Internal members.  */
+
+  /* True if the internal members have been initialized.  */
+  int __initialized;
+
+  /* The next char to be scanned in the option-element
+     in which the last option character we returned was found.
+     This allows us to pick up the scan where we left off.
+
+     If this is zero, or a null string, it means resume the scan
+     by advancing to the next ARGV-element.  */
+  char *__nextchar;
+
+  /* Describe how to deal with options that follow non-option ARGV-elements.
+
+     If the caller did not specify anything,
+     the default is REQUIRE_ORDER if the environment variable
+     POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+     REQUIRE_ORDER means don't recognize them as options;
+     stop option processing when the first non-option is seen.
+     This is what Unix does.
+     This mode of operation is selected by either setting the environment
+     variable POSIXLY_CORRECT, or using `+' as the first character
+     of the list of option characters.
+
+     PERMUTE is the default.  We permute the contents of ARGV as we
+     scan, so that eventually all the non-options are at the end.
+     This allows options to be given in any order, even with programs
+     that were not written to expect this.
+
+     RETURN_IN_ORDER is an option available to programs that were
+     written to expect options and other ARGV-elements in any order
+     and that care about the ordering of the two.  We describe each
+     non-option ARGV-element as if it were the argument of an option
+     with character code 1.  Using `-' as the first character of the
+     list of option characters selects this mode of operation.
+
+     The special argument `--' forces an end of option-scanning regardless
+     of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+     `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
+
+  enum
+    {
+      REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+    } __ordering;
+
+  /* If the POSIXLY_CORRECT environment variable is set.  */
+  int __posixly_correct;
+
+
+  /* Handle permutation of arguments.  */
+
+  /* Describe the part of ARGV that contains non-options that have
+     been skipped.  `first_nonopt' is the index in ARGV of the first
+     of them; `last_nonopt' is the index after the last of them.  */
+
+  int __first_nonopt;
+  int __last_nonopt;
+};
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+/* Keep a global copy of all internal members of getopt_data.  */
+
+static struct _getopt_data getopt_data;
+
+
+/* Initialize the internal data when the first call is made.  */
+
+static const char *
+_getopt_initialize (int argc, char *const *argv, const char *optstring,
+                   struct _getopt_data *d)
+{
+  /* Start processing options with ARGV-element 1 (since ARGV-element 0
+     is the program name); the sequence of previously skipped
+     non-option ARGV-elements is empty.  */
+
+  d->__first_nonopt = d->__last_nonopt = d->optind;
+
+  d->__nextchar = NULL;
+
+  d->__posixly_correct = 0;
+
+  /* Determine how to handle the ordering of options and nonoptions.  */
+
+  if (optstring[0] == '-')
+    {
+      d->__ordering = RETURN_IN_ORDER;
+      ++optstring;
+    }
+  else if (optstring[0] == '+')
+    {
+      d->__ordering = REQUIRE_ORDER;
+      ++optstring;
+    }
+  else if (d->__posixly_correct)
+    d->__ordering = REQUIRE_ORDER;
+  else
+    d->__ordering = PERMUTE;
+
+  return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPT of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.  */
+
+#define SWAP_FLAGS(ch1, ch2)
+
+static void
+exchange (char **argv, struct _getopt_data *d)
+{
+  int bottom = d->__first_nonopt;
+  int middle = d->__last_nonopt;
+  int top = d->optind;
+  char *tem;
+
+  /* Exchange the shorter segment with the far end of the longer segment.
+     That puts the shorter segment into the right place.
+     It leaves the longer segment in the right place overall,
+     but it consists of two parts that need to be swapped next.  */
+
+  while (top > middle && middle > bottom)
+    {
+      if (top - middle > middle - bottom)
+       {
+         /* Bottom segment is the short one.  */
+         int len = middle - bottom;
+         register int i;
+
+         /* Swap it with the top part of the top segment.  */
+         for (i = 0; i < len; i++)
+           {
+             tem = argv[bottom + i];
+             argv[bottom + i] = argv[top - (middle - bottom) + i];
+             argv[top - (middle - bottom) + i] = tem;
+             SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+           }
+         /* Exclude the moved bottom segment from further swapping.  */
+         top -= len;
+       }
+      else
+       {
+         /* Top segment is the short one.  */
+         int len = top - middle;
+         register int i;
+
+         /* Swap it with the bottom part of the bottom segment.  */
+         for (i = 0; i < len; i++)
+           {
+             tem = argv[bottom + i];
+             argv[bottom + i] = argv[middle + i];
+             argv[middle + i] = tem;
+             SWAP_FLAGS (bottom + i, middle + i);
+           }
+         /* Exclude the moved top segment from further swapping.  */
+         bottom += len;
+       }
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  d->__first_nonopt += (d->optind - d->__last_nonopt);
+  d->__last_nonopt = d->optind;
+}
+
+int
+_getopt_internal_r (int argc, char *const *argv, const char *optstring,
+                   const struct option *longopts, int *longind,
+                   int long_only, struct _getopt_data *d)
+{
+  int print_errors = d->opterr;
+  if (optstring[0] == ':')
+    print_errors = 0;
+
+  if (argc < 1)
+    return -1;
+
+  d->optarg = NULL;
+
+  if (d->optind == 0 || !d->__initialized)
+    {
+      if (d->optind == 0)
+       d->optind = 1;  /* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring, d);
+      d->__initialized = 1;
+    }
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this code is used in the GNU libc.  */
+
+# define NONOPTION_P (argv[d->optind][0] != '-' || argv[d->optind][1] == '\0')
+
+  if (d->__nextchar == NULL || *d->__nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+        moved back by the user (who may also have changed the arguments).  */
+      if (d->__last_nonopt > d->optind)
+       d->__last_nonopt = d->optind;
+      if (d->__first_nonopt > d->optind)
+       d->__first_nonopt = d->optind;
+
+      if (d->__ordering == PERMUTE)
+       {
+         /* If we have just processed some options following some non-options,
+            exchange them so that the options come first.  */
+
+         if (d->__first_nonopt != d->__last_nonopt
+             && d->__last_nonopt != d->optind)
+           exchange ((char **) argv, d);
+         else if (d->__last_nonopt != d->optind)
+           d->__first_nonopt = d->optind;
+
+         /* Skip any additional non-options
+            and extend the range of non-options previously skipped.  */
+
+         while (d->optind < argc && NONOPTION_P)
+           d->optind++;
+         d->__last_nonopt = d->optind;
+       }
+
+      /* The special ARGV-element `--' means premature end of options.
+        Skip it like a null option,
+        then exchange with previous non-options as if it were an option,
+        then skip everything else like a non-option.  */
+
+      if (d->optind != argc && !strcmp (argv[d->optind], "--"))
+       {
+         d->optind++;
+
+         if (d->__first_nonopt != d->__last_nonopt
+             && d->__last_nonopt != d->optind)
+           exchange ((char **) argv, d);
+         else if (d->__first_nonopt == d->__last_nonopt)
+           d->__first_nonopt = d->optind;
+         d->__last_nonopt = argc;
+
+         d->optind = argc;
+       }
+
+      /* If we have done all the ARGV-elements, stop the scan
+        and back over any non-options that we skipped and permuted.  */
+
+      if (d->optind == argc)
+       {
+         /* Set the next-arg-index to point at the non-options
+            that we previously skipped, so the caller will digest them.  */
+         if (d->__first_nonopt != d->__last_nonopt)
+           d->optind = d->__first_nonopt;
+         return -1;
+       }
+
+      /* If we have come to a non-option and did not permute it,
+        either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+       {
+         if (d->__ordering == REQUIRE_ORDER)
+           return -1;
+         d->optarg = argv[d->optind++];
+         return 1;
+       }
+
+      /* We have found another option-ARGV-element.
+        Skip the initial punctuation.  */
+
+      d->__nextchar = (argv[d->optind] + 1
+                 + (longopts != NULL && argv[d->optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[d->optind][1] == '-'
+         || (long_only && (argv[d->optind][2]
+                           || !strchr (optstring, argv[d->optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      for (nameend = d->__nextchar; *nameend && *nameend != '='; nameend++)
+       /* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+        or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+       if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+         {
+           if ((unsigned int) (nameend - d->__nextchar)
+               == (unsigned int) strlen (p->name))
+             {
+               /* Exact match found.  */
+               pfound = p;
+               indfound = option_index;
+               exact = 1;
+               break;
+             }
+           else if (pfound == NULL)
+             {
+               /* First nonexact match found.  */
+               pfound = p;
+               indfound = option_index;
+             }
+           else if (long_only
+                    || pfound->has_arg != p->has_arg
+                    || pfound->flag != p->flag
+                    || pfound->val != p->val)
+             /* Second or later nonexact match found.  */
+             ambig = 1;
+         }
+
+      if (ambig && !exact)
+       {
+         if (print_errors)
+           {
+             fprintf (stderr, "%s: option '%s' is ambiguous\n",
+                      argv[0], argv[d->optind]);
+           }
+         d->__nextchar += strlen (d->__nextchar);
+         d->optind++;
+         d->optopt = 0;
+         return '?';
+       }
+
+      if (pfound != NULL)
+       {
+         option_index = indfound;
+         d->optind++;
+         if (*nameend)
+           {
+             /* Don't test has_arg with >, because some C compilers don't
+                allow it to be used on enums.  */
+             if (pfound->has_arg)
+               d->optarg = nameend + 1;
+             else
+               {
+                 if (print_errors)
+                   {
+
+                     if (argv[d->optind - 1][1] == '-')
+                       {
+                         /* --option */
+                         fprintf (stderr, "%s: option '--%s' doesn't allow an argument\n",
+                                  argv[0], pfound->name);
+                       }
+                     else
+                       {
+                         /* +option or -option */
+                         fprintf (stderr, "%s: option '%c%s' doesn't allow an argument\n",
+                                  argv[0], argv[d->optind - 1][0],
+                                  pfound->name);
+                       }
+
+                   }
+
+                 d->__nextchar += strlen (d->__nextchar);
+
+                 d->optopt = pfound->val;
+                 return '?';
+               }
+           }
+         else if (pfound->has_arg == 1)
+           {
+             if (d->optind < argc)
+               d->optarg = argv[d->optind++];
+             else
+               {
+                 if (print_errors)
+                   {
+                     fprintf (stderr,
+                              "%s: option '%s' requires an argument\n",
+                              argv[0], argv[d->optind - 1]);
+                   }
+                 d->__nextchar += strlen (d->__nextchar);
+                 d->optopt = pfound->val;
+                 return optstring[0] == ':' ? ':' : '?';
+               }
+           }
+         d->__nextchar += strlen (d->__nextchar);
+         if (longind != NULL)
+           *longind = option_index;
+         if (pfound->flag)
+           {
+             *(pfound->flag) = pfound->val;
+             return 0;
+           }
+         return pfound->val;
+       }
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+        or the option starts with '--' or is not a valid short
+        option, then it's an error.
+        Otherwise interpret it as a short option.  */
+      if (!long_only || argv[d->optind][1] == '-'
+         || strchr (optstring, *d->__nextchar) == NULL)
+       {
+         if (print_errors)
+           {
+             if (argv[d->optind][1] == '-')
+               {
+                 /* --option */
+
+                 fprintf (stderr, "%s: unrecognized option '--%s'\n",
+                          argv[0], d->__nextchar);
+               }
+             else
+               {
+                 /* +option or -option */
+                 fprintf (stderr, "%s: unrecognized option '%c%s'\n",
+                          argv[0], argv[d->optind][0], d->__nextchar);
+               }
+
+
+           }
+         d->__nextchar = (char *) "";
+         d->optind++;
+         d->optopt = 0;
+         return '?';
+       }
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *d->__nextchar++;
+    char *temp = strchr (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*d->__nextchar == '\0')
+      ++d->optind;
+
+    if (temp == NULL || c == ':')
+      {
+       if (print_errors)
+         {
+           fprintf (stderr, "%s: invalid option -- '%c'\n", argv[0], c);
+         }
+       d->optopt = c;
+       return '?';
+      }
+    /* Convenience. Treat POSIX -W foo same as long option --foo */
+    if (temp[0] == 'W' && temp[1] == ';')
+      {
+       char *nameend;
+       const struct option *p;
+       const struct option *pfound = NULL;
+       int exact = 0;
+       int ambig = 0;
+       int indfound = 0;
+       int option_index;
+
+       /* This is an option that requires an argument.  */
+       if (*d->__nextchar != '\0')
+         {
+           d->optarg = d->__nextchar;
+           /* If we end this ARGV-element by taking the rest as an arg,
+              we must advance to the next element now.  */
+           d->optind++;
+         }
+       else if (d->optind == argc)
+         {
+           if (print_errors)
+             {
+               fprintf (stderr,
+                        "%s: option requires an argument -- '%c'\n",
+                        argv[0], c);
+             }
+           d->optopt = c;
+           if (optstring[0] == ':')
+             c = ':';
+           else
+             c = '?';
+           return c;
+         }
+       else
+         /* We already incremented `d->optind' once;
+            increment it again when taking next ARGV-elt as argument.  */
+         d->optarg = argv[d->optind++];
+
+       /* optarg is now the argument, see if it's in the
+          table of longopts.  */
+
+       for (d->__nextchar = nameend = d->optarg; *nameend && *nameend != '=';
+            nameend++)
+         /* Do nothing.  */ ;
+
+       /* Test all long options for either exact match
+          or abbreviated matches.  */
+       for (p = longopts, option_index = 0; p->name; p++, option_index++)
+         if (!strncmp (p->name, d->__nextchar, nameend - d->__nextchar))
+           {
+             if ((unsigned int) (nameend - d->__nextchar) == strlen (p->name))
+               {
+                 /* Exact match found.  */
+                 pfound = p;
+                 indfound = option_index;
+                 exact = 1;
+                 break;
+               }
+             else if (pfound == NULL)
+               {
+                 /* First nonexact match found.  */
+                 pfound = p;
+                 indfound = option_index;
+               }
+             else
+               /* Second or later nonexact match found.  */
+               ambig = 1;
+           }
+       if (ambig && !exact)
+         {
+           if (print_errors)
+             {
+               fprintf (stderr, "%s: option '-W %s' is ambiguous\n",
+                        argv[0], argv[d->optind]);
+             }
+           d->__nextchar += strlen (d->__nextchar);
+           d->optind++;
+           return '?';
+         }
+       if (pfound != NULL)
+         {
+           option_index = indfound;
+           if (*nameend)
+             {
+               /* Don't test has_arg with >, because some C compilers don't
+                  allow it to be used on enums.  */
+               if (pfound->has_arg)
+                 d->optarg = nameend + 1;
+               else
+                 {
+                   if (print_errors)
+                     {
+                       fprintf (stderr, "\
+%s: option '-W %s' doesn't allow an argument\n",
+                                argv[0], pfound->name);
+                     }
+
+                   d->__nextchar += strlen (d->__nextchar);
+                   return '?';
+                 }
+             }
+           else if (pfound->has_arg == 1)
+             {
+               if (d->optind < argc)
+                 d->optarg = argv[d->optind++];
+               else
+                 {
+                   if (print_errors)
+                     {
+
+                       fprintf (stderr,
+                                "%s: option '%s' requires an argument\n",
+                                argv[0], argv[d->optind - 1]);
+                     }
+                   d->__nextchar += strlen (d->__nextchar);
+                   return optstring[0] == ':' ? ':' : '?';
+                 }
+             }
+           d->__nextchar += strlen (d->__nextchar);
+           if (longind != NULL)
+             *longind = option_index;
+           if (pfound->flag)
+             {
+               *(pfound->flag) = pfound->val;
+               return 0;
+             }
+           return pfound->val;
+         }
+         d->__nextchar = NULL;
+         return 'W';   /* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+       if (temp[2] == ':')
+         {
+           /* This is an option that accepts an argument optionally.  */
+           if (*d->__nextchar != '\0')
+             {
+               d->optarg = d->__nextchar;
+               d->optind++;
+             }
+           else
+             d->optarg = NULL;
+           d->__nextchar = NULL;
+         }
+       else
+         {
+           /* This is an option that requires an argument.  */
+           if (*d->__nextchar != '\0')
+             {
+               d->optarg = d->__nextchar;
+               /* If we end this ARGV-element by taking the rest as an arg,
+                  we must advance to the next element now.  */
+               d->optind++;
+             }
+           else if (d->optind == argc)
+             {
+               if (print_errors)
+                 {
+                   fprintf (stderr,
+                            "%s: option requires an argument -- '%c'\n",
+                            argv[0], c);
+                 }
+               d->optopt = c;
+               if (optstring[0] == ':')
+                 c = ':';
+               else
+                 c = '?';
+             }
+           else
+             /* We already incremented `optind' once;
+                increment it again when taking next ARGV-elt as argument.  */
+             d->optarg = argv[d->optind++];
+           d->__nextchar = NULL;
+         }
+      }
+    return c;
+  }
+}
+
+/*
+ * Non-reentrant front end for _getopt_internal_r: copies the optind and
+ * opterr globals into the shared getopt_data state, runs one parsing
+ * step, then copies optind/optarg/optopt back out to the globals.
+ */
+int
+_getopt_internal (int argc, char *const *argv, const char *optstring,
+                 const struct option *longopts, int *longind, int long_only)
+{
+  int rc;
+
+  /* Import the caller-visible state.  */
+  getopt_data.optind = optind;
+  getopt_data.opterr = opterr;
+
+  rc = _getopt_internal_r (argc, argv, optstring, longopts,
+                          longind, long_only, &getopt_data);
+
+  /* Export what this step produced.  */
+  optind = getopt_data.optind;
+  optarg = getopt_data.optarg;
+  optopt = getopt_data.optopt;
+
+  return rc;
+}
+
+/* Forward to _getopt_internal with long_only passed as 0.  */
+int
+getopt_long (int argc, char *const *argv, const char *options,
+            const struct option *long_options, int *opt_index)
+{
+  const int long_only = 0;
+
+  return _getopt_internal (argc, argv, options, long_options, opt_index,
+                          long_only);
+}
+
+#define TOLOWER(c) tolower(c)
+typedef unsigned chartype;
+/* strcasestr: find the first occurrence of PNEEDLE in PHAYSTACK, with
+   every character compared through TOLOWER.  Returns a pointer into the
+   haystack at the start of the match, the haystack itself when the
+   needle is empty, or 0 when there is no match.  */
+char *
+strcasestr (phaystack, pneedle)
+     const char *phaystack;
+     const char *pneedle;
+{
+  register const unsigned char *haystack, *needle;
+  register chartype b, c;
+
+  /* Both strings are walked as unsigned bytes.  */
+  haystack = (const unsigned char *) phaystack;
+  needle = (const unsigned char *) pneedle;
+
+  b = TOLOWER (*needle);       /* first needle character, lowercased */
+  if (b != '\0')
+    {
+      haystack--;                              /* possible ANSI violation */
+      do                       /* scan for the first byte matching b */
+       {
+         c = *++haystack;
+         if (c == '\0')
+           goto ret0;
+       }
+      while (TOLOWER (c) != (int) b);
+
+      c = TOLOWER (*++needle); /* second needle character, lowercased */
+      if (c == '\0')
+       goto foundneedle;       /* one-character needle: already matched */
+      ++needle;
+      goto jin;                        /* enter the loop at the second-character test */
+
+      for (;;)
+        {
+          register chartype a;
+         register const unsigned char *rhaystack, *rneedle;
+
+         do                    /* two-bytes-per-pass scan for the next b */
+           {
+             a = *++haystack;
+             if (a == '\0')
+               goto ret0;
+             if (TOLOWER (a) == (int) b)
+               break;
+             a = *++haystack;
+             if (a == '\0')
+               goto ret0;
+shloop:
+             ;
+           }
+          while (TOLOWER (a) != (int) b);
+
+jin:     a = *++haystack;
+         if (a == '\0')
+           goto ret0;
+
+         if (TOLOWER (a) != (int) c)
+           goto shloop;
+
+         rhaystack = haystack-- + 1;   /* haystack steps back to the candidate start */
+         rneedle = needle;             /* saved so needle can be restored below */
+         a = TOLOWER (*rneedle);
+
+         if (TOLOWER (*rhaystack) == (int) a)
+           do                  /* compare the remaining characters, two per pass */
+             {
+               if (a == '\0')
+                 goto foundneedle;
+               ++rhaystack;
+               a = TOLOWER (*++needle);
+               if (TOLOWER (*rhaystack) != (int) a)
+                 break;
+               if (a == '\0')
+                 goto foundneedle;
+               ++rhaystack;
+               a = TOLOWER (*++needle);
+             }
+           while (TOLOWER (*rhaystack) == (int) a);
+
+         needle = rneedle;             /* took the register-poor approach */
+
+         if (a == '\0')
+           break;
+        }
+    }
+foundneedle:
+  return (char*) haystack;
+ret0:
+  return 0;
+}
+
+/*
+ * glob() placeholder: no pattern matching is done.  It calls
+ * cfs_enter_debugger() and returns 0; none of the arguments, including
+ * __pglob, is read or written.
+ */
+int glob (const char * __pattern, int __flags,
+                int (*__errfunc) (const char *, int),
+                glob_t * __pglob)
+{
+    int rc = 0;
+
+    cfs_enter_debugger();
+    return rc;
+}
+
+/* Counterpart of the glob() placeholder: intentionally does nothing. */
+void globfree(glob_t *__pglog)
+{
+    (void)__pglog;
+}
+
+/*
+ * setenv - set an environment variable of the current process.
+ *
+ * envname:   name of the variable
+ * envval:    value to assign
+ * overwrite: when zero, a variable that already exists is left unchanged
+ *
+ * Returns 0 on success and -1 when the name is NULL/empty or
+ * SetEnvironmentVariable() fails.  As POSIX specifies, finding an
+ * existing variable while overwrite is zero is a success, not an error.
+ */
+int setenv(const char *envname, const char *envval, int overwrite)
+{
+    if (envname == NULL || envname[0] == '\0') {
+        return -1;
+    }
+
+    /* A non-zero size query means the variable already exists. */
+    if (!overwrite && GetEnvironmentVariable(envname, NULL, 0) != 0) {
+        return 0;
+    }
+
+    /* SetEnvironmentVariable() returns non-zero on success. */
+    return SetEnvironmentVariable(envname, envval) ? 0 : -1;
+}
+
+/*
+ * uname - fill *uts with a fixed system name and the Windows version.
+ *
+ * sysname is always "winnt"; release is "winnt", or "Vista" for major
+ * version 6 with a build number above 3790; version is "<major>.<minor>".
+ *
+ * Returns 0 on success, -1 when the version query fails or the platform
+ * is not VER_PLATFORM_WIN32_NT.
+ */
+int uname(struct utsname *uts)
+{
+    OSVERSIONINFO   OsVerInfo;
+
+    memset(uts, 0, sizeof(struct utsname));
+    strcpy(uts->sysname, "winnt");
+    strcpy(uts->release, "winnt");
+
+    /* query system version; never read the struct if the call failed */
+    memset(&OsVerInfo, 0, sizeof(OsVerInfo));
+    OsVerInfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
+    if (!GetVersionEx(&OsVerInfo)) {
+        return -1;
+    }
+
+    if (OsVerInfo.dwPlatformId != VER_PLATFORM_WIN32_NT) {
+        /* we got errors */
+        return -1;
+    }
+
+    if (OsVerInfo.dwMajorVersion == 6 &&
+        OsVerInfo.dwBuildNumber > 3790) {
+        strcpy(uts->release, "Vista");
+    }
+
+    /* the version fields are DWORDs: print them as unsigned long */
+    sprintf(uts->version, "%lu.%lu",
+            (unsigned long)OsVerInfo.dwMajorVersion,
+            (unsigned long)OsVerInfo.dwMinorVersion);
+    return 0;
+}
+
+/*
+ * Every uid resolves to the same static entry, initialised with
+ * {0, "root"}; the uid argument is not consulted.
+ */
+struct passwd * getpwuid(uid_t uid)
+{
+    static struct passwd root_entry = {0, "root"};
+    struct passwd *pw = &root_entry;
+
+    return pw;
+}
+
+/*
+ * Commit CFS_PAGE_SIZE << factor bytes of read/write memory with
+ * VirtualAlloc() and return whatever it returns.
+ */
+void* pgalloc(size_t factor)
+{
+    return VirtualAlloc(NULL, CFS_PAGE_SIZE << factor,
+                        MEM_COMMIT, PAGE_READWRITE);
+}
+
+/* Release a region with VirtualFree(MEM_RELEASE); page must not be NULL. */
+void  pgfree(void * page)
+{
+    _ASSERT(NULL != page);
+    VirtualFree(page, 0, MEM_RELEASE);
+}
+
+#endif /* !__KERNEL__ */
index 1977da4..e34e744 100644 (file)
@@ -1,5 +1,5 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
  *
  * GPL HEADER START
  *
  * miscellaneous libcfs stuff
  */
 #define DEBUG_SUBSYSTEM S_LNET
-#include <lnet/types.h>
+#include <libcfs/libcfs.h>
+#include <errno.h>
 
 /*
- * Convert server error code to client format. Error codes are from
- * Linux errno.h, so for Linux client---identity.
+ *  IDR support routines
+ *
+ *  local global id <-> handle context
  */
-int convert_server_error(__u64 ecode)
+
+/* idr definitions */
+
+/*
+ * A layer records the state of its slots in a 32-bit bitmap (IDR_FULL
+ * is 0xffffffff), so a layer can hold at most 32 slots: IDR_BITS must
+ * be 5.  A larger value makes the bit macros shift by more than the
+ * width of the bitmap, which is undefined behaviour.
+ */
+#define IDR_BITS 5
+#define IDR_FULL 0xffffffff
+#define IDR_SIZE (1 << IDR_BITS)
+#define IDR_MASK ((1 << IDR_BITS)-1)
+#define MAX_ID_SHIFT (sizeof(int)*8 - 1)
+#define MAX_ID_BIT (1U << MAX_ID_SHIFT)
+#define MAX_ID_MASK (MAX_ID_BIT - 1)
+/* fully parenthesised so the macros are safe inside larger expressions */
+#define MAX_LEVEL ((MAX_ID_SHIFT + IDR_BITS - 1) / IDR_BITS)
+#define IDR_FREE_MAX (MAX_LEVEL + MAX_LEVEL)
+
+/* unsigned one: bit 31 must not be reached through a signed shift */
+#define idr_set_bit(bit, v) ((v) |= (1U << (bit)))
+#define idr_clear_bit(bit, v) ((v) &= ~(1U << (bit)))
+#define idr_test_bit(bit, v) ((v) & (1U << (bit)))
+
+struct idr_layer {
+       uint32_t            bitmap;     /* slot flags: set for a used slot in a leaf, for a full subtree above it */
+       struct idr_layer   *ary[IDR_SIZE];      /* child layers, or the stored pointers in a leaf; ary[0] also chains the free list */
+       int                             count;  /* number of occupied entries in ary[] */
+};
+
+struct idr_context {
+       struct idr_layer *top;          /* root layer of the tree */
+       struct idr_layer *id_free;      /* head of the spare-layer list */
+       int               layers;       /* current depth of the tree */
+       int               id_free_cnt;  /* number of layers on id_free */
+};
+
+
+/*
+ * id (fd) <-> pointer (HANDLE)
+ */
+
+/**********************************************************
+  private structures and routines for id implementation
+***********************************************************/
+                                  
+static struct idr_layer *alloc_layer(struct idr_context *idp)
 {
-       return cfs_error_code((NTSTATUS)ecode);
+       /* Pop one spare layer off the free list, or NULL when it is empty. */
+       struct idr_layer *head = idp->id_free;
+
+       if (head == NULL)
+               return NULL;
+
+       /* Spare layers are chained through ary[0]. */
+       idp->id_free = head->ary[0];
+       idp->id_free_cnt--;
+       head->ary[0] = NULL;
+       return head;
+}
+
+/*
+ * Return the index of the first set bit of bm at or after n, or a value
+ * >= maxid when there is none below maxid.
+ */
+static int find_next_idrbit(uint32_t bm, int maxid, int n)
+{
+       for (; n < maxid; n++) {
+               if (idr_test_bit(n, bm))
+                       break;
+       }
+       return n;
+}
+
+/* Push layer p onto the head of the spare-layer list (linked via ary[0]). */
+static void free_layer(struct idr_context *idp, struct idr_layer *p)
+{
+       struct idr_layer *old_head = idp->id_free;
+
+       p->ary[0] = old_head;
+       idp->id_free = p;
+       idp->id_free_cnt += 1;
+}
+
+/*
+ * Top the spare-layer list up to IDR_FREE_MAX zeroed layers.
+ * Returns 1 once the list is full, 0 if an allocation fails.
+ */
+static int idr_pre_get(struct idr_context *idp)
+{
+       struct idr_layer *fresh;
+
+       for (;;) {
+               if (!(idp->id_free_cnt < IDR_FREE_MAX))
+                       return 1;
+               fresh = cfs_alloc(sizeof(struct idr_layer), CFS_ALLOC_ZERO);
+               if (fresh == NULL)
+                       return (0);
+               free_layer(idp, fresh);
+       }
+}
+/* sub_alloc: starting at idp->top, search for the first free slot whose
+   id is >= *starting_id and store ptr in it.  Returns the new id; -1 if
+   the id leaves the valid range or no spare layer can be taken from the
+   free list; -2 (with *starting_id advanced) if the current tree has no
+   room left at or above the requested id.  */
+static int sub_alloc(struct idr_context *idp, void *ptr, int *starting_id)
+{
+       int n, m, sh;
+       struct idr_layer *p, *new;
+       struct idr_layer *pa[MAX_LEVEL];        /* parent layer for each level of the current path */
+       int l, id;
+       uint32_t bm;
+
+       memset(pa, 0, sizeof(pa));
+
+       id = *starting_id;
+       p = idp->top;
+       l = idp->layers;
+       pa[l--] = NULL;         /* sentinel: nothing above the top layer */
+       while (1) {
+               /*
+                * We run around this while until we reach the leaf node...
+                */
+               n = (id >> (IDR_BITS*l)) & IDR_MASK;    /* slot selected by id at level l */
+               bm = ~p->bitmap;                        /* set bits now mark usable slots */
+               m = find_next_idrbit(bm, IDR_SIZE, n);
+               if (m == IDR_SIZE) {
+                       /* no space available go back to previous layer. */
+                       l++;
+                       id = (id | ((1 << (IDR_BITS*l))-1)) + 1;        /* first id past this subtree */
+                       if (!(p = pa[l])) {
+                               *starting_id = id;
+                               return -2;
+                       }
+                       continue;
+               }
+               if (m != n) {
+                       sh = IDR_BITS*l;
+                       id = ((id >> sh) ^ n ^ m) << sh;        /* replace slot n by m, zero the lower bits */
+               }
+               if ((id >= MAX_ID_BIT) || (id < 0))
+                       return -1;
+               if (l == 0)
+                       break;
+               /*
+                * Create the layer below if it is missing.
+                */
+               if (!p->ary[m]) {
+                       if (!(new = alloc_layer(idp)))
+                               return -1;
+                       p->ary[m] = new;
+                       p->count++;
+               }
+               pa[l--] = p;
+               p = p->ary[m];
+       }
+       /*
+        * We have reached the leaf node, plant the
+        * users pointer and return the raw id.
+        */
+       p->ary[m] = (struct idr_layer *)ptr;
+       idr_set_bit(m, p->bitmap);
+       p->count++;
+       /*
+        * If this layer is full mark the bit in the layer above
+        * to show that this part of the radix tree is full.
+        * This may complete the layer above and require walking
+        * up the radix tree.
+        */
+       n = id;
+       while (p->bitmap == IDR_FULL) {
+               if (!(p = pa[++l]))
+                       break;
+               n = n >> IDR_BITS;
+               idr_set_bit((n & IDR_MASK), p->bitmap);
+       }
+       return(id);
+}
+/* idr_get_new_above_int: store ptr under the first free id that is
+   >= starting_id, adding layers on top of the tree as needed.  Returns
+   the id, or -1 when a layer cannot be obtained or sub_alloc fails.  */
+static int idr_get_new_above_int(struct idr_context *idp, void *ptr, int starting_id)
+{
+       struct idr_layer *p, *new;
+       int layers, v, id;
+
+       idr_pre_get(idp);       /* result not checked here; alloc_layer() returns NULL if the list is empty */
+       
+       id = starting_id;
+build_up:
+       p = idp->top;
+       layers = idp->layers;
+       if (!p) {
+               if (!(p = alloc_layer(idp)))
+                       return -1;
+               layers = 1;
+       }
+       /*
+        * Add a new layer to the top of the tree if the requested
+        * id is larger than the currently allocated space.
+        */
+       while ((layers < MAX_LEVEL) && (id >= (1 << (layers*IDR_BITS)))) {
+               layers++;
+               if (!p->count)
+                       continue;       /* empty top layer: reuse it one level higher */
+               if (!(new = alloc_layer(idp))) {
+                       /*
+                        * The allocation failed.  If we built part of
+                        * the structure tear it down.
+                        */
+                       for (new = p; p && p != idp->top; new = p) {
+                               p = p->ary[0];
+                               new->ary[0] = NULL;
+                               new->bitmap = new->count = 0;
+                               free_layer(idp, new);
+                       }
+                       return -1;
+               }
+               new->ary[0] = p;        /* the old top becomes slot 0 of the new top */
+               new->count = 1;
+               if (p->bitmap == IDR_FULL)
+                       idr_set_bit(0, new->bitmap);
+               p = new;
+       }
+       idp->top = p;
+       idp->layers = layers;
+       v = sub_alloc(idp, ptr, &id);
+       if (v == -2)
+               goto build_up;  /* no room: retry with the id sub_alloc advanced to */
+       return(v);
+}
+/* sub_remove: clear the slot for id, walking down from idp->top with
+   shift as the bit offset of the top level.  Returns 0 if the id was
+   set and has been removed, -1 otherwise.  Layers whose count drops to
+   zero are handed back to the free list.  */
+static int sub_remove(struct idr_context *idp, int shift, int id)
+{
+       struct idr_layer *p = idp->top;
+       struct idr_layer **pa[MAX_LEVEL];       /* addresses of the links followed on the way down */
+       struct idr_layer ***paa = &pa[0];
+       int n;
+
+       *paa = NULL;            /* sentinel that stops the unwind loop below */
+       *++paa = &idp->top;
+
+       while ((shift > 0) && p) {
+               n = (id >> shift) & IDR_MASK;
+               idr_clear_bit(n, p->bitmap);    /* this subtree is about to lose an entry */
+               *++paa = &p->ary[n];
+               p = p->ary[n];
+               shift -= IDR_BITS;
+       }
+       n = id & IDR_MASK;
+       if (p != NULL && idr_test_bit(n, p->bitmap)) {
+               idr_clear_bit(n, p->bitmap);
+               p->ary[n] = NULL;
+               while(*paa && ! --((**paa)->count)){    /* release every layer that became empty */
+                       free_layer(idp, **paa);
+                       **paa-- = NULL;
+               }
+               if ( ! *paa )
+                       idp->layers = 0;        /* unwound to the sentinel: the tree is empty */
+               return 0;
+       }
+       return -1;
+}
+
+/*
+ * _idr_find - look up the pointer stored under an id.
+ *
+ * idp: the idr context to search
+ * id:  the id to look up; bits above MAX_ID_MASK are ignored
+ *
+ * Returns the stored pointer, or NULL when the id lies outside the
+ * current tree or its slot is empty.
+ */
+static void *_idr_find(struct idr_context *idp, int id)
+{
+       int n;
+       struct idr_layer *p;
+
+       n = idp->layers * IDR_BITS;
+       p = idp->top;
+
+       /* Mask off upper bits we don't use for the search. */
+       id &= MAX_ID_MASK;
+
+       /*
+        * The tree currently covers the ids [0, 1 << n).  Anything beyond
+        * that, tain't one of ours: walking it anyway would drop the high
+        * bits and return the entry of a different id.  Once n reaches
+        * MAX_ID_SHIFT the tree spans every valid id, and the shift must
+        * not be evaluated because it would overflow an int.
+        */
+       if (n < (int)MAX_ID_SHIFT && id >= (1 << n))
+               return NULL;
+
+       while (n >= IDR_BITS && p) {
+               n -= IDR_BITS;
+               p = p->ary[(id >> n) & IDR_MASK];
+       }
+       return((void *)p);
+}
+/* _idr_remove: remove id from the tree, drop a redundant top layer and
+   trim the spare-layer list.  Returns 0 on success, -1 if sub_remove
+   reports that the id was not set.  */
+static int _idr_remove(struct idr_context *idp, int id)
+{
+       struct idr_layer *p;
+
+       /* Mask off upper bits we don't use for the search. */
+       id &= MAX_ID_MASK;
+
+       if (sub_remove(idp, (idp->layers - 1) * IDR_BITS, id) == -1) {
+               return -1;
+       }
+
+       if ( idp->top && idp->top->count == 1 && 
+            (idp->layers > 1) &&
+            idp->top->ary[0]) {
+               /* We can drop a layer */
+               p = idp->top->ary[0];
+               idp->top->bitmap = idp->top->count = 0;
+               free_layer(idp, idp->top);
+               idp->top = p;
+               --idp->layers;
+       }
+       while (idp->id_free_cnt >= IDR_FREE_MAX) {      /* release spare layers beyond the cap */
+               p = alloc_layer(idp);
+               cfs_free(p);
+       }
+       return 0;
+}
+
+/**********************************************************
+  public interfaces of id vs handle conversion
+***********************************************************/
+
+/**
+  initialise a idr tree: allocate a context with cfs_alloc() and zero it.
+  returns the new context, or NULL if the allocation fails.
+ */
+struct idr_context *cfs_idr_init()
+{
+    struct idr_context *ctx = cfs_alloc(sizeof(struct idr_context), 0);
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    memset(ctx, 0, sizeof(struct idr_context));
+    return ctx;
+}
+
+/**
+  remove an id from the idr tree.
+  returns the result of _idr_remove(); a non-zero result is also
+  reported with CWARN.
+*/
+int cfs_idr_remove(struct idr_context *idp, int id)
+{
+       int rc = _idr_remove((struct idr_context *)idp, id);
+
+       if (rc == 0)
+               return rc;
+
+       CWARN("WARNING: attempt to remove unset id %d in idtree\n", id);
+       return rc;
+}
+
+/**
+  allocate the next available id, and assign 'ptr' into its slot.
+  you can later retrieve this pointer using cfs_idr_find().
+  returns the new id (>= 0), or -1 if no id could be allocated.
+*/
+int cfs_idr_get_new(struct idr_context *idp, void *ptr)
+{
+       int ret = idr_get_new_above_int(idp, ptr, 0);
+
+       /*
+        * A negative result means nothing was allocated, so there is
+        * nothing to remove.  (Comparing against the unsigned MAX_ID_MASK
+        * would convert -1 to a huge value and try to remove a bogus id.)
+        */
+       if (ret < 0)
+               return -1;
+       return ret;
+}
+
+/**
+   allocate a new id, giving the first available value greater than or
+   equal to the given starting id, and assign 'ptr' into its slot.
+   returns the new id (>= 0), or -1 if no id could be allocated.
+*/
+int cfs_idr_get_new_above(struct idr_context *idp, void *ptr, int starting_id)
+{
+       int ret = idr_get_new_above_int(idp, ptr, starting_id);
+
+       /*
+        * A negative result means nothing was allocated, so there is
+        * nothing to remove.  (Comparing against the unsigned MAX_ID_MASK
+        * would convert -1 to a huge value and try to remove a bogus id.)
+        */
+       if (ret < 0)
+               return -1;
+       return ret;
+}
+
+/**
+  find a pointer value previously set with cfs_idr_get_new given an id;
+  the lookup itself is done by _idr_find().
+*/
+void *cfs_idr_find(struct idr_context *idp, int id)
+{
+       void *found = _idr_find(idp, id);
+
+       return found;
+}
+
+/**
+  destroy a idr tree: free the cached spare layers, then the context.
+  layers still linked below idp->top are not walked here, so ids should
+  be removed before calling this.  a NULL idp is ignored.
+ */
+void cfs_idr_exit(struct idr_context *idp)
+{
+    struct idr_layer *p;
+
+    if (idp == NULL) {
+        return;
+    }
+
+    /* spare layers are chained through ary[0]; release all of them */
+    while ((p = idp->id_free) != NULL) {
+        idp->id_free = p->ary[0];
+        cfs_free(p);
+    }
+    idp->id_free_cnt = 0;
+
+    cfs_free(idp);
 }
 
 /*
@@ -64,7 +424,6 @@ int convert_client_oflag(int cflag, int *result)
        return 0;
 }
 
-
 int cfs_error_code(NTSTATUS Status)
 {
     switch (Status) {
@@ -115,12 +474,14 @@ int cfs_error_code(NTSTATUS Status)
         case STATUS_PORT_UNREACHABLE:
             return (-ECONNRESET);
 
+        case STATUS_INSUFFICIENT_RESOURCES:
+            return (-ENOMEM);
+
         case STATUS_PAGEFILE_QUOTA:
         case STATUS_NO_MEMORY:
         case STATUS_CONFLICTING_ADDRESSES:
         case STATUS_QUOTA_EXCEEDED:
         case STATUS_TOO_MANY_PAGING_FILES:
-        case STATUS_INSUFFICIENT_RESOURCES:
         case STATUS_WORKING_SET_QUOTA:
         case STATUS_COMMITMENT_LIMIT:
         case STATUS_TOO_MANY_ADDRESSES:
@@ -157,6 +518,8 @@ int cfs_error_code(NTSTATUS Status)
         case STATUS_INVALID_BUFFER_SIZE:
             return (-EMSGSIZE);
 
+        case STATUS_ADDRESS_ALREADY_EXISTS:
+            return (-EADDRINUSE);
     }
 
     if (NT_SUCCESS(Status)) 
@@ -165,12 +528,307 @@ int cfs_error_code(NTSTATUS Status)
     return (-EINVAL);
 }
 
+/*
+ * Convert a server error code to client format: the value is cast to
+ * NTSTATUS and translated by cfs_error_code().
+ */
+int convert_server_error(__u64 ecode)
+{
+       NTSTATUS status = (NTSTATUS)ecode;
+
+       return cfs_error_code(status);
+}
 
-void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+char * strsep(char **strp, const char *delim)
 {
+    char *begin, *end;
+    /* Cut the next token off *strp: the first delimiter found is
+       overwritten with '\0' and *strp is moved just past it, or set to
+       NULL when no delimiter remains.  Returns the token start, or NULL
+       when *strp is already NULL.  */
+    begin = *strp;
+    if (begin == NULL) {
+        return NULL;
+    }
+
+    if (delim[0] == '\0' || delim[1] == '\0') {        /* zero or one delimiter character */
+        char ch = delim[0];
+        if (ch == '\0') {
+               end = NULL;             /* empty delimiter set: the rest is one token */
+        } else {
+               if (*begin == ch) {
+                   end = begin;        /* empty token at the front */
+               } else if (*begin == '\0') {
+                   end = NULL;
+               } else {
+                   end = strchr (begin + 1, ch);
+               }
+        }
+    } else {
+        end = strpbrk (begin, delim);  /* several delimiters: first match of any */
+    }
+
+    if (end) {
+        *end++ = '\0';
+        *strp = end;
+    } else {
+        *strp = NULL;
+    }
+
+    return begin;
 }
 
-void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+/*
+ * strnchr - Find a character in a length limited string
+ * @s: The string to be searched
+ * @count: The number of characters to be searched
+ * @c: The character to search for
+ *
+ * Returns a pointer to the first occurrence of @c within the first
+ * @count characters of @s, or NULL if it is not found there.  The scan
+ * also stops at the terminating NUL of @s.
+ */
+
+char *strnchr(const char *s, size_t count, int c)
 {
+    for (; count-- && *s != '\0'; ++s)
+        if (*s == (char) c)
+            return (char *) s;
     return NULL;
 }
+
+/*
+ * strtoull - convert the initial part of a string to an unsigned 64-bit
+ * value.
+ *
+ * nptr:   string to convert; leading white space and one optional '+'
+ *         or '-' sign are accepted
+ * endptr: if not NULL, receives the address of the first character that
+ *         was not converted, or nptr when no digits were consumed
+ * base:   conversion base; 0 selects 16 for a "0x"/"0X" prefix, 8 for a
+ *         leading '0' and 10 otherwise
+ *
+ * Returns the converted value, negated when a '-' sign was given, or
+ * ULONG_LONG_MAX when the value does not fit.
+ */
+__u64 strtoull(char *nptr, char **endptr,int base)
+{
+       char *s = nptr;
+       __u64 acc, cutoff;
+       int c, neg = 0, any, cutlim;
+
+       /*
+        * See strtol for comments as to the logic used.
+        *
+        * Every character is read through unsigned char so that bytes
+        * >= 0x80 never reach the cfs_is*() helpers as negative values.
+        */
+       do {
+               c = (unsigned char) *s++;
+       } while (cfs_isspace(c));
+       if (c == '-') {
+               neg = 1;
+               c = (unsigned char) *s++;
+       } else if (c == '+')
+               c = (unsigned char) *s++;
+       if ((base == 0 || base == 16) &&
+           c == '0' && (*s == 'x' || *s == 'X')) {
+               c = (unsigned char) s[1];
+               s += 2;
+               base = 16;
+       }
+       if (base == 0)
+               base = c == '0' ? 8 : 10;
+       /* largest accumulator / next digit that can still be accepted */
+       cutoff = (__u64)ULONG_LONG_MAX / (__u64)base;
+       cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base);
+       for (acc = 0, any = 0;; c = (unsigned char) *s++) {
+               if (cfs_isdigit(c))
+                       c -= '0';
+               else if (cfs_isalpha(c))
+                       c -= cfs_isupper(c) ? 'A' - 10 : 'a' - 10;
+               else
+                       break;
+               if (c >= base)
+                       break;
+               if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+                       any = -1;       /* overflow: keep consuming digits */
+               else {
+                       any = 1;
+                       acc *= base;
+                       acc += c;
+               }
+       }
+       if (any < 0) {
+               acc = ULONG_LONG_MAX;
+       } else if (neg)
+               acc = 0 - acc;
+       if (endptr != 0)
+               *endptr = (char *) (any ? s - 1 : nptr);
+       return (acc);
+}
+
+#if __KERNEL__
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {s1 += buf[i]; s2 += s1;}  /* fold one byte into both sums */
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8);     /* unrolled: buf[0..15] */
+
+/* ========================================================================= */
+/*
+    Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+  return the updated checksum. If buf is NULL, this function returns
+  the required initial value for the checksum.
+  An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+  much faster. Usage example:
+
+    ULONG adler = zlib_adler32(0L, NULL, 0);
+
+    while (read_buffer(buffer, length) != EOF) {
+      adler = zlib_adler32(adler, buffer, length);
+    }
+    if (adler != original_adler) error();
+*/
+
+ULONG zlib_adler32(ULONG adler,
+                   const BYTE *buf,
+                   UINT len)
+{
+    unsigned long s1 = adler & 0xffff;          /* low 16 bits of the running checksum */
+    unsigned long s2 = (adler >> 16) & 0xffff;  /* high 16 bits of the running checksum */
+    int k;
+
+    if (buf == NULL) return 1L;                 /* initial checksum value */
+
+    while (len > 0) {
+        k = len < NMAX ? len : NMAX;            /* at most NMAX bytes between reductions */
+        len -= k;
+        while (k >= 16) {
+            DO16(buf);
+            buf += 16;
+            k -= 16;
+        }
+        if (k != 0) do {                        /* remaining bytes, one at a time */
+            s1 += *buf++;
+            s2 += s1;
+        } while (--k);
+        s1 %= BASE;
+        s2 %= BASE;
+    }
+    return (s2 << 16) | s1;
+}
+
+#if  !defined(NTDDI_VERSION) || NTDDI_VERSION < 0x06000000
+/*
+ * strnlen - length of a string, looking at no more than _MaxCount
+ * characters.
+ *
+ * Returns the number of characters before the terminating NUL, or
+ * _MaxCount when no NUL is found within the first _MaxCount characters.
+ * The terminator itself is never counted.
+ */
+_CRTIMP size_t  __cdecl strnlen(const char * _Str, size_t _MaxCount)
+{
+        size_t len = 0;
+
+        /* advance only past characters that are not the terminator */
+        while (len < _MaxCount && _Str[len] != '\0')
+                len++;
+        return len;
+}
+#endif
+/* Optional kernel ctype routines.  cfs_libc_init() stores in each of
+   these whatever MmGetSystemRoutineAddress() returns for the matching
+   name; the cfs_is*() wrappers test them before calling through.  */
+int (__cdecl *_cfs_isalpha)(int);
+int (__cdecl *_cfs_isspace)(int);
+int (__cdecl *_cfs_isupper)(int);
+int (__cdecl *_cfs_isdigit)(int);
+int (__cdecl *_cfs_isxdigit)(int);
+
+/*
+ * Non-zero when c is an ASCII letter.  Calls through _cfs_isalpha when
+ * that pointer is set, otherwise uses a local range test.
+ */
+int cfs_isalpha(int c)
+{
+    if (!_cfs_isalpha) {
+        return ((c >= 'a' && c <= 'z') ||
+                (c >= 'A' && c <= 'Z'));
+    }
+    return _cfs_isalpha(c);
+}
+
+/*
+ * Non-zero when c is white space (0x09..0x0d or 0x20).  Calls through
+ * _cfs_isspace when that pointer is set, otherwise uses a local test.
+ */
+int cfs_isspace(int c)
+{
+    if (!_cfs_isspace) {
+        return ((c >= 0x09 && c <= 0x0d) ||
+                (c == 0x20));
+    }
+    return _cfs_isspace(c);
+}
+
+/*
+ * Non-zero when c is an upper-case ASCII letter.  Calls through
+ * _cfs_isupper when that pointer is set, otherwise uses a local test.
+ */
+int cfs_isupper(int c)
+{
+    if (!_cfs_isupper) {
+        return (c >= 'A' && c <= 'Z');
+    }
+    return _cfs_isupper(c);
+}
+
+/*
+ * Non-zero when c is a decimal digit.  Calls through _cfs_isdigit when
+ * that pointer is set, otherwise uses a local range test.
+ */
+int cfs_isdigit(int c)
+{
+    if (!_cfs_isdigit) {
+        return (c >= '0' && c <= '9');
+    }
+    return _cfs_isdigit(c);
+}
+
+/*
+ * Non-zero when c is a hexadecimal digit (0-9, A-F, a-f).  Calls
+ * through _cfs_isxdigit when that pointer is set, otherwise uses a
+ * local range test.
+ */
+int cfs_isxdigit(int c)
+{
+    if (_cfs_isxdigit) {
+        return _cfs_isxdigit(c);
+    } else {
+        /* the lower-case range must end at 'f'; 'a'..'F' is empty */
+        return ((c >= '0' && c <= '9') ||
+                (c >= 'A' && c <= 'F') ||
+                (c >= 'a' && c <= 'f'));
+    }
+}
+/* cfs_libc_init: look up isspace/isalpha/isupper/isdigit/isxdigit by
+   name with MmGetSystemRoutineAddress() and store each result in the
+   matching _cfs_* pointer.  In DBG builds the cfs_is*() wrappers are
+   then checked against a few known characters with ASSERT.  */
+void cfs_libc_init()
+{
+    UNICODE_STRING  fn;
+    int             i;
+
+    struct {WCHAR * name; PVOID * addr;} funcs[] = {   /* routine name -> pointer that receives its address */
+            { L"isspace", (PVOID *)&_cfs_isspace},
+            { L"isalpha", (PVOID *)&_cfs_isalpha},
+            { L"isupper", (PVOID *)&_cfs_isupper},
+            { L"isdigit", (PVOID *)&_cfs_isdigit},
+            { L"isxdigit",(PVOID *)&_cfs_isxdigit},
+            { NULL, NULL },                             /* terminator */
+            };
+
+    for (i=0; funcs[i].name != NULL; i++) {
+        RtlInitUnicodeString(&fn, funcs[i].name);
+        *(funcs[i].addr) = MmGetSystemRoutineAddress(&fn);
+    }
+
+#if DBG
+    ASSERT(cfs_isspace(0x20) && cfs_isspace(0x09) &&
+           cfs_isspace(0x0a) && cfs_isspace(0x0d) &&
+           !cfs_isspace('a') && !cfs_isspace('0'));
+    ASSERT(cfs_isalpha('a')  && cfs_isalpha('Z') && 
+           !cfs_isalpha('0') && !cfs_isalpha('='));
+    ASSERT(cfs_isupper('A')  && cfs_isupper('Z') && 
+           !cfs_isupper('a') && !cfs_isupper('='));
+    ASSERT(cfs_isdigit('0')   && cfs_isdigit('9') && 
+           !cfs_isdigit('a')  && !cfs_isdigit('#'));
+    ASSERT(cfs_isxdigit('0')  && cfs_isxdigit('9') && 
+           cfs_isxdigit('a')  && cfs_isxdigit('A') &&
+           cfs_isxdigit('F')  && cfs_isxdigit('f') &&
+           !cfs_isxdigit('G') && !cfs_isxdigit('z'));
+#endif    
+}
+
+#else
+
+unsigned int libcfs_subsystem_debug = ~0;      /* every bit set */
+/* Non-kernel build: each cfs_is*() helper forwards straight to the
+   C library function of the same name.  */
+int cfs_isalpha(int c)
+{
+    return isalpha(c);
+}
+
+int cfs_isspace(int c)
+{
+    return isspace(c);
+}
+
+int cfs_isupper(int c)
+{
+    return isupper(c);
+}
+
+int cfs_isdigit(int c)
+{
+    return isdigit(c);
+}
+
+int cfs_isxdigit(int c)
+{
+    return isxdigit(c);
+}
+
+void cfs_libc_init()   /* nothing to set up in this build */
+{
+}
+
+
+#endif