Whamcloud - gitweb
Branch HEAD
authorrobert.read <robert.read>
Thu, 19 Jun 2008 22:04:35 +0000 (22:04 +0000)
committerrobert.read <robert.read>
Thu, 19 Jun 2008 22:04:35 +0000 (22:04 +0000)
b=15969
i=isaac
i=adilger

Move libcfs to a module.

123 files changed:
libcfs/.cvsignore [new file with mode: 0644]
libcfs/Kernelenv.in [new file with mode: 0644]
libcfs/Kernelenv.mk [new file with mode: 0644]
libcfs/Makefile.in [new file with mode: 0644]
libcfs/autoMakefile.am [new file with mode: 0644]
libcfs/autoconf/.cvsignore [new file with mode: 0644]
libcfs/autoconf/Makefile.am [new file with mode: 0644]
libcfs/autoconf/lustre-libcfs.m4 [new file with mode: 0644]
libcfs/include/Makefile.am [new file with mode: 0644]
libcfs/include/libcfs/.cvsignore [new file with mode: 0644]
libcfs/include/libcfs/Makefile.am [new file with mode: 0644]
libcfs/include/libcfs/bitmap.h [new file with mode: 0644]
libcfs/include/libcfs/curproc.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/.cvsignore [new file with mode: 0644]
libcfs/include/libcfs/darwin/Makefile.am [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-fs.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-lock.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-mem.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-prim.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-sync.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-tcpip.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-time.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-types.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/darwin-utils.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/kp30.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/libcfs.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/lltrace.h [new file with mode: 0644]
libcfs/include/libcfs/darwin/portals_utils.h [new file with mode: 0644]
libcfs/include/libcfs/kp30.h [new file with mode: 0644]
libcfs/include/libcfs/libcfs.h [new file with mode: 0644]
libcfs/include/libcfs/linux/.cvsignore [new file with mode: 0644]
libcfs/include/libcfs/linux/Makefile.am [new file with mode: 0644]
libcfs/include/libcfs/linux/kp30.h [new file with mode: 0644]
libcfs/include/libcfs/linux/libcfs.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-fs.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-lock.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-mem.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-prim.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-tcpip.h [new file with mode: 0644]
libcfs/include/libcfs/linux/linux-time.h [new file with mode: 0644]
libcfs/include/libcfs/linux/lltrace.h [new file with mode: 0644]
libcfs/include/libcfs/linux/portals_compat25.h [new file with mode: 0644]
libcfs/include/libcfs/linux/portals_utils.h [new file with mode: 0644]
libcfs/include/libcfs/list.h [new file with mode: 0644]
libcfs/include/libcfs/lltrace.h [new file with mode: 0644]
libcfs/include/libcfs/portals_utils.h [new file with mode: 0644]
libcfs/include/libcfs/types.h [new file with mode: 0644]
libcfs/include/libcfs/user-bitops.h [new file with mode: 0644]
libcfs/include/libcfs/user-lock.h [new file with mode: 0644]
libcfs/include/libcfs/user-prim.h [new file with mode: 0644]
libcfs/include/libcfs/user-tcpip.h [new file with mode: 0644]
libcfs/include/libcfs/user-time.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/kp30.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/libcfs.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/lltrace.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/portals_compat25.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/portals_utils.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-fs.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-lock.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-mem.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-prim.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-tcpip.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-time.h [new file with mode: 0644]
libcfs/include/libcfs/winnt/winnt-types.h [new file with mode: 0644]
libcfs/libcfs/.cvsignore [new file with mode: 0644]
libcfs/libcfs/Info.plist [new file with mode: 0644]
libcfs/libcfs/Makefile.in [new file with mode: 0644]
libcfs/libcfs/autoMakefile.am [new file with mode: 0644]
libcfs/libcfs/darwin/.cvsignore [new file with mode: 0644]
libcfs/libcfs/darwin/Makefile.am [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-curproc.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-debug.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-fs.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-internal.h [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-mem.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-module.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-prim.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-proc.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-sync.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-tcpip.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-tracefile.c [new file with mode: 0644]
libcfs/libcfs/darwin/darwin-utils.c [new file with mode: 0644]
libcfs/libcfs/debug.c [new file with mode: 0644]
libcfs/libcfs/libcfs.xcode/project.pbxproj [new file with mode: 0644]
libcfs/libcfs/linux/.cvsignore [new file with mode: 0644]
libcfs/libcfs/linux/Makefile.am [new file with mode: 0644]
libcfs/libcfs/linux/linux-curproc.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-debug.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-fs.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-lock.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-lwt.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-mem.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-module.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-prim.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-proc.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-sync.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-tcpip.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-tracefile.c [new file with mode: 0644]
libcfs/libcfs/linux/linux-utils.c [new file with mode: 0644]
libcfs/libcfs/lwt.c [new file with mode: 0644]
libcfs/libcfs/module.c [new file with mode: 0644]
libcfs/libcfs/nidstrings.c [new file with mode: 0644]
libcfs/libcfs/tracefile.c [new file with mode: 0644]
libcfs/libcfs/tracefile.h [new file with mode: 0644]
libcfs/libcfs/user-bitops.c [new file with mode: 0644]
libcfs/libcfs/user-lock.c [new file with mode: 0644]
libcfs/libcfs/user-prim.c [new file with mode: 0644]
libcfs/libcfs/user-tcpip.c [new file with mode: 0644]
libcfs/libcfs/watchdog.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-curproc.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-debug.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-fs.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-lock.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-lwt.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-mem.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-module.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-prim.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-proc.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-sync.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-tcpip.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-tracefile.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-usr.c [new file with mode: 0644]
libcfs/libcfs/winnt/winnt-utils.c [new file with mode: 0644]

diff --git a/libcfs/.cvsignore b/libcfs/.cvsignore
new file mode 100644 (file)
index 0000000..f30d862
--- /dev/null
@@ -0,0 +1,11 @@
+Kernelenv
+Makefile
+autoMakefile
+autoMakefile.in
+aclocal.m4
+autom4te.cache
+config.log
+config.status
+configure
+.*.cmd
+.depend
diff --git a/libcfs/Kernelenv.in b/libcfs/Kernelenv.in
new file mode 100644 (file)
index 0000000..59eda30
--- /dev/null
@@ -0,0 +1,6 @@
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include
+# lnet/utils/debug.c wants <linux/version.h> from userspace.  sigh.
+HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
+LIBREADLINE := @LIBREADLINE@
+# 2.5's makefiles aren't nice to cross dir libraries in host programs
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/libcfs/Kernelenv.mk b/libcfs/Kernelenv.mk
new file mode 100644 (file)
index 0000000..d973e5d
--- /dev/null
@@ -0,0 +1,4 @@
+EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include
+HOSTCFLAGS := $(EXTRA_CFLAGS)
+# the kernel doesn't want us to build archives for host binaries :/
+PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/libcfs/Makefile.in b/libcfs/Makefile.in
new file mode 100644 (file)
index 0000000..8c58d42
--- /dev/null
@@ -0,0 +1,3 @@
+subdir-m += libcfs
+
+@INCLUDE_RULES@
diff --git a/libcfs/autoMakefile.am b/libcfs/autoMakefile.am
new file mode 100644 (file)
index 0000000..ef90449
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) 2001  Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS = libcfs include autoconf
+
+sources:
+       $(MAKE) sources -C libcfs
diff --git a/libcfs/autoconf/.cvsignore b/libcfs/autoconf/.cvsignore
new file mode 100644 (file)
index 0000000..282522d
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/libcfs/autoconf/Makefile.am b/libcfs/autoconf/Makefile.am
new file mode 100644 (file)
index 0000000..3927b04
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST := lustre-libcfs.m4
diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4
new file mode 100644 (file)
index 0000000..86fe0e6
--- /dev/null
@@ -0,0 +1,517 @@
+#
+# LIBCFS_CONFIG_CDEBUG
+#
+# whether to enable various libcfs debugs (CDEBUG, ENTRY/EXIT, LASSERT, etc.)
+#
+AC_DEFUN([LIBCFS_CONFIG_CDEBUG],
+[
+AC_MSG_CHECKING([whether to enable CDEBUG, CWARN])
+AC_ARG_ENABLE([libcfs_cdebug],
+       AC_HELP_STRING([--disable-libcfs-cdebug],
+                       [disable libcfs CDEBUG, CWARN]),
+       [],[enable_libcfs_cdebug='yes'])
+AC_MSG_RESULT([$enable_libcfs_cdebug])
+if test x$enable_libcfs_cdebug = xyes; then
+   AC_DEFINE(CDEBUG_ENABLED, 1, [enable libcfs CDEBUG, CWARN])
+else
+   AC_DEFINE(CDEBUG_ENABLED, 0, [disable libcfs CDEBUG, CWARN])
+fi
+
+AC_MSG_CHECKING([whether to enable ENTRY/EXIT])
+AC_ARG_ENABLE([libcfs_trace],
+       AC_HELP_STRING([--disable-libcfs-trace],
+                       [disable libcfs ENTRY/EXIT]),
+       [],[enable_libcfs_trace='yes'])
+AC_MSG_RESULT([$enable_libcfs_trace])
+if test x$enable_libcfs_trace = xyes; then
+   AC_DEFINE(CDEBUG_ENTRY_EXIT, 1, [enable libcfs ENTRY/EXIT])
+else
+   AC_DEFINE(CDEBUG_ENTRY_EXIT, 0, [disable libcfs ENTRY/EXIT])
+fi
+
+AC_MSG_CHECKING([whether to enable LASSERT, LASSERTF])
+AC_ARG_ENABLE([libcfs_assert],
+       AC_HELP_STRING([--disable-libcfs-assert],
+                       [disable libcfs LASSERT, LASSERTF]),
+       [],[enable_libcfs_assert='yes'])
+AC_MSG_RESULT([$enable_libcfs_assert])
+if test x$enable_libcfs_assert = xyes; then
+   AC_DEFINE(LIBCFS_DEBUG, 1, [enable libcfs LASSERT, LASSERTF])
+fi
+])
+
+#
+# LIBCFS_CONFIG_PANIC_DUMPLOG
+#
+# check if tunable panic_dumplog is wanted
+#
+AC_DEFUN([LIBCFS_CONFIG_PANIC_DUMPLOG],
+[AC_MSG_CHECKING([for tunable panic_dumplog support])
+AC_ARG_ENABLE([panic_dumplog],
+       AC_HELP_STRING([--enable-panic_dumplog],
+                      [enable panic_dumplog]),
+       [],[enable_panic_dumplog='no'])
+if test x$enable_panic_dumplog = xyes ; then
+       AC_DEFINE(LNET_DUMP_ON_PANIC, 1, [use dumplog on panic])
+       AC_MSG_RESULT([yes (by request)])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+#
+# LIBCFS_STRUCT_PAGE_LIST
+#
+# 2.6.4 no longer has page->list
+#
+AC_DEFUN([LIBCFS_STRUCT_PAGE_LIST],
+[AC_MSG_CHECKING([if struct page has a list field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/mm.h>
+],[
+       struct page page;
+       &page.list;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LIBCFS_STRUCT_SIGHAND
+#
+# red hat 2.4 adds sighand to struct task_struct
+#
+AC_DEFUN([LIBCFS_STRUCT_SIGHAND],
+[AC_MSG_CHECKING([if task_struct has a sighand field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       struct task_struct p;
+       p.sighand = NULL;
+],[
+       AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
+       AC_MSG_RESULT([yes])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LIBCFS_FUNC_CPU_ONLINE
+#
+# cpu_online is different in rh 2.4, vanilla 2.4, and 2.6
+#
+AC_DEFUN([LIBCFS_FUNC_CPU_ONLINE],
+[AC_MSG_CHECKING([if kernel defines cpu_online()])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       cpu_online(0);
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LIBCFS_TYPE_GFP_T
+#
+# check if gfp_t is typedef-ed
+#
+AC_DEFUN([LIBCFS_TYPE_GFP_T],
+[AC_MSG_CHECKING([if kernel defines gfp_t])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/gfp.h>
+],[
+       return sizeof(gfp_t);
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_GFP_T, 1, [gfp_t found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LIBCFS_TYPE_CPUMASK_T
+#
+# same goes for cpumask_t
+#
+AC_DEFUN([LIBCFS_TYPE_CPUMASK_T],
+[AC_MSG_CHECKING([if kernel defines cpumask_t])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       return sizeof (cpumask_t);
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LIBCFS_FUNC_SHOW_TASK
+#
+# we export show_task(), but not all kernels have it (yet)
+#
+AC_DEFUN([LIBCFS_FUNC_SHOW_TASK],
+[LB_CHECK_SYMBOL_EXPORT([show_task],
+[kernel/ksyms.c kernel/sched.c],[
+AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported])
+],[
+])
+])
+
+# check userland __u64 type
+AC_DEFUN([LIBCFS_U64_LONG_LONG],
+[AC_MSG_CHECKING([u64 is long long type])
+tmp_flags="$CFLAGS"
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([
+       #include <linux/types.h>
+       int main(void) {
+               unsigned long long *data1;
+               __u64 *data2;
+               
+               data1 = data2;
+               return 0;
+       }
+],[
+       AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_U64_LONG_LONG, 1,
+                  [__u64 is long long type])
+],[
+       AC_MSG_RESULT([no])
+])
+CFLAGS="$tmp_flags"
+])
+
+# check userland size_t type
+AC_DEFUN([LIBCFS_SIZE_T_LONG],
+[AC_MSG_CHECKING([size_t is unsigned long type])
+tmp_flags="$CFLAGS"
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([
+       #include <linux/types.h>
+       int main(void) {
+               unsigned long *data1;
+               size_t *data2;
+               
+               data1 = data2;
+               return 0;
+       }
+],[
+       AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_SIZE_T_LONG, 1,
+                  [size_t is long type])
+],[
+       AC_MSG_RESULT([no])
+])
+CFLAGS="$tmp_flags"
+])
+
+AC_DEFUN([LIBCFS_SSIZE_T_LONG],
+[AC_MSG_CHECKING([ssize_t is signed long type])
+tmp_flags="$CFLAGS"
+CFLAGS="$CFLAGS -Werror"
+AC_COMPILE_IFELSE([
+       #include <linux/types.h>
+       int main(void) {
+               long *data1;
+               ssize_t *data2;
+               
+               data1 = data2;
+               return 0;
+       }
+],[
+       AC_MSG_RESULT([yes])
+        AC_DEFINE(HAVE_SSIZE_T_LONG, 1,
+                  [ssize_t is long type])
+],[
+       AC_MSG_RESULT([no])
+])
+CFLAGS="$tmp_flags"
+])
+
+
+# LIBCFS_TASKLIST_LOCK
+# 2.6.18 removed the tasklist_lock export
+AC_DEFUN([LIBCFS_TASKLIST_LOCK],
+[LB_CHECK_SYMBOL_EXPORT([tasklist_lock],
+[kernel/fork.c],[
+AC_DEFINE(HAVE_TASKLIST_LOCK, 1,
+         [tasklist_lock exported])
+],[
+])
+])
+
+# 2.6.19 API change:
+# kmem_cache_destroy(cachep) returns void instead of int
+AC_DEFUN([LIBCFS_KMEM_CACHE_DESTROY_INT],
+[AC_MSG_CHECKING([kmem_cache_destroy(cachep) return int])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/slab.h>
+],[
+       int i = kmem_cache_destroy(NULL);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1,
+                [kmem_cache_destroy(cachep) return int])
+],[
+        AC_MSG_RESULT(NO)
+])
+])
+
+# 2.6.19 API change:
+# panic_notifier_list uses atomic_notifier operations
+#
+AC_DEFUN([LIBCFS_ATOMIC_PANIC_NOTIFIER],
+[AC_MSG_CHECKING([panic_notifier_list is atomic])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/notifier.h>
+       #include <linux/kernel.h>
+],[
+       struct atomic_notifier_head panic_notifier_list;
+],[
+        AC_MSG_RESULT(yes)
+       AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1,
+               [panic_notifier_list is atomic_notifier_head])
+],[
+        AC_MSG_RESULT(NO)
+])
+])
+
+# 2.6.20 API change: INIT_WORK takes 2 args and no longer
+# stores data inside
+AC_DEFUN([LIBCFS_3ARGS_INIT_WORK],
+[AC_MSG_CHECKING([check INIT_WORK want 3 args])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/workqueue.h>
+],[
+       struct work_struct work;
+
+       INIT_WORK(&work, NULL, NULL);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1,
+                  [INIT_WORK use 3 args and store data inside])
+],[
+        AC_MSG_RESULT(NO)
+])
+])
+
+# 2.6.21 API change: register_sysctl_table() takes only one argument,
+# whereas older kernels required two.
+AC_DEFUN([LIBCFS_2ARGS_REGISTER_SYSCTL],
+[AC_MSG_CHECKING([check register_sysctl_table want 2 args])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/sysctl.h>
+],[
+       return register_sysctl_table(NULL,0);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1,
+                  [register_sysctl_table want 2 args])
+],[
+        AC_MSG_RESULT(NO)
+])
+])
+
+# 2.6.21 marks kmem_cache_t deprecated and uses struct kmem_cache
+# instead
+AC_DEFUN([LIBCFS_KMEM_CACHE],
+[AC_MSG_CHECKING([check kernel has struct kmem_cache])
+tmp_flags="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="-Werror"
+LB_LINUX_TRY_COMPILE([
+        #include <linux/slab.h>
+        typedef struct kmem_cache cache_t;
+],[
+       cache_t *cachep = NULL;
+
+       kmem_cache_alloc(cachep, 0);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_KMEM_CACHE, 1,
+                  [kernel has struct kmem_cache])
+],[
+        AC_MSG_RESULT(NO)
+])
+EXTRA_KCFLAGS="$tmp_flags"
+])
+# 2.6.23 lost dtor argument
+AC_DEFUN([LIBCFS_KMEM_CACHE_CREATE_DTOR],
+[AC_MSG_CHECKING([check kmem_cache_create has dtor argument])
+LB_LINUX_TRY_COMPILE([
+        #include <linux/slab.h>
+],[
+       kmem_cache_create(NULL, 0, 0, 0, NULL, NULL);
+],[
+        AC_MSG_RESULT(yes)
+        AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1,
+                  [kmem_cache_create has dtor argument])
+],[
+        AC_MSG_RESULT(NO)
+])
+])
+
+#
+# LIBCFS_PROG_LINUX
+#
+# LNet linux kernel checks
+#
+AC_DEFUN([LIBCFS_PROG_LINUX],
+[
+LIBCFS_FUNC_CPU_ONLINE
+LIBCFS_TYPE_GFP_T
+LIBCFS_TYPE_CPUMASK_T
+LIBCFS_CONFIG_PANIC_DUMPLOG
+
+LIBCFS_STRUCT_PAGE_LIST
+LIBCFS_STRUCT_SIGHAND
+LIBCFS_FUNC_SHOW_TASK
+LIBCFS_U64_LONG_LONG
+LIBCFS_SSIZE_T_LONG
+LIBCFS_SIZE_T_LONG
+# 2.6.18
+LIBCFS_TASKLIST_LOCK
+# 2.6.19
+LIBCFS_KMEM_CACHE_DESTROY_INT
+LIBCFS_ATOMIC_PANIC_NOTIFIER
+# 2.6.20
+LIBCFS_3ARGS_INIT_WORK
+# 2.6.21
+LIBCFS_2ARGS_REGISTER_SYSCTL
+LIBCFS_KMEM_CACHE
+# 2.6.23
+LIBCFS_KMEM_CACHE_CREATE_DTOR
+])
+
+#
+# LIBCFS_PROG_DARWIN
+#
+# Darwin checks
+#
+AC_DEFUN([LIBCFS_PROG_DARWIN],
+[LB_DARWIN_CHECK_FUNCS([get_preemption_level])
+])
+
+#
+# LIBCFS_PATH_DEFAULTS
+#
+# default paths for installed files
+#
+AC_DEFUN([LIBCFS_PATH_DEFAULTS],
+[
+])
+
+#
+# LIBCFS_CONFIGURE
+#
+# other configure checks
+#
+AC_DEFUN([LIBCFS_CONFIGURE],
+[# lnet/utils/portals.c
+AC_CHECK_HEADERS([asm/types.h endian.h sys/ioctl.h])
+
+# lnet/utils/debug.c
+AC_CHECK_HEADERS([linux/version.h])
+
+AC_CHECK_TYPE([spinlock_t],
+       [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])],
+       [],
+       [#include <linux/spinlock.h>])
+
+# lnet/utils/wirecheck.c
+AC_CHECK_FUNCS([strnlen])
+
+# --------  Check for required packages  --------------
+
+
+AC_MSG_CHECKING([if efence debugging support is requested])
+AC_ARG_ENABLE(efence,
+       AC_HELP_STRING([--enable-efence],
+                       [use efence library]),
+       [],[enable_efence='no'])
+AC_MSG_RESULT([$enable_efence])
+if test "$enable_efence" = "yes" ; then
+       LIBEFENCE="-lefence"
+       AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested])
+else
+       LIBEFENCE=""
+fi
+AC_SUBST(LIBEFENCE)
+
+
+# -------- check for -lpthread support ----
+AC_MSG_CHECKING([whether to use libpthread for libcfs library])
+AC_ARG_ENABLE([libpthread],
+               AC_HELP_STRING([--disable-libpthread],
+                       [disable libpthread]),
+               [],[enable_libpthread=yes])
+if test "$enable_libpthread" = "yes" ; then
+       AC_CHECK_LIB([pthread], [pthread_create],
+               [ENABLE_LIBPTHREAD="yes"],
+               [ENABLE_LIBPTHREAD="no"])
+       if test "$ENABLE_LIBPTHREAD" = "yes" ; then
+               AC_MSG_RESULT([$ENABLE_LIBPTHREAD])
+               PTHREAD_LIBS="-lpthread"
+               AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread])
+       else
+               PTHREAD_LIBS=""
+               AC_MSG_RESULT([no libpthread is found])
+       fi
+       AC_SUBST(PTHREAD_LIBS)
+else
+       AC_MSG_RESULT([no (disabled explicitly)])
+       ENABLE_LIBPTHREAD="no"
+fi
+AC_SUBST(ENABLE_LIBPTHREAD)
+
+
+])
+
+#
+# LIBCFS_CONDITIONALS
+#
+# AM_CONDITIONAL defines for lnet
+#
+AC_DEFUN([LIBCFS_CONDITIONALS],
+[
+])
+
+#
+# LIBCFS_CONFIG_FILES
+#
+# files that should be generated with AC_OUTPUT
+#
+AC_DEFUN([LIBCFS_CONFIG_FILES],
+[AC_CONFIG_FILES([
+libcfs/Kernelenv
+libcfs/Makefile
+libcfs/autoMakefile
+libcfs/autoconf/Makefile
+libcfs/include/Makefile
+libcfs/include/libcfs/Makefile
+libcfs/include/libcfs/linux/Makefile
+libcfs/libcfs/Makefile
+libcfs/libcfs/autoMakefile
+libcfs/libcfs/linux/Makefile
+])
+case $lb_target_os in
+       darwin)
+               AC_CONFIG_FILES([
+libcfs/include/libcfs/darwin/Makefile
+libcfs/libcfs/darwin/Makefile
+])
+               ;;
+esac
+])
diff --git a/libcfs/include/Makefile.am b/libcfs/include/Makefile.am
new file mode 100644 (file)
index 0000000..8289f5f
--- /dev/null
@@ -0,0 +1 @@
+SUBDIRS = libcfs 
diff --git a/libcfs/include/libcfs/.cvsignore b/libcfs/include/libcfs/.cvsignore
new file mode 100644 (file)
index 0000000..3dda729
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile.in
+Makefile
diff --git a/libcfs/include/libcfs/Makefile.am b/libcfs/include/libcfs/Makefile.am
new file mode 100644 (file)
index 0000000..472d0ae
--- /dev/null
@@ -0,0 +1,9 @@
+SUBDIRS := linux
+if DARWIN
+SUBDIRS += darwin
+endif
+DIST_SUBDIRS := $(SUBDIRS)
+
+EXTRA_DIST := curproc.h kp30.h libcfs.h list.h lltrace.h \
+       portals_utils.h types.h user-lock.h user-prim.h user-time.h \
+       user-tcpip.h user-bitops.h bitmap.h
diff --git a/libcfs/include/libcfs/bitmap.h b/libcfs/include/libcfs/bitmap.h
new file mode 100644 (file)
index 0000000..7f6189a
--- /dev/null
@@ -0,0 +1,80 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2007 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef _LIBCFS_BITMAP_H_
+#define _LIBCFS_BITMAP_H_
+
+
+/* Variable-length bitmap: a bit count followed by the bit storage. */
+typedef struct {
+        int             size;           /* number of bits, as given to ALLOCATE_BITMAP() */
+        unsigned long   data[0];        /* bit storage, allocated together with the header */
+} bitmap_t;
+
+/*
+ * Allocation size in bytes for a bitmap of nbits bits: the bitmap_t header
+ * plus (nbits / BITS_PER_LONG) + 1 longs.  The argument is parenthesized so
+ * that expressions such as "a + b" expand with the intended precedence.
+ */
+#define CFS_BITMAP_SIZE(nbits) \
+     ((((nbits)/BITS_PER_LONG)+1)*sizeof(long)+sizeof(bitmap_t))
+
+/*
+ * Allocate a bitmap able to hold "size" bits and record that size in it.
+ * Returns NULL when OBD_ALLOC() leaves the pointer NULL.
+ */
+static inline
+bitmap_t *ALLOCATE_BITMAP(int size)
+{
+        bitmap_t *bm;
+
+        OBD_ALLOC(bm, CFS_BITMAP_SIZE(size));
+        if (bm != NULL)
+                bm->size = size;
+
+        RETURN(bm);
+}
+
+/*
+ * Free a bitmap using the same size computation as ALLOCATE_BITMAP().
+ * ptr is evaluated more than once, so it must not have side effects.
+ */
+#define FREE_BITMAP(ptr)        OBD_FREE(ptr, CFS_BITMAP_SIZE((ptr)->size))
+
+/* Set bit nbit in the bitmap via set_bit(); nbit is not range-checked here. */
+static inline
+void cfs_bitmap_set(bitmap_t *bitmap, int nbit)
+{
+       set_bit(nbit, bitmap->data);
+}
+
+/* Clear bit nbit in the bitmap via clear_bit(); nbit is not range-checked here. */
+static inline
+void cfs_bitmap_clear(bitmap_t *bitmap, int nbit)
+{
+        clear_bit(nbit, bitmap->data);
+}
+
+/* Return the result of test_bit() for bit nbit; nbit is not range-checked here. */
+static inline
+int cfs_bitmap_check(bitmap_t *bitmap, int nbit)
+{
+       return test_bit(nbit, bitmap->data);
+}
+
+/*
+ * Return 1 when find_first_bit() reports bitmap->size (i.e. it found no set
+ * bit, so the bitmap is empty) and 0 when at least one bit is set.
+ */
+static inline
+int cfs_bitmap_check_empty(bitmap_t *bitmap)
+{
+        return find_first_bit(bitmap->data, bitmap->size) == bitmap->size;
+}
+
+/*
+ * Iterate pos over the index of every set bit in bitmap, in ascending order.
+ * The search resumes at pos + 1: restarting at pos would return the bit that
+ * was just visited and the loop would never terminate.
+ * bitmap and pos are evaluated several times and must be side-effect free.
+ */
+#define cfs_foreach_bit(bitmap, pos) \
+       for((pos)=find_first_bit((bitmap)->data, (bitmap)->size);   \
+            (pos) < (bitmap)->size;                               \
+            (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos) + 1))
+
+#endif
diff --git a/libcfs/include/libcfs/curproc.h b/libcfs/include/libcfs/curproc.h
new file mode 100644 (file)
index 0000000..6495c66
--- /dev/null
@@ -0,0 +1,64 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API declaration
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef __LIBCFS_CURPROC_H__
+#define __LIBCFS_CURPROC_H__
+
+#ifdef __KERNEL__
+/*
+ * Portable API to access common characteristics of "current" UNIX process.
+ *
+ * Implemented per platform in libcfs/libcfs/<os>/<os>-curproc.c
+ */
+uid_t  cfs_curproc_uid(void);
+gid_t  cfs_curproc_gid(void);
+uid_t  cfs_curproc_fsuid(void);
+gid_t  cfs_curproc_fsgid(void);
+pid_t  cfs_curproc_pid(void);
+int    cfs_curproc_groups_nr(void);
+int    cfs_curproc_is_in_groups(gid_t group);
+void   cfs_curproc_groups_dump(gid_t *array, int size);
+mode_t cfs_curproc_umask(void);
+char  *cfs_curproc_comm(void);
+
+
+/*
+ * Plus, platform-specific constant
+ *
+ * CFS_CURPROC_COMM_MAX,
+ *
+ * and opaque scalar type
+ *
+ * cfs_kernel_cap_t
+ */
+cfs_kernel_cap_t cfs_curproc_cap_get(void);
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap);
+#endif
+
+/* __LIBCFS_CURPROC_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/darwin/.cvsignore b/libcfs/include/libcfs/darwin/.cvsignore
new file mode 100644 (file)
index 0000000..3dda729
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile.in
+Makefile
diff --git a/libcfs/include/libcfs/darwin/Makefile.am b/libcfs/include/libcfs/darwin/Makefile.am
new file mode 100644 (file)
index 0000000..f2f217a
--- /dev/null
@@ -0,0 +1,3 @@
+EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h     \
+       darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h              \
+       darwin-lock.h darwin-sync.h darwin-tcpip.h kp30.h
diff --git a/libcfs/include/libcfs/darwin/darwin-fs.h b/libcfs/include/libcfs/darwin/darwin-fs.h
new file mode 100644 (file)
index 0000000..da613ba
--- /dev/null
@@ -0,0 +1,193 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Implementation of standard file system interfaces for XNU kernel.
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+#ifndef __LIBCFS_DARWIN_FS_H__
+#define __LIBCFS_DARWIN_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/filedesc.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/sysctl.h>
+#include <sys/ubc.h>
+#include <sys/mbuf.h>
+#include <sys/namei.h>
+#include <sys/fcntl.h>
+#include <sys/lockf.h>
+#include <stdarg.h>
+
+#include <mach/mach_types.h>
+#include <mach/time_value.h>
+#include <kern/clock.h>
+#include <sys/param.h>
+#include <IOKit/system.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-mem.h>
+#include <libcfs/list.h>
+
+/*
+ * File operating APIs in kernel
+ */
+#ifdef __DARWIN8__
+/*
+ * Kernel file descriptor
+ */
+typedef struct cfs_kern_file {
+        int             f_flags;
+        vnode_t         f_vp;
+        vfs_context_t   f_ctxt;
+} cfs_file_t;
+
+#else
+
+typedef struct file cfs_file_t;
+
+#endif
+
+int    kern_file_size(cfs_file_t *fp, off_t    *size);
+#define cfs_filp_size(fp)                      \
+       ({                                      \
+               off_t           __size;         \
+               kern_file_size((fp), &__size);  \
+               __size;                         \
+        })
+#define cfs_filp_poff(fp)               (NULL)
+
+cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err);
+int kern_file_close(cfs_file_t *fp);
+int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos);
+int kern_file_sync(cfs_file_t *fp);
+
+#define cfs_filp_open(n, f, m, e)      kern_file_open(n, f, m, e)
+#define cfs_filp_close(f)              kern_file_close(f)
+#define cfs_filp_read(f, b, n, p)      kern_file_read(f, b, n, p)
+#define cfs_filp_write(f, b, n, p)     kern_file_write(f, b, n, p)
+#define cfs_filp_fsync(f)              kern_file_sync(f)
+
+int ref_file(cfs_file_t *fp);
+int rele_file(cfs_file_t *fp);
+int file_count(cfs_file_t *fp);
+#define cfs_get_file(f)                        ref_file(f)
+#define cfs_put_file(f)                        rele_file(f)
+#define cfs_file_count(f)              file_count(f)
+
+/*
+ * Every bit of integer type x set except the top one, i.e. the largest
+ * positive value when x is a signed type (assumes 8-bit bytes).
+ */
+#define CFS_INT_LIMIT(x)               (~((x)1 << (sizeof(x)*8 - 1)))
+#define CFS_OFFSET_MAX                 CFS_INT_LIMIT(loff_t)
+
+typedef struct flock                   cfs_flock_t;
+#define cfs_flock_type(fl)             ((fl)->l_type)
+#define cfs_flock_set_type(fl, type)   do { (fl)->l_type = (type); } while(0)
+#define cfs_flock_pid(fl)              ((fl)->l_pid)
+#define cfs_flock_set_pid(fl, pid)     do { (fl)->l_pid = (pid); } while(0)
+#define cfs_flock_start(fl)            ((fl)->l_start)
+#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0)
+
+/*
+ * End offset of a lock: l_start + l_len, except that a zero l_len is reported
+ * as CFS_OFFSET_MAX.
+ */
+static inline loff_t cfs_flock_end(cfs_flock_t *fl)
+{
+        if (fl->l_len == 0)
+                return CFS_OFFSET_MAX;
+
+        return fl->l_start + fl->l_len;
+}
+
+/*
+ * Inverse of cfs_flock_end(): store the length that corresponds to "end",
+ * using l_len == 0 to represent CFS_OFFSET_MAX.
+ */
+static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end)
+{
+        fl->l_len = (end == CFS_OFFSET_MAX) ? 0 : end - fl->l_start;
+}
+
+#define ATTR_MODE       0x0001
+#define ATTR_UID        0x0002
+#define ATTR_GID        0x0004
+#define ATTR_SIZE       0x0008
+#define ATTR_ATIME      0x0010
+#define ATTR_MTIME      0x0020
+#define ATTR_CTIME      0x0040
+#define ATTR_ATIME_SET  0x0080
+#define ATTR_MTIME_SET  0x0100
+#define ATTR_FORCE      0x0200  /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG  0x0400
+#define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
+#define ATTR_CTIME_SET  0x2000
+#define ATTR_BLOCKS     0x4000
+#define ATTR_KILL_SUID  0
+#define ATTR_KILL_SGID  0
+
+#define in_group_p(x)  (0)
+
+struct posix_acl_entry {
+        short                   e_tag;
+        unsigned short          e_perm;
+        unsigned int            e_id;
+};
+
+struct posix_acl {
+        atomic_t                a_refcount;
+        unsigned int            a_count;
+        struct posix_acl_entry  a_entries[0];
+};
+
+struct posix_acl *posix_acl_alloc(int count, int flags);
+/*
+ * Stub: the xattr buffer is not parsed.  value and size are ignored and the
+ * result is whatever posix_acl_alloc(0, 0) returns.
+ */
+static inline struct posix_acl *posix_acl_from_xattr(const void *value, 
+                                                     size_t size)
+{ 
+        return posix_acl_alloc(0, 0);
+}
+/* Stub: does nothing; the acl is neither freed nor un-referenced. */
+static inline void posix_acl_release(struct posix_acl *acl) {}
+/* Stub: performs no validation and always returns 0. */
+static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; }
+/* Stub: hands back the same pointer; a_refcount is not modified. */
+static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) 
+{ 
+        return acl;
+}
+
+#else  /* !__KERNEL__ */
+
+typedef struct file cfs_file_t;
+
+#endif /* END __KERNEL__ */
+
+typedef struct {
+       void    *d;
+} cfs_dentry_t;
+
+#ifndef O_SYNC
+#define O_SYNC                                 0
+#endif
+#ifndef O_DIRECTORY
+#define O_DIRECTORY                            0
+#endif
+#ifndef O_LARGEFILE
+#define O_LARGEFILE                            0
+#endif
+
+#endif
diff --git a/libcfs/include/libcfs/darwin/darwin-lock.h b/libcfs/include/libcfs/darwin/darwin-lock.h
new file mode 100644 (file)
index 0000000..f826fef
--- /dev/null
@@ -0,0 +1,284 @@
+#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__
+#define __LIBCFS_DARWIN_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <mach/sync_policy.h>
+#include <mach/task.h>
+#include <mach/semaphore.h>
+#include <kern/assert.h>
+#include <kern/thread.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-sync.h>
+
+/*
+ * spin_lock (use Linux kernel's primitives)
+ * 
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ * 
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ */
+/* Spin lock type: a thin wrapper around struct kspin. */
+struct spin_lock {
+       struct kspin spin;
+};
+
+typedef struct spin_lock spinlock_t;
+
+/* Initialise @lock by handing its embedded kspin to kspin_init(). */
+static inline void spin_lock_init(spinlock_t *lock)
+{
+       struct kspin *ks = &lock->spin;
+
+       kspin_init(ks);
+}
+
+/* Acquire @lock via kspin_lock(). */
+static inline void spin_lock(spinlock_t *lock)
+{
+       struct kspin *ks = &lock->spin;
+
+       kspin_lock(ks);
+}
+
+/* Release @lock via kspin_unlock(). */
+static inline void spin_unlock(spinlock_t *lock)
+{
+       struct kspin *ks = &lock->spin;
+
+       kspin_unlock(ks);
+}
+
+/* Try to acquire @lock; returns whatever kspin_trylock() returns. */
+static inline int spin_trylock(spinlock_t *lock)
+{
+       struct kspin *ks = &lock->spin;
+
+       return kspin_trylock(ks);
+}
+
+/* Tear down @lock via kspin_done(). */
+static inline void spin_lock_done(spinlock_t *lock)
+{
+       struct kspin *ks = &lock->spin;
+
+       kspin_done(ks);
+}
+
+/*
+ * The _bh variants are plain aliases for the non-_bh spin lock operations.
+ * The #error directive stops any build that reaches this point until the
+ * question it asks has been answered.
+ */
+#error "does this lock out timer callbacks?"
+#define spin_lock_bh(x)                spin_lock(x)
+#define spin_unlock_bh(x)      spin_unlock(x)
+#define spin_lock_bh_init(x)   spin_lock_init(x)
+
+/*
+ * Interrupt control helpers: __disable_irq() calls
+ * ml_set_interrupts_enabled(FALSE) and yields its return value;
+ * __enable_irq(x) passes x back to it and discards the result.
+ */
+extern boolean_t ml_set_interrupts_enabled(boolean_t enable);
+#define __disable_irq()         ml_set_interrupts_enabled(FALSE)
+#define __enable_irq(x)         (void) ml_set_interrupts_enabled(x)
+
+/* Disable interrupts, store the returned state in f, then take spin lock s. */
+#define spin_lock_irqsave(s, f)                do{                     \
+                                       f = __disable_irq();    \
+                                       spin_lock(s);   }while(0)
+
+/* Release spin lock s, then hand the saved state f to __enable_irq(). */
+#define spin_unlock_irqrestore(s, f)   do{                     \
+                                       spin_unlock(s);         \
+                                       __enable_irq(f);}while(0)
+
+/* 
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+/* Semaphore type: a thin wrapper around struct ksem. */
+struct semaphore {
+       struct ksem sem;
+};
+
+/* Initialise @s by passing its ksem and the initial value @val to ksem_init(). */
+static inline void sema_init(struct semaphore *s, int val)
+{
+       struct ksem *ks = &s->sem;
+
+       ksem_init(ks, val);
+}
+
+/* Down operation: calls ksem_down() on the embedded ksem with a count of 1. */
+static inline void __down(struct semaphore *s)
+{
+       struct ksem *ks = &s->sem;
+
+       ksem_down(ks, 1);
+}
+
+/* Up operation: calls ksem_up() on the embedded ksem with a count of 1. */
+static inline void __up(struct semaphore *s)
+{
+       struct ksem *ks = &s->sem;
+
+       ksem_up(ks, 1);
+}
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+/* Mutexes are expressed with the semaphore wrappers: up/down map to __up/__down. */
+#define mutex_up(s)                    __up(s)
+#define mutex_down(s)                  __down(s)
+
+/* init_mutex() starts the semaphore at 1, init_mutex_locked() at 0. */
+#define init_mutex(x)                  sema_init(x, 1)
+#define init_mutex_locked(x)           sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+/* Completion type, currently backed by a struct ksem. */
+struct completion {
+       /*
+        * Emulate completion by semaphore for now.
+        *
+        * XXX nikita: this is not safe if completion is used to synchronize
+        * exit from kernel daemon thread and kext unloading. In this case
+        * some core function (a la complete_and_exit()) is needed.
+        */
+       struct ksem sem;
+};
+
+/* Initialise @c: its ksem is set up with an initial value of 0. */
+static inline void init_completion(struct completion *c)
+{
+       struct ksem *ks = &c->sem;
+
+       ksem_init(ks, 0);
+}
+
+/* Signal @c: calls ksem_up() on its ksem with a count of 1. */
+static inline void complete(struct completion *c)
+{
+       struct ksem *ks = &c->sem;
+
+       ksem_up(ks, 1);
+}
+
+/* Wait on @c: calls ksem_down() on its ksem with a count of 1. */
+static inline void wait_for_completion(struct completion *c)
+{
+       struct ksem *ks = &c->sem;
+
+       ksem_down(ks, 1);
+}
+
+/*
+ * rw_semaphore:
+ *
+ * - DECLARE_RWSEM(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+/* Read-write semaphore type: a thin wrapper around struct krw_sem. */
+struct rw_semaphore {
+       struct krw_sem s;
+};
+
+/* Initialise @s via krw_sem_init(). */
+static inline void init_rwsem(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_init(rw);
+}
+
+/* Tear down @s via krw_sem_done(). */
+static inline void fini_rwsem(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_done(rw);
+}
+
+/* Take @s for reading via krw_sem_down_r(). */
+static inline void down_read(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_down_r(rw);
+}
+
+/* Returns 1 when krw_sem_down_r_try() returned 0, otherwise 0. */
+static inline int down_read_trylock(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       return krw_sem_down_r_try(rw) == 0;
+}
+
+/* Take @s for writing via krw_sem_down_w(). */
+static inline void down_write(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_down_w(rw);
+}
+
+/* Returns 1 when krw_sem_down_w_try() returned 0, otherwise 0. */
+static inline int down_write_trylock(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       return krw_sem_down_w_try(rw) == 0;
+}
+
+/* Drop a read hold on @s via krw_sem_up_r(). */
+static inline void up_read(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_up_r(rw);
+}
+
+/* Drop a write hold on @s via krw_sem_up_w(). */
+static inline void up_write(struct rw_semaphore *s)
+{
+       struct krw_sem *rw = &s->s;
+
+       krw_sem_up_w(rw);
+}
+
+/* 
+ * read-write lock : Need to be investigated more!!
+ *
+ * - DECLARE_RWLOCK(l)
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+/* rwlock_t is struct krw_spin; every operation below maps onto a krw_spin_* call. */
+typedef struct krw_spin rwlock_t;
+
+#define rwlock_init(pl)                        krw_spin_init(pl)
+
+#define read_lock(l)                   krw_spin_down_r(l)
+#define read_unlock(l)                 krw_spin_up_r(l)
+#define write_lock(l)                  krw_spin_down_w(l)
+#define write_unlock(l)                        krw_spin_up_w(l)
+
+/*
+ * The *_irqsave forms disable interrupts (saving the returned state in f)
+ * before taking the lock; the *_irqrestore forms drop the lock and then pass
+ * f to __enable_irq().
+ */
+#define write_lock_irqsave(l, f)       do{                     \
+                                       f = __disable_irq();    \
+                                       write_lock(l);  }while(0)
+
+#define write_unlock_irqrestore(l, f)  do{                     \
+                                       write_unlock(l);        \
+                                       __enable_irq(f);}while(0)
+
+#define read_lock_irqsave(l, f)                do{                     \
+                                       f = __disable_irq();    \
+                                       read_lock(l);   }while(0)
+
+#define read_unlock_irqrestore(l, f)   do{                     \
+                                       read_unlock(l);         \
+                                       __enable_irq(f);}while(0)
+/*
+ * Funnel: 
+ *
+ * Safe funnel in/out
+ */
+/*
+ * With __DARWIN8__ defined, the IN/EX macros are empty statements.
+ * NOTE(review): CFS_DECL_CONE_DATA and CFS_DECL_NET_DATA expand to
+ * DECLARE_FUNNEL_DATA, which is not defined in this header (only
+ * CFS_DECL_FUNNEL_DATA is) -- confirm where it comes from.
+ */
+#ifdef __DARWIN8__
+
+#define CFS_DECL_FUNNEL_DATA
+#define CFS_DECL_CONE_DATA              DECLARE_FUNNEL_DATA
+#define CFS_DECL_NET_DATA               DECLARE_FUNNEL_DATA
+#define CFS_CONE_IN                     do {} while(0)
+#define CFS_CONE_EX                     do {} while(0)
+
+#define CFS_NET_IN                      do {} while(0)
+#define CFS_NET_EX                      do {} while(0)
+
+#else
+
+/*
+ * Without __DARWIN8__, the DECL macros declare two locals (__funnel_state
+ * and __funnel) that the IN/EX macros pass to the lustre_cone_* and
+ * lustre_net_* functions. The declaration has no trailing semicolon.
+ */
+#define CFS_DECL_FUNNEL_DATA                   \
+        boolean_t    __funnel_state = FALSE;   \
+        funnel_t    *__funnel
+#define CFS_DECL_CONE_DATA             CFS_DECL_FUNNEL_DATA
+#define CFS_DECL_NET_DATA              CFS_DECL_FUNNEL_DATA
+
+void lustre_cone_in(boolean_t *state, funnel_t **cone);
+void lustre_cone_ex(boolean_t state, funnel_t *cone);
+
+#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel)
+#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel)
+
+void lustre_net_in(boolean_t *state, funnel_t **cone);
+void lustre_net_ex(boolean_t state, funnel_t *cone);
+
+#define CFS_NET_IN  lustre_net_in(&__funnel_state, &__funnel)
+#define CFS_NET_EX  lustre_net_ex(__funnel_state, __funnel)
+
+#endif
+
+#else
+#include <libcfs/user-lock.h>
+#endif /* __KERNEL__ */
+
+/* __LIBCFS_DARWIN_CFS_LOCK_H__ */
+#endif
diff --git a/libcfs/include/libcfs/darwin/darwin-mem.h b/libcfs/include/libcfs/darwin/darwin-mem.h
new file mode 100644 (file)
index 0000000..5ffcd4e
--- /dev/null
@@ -0,0 +1,232 @@
+#ifndef __LIBCFS_DARWIN_CFS_MEM_H__
+#define __LIBCFS_DARWIN_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <sys/vm.h>
+#include <sys/kernel.h>
+#include <sys/ubc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/lockf.h>
+
+#include <mach/mach_types.h>
+#include <mach/vm_types.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <mach/machine/vm_param.h>
+#include <kern/thread_call.h>
+#include <sys/param.h>
+#include <sys/vm.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-sync.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/list.h>
+
+/*
+ * Basic xnu_page struct; it should be binary compatible with
+ * all page types in xnu (we have only xnu_raw_page, xll_page now)
+ */
+
+/* Variable sized pages are not supported */
+
+/* Use the platform PAGE_SHIFT when available, otherwise fall back to 12. */
+#ifdef PAGE_SHIFT
+#define CFS_PAGE_SHIFT PAGE_SHIFT
+#else
+#define CFS_PAGE_SHIFT 12
+#endif
+
+/* Page size in bytes, derived from the shift. */
+#define CFS_PAGE_SIZE  (1UL << CFS_PAGE_SHIFT)
+
+/* 64-bit mask with the low CFS_PAGE_SHIFT bits cleared. */
+#define CFS_PAGE_MASK  (~((__u64)CFS_PAGE_SIZE - 1))
+
+/* Page type tags; XNU_PAGE_NTYPES comes last and so equals the number of types. */
+enum {
+       XNU_PAGE_RAW,
+       XNU_PAGE_XLL,
+       XNU_PAGE_NTYPES
+};
+
+typedef __u32 page_off_t;
+
+/*
+ * For XNU we have our own page cache built on top of underlying BSD/MACH
+ * infrastructure. In particular, we have two disjoint types of pages:
+ *
+ *    - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM,
+ *    based on UPLs, and
+ *
+ *    - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache
+ *    file data, owned by file system objects, hashed, lrued, etc.
+ *
+ * cfs_page_t has to cover both of them, because core Lustre code is based on
+ * the Linux assumption that page is _both_ memory buffer and file system
+ * caching entity.
+ *
+ * To achieve this, all types of pages supported on XNU have to start from
+ * common header that contains only "page type". Common cfs_page_t operations
+ * dispatch through operation vector based on page type.
+ *
+ */
+/* Common page header: holds only the page type tag. */
+typedef struct xnu_page {
+       int type;
+} cfs_page_t;
+
+/* Per-type operation vector: map, unmap and address callbacks taking a cfs_page_t. */
+struct xnu_page_ops {
+       void *(*page_map)        (cfs_page_t *);
+       void  (*page_unmap)      (cfs_page_t *);
+       void *(*page_address)    (cfs_page_t *);
+};
+
+/* Register / unregister the operation vector for a page type (defined elsewhere). */
+void xnu_page_ops_register(int type, struct xnu_page_ops *ops);
+void xnu_page_ops_unregister(int type);
+
+/*
+ * raw page, no cache object, just like buffer
+ */
+/* Starts with the common struct xnu_page header, followed by raw-page fields. */
+struct xnu_raw_page {
+       struct xnu_page  header;
+       void            *virtual;
+       atomic_t         count;
+       struct list_head link;
+};
+
+/*
+ * Public interface to lustre
+ *
+ * - cfs_alloc_page(f)
+ * - cfs_free_page(p)
+ * - cfs_kmap(p)
+ * - cfs_kunmap(p)
+ * - cfs_page_address(p)
+ */
+
+/*
+ * Of all functions above only cfs_kmap(), cfs_kunmap(), and
+ * cfs_page_address() can be called on file system pages. The rest is for raw
+ * pages only.
+ */
+
+/* Page allocation and reference-count prototypes (implemented elsewhere). */
+cfs_page_t *cfs_alloc_page(u_int32_t flags);
+void cfs_free_page(cfs_page_t *page);
+void cfs_get_page(cfs_page_t *page);
+int cfs_put_page_testzero(cfs_page_t *page);
+int cfs_page_count(cfs_page_t *page);
+/* Always expands to 0; the argument is not evaluated. */
+#define cfs_page_index(pg)     (0)
+
+/* Address / mapping prototypes (implemented elsewhere). */
+void *cfs_page_address(cfs_page_t *pg);
+void *cfs_kmap(cfs_page_t *pg);
+void cfs_kunmap(cfs_page_t *pg);
+
+/*
+ * Memory allocator
+ */
+
+/* General and "large" allocation prototypes (implemented elsewhere). */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+void  cfs_free(void *addr);
+
+void *cfs_alloc_large(size_t nr_bytes);
+void  cfs_free_large(void *addr);
+
+extern int get_preemption_level(void);
+
+/*
+ * Evaluates to CFS_ALLOC_ATOMIC when get_preemption_level() is non-zero,
+ * otherwise to 0.
+ */
+#define CFS_ALLOC_ATOMIC_TRY                                    \
+       (get_preemption_level() != 0 ? CFS_ALLOC_ATOMIC : 0)
+
+/*
+ * Slab:
+ *
+ * No slab in OSX, use zone allocator to simulate slab
+ */
+/* Defined as 0, so passing it as a flag sets no bits. */
+#define SLAB_HWCACHE_ALIGN             0
+
+/*
+ * Select the backing type for mem_cache_t. With __DARWIN8__,
+ * CFS_INDIVIDUAL_ZONE is 0 and mem_cache_t is an OSMallocTag; otherwise
+ * CFS_INDIVIDUAL_ZONE is 1 and mem_cache_t is a zone_t.
+ */
+#ifdef __DARWIN8__
+/* 
+ * In Darwin8, we cannot use zalloc_noblock(not exported by kernel),
+ * also, direct using of zone allocator is not recommended.
+ */
+#define CFS_INDIVIDUAL_ZONE     (0)
+
+#if !CFS_INDIVIDUAL_ZONE
+#include <libkern/OSMalloc.h>
+typedef        OSMallocTag     mem_cache_t;
+#else
+typedef                void*           zone_t;
+typedef                zone_t          mem_cache_t;
+#endif
+
+#else /* !__DARWIN8__ */
+
+#define CFS_INDIVIDUAL_ZONE     (1)
+
+typedef        zone_t          mem_cache_t;
+
+#endif /* !__DARWIN8__ */
+
+/* Size of the mc_name buffer in struct cfs_mem_cache. */
+#define MC_NAME_MAX_LEN                64
+
+/* Cache descriptor: a size, the backing mem_cache_t, a list link and a name buffer. */
+typedef struct cfs_mem_cache {
+       int                     mc_size;
+       mem_cache_t             mc_cache;
+       struct list_head        mc_link;
+       char                    mc_name [MC_NAME_MAX_LEN];
+} cfs_mem_cache_t;
+
+#define KMEM_CACHE_MAX_COUNT   64
+#define KMEM_MAX_ZONE          8192
+
+/* Cache create / destroy / alloc / free prototypes (implemented elsewhere). */
+cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
+int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+/*
+ * Misc
+ */
+/* XXX Liang: num_physpages... fix me */
+/* Hard-coded constant (64 * 1024), not a value queried from the system. */
+#define num_physpages                  (64 * 1024)
+
+/* The MMSPACE macros are no-ops: an empty declaration and empty statements. */
+#define CFS_DECL_MMSPACE               
+#define CFS_MMSPACE_OPEN               do {} while(0)
+#define CFS_MMSPACE_CLOSE              do {} while(0)
+
+/*
+ * User/kernel copy helpers mapped onto copyin()/copyout().
+ *
+ * kaddr: kernel-space buffer, uaddr: user-space address, size: byte count.
+ * Both evaluate to whatever copyin()/copyout() returns. Every argument is
+ * parenthesized before it is cast so that expressions such as "buf + 1"
+ * keep their own pointer arithmetic instead of being re-scaled by the cast.
+ */
+#define copy_from_user(kaddr, uaddr, size)     copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)(kaddr), (size))
+#define copy_to_user(uaddr, kaddr, size)       copyout((caddr_t)(kaddr), CAST_USER_ADDR_T(uaddr), (size))
+
+#if 0
+static inline int strncpy_from_user(char *kaddr, char *uaddr, int size)
+{
+       size_t count;
+       return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count);
+}
+#endif
+
+/*
+ * Memory barrier macros, selected per architecture. On ppc, mb() and rmb()
+ * issue "sync" and wmb() issues "eieio". On i386, mb() and rmb() issue a
+ * locked add of 0 to the top of the stack, and wmb() emits no instruction
+ * (an empty asm with a "memory" clobber). Any other architecture stops the
+ * build.
+ */
+#if defined (__ppc__)
+#define mb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb()  __asm__ __volatile__ ("eieio" : : : "memory")
+#elif defined (__i386__)
+#define mb()    __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory")
+#define rmb()   mb()
+#define wmb()   __asm__ __volatile__ ("": : :"memory")
+#else
+#error architecture not supported
+#endif
+
+#else  /* !__KERNEL__ */
+
+/* Non-kernel build: the page cache size is fixed at 1 << 12 bytes. */
+#define CFS_CACHE_SHIFT 12
+#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT)
+#include <libcfs/user-prim.h>
+
+#endif /* __KERNEL__ */
+
+#endif /* __LIBCFS_DARWIN_CFS_MEM_H__ */
diff --git a/libcfs/include/libcfs/darwin/darwin-prim.h b/libcfs/include/libcfs/darwin/darwin-prim.h
new file mode 100644 (file)
index 0000000..0c201c2
--- /dev/null
@@ -0,0 +1,527 @@
+#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__
+#define __LIBCFS_DARWIN_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#ifndef __DARWIN8__
+# ifndef __APPLE_API_PRIVATE
+#  define __APPLE_API_PRIVATE
+#  include <sys/user.h>
+#  undef __APPLE_API_PRIVATE
+# else
+#  include <sys/user.h>
+# endif
+# include <mach/mach_traps.h>
+# include <mach/thread_switch.h>
+# include <machine/cpu_number.h>
+#endif /* !__DARWIN8__ */
+
+#include <sys/kernel.h>
+
+#include <mach/thread_act.h>
+#include <mach/mach_types.h>
+#include <mach/time_value.h>
+#include <kern/sched_prim.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <mach/machine/vm_param.h>
+#include <machine/machine_routines.h>
+#include <kern/clock.h>
+#include <kern/thread_call.h>
+#include <sys/param.h>
+#include <sys/vm.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-utils.h>
+#include <libcfs/darwin/darwin-lock.h>
+
+/*
+ * Symbol functions for libcfs
+ *
+ * OSX has no facility for us to register symbols.
+ * So we have to implement it.
+ */
+/* Size of the name buffer in struct cfs_symbol. */
+#define CFS_SYMBOL_LEN     64
+
+/* One registered symbol: a name buffer, a value pointer, a ref field and a list link. */
+struct  cfs_symbol {
+       char    name[CFS_SYMBOL_LEN];
+       void    *value;
+       int     ref;
+       struct  list_head sym_list;
+};
+
+/* Symbol table operations keyed by name (implemented elsewhere). */
+extern kern_return_t            cfs_symbol_register(const char *, const void *);
+extern kern_return_t            cfs_symbol_unregister(const char *);
+extern void *                   cfs_symbol_get(const char *);
+extern kern_return_t            cfs_symbol_put(const char *);
+
+/*
+ * sysctl typedef
+ *
+ * User can register/unregister a list of sysctl_oids
+ * sysctl_oid is data struct of osx's sysctl-entry
+ */
+#define        CONFIG_SYSCTL   1
+
+/*
+ * Both table types are pointers to struct sysctl_oid, so the
+ * register/unregister prototypes below traffic in pointers to such pointers.
+ */
+typedef struct sysctl_oid *     cfs_sysctl_table_t;
+typedef cfs_sysctl_table_t      cfs_sysctl_table_header_t;
+cfs_sysctl_table_header_t      *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg);
+void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table);
+
+/*
+ * Proc file system APIs, no /proc fs support in OSX
+ */
+/* Placeholder proc entry: carries only an untyped data pointer. */
+typedef struct cfs_proc_dir_entry {
+       void            *data;
+} cfs_proc_dir_entry_t;
+
+/* Proc entry create / free / remove prototypes (implemented elsewhere). */
+cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod,
+                                         cfs_proc_dir_entry_t *parent);
+void cfs_free_proc_entry(cfs_proc_dir_entry_t *de);
+void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry);
+
+/* Function types (not pointers) for proc read and write handlers. */
+typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
+                         int count, int *eof, void *data);
+typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
+                          unsigned long count, void *data);
+
+/*
+ * cfs pseudo device
+ *
+ * cfs_psdev_t
+ * cfs_psdev_register:
+ * cfs_psdev_deregister:
+ */
+/* Pseudo-device descriptor: index, handle, name, cdevsw pointer and private data. */
+typedef struct {
+       int             index;
+       void            *handle;
+       const char      *name;
+       struct cdevsw   *devsw;
+       void            *private;
+} cfs_psdev_t;
+
+/* Register / deregister a pseudo device (implemented elsewhere). */
+extern kern_return_t            cfs_psdev_register(cfs_psdev_t *);
+extern kern_return_t            cfs_psdev_deregister(cfs_psdev_t *);
+
+/*
+ * Task struct and ...
+ *
+ * Using BSD current_proc in Darwin
+ */
+extern boolean_t        assert_wait_possible(void);
+extern void             *get_bsdtask_info(task_t);
+
+/*
+ * cfs_task_t / cfs_current(): with __DARWIN8__ the task type is an empty
+ * struct and cfs_current() is current_thread() cast to it; otherwise the
+ * task type is struct uthread and cfs_current() is current_uthread().
+ */
+#ifdef __DARWIN8__
+
+typedef struct {}              cfs_task_t;
+#define cfs_current()          ((cfs_task_t *)current_thread())
+#else  /* !__DARWIN8__ */
+
+typedef struct uthread         cfs_task_t;
+
+#define current_uthread()       ((struct uthread *)get_bsdthread_info(current_act()))
+#define cfs_current()          current_uthread()
+
+#endif /* !__DARWIN8__ */
+
+/* The remaining macros in this group are no-ops that ignore their arguments. */
+#define cfs_task_lock(t)       do {;} while (0)
+#define cfs_task_unlock(t)     do {;} while (0)
+
+#define set_current_state(s)   do {;} while (0)
+
+#define CFS_DECL_JOURNAL_DATA  
+#define CFS_PUSH_JOURNAL       do {;} while(0)
+#define CFS_POP_JOURNAL                do {;} while(0)
+
+#define THREAD_NAME(comm, fmt, a...)
+/*
+ * Kernel thread:
+ *
+ * OSX kernel thread can not be created with args,
+ * so we have to implement new APIs to create thread with args
+ */
+
+/* Thread entry point type: takes one untyped argument and returns int. */
+typedef int (*cfs_thread_t)(void *);
+
+extern task_t  kernel_task;
+
+/*
+ * cloning flags, no use in OSX, just copy them from Linux
+ */
+#define CSIGNAL         0x000000ff      /* signal mask to be sent at exit */
+#define CLONE_VM        0x00000100      /* set if VM shared between processes */
+#define CLONE_FS        0x00000200      /* set if fs info shared between processes */
+#define CLONE_FILES     0x00000400      /* set if open files shared between processes */
+#define CLONE_SIGHAND   0x00000800      /* set if signal handlers and blocked signals shared */
+#define CLONE_PID       0x00001000      /* set if pid shared */
+#define CLONE_PTRACE    0x00002000      /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK     0x00004000      /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT    0x00008000      /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD    0x00010000      /* Same thread group? */
+#define CLONE_NEWNS     0x00020000      /* New namespace group? */
+
+#define CLONE_SIGNAL    (CLONE_SIGHAND | CLONE_THREAD)
+
+/* Start a kernel thread running func(arg); flag takes the CLONE_* values above. */
+extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag);
+
+
+/*
+ * Wait Queue implementation
+ *
+ * Like wait_queue in Linux
+ */
+/* Wait queue: wraps a struct ksleep_chan. */
+typedef struct cfs_waitq {
+       struct ksleep_chan wq_ksleep_chan;
+} cfs_waitq_t;
+
+/* Wait link: a back-pointer to a wait queue plus a struct ksleep_link. */
+typedef struct cfs_waitlink {
+       struct cfs_waitq   *wl_waitq;
+       struct ksleep_link  wl_ksleep_link;
+} cfs_waitlink_t;
+
+typedef int cfs_task_state_t;
+
+/* Task states map directly onto the XNU THREAD_* wait constants. */
+#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE
+#define CFS_TASK_UNINT         THREAD_UNINT
+
+/* Wait queue API prototypes (implemented elsewhere). */
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+                            struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int  cfs_waitq_active(struct cfs_waitq *waitq);
+
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link,
+                                  cfs_task_state_t state, 
+                                  cfs_duration_t timeout);
+
+/*
+ * Thread schedule APIs.
+ */
+/* All-ones unsigned long shifted right by 12 bits, then cast to long. */
+#define MAX_SCHEDULE_TIMEOUT    ((long)(~0UL>>12))
+extern void thread_set_timer_deadline(__u64 deadline);
+extern void thread_cancel_timer(void);
+
+/*
+ * Block the current thread in wait state @state, optionally bounded by
+ * @timeout.
+ *
+ * The wait event is the current thread (or the current uthread when
+ * __DARWIN8__ is not defined). When @timeout is positive it is passed to
+ * nanoseconds_to_absolutetime(), converted to a deadline, and armed as the
+ * thread timer; the timer is cancelled again after the block returns.
+ *
+ * Returns 0 if the wait result was THREAD_TIMED_OUT, 1 otherwise.
+ */
+static inline int cfs_schedule_timeout(int state, int64_t timeout)
+{
+       const int    armed = timeout > 0;
+       event_t      ev;
+       int          wres;
+
+#ifdef __DARWIN8__
+       ev = (event_t)current_thread();
+#else
+       ev = (event_t)current_uthread();
+#endif
+       wres = assert_wait(ev, state);
+
+       if (armed) {
+               __u64 deadline;
+
+               nanoseconds_to_absolutetime(timeout, &deadline);
+               clock_absolutetime_interval_to_deadline(deadline, &deadline);
+               thread_set_timer_deadline(deadline);
+       }
+
+       /* Only block if assert_wait() actually queued us. */
+       if (wres == THREAD_WAITING)
+               wres = thread_block(THREAD_CONTINUE_NULL);
+
+       if (armed)
+               thread_cancel_timer();
+
+       return wres != THREAD_TIMED_OUT;
+}
+
+/* Both wait in CFS_TASK_UNINT: cfs_schedule() for CFS_TICK, cfs_pause() for tick. */
+#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK)
+#define cfs_pause(tick)        cfs_schedule_timeout(CFS_TASK_UNINT, tick)
+
+/*
+ * Wait, in CFS_TASK_UNINT, until condition becomes true. wq is used as
+ * "&wq", so it must be a cfs_waitq object rather than a pointer to one.
+ * Each iteration adds a local wait link to wq, tests the condition, and if
+ * it is false waits and then removes the link; the link is removed once
+ * more after the loop exits.
+ */
+#define __wait_event(wq, condition)                            \
+do {                                                           \
+       struct cfs_waitlink __wait;                             \
+                                                               \
+       cfs_waitlink_init(&__wait);                             \
+       for (;;) {                                              \
+               cfs_waitq_add(&wq, &__wait);                    \
+               if (condition)                                  \
+                       break;                                  \
+               cfs_waitq_wait(&__wait, CFS_TASK_UNINT);        \
+               cfs_waitq_del(&wq, &__wait);                    \
+       }                                                       \
+       cfs_waitq_del(&wq, &__wait);                            \
+} while (0)
+
+/* Fast path: skip __wait_event() entirely when condition is already true. */
+#define wait_event(wq, condition)                              \
+do {                                                           \
+       if (condition)                                          \
+               break;                                          \
+       __wait_event(wq, condition);                            \
+} while (0)
+
+/*
+ * Interruptible wait loop. wq is used as "&wq" (a cfs_waitq object). ex
+ * selects cfs_waitq_add() when 0 and cfs_waitq_add_exclusive() otherwise.
+ * While condition is false and cfs_signal_pending() is false, the caller
+ * waits in CFS_TASK_INTERRUPTIBLE. If a signal is pending, ret is set to
+ * -ERESTARTSYS and the loop ends. ret is left untouched when condition
+ * becomes true.
+ */
+#define __wait_event_interruptible(wq, condition, ex, ret)     \
+do {                                                           \
+       struct cfs_waitlink __wait;                             \
+                                                               \
+       cfs_waitlink_init(&__wait);                             \
+       for (;;) {                                              \
+               if (ex == 0)                                    \
+                       cfs_waitq_add(&wq, &__wait);            \
+               else                                            \
+                       cfs_waitq_add_exclusive(&wq, &__wait);  \
+               if (condition)                                  \
+                       break;                                  \
+               if (!cfs_signal_pending()) {                    \
+                       cfs_waitq_wait(&__wait,                 \
+                                      CFS_TASK_INTERRUPTIBLE); \
+                       cfs_waitq_del(&wq, &__wait);            \
+                       continue;                               \
+               }                                               \
+               ret = -ERESTARTSYS;                             \
+               break;                                          \
+       }                                                       \
+       cfs_waitq_del(&wq, &__wait);                            \
+} while (0)
+
+/*
+ * Wait interruptibly until condition is true.
+ *
+ * Evaluates to 0 when condition is (or becomes) true, or to the value
+ * __wait_event_interruptible() stores in its ret argument (-ERESTARTSYS
+ * when a signal is pending). The condition is parenthesized before it is
+ * negated so that expressions such as "a == b" are tested as a whole.
+ */
+#define wait_event_interruptible(wq, condition)                        \
+({                                                             \
+       int __ret = 0;                                          \
+       if (!(condition))                                       \
+               __wait_event_interruptible(wq, condition,       \
+                                          0, __ret);           \
+       __ret;                                                  \
+})
+
+/*
+ * Same as wait_event_interruptible(), but passes ex = 1 so the wait link is
+ * added with cfs_waitq_add_exclusive().
+ */
+#define wait_event_interruptible_exclusive(wq, condition)      \
+({                                                             \
+       int __ret = 0;                                          \
+       if (!(condition))                                       \
+               __wait_event_interruptible(wq, condition,       \
+                                          1, __ret);           \
+       __ret;                                                  \
+})
+
+/*
+ * NOTE(review): wakeup_one() is only declared here when __DARWIN8__ is not
+ * defined; confirm a declaration is in scope on Darwin 8 before using
+ * wake_up_process() there.
+ */
+#ifndef __DARWIN8__
+extern void    wakeup_one __P((void * chan));
+#endif
+/*
+ * only used in tests
+ *
+ * Passes p, cast to caddr_t, to wakeup_one(). The argument is parenthesized
+ * so that the cast applies to the whole expression.
+ */
+#define wake_up_process(p)                                     \
+       do {                                                    \
+               wakeup_one((caddr_t)(p));                       \
+       } while (0)
+       
+/* used in couple of places */
+/*
+ * Queue a stack-allocated wait link on @waitq, wait once in CFS_TASK_UNINT,
+ * then remove the link again.
+ */
+static inline void sleep_on(cfs_waitq_t *waitq)
+{
+       cfs_waitlink_t wl;
+
+       cfs_waitlink_init(&wl);
+
+       cfs_waitq_add(waitq, &wl);
+       cfs_waitq_wait(&wl, CFS_TASK_UNINT);
+       cfs_waitq_del(waitq, &wl);
+}
+
+/*
+ * Signal
+ */
+typedef sigset_t       cfs_sigset_t;
+
+/* Expands to nothing. */
+#define SIGNAL_MASK_ASSERT()
+/*
+ * Timer
+ */
+/* Timer type: wraps a struct ktimer. */
+typedef struct cfs_timer {
+       struct ktimer t;
+} cfs_timer_t;
+
+/* cfs_init_timer() is a no-op macro; cfs_timer_init() below is a separate function. */
+#define cfs_init_timer(t)      do {} while(0)
+void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
+void cfs_timer_done(struct cfs_timer *t);
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
+void cfs_timer_disarm(struct cfs_timer *t);
+int  cfs_timer_is_armed(struct cfs_timer *t);
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
+
+/*
+ * Ioctl
+ * We don't need to copy out everything in osx
+ */
+/*
+ * Copy the part of an ioctl reply that follows the fixed-size header back
+ * to user space.
+ *
+ * a: user-space destination buffer
+ * d: kernel-space pointer to the header; sizeof(*d) is the header size
+ * l: total length in bytes of the data at d (asserted >= sizeof(*d))
+ *
+ * Only the l - sizeof(*d) bytes located after the header are copied, from
+ * d + sizeof(*d) to a + sizeof(*d). Evaluates to 0 when there is nothing to
+ * copy, otherwise to the value returned by copy_to_user().
+ */
+#define cfs_ioctl_data_out(a, d, l)                    \
+       ({                                              \
+               int __size;                             \
+               int __rc = 0;                           \
+               assert((l) >= sizeof(*(d)));            \
+               __size = (l) - sizeof(*(d));            \
+               if (__size > 0)                         \
+                       __rc = copy_to_user(            \
+                            ((char *)(a) + sizeof(*(d))), \
+                            ((char *)(d) + sizeof(*(d))), \
+                            __size);                   \
+               __rc;                                   \
+       })
+
+/*
+ * CPU
+ */
+/* We run on the PowerPC G5, which is PPC64 */
+/* Hard-coded cache line size and CPU count. */
+#define SMP_CACHE_BYTES                         128
+#define __cacheline_aligned                     __attribute__((__aligned__(SMP_CACHE_BYTES)))
+#define NR_CPUS                                        2
+
+/* 
+ * XXX Liang: patch xnu and export current_processor()?
+ *
+ * #define smp_processor_id()                  current_processor()
+ */
+/* Until then smp_processor_id() is the constant 0. */
+#define smp_processor_id()                     0
+/* XXX smp_call_function is not supported in xnu */
+/* The macro therefore expands to an empty statement; f is never called. */
+#define smp_call_function(f, a, n, w)          do {} while(0)
+int cfs_online_cpus(void);
+/* smp_num_cpus is a function call, not a constant. */
+#define smp_num_cpus                           cfs_online_cpus()
+
+/*
+ * Misc
+ */
+extern int is_suser(void);
+
+/* Branch-hint macros degrade to the bare expression when not already defined. */
+#ifndef likely
+#define likely(exp) (exp)
+#endif
+#ifndef unlikely
+#define unlikely(exp) (exp)
+#endif
+
+/* No-op macros. */
+#define lock_kernel()                          do {} while(0)
+#define unlock_kernel()                                do {} while(0)
+
+/*
+ * capable(a) calls is_suser() when a equals CAP_SYS_BOOT and is_suser1()
+ * for any other value.
+ * NOTE(review): is_suser1() is not declared in this part of the header;
+ * confirm a declaration is in scope wherever capable() is used.
+ */
+#define CAP_SYS_BOOT                           0
+#define CAP_SYS_ADMIN                           1
+#define capable(a)                             ((a) == CAP_SYS_BOOT ? is_suser(): is_suser1())
+
+/* Always expands to 0; the arguments are not evaluated. */
+#define USERMODEHELPER(path, argv, envp)       (0)
+
+/*
+ * Emit the kext boilerplate for module "name":
+ *  - declares _start/_stop and the module's own name_start/name_stop;
+ *  - defines KMOD_INFO_NAME with the bundle id "com.clusterfs.lustre.<name>"
+ *    and the given version string, wired to name_start/name_stop;
+ *  - defines _realmain, _antimain and _kext_apple_cc;
+ *  - defines name_start(), which returns init(), and name_stop(), which
+ *    calls fini() and returns KERN_SUCCESS.
+ * The expansion ends with an empty struct declaration so that the
+ * invocation can be followed by a semicolon.
+ */
+#define cfs_module(name, version, init, fini)                          \
+extern kern_return_t _start(kmod_info_t *ki, void *data);              \
+extern kern_return_t _stop(kmod_info_t *ki, void *data);               \
+__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \
+__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \
+                                                                       \
+kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1,               \
+                               { "com.clusterfs.lustre." #name }, { version }, \
+                               -1, 0, 0, 0, 0, name##_start, name##_stop }; \
+                                                                       \
+__private_extern__ kmod_start_func_t *_realmain = name##_start;                \
+__private_extern__ kmod_stop_func_t *_antimain = name##_stop;          \
+__private_extern__ int _kext_apple_cc = __APPLE_CC__ ;                 \
+                                                                       \
+kern_return_t name##_start(kmod_info_t *ki, void *d)                   \
+{                                                                      \
+       return init();                                                  \
+}                                                                      \
+                                                                       \
+kern_return_t name##_stop(kmod_info_t *ki, void *d)                    \
+{                                                                      \
+        fini();                                                                \
+        return KERN_SUCCESS;                                           \
+}                                                                      \
+                                                                       \
+/*                                                                     \
+ * to allow semicolon after cfs_module(...)                            \
+ */                                                                    \
+struct __dummy_ ## name ## _struct {}
+
+/* inter_module_get/put are routed to the libcfs symbol table. */
+#define inter_module_get(n)                    cfs_symbol_get(n)
+#define inter_module_put(n)                    cfs_symbol_put(n)
+
+/* Stub: ignores @name and always returns -EINVAL. */
+static inline int request_module(char *name)
+{
+       return (-EINVAL);
+}
+
+/* Section attributes and module metadata macros all expand to nothing here. */
+#ifndef __exit
+#define __exit
+#endif
+#ifndef __init
+#define __init
+#endif
+
+#define EXPORT_SYMBOL(s)
+#define MODULE_AUTHOR(s)
+#define MODULE_DESCRIPTION(s)
+#define MODULE_LICENSE(s)
+#define MODULE_PARM(a, b)
+#define MODULE_PARM_DESC(a, b)
+
+/*
+ * Encode a version triple as a*100 + b*10 + c. Every argument is
+ * parenthesized so that expression arguments are evaluated as a unit.
+ * LINUX_VERSION_CODE is fixed at the encoding of 2.5.0.
+ */
+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+(c))
+#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0)
+
+#define NR_IRQS                                512
+/* in_interrupt() is answered by ml_at_interrupt_context(). */
+#define in_interrupt()                 ml_at_interrupt_context()
+
+/* Log level prefixes: string literals "<0>" through "<7>". */
+#define KERN_EMERG      "<0>"   /* system is unusable                   */
+#define KERN_ALERT      "<1>"   /* action must be taken immediately     */
+#define KERN_CRIT       "<2>"   /* critical conditions                  */
+#define KERN_ERR        "<3>"   /* error conditions                     */
+#define KERN_WARNING    "<4>"   /* warning conditions                   */
+#define KERN_NOTICE     "<5>"   /* normal but significant condition     */
+#define KERN_INFO       "<6>"   /* informational                        */
+#define KERN_DEBUG      "<7>"   /* debug-level messages                 */
+
+/* Return the pointer value @ptr reinterpreted as a long. */
+static inline long PTR_ERR(const void *ptr)
+{
+       return (long) ptr;
+}
+
+/*
+ * ERR_PTR() casts an integer error value to a pointer; the argument is
+ * parenthesized so that the cast applies to the whole expression.
+ * IS_ERR() is true when p, taken as an unsigned long, lies in the top 1000
+ * values of the address range (i.e. -1000 .. -1 as a signed value).
+ */
+#define ERR_PTR(err)   ((void *)(err))
+#define IS_ERR(p)      ((unsigned long)(p) + 1000 < 1000)
+
+#else  /* !__KERNEL__ */
+
+/* Non-kernel build: the same placeholder proc entry type as the kernel branch. */
+typedef struct cfs_proc_dir_entry {
+       void            *data;
+} cfs_proc_dir_entry_t;
+
+#include <libcfs/user-prim.h>
+/* NOTE(review): word size is hard-coded to 32 for user-space builds -- confirm. */
+#define __WORDSIZE     32
+
+#endif /* END __KERNEL__ */
+/*
+ * Error number
+ */
+/*
+ * Fallbacks for errno names the platform may not define. Each is mapped to
+ * an existing code (or, for ERESTARTSYS, to the literal 512) only when it
+ * is missing. ECOMM, ENODATA and ENOTSUPP all fall back to EINVAL, so they
+ * cannot be told apart from it or from each other.
+ */
+#ifndef EPROTO
+#define EPROTO          EPROTOTYPE
+#endif
+#ifndef EBADR
+#define EBADR          EBADRPC
+#endif
+#ifndef ERESTARTSYS
+#define ERESTARTSYS    512
+#endif
+#ifndef EDEADLOCK
+#define EDEADLOCK      EDEADLK
+#endif
+#ifndef ECOMM
+#define ECOMM          EINVAL
+#endif
+#ifndef ENODATA
+#define ENODATA                EINVAL
+#endif
+#ifndef ENOTSUPP
+#define ENOTSUPP       EINVAL
+#endif
+
+/*
+ * Define exactly one of __BIG_ENDIAN / __LITTLE_ENDIAN from BYTE_ORDER.
+ * NOTE(review): if neither BYTE_ORDER nor BIG_ENDIAN is defined, both
+ * evaluate to 0 in the #if and __BIG_ENDIAN is selected -- confirm the
+ * header that defines them is always included first.
+ */
+#if BYTE_ORDER == BIG_ENDIAN
+# define __BIG_ENDIAN
+#else
+# define __LITTLE_ENDIAN
+#endif
+
+#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */
diff --git a/libcfs/include/libcfs/darwin/darwin-sync.h b/libcfs/include/libcfs/darwin/darwin-sync.h
new file mode 100644 (file)
index 0000000..7817b38
--- /dev/null
@@ -0,0 +1,332 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Implementation of standard libcfs synchronization primitives for XNU
+ * kernel.
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+/*
+ * xnu_sync.h
+ *
+ * Created by nikita on Sun Jul 18 2004.
+ *
+ * Prototypes of XNU synchronization primitives.
+ */
+
+#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__
+#define __LIBCFS_DARWIN_XNU_SYNC_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/*
+ * Non-zero enables the debugging-only structure members (magic, owner) and
+ * the declarations guarded by XNU_SYNC_DEBUG in this header.
+ */
+#define XNU_SYNC_DEBUG (1)
+
+/* ON_SYNC_DEBUG(e) expands to e when XNU_SYNC_DEBUG is set, to nothing otherwise. */
+#if XNU_SYNC_DEBUG
+#define ON_SYNC_DEBUG(e) e
+#else
+#define ON_SYNC_DEBUG(e)
+#endif
+
+/*
+ * Magic numbers, one per primitive type.  KSLEEP_LINK_DECLARE() stores
+ * KSLEEP_LINK_MAGIC in the `magic' member of struct ksleep_link; the other
+ * values are presumably stored in the `magic' member of the matching
+ * structure by its init function (TODO confirm in the implementation).
+ * NOTE(review): several values do not fit in a signed 32-bit int, so this
+ * relies on the compiler accepting such enumerators.
+ */
+enum {
+        /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */
+       KMUT_MAGIC  = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */
+       KSEM_MAGIC  = 0x1abe11ed,
+       KCOND_MAGIC = 0xb01dface,
+       KRW_MAGIC   = 0xdabb1edd,
+       KSPIN_MAGIC = 0xca11ab1e,
+        KRW_SPIN_MAGIC    = 0xbabeface,
+       KSLEEP_CHAN_MAGIC = 0x0debac1e,
+       KSLEEP_LINK_MAGIC = 0xacc01ade,
+       KTIMER_MAGIC      = 0xbefadd1e
+};
+
+/* ------------------------- spin lock ------------------------- */
+
+/*
+ * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h
+ */
+#define SMP (1)
+
+#include <libcfs/list.h>
+
+#ifdef __DARWIN8__
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <kern/locks.h>
+
+/*
+ * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all), 
+ * so use lck_spin_t. we can hack out lck_spin_t easily, it's the only 
+ * hacking in Darwin8.x. We did so because it'll take a lot of time to 
+ * add lock_done for all locks, maybe it should be done in the future.
+ * If lock_done for all locks were added, we can:
+ *
+ * typedef lck_spin_t      *xnu_spin_t;
+ */
+/*
+ * Opaque storage used in place of lck_spin_t, one layout per architecture.
+ * NOTE(review): the array lengths presumably match the size of lck_spin_t
+ * on each architecture -- confirm against the kernel headers.  No
+ * xnu_spin_t is defined here for any architecture other than ppc and i386.
+ */
+#if defined (__ppc__)
+typedef struct {
+        unsigned int    opaque[3];
+} xnu_spin_t;
+#elif defined (__i386__)
+typedef struct {
+        unsigned int    opaque[10];
+} xnu_spin_t;
+#endif
+
+/* 
+ * wait_queue is not available in Darwin8 (wait_queue_* are not exported), 
+ * use assert_wait/wakeup/wake_one (wait_queue in kernel hash).
+ */
+typedef void * xnu_wait_queue_t;
+
+/* DARWIN8 */
+#else
+
+#include <mach/mach_types.h>
+#include <sys/types.h>
+#include <kern/simple_lock.h>
+
+typedef hw_lock_data_t          xnu_spin_t;
+typedef struct wait_queue       xnu_wait_queue_t;
+
+/* DARWIN8 */
+#endif
+
+/*
+ * Spin lock.  The lock word is present only when SMP is set; magic and
+ * owner are debugging members present only when XNU_SYNC_DEBUG is set.
+ */
+struct kspin {
+#if SMP
+       xnu_spin_t      lock;
+#endif
+#if XNU_SYNC_DEBUG
+       unsigned        magic;
+       thread_t        owner;
+#endif
+};
+
+/*
+ * Spin lock operations (implemented outside this header).
+ * NOTE(review): the return convention of kspin_trylock() is not visible
+ * here -- check the implementation before testing its result.
+ */
+void kspin_init(struct kspin *spin);
+void kspin_done(struct kspin *spin);
+void kspin_lock(struct kspin *spin);
+void kspin_unlock(struct kspin *spin);
+int  kspin_trylock(struct kspin *spin);
+
+#if XNU_SYNC_DEBUG
+/*
+ * two functions below are for use in assertions
+ */
+/* true, iff spin-lock is locked by the current thread */
+int kspin_islocked(struct kspin *spin);
+/* true, iff spin-lock is not locked by the current thread */
+int kspin_isnotlocked(struct kspin *spin);
+#else
+/*
+ * Without the debugging members there is no owner to consult, so both
+ * predicates are constant true and assertions built on them always pass.
+ */
+#define kspin_islocked(s) (1)
+#define kspin_isnotlocked(s) (1)
+#endif
+
+/* ------------------------- rw spinlock ----------------------- */
+/*
+ * Read-write spin lock: an integer count protected by a kspin guard.
+ * NOTE(review): how count encodes readers and a writer is not visible in
+ * this header -- see the implementation.
+ */
+struct krw_spin {
+        struct kspin      guard;
+        int               count;
+#if XNU_SYNC_DEBUG
+        unsigned          magic;
+#endif
+};
+
+/* _r functions are the reader side, _w functions the writer side. */
+void krw_spin_init(struct krw_spin *sem);
+void krw_spin_done(struct krw_spin *sem);
+void krw_spin_down_r(struct krw_spin *sem);
+void krw_spin_down_w(struct krw_spin *sem);
+void krw_spin_up_r(struct krw_spin *sem);
+void krw_spin_up_w(struct krw_spin *sem);
+
+/* ------------------------- semaphore ------------------------- */
+
+/*
+ * Counting semaphore: an integer value and a wait queue, protected by a
+ * kspin guard.
+ */
+struct ksem {
+        struct kspin      guard;
+        xnu_wait_queue_t  q;
+        int               value;
+#if XNU_SYNC_DEBUG
+        unsigned          magic;
+#endif
+};
+
+/*
+ * Semaphore operations; up/down/trydown take the amount to adjust by.
+ * NOTE(review): the meaning of the int results of ksem_up() and
+ * ksem_trydown() is not visible here -- check the implementation.
+ */
+void ksem_init(struct ksem *sem, int value);
+void ksem_done(struct ksem *sem);
+int  ksem_up  (struct ksem *sem, int value);
+void ksem_down(struct ksem *sem, int value);
+int  ksem_trydown(struct ksem *sem, int value);
+
+/* ------------------------- mutex ------------------------- */
+
+/*
+ * Mutex, built on a ksem.  magic and owner are debugging members present
+ * only when XNU_SYNC_DEBUG is set.
+ */
+struct kmut {
+       struct ksem s;
+#if XNU_SYNC_DEBUG
+        unsigned    magic;
+        thread_t    owner;
+#endif
+};
+
+void kmut_init(struct kmut *mut);
+void kmut_done(struct kmut *mut);
+
+/*
+ * NOTE(review): the return convention of kmut_trylock() is not visible
+ * here -- check the implementation before testing its result.
+ */
+void kmut_lock   (struct kmut *mut);
+void kmut_unlock (struct kmut *mut);
+int  kmut_trylock(struct kmut *mut);
+
+#if XNU_SYNC_DEBUG
+/*
+ * two functions below are for use in assertions
+ */
+/* true, iff mutex is locked by the current thread */
+int kmut_islocked(struct kmut *mut);
+/* true, iff mutex is not locked by the current thread */
+int kmut_isnotlocked(struct kmut *mut);
+#else
+/*
+ * Without the debugging members there is no owner to consult, so both
+ * predicates are constant true and assertions built on them always pass.
+ */
+#define kmut_islocked(m) (1)
+#define kmut_isnotlocked(m) (1)
+#endif
+
+/* ------------------------- condition variable ------------------------- */
+
+/* One waiter on a condition variable: a semaphore plus a singly-linked next pointer. */
+struct kcond_link {
+       struct kcond_link *next;
+        struct ksem        sem;
+};
+
+/* Condition variable: a list of kcond_link waiters protected by a kspin guard. */
+struct kcond {
+        struct kspin       guard;
+        struct kcond_link *waiters;
+#if XNU_SYNC_DEBUG
+        unsigned           magic;
+#endif
+};
+
+/* kcond_wait() takes the caller's spin lock in addition to the condition. */
+void kcond_init(struct kcond *cond);
+void kcond_done(struct kcond *cond);
+void kcond_wait(struct kcond *cond, struct kspin *lock);
+void kcond_signal(struct kcond *cond);
+void kcond_broadcast(struct kcond *cond);
+
+/*
+ * NOTE(review): the _guard variants take no separate lock; presumably they
+ * work with cond->guard itself -- confirm the locking contract in the
+ * implementation.
+ */
+void kcond_wait_guard(struct kcond *cond);
+void kcond_signal_guard(struct kcond *cond);
+void kcond_broadcast_guard(struct kcond *cond);
+
+/* ------------------------- read-write semaphore ------------------------- */
+
+/*
+ * Read-write semaphore: an integer count plus a condition variable.
+ * NOTE(review): how count encodes readers and a writer is not visible in
+ * this header -- see the implementation.
+ */
+struct krw_sem {
+       int          count;
+       struct kcond cond;
+#if XNU_SYNC_DEBUG
+       unsigned     magic;
+#endif
+};
+
+/*
+ * _r functions are the reader side, _w functions the writer side.
+ * NOTE(review): the return convention of the _try variants is not visible
+ * here -- check the implementation before testing their results.
+ */
+void krw_sem_init(struct krw_sem *sem);
+void krw_sem_done(struct krw_sem *sem);
+void krw_sem_down_r(struct krw_sem *sem);
+int krw_sem_down_r_try(struct krw_sem *sem);
+void krw_sem_down_w(struct krw_sem *sem);
+int krw_sem_down_w_try(struct krw_sem *sem);
+void krw_sem_up_r(struct krw_sem *sem);
+void krw_sem_up_w(struct krw_sem *sem);
+
+/* ------------------------- sleep-channel ------------------------- */
+
+/* Sleep channel: a list of waiters protected by a kspin guard. */
+struct ksleep_chan {
+       struct kspin     guard;
+       struct list_head waiters;
+#if XNU_SYNC_DEBUG
+       unsigned     magic;
+#endif
+};
+
+/*
+ * All-zero static initializer.  Note that it leaves magic at 0 rather than
+ * KSLEEP_CHAN_MAGIC and waiters with NULL links.
+ * NOTE(review): confirm that a channel initialized this way is accepted by
+ * the ksleep_* functions without a call to ksleep_chan_init().
+ */
+#define KSLEEP_CHAN_INITIALIZER         {{{0}}}
+
+/*
+ * One waiter on a sleep channel; linkage is its list node.
+ * NOTE(review): the roles of event, hits and forward are not visible in
+ * this header -- see the implementation.
+ */
+struct ksleep_link {
+       int                 flags;
+       event_t             event;
+       int                 hits;
+       struct ksleep_chan *forward;
+       struct list_head    linkage;
+#if XNU_SYNC_DEBUG
+       unsigned     magic;
+#endif
+};
+
+/* Flag bit; presumably a value for ksleep_link.flags (TODO confirm). */
+enum {
+       KSLEEP_EXCLUSIVE = 1
+};
+
+void ksleep_chan_init(struct ksleep_chan *chan);
+void ksleep_chan_done(struct ksleep_chan *chan);
+
+void ksleep_link_init(struct ksleep_link *link);
+void ksleep_link_done(struct ksleep_link *link);
+
+/* Attach a link to / detach a link from a channel. */
+void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link);
+void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link);
+
+/*
+ * NOTE(review): the unit of `timeout' and the meaning of the int64_t result
+ * of ksleep_timedwait() are not visible here -- check the implementation.
+ */
+void ksleep_wait(struct ksleep_chan *chan, int state);
+int64_t  ksleep_timedwait(struct ksleep_chan *chan, int state, __u64 timeout);
+
+/* Wake-up entry points; ksleep_wake_nr() takes a count. */
+void ksleep_wake(struct ksleep_chan *chan);
+void ksleep_wake_all(struct ksleep_chan *chan);
+void ksleep_wake_nr(struct ksleep_chan *chan, int nr);
+
+/*
+ * Static initializer for a struct ksleep_link variable called `name'.
+ * Members that are not listed (forward) are zero-initialized by the usual
+ * C rules.  The magic member exists only when XNU_SYNC_DEBUG is set, so its
+ * initializer goes through ON_SYNC_DEBUG(); the comma left behind in
+ * non-debug builds is a legal trailing comma.
+ *
+ * NOTE(review): .linkage is initialized with CFS_LIST_HEAD(); if that macro
+ * declares a list head rather than expanding to an initializer, the
+ * initializer form (CFS_LIST_HEAD_INIT) is probably what is wanted here --
+ * confirm against <libcfs/list.h>.
+ */
+#define KSLEEP_LINK_DECLARE(name)               \
+{                                               \
+       .flags   = 0,                           \
+       .event   = 0,                           \
+       .hits    = 0,                           \
+       .linkage = CFS_LIST_HEAD(name.linkage), \
+       ON_SYNC_DEBUG(.magic = KSLEEP_LINK_MAGIC) \
+}
+
+/* ------------------------- timer ------------------------- */
+
+/*
+ * Timer: a callback with its argument, a deadline and an armed flag,
+ * protected by a kspin guard.
+ */
+struct ktimer {
+       struct kspin   guard;
+       void         (*func)(void *);
+       void          *arg;
+       u_int64_t      deadline; /* timer deadline in absolute nanoseconds */
+       int            armed;
+#if XNU_SYNC_DEBUG
+       unsigned     magic;
+#endif
+};
+
+/*
+ * ktimer_init() records the callback and its argument; ktimer_arm() takes
+ * the deadline.  NOTE(review): the context in which func is invoked is not
+ * visible here -- check the implementation.
+ */
+void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg);
+void ktimer_done(struct ktimer *t);
+void ktimer_arm(struct ktimer *t, u_int64_t deadline);
+void ktimer_disarm(struct ktimer *t);
+int  ktimer_is_armed(struct ktimer *t);
+
+u_int64_t ktimer_deadline(struct ktimer *t);
+
+/* __LIBCFS_DARWIN_XNU_SYNC_H__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/darwin/darwin-tcpip.h b/libcfs/include/libcfs/darwin/darwin-tcpip.h
new file mode 100644 (file)
index 0000000..1a73891
--- /dev/null
@@ -0,0 +1,90 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines. 
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_TCPIP_H__
+#define __LIBCFS_DARWIN_TCPIP_H__
+
+#ifdef __KERNEL__
+#include <sys/socket.h>
+
+#ifdef __DARWIN8__
+
+struct socket;
+
+/* Socket upcall signature: the socket, the registered argument and a wait flag. */
+typedef void    (*so_upcall)(socket_t sock, void* arg, int waitf);
+
+/* Flag bits; presumably values for cfs_socket.s_flags (TODO confirm). */
+#define CFS_SOCK_UPCALL         0x1
+#define CFS_SOCK_DOWN           0x2
+
+/* Presumably the value kept in cfs_socket.s_magic (TODO confirm). */
+#define CFS_SOCK_MAGIC          0xbabeface
+
+/*
+ * libcfs wrapper around a BSD socket_t, carrying flags and an upcall with
+ * its argument alongside the socket.
+ */
+typedef struct cfs_socket {
+        socket_t        s_so;
+        int             s_magic;
+        int             s_flags;
+        so_upcall       s_upcall;
+        void           *s_upcallarg;
+} cfs_socket_t;
+
+
+/* cfs_socket_t to bsd socket */
+#define C2B_SOCK(s)             ((s)->s_so)     
+
+/*
+ * Read an int-valued SOL_SOCKET option from a BSD socket.
+ *
+ * sock_getsockopt will take a lock(mutex) for socket, so it can block.
+ * Be careful about the contexts this is called from.
+ *
+ * Returns the option value.  A failing sock_getsockopt() trips the
+ * assertion; val starts out as 0 so that a build which compiles assert()
+ * out returns 0 rather than an uninitialized stack value.
+ */
+static inline int get_sock_intopt(socket_t so, int opt)
+{
+        int     val = 0;
+        int     len = sizeof(val);
+        int     rc;
+
+        rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len);
+        assert(rc == 0);
+        return val;
+}
+
+#define SOCK_ERROR(s)           get_sock_intopt(C2B_SOCK(s), SO_ERROR)        
+/* #define SOCK_WMEM_QUEUED(s)     (0) */
+#define SOCK_WMEM_QUEUED(s)     get_sock_intopt(C2B_SOCK(s), SO_NWRITE)
+/* XXX Liang: no reliable way to get it in Darwin8.x */
+#define SOCK_TEST_NOSPACE(s)    (0)
+
+void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg);
+void libcfs_sock_reset_cb(cfs_socket_t *sock);
+
+#else /* !__DARWIN8__ */
+
+#define SOCK_WMEM_QUEUED(so)    ((so)->so_snd.sb_cc)
+#define SOCK_ERROR(so)          ((so)->so_error)
+
+#define SOCK_TEST_NOSPACE(so)   (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat)
+
+#endif /* !__DARWIN8__ */
+
+#endif /* __KERNEL END */
+
+#endif  /* __LIBCFS_DARWIN_TCPIP_H__ */
diff --git a/libcfs/include/libcfs/darwin/darwin-time.h b/libcfs/include/libcfs/darwin/darwin-time.h
new file mode 100644 (file)
index 0000000..35862a6
--- /dev/null
@@ -0,0 +1,249 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for XNU kernel
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__
+#define __LIBCFS_DARWIN_DARWIN_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ *  cfs_time_t        represents point in time. This is internal kernel
+ *                    time rather than "wall clock". This time bears no
+ *                    relation to gettimeofday().
+ *
+ *  cfs_duration_t    represents time interval with resolution of internal
+ *                    platform clock
+ *
+ *  cfs_fs_time_t     represents instance in world-visible time. This is
+ *                    used in file-system time-stamps
+ *
+ *  cfs_time_t     cfs_time_current(void);
+ *  cfs_time_t     cfs_time_add    (cfs_time_t, cfs_duration_t);
+ *  cfs_duration_t cfs_time_sub    (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_before (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ *  cfs_duration_t cfs_duration_build(int64_t);
+ *
+ *  time_t         cfs_duration_sec (cfs_duration_t);
+ *  void           cfs_duration_usec(cfs_duration_t, struct timeval *);
+ *  void           cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ *  void           cfs_fs_time_current(cfs_fs_time_t *);
+ *  time_t         cfs_fs_time_sec    (cfs_fs_time_t *);
+ *  void           cfs_fs_time_usec   (cfs_fs_time_t *, struct timeval *);
+ *  void           cfs_fs_time_nsec   (cfs_fs_time_t *, struct timespec *);
+ *  int            cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ *  int            cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ *  CFS_TIME_FORMAT
+ *  CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION 1000000
+
+#ifdef __KERNEL__
+#include <sys/types.h>
+#include <sys/systm.h>
+
+#include <sys/kernel.h>
+
+#include <mach/mach_types.h>
+#include <mach/time_value.h>
+#include <kern/clock.h>
+#include <sys/param.h>
+
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-utils.h>
+#include <libcfs/darwin/darwin-lock.h>
+
+/*
+ * There are three ways to measure time in OS X:
+ * 1. nanoseconds
+ * 2. absolute time (abstime unit equal to the length of one bus cycle),
+ *    schedule of thread/timer are counted by absolute time, but abstime
+ *    in different mac can be different also, so we wouldn't use it.
+ * 3. clock interval (1sec = 100hz). But clock interval only taken by KPI
+ *    like tsleep().
+ *
+ * We use nanoseconds (uptime, not calendar time)
+ *
+ * clock_get_uptime()   :get absolute time since bootup.
+ * nanouptime()         :get nanoseconds since bootup
+ * microuptime()        :get microseconds since bootup
+ * nanotime()           :get nanoseconds since epoch
+ * microtime()          :get microseconds since epoch
+ */
+/* Point in kernel time and signed interval between two such points. */
+typedef u_int64_t cfs_time_t; /* nanoseconds */
+typedef int64_t cfs_duration_t;
+
+/* printf-style conversions for cfs_time_t and cfs_duration_t values. */
+#define CFS_TIME_T             "%llu"
+#define CFS_DURATION_T         "%lld"
+
+/* World-visible (file-system) time is kept as a struct timeval. */
+typedef struct timeval cfs_fs_time_t;
+
+/* Current kernel time: the nanouptime() reading flattened to nanoseconds. */
+static inline cfs_time_t cfs_time_current(void)
+{
+        struct timespec now;
+        cfs_time_t      nsecs;
+
+        nanouptime(&now);
+        nsecs = ((u_int64_t)now.tv_sec) * NSEC_PER_SEC + now.tv_nsec;
+        return nsecs;
+}
+
+/* Current kernel time in whole seconds: the tv_sec part of nanouptime(). */
+static inline time_t cfs_time_current_sec(void)
+{
+        struct timespec now;
+        time_t          secs;
+
+        nanouptime(&now);
+        secs = now.tv_sec;
+        return secs;
+}
+
+/* Point in time reached by moving t by the (signed) duration d. */
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+        cfs_time_t shifted = t + d;
+
+        return shifted;
+}
+
+/* Signed interval from t2 to t1. */
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+        cfs_duration_t delta = t1 - t2;
+
+        return delta;
+}
+
+/*
+ * True iff t1 is strictly earlier than t2.  The difference is taken in the
+ * unsigned type and only then reinterpreted as signed, so the subtraction
+ * itself can never overflow; whenever the signed subtraction used before
+ * was well defined the result is the same.
+ */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+        return (int64_t)(t1 - t2) < 0;
+}
+
+/* True iff t1 is earlier than or equal to t2; see cfs_time_before(). */
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+        return (int64_t)(t1 - t2) <= 0;
+}
+
+/* Fill *t with the current time as reported by microtime(). */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+        struct timeval *tv = (struct timeval *)t;
+
+        microtime(tv);
+}
+
+/* Seconds part of a file-system time. */
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+        time_t secs = t->tv_sec;
+
+        return secs;
+}
+
+/* Copy a file-system time into a timeval (the two share a representation). */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        v[0] = t[0];
+}
+
+/* Convert a file-system time into a timespec, scaling microseconds up. */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        s->tv_nsec = t->tv_usec * NSEC_PER_USEC;
+        s->tv_sec  = t->tv_sec;
+}
+
+/* Duration corresponding to the given number of seconds. */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+       cfs_duration_t span = (NSEC_PER_SEC * (int64_t)seconds);
+
+       return span;
+}
+
+/*
+ * internal helper used by cfs_fs_time_before*(): flatten a file-system
+ * time into a single signed nanosecond count
+ */
+static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+        int64_t flat;
+
+        flat = ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC;
+        return flat;
+}
+
+/* True iff *t1 is strictly earlier than *t2. */
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        int64_t delta = __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2);
+
+        return delta < 0;
+}
+
+/* True iff *t1 is earlier than or equal to *t2. */
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        int64_t delta = __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2);
+
+        return delta <= 0;
+}
+
+/*
+ * Conversions from a duration to whole seconds, a timeval and a timespec.
+ * NOTE(review): the results for negative durations depend on whether
+ * NSEC_PER_SEC is a signed or an unsigned constant (it is defined outside
+ * this file) -- confirm before relying on them.
+ */
+
+/* Whole seconds contained in d. */
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+        return d / NSEC_PER_SEC;
+}
+
+/* Split d into seconds and the remaining microseconds. */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        s->tv_sec = d / NSEC_PER_SEC;
+        /* remainder is computed from the value already stored in tv_sec */
+        s->tv_usec = (d - ((int64_t)s->tv_sec) * NSEC_PER_SEC) / NSEC_PER_USEC;
+}
+
+/* Split d into seconds and the remaining nanoseconds. */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        s->tv_sec = d / NSEC_PER_SEC;
+        /* remainder is computed from the value already stored in tv_sec */
+        s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC;
+}
+
+#define cfs_time_current_64 cfs_time_current
+#define cfs_time_add_64     cfs_time_add
+#define cfs_time_shift_64   cfs_time_shift
+#define cfs_time_before_64  cfs_time_before
+#define cfs_time_beforeq_64 cfs_time_beforeq
+
+/* 
+ * One jiffy (in nanoseconds)
+ *
+ * osfmk/kern/sched_prim.c
+ * #define DEFAULT_PREEMPTION_RATE      100
+ */
+#define CFS_TICK               (NSEC_PER_SEC / (u_int64_t)100)
+
+#define LTIME_S(t)             (t)
+
+/* __KERNEL__ */
+#else
+
+/*
+ * User level
+ */
+#include <libcfs/user-time.h>
+
+/* __KERNEL__ */
+#endif
+
+/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/darwin/darwin-types.h b/libcfs/include/libcfs/darwin/darwin-types.h
new file mode 100644 (file)
index 0000000..3c24724
--- /dev/null
@@ -0,0 +1,95 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines. 
+ *
+ */
+
+#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__
+#define __LIBCFS_DARWIN_XNU_TYPES_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <mach/mach_types.h>
+#include <sys/types.h>
+
+#ifndef _BLKID_TYPES_H
+#define _BLKID_TYPES_H
+#endif
+
+typedef u_int8_t        __u8;
+typedef u_int16_t       __u16;
+typedef u_int32_t       __u32;
+typedef u_int64_t       __u64;
+typedef int8_t          __s8;
+typedef int16_t         __s16;
+typedef int32_t         __s32;
+typedef int64_t         __s64;
+
+#ifdef __KERNEL__
+
+#include <kern/kern_types.h>
+
+
+typedef struct { int e; }              event_chan_t;
+typedef dev_t                          kdev_t;
+
+/*
+ * Atomic definitions
+ */
+#include <libkern/OSAtomic.h>
+
+/* 32-bit counter manipulated through the kernel's atomic primitives. */
+typedef struct { volatile uint32_t counter; }  atomic_t;
+
+#define ATOMIC_INIT(i)                 { (i) }
+#define atomic_read(a)                 ((a)->counter)
+#define atomic_set(a, v)               (((a)->counter) = (v))
+/*
+ * The two families of primitives differ in what they return, so the
+ * derived *_and_test / *_return macros are defined per family instead of
+ * once for both.
+ */
+#ifdef __DARWIN8__
+/* OS*Atomic return the value before the operation */
+#define atomic_add(v, a)               OSAddAtomic(v, (SInt32 *)&((a)->counter))
+#define atomic_sub(v, a)               OSAddAtomic(-(v), (SInt32 *)&((a)->counter))
+#define atomic_inc(a)                  OSIncrementAtomic((SInt32 *)&((a)->counter))
+#define atomic_dec(a)                  OSDecrementAtomic((SInt32 *)&((a)->counter))
+/* old value == v (resp. 1) means the counter has just reached zero */
+#define atomic_sub_and_test(v, a)       (atomic_sub(v, a) == (v))
+#define atomic_dec_and_test(a)          (atomic_dec(a) == 1)
+#define atomic_inc_return(a)            (atomic_inc(a) + 1)
+#define atomic_dec_return(a)            (atomic_dec(a) - 1)
+#else /* !__DARWIN8__ */
+/* hw_atomic_* return the value after the operation */
+#define atomic_add(v, a)               hw_atomic_add((__u32 *)&((a)->counter), v)
+#define atomic_sub(v, a)               hw_atomic_sub((__u32 *)&((a)->counter), v)
+#define atomic_inc(a)                  atomic_add(1, a)
+#define atomic_dec(a)                  atomic_sub(1, a)
+#define atomic_sub_and_test(v, a)       (atomic_sub(v, a) == 0)
+#define atomic_dec_and_test(a)          (atomic_dec(a) == 0)
+#define atomic_inc_return(a)            atomic_inc(a)
+#define atomic_dec_return(a)            atomic_dec(a)
+#endif /* !__DARWIN8__ */
+
+#include <libsa/mach/mach.h>
+typedef off_t                          loff_t;
+
+#else  /* !__KERNEL__ */
+
+#include <stdint.h>
+
+typedef off_t                          loff_t;
+
+#endif /* __KERNEL END */
+typedef unsigned short                  umode_t;
+
+#endif  /* __LIBCFS_DARWIN_XNU_TYPES_H__ */
diff --git a/libcfs/include/libcfs/darwin/darwin-utils.h b/libcfs/include/libcfs/darwin/darwin-utils.h
new file mode 100644 (file)
index 0000000..0f808a2
--- /dev/null
@@ -0,0 +1,67 @@
+#ifndef __LIBCFS_DARWIN_UTILS_H__
+#define __LIBCFS_DARWIN_UTILS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <sys/random.h> 
+
+#ifdef __KERNEL__
+/*
+ * String helpers declared here for kernel builds; their definitions live
+ * outside this header.
+ * NOTE(review): isspace() is declared `inline' without a body here --
+ * confirm a definition is visible wherever it is used.
+ */
+inline int isspace(char c);
+char *strpbrk(const char *cs, const char *ct);
+char * strsep(char **s, const char *ct);
+size_t strnlen(const char * s, size_t count);
+char * strstr(const char *in, const char *str);
+char * strrchr(const char *p, int ch);
+char * ul2dstr(unsigned long address, char *buf, int len);
+
+/* Linux simple_strto*() names mapped onto the strto*() family. */
+#define simple_strtol(a1, a2, a3)               strtol(a1, a2, a3)
+#define simple_strtoul(a1, a2, a3)              strtoul(a1, a2, a3)
+#define simple_strtoll(a1, a2, a3)              strtoq(a1, a2, a3)
+#define simple_strtoull(a1, a2, a3)             strtouq(a1, a2, a3)
+
+/* Linux bit operations mapped onto isset/setbit/clrbit; note the swapped argument order. */
+#define test_bit(i, a)                          isset(a, i)
+#define set_bit(i, a)                           setbit(a, i)
+#define clear_bit(i, a)                         clrbit(a, i)
+
+/* Random bytes come from read_random(). */
+#define get_random_bytes(buf, len)              read_random(buf, len)
+
+#endif  /* __KERNEL__ */
+
+/*
+ * Typed minimum: both arguments are converted to `type' and evaluated
+ * exactly once; the second is the result unless the first is smaller.
+ */
+#ifndef min_t
+#define min_t(type,x,y)                         \
+       ({                                      \
+               type __x = (x);                 \
+               type __y = (y);                 \
+               (__y > __x) ? __x : __y;        \
+       })
+#endif
+/*
+ * Typed maximum: both arguments are converted to `type' and evaluated
+ * exactly once; the second is the result unless the first is larger.
+ */
+#ifndef max_t
+#define max_t(type,x,y)                         \
+       ({                                      \
+               type __x = (x);                 \
+               type __y = (y);                 \
+               (__y < __x) ? __x : __y;        \
+       })
+#endif
+
+/*
+ * do_div(n, base): divide the 64-bit lvalue n by the 32-bit base in place
+ * and evaluate to the remainder.  n is read once up front and written once
+ * with the quotient; the assignment target is parenthesized so that the
+ * macro argument cannot re-associate with the `='.
+ */
+#define do_div(n,base)                          \
+       ({                                      \
+        __u64 __n = (n);                       \
+        __u32 __base = (base);                 \
+        __u32 __mod = __n % __base;            \
+                                               \
+        (n) = __n / __base;                    \
+        __mod;                                 \
+        })
+
+/*
+ * NIPQUAD(addr): the four bytes of addr in memory order, as arguments for
+ * a "%u.%u.%u.%u" style format.  That is the dotted-quad order for an IPv4
+ * address stored in network byte order.  addr must be an lvalue.
+ */
+#define NIPQUAD(addr)                  \
+       ((unsigned char *)&(addr))[0],  \
+       ((unsigned char *)&(addr))[1],  \
+       ((unsigned char *)&(addr))[2],  \
+       ((unsigned char *)&(addr))[3]
+
+/*
+ * HIPQUAD(addr): same, for an address held in host byte order.  On a
+ * little-endian host the most significant byte is the last one in memory,
+ * so the bytes are listed in reverse.  When the byte order is not known at
+ * this point the macro stays an alias of NIPQUAD.
+ */
+#if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && BYTE_ORDER == LITTLE_ENDIAN
+#define HIPQUAD(addr)                  \
+       ((unsigned char *)&(addr))[3],  \
+       ((unsigned char *)&(addr))[2],  \
+       ((unsigned char *)&(addr))[1],  \
+       ((unsigned char *)&(addr))[0]
+#else
+#define HIPQUAD NIPQUAD
+#endif
+
+/*
+ * LIST_CIRCLE(elm, field): point the le_prev link of elm's list entry at
+ * the entry's own le_next member.  Defined only if not provided already.
+ */
+#ifndef LIST_CIRCLE
+#define LIST_CIRCLE(elm, field)                                 \
+       do {                                                    \
+               (elm)->field.le_prev = &(elm)->field.le_next;   \
+       } while (0)
+#endif
+
+#endif /* __LIBCFS_DARWIN_UTILS_H__ */
diff --git a/libcfs/include/libcfs/darwin/kp30.h b/libcfs/include/libcfs/darwin/kp30.h
new file mode 100644 (file)
index 0000000..f9e94b1
--- /dev/null
@@ -0,0 +1,101 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_KP30__
+#define __LIBCFS_DARWIN_KP30__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+#include <stdarg.h>
+
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-prim.h>
+#include <lnet/lnet.h>
+
+/* Yield by scheduling a timeout of 1 in CFS_TASK_INTERRUPTIBLE state. */
+#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1)
+
+/*
+ * Lock-state assertions are not implemented on this platform: every
+ * variant expands to an empty statement (both CONFIG_SMP branches are the
+ * same).
+ */
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */
+
+/*
+ * NOTE(review): msg is handed to panic() as its first argument; if panic()
+ * interprets that as a format string, a `%' in msg would be misread --
+ * confirm against the panic() prototype.
+ */
+#define LIBCFS_PANIC(msg) panic(msg)
+/*
+ * The two #error directives below are unconditional within __KERNEL__, so
+ * a kernel build stops here until the panic notifier hooks are provided.
+ */
+#error libcfs_register_panic_notifier() missing
+#error libcfs_unregister_panic_notifier() missing
+
+/* --------------------------------------------------------------------- */
+
+/* Register / unregister symbol x under its own name (stringified). */
+#define PORTAL_SYMBOL_REGISTER(x)               cfs_symbol_register(#x, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x)             cfs_symbol_unregister(#x)
+
+/* Look symbol x up by name, cast to a pointer to x's type / release it. */
+#define PORTAL_SYMBOL_GET(x)                    ((typeof(&x))cfs_symbol_get(#x))
+#define PORTAL_SYMBOL_PUT(x)                    cfs_symbol_put(#x)
+
+/* Placeholders: each only modifies a block-local variable and has no other effect. */
+#define PORTAL_MODULE_USE                       do{int i = 0; i++;}while(0)
+#define PORTAL_MODULE_UNUSE                     do{int i = 0; i--;}while(0)
+
+#define num_online_cpus()                       cfs_online_cpus()
+
+/******************************************************************************/
+/* XXX Liang: There is no module parameter support in OS X */
+#define CFS_MODULE_PARM(name, t, type, perm, desc)
+
+#define CFS_SYSFS_MODULE_PARM    0 /* no sysfs access to module parameters */
+/******************************************************************************/
+
+#else  /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+# include <stdint.h>
+# include <unistd.h>
+# include <time.h>
+# include <machine/limits.h>
+# include <sys/types.h>
+#endif
+
+#define BITS_PER_LONG   LONG_BIT
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+/* Light-weight tracing is disabled on this platform. */
+#define LWT_SUPPORT  0
+
+/* Trace record: a timestamp, a location string, a task pointer and four parameters. */
+typedef struct {
+        long long   lwte_when;
+        char       *lwte_where;
+        void       *lwte_task;
+        long        lwte_p1;
+        long        lwte_p2;
+        long        lwte_p3;
+        long        lwte_p4;
+} lwt_event_t;
+
+/* Expands to nothing, so its arguments are never evaluated. */
+# define LWT_EVENT(p1,p2,p3,p4)     /* no lwt implementation yet */
+
+/* -------------------------------------------------------------------------- */
+
+#define IOCTL_LIBCFS_TYPE struct libcfs_ioctl_data
+
+/*
+ * printf-style conversions: LPU64/LPD64/LPX64 use the long long length
+ * modifier, LPSZ/LPSSZ the long one.
+ */
+#define LPU64 "%llu"
+#define LPD64 "%lld"
+#define LPX64 "%#llx"
+#define LPSZ  "%lu"
+#define LPSSZ "%ld"
+/* Poison patterns (0x5a bytes) as int, long and pointer values. */
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+
+#endif
diff --git a/libcfs/include/libcfs/darwin/libcfs.h b/libcfs/include/libcfs/darwin/libcfs.h
new file mode 100644 (file)
index 0000000..eb4d8f3
--- /dev/null
@@ -0,0 +1,193 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_LIBCFS_H__
+#define __LIBCFS_DARWIN_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#include <mach/mach_types.h>
+#include <sys/errno.h>
+#include <string.h>
+#include <libcfs/darwin/darwin-types.h>
+#include <libcfs/darwin/darwin-time.h>
+#include <libcfs/darwin/darwin-prim.h>
+#include <libcfs/darwin/darwin-mem.h>
+#include <libcfs/darwin/darwin-lock.h>
+#include <libcfs/darwin/darwin-fs.h>
+#include <libcfs/darwin/darwin-tcpip.h>
+
+/*
+ * do_gettimeofday(tv): kernel builds expand to microuptime(tv), user-space
+ * builds expand to gettimeofday(tv, NULL).  Neither expansion carries a
+ * trailing semicolon, so the macro behaves as a single expression statement
+ * and stays safe inside an unbraced if/else.
+ */
+#ifdef __KERNEL__
+# include <sys/types.h>
+# include <sys/time.h>
+# define do_gettimeofday(tv) microuptime(tv)
+#else
+# include <sys/time.h>
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+typedef unsigned long long cycles_t;
+#endif
+
+#define __cpu_to_le64(x)                        OSSwapHostToLittleInt64(x)
+#define __cpu_to_le32(x)                        OSSwapHostToLittleInt32(x)
+#define __cpu_to_le16(x)                        OSSwapHostToLittleInt16(x)
+
+#define __le16_to_cpu(x)                        OSSwapLittleToHostInt16(x)
+#define __le32_to_cpu(x)                        OSSwapLittleToHostInt32(x)
+#define __le64_to_cpu(x)                        OSSwapLittleToHostInt64(x)
+
+#define cpu_to_le64(x)                          __cpu_to_le64(x)
+#define cpu_to_le32(x)                          __cpu_to_le32(x)
+#define cpu_to_le16(x)                          __cpu_to_le16(x)
+
+#define le64_to_cpu(x)                          __le64_to_cpu(x)
+#define le32_to_cpu(x)                          __le32_to_cpu(x)
+#define le16_to_cpu(x)                          __le16_to_cpu(x)
+
+#define __swab16(x)                             OSSwapInt16(x)
+#define __swab32(x)                             OSSwapInt32(x)
+#define __swab64(x)                             OSSwapInt64(x)
+#define __swab16s(x)                            do { *(x) = __swab16(*(x)); } while (0)
+#define __swab32s(x)                            do { *(x) = __swab32(*(x)); } while (0)
+#define __swab64s(x)                            do { *(x) = __swab64(*(x)); } while (0)
+
+struct ptldebug_header {
+        __u32 ph_len;
+        __u32 ph_flags;
+        __u32 ph_subsys;
+        __u32 ph_mask;
+        __u32 ph_cpu_id;
+        __u32 ph_sec;
+        __u64 ph_usec;
+        __u32 ph_stack;
+        __u32 ph_pid;
+        __u32 ph_extern_pid;
+        __u32 ph_line_num;
+} __attribute__((packed));
+
+
+#ifdef __KERNEL__
+# include <sys/systm.h>
+# include <pexpert/pexpert.h>
+/* Fix me */
+# define THREAD_SIZE 8192
+#else
+# define THREAD_SIZE 8192
+#endif
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#define CHECK_STACK() do { } while(0)
+#define CDEBUG_STACK() (0L)
+
+/* Darwin has defined RETURN, so we have to undef it in lustre */
+#ifdef RETURN
+#undef RETURN
+#endif
+
+/*
+ * When this is enabled debugging messages are indented according to the
+ * current "nesting level". Nesting level is increased when ENTRY macro
+ * is executed, and decreased on EXIT and RETURN.
+ */
+#ifdef __KERNEL__
+#define ENTRY_NESTING_SUPPORT (0)
+#endif
+
+#if ENTRY_NESTING_SUPPORT
+
+/*
+ * Currently ENTRY_NESTING_SUPPORT is only supported for XNU port. Basic
+ * idea is to keep per-thread pointer to small data structure (struct
+ * cfs_debug_data) describing current nesting level. In XNU unused
+ * proc->p_wmegs field is hijacked for this. On Linux
+ * current->journal_info can be used. In user space
+ * pthread_{g,s}etspecific().
+ *
+ * ENTRY macro allocates new cfs_debug_data on stack, and installs it as
+ * a current nesting level, storing old data in cfs_debug_data it just
+ * created.
+ *
+ * EXIT pops old value back.
+ *
+ */
+
+/*
+ * One problem with this approach is that there is a lot of code that
+ * does ENTRY and then escapes scope without doing EXIT/RETURN. In this
+ * case per-thread current nesting level pointer is dangling (it points
+ * to the stack area that is possibly already overwritten). To detect
+ * such cases, we add two magic fields to the cfs_debug_data and check
+ * them whenever current nesting level pointer is dereferenced. While
+ * looking flaky this works because stack is always consumed
+ * "continuously".
+ */
+/* Values stored in cfs_debug_data.magic1 / .magic2 by ENTRY_NESTING. */
+enum {
+       CDD_MAGIC1 = 0x02128506,
+       CDD_MAGIC2 = 0x42424242
+};
+
+/*
+ * Per-scope nesting record.  ENTRY_NESTING declares one on the stack as
+ * __cdd, initialized with both magic values, a NULL parent and nesting
+ * level 0, and hands it to __entry_nesting(); EXIT_NESTING hands it to
+ * __exit_nesting().  magic1 and magic2 bracket the payload so that a
+ * dangling record can be recognized when it is dereferenced.
+ */
+struct cfs_debug_data {
+       unsigned int           magic1;
+       struct cfs_debug_data *parent;
+       int                    nesting_level;
+       unsigned int           magic2;
+};
+
+void __entry_nesting(struct cfs_debug_data *child);
+void __exit_nesting(struct cfs_debug_data *child);
+unsigned int __current_nesting_level(void);
+
+#define ENTRY_NESTING                                          \
+struct cfs_debug_data __cdd = { .magic1        = CDD_MAGIC1,   \
+                               .parent        = NULL,          \
+                                .nesting_level = 0,            \
+                                .magic2        = CDD_MAGIC2 }; \
+__entry_nesting(&__cdd);
+
+#define EXIT_NESTING __exit_nesting(&__cdd)
+
+/* ENTRY_NESTING_SUPPORT */
+#else
+
+#define ENTRY_NESTING   do {;} while (0)
+#define EXIT_NESTING   do {;} while (0)
+#define __current_nesting_level() (0)
+
+/* ENTRY_NESTING_SUPPORT */
+#endif
+
+#define LUSTRE_LNET_PID          12345
+
+#define _XNU_LIBCFS_H
+
+/*
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in darwin-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX    MAXCOMLEN
+/*
+ * XNU has no capabilities
+ */
+typedef int cfs_kernel_cap_t;
+
+#ifdef __KERNEL__
+enum {
+        /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
+        CFS_STACK_TRACE_DEPTH = 16
+};
+
+struct cfs_stack_trace {
+        void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+#define printk(format, args...)                 printf(format, ## args)
+
+#ifdef WITH_WATCHDOG
+#undef WITH_WATCHDOG
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* __LIBCFS_DARWIN_LIBCFS_H__ */
diff --git a/libcfs/include/libcfs/darwin/lltrace.h b/libcfs/include/libcfs/darwin/lltrace.h
new file mode 100644 (file)
index 0000000..31d6e17
--- /dev/null
@@ -0,0 +1,26 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_LLTRACE_H__
+#define __LIBCFS_DARWIN_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <lnet/types.h>
+#include <libcfs/kp30.h>
+#include <mach/vm_param.h>
+#include <lnet/lnetctl.h>
+
+#endif
diff --git a/libcfs/include/libcfs/darwin/portals_utils.h b/libcfs/include/libcfs/darwin/portals_utils.h
new file mode 100644 (file)
index 0000000..4907cb1
--- /dev/null
@@ -0,0 +1,18 @@
+#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__
+#define __LIBCFS_DARWIN_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#include <libcfs/list.h>
+#ifdef __KERNEL__
+#include <mach/mach_types.h>
+#include <libcfs/libcfs.h>
+#else /* !__KERNEL__ */
+#include <machine/endian.h>
+#include <netinet/in.h>
+#include <sys/syscall.h>
+#endif /* !__KERNEL__ */
+
+#endif
diff --git a/libcfs/include/libcfs/kp30.h b/libcfs/include/libcfs/kp30.h
new file mode 100644 (file)
index 0000000..0869f67
--- /dev/null
@@ -0,0 +1,610 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_KP30_H__
+#define __LIBCFS_KP30_H__
+
+/* Controlled via configure key */
+/* #define LIBCFS_DEBUG */
+
+#include <libcfs/libcfs.h>
+#include <lnet/types.h>
+
+#if defined(__linux__)
+#include <libcfs/linux/kp30.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/kp30.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/kp30.h>
+#else
+#error Unsupported operating system
+#endif
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+#ifdef __KERNEL__
+
+#ifdef LIBCFS_DEBUG
+
+/*
+ * When this is on, LASSERT macro includes check for assignment used instead
+ * of equality check, but doesn't have unlikely(). Turn this on from time to
+ * time to make test-builds. This shouldn't be on for production release.
+ */
+#define LASSERT_CHECKED (0)
+
+#if LASSERT_CHECKED
+/*
+ * Assertion.
+ *
+ * Strange construction with empty "then" clause is used to trigger compiler
+ * warnings on the assertions of the form LASSERT(a = b);
+ *
+ * "warning: suggest parentheses around assignment used as truth value"
+ *
+ * requires -Wall. Unfortunately this rules out use of likely/unlikely.
+ */
+#define LASSERT(cond)                                           \
+({                                                              \
+        if (cond)                                               \
+                ;                                               \
+        else                                                    \
+                libcfs_assertion_failed( #cond , __FILE__,      \
+                        __FUNCTION__, __LINE__);                \
+})
+
+#define LASSERTF(cond, fmt, a...)                                       \
+({                                                                      \
+         if (cond)                                                      \
+                 ;                                                      \
+         else {                                                         \
+                 libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG,       \
+                                  __FILE__, __FUNCTION__,__LINE__,      \
+                                  "ASSERTION(" #cond ") failed:" fmt,   \
+                                  ## a);                                \
+                 LBUG();                                                \
+         }                                                              \
+})
+
+/* LASSERT_CHECKED */
+#else
+
+#define LASSERT(cond)                                           \
+({                                                              \
+        if (unlikely(!(cond)))                                  \
+                libcfs_assertion_failed(#cond , __FILE__,       \
+                        __FUNCTION__, __LINE__);                \
+})
+
+#define LASSERTF(cond, fmt, a...)                                       \
+({                                                                      \
+        if (unlikely(!(cond))) {                                        \
+                libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG,        \
+                                 __FILE__, __FUNCTION__,__LINE__,       \
+                                 "ASSERTION(" #cond ") failed:" fmt,    \
+                                 ## a);                                 \
+                LBUG();                                                 \
+        }                                                               \
+})
+
+/* LASSERT_CHECKED */
+#endif
+
+/* LIBCFS_DEBUG */
+#else
+#define LASSERT(e) ((void)(0))
+#define LASSERTF(cond, fmt...) ((void)(0))
+#endif /* LIBCFS_DEBUG */
+
+#define KLASSERT(e) LASSERT(e)
+
+void lbug_with_loc(char *file, const char *func, const int line)
+        __attribute__((noreturn));
+
+#define LBUG() lbug_with_loc(__FILE__, __FUNCTION__, __LINE__)
+
+extern atomic_t libcfs_kmemory;
+/*
+ * Memory
+ */
+#ifdef LIBCFS_DEBUG
+
+# define libcfs_kmem_inc(ptr, size)             \
+do {                                            \
+        atomic_add(size, &libcfs_kmemory);      \
+} while (0)
+
+# define libcfs_kmem_dec(ptr, size) do {        \
+        atomic_sub(size, &libcfs_kmemory);      \
+} while (0)
+
+#else
+# define libcfs_kmem_inc(ptr, size) do {} while (0)
+# define libcfs_kmem_dec(ptr, size) do {} while (0)
+#endif /* LIBCFS_DEBUG */
+
+#define LIBCFS_VMALLOC_SIZE        16384
+
+/*
+ * Allocate @size bytes into @ptr using allocation flags @mask.
+ *
+ * Requests larger than LIBCFS_VMALLOC_SIZE go to cfs_alloc_large(), all
+ * others to cfs_alloc().  On failure @ptr is NULL and two errors are
+ * logged.  On success the allocation is accounted with libcfs_kmem_inc()
+ * and the buffer is zeroed here unless @mask already carries
+ * CFS_ALLOC_ZERO.
+ *
+ * @size and @mask are evaluated several times and must be free of side
+ * effects.  Each use is parenthesized so compound arguments such as
+ * "a + b" or "f1 | f2" keep their meaning inside the assertion.
+ */
+#define LIBCFS_ALLOC_GFP(ptr, size, mask)                                 \
+do {                                                                      \
+        LASSERT(!in_interrupt() ||                                        \
+               ((size) <= LIBCFS_VMALLOC_SIZE &&                          \
+                (mask) == CFS_ALLOC_ATOMIC));                             \
+        if (unlikely((size) > LIBCFS_VMALLOC_SIZE))                     \
+                (ptr) = cfs_alloc_large(size);                            \
+        else                                                              \
+                (ptr) = cfs_alloc((size), (mask));                        \
+        if (unlikely((ptr) == NULL)) {                                  \
+                CERROR("LNET: out of memory at %s:%d (tried to alloc '"   \
+                       #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
+                CERROR("LNET: %d total bytes allocated by lnet\n",        \
+                       atomic_read(&libcfs_kmemory));                     \
+        } else {                                                          \
+                libcfs_kmem_inc((ptr), (size));                           \
+                if (!((mask) & CFS_ALLOC_ZERO))                           \
+                       memset((ptr), 0, (size));                          \
+        }                                                                 \
+        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n",    \
+               (int)(size), (ptr), atomic_read (&libcfs_kmemory));        \
+} while (0)
+
+#define LIBCFS_ALLOC(ptr, size) \
+        LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_IO)
+
+#define LIBCFS_ALLOC_ATOMIC(ptr, size) \
+        LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC)
+
+/*
+ * Release @ptr, an allocation of @size bytes.
+ *
+ * A NULL @ptr is logged as an error and nothing else happens.  Otherwise
+ * @size selects the release routine -- cfs_free_large() above
+ * LIBCFS_VMALLOC_SIZE, cfs_free() at or below it -- and the byte count is
+ * removed from the accounting with libcfs_kmem_dec().  @size is captured
+ * once in a local; @ptr is evaluated several times.
+ */
+#define LIBCFS_FREE(ptr, size)                                          \
+do {                                                                    \
+        int s = (size);                                                 \
+        if (unlikely((ptr) == NULL)) {                                  \
+                CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at "    \
+                       "%s:%d\n", s, __FILE__, __LINE__);               \
+                break;                                                  \
+        }                                                               \
+        if (unlikely(s > LIBCFS_VMALLOC_SIZE))                          \
+                cfs_free_large(ptr);                                    \
+        else                                                            \
+                cfs_free(ptr);                                          \
+        libcfs_kmem_dec((ptr), s);                                      \
+        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
+               s, (ptr), atomic_read(&libcfs_kmemory));                 \
+} while (0)
+
+/******************************************************************************/
+
+/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */
+#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__)
+#define ___htonl(x) __cpu_to_be32(x)
+#define ___htons(x) __cpu_to_be16(x)
+#define ___ntohl(x) __be32_to_cpu(x)
+#define ___ntohs(x) __be16_to_cpu(x)
+#define htonl(x) ___htonl(x)
+#define ntohl(x) ___ntohl(x)
+#define htons(x) ___htons(x)
+#define ntohs(x) ___ntohs(x)
+#endif
+
+void libcfs_debug_dumpstack(cfs_task_t *tsk);
+void libcfs_run_upcall(char **argv);
+void libcfs_run_lbug_upcall(char * file, const char *fn, const int line);
+void libcfs_debug_dumplog(void);
+int libcfs_debug_init(unsigned long bufsize);
+int libcfs_debug_cleanup(void);
+int libcfs_debug_clear_buffer(void);
+int libcfs_debug_mark_buffer(char *text);
+
+void libcfs_debug_set_level(unsigned int debug_level);
+
+#else  /* !__KERNEL__ */
+# ifdef LIBCFS_DEBUG
+#  undef NDEBUG
+#  include <assert.h>
+#  define LASSERT(e)     assert(e)
+#  define LASSERTF(cond, args...)                                              \
+do {                                                                           \
+          if (!(cond))                                                         \
+                CERROR(args);                                                  \
+          assert(cond);                                                        \
+} while (0)
+#  define LBUG()   assert(0)
+# else
+#  define LASSERT(e) ((void)(0))
+#  define LASSERTF(cond, args...) do { } while (0)
+#  define LBUG()   ((void)(0))
+# endif /* LIBCFS_DEBUG */
+# define KLASSERT(e) do { } while (0)
+# define printk(format, args...) printf (format, ## args)
+# ifdef CRAY_XT3                                /* buggy calloc! */
+/* Zero-filled allocation built from malloc() + memset().  @ptr is NULL on
+ * failure; the memset() is skipped then instead of writing through NULL. */
+#  define LIBCFS_ALLOC(ptr, size)               \
+   do {                                         \
+        (ptr) = malloc(size);                   \
+        if ((ptr) != NULL)                      \
+                memset((ptr), 0, (size));       \
+   } while (0)
+# else
+/* Zero-filled allocation via calloc(); @ptr is NULL on failure. */
+#  define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1, (size)); } while (0)
+# endif
+# define LIBCFS_FREE(a, b) do { free(a); } while (0)
+
+void libcfs_debug_dumplog(void);
+int libcfs_debug_init(unsigned long bufsize);
+int libcfs_debug_cleanup(void);
+
+/*
+ * Generic compiler-dependent macros required for kernel
+ * build go below this comment. Actual compiler/compiler version
+ * specific implementations come from the above header files
+ */
+
+#define likely(x)      __builtin_expect(!!(x), 1)
+#define unlikely(x)    __builtin_expect(!!(x), 0)
+
+/* !__KERNEL__ */
+#endif
+
+/*
+ * compile-time assertions. @cond has to be constant expression.
+ * ISO C Standard:
+ *
+ *        6.8.4.2  The switch statement
+ *
+ *       ....
+ *
+ *       [#3] The expression of each case label shall be  an  integer
+ *       constant   expression  and  no  two  of  the  case  constant
+ *       expressions in the same switch statement shall have the same
+ *       value  after  conversion...
+ *
+ */
+#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } })
+
+/* support decl needed both by kernel and liblustre */
+int         libcfs_isknown_lnd(int type);
+char       *libcfs_lnd2modname(int type);
+char       *libcfs_lnd2str(int type);
+int         libcfs_str2lnd(const char *str);
+char       *libcfs_net2str(__u32 net);
+char       *libcfs_nid2str(lnet_nid_t nid);
+__u32       libcfs_str2net(const char *str);
+lnet_nid_t  libcfs_str2nid(const char *str);
+int         libcfs_str2anynid(lnet_nid_t *nid, const char *str);
+char       *libcfs_id2str(lnet_process_id_t id);
+void        libcfs_setnet0alias(int type);
+
+/* how an LNET NID encodes net:address
+ * A NID is 64 bits wide: the net sits in the upper 32 bits and the address
+ * in the lower 32 bits. */
+#define LNET_NIDADDR(nid)      ((__u32)((nid) & 0xffffffff))
+#define LNET_NIDNET(nid)       ((__u32)(((nid) >> 32) & 0xffffffff))
+#define LNET_MKNID(net,addr)   ((((__u64)(net))<<32)|((__u64)(addr)))
+/* how net encodes type:number
+ * A net is 32 bits wide: the type sits in the upper 16 bits and the number
+ * in the lower 16 bits. */
+#define LNET_NETNUM(net)       ((net) & 0xffff)
+#define LNET_NETTYP(net)       (((net) >> 16) & 0xffff)
+#define LNET_MKNET(typ,num)    ((((__u32)(typ))<<16)|((__u32)(num)))
+
+/* implication */
+#define ergo(a, b) (!(a) || (b))
+/* logical equivalence */
+#define equi(a, b) (!!(a) == !!(b))
+
+#ifndef CURRENT_TIME
+# define CURRENT_TIME time(0)
+#endif
+
+/* --------------------------------------------------------------------
+ * Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect.
+ * All stuff about lwt are put in arch/kp30.h
+ * -------------------------------------------------------------------- */
+
+struct libcfs_device_userstate
+{
+        int           ldu_memhog_pages;
+        cfs_page_t   *ldu_memhog_root_page;
+};
+
+/* what used to be in portals_lib.h */
+#ifndef MIN
+# define MIN(a,b) (((a)<(b)) ? (a): (b))
+#endif
+#ifndef MAX
+# define MAX(a,b) (((a)>(b)) ? (a): (b))
+#endif
+
+/* Map a NULL string pointer to "" and pass any other pointer through.  The
+ * whole expansion is parenthesized so it can be used inside a larger
+ * expression or argument list; @ptr is evaluated twice. */
+#define MKSTR(ptr) ((ptr) ? (ptr) : "")
+
+/* Round @val up to a multiple of 4; exact multiples are returned as is. */
+static inline int size_round4(int val)
+{
+        const int pad = 0x3;
+
+        return (val + pad) & ~pad;
+}
+
+/* Round @val up to a multiple of 8; exact multiples are returned as is. */
+static inline int size_round(int val)
+{
+        const int pad = 0x7;
+
+        return (val + pad) & ~pad;
+}
+
+/* Round @val up to a multiple of 16; exact multiples are returned as is. */
+static inline int size_round16(int val)
+{
+        const int pad = 0xf;
+
+        return (val + pad) & ~pad;
+}
+
+/* Round @val up to a multiple of 32; exact multiples are returned as is. */
+static inline int size_round32(int val)
+{
+        const int pad = 0x1f;
+
+        return (val + pad) & ~pad;
+}
+
+/* 0 maps to 0; any other @val maps to (@val + 1) rounded up to a multiple
+ * of 8, i.e. room for one extra byte before rounding. */
+static inline int size_round0(int val)
+{
+        return val ? ((val + 1 + 7) & ~0x7) : 0;
+}
+
+/* Space needed to store the string @fset including its terminating NUL,
+ * rounded up to a multiple of 8 with size_round().  @fset is only read, so
+ * it is const-qualified; callers passing a plain char * are unaffected. */
+static inline size_t round_strlen(const char *fset)
+{
+        return (size_t)size_round((int)strlen(fset) + 1);
+}
+
+/*
+ * Pack helper: when @var is non-NULL copy @len bytes from @var to @ptr,
+ * then advance @ptr by size_round(@len).  @ptr advances even when @var is
+ * NULL.  @ptr must be a modifiable lvalue; every argument is parenthesized
+ * and may be evaluated more than once.
+ */
+#define LOGL(var,len,ptr)                                       \
+do {                                                            \
+        if (var)                                                \
+                memcpy((char *)(ptr), (const char *)(var), (len)); \
+        (ptr) += size_round(len);                               \
+} while (0)
+
+/*
+ * Unpack helper, the mirror of LOGL: when @var is non-NULL copy @len bytes
+ * from @ptr to @var, then advance @ptr by size_round(@len).  @ptr advances
+ * even when @var is NULL.  @ptr must be a modifiable lvalue; every argument
+ * is parenthesized and may be evaluated more than once.
+ */
+#define LOGU(var,len,ptr)                                       \
+do {                                                            \
+        if (var)                                                \
+                memcpy((char *)(var), (const char *)(ptr), (len)); \
+        (ptr) += size_round(len);                               \
+} while (0)
+
+/*
+ * Pack helper that also NUL-terminates: when @len is zero nothing happens;
+ * otherwise copy @len bytes from @var to @ptr, store a 0 byte right after
+ * them and advance @ptr by size_round(@len + 1).  @ptr must be a modifiable
+ * lvalue; every argument is parenthesized and may be evaluated more than
+ * once.
+ */
+#define LOGL0(var,len,ptr)                              \
+do {                                                    \
+        if (!(len))                                     \
+                break;                                  \
+        memcpy((char *)(ptr), (const char *)(var), (len)); \
+        *((char *)(ptr) + (len)) = 0;                   \
+        (ptr) += size_round((len) + 1);                 \
+} while (0)
+
+/*
+ * USER LEVEL STUFF BELOW
+ */
+
+#define LIBCFS_IOCTL_VERSION 0x0001000a
+
+/*
+ * Request block exchanged through the libcfs ioctl interface.
+ *
+ * ioc_len must equal libcfs_ioctl_packlen() of the request and ioc_version
+ * is set to LIBCFS_IOCTL_VERSION by LIBCFS_IOC_INIT() and
+ * libcfs_ioctl_pack().  The two "inline" buffers (ioc_inlbuf1/2 with
+ * lengths ioc_inllen1/2) are copied by libcfs_ioctl_pack() into ioc_bulk:
+ * buffer 1 at offset 0, buffer 2 at offset size_round(ioc_inllen1).
+ * libcfs_ioctl_is_invalid() requires the last byte of each non-empty inline
+ * buffer to be NUL.  ioc_pbuf1/2 are not copied into ioc_bulk.
+ */
+struct libcfs_ioctl_data {
+        __u32 ioc_len;
+        __u32 ioc_version;
+
+        __u64 ioc_nid;
+        __u64 ioc_u64[1];
+
+        __u32 ioc_flags;
+        __u32 ioc_count;
+        __u32 ioc_net;
+        __u32 ioc_u32[7];
+
+        __u32 ioc_inllen1;
+        char *ioc_inlbuf1;
+        __u32 ioc_inllen2;
+        char *ioc_inlbuf2;
+
+        __u32 ioc_plen1; /* buffers in userspace */
+        char *ioc_pbuf1;
+        __u32 ioc_plen2; /* buffers in userspace */
+        char *ioc_pbuf2;
+
+        /* packed copies of the inline buffers start here */
+        char ioc_bulk[0];
+};
+
+
+struct libcfs_ioctl_hdr {
+        __u32 ioc_len;
+        __u32 ioc_version;
+};
+
+struct libcfs_debug_ioctl_data
+{
+        struct libcfs_ioctl_hdr hdr;
+        unsigned int subs;
+        unsigned int debug;
+};
+
+/* Zero @data and stamp it with LIBCFS_IOCTL_VERSION and its own size.
+ * @data is an lvalue of a struct that has ioc_version and ioc_len members
+ * (not a pointer to one); it is parenthesized so an argument such as *p
+ * expands correctly. */
+#define LIBCFS_IOC_INIT(data)                           \
+do {                                                    \
+        memset(&(data), 0, sizeof(data));               \
+        (data).ioc_version = LIBCFS_IOCTL_VERSION;      \
+        (data).ioc_len = sizeof(data);                  \
+} while (0)
+
+/* FIXME check conflict with lustre_lib.h */
+#define LIBCFS_IOC_DEBUG_MASK             _IOWR('f', 250, long)
+
+/* Packed size of @data: the fixed structure plus each inline buffer length
+ * rounded up with size_round(). */
+static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data)
+{
+        int fixed = sizeof(*data);
+
+        return fixed + size_round(data->ioc_inllen1) +
+               size_round(data->ioc_inllen2);
+}
+
+/*
+ * Sanity-check a packed ioctl request.
+ *
+ * Returns 0 when @data passes every check and 1, after logging the reason
+ * with CERROR(), at the first check that fails.  The checks run in this
+ * order: upper bounds on the lengths, pointer/length pairing, ioc_len
+ * against libcfs_ioctl_packlen(), and NUL termination of each non-empty
+ * inline buffer inside ioc_bulk.
+ */
+static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data)
+{
+        __u32       inl1 = data->ioc_inllen1;
+        __u32       inl2 = data->ioc_inllen2;
+        const char *bulk = data->ioc_bulk;
+
+        /* upper bounds on the lengths */
+        if (data->ioc_len > (1<<30)) {
+                CERROR("LIBCFS ioctl: ioc_len larger than 1<<30\n");
+                return 1;
+        }
+        if (inl1 > (1<<30)) {
+                CERROR("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n");
+                return 1;
+        }
+        if (inl2 > (1<<30)) {
+                CERROR("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n");
+                return 1;
+        }
+
+        /* a buffer pointer needs a length ... */
+        if (!inl1 && data->ioc_inlbuf1) {
+                CERROR("LIBCFS ioctl: inlbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (!inl2 && data->ioc_inlbuf2) {
+                CERROR("LIBCFS ioctl: inlbuf2 pointer but 0 length\n");
+                return 1;
+        }
+        if (!data->ioc_plen1 && data->ioc_pbuf1) {
+                CERROR("LIBCFS ioctl: pbuf1 pointer but 0 length\n");
+                return 1;
+        }
+        if (!data->ioc_plen2 && data->ioc_pbuf2) {
+                CERROR("LIBCFS ioctl: pbuf2 pointer but 0 length\n");
+                return 1;
+        }
+
+        /* ... and a pbuf length needs a pointer */
+        if (!data->ioc_pbuf1 && data->ioc_plen1) {
+                CERROR("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n");
+                return 1;
+        }
+        if (!data->ioc_pbuf2 && data->ioc_plen2) {
+                CERROR("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n");
+                return 1;
+        }
+
+        /* declared length must match what packing would produce */
+        if (data->ioc_len != (__u32)libcfs_ioctl_packlen(data)) {
+                CERROR("LIBCFS ioctl: packlen != ioc_len\n");
+                return 1;
+        }
+
+        /* each non-empty inline buffer must end in a NUL byte */
+        if (inl1 && bulk[inl1 - 1] != '\0') {
+                CERROR("LIBCFS ioctl: inlbuf1 not 0 terminated\n");
+                return 1;
+        }
+        if (inl2 && bulk[size_round(inl1) + inl2 - 1] != '\0') {
+                CERROR("LIBCFS ioctl: inlbuf2 not 0 terminated\n");
+                return 1;
+        }
+
+        return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Pack @data and its inline buffers into one contiguous buffer.
+ *
+ * Sets data->ioc_len to libcfs_ioctl_packlen(data) and data->ioc_version to
+ * LIBCFS_IOCTL_VERSION.  When *@pbuf is non-NULL it is used as the
+ * destination and must hold at least the packed length (@max is its size);
+ * when it is NULL a buffer of ioc_len bytes is malloc()ed and stored in
+ * *@pbuf, where it remains even if validation fails afterwards.
+ *
+ * Returns 0 on success and 1 when the supplied buffer is too small, the
+ * allocation fails, or the packed result is rejected by
+ * libcfs_ioctl_is_invalid().
+ */
+static inline int libcfs_ioctl_pack(struct libcfs_ioctl_data *data, char **pbuf,
+                                    int max)
+{
+        struct libcfs_ioctl_data *packed;
+        char *cursor;
+        int need = libcfs_ioctl_packlen(data);
+
+        data->ioc_len = need;
+        data->ioc_version = LIBCFS_IOCTL_VERSION;
+
+        if (*pbuf != NULL) {
+                /* caller-supplied buffer: it has to be large enough */
+                if (need > max)
+                        return 1;
+        } else {
+                *pbuf = malloc(data->ioc_len);
+                if (*pbuf == NULL)
+                        return 1;
+        }
+
+        /* fixed part first, then the inline buffers back to back */
+        packed = (struct libcfs_ioctl_data *)*pbuf;
+        memcpy(packed, data, sizeof(*data));
+
+        cursor = packed->ioc_bulk;
+        if (data->ioc_inlbuf1)
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, cursor);
+        if (data->ioc_inlbuf2)
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, cursor);
+
+        return libcfs_ioctl_is_invalid(packed) ? 1 : 0;
+}
+
+#else
+
+extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg);
+extern int libcfs_ioctl_popdata(void *arg, void *buf, int size);
+
+#endif
+
+/* ioctls for manipulating snapshots 30- */
+#define IOC_LIBCFS_TYPE                   'e'
+#define IOC_LIBCFS_MIN_NR                 30
+/* libcfs ioctls */
+#define IOC_LIBCFS_PANIC                   _IOWR('e', 30, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLEAR_DEBUG             _IOWR('e', 31, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MARK_DEBUG              _IOWR('e', 32, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_CONTROL             _IOWR('e', 33, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_SNAPSHOT            _IOWR('e', 34, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LWT_LOOKUP_STRING       _IOWR('e', 35, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_MEMHOG                  _IOWR('e', 36, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING_TEST               _IOWR('e', 37, IOCTL_LIBCFS_TYPE)
+/* lnet ioctls */
+#define IOC_LIBCFS_GET_NI                  _IOWR('e', 50, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_FAIL_NID                _IOWR('e', 51, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_ROUTE               _IOWR('e', 52, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_ROUTE               _IOWR('e', 53, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_ROUTE               _IOWR('e', 54, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_NOTIFY_ROUTER           _IOWR('e', 55, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_UNCONFIGURE             _IOWR('e', 56, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PORTALS_COMPATIBILITY   _IOWR('e', 57, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNET_DIST               _IOWR('e', 58, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CONFIGURE               _IOWR('e', 59, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_TESTPROTOCOMPAT         _IOWR('e', 60, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PING                    _IOWR('e', 61, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEBUG_PEER              _IOWR('e', 62, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_LNETST                  _IOWR('e', 63, IOCTL_LIBCFS_TYPE)
+/* lnd ioctls */
+#define IOC_LIBCFS_REGISTER_MYNID          _IOWR('e', 70, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_CLOSE_CONNECTION        _IOWR('e', 71, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_PUSH_CONNECTION         _IOWR('e', 72, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_CONN                _IOWR('e', 73, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_PEER                _IOWR('e', 74, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_PEER                _IOWR('e', 75, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_PEER                _IOWR('e', 76, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_TXDESC              _IOWR('e', 77, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_ADD_INTERFACE           _IOWR('e', 78, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_DEL_INTERFACE           _IOWR('e', 79, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_INTERFACE           _IOWR('e', 80, IOCTL_LIBCFS_TYPE)
+#define IOC_LIBCFS_GET_GMID                _IOWR('e', 81, IOCTL_LIBCFS_TYPE)
+
+#define IOC_LIBCFS_MAX_NR                             81
+
+
+enum {
+        /* Only add to these values (i.e. don't ever change or redefine them):
+         * network addresses depend on them... */
+        QSWLND    = 1,
+        SOCKLND   = 2,
+        GMLND     = 3,
+        PTLLND    = 4,
+        O2IBLND   = 5,
+        CIBLND    = 6,
+        OPENIBLND = 7,
+        IIBLND    = 8,
+        LOLND     = 9,
+        RALND     = 10,
+        VIBLND    = 11,
+        MXLND     = 12,
+};
+
+enum {
+        DEBUG_DAEMON_START       =  1,
+        DEBUG_DAEMON_STOP        =  2,
+        DEBUG_DAEMON_PAUSE       =  3,
+        DEBUG_DAEMON_CONTINUE    =  4,
+};
+
+
+enum cfg_record_type {
+        PORTALS_CFG_TYPE = 1,
+        LUSTRE_CFG_TYPE = 123,
+};
+
+typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
+
+/* lustre_id output helper macros */
+#define DLID4   "%lu/%lu/%lu/%lu"
+
+#define OLID4(id)                              \
+    (unsigned long)(id)->li_fid.lf_id,         \
+    (unsigned long)(id)->li_fid.lf_group,      \
+    (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \
+    (unsigned long)(id)->li_stc.u.e3s.l3s_gen
+
+#endif
diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h
new file mode 100644 (file)
index 0000000..80518b1
--- /dev/null
@@ -0,0 +1,649 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LIBCFS_H__
+#define __LIBCFS_LIBCFS_H__
+
+#if !__GNUC__
+#define __attribute__(x)
+#endif
+
+#if defined(__linux__)
+#include <libcfs/linux/libcfs.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/libcfs.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/libcfs.h>
+#else
+#error Unsupported operating system.
+#endif
+
+#include "curproc.h"
+
+#ifndef __KERNEL__
+#include <stdio.h>
+#endif
+
+/* Controlled via configure key */
+/* #define LIBCFS_DEBUG */
+
+/* Fallback offsetof(): byte offset of member @memb inside type @typ, taken
+ * from the member's address in a @typ located at address 0.  Only defined
+ * when no offsetof macro is already in scope. */
+#ifndef offsetof
+# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
+#endif
+
+/* cardinality of array: number of elements in @a.  @a must be a real array
+ * object; applied to a pointer the expression divides the pointer size by
+ * the element size instead. */
+#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0])))
+
+/* Fallback definition, used only when container_of is not already defined
+ * as a macro. */
+#if !defined(container_of)
+/* given a pointer @ptr to the field @member embedded into type (usually
+ * struct) @type, return pointer to the embedding instance of @type. */
+#define container_of(ptr, type, member) \
+        ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+#endif
+
+#define container_of0(ptr, type, member)                        \
+({                                                              \
+        typeof(ptr) __ptr = (ptr);                              \
+        type       *__res;                                      \
+                                                                \
+        if (unlikely(IS_ERR(__ptr) || __ptr == NULL))           \
+                __res = (type *)__ptr;                          \
+        else                                                    \
+                __res = container_of(__ptr, type, member);      \
+        __res;                                                  \
+})
+
+/*
+ * true if @i has at most one bit set, i.e. @i is a power of 2.  Note that
+ * the expression also evaluates true for @i == 0.  @i is evaluated once.
+ */
+#define IS_PO2(i)                               \
+({                                              \
+        typeof(i) __i;                          \
+                                                \
+        __i = (i);                              \
+        !(__i & (__i - 1));                     \
+})
+
+/* Isolate the least significant set bit of @x (yields 0 when @x is 0).
+ * @x is evaluated twice, so it must not have side effects. */
+#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
+
+/*
+ *  Debugging
+ */
+extern unsigned int libcfs_subsystem_debug;
+extern unsigned int libcfs_stack;
+extern unsigned int libcfs_debug;
+extern unsigned int libcfs_printk;
+extern unsigned int libcfs_console_ratelimit;
+extern cfs_duration_t libcfs_console_max_delay;
+extern cfs_duration_t libcfs_console_min_delay;
+extern unsigned int libcfs_console_backoff;
+extern unsigned int libcfs_debug_binary;
+extern char debug_file_path[1024];
+
+int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys);
+int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys);
+
+/* Has there been an LBUG? */
+extern unsigned int libcfs_catastrophe;
+extern unsigned int libcfs_panic_on_lbug;
+
+/*
+ * struct ptldebug_header is defined in libcfs/<os>/libcfs.h
+ */
+
+#define PH_FLAG_FIRST_RECORD 1
+
+/* Debugging subsystems (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define S_UNDEFINED   0x00000001
+#define S_MDC         0x00000002
+#define S_MDS         0x00000004
+#define S_OSC         0x00000008
+#define S_OST         0x00000010
+#define S_CLASS       0x00000020
+#define S_LOG         0x00000040
+#define S_LLITE       0x00000080
+#define S_RPC         0x00000100
+#define S_MGMT        0x00000200
+#define S_LNET        0x00000400
+#define S_LND         0x00000800 /* ALL LNDs */
+#define S_PINGER      0x00001000
+#define S_FILTER      0x00002000
+/* unused */
+#define S_ECHO        0x00008000
+#define S_LDLM        0x00010000
+#define S_LOV         0x00020000
+/* unused */
+/* unused */
+/* unused */
+/* unused */
+/* unused */
+#define S_LMV         0x00800000 /* b_new_cmd */
+/* unused */
+#define S_SEC         0x02000000 /* upcall cache */
+#define S_GSS         0x04000000 /* b_new_cmd */
+/* unused */
+#define S_MGC         0x10000000
+#define S_MGS         0x20000000
+#define S_FID         0x40000000 /* b_new_cmd */
+#define S_FLD         0x80000000 /* b_new_cmd */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+
+/* Debugging masks (32 bits, non-overlapping) */
+/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */
+#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
+#define D_INODE       0x00000002
+#define D_SUPER       0x00000004
+#define D_EXT2        0x00000008 /* anything from ext2_debug */
+#define D_MALLOC      0x00000010 /* print malloc, free information */
+#define D_CACHE       0x00000020 /* cache-related items */
+#define D_INFO        0x00000040 /* general information */
+#define D_IOCTL       0x00000080 /* ioctl related information */
+#define D_NETERROR    0x00000100 /* network errors */
+#define D_NET         0x00000200 /* network communications */
+#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
+#define D_BUFFS       0x00000800
+#define D_OTHER       0x00001000
+#define D_DENTRY      0x00002000
+#define D_NETTRACE    0x00004000
+#define D_PAGE        0x00008000 /* bulk page handling */
+#define D_DLMTRACE    0x00010000
+#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
+#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
+#define D_HA          0x00080000 /* recovery and failover */
+#define D_RPCTRACE    0x00100000 /* for distributed debugging */
+#define D_VFSTRACE    0x00200000
+#define D_READA       0x00400000 /* read-ahead */
+#define D_MMAP        0x00800000
+#define D_CONFIG      0x01000000
+#define D_CONSOLE     0x02000000
+#define D_QUOTA       0x04000000
+#define D_SEC         0x08000000
+/* keep these in sync with lnet/{utils,libcfs}/debug.c */
+
+#define D_CANTMASK   (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE)
+
+#ifndef DEBUG_SUBSYSTEM
+# define DEBUG_SUBSYSTEM S_UNDEFINED
+#endif
+
+#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600))         /* jiffies */
+#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */
+#define CDEBUG_DEFAULT_BACKOFF   2
+/* Per-call-site state handed to the debug-message functions as "cdls";
+ * CDEBUG_LIMIT declares one static instance per expansion.
+ * NOTE(review): the field notes below are inferred from the names only --
+ * confirm against the code that consumes this state. */
+typedef struct {
+        cfs_time_t      cdls_next;      /* presumably: time of next allowed message */
+        int             cdls_count;     /* presumably: message counter */
+        cfs_duration_t  cdls_delay;     /* presumably: current delay between messages */
+} cfs_debug_limit_state_t;
+
+/* Controlled via configure key */
+/* #define CDEBUG_ENABLED */
+
+#if defined(__KERNEL__) || (defined(__arch_lib__) && !defined(LUSTRE_UTILS))
+
+#ifdef CDEBUG_ENABLED
+/*
+ * Core debug-message macro.  After CHECK_STACK(), the message is handed to
+ * libcfs_debug_msg() when either
+ *  - @mask contains any D_CANTMASK bit, or
+ *  - @mask intersects libcfs_debug AND DEBUG_SUBSYSTEM intersects
+ *    libcfs_subsystem_debug.
+ * @cdls is forwarded unchanged.  @mask is evaluated more than once.
+ */
+#define __CDEBUG(cdls, mask, format, a...)                              \
+do {                                                                    \
+        CHECK_STACK();                                                  \
+                                                                        \
+        if (((mask) & D_CANTMASK) != 0 ||                               \
+            ((libcfs_debug & (mask)) != 0 &&                            \
+             (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0))          \
+                libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask,           \
+                                 __FILE__, __FUNCTION__, __LINE__,      \
+                                 format, ## a);                         \
+} while (0)
+
+#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a)
+
+/*
+ * Like CDEBUG(), but passes the address of a static
+ * cfs_debug_limit_state_t to __CDEBUG(), so every expansion of this macro
+ * carries its own state object.
+ */
+#define CDEBUG_LIMIT(mask, format, a...)        \
+do {                                            \
+        static cfs_debug_limit_state_t cdls;    \
+                                                \
+        __CDEBUG(&cdls, mask, format, ## a);    \
+} while (0)
+
+#else /* CDEBUG_ENABLED */
+#define CDEBUG(mask, format, a...) (void)(0)
+#define CDEBUG_LIMIT(mask, format, a...) (void)(0)
+#warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!"
+#endif
+
+#else
+
+/*
+ * Fallback CDEBUG() for builds that do not take the kernel/arch-lib branch
+ * above: only messages whose @mask contains a D_CANTMASK bit are emitted,
+ * written to stderr and prefixed with "(file:line:function())".  All other
+ * messages are dropped.
+ */
+#define CDEBUG(mask, format, a...)                                      \
+do {                                                                    \
+        if (((mask) & D_CANTMASK) != 0)                                 \
+                fprintf(stderr, "(%s:%d:%s()) " format,                 \
+                        __FILE__, __LINE__, __FUNCTION__, ## a);        \
+} while (0)
+
+#define CDEBUG_LIMIT CDEBUG
+
+#endif /* !__KERNEL__ */
+
+/*
+ * Lustre Error Checksum: XOR of the three lowest hexadecimal digits
+ * (4-bit groups at bits 0-3, 4-7 and 8-11) of @hexnum.  The result is in
+ * the range 0x0..0xf.  @hexnum is evaluated three times.
+ */
+#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \
+                           ((hexnum) >> 8 & 0xf))
+
+#define CWARN(format, a...)          CDEBUG_LIMIT(D_WARNING, format, ## a)
+#define CERROR(format, a...)         CDEBUG_LIMIT(D_ERROR, format, ## a)
+#define CEMERG(format, a...)         CDEBUG_LIMIT(D_EMERG, format, ## a)
+
+#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a)
+#define LCONSOLE_INFO(format, a...)  CDEBUG_LIMIT(D_CONSOLE, format, ## a)
+#define LCONSOLE_WARN(format, a...)  CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a)
+#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \
+                           "%x-%x: " format, errnum, LERRCHKSUM(errnum),  ## a)
+#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a)
+
+#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a)
+
+#ifdef CDEBUG_ENABLED
+
+#define GOTO(label, rc)                                                 \
+do {                                                                    \
+        long GOTO__ret = (long)(rc);                                    \
+        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
+               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
+               (signed long)GOTO__ret);                                 \
+        goto label;                                                     \
+} while (0)
+#else
+#define GOTO(label, rc) do { ((void)(rc)); goto label; } while (0)
+#endif
+
+/* Controlled via configure key */
+/* #define CDEBUG_ENTRY_EXIT */
+
+#ifdef CDEBUG_ENTRY_EXIT
+
+/*
+ * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise
+ * there will be a warning in osx.
+ */
+#define RETURN(rc)                                                      \
+do {                                                                    \
+        typeof(rc) RETURN__ret = (rc);                                  \
+        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
+               (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
+        EXIT_NESTING;                                                   \
+        return RETURN__ret;                                             \
+} while (0)
+
+/*
+ * Function-entry trace: expands ENTRY_NESTING, then logs "Process entered"
+ * at D_TRACE.  NOTE: the expansion is two statements (ENTRY_NESTING;
+ * followed by a do/while), so it must not be used as the sole body of an
+ * unbraced if/else.
+ */
+#define ENTRY                                                           \
+ENTRY_NESTING;                                                          \
+do {                                                                    \
+        CDEBUG(D_TRACE, "Process entered\n");                           \
+} while (0)
+
+#define EXIT                                                            \
+do {                                                                    \
+        CDEBUG(D_TRACE, "Process leaving\n");                           \
+        EXIT_NESTING;                                                   \
+} while(0)
+#else /* !CDEBUG_ENTRY_EXIT */
+
+#define RETURN(rc) return (rc)
+#define ENTRY                           do { } while (0)
+#define EXIT                            do { } while (0)
+
+#endif /* !CDEBUG_ENTRY_EXIT */
+
+/*
+ * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses
+ * Lustre RETURN(NULL) macro.
+ */
+#if defined(NULL)
+#undef NULL
+#endif
+
+#define NULL ((void *)0)
+
+#define LUSTRE_SRV_LNET_PID      LUSTRE_LNET_PID
+
+#ifdef __KERNEL__
+
+#include <libcfs/list.h>
+
+struct libcfs_ioctl_data;                       /* forward ref */
+
+/* One ioctl handler: a list node plus the callback.  Instances are what
+ * libcfs_register_ioctl()/libcfs_deregister_ioctl() take as argument. */
+struct libcfs_ioctl_handler {
+        struct list_head item;  /* presumably links the handler into the registry */
+        /* callback receiving the ioctl command number and its data */
+        int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data);
+};
+
+/* Define a struct libcfs_ioctl_handler named @ident whose list node is
+ * initialized with CFS_LIST_HEAD_INIT on itself and whose callback is
+ * @func.  The initializers are positional, so their order must match the
+ * field order of struct libcfs_ioctl_handler. */
+#define DECLARE_IOCTL_HANDLER(ident, func)                      \
+        struct libcfs_ioctl_handler ident = {                   \
+                /* .item = */ CFS_LIST_HEAD_INIT(ident.item),   \
+                /* .handle_ioctl = */ func                      \
+        }
+
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
+
+/* libcfs tcpip */
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask);
+int libcfs_ipif_enumerate(char ***names);
+void libcfs_ipif_free_enumeration(char **names, int n);
+int libcfs_sock_listen(cfs_socket_t **sockp, __u32 ip, int port, int backlog);
+int libcfs_sock_accept(cfs_socket_t **newsockp, cfs_socket_t *sock);
+void libcfs_sock_abort_accept(cfs_socket_t *sock);
+int libcfs_sock_connect(cfs_socket_t **sockp, int *fatal,
+                        __u32 local_ip, int local_port,
+                        __u32 peer_ip, int peer_port);
+int libcfs_sock_setbuf(cfs_socket_t *socket, int txbufsize, int rxbufsize);
+int libcfs_sock_getbuf(cfs_socket_t *socket, int *txbufsize, int *rxbufsize);
+int libcfs_sock_getaddr(cfs_socket_t *socket, int remote, __u32 *ip, int *port);
+int libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout);
+int libcfs_sock_read(cfs_socket_t *sock, void *buffer, int nob, int timeout);
+void libcfs_sock_release(cfs_socket_t *sock);
+
+/* libcfs watchdogs */
+struct lc_watchdog;
+
+/* Add a watchdog which fires after "time" milliseconds of delay.  You have to
+ * touch it once to enable it. */
+struct lc_watchdog *lc_watchdog_add(int time,
+                                    void (*cb)(pid_t pid, void *),
+                                    void *data);
+
+/* Enables a watchdog and resets its timer. */
+void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms);
+void lc_watchdog_touch(struct lc_watchdog *lcw);
+
+/* Disable a watchdog; touch it to restart it. */
+void lc_watchdog_disable(struct lc_watchdog *lcw);
+
+/* Clean up the watchdog */
+void lc_watchdog_delete(struct lc_watchdog *lcw);
+
+/* Dump a debug log */
+void lc_watchdog_dumplog(pid_t pid, void *data);
+
+/* __KERNEL__ */
+#endif
+
+/* need both kernel and user-land acceptor */
+#define LNET_ACCEPTOR_MIN_RESERVED_PORT    512
+#define LNET_ACCEPTOR_MAX_RESERVED_PORT    1023
+
+/*
+ * libcfs pseudo device operations
+ *
+ * struct cfs_psdev_t and
+ * cfs_psdev_register() and
+ * cfs_psdev_deregister() are declared in
+ * libcfs/<os>/cfs_prim.h
+ *
+ * It's just draft now.
+ */
+
+struct cfs_psdev_file {
+        unsigned long   off;
+        void            *private_data;
+        unsigned long   reserved1;
+        unsigned long   reserved2;
+};
+
+struct cfs_psdev_ops {
+        int (*p_open)(unsigned long, void *);
+        int (*p_close)(unsigned long, void *);
+        int (*p_read)(struct cfs_psdev_file *, char *, unsigned long);
+        int (*p_write)(struct cfs_psdev_file *, char *, unsigned long);
+        int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *);
+};
+
+/*
+ * generic time manipulation functions.
+ */
+
+/* Non-zero iff @t1 is after @t2; implemented as cfs_time_before(t2, t1). */
+static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2)
+{
+        return cfs_time_before(t2, t1);
+}
+
+/* Non-zero iff @t1 is after or equal to @t2; implemented as
+ * cfs_time_beforeq(t2, t1). */
+static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2)
+{
+        return cfs_time_beforeq(t2, t1);
+}
+
+/*
+ * Current time in whole seconds since the UNIX epoch, obtained through
+ * cfs_fs_time_current() and cfs_fs_time_sec().
+ */
+static inline time_t cfs_unix_seconds(void)
+{
+        cfs_fs_time_t now;
+
+        cfs_fs_time_current(&now);
+
+        return (time_t)cfs_fs_time_sec(&now);
+}
+
+/* Absolute time lying @seconds seconds after the current time. */
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+        cfs_time_t now = cfs_time_current();
+
+        return cfs_time_add(now, cfs_time_seconds(seconds));
+}
+
+/*
+ * Difference @large - @small expressed in microseconds.  When @result is
+ * non-NULL the same difference is also stored there, split into
+ * tv_sec/tv_usec by dividing by ONE_MILLION.
+ */
+static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small,
+                                   struct timeval *result)
+{
+        long usecs = (long) (
+                (large->tv_sec - small->tv_sec) * ONE_MILLION +
+                (large->tv_usec - small->tv_usec));
+
+        /* caller is only interested in the scalar difference */
+        if (result == NULL)
+                return usecs;
+
+        result->tv_sec  = usecs / ONE_MILLION;
+        result->tv_usec = usecs % ONE_MILLION;
+
+        return usecs;
+}
+
+#define CFS_RATELIMIT(seconds)                                  \
+({                                                              \
+        /*                                                      \
+         * XXX nikita: non-portable initializer                 \
+         */                                                     \
+        static time_t __next_message = 0;                       \
+        int result;                                             \
+                                                                \
+        if (cfs_time_after(cfs_time_current(), __next_message)) \
+                result = 1;                                     \
+        else {                                                  \
+                __next_message = cfs_time_shift(seconds);       \
+                result = 0;                                     \
+        }                                                       \
+        result;                                                 \
+})
+
+/* Bundle of the call-site information that the debug-message functions
+ * otherwise take as separate arguments (cdls, subsystem, file, function,
+ * line); see DEBUG_MSG_DATA_INIT for the matching initializer. */
+struct libcfs_debug_msg_data {
+        cfs_debug_limit_state_t *msg_cdls;      /* limit state, as "cdls" elsewhere */
+        int                      msg_subsys;    /* subsystem value */
+        const char              *msg_file;      /* source file name */
+        const char              *msg_fn;        /* function name */
+        int                      msg_line;      /* source line number */
+};
+
+/* Brace-enclosed designated initializer for struct libcfs_debug_msg_data,
+ * setting all five fields from the macro arguments. */
+#define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \
+        .msg_cdls           = (cdls),       \
+        .msg_subsys         = (subsystem),  \
+        .msg_file           = (file),       \
+        .msg_fn             = (func),       \
+        .msg_line           = (ln)          \
+    }
+
+
+extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
+                              int subsys, int mask,
+                              const char *file, const char *fn, const int line,
+                              const char *format1, va_list args,
+                              const char *format2, ...)
+        __attribute__ ((format (printf, 9, 10)));
+
+#define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args)   \
+    libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL)
+
+#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...)    \
+    libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a)
+
+#define cdebug_va(cdls, mask, file, func, line, fmt, args)      do {          \
+        CHECK_STACK();                                                        \
+                                                                              \
+        if (((mask) & D_CANTMASK) != 0 ||                                     \
+            ((libcfs_debug & (mask)) != 0 &&                                  \
+             (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0))                \
+                libcfs_debug_vmsg(cdls, DEBUG_SUBSYSTEM, (mask),              \
+                                  (file), (func), (line), fmt, args);         \
+} while(0);
+
+#define cdebug(cdls, mask, file, func, line, fmt, a...) do {                  \
+        CHECK_STACK();                                                        \
+                                                                              \
+        if (((mask) & D_CANTMASK) != 0 ||                                     \
+            ((libcfs_debug & (mask)) != 0 &&                                  \
+             (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0))                \
+                libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask),               \
+                                 (file), (func), (line), fmt, ## a);          \
+} while(0);
+
+extern void libcfs_assertion_failed(const char *expr, const char *file,
+                                    const char *fn, const int line);
+
+/*
+ * Report a slow operation: if more than @seconds seconds have elapsed since
+ * @now (the time the caller recorded when the operation started), log an
+ * error through CERROR containing @msg and the elapsed time in seconds.
+ */
+static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg)
+{
+        /* use the caller's threshold rather than a fixed 15 seconds */
+        if (cfs_time_after(cfs_time_current(),
+                           cfs_time_add(now, cfs_time_seconds(seconds))))
+                CERROR("slow %s "CFS_TIME_T" sec\n", msg,
+                       cfs_duration_sec(cfs_time_sub(cfs_time_current(),now)));
+}
+
+/*
+ * Fill @tv with the current time, in the spirit of the Linux kernel's
+ * do_gettimeofday(): the time is read with cfs_fs_time_current() and
+ * converted with cfs_fs_time_usec().
+ */
+static inline void cfs_fs_timeval(struct timeval *tv)
+{
+        cfs_fs_time_t now;
+
+        cfs_fs_time_current(&now);
+        cfs_fs_time_usec(&now, tv);
+}
+
+/*
+ * Return a valid time-out based on the user supplied one.  Currently the
+ * only check is that the time-out is not shorter than CFS_TICK; shorter
+ * values are raised to CFS_TICK.
+ */
+static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout)
+{
+        if (timeout >= CFS_TICK)
+                return timeout;
+
+        return CFS_TICK;
+}
+
+/*
+ * Universal memory allocator API
+ */
+enum cfs_alloc_flags {
+        /* allocation is not allowed to block */
+        CFS_ALLOC_ATOMIC = 0x1,
+        /* allocation is allowed to block */
+        CFS_ALLOC_WAIT   = 0x2,
+        /* allocation should return zeroed memory */
+        CFS_ALLOC_ZERO   = 0x4,
+        /* allocation is allowed to call file-system code to free/clean
+         * memory */
+        CFS_ALLOC_FS     = 0x8,
+        /* allocation is allowed to do io to free/clean memory */
+        CFS_ALLOC_IO     = 0x10,
+        /* don't report allocation failure to the console */
+        CFS_ALLOC_NOWARN = 0x20,
+        /* standard allocator flag combination */
+        CFS_ALLOC_STD    = CFS_ALLOC_FS | CFS_ALLOC_IO,
+        CFS_ALLOC_USER   = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO,
+};
+
+/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */
+enum cfs_alloc_page_flags {
+        /* allow to return page beyond KVM. It has to be mapped into KVM by
+         * cfs_page_map(); */
+        CFS_ALLOC_HIGH   = 0x40,
+        CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH,
+};
+
+/*
+ * Drop into debugger, if possible. Implementation is provided by platform.
+ */
+
+void cfs_enter_debugger(void);
+
+/*
+ * Defined by platform
+ */
+void cfs_daemonize(char *str);
+int cfs_daemonize_ctxt(char *str);
+cfs_sigset_t cfs_get_blocked_sigs(void);
+cfs_sigset_t cfs_block_allsigs(void);
+cfs_sigset_t cfs_block_sigs(cfs_sigset_t bits);
+void cfs_restore_sigs(cfs_sigset_t);
+int cfs_signal_pending(void);
+void cfs_clear_sigpending(void);
+/*
+ * XXX Liang:
+ * these macros should be removed in the future,
+ * we keep them just for keeping libcfs compatible
+ * with other branches.
+ */
+#define libcfs_daemonize(s)     cfs_daemonize(s)
+#define cfs_sigmask_lock(f)     do { f= 0; } while (0)
+#define cfs_sigmask_unlock(f)   do { f= 0; } while (0)
+
+int convert_server_error(__u64 ecode);
+int convert_client_oflag(int cflag, int *result);
+
+/*
+ * Stack-tracing filling.
+ */
+
+/*
+ * Platform-dependent data-type to hold stack frames.
+ */
+struct cfs_stack_trace;
+
+/*
+ * Fill @trace with current back-trace.
+ */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace);
+
+/*
+ * Return instruction pointer for frame @frame_no. NULL if @frame_no is
+ * invalid.
+ */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no);
+
+/*
+ * Universal open flags.
+ */
+#define CFS_O_ACCMODE           0003
+#define CFS_O_CREAT             0100
+#define CFS_O_EXCL              0200
+#define CFS_O_NOCTTY            0400
+#define CFS_O_TRUNC             01000
+#define CFS_O_APPEND            02000
+#define CFS_O_NONBLOCK          04000
+#define CFS_O_NDELAY            CFS_O_NONBLOCK
+#define CFS_O_SYNC              010000
+#define CFS_O_ASYNC             020000
+#define CFS_O_DIRECT            040000
+#define CFS_O_LARGEFILE         0100000
+#define CFS_O_DIRECTORY         0200000
+#define CFS_O_NOFOLLOW          0400000
+#define CFS_O_NOATIME           01000000
+
+/* convert local open flags to universal open flags */
+int cfs_oflags2univ(int flags);
+/* convert universal open flags to local open flags */
+int cfs_univ2oflags(int flags);
+
+#define _LIBCFS_H
+
+#endif /* __LIBCFS_LIBCFS_H__ */
diff --git a/libcfs/include/libcfs/linux/.cvsignore b/libcfs/include/libcfs/linux/.cvsignore
new file mode 100644 (file)
index 0000000..3dda729
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile.in
+Makefile
diff --git a/libcfs/include/libcfs/linux/Makefile.am b/libcfs/include/libcfs/linux/Makefile.am
new file mode 100644 (file)
index 0000000..072a7ad
--- /dev/null
@@ -0,0 +1,3 @@
+EXTRA_DIST := kp30.h libcfs.h linux-fs.h linux-lock.h linux-mem.h      \
+       linux-prim.h linux-time.h linux-tcpip.h lltrace.h               \
+       portals_compat25.h portals_utils.h
diff --git a/libcfs/include/libcfs/linux/kp30.h b/libcfs/include/libcfs/linux/kp30.h
new file mode 100644 (file)
index 0000000..19355ed
--- /dev/null
@@ -0,0 +1,379 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_KP30_H__
+#define __LIBCFS_LINUX_KP30_H__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#ifdef __KERNEL__
+#ifndef AUTOCONF_INCLUDED
+# include <linux/config.h>
+#endif
+# include <linux/kernel.h>
+# include <linux/mm.h>
+# include <linux/string.h>
+# include <linux/stat.h>
+# include <linux/init.h>
+# include <linux/errno.h>
+# include <linux/unistd.h>
+# include <asm/system.h>
+# include <linux/kmod.h>
+# include <linux/notifier.h>
+# include <linux/fs.h>
+# include <linux/miscdevice.h>
+# include <linux/vmalloc.h>
+# include <linux/time.h>
+# include <linux/slab.h>
+# include <linux/interrupt.h>
+# include <linux/highmem.h>
+# include <linux/module.h>
+# include <linux/version.h>
+# include <lnet/lnet.h>
+# include <linux/smp_lock.h>
+# include <asm/atomic.h>
+# include <asm/uaccess.h>
+# include <linux/rwsem.h>
+# include <linux/proc_fs.h>
+# include <linux/file.h>
+# include <linux/smp.h>
+# include <linux/ctype.h>
+# include <linux/compiler.h>
+# ifdef HAVE_MM_INLINE
+#  include <linux/mm_inline.h>
+# endif
+# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#  include <linux/kallsyms.h>
+#  include <linux/moduleparam.h>
+# endif
+
+#include <libcfs/linux/portals_compat25.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define schedule_work schedule_task
+#define prepare_work(wq,cb,cbdata)                                            \
+do {                                                                          \
+        INIT_TQUEUE((wq), 0, 0);                                              \
+        PREPARE_TQUEUE((wq), (cb), (cbdata));                                 \
+} while (0)
+
+#define PageUptodate Page_Uptodate
+#define our_recalc_sigpending(current) recalc_sigpending(current)
+#define num_online_cpus() smp_num_cpus
+/* Pre-2.5 kernels: voluntary reschedule point -- call schedule() only when
+ * the current task has need_resched set. */
+static inline void our_cond_resched(void)
+{
+        if (!current->need_resched)
+                return;
+
+        schedule ();
+}
+#define work_struct_t                   struct tq_struct
+#define cfs_get_work_data(type,field,data)   (data)
+#else
+
+#ifdef HAVE_3ARGS_INIT_WORK
+
+#define prepare_work(wq,cb,cbdata)                                            \
+do {                                                                          \
+        INIT_WORK((wq), (void *)(cb), (void *)(cbdata));                      \
+} while (0)
+
+#define cfs_get_work_data(type,field,data)   (data)
+
+#else
+
+#define prepare_work(wq,cb,cbdata)                                            \
+do {                                                                          \
+        INIT_WORK((wq), (void *)(cb));                                        \
+} while (0)
+
+#define cfs_get_work_data(type,field,data) container_of(data,type,field)
+
+#endif
+
+#define wait_on_page wait_on_page_locked
+#define our_recalc_sigpending(current) recalc_sigpending()
+#define strtok(a,b) strpbrk(a, b)
+/* 2.5+ kernels: voluntary reschedule point, delegating to cond_resched(). */
+static inline void our_cond_resched(void)
+{
+        cond_resched();
+}
+#define work_struct_t      struct work_struct
+
+#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
+
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0)
+
+#define LIBCFS_PANIC(msg)            panic(msg)
+
+/* ------------------------------------------------------------------- */
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
+#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
+
+#define PORTAL_MODULE_USE       MOD_INC_USE_COUNT
+#define PORTAL_MODULE_UNUSE     MOD_DEC_USE_COUNT
+#else
+
+#define PORTAL_SYMBOL_REGISTER(x)
+#define PORTAL_SYMBOL_UNREGISTER(x)
+
+#define PORTAL_SYMBOL_GET(x) symbol_get(x)
+#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
+
+#define PORTAL_MODULE_USE       try_module_get(THIS_MODULE)
+#define PORTAL_MODULE_UNUSE     module_put(THIS_MODULE)
+
+#endif
+
+/******************************************************************************/
+/* Module parameter support */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define CFS_MODULE_PARM(name, t, type, perm, desc) \
+        MODULE_PARM(name, t);\
+        MODULE_PARM_DESC(name, desc)
+
+#else
+# define CFS_MODULE_PARM(name, t, type, perm, desc) \
+        module_param(name, type, perm);\
+        MODULE_PARM_DESC(name, desc)
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9))
+# define CFS_SYSFS_MODULE_PARM  0 /* no sysfs module parameters */
+#else
+# define CFS_SYSFS_MODULE_PARM  1 /* module parameters accessible via sysfs */
+#endif
+/******************************************************************************/
+
+#if (__GNUC__)
+/* Use the special GNU C __attribute__ hack to have the compiler check the
+ * printf style argument string against the actual argument count and
+ * types.
+ */
+#ifdef printf
+# warning printf has been defined as a macro...
+# undef printf
+#endif
+
+#endif /* __GNUC__ */
+
+# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
+# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
+# define time(a) CURRENT_TIME
+
+#ifndef num_possible_cpus
+#define num_possible_cpus() NR_CPUS
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+#define i_size_read(a) ((a)->i_size)
+#endif
+
+#else  /* !__KERNEL__ */
+# include <stdio.h>
+# include <stdlib.h>
+#if defined(__CYGWIN__)
+# include <cygwin-ioctl.h>
+#else
+# include <stdint.h>
+#endif
+# include <unistd.h>
+# include <time.h>
+# include <limits.h>
+# include <errno.h>
+# include <sys/ioctl.h>                         /* for _IOWR */
+#ifndef _IOWR
+#include "ioctl.h"
+#endif
+
+# define CFS_MODULE_PARM(name, t, type, perm, desc)
+#define PORTAL_SYMBOL_GET(x) inter_module_get(#x)
+#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
+
+#endif /* End of !__KERNEL__ */
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT  0
+
+#define LWT_MEMORY   (16<<20)
+
+#ifndef KLWT_SUPPORT
+# if defined(__KERNEL__)
+#  if !defined(BITS_PER_LONG)
+#   error "BITS_PER_LONG not defined"
+#  endif
+# elif !defined(__WORDSIZE)
+#  error "__WORDSIZE not defined"
+# else
+#  define BITS_PER_LONG __WORDSIZE
+# endif
+
+/* kernel hasn't defined this?
+ *
+ * One light-weight trace event record.  The trailing pad member exists only
+ * when BITS_PER_LONG > 32.
+ * NOTE(review): field meanings are inferred from the names (timestamp,
+ * location string, task, four parameters) -- confirm against the LWT
+ * implementation. */
+typedef struct {
+        long long   lwte_when;
+        char       *lwte_where;
+        void       *lwte_task;
+        long        lwte_p1;
+        long        lwte_p2;
+        long        lwte_p3;
+        long        lwte_p4;
+# if BITS_PER_LONG > 32
+        long        lwte_pad;
+# endif
+} lwt_event_t;
+#endif /* !KLWT_SUPPORT */
+
+#if LWT_SUPPORT
+# ifdef __KERNEL__
+#  if !KLWT_SUPPORT
+
+typedef struct _lwt_page {
+        struct list_head     lwtp_list;
+        struct page         *lwtp_page;
+        lwt_event_t         *lwtp_events;
+} lwt_page_t;
+
+typedef struct {
+        int                lwtc_current_index;
+        lwt_page_t        *lwtc_current_page;
+} lwt_cpu_t;
+
+extern int       lwt_enabled;
+extern lwt_cpu_t lwt_cpus[];
+
+/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
+ * This stuff is meant for finding specific problems; it never stays in
+ * production code... */
+
+#define LWTSTR(n)       #n
+#define LWTWHERE(f,l)   f ":" LWTSTR(l)
+#define LWT_EVENTS_PER_PAGE (CFS_PAGE_SIZE / sizeof (lwt_event_t))
+
+/*
+ * Record one trace event for the current CPU when lwt_enabled is set.
+ * p1..p4 are arbitrary values; each is stored cast to long.
+ *
+ * With local interrupts disabled, the next slot on this CPU's current page
+ * is claimed; once the page is full the cursor moves to the next page on the
+ * list and the index restarts at 0.  The claimed slot is then filled in.
+ *
+ * Macro-local variables carry an __lwt_ prefix so that caller arguments
+ * named e.g. 'p', 'e', 'cpu' or 'flags' are not captured by the macro.
+ */
+#define LWT_EVENT(p1, p2, p3, p4)                                       \
+do {                                                                    \
+        unsigned long    __lwt_flags;                                   \
+        lwt_cpu_t       *__lwt_cpu;                                     \
+        lwt_page_t      *__lwt_page;                                    \
+        lwt_event_t     *__lwt_ev;                                      \
+                                                                        \
+        if (lwt_enabled) {                                              \
+                local_irq_save (__lwt_flags);                           \
+                                                                        \
+                __lwt_cpu = &lwt_cpus[smp_processor_id()];              \
+                __lwt_page = __lwt_cpu->lwtc_current_page;              \
+                __lwt_ev = &__lwt_page->lwtp_events[                    \
+                                __lwt_cpu->lwtc_current_index++];       \
+                                                                        \
+                if (__lwt_cpu->lwtc_current_index >=                    \
+                    LWT_EVENTS_PER_PAGE) {                              \
+                        __lwt_cpu->lwtc_current_page =                  \
+                                list_entry (__lwt_page->lwtp_list.next, \
+                                            lwt_page_t, lwtp_list);     \
+                        __lwt_cpu->lwtc_current_index = 0;              \
+                }                                                       \
+                                                                        \
+                __lwt_ev->lwte_when  = get_cycles();                    \
+                __lwt_ev->lwte_where = LWTWHERE(__FILE__,__LINE__);     \
+                __lwt_ev->lwte_task  = current;                         \
+                __lwt_ev->lwte_p1    = (long)(p1);                      \
+                __lwt_ev->lwte_p2    = (long)(p2);                      \
+                __lwt_ev->lwte_p3    = (long)(p3);                      \
+                __lwt_ev->lwte_p4    = (long)(p4);                      \
+                                                                        \
+                local_irq_restore (__lwt_flags);                        \
+        }                                                               \
+} while (0)
+
+#endif /* !KLWT_SUPPORT */
+
+extern int  lwt_init (void);
+extern void lwt_fini (void);
+extern int  lwt_lookup_string (int *size, char *knlptr,
+                               char *usrptr, int usrsize);
+extern int  lwt_control (int enable, int clear);
+extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+                          void *user_ptr, int user_size);
+# else  /* !__KERNEL__ */
+#  define LWT_EVENT(p1,p2,p3,p4)     /* no userland implementation yet */
+# endif /* __KERNEL__ */
+#endif /* LWT_SUPPORT */
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_LIBCFS_TYPE long
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
+#endif
+
+/*
+ * Poison patterns built from repeated 0x5a bytes, as an int, a long and a
+ * pointer.  The width of the constant follows BITS_PER_LONG.
+ * NOTE(review): in the 64-bit case LI_POISON casts a 64-bit constant to int,
+ * so the value is narrowed by the cast.
+ */
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long)0x5a5a5a5a)
+# define LP_POISON ((void *)(long)0x5a5a5a5a)
+#endif
+
+/* this is a bit chunky */
+
+#if defined(__KERNEL__)
+ #define _LWORDSIZE BITS_PER_LONG
+#else
+ #define _LWORDSIZE __WORDSIZE
+#endif
+
+#if (defined(__x86_64__) && (defined(__KERNEL__) || defined(CRAY_XT3))) || defined(HAVE_U64_LONG_LONG)
+/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPF64 "L"
+#elif (_LWORDSIZE == 32)
+# define LPU64 "%Lu"
+# define LPD64 "%Ld"
+# define LPX64 "%#Lx"
+# define LPF64 "L"
+#elif (_LWORDSIZE == 64)
+# define LPU64 "%lu"
+# define LPD64 "%ld"
+# define LPX64 "%#lx"
+# define LPF64 "l"
+#endif
+
+#ifdef HAVE_SIZE_T_LONG
+# define LPSZ  "%lu"
+#else
+# define LPSZ  "%u"
+#endif
+
+#ifdef HAVE_SSIZE_T_LONG
+# define LPSSZ "%ld"
+#else
+# define LPSSZ "%d"
+#endif
+
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#undef _LWORDSIZE
+
+#endif
diff --git a/libcfs/include/libcfs/linux/libcfs.h b/libcfs/include/libcfs/linux/libcfs.h
new file mode 100644 (file)
index 0000000..c873c2f
--- /dev/null
@@ -0,0 +1,173 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_LIBCFS_H__
+#define __LIBCFS_LINUX_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef HAVE_ASM_TYPES_H
+#include <asm/types.h>
+#else
+#include <libcfs/types.h>
+#endif
+
+#include <stdarg.h>
+#include <libcfs/linux/linux-time.h>
+#include <libcfs/linux/linux-mem.h>
+#include <libcfs/linux/linux-prim.h>
+#include <libcfs/linux/linux-lock.h>
+#include <libcfs/linux/linux-fs.h>
+#include <libcfs/linux/linux-tcpip.h>
+
+
+#ifdef __KERNEL__
+# include <linux/types.h>
+# include <linux/time.h>
+# include <asm/timex.h>
+#else
+# include <sys/types.h>
+# include <sys/time.h>
+/* Userspace stand-in for the kernel call.  The expansion deliberately has no
+ * trailing ';' so that "if (x) do_gettimeofday(&tv); else ..." still parses. */
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+typedef unsigned long long cycles_t;
+#endif
+
+#ifndef __KERNEL__
+/* Userspace byte flipping */
+# include <endian.h>
+# include <byteswap.h>
+# define __swab16(x) bswap_16(x)
+# define __swab32(x) bswap_32(x)
+# define __swab64(x) bswap_64(x)
+# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
+# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
+# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16_to_cpu(x) (x)
+#  define cpu_to_le16(x) (x)
+#  define le32_to_cpu(x) (x)
+#  define cpu_to_le32(x) (x)
+#  define le64_to_cpu(x) (x)
+#  define cpu_to_le64(x) (x)
+
+#  define be16_to_cpu(x) bswap_16(x)
+#  define cpu_to_be16(x) bswap_16(x)
+#  define be32_to_cpu(x) bswap_32(x)
+#  define cpu_to_be32(x) bswap_32(x)
+#  define be64_to_cpu(x) bswap_64(x)
+#  define cpu_to_be64(x) bswap_64(x)
+
+# else
+#  if __BYTE_ORDER == __BIG_ENDIAN
+#   define le16_to_cpu(x) bswap_16(x)
+#   define cpu_to_le16(x) bswap_16(x)
+#   define le32_to_cpu(x) bswap_32(x)
+#   define cpu_to_le32(x) bswap_32(x)
+#   define le64_to_cpu(x) bswap_64(x)
+#   define cpu_to_le64(x) bswap_64(x)
+
+#   define be16_to_cpu(x) (x)
+#   define cpu_to_be16(x) (x)
+#   define be32_to_cpu(x) (x)
+#   define cpu_to_be32(x) (x)
+#   define be64_to_cpu(x) (x)
+#   define cpu_to_be64(x) (x)
+
+#  else
+#   error "Unknown byte order"
+#  endif /* __BIG_ENDIAN */
+# endif /* __LITTLE_ENDIAN */
+#endif /* ! __KERNEL__ */
+
+/*
+ * Fixed-layout debug record header.  The packed attribute removes padding
+ * between members, so ph_usec (a __u64) directly follows six __u32 fields.
+ * NOTE(review): presumably this precedes each debug message in the trace
+ * buffer/file -- confirm against the code that writes it.
+ */
+struct ptldebug_header {
+        __u32 ph_len;
+        __u32 ph_flags;
+        __u32 ph_subsys;
+        __u32 ph_mask;
+        __u32 ph_cpu_id;
+        __u32 ph_sec;
+        __u64 ph_usec;
+        __u32 ph_stack;
+        __u32 ph_pid;
+        __u32 ph_extern_pid;
+        __u32 ph_line_num;
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+# include <linux/sched.h> /* THREAD_SIZE */
+#else
+# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
+#  define THREAD_SIZE 8192
+# endif
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#if defined(__KERNEL__) && !defined(__x86_64__)
+/*
+ * CDEBUG_STACK(): THREAD_SIZE minus the offset of the current frame within
+ * its THREAD_SIZE-sized region.  Presumably this is the number of stack bytes
+ * in use, assuming a THREAD_SIZE-aligned stack that grows downwards -- TODO
+ * confirm.  ia64 uses the DWARF CFA instead of the frame address.
+ */
+# ifdef  __ia64__
+#  define CDEBUG_STACK() (THREAD_SIZE -                                 \
+                          ((unsigned long)__builtin_dwarf_cfa() &       \
+                           (THREAD_SIZE - 1)))
+# else
+#  define CDEBUG_STACK() (THREAD_SIZE -                                 \
+                          ((unsigned long)__builtin_frame_address(0) &  \
+                           (THREAD_SIZE - 1)))
+# endif /* __ia64__ */
+
+/*
+ * If CDEBUG_STACK() exceeds 3/4 of THREAD_SIZE and is a new maximum
+ * (greater than libcfs_stack), remember it in libcfs_stack and log a
+ * D_WARNING message attributed to file/func/line.
+ */
+#define __CHECK_STACK(file, func, line)                                 \
+do {                                                                    \
+        unsigned long _stack = CDEBUG_STACK();                          \
+                                                                        \
+        if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) {        \
+                libcfs_stack = _stack;                                  \
+                libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING,      \
+                                 file, func, line,                      \
+                                 "maximum lustre stack %lu\n", _stack); \
+              /*panic("LBUG");*/                                        \
+        }                                                               \
+} while (0)
+#define CHECK_STACK()     __CHECK_STACK(__FILE__, __func__, __LINE__)
+#else /* !__KERNEL__ || __x86_64__ */
+/* No stack accounting here: the checks are no-ops and usage reads as 0. */
+#define __CHECK_STACK(X, Y, Z) do { } while(0)
+#define CHECK_STACK() do { } while(0)
+#define CDEBUG_STACK() (0L)
+#endif /* __KERNEL__ && !__x86_64__ */
+
+/* initial pid  */
+#define LUSTRE_LNET_PID          12345
+
+#define ENTRY_NESTING_SUPPORT (1)
+#define ENTRY_NESTING   do {;} while (0)
+#define EXIT_NESTING   do {;} while (0)
+#define __current_nesting_level() (0)
+
+/*
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in linux-curproc.c
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm)
+
+#if defined(__KERNEL__)
+#include <linux/capability.h>
+typedef kernel_cap_t cfs_kernel_cap_t;
+#else
+typedef __u32 cfs_kernel_cap_t;
+#endif
+
+#if defined(__KERNEL__)
+/*
+ * No stack-back-tracing in Linux for now.
+ */
+struct cfs_stack_trace {
+};
+
+#ifndef WITH_WATCHDOG
+#define WITH_WATCHDOG
+#endif
+
+#endif
+
+#endif /* __LIBCFS_LINUX_LIBCFS_H__ */
diff --git a/libcfs/include/libcfs/linux/linux-fs.h b/libcfs/include/libcfs/linux/linux-fs.h
new file mode 100644 (file)
index 0000000..7573322
--- /dev/null
@@ -0,0 +1,82 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines. 
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_FS_H__
+#define __LIBCFS_LINUX_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/mount.h>
+#else /* !__KERNEL__ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <mntent.h>
+#endif  /* __KERNEL__ */
+
+typedef struct file cfs_file_t;
+typedef struct dentry cfs_dentry_t;
+typedef struct dirent64 cfs_dirent_t;
+
+#ifdef __KERNEL__
+#define cfs_filp_size(f)               (i_size_read((f)->f_dentry->d_inode))
+#define cfs_filp_poff(f)                (&(f)->f_pos)
+
+/* 
+ * XXX Do we need to parse flags and mode in cfs_filp_open? 
+ */
+cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err);
+#define cfs_filp_close(f)                   filp_close(f, NULL)
+#define cfs_filp_read(fp, buf, size, pos)   (fp)->f_op->read((fp), (buf), (size), pos)
+#define cfs_filp_write(fp, buf, size, pos)  (fp)->f_op->write((fp), (buf), (size), pos)
+#define cfs_filp_fsync(fp)                  (fp)->f_op->fsync((fp), (fp)->f_dentry, 1)
+
+#define cfs_get_file(f)                     get_file(f)
+#define cfs_put_file(f)                     fput(f)
+#define cfs_file_count(f)                   file_count(f)
+
+typedef struct file_lock cfs_flock_t;
+#define cfs_flock_type(fl)                  ((fl)->fl_type)
+#define cfs_flock_set_type(fl, type)        do { (fl)->fl_type = (type); } while(0)
+#define cfs_flock_pid(fl)                   ((fl)->fl_pid)
+#define cfs_flock_set_pid(fl, pid)          do { (fl)->fl_pid = (pid); } while(0)
+#define cfs_flock_start(fl)                 ((fl)->fl_start)
+#define cfs_flock_set_start(fl, start)      do { (fl)->fl_start = (start); } while(0)
+#define cfs_flock_end(fl)                   ((fl)->fl_end)
+#define cfs_flock_set_end(fl, end)          do { (fl)->fl_end = (end); } while(0)
+
+ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset);
+
+#endif
+
+#endif
diff --git a/libcfs/include/libcfs/linux/linux-lock.h b/libcfs/include/libcfs/linux/linux-lock.h
new file mode 100644 (file)
index 0000000..4b51d1b
--- /dev/null
@@ -0,0 +1,105 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_LOCK_H__
+#define __LIBCFS_LINUX_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/smp_lock.h>
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized,
+ * although some of them are initialized on Linux. All locks
+ * declared by CFS_DECL_* should be initialized explicitly.
+ */
+
+
+/*
+ * spin_lock (use Linux kernel's primitives)
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ */
+
+/*
+ * rw_semaphore (use Linux kernel's primitives)
+ *
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+
+/*
+ * rwlock_t (use Linux kernel's primitives)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+
+/*
+ * mutex (each name maps directly onto the kernel call shown):
+ *
+ * - init_mutex(x)           -> init_MUTEX(x)
+ * - init_mutex_locked(x)    -> init_MUTEX_LOCKED(x)
+ * - mutex_up(x)             -> up(x)
+ * - mutex_down(x)           -> down(x)
+ * - mutex_down_trylock(x)   -> down_trylock(x)
+ */
+#define init_mutex(x)                   init_MUTEX(x)
+#define init_mutex_locked(x)            init_MUTEX_LOCKED(x)
+#define mutex_up(x)                     up(x)
+#define mutex_down(x)                   down(x)
+#define mutex_down_trylock(x)           down_trylock(x)
+
+/*
+ * completion (use Linux kernel's primitives)
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+
+/* __KERNEL__ */
+#else
+
+#include "../user-lock.h"
+
+/* __KERNEL__ */
+#endif
+#endif
diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h
new file mode 100644 (file)
index 0000000..fa4ba3d
--- /dev/null
@@ -0,0 +1,129 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_MEM_H__
+#define __LIBCFS_LINUX_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+# include <linux/mm.h>
+# include <linux/vmalloc.h>
+# include <linux/pagemap.h>
+# include <linux/slab.h>
+# ifdef HAVE_MM_INLINE
+#  include <linux/mm_inline.h>
+# endif
+
+typedef struct page                     cfs_page_t;
+#define CFS_PAGE_SIZE                   PAGE_CACHE_SIZE
+#define CFS_PAGE_SHIFT                  PAGE_CACHE_SHIFT
+#define CFS_PAGE_MASK                   (~((__u64)CFS_PAGE_SIZE-1))
+
+/* Return page_address() of 'page'. */
+static inline void *cfs_page_address(cfs_page_t *page)
+{
+        void *vaddr;
+
+        /*
+         * XXX nikita: keep this free of portals_debug_msg()
+         * (CDEBUG/ENTRY/EXIT) calls: they would recurse without end.
+         */
+        vaddr = page_address(page);
+        return vaddr;
+}
+
+/* Map 'page' with kmap() and hand back the address kmap() returned. */
+static inline void *cfs_kmap(cfs_page_t *page)
+{
+        void *mapped = kmap(page);
+
+        return mapped;
+}
+
+/* Release a mapping of 'page' by calling kunmap(). */
+static inline void cfs_kunmap(cfs_page_t *page)
+{
+        kunmap(page);
+}
+
+/* Take a reference on 'page' via get_page(). */
+static inline void cfs_get_page(cfs_page_t *page)
+{
+        get_page(page);
+}
+
+/* Return page_count() of 'page'. */
+static inline int cfs_page_count(cfs_page_t *page)
+{
+        int refs = page_count(page);
+
+        return refs;
+}
+
+#define cfs_page_index(p)       ((p)->index)
+
+/*
+ * Memory allocator
+ * XXX Liang: move these declare to public file
+ */
+extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+extern void  cfs_free(void *addr);
+
+extern void *cfs_alloc_large(size_t nr_bytes);
+extern void  cfs_free_large(void *addr);
+
+extern cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
+extern void __cfs_free_pages(cfs_page_t *page, unsigned int order);
+
+#define cfs_alloc_page(flags)  cfs_alloc_pages(flags, 0)
+#define __cfs_free_page(page)  __cfs_free_pages(page, 0)
+#define cfs_free_page(p)       __free_pages(p, 0)
+
+/*
+ * In Linux there is no way to determine whether current execution context is
+ * blockable.
+ */
+#define CFS_ALLOC_ATOMIC_TRY   CFS_ALLOC_ATOMIC
+
+/*
+ * SLAB allocator
+ * XXX Liang: move these declare to public file
+ */
+#ifdef HAVE_KMEM_CACHE
+typedef struct kmem_cache cfs_mem_cache_t;
+#else
+typedef kmem_cache_t cfs_mem_cache_t;
+#endif
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long);
+extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+/*
+ */
+#define CFS_DECL_MMSPACE                mm_segment_t __oldfs
+#define CFS_MMSPACE_OPEN                do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
+#define CFS_MMSPACE_CLOSE               set_fs(__oldfs)
+
+#else   /* !__KERNEL__ */
+#ifdef HAVE_ASM_PAGE_H
+#include <asm/page.h>           /* needed for PAGE_SIZE - rread */
+#endif
+
+#include <libcfs/user-prim.h>
+/* __KERNEL__ */
+#endif
+
+#endif /* __LIBCFS_LINUX_CFS_MEM_H__ */
diff --git a/libcfs/include/libcfs/linux/linux-prim.h b/libcfs/include/libcfs/linux/linux-prim.h
new file mode 100644 (file)
index 0000000..705499e
--- /dev/null
@@ -0,0 +1,311 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_PRIM_H__
+#define __LIBCFS_LINUX_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/proc_fs.h>
+#include <linux/mm.h>
+#include <linux/timer.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+
+#include <linux/miscdevice.h>
+#include <libcfs/linux/portals_compat25.h>
+#include <asm/div64.h>
+
+#include <libcfs/linux/linux-time.h>
+
+/*
+ * Pseudo device register
+ */
+typedef struct miscdevice              cfs_psdev_t;
+#define cfs_psdev_register(dev)                misc_register(dev)
+#define cfs_psdev_deregister(dev)      misc_deregister(dev)
+
+/*
+ * Sysctl register
+ */
+typedef struct ctl_table               cfs_sysctl_table_t;
+typedef struct ctl_table_header                cfs_sysctl_table_header_t;
+
+#ifdef HAVE_2ARGS_REGISTER_SYSCTL
+#define cfs_register_sysctl_table(t, a)        register_sysctl_table(t, a)
+#else
+#define cfs_register_sysctl_table(t, a) register_sysctl_table(t)
+#endif
+#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t)
+
+/*
+ * Symbol register
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define cfs_symbol_register(s, p)       inter_module_register(s, THIS_MODULE, p)
+#define cfs_symbol_unregister(s)        inter_module_unregister(s)
+#define cfs_symbol_get(s)               inter_module_get(s)
+#define cfs_symbol_put(s)               inter_module_put(s)
+#define cfs_module_get()                MOD_INC_USE_COUNT
+#define cfs_module_put()                MOD_DEC_USE_COUNT
+#else
+#define cfs_symbol_register(s, p)       do {} while(0)
+#define cfs_symbol_unregister(s)        do {} while(0)
+#define cfs_symbol_get(s)               symbol_get(s)
+#define cfs_symbol_put(s)               symbol_put(s)
+#define cfs_module_get()                try_module_get(THIS_MODULE)
+#define cfs_module_put()                module_put(THIS_MODULE)
+#endif
+
+/*
+ * Proc file system APIs
+ */
+typedef read_proc_t                     cfs_read_proc_t;
+typedef write_proc_t                    cfs_write_proc_t;
+typedef struct proc_dir_entry           cfs_proc_dir_entry_t;
+#define cfs_create_proc_entry(n, m, p)  create_proc_entry(n, m, p)
+#define cfs_free_proc_entry(e)          free_proc_entry(e)
+#define cfs_remove_proc_entry(n, e)     remove_proc_entry(n, e)
+
+/*
+ * Wait Queue
+ */
+#define CFS_TASK_INTERRUPTIBLE          TASK_INTERRUPTIBLE
+#define CFS_TASK_UNINT                  TASK_UNINTERRUPTIBLE
+
+typedef wait_queue_t                   cfs_waitlink_t;
+typedef wait_queue_head_t              cfs_waitq_t;
+
+typedef long                            cfs_task_state_t;
+
+#define cfs_waitq_init(w)               init_waitqueue_head(w)
+#define cfs_waitlink_init(l)            init_waitqueue_entry(l, current)
+#define cfs_waitq_add(w, l)             add_wait_queue(w, l)
+#define cfs_waitq_add_exclusive(w, l)   add_wait_queue_exclusive(w, l)
+#define cfs_waitq_forward(l, w)         do {} while(0)
+#define cfs_waitq_del(w, l)             remove_wait_queue(w, l)
+#define cfs_waitq_active(w)             waitqueue_active(w)
+#define cfs_waitq_signal(w)             wake_up(w)
+#define cfs_waitq_signal_nr(w,n)        wake_up_nr(w, n)
+#define cfs_waitq_broadcast(w)          wake_up_all(w)
+#define cfs_waitq_wait(l, s)            schedule()
+#define cfs_waitq_timedwait(l, s, t)    schedule_timeout(t)
+#define cfs_schedule_timeout(s, t)      schedule_timeout(t)
+#define cfs_schedule()                  schedule()
+
+/* Kernel thread */
+typedef int (*cfs_thread_t)(void *);
+
+/*
+ * Start a kernel thread running fn(arg) with the given flags and return
+ * whatever kernel_thread() returns.  current->journal_info is set to NULL
+ * for the duration of the call and restored afterwards (presumably so the
+ * new thread does not see the caller's journal handle -- TODO confirm).
+ */
+static inline int cfs_kernel_thread(int (*fn)(void *),
+                                    void *arg, unsigned long flags)
+{
+        void *saved_journal = current->journal_info;
+        int   result;
+
+        current->journal_info = NULL;
+        result = kernel_thread(fn, arg, flags);
+        current->journal_info = saved_journal;
+
+        return result;
+}
+
+
+/*
+ * Task struct
+ */
+typedef struct task_struct              cfs_task_t;
+#define cfs_current()                   current
+#define cfs_task_lock(t)                task_lock(t)
+#define cfs_task_unlock(t)              task_unlock(t)
+/*
+ * Save, clear and later restore current->journal_info around a region.
+ * CFS_DECL_JOURNAL_DATA declares the local 'journal_info' that
+ * CFS_PUSH_JOURNAL writes and CFS_POP_JOURNAL reads, so all three must be
+ * used within the same scope.
+ */
+#define CFS_DECL_JOURNAL_DATA           void *journal_info
+#define CFS_PUSH_JOURNAL                do {    \
+        journal_info = current->journal_info;   \
+        current->journal_info = NULL;           \
+        } while(0)
+#define CFS_POP_JOURNAL                 do {    \
+        current->journal_info = journal_info;   \
+        } while(0)
+
+/*
+ * Module interfaces: registers 'init' and 'fini' via module_init() and
+ * module_exit().  'name' and 'version' are accepted but not used by this
+ * expansion.  The expansion ends without a ';' -- the caller supplies it.
+ */
+#define cfs_module(name, version, init, fini) \
+module_init(init);                            \
+module_exit(fini)
+
+/*
+ * Signal
+ */
+typedef sigset_t                        cfs_sigset_t;
+
+/*
+ * Timer
+ */
+typedef struct timer_list cfs_timer_t;
+typedef  void (*timer_func_t)(unsigned long);
+
+#define cfs_init_timer(t)       init_timer(t)
+
+/*
+ * Prepare timer 't': run init_timer() on it, then store 'func' as its
+ * callback and 'arg' (cast to unsigned long) as the callback data.
+ * Nothing is scheduled here.
+ */
+static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg)
+{
+        init_timer(t);
+        t->data     = (unsigned long)arg;
+        t->function = func;
+}
+
+/* Counterpart of cfs_timer_init(); does nothing in this implementation. */
+static inline void cfs_timer_done(cfs_timer_t *t)
+{
+}
+
+/* (Re)schedule 't' for 'deadline' via mod_timer(); its result is ignored. */
+static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline)
+{
+        (void)mod_timer(t, deadline);
+}
+
+/* Cancel 't' via del_timer(); its result is ignored. */
+static inline void cfs_timer_disarm(cfs_timer_t *t)
+{
+        (void)del_timer(t);
+}
+
+/* Return timer_pending() for 't'. */
+static inline int  cfs_timer_is_armed(cfs_timer_t *t)
+{
+        int pending = timer_pending(t);
+
+        return pending;
+}
+
+/* Return the 'expires' field of 't'. */
+static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t)
+{
+        cfs_time_t when = t->expires;
+
+        return when;
+}
+
+
+/*
+ * Deschedule the caller for a while: mark the task TASK_UNINTERRUPTIBLE and
+ * pass 'ticks' to schedule_timeout().  The value schedule_timeout() returns
+ * is not used.
+ */
+static inline void cfs_pause(cfs_duration_t ticks)
+{
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        (void)schedule_timeout(ticks);
+}
+
+#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */
+/*
+ * Fallback for kernels without wait_event_timeout: sleep uninterruptibly on
+ * 'wq' (passed as an lvalue; the macro takes its address) until 'condition'
+ * holds or the deadline of 'timeout' jiffies from now has passed.
+ * 'ret' names a caller variable; it is written only on timeout.
+ *
+ * No local named __ret may be declared here: the wrapper below passes its
+ * own __ret as 'ret', and a shadowing declaration would swallow the result.
+ * The deadline test uses time_after() so it survives jiffies wrap-around.
+ * NOTE: every wake-up that finds neither the condition nor the deadline
+ * satisfied sleeps for a full 'timeout' again.
+ */
+#define __wait_event_timeout(wq, condition, timeout, ret)        \
+do {                                                             \
+       if (!(condition)) {                                      \
+               wait_queue_t __wait;                             \
+               unsigned long __expire;                          \
+                                                                 \
+               init_waitqueue_entry(&__wait, current);          \
+               __expire = (timeout) + jiffies;                  \
+               add_wait_queue(&wq, &__wait);                    \
+               for (;;) {                                       \
+                       set_current_state(TASK_UNINTERRUPTIBLE); \
+                       if (condition)                           \
+                               break;                           \
+                       if (time_after(jiffies, __expire)) {     \
+                               ret = jiffies - __expire;        \
+                               break;                           \
+                       }                                        \
+                       schedule_timeout(timeout);               \
+               }                                                \
+               current->state = TASK_RUNNING;                   \
+               remove_wait_queue(&wq, &__wait);                 \
+       }                                                        \
+} while (0)
+/*
+   retval == 0; condition met; we're good.
+   retval > 0; timed out.
+   NOTE(review): the stock wait_event_timeout used by the #else branch
+   reports the opposite (0 on timeout, remaining jiffies otherwise) --
+   callers should confirm which convention they rely on.
+*/
+#define cfs_waitq_wait_event_timeout(wq, condition, timeout)         \
+({                                                                   \
+       int __ret = 0;                                               \
+       if (!(condition))                                            \
+               __wait_event_timeout(wq, condition, timeout, __ret); \
+       __ret;                                                       \
+})
+#else
+#define cfs_waitq_wait_event_timeout  wait_event_timeout
+#endif
+
+#ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */
+/*
+ * Fallback for kernels without wait_event_interruptible_timeout: sleep
+ * interruptibly on 'wq' (passed as an lvalue; the macro takes its address)
+ * until 'condition' holds, the deadline of 'timeout' jiffies from now has
+ * passed, or a signal is pending for the current task.
+ * 'ret' names a caller variable; it is written on timeout (positive value)
+ * or on a pending signal (-ERESTARTSYS), and left untouched otherwise.
+ *
+ * No local named __ret may be declared here: the wrapper below passes its
+ * own __ret as 'ret', and a shadowing declaration would swallow the result.
+ * The deadline test uses time_after() so it survives jiffies wrap-around.
+ * NOTE: every wake-up that finds nothing to report sleeps for a full
+ * 'timeout' again.
+ */
+#define __wait_event_interruptible_timeout(wq, condition, timeout, ret)   \
+do {                                                           \
+       if (!(condition)) {                                    \
+               wait_queue_t __wait;                           \
+               unsigned long __expire;                        \
+                                                               \
+               init_waitqueue_entry(&__wait, current);        \
+               __expire = (timeout) + jiffies;                \
+               add_wait_queue(&wq, &__wait);                  \
+               for (;;) {                                     \
+                       set_current_state(TASK_INTERRUPTIBLE); \
+                       if (condition)                         \
+                               break;                         \
+                       if (time_after(jiffies, __expire)) {   \
+                               ret = jiffies - __expire;      \
+                               break;                         \
+                       }                                      \
+                       if (!signal_pending(current)) {        \
+                               schedule_timeout(timeout);     \
+                               continue;                      \
+                       }                                      \
+                       ret = -ERESTARTSYS;                    \
+                       break;                                 \
+               }                                              \
+               current->state = TASK_RUNNING;                 \
+               remove_wait_queue(&wq, &__wait);               \
+       }                                                      \
+} while (0)
+
+/*
+   retval == 0; condition met; we're good.
+   retval < 0; interrupted by signal.
+   retval > 0; timed out.
+   NOTE(review): the stock wait_event_interruptible_timeout used by the
+   #else branch returns 0 on timeout and the remaining jiffies when the
+   condition became true -- callers should confirm which convention they
+   rely on.
+*/
+#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout) \
+({                                                                \
+       int __ret = 0;                                            \
+       if (!(condition))                                         \
+               __wait_event_interruptible_timeout(wq, condition, \
+                                               timeout, __ret);  \
+       __ret;                                                    \
+})
+#else
+#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout
+#endif
+
+#else   /* !__KERNEL__ */
+
+typedef struct proc_dir_entry           cfs_proc_dir_entry_t;
+#include "../user-prim.h"
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/libcfs/include/libcfs/linux/linux-tcpip.h b/libcfs/include/libcfs/linux/linux-tcpip.h
new file mode 100644 (file)
index 0000000..fb2ac93
--- /dev/null
@@ -0,0 +1,66 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines. 
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_CFS_TCP_H__
+#define __LIBCFS_LINUX_CFS_TCP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <net/sock.h>
+
+typedef struct socket   cfs_socket_t;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
+# define sk_allocation  allocation
+# define sk_data_ready  data_ready
+# define sk_write_space write_space
+# define sk_user_data   user_data
+# define sk_prot        prot
+# define sk_sndbuf      sndbuf
+# define sk_rcvbuf      rcvbuf
+# define sk_socket      socket
+# define sk_sleep       sleep
+#endif
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+# define sk_wmem_queued wmem_queued
+# define sk_err         err
+# define sk_route_caps  route_caps
+#endif
+
+#define SOCK_SNDBUF(so)         ((so)->sk->sk_sndbuf)
+#define SOCK_WMEM_QUEUED(so)    ((so)->sk->sk_wmem_queued)
+#define SOCK_ERROR(so)          ((so)->sk->sk_err)
+#define SOCK_TEST_NOSPACE(so)   test_bit(SOCK_NOSPACE, &(so)->flags)
+
+#else   /* !__KERNEL__ */
+
+#include "../user-tcpip.h"
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/libcfs/include/libcfs/linux/linux-time.h b/libcfs/include/libcfs/linux/linux-time.h
new file mode 100644 (file)
index 0000000..3d4cdf5
--- /dev/null
@@ -0,0 +1,327 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for Linux (kernel and user-level).
+ *
+ */
+
+#ifndef __LIBCFS_LINUX_LINUX_TIME_H__
+#define __LIBCFS_LINUX_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ *  cfs_time_t        represents point in time. This is internal kernel
+ *                    time rather than "wall clock". This time bears no
+ *                    relation to gettimeofday().
+ *
+ *  cfs_duration_t    represents time interval with resolution of internal
+ *                    platform clock
+ *
+ *  cfs_fs_time_t     represents an instant in world-visible time. This is
+ *                    used in file-system time-stamps
+ *
+ *  cfs_time_t     cfs_time_current(void);
+ *  cfs_time_t     cfs_time_add    (cfs_time_t, cfs_duration_t);
+ *  cfs_duration_t cfs_time_sub    (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_before (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ *  cfs_duration_t cfs_duration_build(int64_t);   (compiled out below, #if 0)
+ *  cfs_duration_t cfs_time_seconds  (int);
+ *
+ *  time_t         cfs_duration_sec (cfs_duration_t);
+ *  void           cfs_duration_usec(cfs_duration_t, struct timeval *);
+ *  void           cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ *  void           cfs_fs_time_current(cfs_fs_time_t *);
+ *  time_t         cfs_fs_time_sec    (cfs_fs_time_t *);
+ *  void           cfs_fs_time_usec   (cfs_fs_time_t *, struct timeval *);
+ *  void           cfs_fs_time_nsec   (cfs_fs_time_t *, struct timespec *);
+ *  int            cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ *  int            cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ *  CFS_TIME_T          printf format string for cfs_time_t
+ *  CFS_DURATION_T      printf format string for cfs_duration_t
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION 1000000
+
+#ifdef __KERNEL__
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/version.h>
+#include <linux/time.h>
+#include <asm/div64.h>
+
+#include <libcfs/linux/portals_compat25.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+/*
+ * old kernels---CURRENT_TIME is struct timeval
+ */
+typedef struct timeval cfs_fs_time_t;
+
+/* cfs_fs_time_t is a struct timeval on these kernels: copy it field by field. */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        v->tv_sec  = t->tv_sec;
+        v->tv_usec = t->tv_usec;
+}
+
+/* Convert the timeval-based @t to a timespec in @s (usec scaled to nsec). */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        const long nsec_per_usec = 1000;
+
+        s->tv_sec  = t->tv_sec;
+        s->tv_nsec = t->tv_usec * nsec_per_usec;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+/* Flatten @t into one microsecond count so two stamps compare as integers. */
+static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+        unsigned long long flat = (unsigned long long)t->tv_sec;
+
+        flat *= ONE_MILLION;
+        flat += t->tv_usec;
+        return flat;
+}
+
+#define CURRENT_KERN_TIME        xtime
+
+#else
+/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+
+/*
+ * post 2.5 kernels.
+ */
+
+#include <linux/jiffies.h>
+
+typedef struct timespec cfs_fs_time_t;
+
+/* Convert the timespec-based @t to a timeval in @v (nsec truncated to usec). */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        const long nsec_per_usec = 1000;
+
+        v->tv_sec  = t->tv_sec;
+        v->tv_usec = t->tv_nsec / nsec_per_usec;
+}
+
+/* cfs_fs_time_t is a struct timespec on these kernels: copy it field by field. */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        s->tv_sec  = t->tv_sec;
+        s->tv_nsec = t->tv_nsec;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ */
+/* Flatten @t into one nanosecond count so two stamps compare as integers. */
+static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+        unsigned long long flat = (unsigned long long)t->tv_sec;
+
+        flat *= ONE_BILLION;
+        flat += t->tv_nsec;
+        return flat;
+}
+
+#define CURRENT_KERN_TIME        CURRENT_TIME
+
+/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+#endif
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef unsigned long cfs_time_t;      /* jiffies */
+typedef long cfs_duration_t;
+
+
+/* Current kernel-internal time: a snapshot of the jiffies counter. */
+static inline cfs_time_t cfs_time_current(void)
+{
+        cfs_time_t now = jiffies;
+
+        return now;
+}
+
+/* Current time in seconds, taken from the CURRENT_SECONDS compat macro. */
+static inline time_t cfs_time_current_sec(void)
+{
+        time_t now_sec = CURRENT_SECONDS;
+
+        return now_sec;
+}
+
+/* Move the time point @t by the (possibly negative) duration @d. */
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+        cfs_time_t shifted = t;
+
+        shifted += d;
+        return shifted;
+}
+
+/* Signed distance from @t2 to @t1; the unsigned difference is reinterpreted. */
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+        cfs_time_t delta = t1 - t2;
+
+        return (cfs_duration_t)delta;
+}
+
+/* Non-zero when @t1 precedes @t2, as decided by the kernel's time_before(). */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+        int is_earlier = time_before(t1, t2);
+
+        return is_earlier;
+}
+
+/* Non-zero when @t1 precedes or equals @t2 (kernel time_before_eq()). */
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+        int not_later = time_before_eq(t1, t2);
+
+        return not_later;
+}
+
+/* Store the current world-visible time (CURRENT_KERN_TIME) into @t. */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+        cfs_fs_time_t now = CURRENT_KERN_TIME;
+
+        *t = now;
+}
+
+/* Whole-seconds part of the file-system time stamp @t. */
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+        time_t secs = t->tv_sec;
+
+        return secs;
+}
+
+/* Non-zero when stamp @t1 is strictly earlier than stamp @t2. */
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        unsigned long long lhs = __cfs_fs_time_flat(t1);
+        unsigned long long rhs = __cfs_fs_time_flat(t2);
+
+        return lhs < rhs;
+}
+
+/* Non-zero when stamp @t1 is earlier than or equal to stamp @t2. */
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        unsigned long long lhs = __cfs_fs_time_flat(t1);
+        unsigned long long rhs = __cfs_fs_time_flat(t2);
+
+        return lhs <= rhs;
+}
+
+#if 0
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+#if (BITS_PER_LONG == 32)
+        /* We cannot use do_div(t, ONE_BILLION), do_div can only process
+         * 64 bits n and 32 bits base */
+        int64_t  t = nano * HZ;
+        do_div(t, 1000);
+        do_div(t, 1000000);
+        return (cfs_duration_t)t;
+#else
+        return (nano * HZ / ONE_BILLION);
+#endif
+}
+#endif
+
+/* Express @seconds as a duration in clock ticks (HZ ticks per second). */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+        cfs_duration_t ticks = seconds;
+
+        ticks *= HZ;
+        return ticks;
+}
+
+/* Whole seconds contained in the tick-based duration @d. */
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+        time_t whole_secs = d / HZ;
+
+        return whole_secs;
+}
+
+/*
+ * Split the tick-based duration @d into whole seconds and leftover
+ * microseconds, stored in @s.
+ *
+ * The sub-second remainder can be as large as HZ - 1 ticks.  Scaling it by
+ * ONE_MILLION fits in a 32-bit long only while HZ is at most 2148, so for
+ * larger HZ on 32-bit the product is computed in 64 bits and divided with
+ * do_div().  The remainder is widened *before* the multiply; widening the
+ * result afterwards would be too late, the 32-bit product has already
+ * overflowed by then.
+ *
+ * NOTE(review): like cfs_duration_nsec(), the 64-bit path treats the
+ * remainder as unsigned, so it assumes @d is non-negative.
+ */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+#if (BITS_PER_LONG == 32) && (HZ > 2147)
+        __u64 t;
+
+        s->tv_sec = d / HZ;
+        t = (__u64)(d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION;
+        do_div(t, HZ);
+        s->tv_usec = t;
+#else
+        s->tv_sec = d / HZ;
+        s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION) / HZ;
+#endif
+}
+
+/*
+ * Split the tick-based duration @d into whole seconds and leftover
+ * nanoseconds, stored in @s.
+ *
+ * NOTE(review): the sub-second remainder is promoted to an unsigned 64-bit
+ * value by the ONE_BILLION multiply, so negative durations do not look
+ * supported -- confirm with callers.
+ */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+#if (BITS_PER_LONG == 32)
+        __u64 scaled;
+#endif
+
+        s->tv_sec = d / HZ;
+#if (BITS_PER_LONG == 32)
+        /* 64-bit by 32-bit division goes through do_div() on 32-bit builds */
+        scaled = (d - s->tv_sec * HZ) * ONE_BILLION;
+        do_div(scaled, HZ);
+        s->tv_nsec = scaled;
+#else
+        s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ;
+#endif
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+
+#define cfs_time_current_64 get_jiffies_64
+
+/* 64-bit counterpart of cfs_time_add(): @t advanced by @d, modulo 2^64. */
+static inline __u64 cfs_time_add_64(__u64 t, __u64 d)
+{
+        __u64 sum = t;
+
+        sum += d;
+        return sum;
+}
+
+/* 64-bit time point lying @seconds away from the current 64-bit time. */
+static inline __u64 cfs_time_shift_64(int seconds)
+{
+        __u64 now = cfs_time_current_64();
+
+        return cfs_time_add_64(now, cfs_time_seconds(seconds));
+}
+
+/*
+ * Non-zero when the 64-bit time point @t1 is strictly earlier than @t2.
+ *
+ * The difference is taken in unsigned arithmetic, which wraps by
+ * definition, and only then viewed as signed.  Subtracting two values
+ * already cast to __s64 could overflow, which is undefined behaviour.
+ */
+static inline int cfs_time_before_64(__u64 t1, __u64 t2)
+{
+        return (__s64)(t2 - t1) > 0;
+}
+
+/*
+ * Non-zero when the 64-bit time point @t1 is earlier than or equal to @t2.
+ *
+ * The difference is taken in unsigned arithmetic, which wraps by
+ * definition, and only then viewed as signed.  Subtracting two values
+ * already cast to __s64 could overflow, which is undefined behaviour.
+ */
+static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2)
+{
+        return (__s64)(t2 - t1) >= 0;
+}
+
+#else
+#define cfs_time_current_64 cfs_time_current
+#define cfs_time_add_64     cfs_time_add
+#define cfs_time_shift_64   cfs_time_shift
+#define cfs_time_before_64  cfs_time_before
+#define cfs_time_beforeq_64 cfs_time_beforeq
+#endif
+
+/*
+ * One jiffy
+ */
+#define CFS_TICK                (1)
+
+#define CFS_TIME_T              "%lu"
+#define CFS_DURATION_T          "%ld"
+
+#else   /* !__KERNEL__ */
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+
+#define CFS_TIME_T              "%lu"
+
+#include <libcfs/user-time.h>
+
+#endif /* __KERNEL__ */
+
+/* __LIBCFS_LINUX_LINUX_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/linux/lltrace.h b/libcfs/include/libcfs/linux/lltrace.h
new file mode 100644 (file)
index 0000000..1ddd03d
--- /dev/null
@@ -0,0 +1,28 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_LLTRACE_H__
+#define __LIBCFS_LINUX_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <lnet/types.h>
+#include <libcfs/kp30.h>
+#include <lnet/lnetctl.h>
+#include <linux/limits.h>
+#include <asm/page.h>
+#include <linux/version.h>
+
+#endif
diff --git a/libcfs/include/libcfs/linux/portals_compat25.h b/libcfs/include/libcfs/linux/portals_compat25.h
new file mode 100644 (file)
index 0000000..2d6b782
--- /dev/null
@@ -0,0 +1,125 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__
+#define __LIBCFS_LINUX_PORTALS_COMPAT_H__
+
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
+# else
+#  define SIGNAL_MASK_ASSERT() \
+   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
+# endif
+#else
+# define SIGNAL_MASK_ASSERT()
+#endif
+// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
+
+/*
+ * Per-kernel spellings of the signal-mask lock, usermode helper and
+ * pending-signal helpers.  The task argument is parenthesised in every
+ * variant so that any pointer-valued expression may be passed.
+ */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
+  spin_lock_irqsave(&(task)->sighand->siglock, flags)
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
+  spin_unlock_irqrestore(&(task)->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp, 1)
+# define RECALC_SIGPENDING         recalc_sigpending()
+# define CLEAR_SIGPENDING          clear_tsk_thread_flag(current,       \
+                                                         TIF_SIGPENDING)
+# define CURRENT_SECONDS           get_seconds()
+# define smp_num_cpus              num_online_cpus()
+
+
+#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
+  spin_lock_irqsave(&(task)->sighand->siglock, flags)
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
+  spin_unlock_irqrestore(&(task)->sighand->siglock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
+# define RECALC_SIGPENDING         recalc_sigpending()
+# define CLEAR_SIGPENDING          (current->sigpending = 0)
+# define CURRENT_SECONDS           CURRENT_TIME
+# define wait_event_interruptible_exclusive(wq, condition)              \
+        wait_event_interruptible(wq, condition)
+
+#else /* 2.4.x */
+
+# define SIGNAL_MASK_LOCK(task, flags)                                  \
+  spin_lock_irqsave(&(task)->sigmask_lock, flags)
+# define SIGNAL_MASK_UNLOCK(task, flags)                                \
+  spin_unlock_irqrestore(&(task)->sigmask_lock, flags)
+# define USERMODEHELPER(path, argv, envp)                               \
+  call_usermodehelper(path, argv, envp)
+# define RECALC_SIGPENDING         recalc_sigpending(current)
+# define CLEAR_SIGPENDING          (current->sigpending = 0)
+# define CURRENT_SECONDS           CURRENT_TIME
+# define wait_event_interruptible_exclusive(wq, condition)              \
+        wait_event_interruptible(wq, condition)
+
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+#define UML_PID(tsk) ((tsk)->thread.extern_pid)
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid)
+#else
+#define UML_PID(tsk) ((tsk)->pid)
+#endif
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# define THREAD_NAME(comm, len, fmt, a...)                              \
+        snprintf(comm, len,fmt"|%d", ## a, UML_PID(current))
+#else
+# define THREAD_NAME(comm, len, fmt, a...)                              \
+        snprintf(comm, len, fmt, ## a)
+#endif
+
+#ifdef HAVE_PAGE_LIST
+/* 2.4 alloc_page users can use page->list */
+#define PAGE_LIST_ENTRY list
+#define PAGE_LIST(page) ((page)->list)
+#else
+/* 2.6 alloc_page users can use page->lru */
+#define PAGE_LIST_ENTRY lru
+#define PAGE_LIST(page) ((page)->lru)
+#endif
+
+#ifndef HAVE_CPU_ONLINE
+/*
+ * Fallback for kernels without cpu_online(): test bit @cpu of
+ * cpu_online_map.  The shift is done on an unsigned long so that it is
+ * well defined for every bit of a long-sized map, and @cpu is
+ * parenthesised so that expressions may be passed.
+ */
+#define cpu_online(cpu) ((1UL << (cpu)) & (cpu_online_map))
+#endif
+#ifndef HAVE_CPUMASK_T
+typedef unsigned long cpumask_t;
+#define cpu_set(cpu, map) set_bit(cpu, &(map))
+#define cpus_clear(map) memset(&(map), 0, sizeof(cpumask_t))
+#endif
+
+#ifndef __user
+#define __user
+#endif
+
+/*
+ * sysctl handler compatibility: kernels from 2.6.8 on pass an explicit
+ * file position (ppos) to proc_dointvec()/proc_dostring() and to the
+ * handler prototype; older kernels do not, so the position is taken from
+ * filp->f_pos via DECLARE_LL_PROC_PPOS_DECL instead.
+ *
+ * The wrappers expand to plain expressions in both branches (no trailing
+ * semicolon), so they can be used as "rc = ll_proc_dointvec(...);" and
+ * inside if/else bodies alike.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
+#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos)        \
+        proc_dointvec(table, write, filp, buffer, lenp)
+#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos)        \
+        proc_dostring(table, write, filp, buffer, lenp)
+#define LL_PROC_PROTO(name)                                             \
+        name(cfs_sysctl_table_t *table, int write, struct file *filp,   \
+             void __user *buffer, size_t *lenp)
+#define DECLARE_LL_PROC_PPOS_DECL  loff_t *ppos = &filp->f_pos
+#else
+#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos)        \
+        proc_dointvec(table, write, filp, buffer, lenp, ppos)
+#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos)        \
+        proc_dostring(table, write, filp, buffer, lenp, ppos)
+#define LL_PROC_PROTO(name)                                             \
+        name(cfs_sysctl_table_t *table, int write, struct file *filp,   \
+             void __user *buffer, size_t *lenp, loff_t *ppos)
+#define DECLARE_LL_PROC_PPOS_DECL
+#endif
+
+#endif /* __LIBCFS_LINUX_PORTALS_COMPAT_H__ */
diff --git a/libcfs/include/libcfs/linux/portals_utils.h b/libcfs/include/libcfs/linux/portals_utils.h
new file mode 100644 (file)
index 0000000..4e76856
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__
+#define __LIBCFS_LINUX_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#ifdef __KERNEL__
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/smp_lock.h>
+#include <linux/poll.h>
+#include <linux/random.h>
+
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+# include <linux/tqueue.h>
+#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */
+# include <linux/workqueue.h>
+#endif
+#include <libcfs/linux/linux-mem.h>
+#include <libcfs/linux/linux-prim.h>
+#else /* !__KERNEL__ */
+
+#include <endian.h>
+#include <libcfs/list.h>
+
+#ifdef HAVE_LINUX_VERSION_H
+# include <linux/version.h>
+
+# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#  define BUG()                            /* workaround for module.h includes */
+#  include <linux/module.h>
+# endif
+#endif /* HAVE_LINUX_VERSION_H */
+
+#ifndef __CYGWIN__
+# include <sys/syscall.h>
+#else /* __CYGWIN__ */
+# include <windows.h>
+# include <windef.h>
+# include <netinet/in.h>
+#endif /* __CYGWIN__ */
+
+#endif /* !__KERNEL__ */
+#endif
diff --git a/libcfs/include/libcfs/list.h b/libcfs/include/libcfs/list.h
new file mode 100644 (file)
index 0000000..ed03bd5
--- /dev/null
@@ -0,0 +1,463 @@
+#ifndef __LIBCFS_LIST_H__
+#define __LIBCFS_LIST_H__
+
+#if defined (__linux__) && defined(__KERNEL__)
+
+#include <linux/list.h>
+
+#define CFS_LIST_HEAD_INIT(n)          LIST_HEAD_INIT(n)
+#define CFS_LIST_HEAD(n)               LIST_HEAD(n)
+#define CFS_INIT_LIST_HEAD(p)          INIT_LIST_HEAD(p)
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define CFS_HLIST_HEAD_INIT            HLIST_HEAD_INIT
+#define CFS_HLIST_HEAD(n)              HLIST_HEAD(n)
+#define CFS_INIT_HLIST_HEAD(p)         INIT_HLIST_HEAD(p)
+#define CFS_INIT_HLIST_NODE(p)         INIT_HLIST_NODE(p)
+#endif
+
+#else /* !defined (__linux__) || !defined(__KERNEL__) */
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+/*
+ * No-op stand-in for the kernel's prefetch() hint: it only evaluates its
+ * argument.  The argument is parenthesised so that any expression may be
+ * passed without precedence surprises.
+ */
+#ifndef __WINNT__
+#define prefetch(a) ((void)(a))
+#else
+#define prefetch(a) ((void *)(a))
+#endif
+
+struct list_head {
+       struct list_head *next, *prev;
+};
+
+typedef struct list_head list_t;
+
+#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define CFS_LIST_HEAD(name) \
+       struct list_head name = CFS_LIST_HEAD_INIT(name)
+
+#define CFS_INIT_LIST_HEAD(ptr) do { \
+       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
+} while (0)
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head * new,
+                             struct list_head * prev,
+                             struct list_head * next)
+{
+       /* hook the right-hand neighbour up first ... */
+       new->next = next;
+       next->prev = new;
+       /* ... then the left-hand one */
+       new->prev = prev;
+       prev->next = new;
+}
+
+/**
+ * list_add - insert an entry right behind a list head
+ * @new: entry to insert
+ * @head: list head the entry is placed after
+ *
+ * The entry becomes the first element of the list, which gives
+ * stack (LIFO) behaviour.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+       struct list_head *old_first = head->next;
+
+       __list_add(new, head, old_first);
+}
+
+/**
+ * list_add_tail - insert an entry right in front of a list head
+ * @new: entry to insert
+ * @head: list head the entry is placed before
+ *
+ * The entry becomes the last element of the list, which gives
+ * queue (FIFO) behaviour.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+       struct list_head *old_last = head->prev;
+
+       __list_add(new, old_last, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+       next->prev = prev;
+       prev->next = next;
+}
+
+/**
+ * list_del - unlink an entry from its list
+ * @entry: the element to unlink
+ *
+ * Only the neighbours are updated; @entry keeps its stale next/prev
+ * pointers, so list_empty() on it does not return true afterwards.
+ */
+static inline void list_del(struct list_head *entry)
+{
+       struct list_head *before = entry->prev;
+       struct list_head *after = entry->next;
+
+       __list_del(before, after);
+}
+
+/**
+ * list_del_init - unlink an entry and turn it into an empty list
+ * @entry: the element to unlink
+ *
+ * After the call @entry points at itself in both directions.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+       struct list_head *before = entry->prev;
+       struct list_head *after = entry->next;
+
+       __list_del(before, after);
+       CFS_INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - take an entry off its list and put it first on another
+ * @list: the entry to move
+ * @head: the head that will precede the entry
+ *
+ * This is not safe to use if @list is already on the same list as @head.
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+       list_del(list);
+       list_add(list, head);
+}
+
+/**
+ * list_move_tail - take an entry off its list and put it last on another
+ * @list: the entry to move
+ * @head: the head that will follow the entry
+ *
+ * This is not safe to use if @list is already on the same list as @head.
+ */
+static inline void list_move_tail(struct list_head *list,
+                                 struct list_head *head)
+{
+       list_del(list);
+       list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ *
+ * Returns non-zero when @head points back at itself, i.e. the list has no
+ * entries.  The list is only read, hence the const qualifier (matching
+ * hlist_empty() and hlist_unhashed()); existing callers are unaffected.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+       return head->next == head;
+}
+
+/*
+ * Link every entry of the non-empty @list in between @head and the
+ * entry that used to follow @head.  @list itself is left untouched.
+ */
+static inline void __list_splice(struct list_head *list,
+                                struct list_head *head)
+{
+       struct list_head *src_first = list->next;
+       struct list_head *src_last = list->prev;
+       struct list_head *dst_next = head->next;
+
+       /* front seam: head <-> first spliced entry */
+       src_first->prev = head;
+       head->next = src_first;
+
+       /* back seam: last spliced entry <-> old successor of head */
+       src_last->next = dst_next;
+       dst_next->prev = src_last;
+}
+
+/**
+ * list_splice - insert the entries of one list into another
+ * @list: the list whose entries are inserted.
+ * @head: the position in the receiving list they are inserted after.
+ *
+ * Nothing happens when @list is empty.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+       if (list_empty(list))
+               return;
+
+       __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - insert the entries of one list into another and
+ *                    reset the donor list.
+ * @list: the list whose entries are inserted.
+ * @head: the position in the receiving list they are inserted after.
+ *
+ * A non-empty @list is reinitialised to an empty list afterwards; an
+ * empty @list is left alone.
+ */
+static inline void list_splice_init(struct list_head *list,
+                                   struct list_head *head)
+{
+       if (list_empty(list))
+               return;
+
+       __list_splice(list, head);
+       CFS_INIT_LIST_HEAD(list);
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr:       the &struct list_head pointer.
+ * @type:      the type of the struct this is embedded in.
+ * @member:    the name of the list_struct within the struct.
+ */
+#define list_entry(ptr, type, member) \
+       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each       -       iterate over a list
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each(pos, head) \
+       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
+               pos = pos->next, prefetch(pos->next))
+
+/**
+ * list_for_each_safe  -       iterate over a list safe against removal of list entry
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @n:         another &struct list_head to use as temporary storage
+ * @head:      the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+       for (pos = (head)->next, n = pos->next; pos != (head); \
+               pos = n, n = pos->next)
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+struct hlist_head {
+       struct hlist_node *first;
+};
+
+struct hlist_node {
+       struct hlist_node *next, **pprev;
+};
+
+/*
+ * "NULL" might not be defined at this point
+ */
+#ifdef NULL
+#define NULL_P NULL
+#else
+#define NULL_P ((void *)0)
+#endif
+
+#define CFS_HLIST_HEAD_INIT { .first = NULL_P }
+#define CFS_HLIST_HEAD(name) struct hlist_head name = {  .first = NULL_P }
+#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P)
+#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P)
+
+#define HLIST_HEAD_INIT                CFS_HLIST_HEAD_INIT
+#define HLIST_HEAD(n)          CFS_HLIST_HEAD(n)
+#define INIT_HLIST_HEAD(p)     CFS_INIT_HLIST_HEAD(p)
+#define INIT_HLIST_NODE(p)     CFS_INIT_HLIST_NODE(p)
+
+/* Non-zero when node @h has no back-link, i.e. it is not on any hlist. */
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+       return h->pprev ? 0 : 1;
+}
+
+/* Non-zero when the hlist headed by @h has no first node. */
+static inline int hlist_empty(const struct hlist_head *h)
+{
+       return h->first ? 0 : 1;
+}
+
+/*
+ * Unlink @n: whatever pointed at @n now points at its successor, and the
+ * successor (if there is one) takes over @n's back-link.  @n's own fields
+ * are left as they were.
+ */
+static inline void __hlist_del(struct hlist_node *n)
+{
+       struct hlist_node **link = n->pprev;
+       struct hlist_node *succ = n->next;
+
+       *link = succ;
+       if (!succ)
+               return;
+       succ->pprev = link;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+       __hlist_del(n);
+}
+
+/* Unlink @n if it is currently hashed, then reset it to the unhashed state. */
+static inline void hlist_del_init(struct hlist_node *n)
+{
+       if (!n->pprev)
+               return;
+
+       __hlist_del(n);
+       INIT_HLIST_NODE(n);
+}
+
+/* Insert @n as the new first node of the hlist headed by @h. */
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+       struct hlist_node *old_first = h->first;
+
+       n->next = old_first;
+       /* the previous first node, if any, is now reached through n->next */
+       if (old_first)
+               old_first->pprev = &n->next;
+       h->first = n;
+       n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+                                       struct hlist_node *next)
+{
+       n->pprev = next->pprev;
+       n->next = next;
+       next->pprev = &n->next;
+       *(n->pprev) = n;
+}
+
+static inline void hlist_add_after(struct hlist_node *n,
+                                       struct hlist_node *next)
+{
+       next->next = n->next;
+       n->next = next;
+       next->pprev = &n->next;
+
+       if(next->next)
+               next->next->pprev  = &next->next;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+       for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
+            pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+       for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+            pos = n)
+
+/**
+ * hlist_for_each_entry        - iterate over list of given type
+ * @tpos:      the type * to use as a loop counter.
+ * @pos:       the &struct hlist_node to use as a loop counter.
+ * @head:      the head for your list.
+ * @member:    the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(tpos, pos, head, member)                   \
+       for (pos = (head)->first;                                        \
+            pos && ({ prefetch(pos->next); 1;}) &&                      \
+               ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+            pos = pos->next)
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
+ * @tpos:      the type * to use as a loop counter.
+ * @pos:       the &struct hlist_node to use as a loop counter.
+ * @member:    the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(tpos, pos, member)                \
+       for (pos = (pos)->next;                                          \
+            pos && ({ prefetch(pos->next); 1;}) &&                      \
+               ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+            pos = pos->next)
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
+ * @tpos:      the type * to use as a loop counter.
+ * @pos:       the &struct hlist_node to use as a loop counter.
+ * @member:    the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(tpos, pos, member)                    \
+       for (; pos && ({ prefetch(pos->next); 1;}) &&                    \
+               ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+            pos = pos->next)
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @tpos:      the type * to use as a loop counter.
+ * @pos:       the &struct hlist_node to use as a loop counter.
+ * @n:         another &struct hlist_node to use as temporary storage
+ * @head:      the head for your list.
+ * @member:    the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(tpos, pos, n, head, member)           \
+       for (pos = (head)->first;                                        \
+            pos && ({ n = pos->next; 1; }) &&                           \
+               ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
+            pos = n)
+
+#endif /* __linux__ && __KERNEL__ */
+
+#ifndef list_for_each_prev
+/**
+ * list_for_each_prev  -       iterate over a list in reverse order
+ * @pos:       the &struct list_head to use as a loop counter.
+ * @head:      the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
+               pos = pos->prev, prefetch(pos->prev))
+
+#endif /* list_for_each_prev */
+
+#ifndef list_for_each_entry
+/**
+ * list_for_each_entry  -       iterate over list of given type
+ * @pos:        the type * to use as a loop counter.
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member)                         \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+                    prefetch(pos->member.next);                        \
+            &pos->member != (head);                                    \
+            pos = list_entry(pos->member.next, typeof(*pos), member),  \
+            prefetch(pos->member.next))
+#endif /* list_for_each_entry */
+
+#ifndef list_for_each_entry_reverse
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos:        the type * to use as a loop counter.
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member)                  \
+       for (pos = list_entry((head)->prev, typeof(*pos), member);      \
+            prefetch(pos->member.prev), &pos->member != (head);        \
+            pos = list_entry(pos->member.prev, typeof(*pos), member))
+#endif /* list_for_each_entry_reverse */
+
+#ifndef list_for_each_entry_safe
+/**
+ * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
+ * @pos:        the type * to use as a loop counter.
+ * @n:          another type * to use as temporary storage
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member)                 \
+        for (pos = list_entry((head)->next, typeof(*pos), member),     \
+               n = list_entry(pos->member.next, typeof(*pos), member); \
+            &pos->member != (head);                                    \
+            pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#endif /* list_for_each_entry_safe */
+
+#ifndef list_for_each_entry_safe_from
+/**
+ * list_for_each_entry_safe_from
+ * @pos:        the type * to use as a loop cursor.
+ * @n:          another type * to use as temporary storage
+ * @head:       the head for your list.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member)                 \
+        for (n = list_entry(pos->member.next, typeof(*pos), member);        \
+             &pos->member != (head);                                        \
+             pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#endif /* list_for_each_entry_safe_from */
+
+#endif /* __LIBCFS_LIST_H__ */
diff --git a/libcfs/include/libcfs/lltrace.h b/libcfs/include/libcfs/lltrace.h
new file mode 100644 (file)
index 0000000..dbeae91
--- /dev/null
@@ -0,0 +1,167 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Compile with:
+ * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
+ */
+#ifndef __LIBCFS_LLTRACE_H__
+#define __LIBCFS_LLTRACE_H__
+
+#if defined(__linux__)
+#include <libcfs/linux/lltrace.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/lltrace.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/lltrace.h>
+#else
+#error Unsupported Operating System
+#endif
+
+/*
+ * Invoke jt_dbg_debug_kernel() with the command line
+ * "debug_kernel <fname> 1", echoing that command line to stderr first.
+ * Returns whatever jt_dbg_debug_kernel() returns.
+ */
+static inline int ltrace_write_file(char* fname)
+{
+        char *cmd[3] = { "debug_kernel", fname, "1" };
+
+        fprintf(stderr, "[ptlctl] %s %s %s\n", cmd[0], cmd[1], cmd[2]);
+        return jt_dbg_debug_kernel(3, cmd);
+}
+
+/*
+ * Invoke jt_dbg_clear_debug_buf() with the one-word command "clear",
+ * echoing the command to stderr first.
+ * Returns whatever jt_dbg_clear_debug_buf() returns.
+ */
+static inline int ltrace_clear(void)
+{
+        char *argv[1] = { "clear" };
+
+        fprintf(stderr, "[ptlctl] %s\n", argv[0]);
+
+        return jt_dbg_clear_debug_buf(1, argv);
+}
+
+/*
+ * Build the marker string "====<indent_level>=<text>" (cut off at PATH_MAX
+ * bytes including the terminator) and pass it to jt_dbg_mark_debug_buf()
+ * as the argument of a "mark" command.
+ * Returns whatever jt_dbg_mark_debug_buf() returns.
+ */
+static inline int ltrace_mark(int indent_level, char* text)
+{
+        char marker[PATH_MAX];
+        char *cmd[2] = { "mark", marker };
+
+        snprintf(marker, sizeof(marker), "====%d=%s", indent_level, text);
+        return jt_dbg_mark_debug_buf(2, cmd);
+}
+
+/*
+ * Invoke jt_dbg_list() with the command line "list applymasks",
+ * echoing it to stderr first.
+ * Returns whatever jt_dbg_list() returns.
+ */
+static inline int ltrace_applymasks(void)
+{
+        char *argv[2] = { "list", "applymasks" };
+
+        fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
+
+        return jt_dbg_list(2, argv);
+}
+
+
+/*
+ * Invoke jt_dbg_filter() with the command line "filter <subsys_or_mask>".
+ * Returns whatever jt_dbg_filter() returns.
+ */
+static inline int ltrace_filter(char* subsys_or_mask)
+{
+        char *cmd[2] = { "filter", subsys_or_mask };
+
+        return jt_dbg_filter(2, cmd);
+}
+
+/*
+ * Invoke jt_dbg_show() with the command line "show <subsys_or_mask>".
+ * Returns whatever jt_dbg_show() returns.
+ */
+static inline int ltrace_show(char* subsys_or_mask)
+{
+        char *cmd[2] = { "show", subsys_or_mask };
+
+        return jt_dbg_show(2, cmd);
+}
+
+/*
+ * Initialise the debug utilities and issue the default sequence of
+ * filter/show commands, finishing with "list applymasks".
+ *
+ * When LNET_DEV_ID is defined the LNET ioctl device is registered first.
+ *
+ * Returns the result of register_ioc_dev() when LNET_DEV_ID is defined and
+ * 0 otherwise.  The results of the individual filter/show/applymasks
+ * commands are ignored.
+ */
+static inline int ltrace_start(void)
+{
+        /* names filtered before "show all_types" is issued */
+        char *before_show[] = { "class", "nal", "portals" };
+        /* names filtered after "show all_types" */
+        char *after_show[] = { "trace", "malloc", "net",
+                               "page", "other", "info" };
+        unsigned int i;
+        int rc = 0;
+
+        dbg_initialize(0, NULL);
+#ifdef LNET_DEV_ID
+        rc = register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH,
+                              LNET_DEV_MAJOR, LNET_DEV_MINOR);
+#endif
+        for (i = 0; i < sizeof(before_show) / sizeof(before_show[0]); i++)
+                ltrace_filter(before_show[i]);
+
+        ltrace_show("all_types");
+
+        for (i = 0; i < sizeof(after_show) / sizeof(after_show[0]); i++)
+                ltrace_filter(after_show[i]);
+
+        ltrace_applymasks();
+
+        return rc;
+}
+
+
+/*
+ * Unregister the LNET ioctl device when LNET_DEV_ID is defined;
+ * otherwise this function does nothing.
+ */
+static inline void ltrace_stop(void)
+{
+#ifdef LNET_DEV_ID
+        unregister_ioc_dev(LNET_DEV_ID);
+#endif
+}
+
+/*
+ * Tell whether we are running outside User Mode Linux by probing for
+ * /dev/ubd.
+ *
+ * Return Values:
+ *   stat()'s result (0) when /dev/ubd exists, i.e. run under UML
+ *   1 when /dev/ubd does not exist, i.e. run on host
+ *   1 when the lookup failed for any other reason; a message is printed
+ *     to stderr and the host case is assumed
+ *
+ * A negative value is never returned.
+ */
+static inline int not_uml(void)
+{
+        struct stat buf;
+        int rc = stat("/dev/ubd", &buf);
+
+        if (rc >= 0)
+                return rc;
+
+        /* ENOENT is the expected answer on a host; anything else is
+         * reported but still treated as "host" */
+        if (errno != ENOENT)
+                fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
+
+        return 1;
+}
+
+#define LTRACE_MAX_NOB   256
+/*
+ * Build a "ps --no-headers -eo ..." command whose output format mimics a
+ * debug-log line header followed by each process's pid and command name
+ * (%p %c), and run it through system() with its output appended to fname.
+ *
+ * The command is assembled in a LTRACE_MAX_NOB byte buffer.  Each piece is
+ * bounded by the space actually left in the buffer; if the command does
+ * not fit, an error is printed and nothing is executed.
+ *
+ * NOTE: fname is pasted unquoted into a shell command line, so it must come
+ * from a trusted source and must not contain shell metacharacters.
+ */
+static inline void ltrace_add_processnames(char* fname)
+{
+        char cmdbuf[LTRACE_MAX_NOB];
+        struct timeval tv;
+        int nob = 0;
+        int rc;
+        int underuml = !not_uml();
+
+        gettimeofday(&tv, NULL);
+
+        rc = snprintf(cmdbuf, sizeof(cmdbuf), "ps --no-headers -eo \"");
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+        nob += rc;
+
+        /* Careful - these format strings need to match the CDEBUG
+         * formats in portals/linux/debug.c EXACTLY
+         */
+        rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                      "%02x:%06x:%d:%lu.%06lu ",
+                      S_RPC >> 24, D_VFSTRACE, 0,
+                      (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+        nob += rc;
+
+        if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
+                rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                              "(%s:%d:%s() %d | %d+%lu): ",
+                              "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0UL);
+        } else {
+                rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                              "(%s:%d:%s() %d+%lu): ",
+                              "lltrace.h", __LINE__, __FUNCTION__, 0, 0UL);
+        }
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+        nob += rc;
+
+        rc = snprintf(cmdbuf + nob, sizeof(cmdbuf) - nob,
+                      " %%p %%c\" >> %s", fname);
+        if (rc < 0 || rc >= (int)sizeof(cmdbuf) - nob)
+                goto too_long;
+
+        if (system(cmdbuf) == -1)
+                fprintf(stderr, "ltrace_add_processnames: cannot run ps\n");
+        return;
+
+too_long:
+        /* never hand a truncated command line to the shell */
+        fprintf(stderr, "ltrace_add_processnames: command too long, "
+                "not run\n");
+}
+
+#endif
diff --git a/libcfs/include/libcfs/portals_utils.h b/libcfs/include/libcfs/portals_utils.h
new file mode 100644 (file)
index 0000000..b79eb7e
--- /dev/null
@@ -0,0 +1,21 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#define __LIBCFS_PORTALS_UTILS_H__
+
+/*
+ * portals_utils.h
+ *
+ */
+#if defined(__linux__)
+#include <libcfs/linux/portals_utils.h>
+#elif defined(__APPLE__)
+#include <libcfs/darwin/portals_utils.h>
+#elif defined(__WINNT__)
+#include <libcfs/winnt/portals_utils.h>
+#else
+#error Unsupported Operating System
+#endif
+
+#endif
diff --git a/libcfs/include/libcfs/types.h b/libcfs/include/libcfs/types.h
new file mode 100644 (file)
index 0000000..71dd7fb
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _LIBCFS_TYPES_H
+#define _LIBCFS_TYPES_H
+
+/*
+ * This file was introduced to resolve XT3 (Catamount) build issues.
+ * The original idea was to move <lustre/types.h> here; however, at
+ * the time of this writing
+ * it's unclear what external dependencies are tied
+ * to that file (it's not just some source file #including it;
+ * there is some build/packaging infrastructure that includes it).
+ * Hopefully that will be resolved shortly, that file will
+ * be removed, its contents copied here and this comment can be deleted.
+ */
+
+#include <lustre/types.h>
+
+#endif
diff --git a/libcfs/include/libcfs/user-bitops.h b/libcfs/include/libcfs/user-bitops.h
new file mode 100644 (file)
index 0000000..d2eea0e
--- /dev/null
@@ -0,0 +1,102 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable bit operations API for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_BITOPS_H__
+#define __LIBCFS_USER_BITOPS_H__
+
+/*
+ * set bit nr in bitmap addr; returns previous value of bit nr (0 or 1).
+ *
+ * This is a plain read-modify-write of one word, with no locking or
+ * atomic instruction.
+ */
+static __inline__ int set_bit(int nr, unsigned long * addr)
+{
+        /* unsigned, so the top bit of the word is handled without relying
+         * on an unsigned-to-signed conversion */
+        unsigned long mask;
+
+        addr += nr / BITS_PER_LONG;
+        mask = 1UL << (nr & (BITS_PER_LONG - 1));
+        nr = (mask & *addr) != 0;
+        *addr |= mask;
+        return nr;
+}
+
+/*
+ * clear bit nr in bitmap addr; returns previous value of bit nr (0 or 1).
+ *
+ * This is a plain read-modify-write of one word, with no locking or
+ * atomic instruction.
+ */
+static __inline__ int clear_bit(int nr, unsigned long * addr)
+{
+        /* unsigned, so the top bit of the word is handled without relying
+         * on an unsigned-to-signed conversion */
+        unsigned long mask;
+
+        addr += nr / BITS_PER_LONG;
+        mask = 1UL << (nr & (BITS_PER_LONG - 1));
+        nr = (mask & *addr) != 0;
+        *addr &= ~mask;
+        return nr;
+}
+
+/* returns 1 if bit nr is set in bitmap addr and 0 otherwise; the bitmap is
+ * only read */
+static __inline__ int test_bit(int nr, const unsigned long * addr)
+{
+        const unsigned long word = addr[nr / BITS_PER_LONG];
+        const unsigned long mask = 1UL << (nr & (BITS_PER_LONG - 1));
+
+        return (word & mask) != 0;
+}
+
+/*
+ * Index (0-based, counted from the least significant end) of the lowest
+ * set bit in data, found by binary search: each step checks whether the
+ * lower half of the remaining bits is empty and, if so, skips over it.
+ *
+ * The result is not meaningful when data is 0.
+ */
+static __inline__ unsigned long __ffs(long data)
+{
+        /* work on an unsigned copy: right-shifting a negative signed value
+         * is implementation-defined */
+        unsigned long bits = (unsigned long)data;
+        unsigned long pos = 0;
+
+#if BITS_PER_LONG == 64
+        if ((bits & 0xFFFFFFFFUL) == 0) {
+                pos += 32;
+                bits >>= 32;
+        }
+#endif
+        if ((bits & 0xFFFFUL) == 0) {
+                pos += 16;
+                bits >>= 16;
+        }
+        if ((bits & 0xFFUL) == 0) {
+                pos += 8;
+                bits >>= 8;
+        }
+        if ((bits & 0xFUL) == 0) {
+                pos += 4;
+                bits >>= 4;
+        }
+        if ((bits & 0x3UL) == 0) {
+                pos += 2;
+                bits >>= 2;
+        }
+        if ((bits & 0x1UL) == 0)
+                pos += 1;
+
+        return pos;
+}
+
+#define __ffz(x)       __ffs(~(x))     /* index of the lowest zero bit of x: the lowest set bit of ~x */
+
+unsigned long find_next_bit(unsigned long *addr,
+                            unsigned long size, unsigned long offset);
+
+unsigned long find_next_zero_bit(unsigned long *addr,
+                                 unsigned long size, unsigned long offset);
+
+#define find_first_bit(addr,size)       (find_next_bit((addr),(size),0))
+#define find_first_zero_bit(addr,size)  (find_next_zero_bit((addr),(size),0))
+
+#endif
diff --git a/libcfs/include/libcfs/user-lock.h b/libcfs/include/libcfs/user-lock.h
new file mode 100644 (file)
index 0000000..6b46ce2
--- /dev/null
@@ -0,0 +1,243 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable synchronization API for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_LOCK_H__
+#define __LIBCFS_USER_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Implementations of portable synchronization APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ *
+ * XXX Liang: There are several branches share lnet with b_hd_newconfig,
+ * if we define lock APIs at here, there will be conflict with liblustre
+ * in other branches.
+ */
+
+#ifndef __KERNEL__
+#include <stdio.h>
+#include <stdlib.h>
+
+#if 0
+/*
+ * Optional debugging (magic stamping and checking ownership) can be added.
+ */
+
+/*
+ * spin_lock
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ *
+ * No-op implementation.
+ */
+struct spin_lock {int foo;};
+
+typedef struct spin_lock spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { }
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+
+void spin_lock_init(spinlock_t *lock);
+void spin_lock(spinlock_t *lock);
+void spin_unlock(spinlock_t *lock);
+int spin_trylock(spinlock_t *lock);
+void spin_lock_bh_init(spinlock_t *lock);
+void spin_lock_bh(spinlock_t *lock);
+void spin_unlock_bh(spinlock_t *lock);
+static inline int spin_is_locked(spinlock_t *l) {return 1;}
+
+static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){}
+static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){}
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+typedef struct semaphore {
+    int foo;
+} mutex_t;
+
+void sema_init(struct semaphore *s, int val);
+void __down(struct semaphore *s);
+void __up(struct semaphore *s);
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+#define mutex_up(s)                    __up(s)
+#define mutex_down(s)                  __down(s)
+
+#define init_mutex(x)                  sema_init(x, 1)
+#define init_mutex_locked(x)           sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+#if 0
+struct completion {};
+
+void init_completion(struct completion *c);
+void complete(struct completion *c);
+void wait_for_completion(struct completion *c);
+#endif
+
+/*
+ * rw_semaphore:
+ *
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+struct rw_semaphore {};
+
+void init_rwsem(struct rw_semaphore *s);
+void down_read(struct rw_semaphore *s);
+int down_read_trylock(struct rw_semaphore *s);
+void down_write(struct rw_semaphore *s);
+int down_write_trylock(struct rw_semaphore *s);
+void up_read(struct rw_semaphore *s);
+void up_write(struct rw_semaphore *s);
+
+/*
+ * read-write lock : Need to be investigated more!!
+ * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore
+ *
+ * - DECLARE_RWLOCK(l)
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+typedef struct rw_semaphore rwlock_t;
+
+#define rwlock_init(pl)                init_rwsem(pl)
+
+#define read_lock(l)           down_read(l)
+#define read_unlock(l)         up_read(l)
+#define write_lock(l)          down_write(l)
+#define write_unlock(l)                up_write(l)
+
+static inline void
+write_lock_irqsave(rwlock_t *l, unsigned long f) { write_lock(l); }
+static inline void
+write_unlock_irqrestore(rwlock_t *l, unsigned long f) { write_unlock(l); }
+
+static inline void 
+read_lock_irqsave(rwlock_t *l, unsigned long f) { read_lock(l); }
+static inline void
+read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); }
+
+/*
+ * Atomic for user-space
+ * Copied from liblustre
+ *
+ * These are ordinary, unsynchronised operations on an int counter that
+ * merely mirror the kernel's atomic_t interface.
+ *
+ * atomic_add_return(n, a) adds n to the counter and evaluates to the new
+ * value; atomic_inc_return(a) is the n == 1 case.
+ *
+ * This section sits inside the enclosing "#if 0" and is currently
+ * compiled out.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+#define atomic_read(a) ((a)->counter)
+#define atomic_set(a,b) do {(a)->counter = (b); } while (0)
+#define atomic_dec_and_test(a) ((--((a)->counter)) == 0)
+#define atomic_inc(a)  (((a)->counter)++)
+#define atomic_dec(a)  do { (a)->counter--; } while (0)
+#define atomic_add(b,a)  do {(a)->counter += (b);} while (0)
+#define atomic_add_return(n,a) ((a)->counter += (n))
+#define atomic_inc_return(a) atomic_add_return(1,a)
+#define atomic_sub(b,a)  do {(a)->counter -= (b);} while (0)
+
+#endif
+
+#ifdef HAVE_LIBPTHREAD
+#include <pthread.h>
+
+/*
+ * Completion
+ *
+ * Holds a flag together with a pthread condition variable and mutex.
+ * Only declared when HAVE_LIBPTHREAD is defined; the functions below are
+ * prototypes whose definitions are not part of this header.
+ */
+
+struct cfs_completion {
+        int c_done;             /* presumably non-zero once completed - TODO confirm */
+        pthread_cond_t c_cond;
+        pthread_mutex_t c_mut;
+};
+
+void cfs_init_completion(struct cfs_completion *c);
+void cfs_fini_completion(struct cfs_completion *c);
+void cfs_complete(struct cfs_completion *c);
+void cfs_wait_for_completion(struct cfs_completion *c);
+
+/*
+ * atomic.h
+ *
+ * Counter type and operations for builds with HAVE_LIBPTHREAD.  Unlike the
+ * macro variant, these are real functions whose definitions are not part
+ * of this header.
+ * NOTE(review): presumably they serialise access to the counter - confirm
+ * against the implementation.
+ */
+
+typedef struct { volatile int counter; } cfs_atomic_t;
+
+int cfs_atomic_read(cfs_atomic_t *a);
+void cfs_atomic_set(cfs_atomic_t *a, int b);
+int cfs_atomic_dec_and_test(cfs_atomic_t *a);
+void cfs_atomic_inc(cfs_atomic_t *a);
+void cfs_atomic_dec(cfs_atomic_t *a);
+void cfs_atomic_add(int b, cfs_atomic_t *a);
+void cfs_atomic_sub(int b, cfs_atomic_t *a);
+
+#endif /* HAVE_LIBPTHREAD */
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_LOCK_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h
new file mode 100644 (file)
index 0000000..43c1aeb
--- /dev/null
@@ -0,0 +1,328 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable primitives (wait queues, pages, memory
+ * allocation, timers) for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_PRIM_H__
+#define __LIBCFS_USER_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Implementations of portable APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/mman.h>
+#include <libcfs/list.h>
+#include <libcfs/user-time.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#ifdef HAVE_LIBPTHREAD
+#include <pthread.h>
+#endif
+
+
+/*
+ * Wait Queue. No-op implementation.
+ */
+
+typedef struct cfs_waitlink {
+        struct list_head sleeping;
+        void *process;
+} cfs_waitlink_t;
+
+typedef struct cfs_waitq {
+        struct list_head sleepers;
+} cfs_waitq_t;
+
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, 
+                             struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int  cfs_waitq_active(struct cfs_waitq *waitq);
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+void cfs_waitq_wait(struct cfs_waitlink *link, int state);
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout);
+#define cfs_schedule_timeout(s, t)              \
+        do {                                    \
+                cfs_waitlink_t    l;            \
+                cfs_waitq_timedwait(&l, s, t);  \
+        } while (0)
+
+#define CFS_TASK_INTERRUPTIBLE  (0)
+#define CFS_TASK_UNINT          (0)
+
+/* 2.4 defines */
+
+/* XXX
+ * for the moment, liblustre will not rely on the OST for non-page-aligned writes
+ */
+#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+
+struct page {
+        void   *addr;           /* presumably the page's data buffer - TODO confirm */
+        unsigned long index;    /* value returned by cfs_page_index() */
+        struct list_head list;
+        unsigned long private;
+
+        /* internally used by liblustre file i/o */
+        int     _offset;
+        int     _count;
+#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+        int     _managed;       /* only present when unaligned-page handling is compiled in */
+#endif /* LIBLUSTRE_HANDLE_UNALIGNED_PAGE */
+        struct list_head _node;
+};
+
+typedef struct page cfs_page_t;
+
+#ifndef PAGE_SIZE
+
+/* 4K */
+#define CFS_PAGE_SHIFT 12
+#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT)
+#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1))
+
+#else
+
+#define CFS_PAGE_SIZE                   PAGE_SIZE
+#define CFS_PAGE_SHIFT                  PAGE_SHIFT
+#define CFS_PAGE_MASK                   (~((__u64)CFS_PAGE_SIZE-1))
+
+#endif
+
+cfs_page_t *cfs_alloc_page(unsigned int flags);
+void cfs_free_page(cfs_page_t *pg);
+void *cfs_page_address(cfs_page_t *pg);
+void *cfs_kmap(cfs_page_t *pg);
+void cfs_kunmap(cfs_page_t *pg);
+
+#define cfs_get_page(p)                        __I_should_not_be_called__(at_all)
+#define cfs_page_count(p)              __I_should_not_be_called__(at_all)
+#define cfs_page_index(p)               ((p)->index)
+
+/*
+ * Memory allocator
+ *
+ * Kept inline so that utilities can allocate memory without having to
+ * link against libcfs.
+ *
+ * cfs_alloc() hands out nr_bytes bytes from malloc().  The block is
+ * zero-filled when __ALLOC_ZERO is set in flags; all other flag bits are
+ * ignored.  Returns NULL when malloc() fails.
+ */
+#define __ALLOC_ZERO    (1 << 2)
+static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+        void *mem = malloc(nr_bytes);
+
+        if (mem == NULL)
+                return NULL;
+
+        if ((flags & __ALLOC_ZERO) != 0)
+                memset(mem, 0, nr_bytes);
+
+        return mem;
+}
+
+#define cfs_free(addr)  free(addr)
+#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0)
+#define cfs_free_large(addr) cfs_free(addr)
+
+#define CFS_ALLOC_ATOMIC_TRY   (0)
+/*
+ * SLAB allocator
+ */
+typedef struct {
+         int size;
+} cfs_mem_cache_t;
+
+#define SLAB_HWCACHE_ALIGN 0
+#define SLAB_KERNEL 0
+#define SLAB_NOFS 0
+
+cfs_mem_cache_t *
+cfs_mem_cache_create(const char *, size_t, size_t, unsigned long);
+int cfs_mem_cache_destroy(cfs_mem_cache_t *c);
+void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp);
+void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr);
+
+typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
+                          int count, int *eof, void *data);
+
+struct file; /* forward ref */
+typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
+                               unsigned long count, void *data);
+
+/*
+ * Signal
+ */
+typedef sigset_t                        cfs_sigset_t;
+
+/*
+ * Timer
+ *
+ * cfs_timer_t stores a callback, its argument and an expiry time.  Nothing
+ * in this header ever invokes the callback; the inline helpers only record
+ * and compare the expiry time.
+ *
+ * cfs_init_timer() expands to an empty statement.
+ *
+ * cfs_jiffies evaluates to the current gettimeofday() time in whole
+ * seconds, or to 0 when gettimeofday() fails.  It relies on the GCC
+ * statement-expression extension.
+ */
+#include <sys/time.h>
+
+typedef struct {
+        struct list_head tl_list;
+        void (*function)(unsigned long unused);
+        unsigned long data;
+        long expires;
+} cfs_timer_t;
+
+#define cfs_init_timer(t)       do {} while(0)
+#define cfs_jiffies                             \
+({                                              \
+        unsigned long _ret = 0;                 \
+        struct timeval tv;                      \
+        if (gettimeofday(&tv, NULL) == 0)       \
+                _ret = tv.tv_sec;               \
+        _ret;                                   \
+})
+
+/*
+ * Prepare timer l: remember callback func and its argument arg (stored as
+ * an unsigned long) and reset the timer's list linkage.  The expiry time
+ * is left untouched.  Always returns 0.
+ */
+static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned long), void *arg)
+{
+        l->data = (unsigned long)arg;
+        l->function = func;
+        CFS_INIT_LIST_HEAD(&l->tl_list);
+
+        return 0;
+}
+
+/*
+ * Returns 1 while the current time (cfs_jiffies) is still before the
+ * timer's expiry time, 0 otherwise.
+ */
+static inline int cfs_timer_is_armed(cfs_timer_t *l)
+{
+        return cfs_time_before(cfs_jiffies, l->expires) ? 1 : 0;
+}
+
+/* Record thetime as the timer's expiry time; nothing else is scheduled. */
+static inline void cfs_timer_arm(cfs_timer_t *l, int thetime)
+{
+        l->expires = thetime;
+}
+
+/*
+ * No-op.
+ * NOTE(review): the expiry time is left untouched, so cfs_timer_is_armed()
+ * can still report the timer as armed after this call - confirm that this
+ * is intended.
+ */
+static inline void cfs_timer_disarm(cfs_timer_t *l)
+{
+}
+
+/* Return the expiry time currently stored in the timer. */
+static inline long cfs_timer_deadline(cfs_timer_t *l)
+{
+        return l->expires;
+}
+
+#if 0
+#define cfs_init_timer(t)      do {} while(0)
+void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
+void cfs_timer_done(struct cfs_timer *t);
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
+void cfs_timer_disarm(struct cfs_timer *t);
+int  cfs_timer_is_armed(struct cfs_timer *t);
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
+#endif
+
+#define in_interrupt()    (0)
+
+/*
+ * Sleep for duration d: the duration is converted to a struct timespec
+ * with cfs_duration_nsec() and handed to nanosleep().  If the sleep ends
+ * early it is not resumed, and nanosleep()'s result is ignored.
+ */
+static inline void cfs_pause(cfs_duration_t d)
+{
+        struct timespec ts;
+
+        cfs_duration_nsec(d, &ts);
+        nanosleep(&ts, NULL);
+}
+
+typedef void cfs_psdev_t;       /* opaque here: the device is only ever passed by pointer */
+
+/* Stub: ignores its argument and always returns 0. */
+static inline int cfs_psdev_register(cfs_psdev_t *foo)
+{
+        return 0;
+}
+
+/* Stub: ignores its argument and always returns 0. */
+static inline int cfs_psdev_deregister(cfs_psdev_t *foo)
+{
+        return 0;
+}
+
+#define cfs_lock_kernel()               do {} while (0)
+#define cfs_sigfillset(l) do {}         while (0)
+#define cfs_recalc_sigpending(l)        do {} while (0)
+#define cfs_kernel_thread(l,m,n)        LBUG()
+
+#ifdef HAVE_LIBPTHREAD
+typedef int (*cfs_thread_t)(void *);
+int cfs_create_thread(cfs_thread_t func, void *arg);
+#else
+#define cfs_create_thread(l,m) LBUG()
+#endif
+
+int cfs_parse_int_tunable(int *value, char *name);
+uid_t cfs_curproc_uid(void);
+
+#define LIBCFS_REALLOC(ptr, size) realloc(ptr, size)
+
+#define cfs_online_cpus() sysconf(_SC_NPROCESSORS_ONLN)
+
+// static inline void local_irq_save(unsigned long flag) {return;}
+// static inline void local_irq_restore(unsigned long flag) {return;}
+
+enum {
+        CFS_STACK_TRACE_DEPTH = 16
+};
+
+struct cfs_stack_trace {
+        void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+/*
+ * arithmetic
+ *
+ * do_div(a, b) divides a by b in place and evaluates to the remainder.
+ * a must be a modifiable lvalue; a and b are each evaluated twice, so
+ * arguments with side effects must be avoided.  The remainder is held in
+ * an unsigned long.  Relies on the GCC statement-expression extension.
+ */
+#define do_div(a,b)                     \
+        ({                              \
+                unsigned long remainder;\
+                remainder = (a) % (b);  \
+                (a) = (a) / (b);        \
+                (remainder);            \
+        })
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_PRIM_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/user-tcpip.h b/libcfs/include/libcfs/user-tcpip.h
new file mode 100644 (file)
index 0000000..342c039
--- /dev/null
@@ -0,0 +1,90 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __LIBCFS_USER_TCPIP_H__
+#define __LIBCFS_USER_TCPIP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifndef __KERNEL__
+
+#include <sys/uio.h>
+
+/*
+ * Functions to get network interfaces info
+ */
+
+int libcfs_sock_ioctl(int cmd, unsigned long arg);
+int libcfs_ipif_query (char *name, int *up, __u32 *ip);
+void libcfs_ipif_free_enumeration (char **names, int n);
+int libcfs_ipif_enumerate (char ***namesp);
+
+/*
+ * Network function used by user-land lnet acceptor
+ */
+
+int libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog);
+int libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port);
+int libcfs_sock_read (int sock, void *buffer, int nob, int timeout);
+void libcfs_sock_abort_accept(__u16 port);
+
+/*
+ * Network functions of common use
+ */
+
+int libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p);
+int libcfs_socketpair(int *fdp);
+int libcfs_fcntl_nonblock(int fd);
+int libcfs_sock_set_nagle(int fd, int nagle);
+int libcfs_sock_set_bufsiz(int fd, int bufsiz);
+int libcfs_sock_create(int *fdp);
+int libcfs_sock_bind_to_port(int fd, __u16 port);
+int libcfs_sock_connect(int fd, __u32 ip, __u16 port);
+int libcfs_sock_writev(int fd, const struct iovec *vector, int count);
+int libcfs_sock_readv(int fd, const struct iovec *vector, int count);
+
+/*
+ * Macros for easy printing of IP addresses
+ */
+
+#define NIPQUAD(addr) \
+        ((unsigned char *)&addr)[0], \
+        ((unsigned char *)&addr)[1], \
+        ((unsigned char *)&addr)[2], \
+        ((unsigned char *)&addr)[3]
+
+#if defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)
+#define HIPQUAD(addr)                \
+        ((unsigned char *)&addr)[3], \
+        ((unsigned char *)&addr)[2], \
+        ((unsigned char *)&addr)[1], \
+        ((unsigned char *)&addr)[0]
+#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)
+#define HIPQUAD NIPQUAD
+#else
+#error "Undefined byteorder??"
+#endif /* __LITTLE_ENDIAN */
+
+#endif /* !__KERNEL__ */
+
+#endif
diff --git a/libcfs/include/libcfs/user-time.h b/libcfs/include/libcfs/user-time.h
new file mode 100644 (file)
index 0000000..874b7da
--- /dev/null
@@ -0,0 +1,205 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_TIME_H__
+#define __LIBCFS_USER_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ *  cfs_time_t        represents point in time. This is internal kernel
+ *                    time rather than "wall clock". This time bears no
+ *                    relation to gettimeofday().
+ *
+ *  cfs_duration_t    represents time interval with resolution of internal
+ *                    platform clock
+ *
+ *  cfs_fs_time_t     represents instance in world-visible time. This is
+ *                    used in file-system time-stamps
+ *
+ *  cfs_time_t     cfs_time_current(void);
+ *  cfs_time_t     cfs_time_add    (cfs_time_t, cfs_duration_t);
+ *  cfs_duration_t cfs_time_sub    (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_before (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ *  cfs_duration_t cfs_duration_build(int64_t);
+ *
+ *  time_t         cfs_duration_sec (cfs_duration_t);
+ *  void           cfs_duration_usec(cfs_duration_t, struct timeval *);
+ *  void           cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ *  void           cfs_fs_time_current(cfs_fs_time_t *);
+ *  time_t         cfs_fs_time_sec    (cfs_fs_time_t *);
+ *  void           cfs_fs_time_usec   (cfs_fs_time_t *, struct timeval *);
+ *  void           cfs_fs_time_nsec   (cfs_fs_time_t *, struct timespec *);
+ *  int            cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ *  int            cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ *  CFS_TIME_FORMAT
+ *  CFS_DURATION_FORMAT
+ *
+ */
+
+#ifndef __KERNEL__
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION 1000000
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+typedef time_t cfs_fs_time_t;
+typedef time_t cfs_time_t;
+typedef long cfs_duration_t;
+
+/* Current time as reported by time(2), i.e. in whole seconds. */
+static inline cfs_time_t cfs_time_current(void)
+{
+        return time(NULL);
+}
+
+/*
+ * Convert a number of seconds to a cfs_duration_t.  Durations are counted
+ * in seconds here, so the value is passed through unchanged.
+ */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+        return seconds;
+}
+
+/*
+ * Current time in seconds.
+ *
+ * cfs_time_t already counts seconds in this implementation, so the value
+ * is returned as is.  Passing it through cfs_time_seconds() would squeeze
+ * it through that function's int parameter and truncate it wherever
+ * time_t is wider than int.
+ */
+static inline time_t cfs_time_current_sec(void)
+{
+        return cfs_time_current();
+}
+
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 < t2;
+}
+
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 <= t2;
+}
+
+/*
+ * Convert a number of nanoseconds to a duration in whole seconds,
+ * truncating toward zero.
+ *
+ * ONE_BILLION is an unsigned 64-bit constant; it is cast to int64_t so
+ * that the division stays signed.  Otherwise a negative nano would be
+ * converted to a huge unsigned value before dividing.
+ */
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+        return (cfs_duration_t) (nano / (int64_t)ONE_BILLION);
+}
+
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+        return d;
+}
+
+/*
+ * Express duration d as a struct timeval.  Durations are whole seconds
+ * here, so the microsecond part is always 0.
+ */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        s->tv_sec = d;
+        s->tv_usec = 0;
+}
+
+/*
+ * Express duration d as a struct timespec.  Durations are whole seconds
+ * here, so the nanosecond part is always 0.
+ */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        s->tv_sec = d;
+        s->tv_nsec = 0;
+}
+
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+        time(t);
+}
+
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+        return *t;
+}
+
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        v->tv_sec = *t;
+        v->tv_usec = 0;
+}
+
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        s->tv_sec = *t;
+        s->tv_nsec = 0;
+}
+
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return *t1 < *t2;
+}
+
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return *t1 <= *t2;
+}
+
+#define CFS_TICK                (1)
+
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+        return t + d;
+}
+
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 - t2;
+}
+
+#define cfs_time_current_64 cfs_time_current
+#define cfs_time_add_64     cfs_time_add
+#define cfs_time_shift_64   cfs_time_shift
+#define cfs_time_before_64  cfs_time_before
+#define cfs_time_beforeq_64 cfs_time_beforeq
+
+#ifndef CFS_TIME_T
+#define CFS_TIME_T              "%u"
+#endif
+
+#define CFS_DURATION_T          "%ld"
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/winnt/kp30.h b/libcfs/include/libcfs/winnt/kp30.h
new file mode 100644 (file)
index 0000000..779d8be
--- /dev/null
@@ -0,0 +1,157 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_KP30_H__
+#define __LIBCFS_WINNT_KP30_H__
+
+#ifndef __LIBCFS_KP30_H__
+#error Do not #include this file directly. #include <libcfs/kp30.h> instead
+#endif
+
+#include <libcfs/winnt/portals_compat25.h>
+#include <lnet/types.h>
+
+#ifdef __KERNEL__
+
+/* Module parameter support */
+#define CFS_MODULE_PARM(name, t, type, perm, desc)
+
+#define CFS_SYSFS_MODULE_PARM    0 /* no sysfs access to module parameters */
+
+
+/* Calls schedule_timeout() with a timeout argument of 1
+ * (the i64 suffix is the MSVC spelling of a 64-bit integer literal). */
+static inline void our_cond_resched()
+{
+    schedule_timeout(1i64);
+}
+
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+
+#error Need a winnt version of panic()
+#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL)
+#error libcfs_register_panic_notifier() missing
+#error libcfs_unregister_panic_notifier() missing
+
+#define cfs_work_struct_t WORK_QUEUE_ITEM
+#define cfs_prepare_work(tq, routine, contex)
+#define cfs_schedule_work(tq)
+#define cfs_get_work_data(type,field,data)   (data)
+
+/* ------------------------------------------------------------------- */
+
+#define PORTAL_SYMBOL_REGISTER(x)               cfs_symbol_register(#x, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x)             cfs_symbol_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x)                    (cfs_symbol_get(#x))
+#define PORTAL_SYMBOL_PUT(x)                    cfs_symbol_put(#x)
+
+#define PORTAL_MODULE_USE                       do{}while(0)
+#define PORTAL_MODULE_UNUSE                     do{}while(0)
+
+/* Map printk onto DbgPrint. */
+#define printk                                  DbgPrint
+/* NOTE(review): "ptintf" looks like a misspelling of "printf" - confirm
+ * before renaming, since redefining printf may clash with CRT prototypes. */
+#define ptintf                                  DbgPrint
+
+#else  /* !__KERNEL__ */
+
+# include <stdio.h>
+# include <stdlib.h>
+#ifdef __CYGWIN__
+# include <cygwin-ioctl.h>
+#endif
+# include <time.h>
+
+#endif /* End of !__KERNEL__ */
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. */
+#define LWT_SUPPORT  0
+
+/* kernel hasn't defined this? */
+/* Record for one light-weight trace event: a 64-bit "when" value, a "where"
+ * string, a task pointer and four pointer-sized parameters.  One extra
+ * pointer-sized pad word is added when BITS_PER_LONG > 32.
+ * NOTE(review): BITS_PER_LONG is tested here before the __CYGWIN__ fallback
+ * definition later in this header - confirm it is defined by then. */
+typedef struct {
+        __s64      lwte_when;
+        char       *lwte_where;
+        void       *lwte_task;
+        long_ptr        lwte_p1;
+        long_ptr        lwte_p2;
+        long_ptr        lwte_p3;
+        long_ptr        lwte_p4;
+# if BITS_PER_LONG > 32
+        long_ptr        lwte_pad;
+# endif
+} lwt_event_t;
+
+
+# define LWT_EVENT(p1,p2,p3,p4)
+
+
+/* ------------------------------------------------------------------ */
+
+#define IOCTL_LIBCFS_TYPE long_ptr
+
+#ifdef __CYGWIN__
+# ifndef BITS_PER_LONG
+#  if (~0UL) == 0xffffffffUL
+#   define BITS_PER_LONG 32
+#  else
+#   define BITS_PER_LONG 64
+#  endif
+# endif
+#endif
+
+#if BITS_PER_LONG > 32
+# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
+# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a)
+#else
+# define LI_POISON ((int)0x5a5a5a5a)
+# define LL_POISON ((long_ptr)0x5a5a5a5a)
+# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a)
+#endif
+
+#if defined(__x86_64__)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ  "%lu"
+# define LPSSZ "%ld"
+#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ  "%u"
+# define LPSSZ "%d"
+#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
+# define LPU64 "%I64u"
+# define LPD64 "%I64d"
+# define LPX64 "%I64x"
+# define LPSZ  "%u"
+# define LPSSZ "%d"
+#endif
+#ifndef LPU64
+# error "No word size defined"
+#endif
+
+#endif
diff --git a/libcfs/include/libcfs/winnt/libcfs.h b/libcfs/include/libcfs/winnt/libcfs.h
new file mode 100644 (file)
index 0000000..386eb5f
--- /dev/null
@@ -0,0 +1,126 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LIBCFS_H__
+#define __LIBCFS_WINNT_LIBCFS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* workaround for the VC compiler */
+#ifndef __FUNCTION__
+#define __FUNCTION__ "generic"
+#endif
+
+#include <libcfs/winnt/winnt-types.h>
+#include <libcfs/portals_utils.h>
+#include <libcfs/winnt/winnt-time.h>
+#include <libcfs/winnt/winnt-lock.h>
+#include <libcfs/winnt/winnt-mem.h>
+#include <libcfs/winnt/winnt-prim.h>
+#include <libcfs/winnt/winnt-fs.h>
+#include <libcfs/winnt/winnt-tcpip.h>
+
+/* Header of one debug record.  Declared packed, so the fields are laid out
+ * back to back without padding (note the __u64 ph_usec between __u32s). */
+struct ptldebug_header {
+        __u32 ph_len;
+        __u32 ph_flags;
+        __u32 ph_subsys;
+        __u32 ph_mask;
+        __u32 ph_cpu_id;
+        __u32 ph_sec;
+        __u64 ph_usec;
+        __u32 ph_stack;
+        __u32 ph_pid;
+        __u32 ph_extern_pid;
+        __u32 ph_line_num;
+} __attribute__((packed));
+
+#ifdef __KERNEL__
+
+/* Number of frame slots in struct cfs_stack_trace. */
+enum {
+       /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */
+       /* NOTE(review): the line above names a darwin source file although
+        * this is the winnt header - confirm which file applies here. */
+       CFS_STACK_TRACE_DEPTH = 16
+};
+
+/* Fixed-size array of CFS_STACK_TRACE_DEPTH frame pointers. */
+struct cfs_stack_trace {
+       void *frame[CFS_STACK_TRACE_DEPTH];
+};
+
+/*
+ * query_stack_size
+ *   Return the span, in bytes, between the stack limits that
+ *   IoGetStackLimits() reports for the current thread.
+ *
+ * Notes:
+ *   The limits are ULONG_PTR because IoGetStackLimits() stores
+ *   pointer-sized values; 32-bit ULONG locals would be overrun on
+ *   64-bit builds.  On 32-bit builds the two types have the same size.
+ */
+static inline __u32 query_stack_size()
+{
+    ULONG_PTR   LowLimit, HighLimit;
+
+    IoGetStackLimits(&LowLimit, &HighLimit);
+    ASSERT(HighLimit > LowLimit);
+
+    return (__u32) (HighLimit - LowLimit);
+}
+#else
+/* Non-kernel variant: always reports a fixed size of 4096. */
+static inline __u32 query_stack_size()
+{
+   return 4096;
+}
+#endif
+
+
+#ifndef THREAD_SIZE
+# define THREAD_SIZE query_stack_size()
+#endif
+
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+#ifdef __KERNEL__
+# ifdef  __ia64__
+#  define CDEBUG_STACK() (THREAD_SIZE -                         \
+                          ((ulong_ptr)__builtin_dwarf_cfa() &   \
+                           (THREAD_SIZE - 1)))
+# else
+/* Function-like, matching the other CDEBUG_STACK() definitions in this
+ * header, so that an invocation written as CDEBUG_STACK() expands cleanly. */
+#  define CDEBUG_STACK() (IoGetRemainingStackSize())
+#  error "This doesn't seem right; CDEBUG_STACK should grow with the stack"
+# endif /* __ia64__ */
+
+/* Sample CDEBUG_STACK(); when the value exceeds both 3/4 of THREAD_SIZE and
+ * the current libcfs_stack, store it in libcfs_stack and emit a D_WARNING
+ * message through libcfs_debug_msg(). */
+#define CHECK_STACK()                                                   \
+do {                                                                    \
+        unsigned long _stack = CDEBUG_STACK();                          \
+                                                                        \
+        if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) {        \
+                libcfs_stack = _stack;                                  \
+                libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING,      \
+                                 __FILE__, NULL, __LINE__,              \
+                                 "maximum lustre stack %lu\n", _stack); \
+        }                                                               \
+} while (0)
+#else /* !__KERNEL__ */
+#define CHECK_STACK() do { } while(0)
+#define CDEBUG_STACK() (0L)
+#endif /* __KERNEL__ */
+
+/* initial pid  */
+#define LUSTRE_LNET_PID          12345
+
+#define ENTRY_NESTING_SUPPORT (0)
+#define ENTRY_NESTING   do {;} while (0)
+#define EXIT_NESTING   do {;} while (0)
+#define __current_nesting_level() (0)
+
+#endif /* __LIBCFS_WINNT_LIBCFS_H__ */
diff --git a/libcfs/include/libcfs/winnt/lltrace.h b/libcfs/include/libcfs/winnt/lltrace.h
new file mode 100644 (file)
index 0000000..9615e94
--- /dev/null
@@ -0,0 +1,33 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LLTRACE_H__
+#define __LIBCFS_WINNT_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. #include <libcfs/lltrace.h> instead
+#endif
+
+
+#endif
diff --git a/libcfs/include/libcfs/winnt/portals_compat25.h b/libcfs/include/libcfs/winnt/portals_compat25.h
new file mode 100644 (file)
index 0000000..579b795
--- /dev/null
@@ -0,0 +1,28 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__
+#define __LIBCFS_WINNT_PORTALS_COMPAT_H__
+
+
+
+#endif /* __LIBCFS_WINNT_PORTALS_COMPAT_H__ */
diff --git a/libcfs/include/libcfs/winnt/portals_utils.h b/libcfs/include/libcfs/winnt/portals_utils.h
new file mode 100644 (file)
index 0000000..ec80692
--- /dev/null
@@ -0,0 +1,168 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__
+#define __LIBCFS_WINNT_PORTALS_UTILS_H__
+
+#ifndef __LIBCFS_PORTALS_UTILS_H__
+#error Do not #include this file directly. #include <libcfs/portals_utils.h> instead
+#endif
+
+#ifndef cfs_is_flag_set
+#define cfs_is_flag_set(x,f) (((x)&(f))==(f))
+#endif
+
+#ifndef cfs_set_flag
+#define cfs_set_flag(x,f)    ((x) |= (f))
+#endif
+
+#ifndef cfs_clear_flag
+#define cfs_clear_flag(x,f)  ((x) &= ~(f))
+#endif
+
+
+/*
+ * __do_div
+ *   Divide *@n by @b in place and return the remainder.
+ *
+ * Notes:
+ *   Works on 32-bit values only. @b must not be zero.
+ */
+static inline __u32 __do_div(__u32 * n, __u32 b)
+{
+    __u32   dividend = *n;
+
+    *n = dividend / b;
+    return dividend % b;
+}
+
+/* NOTE(review): the cast makes do_div() operate on the first 32 bits of @n
+ * whatever @n's real width is - confirm no caller passes a 64-bit value. */
+#define do_div(n,base)  __do_div((__u32 *)&(n), (__u32) (base))
+
+#ifdef __KERNEL__
+
+#include <stdlib.h>
+#include <libcfs/winnt/winnt-types.h>
+
+char * strsep(char **s, const char *ct);
+/*
+ * strnlen
+ *   Length of the string @s, examining no more than @count bytes.
+ *
+ * Return Value:
+ *   Number of bytes before the terminating NUL, or @count when no NUL
+ *   is found within the first @count bytes.
+ */
+static inline size_t strnlen(const char * s, size_t count) {
+    size_t len = 0;
+
+    /* advance only past non-NUL bytes so the terminator is not counted */
+    while (len < count && s[len] != '\0')
+        len++;
+    return len;
+}
+char * ul2dstr(ulong_ptr address, char *buf, int len);
+
+#define simple_strtol(a1, a2, a3)               strtol(a1, a2, a3)
+#define simple_strtoll(a1, a2, a3)              (__s64)strtoull(a1, a2, a3)
+#define simple_strtoull(a1, a2, a3)             strtoull(a1, a2, a3)
+
+unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base);
+
+/* Return non-zero when bit @nr is set in the array of 32-bit words at @addr
+ * (word index nr >> 5, bit index nr & 31). */
+static inline int test_bit(int nr, void * addr)
+{
+    volatile ULONG *words = (volatile ULONG *) addr;
+    unsigned long   mask  = 1UL << (nr & 31);
+
+    return (mask & words[nr >> 5]) != 0;
+}
+
+/* Clear bit @nr in the array of 32-bit words at @addr with a plain
+ * read-modify-write of word nr >> 5. */
+static inline void clear_bit(int nr, void * addr)
+{
+    volatile ULONG *words = (volatile ULONG *) addr;
+    unsigned long   mask  = 1UL << (nr & 31);
+
+    words[nr >> 5] &= ~mask;
+}
+
+
+/* Set bit @nr in the array of 32-bit words at @addr with a plain
+ * read-modify-write of word nr >> 5. */
+static inline void set_bit(int nr, void * addr)
+{
+    volatile ULONG *words = (volatile ULONG *) addr;
+    unsigned long   mask  = 1UL << (nr & 31);
+
+    words[nr >> 5] |= mask;
+}
+
+/*
+ * read_random
+ *   Fill @buf with @len bytes produced by RtlRandom().
+ *
+ * Arguments:
+ *   buf:  destination buffer
+ *   len:  number of bytes to write; nothing is written when len <= 0
+ *
+ * Notes:
+ *   The initial seed is the (truncated) buffer address.  A new value is
+ *   generated for every ULONG-sized chunk so that the buffer is not one
+ *   value repeated over and over.
+ */
+static inline void read_random(char *buf, int len)
+{
+    ULONG   Seed = (ULONG) buf;
+
+    while (len > 0) {
+        int chunk = (len > (int) sizeof(ULONG)) ? (int) sizeof(ULONG) : len;
+
+        /* each value seeds the next one */
+        Seed = RtlRandom(&Seed);
+        memcpy(buf, &Seed, chunk);
+        buf += chunk;
+        len -= chunk;
+    }
+}
+#define get_random_bytes(buf, len)  read_random(buf, len)
+
+/* do NOT use function or expression as parameters ... */
+
+#ifndef min_t
+/* Smaller of @x and @y after converting both to @type; the result has type
+ * @type.  The expansion is fully parenthesized so it can be used inside a
+ * larger expression.  Each argument may be evaluated twice. */
+#define min_t(type,x,y) ((type)(x) < (type)(y) ? (type)(x) : (type)(y))
+#endif
+
+#ifndef max_t
+/* Larger of @x and @y after converting both to @type; the result has type
+ * @type.  The expansion is fully parenthesized so it can be used inside a
+ * larger expression.  Each argument may be evaluated twice. */
+#define max_t(type,x,y) ((type)(x) < (type)(y) ? (type)(y) : (type)(x))
+#endif
+
+
+/* Expands to four comma-separated unsigned char values: bytes 0, 1, 2, 3 of
+ * @addr in memory order.  @addr must be an lvalue (its address is taken). */
+#define NIPQUAD(addr)                      \
+       ((unsigned char *)&addr)[0],    \
+       ((unsigned char *)&addr)[1],    \
+       ((unsigned char *)&addr)[2],    \
+       ((unsigned char *)&addr)[3]
+
+/* Same as NIPQUAD but in reverse order: bytes 3, 2, 1, 0 of @addr.
+ * @addr must be an lvalue (its address is taken). */
+#define HIPQUAD(addr)                      \
+       ((unsigned char *)&addr)[3],    \
+       ((unsigned char *)&addr)[2],    \
+       ((unsigned char *)&addr)[1],    \
+       ((unsigned char *)&addr)[0]
+
+/* Copy @c bytes from @from to @to with a plain memcpy(); no checking of the
+ * source pointer is done here and the return value is always 0. */
+static int copy_from_user(void *to, void *from, int c) 
+{
+    memcpy(to, from, c);
+    return 0;
+}
+
+/* Copy @c bytes from @from to @to with a plain memcpy(); no checking of the
+ * destination pointer is done here and the return value is always 0. */
+static int copy_to_user(void *to, void *from, int c) 
+{
+    memcpy(to, from, c);
+    return 0;
+}
+
+
+/* Store @x through @ptr by direct assignment; the expression evaluates
+ * to 0. */
+#define put_user(x, ptr)        \
+(                               \
+    *(ptr) = x,                 \
+    0                           \
+)
+
+
+/* Load *@ptr into @x by direct assignment; the expression evaluates
+ * to 0. */
+#define get_user(x,ptr)         \
+(                               \
+    x = *(ptr),                 \
+    0                           \
+)
+
+#define num_physpages                  (64 * 1024)
+
+#define snprintf  _snprintf
+#define vsnprintf _vsnprintf
+
+
+#endif /* __KERNEL__ */
+
+int cfs_error_code(NTSTATUS);
+
+#endif
diff --git a/libcfs/include/libcfs/winnt/winnt-fs.h b/libcfs/include/libcfs/winnt/winnt-fs.h
new file mode 100644 (file)
index 0000000..088d0e0
--- /dev/null
@@ -0,0 +1,254 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * File operations & routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_FS_H__
+#define __LIBCFS_WINNT_CFS_FS_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+#define MINORBITS      8
+#define MINORMASK      ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev)     ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev)     ((unsigned int) ((dev) & MINORMASK))
+#define NODEV          0
+#define MKDEV(ma,mi)   (((ma) << MINORBITS) | (mi))
+
+
+#ifdef __KERNEL__
+
+/* Table of per-file callbacks: lseek, read, write, ioctl, open, release. */
+struct file_operations
+{
+    loff_t (*lseek)(struct file * file, loff_t offset, int origin);
+    ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos);
+    ssize_t (*write)(struct file * file, const char * buffer,
+        size_t count, loff_t *ppos);
+    int (*ioctl) (struct file *, unsigned int, ulong_ptr);
+    int (*open) (struct file *);
+    int (*release) (struct file *);
+};
+
+/* File object used by the winnt port: a handle plus flags, mode, reference
+ * count, size, position, ownership, an operations table and private data.
+ * NOTE(review): f_name is a one-element array at the end of the struct -
+ * presumably the name is stored past the end of the struct; confirm
+ * against the code that allocates it. */
+struct file {
+
+    cfs_handle_t            f_handle;
+    unsigned int            f_flags;
+    mode_t                  f_mode;
+    ulong_ptr           f_count;
+
+    //struct list_head      f_list;
+    //struct dentry *       f_dentry;
+
+    cfs_proc_entry_t *      proc_dentry;
+    cfs_file_operations_t * f_op;
+
+    size_t                  f_size;
+    loff_t                  f_pos;
+    unsigned int            f_uid, f_gid;
+    int                     f_error;
+
+    ulong_ptr           f_version;
+
+    void *                  private_data;
+
+    char                    f_name[1];
+
+};
+
+#define cfs_filp_size(f)               ((f)->f_size)
+#define cfs_filp_poff(f)                (&(f)->f_pos)
+
+cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err);
+int cfs_filp_close(cfs_file_t *fp);
+int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
+int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos);
+int cfs_filp_fsync(cfs_file_t *fp);
+int cfs_get_file(cfs_file_t *fp);
+int cfs_put_file(cfs_file_t *fp);
+int cfs_file_count(cfs_file_t *fp);
+
+
+
+/*
+ * CFS_FLOCK routines
+ */
+
+/* File lock descriptor: lock type, owning pid, length, and start/end
+ * offsets.  Accessed through the cfs_flock_*() macros below. */
+typedef struct file_lock{
+    int         fl_type;
+    pid_t       fl_pid;
+    size_t      fl_len;
+    off_t       fl_start;
+    off_t       fl_end;
+} cfs_flock_t; 
+
+#define CFS_INT_LIMIT(x)               (~((x)1 << (sizeof(x)*8 - 1)))
+#define CFS_OFFSET_MAX                 CFS_INT_LIMIT(loff_t)
+
+#define cfs_flock_type(fl)                  ((fl)->fl_type)
+#define cfs_flock_set_type(fl, type)        do { (fl)->fl_type = (type); } while(0)
+#define cfs_flock_pid(fl)                   ((fl)->fl_pid)
+#define cfs_flock_set_pid(fl, pid)          do { (fl)->fl_pid = (pid); } while(0)
+#define cfs_flock_start(fl)                 ((fl)->fl_start)
+#define cfs_flock_set_start(fl, start)      do { (fl)->fl_start = (start); } while(0)
+#define cfs_flock_end(fl)                   ((fl)->fl_end)
+#define cfs_flock_set_end(fl, end)          do { (fl)->fl_end = (end); } while(0)
+
+#define ATTR_MODE       0x0001
+#define ATTR_UID        0x0002
+#define ATTR_GID        0x0004
+#define ATTR_SIZE       0x0008
+#define ATTR_ATIME      0x0010
+#define ATTR_MTIME      0x0020
+#define ATTR_CTIME      0x0040
+#define ATTR_ATIME_SET  0x0080
+#define ATTR_MTIME_SET  0x0100
+#define ATTR_FORCE      0x0200  /* Not a change, but a change it */
+#define ATTR_ATTR_FLAG  0x0400
+#define ATTR_RAW        0x0800  /* file system, not vfs will massage attrs */
+#define ATTR_FROM_OPEN  0x1000  /* called from open path, ie O_TRUNC */
+//#define ATTR_CTIME_SET  0x2000
+#define ATTR_BLOCKS     0x4000
+#define ATTR_KILL_SUID  0
+#define ATTR_KILL_SGID  0
+
+#define in_group_p(x)  (0)
+
+/*
+ * proc fs routines
+ */
+
+int proc_init_fs();
+void proc_destroy_fs();
+
+
+/*
+ *  misc
+ */
+
+static inline void *ERR_PTR(long_ptr error)
+{
+       return (void *) error;
+}
+
+/* Recover the integer value carried by pointer @ptr. */
+static inline long_ptr PTR_ERR(const void *ptr)
+{
+    long_ptr value = (long_ptr) ptr;
+
+    return value;
+}
+
+/* Non-zero when @ptr, viewed as an unsigned integer, is greater than
+ * (ulong_ptr)-1000L. */
+static inline long_ptr IS_ERR(const void *ptr)
+{
+    const ulong_ptr limit = (ulong_ptr)-1000L;
+
+    return (ulong_ptr)ptr > limit;
+}
+
+#else  /* !__KERNEL__ */
+
+#define CREATE_NEW          1
+#define CREATE_ALWAYS       2
+#define OPEN_EXISTING       3
+#define OPEN_ALWAYS         4
+#define TRUNCATE_EXISTING   5
+
+#define SECTION_QUERY       0x0001
+#define SECTION_MAP_WRITE   0x0002
+#define SECTION_MAP_READ    0x0004
+#define SECTION_MAP_EXECUTE 0x0008
+#define SECTION_EXTEND_SIZE 0x0010
+
+#define FILE_MAP_COPY       SECTION_QUERY
+#define FILE_MAP_WRITE      SECTION_MAP_WRITE
+#define FILE_MAP_READ       SECTION_MAP_READ
+#define FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS
+
+
+NTSYSAPI
+HANDLE
+NTAPI
+CreateFileA(
+    IN LPCSTR lpFileName,
+    IN DWORD dwDesiredAccess,
+    IN DWORD dwShareMode,
+    IN PVOID lpSecurityAttributes,
+    IN DWORD dwCreationDisposition,
+    IN DWORD dwFlagsAndAttributes,
+    IN HANDLE hTemplateFile
+    );
+
+#define CreateFile  CreateFileA
+
+NTSYSAPI
+BOOL
+NTAPI
+CloseHandle(
+    IN OUT HANDLE hObject
+    );
+
+NTSYSAPI
+HANDLE
+NTAPI
+CreateFileMappingA(
+    IN HANDLE hFile,
+    IN PVOID lpFileMappingAttributes,
+    IN DWORD flProtect,
+    IN DWORD dwMaximumSizeHigh,
+    IN DWORD dwMaximumSizeLow,
+    IN LPCSTR lpName
+    );
+#define CreateFileMapping  CreateFileMappingA
+
+NTSYSAPI
+DWORD
+NTAPI
+GetFileSize(
+    IN HANDLE hFile,
+    OUT DWORD * lpFileSizeHigh
+    );
+
+NTSYSAPI
+PVOID
+NTAPI
+MapViewOfFile(
+    IN HANDLE hFileMappingObject,
+    IN DWORD dwDesiredAccess,
+    IN DWORD dwFileOffsetHigh,
+    IN DWORD dwFileOffsetLow,
+    IN SIZE_T dwNumberOfBytesToMap
+    );
+
+NTSYSAPI
+BOOL
+NTAPI
+UnmapViewOfFile(
+    IN PVOID lpBaseAddress
+    );
+
+#endif /* __KERNEL__ */
+
+typedef struct {
+       void    *d;
+} cfs_dentry_t;
+
+
+#endif /* __LIBCFS_WINNT_CFS_FS_H__*/
diff --git a/libcfs/include/libcfs/winnt/winnt-lock.h b/libcfs/include/libcfs/winnt/winnt-lock.h
new file mode 100644 (file)
index 0000000..e0b9393
--- /dev/null
@@ -0,0 +1,686 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_LOCK_H__
+#define __LIBCFS_WINNT_CFS_LOCK_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+
+/*
+ *  nt specific part ...
+ */
+
+
+/* atomic */
+
+/* Counter wrapper; atomic_read()/atomic_set() below are plain loads and
+ * stores of the volatile counter field. */
+typedef struct { volatile int counter; } atomic_t;
+
+/* Static initializer for an atomic_t holding value i. */
+#define ATOMIC_INIT(i) { i }
+
+#define atomic_read(v) ((v)->counter)
+#define atomic_set(v,i)                (((v)->counter) = (i))
+
+void FASTCALL atomic_add(int i, atomic_t *v);
+void FASTCALL atomic_sub(int i, atomic_t *v);
+
+int FASTCALL atomic_sub_and_test(int i, atomic_t *v);
+
+void FASTCALL atomic_inc(atomic_t *v);
+void FASTCALL atomic_dec(atomic_t *v);
+
+int FASTCALL atomic_dec_and_test(atomic_t *v);
+int FASTCALL atomic_inc_and_test(atomic_t *v);
+
+
+/* event */
+
+typedef KEVENT          event_t;
+
+/*
+ * cfs_init_event
+ *   Initialize an event object via KeInitializeEvent()
+ *
+ * Arguments:
+ *   event:  the event object to initialize
+ *   type:   non-zero selects SynchronizationEvent,
+ *           zero selects NotificationEvent
+ *   status: initial state of the event;
+ *           non-zero is signaled, zero is not signaled
+ *
+ * Return Value:
+ *   N/A
+ */
+static inline void
+cfs_init_event(event_t *event, int type, int status)
+{
+    if (type) {
+        KeInitializeEvent(event, SynchronizationEvent,
+                          status ? TRUE : FALSE);
+    } else {
+        KeInitializeEvent(event, NotificationEvent,
+                          status ? TRUE : FALSE);
+    }
+}
+
+/*
+ * cfs_wait_event
+ *   Block on an event object until it is signaled or a timeout expires
+ *
+ * Arguments:
+ *   event:   the event object to wait on
+ *   timeout: how long to wait, scaled by (10000000/HZ) into a negative
+ *            interval for KeWaitForSingleObject(); 0 waits without a
+ *            timeout
+ *
+ * Return Value:
+ *   Zero:     the wait returned STATUS_TIMEOUT
+ *   Non Zero: any other wait status (event signaled)
+ */
+
+static inline int64_t
+cfs_wait_event(event_t * event, int64_t timeout)
+{
+    LARGE_INTEGER   Interval;
+    LARGE_INTEGER * Wait = NULL;    /* NULL: no timeout */
+    NTSTATUS        rc;
+
+    Interval.QuadPart = -1 * (10000000/HZ) * timeout;
+    if (timeout != 0) {
+        Wait = &Interval;
+    }
+
+    rc = KeWaitForSingleObject(event, Executive, KernelMode, FALSE, Wait);
+
+    return (rc == STATUS_TIMEOUT) ? 0 : TRUE;
+}
+
+/*
+ * cfs_wake_event
+ *   Signal an event object via KeSetEvent()
+ *
+ * Arguments:
+ *   event:  the event object to signal
+ *
+ * Return Value:
+ *   1 when KeSetEvent() returned a non-zero value, otherwise 0
+ */
+
+static inline int
+cfs_wake_event(event_t * event)
+{
+    return (0 != KeSetEvent(event, 0, FALSE));
+}
+
+/*
+ * cfs_clear_event
+ *   Reset an event object via KeResetEvent()
+ *
+ * Arguments:
+ *   event:  the event object to reset
+ *
+ * Return Value:
+ *   N/A
+ */
+
+static inline void
+cfs_clear_event(event_t * event)
+{
+    (void) KeResetEvent(event);
+}
+
+
+/*
+ * IMPORTANT !!!!!!!!
+ *
+ * Lock declarations are not guaranteed to be initialized here,
+ * although some of them are initialized on Linux. All locks
+ * declared by CFS_DECL_* should be initialized explicitly.
+ */
+
+
+/*
+ * spin lock definitions / routines
+ */
+
+/*
+ * Warning:
+ *
+ * for spinlock operations, nested acquisition of the same spinlock
+ * will cause a dead-lock on an MP system and overwrite the saved irql
+ * on a UP system. (A UP system tolerates nested acquisition because
+ * nothing actually spins there; only the irql is raised.)
+ *
+ */
+
+/* Spin lock: the KSPIN_LOCK itself plus the IRQL saved when it is acquired
+ * (written by spin_lock()/spin_trylock(), read back by spin_unlock()). */
+typedef struct spin_lock {
+
+    KSPIN_LOCK lock;
+    KIRQL      irql;
+
+} spinlock_t;
+
+
+#define CFS_DECL_SPIN(name)  spinlock_t name;
+#define CFS_DECL_SPIN_EXTERN(name)  extern spinlock_t name;
+
+
+/* Initialize the KSPIN_LOCK inside @lock; the saved irql is not touched. */
+static inline void spin_lock_init(spinlock_t *lock)
+{
+    KSPIN_LOCK *raw = &lock->lock;
+
+    KeInitializeSpinLock(raw);
+}
+
+
+/* Acquire @lock with KeAcquireSpinLock(), saving the previous IRQL in
+ * lock->irql for the matching spin_unlock(). */
+static inline void spin_lock(spinlock_t *lock)
+{
+    KeAcquireSpinLock(&lock->lock, &lock->irql);
+}
+
+/* Release @lock with KeReleaseSpinLock(), passing the IRQL that was saved
+ * in lock->irql when the lock was taken. */
+static inline void spin_unlock(spinlock_t *lock)
+{
+    KIRQL   saved = lock->irql;     /* read before the lock is dropped */
+
+    KeReleaseSpinLock(&lock->lock, saved);
+}
+
+
+#define spin_lock_irqsave(lock, flags)         do {(flags) = 0; spin_lock(lock);} while(0)
+#define spin_unlock_irqrestore(lock, flags)    do {spin_unlock(lock);} while(0)
+
+
+/* There's no  corresponding routine in windows kernel.
+   We must realize a light one of our own.  But there's
+   no way to identify the system is MP build or UP build
+   on the runtime. We just uses a workaround for it. */
+
+extern int MPSystem;
+
+/*
+ * spin_trylock
+ *   Try to take @lock without spinning.
+ *
+ * Return Value:
+ *   Non-Zero: lock taken; the IRQL stays at DISPATCH_LEVEL and the
+ *             previous IRQL is saved in lock->irql
+ *   Zero:     lock not taken; the previous IRQL is restored
+ */
+static int spin_trylock(spinlock_t *lock)
+{
+    KIRQL   Irql;
+    int     rc = 0;
+
+    ASSERT(lock != NULL);
+
+    /* raise first; lowered again below if the attempt fails */
+    KeRaiseIrql(DISPATCH_LEVEL, &Irql);
+
+    if (MPSystem) {
+        /* only attempt the atomic set when the lock word currently reads 0 */
+        if (0 == (ulong_ptr)lock->lock) {
+#if _X86_
+            /* atomically test-and-set bit 0 of the dword addressed by
+             * [ebp + 8] (presumably the first argument, i.e. the lock
+             * pointer - TODO confirm); carry set means the bit was
+             * already set, so the attempt fails and rc stays 0 */
+            __asm {
+                mov  edx, dword ptr [ebp + 8]
+                lock bts dword ptr[edx], 0
+                jb   lock_failed
+                mov  rc, TRUE
+            lock_failed:
+            }
+#else
+        /* no implementation for non-x86 builds: break into the debugger,
+         * rc stays 0 */
+        KdBreakPoint();
+#endif
+
+        }
+    } else {
+        /* MPSystem is 0: succeed without touching the lock word */
+        rc = TRUE;
+    }
+
+    if (rc) {
+        lock->irql = Irql;
+    } else {
+        KeLowerIrql(Irql);
+    }
+
+    return rc;
+}
+
+/* synchronization between cpus: it will disable all DPCs
+   kernel task scheduler on the CPU */
+#define spin_lock_bh(x)                    spin_lock(x)
+#define spin_unlock_bh(x)          spin_unlock(x)
+#define spin_lock_bh_init(x)   spin_lock_init(x)
+
+/*
+ * rw_semaphore (using ERESOURCE)
+ */
+
+
+/* Read/write semaphore implemented as a wrapper around an ERESOURCE. */
+typedef struct rw_semaphore {
+    ERESOURCE   rwsem;
+} rw_semaphore_t;
+
+
+#define CFS_DECL_RWSEM(name) rw_semaphore_t name
+#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name
+
+
+/*
+ * init_rwsem
+ *   Initialize a rw_semaphore_t structure
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ */
+
+static inline void init_rwsem(rw_semaphore_t *s)
+{
+    (void) ExInitializeResourceLite(&(s->rwsem));
+}
+
+
+/*
+ * fini_rwsem
+ *   Finalize/destroy a rw_semaphore_t structure
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   On winnt this routine is needed to delete the ERESOURCE.
+ *   Other systems can define it as a no-op.
+ */
+
+static inline void fini_rwsem(rw_semaphore_t *s)
+{
+    (void) ExDeleteResourceLite(&(s->rwsem));
+}
+
+/*
+ * down_read
+ *   Acquire the rw_semaphore for shared (read) access, waiting if needed
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ */
+
+static inline void down_read(struct rw_semaphore *s)
+{
+    (void) ExAcquireResourceSharedLite(&(s->rwsem), TRUE);
+}
+
+
+/*
+ * down_read_trylock
+ *   Try to acquire the rw_semaphore for shared (read) access
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   Zero: failed to acquire the read lock
+ *   Non-Zero: succeeded in acquiring the read lock
+ *
+ * Notes:
+ *   The resource is requested with Wait == FALSE, so the call
+ *   returns immediately instead of waiting.
+ */
+
+static inline int down_read_trylock(struct rw_semaphore *s)
+{
+    int acquired = ExAcquireResourceSharedLite(&(s->rwsem), FALSE);
+
+    return acquired;
+}
+
+
+/*
+ * down_write
+ *   Acquire the rw_semaphore for exclusive (write) access, waiting
+ *   if needed
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ */
+
+static inline void down_write(struct rw_semaphore *s)
+{
+    (void) ExAcquireResourceExclusiveLite(&s->rwsem, TRUE);
+}
+
+
+/*
+ * down_write_trylock
+ *   Try to acquire the rw_semaphore for exclusive (write) access
+ *
+ * Arguments:
+ *   s:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   Zero: failed to acquire the write lock
+ *   Non-Zero: succeeded in acquiring the write lock
+ *
+ * Notes:
+ *   The resource is requested with Wait == FALSE, so the call
+ *   returns immediately instead of waiting.
+ */
+
+static inline int down_write_trylock(struct rw_semaphore *s)
+{
+    int acquired = ExAcquireResourceExclusiveLite(&s->rwsem, FALSE);
+
+    return acquired;
+}
+
+
+/*
+ * up_read
+ *   To release read-lock of the rw_semahore
+ *
+ * Arguments:
+ *   rwsem:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void up_read(struct rw_semaphore *s)
+{
+    /* Drop the hold that the calling thread has on the resource. */
+    ExReleaseResourceForThreadLite(&s->rwsem, ExGetCurrentResourceThread());
+}
+
+
+/*
+ * up_write
+ *   To release the write lock of the rw_semaphore
+ *
+ * Arguments:
+ *   rwsem:  pointer to the rw_semaphore_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void up_write(struct rw_semaphore *s)
+{
+    /* Same release call as up_read(): the resource is released per thread. */
+    ExReleaseResourceForThreadLite(&s->rwsem, ExGetCurrentResourceThread());
+}
+
+/*
+ * rwlock_t (using semaphore)
+ *
+ * - rwlock_init(x)
+ * - read_lock(x)
+ * - read_unlock(x)
+ * - write_lock(x)
+ * - write_unlock(x)
+ */
+
+/* Reader/writer lock: a spinlock plus an integer counter. */
+typedef struct {
+    spinlock_t guard;
+    int        count;
+} rwlock_t;
+
+/* Lifetime management; implemented outside this header. */
+void rwlock_init(rwlock_t * rwlock);
+void rwlock_fini(rwlock_t * rwlock);
+
+/* Lock/unlock entry points; implemented outside this header. */
+void read_lock(rwlock_t * rwlock);
+void read_unlock(rwlock_t * rwlock);
+void write_lock(rwlock_t * rwlock);
+void write_unlock(rwlock_t * rwlock);
+
+/*
+ * Linux-style irqsave/irqrestore variants. No interrupt state is saved
+ * here: the *_irqsave forms only set the flags lvalue `f` to 0 before
+ * taking the lock, and the *_irqrestore forms ignore `f` entirely.
+ */
+#define write_lock_irqsave(l, f)        do {f = 0; write_lock(l);} while(0)
+#define write_unlock_irqrestore(l, f)   do {write_unlock(l);} while(0)
+#define read_lock_irqsave(l, f)                do {f=0; read_lock(l);} while(0)
+#define read_unlock_irqrestore(l, f)    do {read_unlock(l);} while(0)
+
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+
+/*
+ * Counting semaphore wrapping a kernel KSEMAPHORE. mutex_t is the very
+ * same type, so the mutex helpers operate directly on a semaphore.
+ */
+typedef struct semaphore {
+       KSEMAPHORE sem;
+} mutex_t;
+
+static inline void sema_init(struct semaphore *s, int val)
+{
+    /*
+     * val is passed twice: once as the initial count and once as the
+     * limit of the kernel semaphore.
+     * NOTE(review): with val == 0 the limit is 0 as well -- confirm that
+     * no caller relies on sema_init(s, 0) followed by __up().
+     */
+    KeInitializeSemaphore(&(s->sem), val, val);
+}
+
+static inline void __down(struct semaphore *s)
+{
+    /* Kernel-mode, non-alertable wait with no timeout (NULL). */
+    KeWaitForSingleObject(&s->sem, Executive, KernelMode, FALSE, NULL);
+}
+
+static inline void __up(struct semaphore *s)
+{
+    /* Priority increment 0, add 1 to the count, no wait follows (FALSE). */
+    KeReleaseSemaphore(&(s->sem), 0, 1, FALSE);
+}
+
+/*
+ * mutex_t:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+
+/*
+ * init_mutex
+ *   To initialize a mutex_t structure
+ *
+ * Arguments:
+ *   mutex:  pointer to the mutex_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void init_mutex(mutex_t *mutex)
+{
+    /* A mutex is a semaphore initialized with a count of one. */
+    sema_init(mutex, 1);
+}
+
+
+/*
+ * mutex_down
+ *   To acquire the mutex lock
+ *
+ * Arguments:
+ *   mutex:  pointer to the mutex_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void mutex_down(mutex_t *mutex)
+{
+    /* Acquire by waiting on the underlying semaphore. */
+    __down(mutex);
+}
+
+
+/*
+ * mutex_up
+ *   To release the mutex lock (acquired already)
+ *
+ * Arguments:
+ *   mutex:  pointer to the mutex_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void mutex_up(mutex_t *mutex)
+{
+    /* Release by posting the underlying semaphore. */
+    __up(mutex);
+}
+
+
+/*
+ * init_mutex_locked
+ *   To initialize the mutex as acquired state
+ *
+ * Arguments:
+ *   mutex:  pointer to the mutex_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void init_mutex_locked(mutex_t *mutex)
+{
+    /*
+     * The function returns nothing, so it is declared void explicitly
+     * (it previously relied on implicit int with no return statement).
+     *
+     * Initialize as an ordinary mutex, then take it at once so that the
+     * caller starts out holding it.
+     */
+    init_mutex(mutex);
+    mutex_down(mutex);
+}
+
+/*
+ * completion
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+
+/* Completion object: a thin wrapper around a single event_t. */
+struct completion {
+       event_t  event;
+};
+
+
+/*
+ * init_completion
+ *   To initialize the completion object
+ *
+ * Arguments:
+ *   c:  pointer to the completion structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void init_completion(struct completion *c)
+{
+    /*
+     * NOTE(review): the meaning of the arguments (1, FALSE) is defined by
+     * cfs_init_event(), which is not visible here -- confirm there.
+     */
+    cfs_init_event(&c->event, 1, FALSE);
+}
+
+
+/*
+ * complete
+ *   To complete/signal the completion object
+ *
+ * Arguments:
+ *   c:  pointer to the completion structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void complete(struct completion *c)
+{
+    /* Wake whoever waits on the embedded event. */
+    cfs_wake_event(&c->event);
+}
+
+/*
+ * wait_for_completion
+ *   To wait on the completion object. If the event is signaled,
+ *   this function will return to the caller with the event un-signaled.
+ *
+ * Arguments:
+ *   c:  pointer to the completion structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+static inline void wait_for_completion(struct completion *c)
+{
+    /* Second argument 0: presumably "no timeout" -- TODO confirm in cfs_wait_event(). */
+    cfs_wait_event(&c->event, 0);
+}
+
+/* __KERNEL__ */
+#else
+
+#include "../user-lock.h"
+
+/* __KERNEL__ */
+#endif
+#endif
diff --git a/libcfs/include/libcfs/winnt/winnt-mem.h b/libcfs/include/libcfs/winnt/winnt-mem.h
new file mode 100644 (file)
index 0000000..b7f00a4
--- /dev/null
@@ -0,0 +1,133 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines for memory manipulation.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_MEM_H__
+#define __LIBCFS_WINNT_CFS_MEM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+#ifdef __KERNEL__
+
+/* Page geometry: plain aliases of the native PAGE_SIZE / PAGE_SHIFT. */
+#define CFS_PAGE_SIZE                   PAGE_SIZE
+#define CFS_PAGE_SHIFT                  PAGE_SHIFT
+/* Mask that clears the offset-within-page bits of an address. */
+#define CFS_PAGE_MASK                   (~(PAGE_SIZE - 1))
+
+/*
+ * Emulated page descriptor: the address of the backing memory plus a
+ * reference counter (see cfs_get_page / cfs_put_page / cfs_page_count).
+ */
+typedef struct cfs_page {
+    void *      addr;
+    atomic_t    count;
+} cfs_page_t;
+
+
+/* Page allocation / release; implemented outside this header. */
+cfs_page_t *cfs_alloc_page(int flags);
+void cfs_free_page(cfs_page_t *pg);
+
+static inline void *cfs_page_address(cfs_page_t *page)
+{
+    /* The descriptor stores the address of its memory directly. */
+    void *va = page->addr;
+
+    return va;
+}
+
+static inline void *cfs_kmap(cfs_page_t *page)
+{
+    /* No mapping step is performed: the stored address is returned as is. */
+    void *va = page->addr;
+
+    return va;
+}
+
+static inline void cfs_kunmap(cfs_page_t *page)
+{
+    /* Intentionally empty: cfs_kmap() creates nothing that needs undoing. */
+}
+
+static inline void cfs_get_page(cfs_page_t *page)
+{
+    /* Take one more reference on the page. */
+    atomic_inc(&(page->count));
+}
+
+static inline void cfs_put_page(cfs_page_t *page)
+{
+    /* Drop one reference; nothing is freed here, whatever the new count. */
+    atomic_dec(&(page->count));
+}
+
+static inline int cfs_page_count(cfs_page_t *page)
+{
+    /* Snapshot of the current reference count. */
+    int refs = atomic_read(&(page->count));
+
+    return refs;
+}
+
+/*
+ * Memory allocator
+ */
+
+/* Allocation flag kept for source compatibility; its value here is 0. */
+#define CFS_ALLOC_ATOMIC_TRY   (0)
+
+/* General-purpose allocator; implemented outside this header. */
+extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+extern void  cfs_free(void *addr);
+
+/* Allocator variant for large requests; implemented outside this header. */
+extern void *cfs_alloc_large(size_t nr_bytes);
+extern void  cfs_free_large(void *addr);
+
+/*
+ * SLAB allocator
+ */
+
+/* Linux slab flag, defined as 0 here. */
+#define SLAB_HWCACHE_ALIGN             0
+
+/* The cache name is limited to 20 chars */
+/* NOTE(review): name[] holds 20 bytes in total; whether that includes a
+ * terminating NUL depends on cfs_mem_cache_create() -- confirm there. */
+
+typedef struct cfs_mem_cache {
+
+    char                    name[20];
+    ulong_ptr           flags;
+    NPAGED_LOOKASIDE_LIST   npll;   /* NT non-paged lookaside list */
+
+} cfs_mem_cache_t;
+
+
+/* Cache lifetime and object alloc/free; implemented outside this header. */
+extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr);
+extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * );
+extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int);
+extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *);
+
+
+/*
+ * Page allocator slabs 
+ */
+
+/* Caches used by the page allocator; defined outside this header. */
+extern cfs_mem_cache_t *cfs_page_t_slab;
+extern cfs_mem_cache_t *cfs_page_p_slab;
+
+
+/* Address-space switching hooks: all of them expand to nothing here. */
+#define CFS_DECL_MMSPACE
+#define CFS_MMSPACE_OPEN    do {} while(0)
+#define CFS_MMSPACE_CLOSE   do {} while(0)
+
+
+/*
+ * Memory barriers are defined as no-ops.
+ * NOTE(review): no hardware or compiler barrier is emitted -- confirm
+ * this is acceptable on multiprocessor systems.
+ */
+#define mb()    do {} while(0)
+#define rmb()   mb()
+#define wmb()   mb()
+
+
+/* __KERNEL__ */
+#endif
+
+#endif /* __LIBCFS_WINNT_CFS_MEM_H__ */
diff --git a/libcfs/include/libcfs/winnt/winnt-prim.h b/libcfs/include/libcfs/winnt/winnt-prim.h
new file mode 100644 (file)
index 0000000..3c8560b
--- /dev/null
@@ -0,0 +1,1082 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic library routines.
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_CFS_PRIM_H__
+#define __LIBCFS_WINNT_CFS_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+/*
+ * libcfs proc device object
+ */
+
+
+#define LUSTRE_PROC_DEVICE  L"\\Device\\lproc"      /* proc fs emulator device object */
+#define LUSTRE_PROC_SYMLNK  L"\\DosDevices\\lproc"  /* proc fs user-visible device */
+
+
+/*
+ * Device IO Control Code Definitions
+ */
+
+/* Device type handed to CTL_CODE() for every libcfs ioctl (defined once). */
+#define FILE_DEVICE_LIBCFS      ('LC')
+
+#define FUNC_LIBCFS_VERSION     0x101  // get version of current libcfs
+#define FUNC_LIBCFS_IOCTL       0x102  // Device i/o control to proc fs
+
+
+/* Complete ioctl codes: buffered method, any access. */
+#define IOCTL_LIBCFS_VERSION \
+     CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS)
+#define IOCTL_LIBCFS_ENTRY   \
+     CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL,   METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+/* Pack the ioctl header on 4-byte boundaries. */
+#pragma pack(4)
+
+/*
+ * Header of an ioctl request sent to the proc emulator device.
+ * The commented-out data[] member suggests that the payload follows the
+ * header directly -- TODO confirm against the ioctl handlers.
+ */
+typedef struct _CFS_PROC_IOCTL {
+
+    ULONG           cmd;    // ioctl command identifier
+    ULONG           len;    // length of data
+
+    // UCHAR        data[]; // content of the real ioctl
+
+} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL;
+
+/* Restore the default packing. */
+#pragma pack()
+
+#ifdef __KERNEL__
+
+#include <libcfs/list.h>
+
+/*
+ * Symbol functions for libcfs
+ *
+ * This platform has no facility for us to register symbols,
+ * so we have to implement it ourselves.
+ */
+/* Size in bytes of the name buffer of a registered symbol. */
+#define CFS_SYMBOL_LEN     64
+
+/* One registered symbol: name, value pointer, reference count, list link. */
+struct  cfs_symbol {
+       char    name[CFS_SYMBOL_LEN];
+       void    *value;
+       int     ref;
+       struct  list_head sym_list;
+};
+
+/* Symbol registry API; implemented outside this header. */
+extern int      cfs_symbol_register(const char *, const void *);
+extern void     cfs_symbol_unregister(const char *);
+extern void *   cfs_symbol_get(const char *);
+extern void     cfs_symbol_put(const char *);
+extern void     cfs_symbol_clean();
+
+
+
+/* libcfs names for the file-operations table and the open-file object. */
+typedef struct file_operations cfs_file_operations_t;
+typedef struct file cfs_file_t;
+
+/*
+ * Pseudo device register
+ */
+
+/* Description of a pseudo device: minor number, name, file operations. */
+typedef struct
+{
+    int                     minor;
+    const char *            name;
+    cfs_file_operations_t * fops;
+} cfs_psdev_t;
+
+/* Register / deregister a pseudo device; implemented outside this header. */
+int cfs_psdev_register(cfs_psdev_t * psdev);
+int cfs_psdev_deregister(cfs_psdev_t * psdev);
+
+
+/*
+ * Proc emulator file system APIs
+ */
+
+/* Callback types for reading from / writing to a proc entry
+ * (signatures follow the Linux read_proc / write_proc shape). */
+typedef int cfs_read_proc_t(char *page, char **start, off_t off,
+                         int count, int *eof, void *data);
+typedef int cfs_write_proc_t(struct file *file, const char *buffer,
+                          ulong_ptr count, void *data);
+
+/* Magic value for cfs_proc_entry_t::magic (multi-character constant). */
+#define CFS_PROC_ENTRY_MAGIC 'CPEM'
+
+/* Bits for cfs_proc_entry_t::flags. */
+#define CFS_PROC_FLAG_DIRECTORY    0x00000001 // directory node
+#define CFS_PROC_FLAG_ATTACHED     0x00000002 // node is attached to proc
+#define CFS_PROC_FLAG_MISCDEV      0x00000004 // miscellaneous device
+
+/*
+ * cfs_proc_entry
+ *   One node of the emulated proc tree. The same structure serves as a
+ *   directory node (the _dir_entry member) and as a file/leaf node (the
+ *   _file_entry callbacks); `flags` carries the CFS_PROC_FLAG_* bits.
+ *
+ *   cfs_proc_dir_entry_t is declared exactly once, by the typedef after
+ *   the structure, so the name is not defined twice.
+ */
+typedef struct cfs_proc_entry
+{
+    ULONG                   magic;      // Magic
+    ULONG                   flags;      // Flags
+
+    struct _dir_entry {                 // proc directory entry
+        PRTL_SPLAY_LINKS    root;
+    };
+
+    struct _file_entry {                // proc file / leaf entry
+        cfs_read_proc_t  *  read_proc;
+        cfs_write_proc_t *  write_proc;
+    };
+
+    mode_t                  mode;
+    unsigned short          nlink;
+
+    struct file_operations * proc_fops;
+    void *                  data;
+
+    // proc_dir_entry ended.
+
+    RTL_SPLAY_LINKS         s_link;       // splay link
+
+    //
+    // Maximum length of proc entry name is 0x20
+    //
+
+    char                    name[0x20];
+
+} cfs_proc_entry_t;
+
+/* Directory entries use the same structure as ordinary entries. */
+typedef cfs_proc_entry_t cfs_proc_dir_entry_t;
+
+/* Block size used by the proc emulator: one page. */
+#define PROC_BLOCK_SIZE    PAGE_SIZE
+
+/*
+ * Sysctl register
+ */
+
+/* libcfs names for the Linux-style sysctl table and table header. */
+typedef struct ctl_table                   cfs_sysctl_table_t;
+typedef struct ctl_table_header                cfs_sysctl_table_header_t;
+
+/* Function type stored in ctl_table::strategy. */
+typedef int ctl_handler (
+            cfs_sysctl_table_t *table,
+            int *name,    int nlen,
+                       void *oldval, size_t *oldlenp,
+                       void *newval, size_t newlen, 
+                       void **context );
+
+/* Function type stored in ctl_table::proc_handler. */
+typedef int proc_handler (
+            cfs_sysctl_table_t *ctl,
+            int write, struct file * filp,
+                       void *buffer, size_t *lenp );
+
+
+/* Handlers with the proc_handler signature; implemented outside this header. */
+int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+                    void *buffer, size_t *lenp);
+
+int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
+                 void *buffer, size_t *lenp);
+
+/* Handler with the ctl_handler signature; implemented outside this header. */
+int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
+                 void *oldval, size_t *oldlenp,
+                 void *newval, size_t newlen, void **context);
+
+
+/*
+ *  System io control definitions
+ */
+
+/* Sysctl naming constants, named after their Linux counterparts. */
+#define CTL_MAXNAME 10
+
+#define CTL_ANY     -1  /* Matches any name */
+#define CTL_NONE    0
+
+/* Top-level sysctl name identifiers. */
+enum
+{
+    CTL_KERN=1,     /* General kernel info and control */
+    CTL_VM=2,       /* VM management */
+    CTL_NET=3,      /* Networking */
+    CTL_PROC=4,     /* Process info */
+    CTL_FS=5,       /* Filesystems */
+    CTL_DEBUG=6,        /* Debugging */
+    CTL_DEV=7,      /* Devices */
+    CTL_BUS=8,      /* Busses */
+    CTL_ABI=9,      /* Binary emulation */
+    CTL_CPU=10      /* CPU stuff (speed scaling, etc) */
+};
+
+/* sysctl table definitions: one entry of a sysctl table */
+struct ctl_table 
+{
+       int ctl_name;                   /* numeric name of the entry */
+       char *procname;                 /* textual name of the entry */
+       void *data;
+       int maxlen;
+       mode_t mode;
+       cfs_sysctl_table_t *child;      /* child table, if any */
+       proc_handler *proc_handler;     /* text formatting callback */
+       ctl_handler *strategy;          /* read / write callback functions */
+       cfs_proc_entry_t *de;   /* proc entry block */
+       void *extra1;
+       void *extra2;
+};
+
+
+/* the maintainer of the cfs_sysctl_table trees: a table plus a list link */
+struct ctl_table_header
+{
+       cfs_sysctl_table_t *    ctl_table;
+       struct list_head        ctl_entry;
+};
+
+
+/* Proc tree manipulation; implemented outside this header. */
+cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod,
+                                         cfs_proc_entry_t *parent);
+void proc_free_entry(cfs_proc_entry_t *de);
+void remove_proc_entry(char *name, cfs_proc_entry_t *entry);
+cfs_proc_entry_t * search_proc_entry(char * name,
+                        cfs_proc_entry_t *  root );
+
+/* cfs_-prefixed aliases of the routines above. */
+#define cfs_create_proc_entry create_proc_entry
+#define cfs_free_proc_entry   proc_free_entry
+#define cfs_remove_proc_entry remove_proc_entry
+
+/*
+ * cfs_-style wrappers for sysctl table registration.
+ * The unregister wrapper takes a single argument and forwards only that
+ * argument; it no longer expands to an identifier `a` that is not one of
+ * its parameters.
+ * NOTE(review): assumes unregister_sysctl_table() takes one argument, as
+ * on Linux -- confirm against the winnt implementation.
+ */
+#define register_cfs_sysctl_table(t, a)        register_sysctl_table(t, a)
+#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t)
+
+
+/*
+ *  declaration of proc kernel process routines
+ */
+
+/* Open a file by name and return its file object. */
+cfs_file_t *
+lustre_open_file(char * filename);
+
+/* Close a file object obtained from lustre_open_file(). */
+int
+lustre_close_file(cfs_file_t * fh);
+
+/* Issue an ioctl identified by cmd, with argument arg, on the file. */
+int
+lustre_do_ioctl( cfs_file_t * fh,
+                 unsigned long cmd,
+                 ulong_ptr arg );
+
+/* Issue an ioctl described by a CFS_PROC_IOCTL header. */
+int
+lustre_ioctl_file( cfs_file_t * fh,
+                   PCFS_PROC_IOCTL devctl);
+
+/* Read `size` bytes at offset `off` into `buf`. */
+size_t
+lustre_read_file( cfs_file_t *    fh,
+                  loff_t          off,
+                  size_t          size,
+                  char *          buf
+                  );
+
+/* Write `size` bytes from `buf` at offset `off`. */
+size_t
+lustre_write_file( cfs_file_t *    fh,
+                   loff_t          off,
+                   size_t          size,
+                   char *          buf
+                   );
+
+/*
+ * Wait Queue
+ */
+
+
+/* Task state passed to the wait routines. */
+typedef int cfs_task_state_t;
+
+#define CFS_TASK_INTERRUPTIBLE 0x00000001
+#define CFS_TASK_UNINT         0x00000002
+
+
+
+/* Magic values for cfs_waitq_t / cfs_waitlink_t (multi-character constants). */
+#define CFS_WAITQ_MAGIC     'CWQM'
+#define CFS_WAITLINK_MAGIC  'CWLM'
+
+/* Wait queue head: magic, flags, a guarding spinlock and the waiter list. */
+typedef struct cfs_waitq {
+
+    unsigned int        magic;
+    unsigned int        flags;
+    
+    spinlock_t          guard;
+    struct list_head    waiters;
+
+} cfs_waitq_t;
+
+
+typedef struct cfs_waitlink cfs_waitlink_t;
+
+/* Every wait link has this many channels, indexed by the constants below. */
+#define CFS_WAITQ_CHANNELS     (2)
+
+#define CFS_WAITQ_CHAN_NORMAL  (0)
+#define CFS_WAITQ_CHAN_FORWARD (1)
+
+
+
+/* One channel of a wait link: list linkage plus back-pointers to the
+ * queue and to the owning wait link. */
+typedef struct cfs_waitlink_channel {
+    struct list_head        link;
+    cfs_waitq_t *           waitq;
+    cfs_waitlink_t *        waitl;
+} cfs_waitlink_channel_t;
+
+/* A waiter: magic, flags, pointers to an event and a hit counter, and
+ * one channel per CFS_WAITQ_CHAN_* index. */
+struct cfs_waitlink {
+
+    unsigned int            magic;
+    int                     flags;
+    event_t  *              event;
+    atomic_t *              hits;
+
+    cfs_waitlink_channel_t  waitq[CFS_WAITQ_CHANNELS];
+};
+
+/* Flag value; presumably stored in cfs_waitlink::flags by
+ * cfs_waitq_add_exclusive() -- TODO confirm. */
+enum {
+       CFS_WAITQ_EXCLUSIVE = 1
+};
+
+/* Declare a wait queue variable called `name`. */
+#define CFS_DECL_WAITQ(name) cfs_waitq_t name
+
+
+/* Initialization of queues and links; implemented outside this header. */
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+
+/* Linking a waiter to / unlinking it from a queue. */
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, 
+                            struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int  cfs_waitq_active(struct cfs_waitq *waitq);
+
+/* Waking waiters. */
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+
+/* Blocking on a link, without and with a timeout. */
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state);
+cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, 
+                                  cfs_task_state_t state, cfs_duration_t timeout);
+
+
+
+/* Kernel thread */
+
+/* Entry point type of a kernel thread. */
+typedef int (*cfs_thread_t) (void *arg);
+
+/* An entry point bundled with its argument. */
+typedef struct _cfs_thread_context {
+    cfs_thread_t        func;
+    void *              arg;
+} cfs_thread_context_t;
+
+/* Start a kernel thread running func(arg); implemented outside this header. */
+int cfs_kernel_thread(int (*func)(void *), void *arg, int flag);
+
+/*
+ * thread creation flags from Linux, not used in winnt
+ */
+#define CSIGNAL         0x000000ff      /* signal mask to be sent at exit */
+#define CLONE_VM        0x00000100      /* set if VM shared between processes */
+#define CLONE_FS        0x00000200      /* set if fs info shared between processes */
+#define CLONE_FILES     0x00000400      /* set if open files shared between processes */
+#define CLONE_SIGHAND   0x00000800      /* set if signal handlers and blocked signals shared */
+#define CLONE_PID       0x00001000      /* set if pid shared */
+#define CLONE_PTRACE    0x00002000      /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK     0x00004000      /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT    0x00008000      /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD    0x00010000      /* Same thread group? */
+#define CLONE_NEWNS     0x00020000      /* New namespace group? */
+
+/* Convenience combination of the signal-handler and thread-group flags. */
+#define CLONE_SIGNAL    (CLONE_SIGHAND | CLONE_THREAD)
+
+
+/*
+ * sigset ...
+ */
+
+/* libcfs name for the signal set type. */
+typedef sigset_t cfs_sigset_t;
+
+/*
+ * Task struct
+ */
+
+/* Largest timeout value: all-ones unsigned long shifted right by 12 bits. */
+#define MAX_SCHEDULE_TIMEOUT    ((long_ptr)(~0UL>>12))
+
+
+/* Number of slots in cfs_task_t::groups. */
+#define NGROUPS 1
+/* Size in bytes of cfs_task_t::comm. */
+#define CFS_CURPROC_COMM_MAX (16)
+/*
+ * Emulation of the parts of the Linux task structure that libcfs uses.
+ * NOTE(review): the struct tag is spelled "task_sruct"; it is left alone
+ * because code outside this header may refer to it.
+ */
+typedef struct task_sruct{
+    mode_t umask;
+
+       pid_t pid;
+       pid_t pgrp;
+
+       uid_t uid,euid,suid,fsuid;
+       gid_t gid,egid,sgid,fsgid;
+
+       int ngroups;
+       gid_t   groups[NGROUPS];
+       cfs_kernel_cap_t   cap_effective,
+                       cap_inheritable,
+                       cap_permitted;
+
+       char comm[CFS_CURPROC_COMM_MAX];
+    void * journal_info;
+}  cfs_task_t;
+
+
+/*
+ *  linux task struct emulator ...
+ */
+
+#define TASKMAN_MAGIC  'TMAN'   /* Task Manager */
+#define TASKSLT_MAGIC  'TSLT'   /* Task Slot */
+
+/* Task manager: bookkeeping for all task slots. */
+typedef struct _TASK_MAN {
+
+    ULONG       Magic;      /* Magic and Flags */
+    ULONG       Flags;
+
+    spinlock_t  Lock;       /* Protection lock */
+
+    cfs_mem_cache_t * slab; /* Memory slab for task slot */
+
+    ULONG       NumOfTasks; /* Total tasks (threads) */
+    LIST_ENTRY  TaskList;   /* List of task slots */
+
+} TASK_MAN, *PTASK_MAN;
+
+/* Task slot: per-thread record that embeds the emulated Linux task. */
+typedef struct _TASK_SLOT {
+
+    ULONG       Magic;      /* Magic and Flags */
+    ULONG       Flags;
+
+    LIST_ENTRY  Link;       /* To be linked to TaskMan */
+
+    event_t     Event;      /* Schedule event */
+
+    HANDLE      Pid;        /* Process id */
+    HANDLE      Tid;        /* Thread id */
+    PETHREAD    Tet;        /* Pointer to ethread */
+
+    atomic_t    count;      /* reference count */
+    atomic_t    hits;       /* number of times the wake event was signaled */
+
+    KIRQL       irql;       /* irql for rwlock ... */
+
+    cfs_task_t  task;       /* linux task part */
+
+} TASK_SLOT, *PTASK_SLOT;
+
+
+/* Linux-style `current`: expands to a call to cfs_current(). */
+#define current                 cfs_current()
+/* Setting the task state is a no-op here; the argument is ignored. */
+#define set_current_state(s)   do {;} while (0)
+
+#define wait_event(wq, condition)                           \
+do {                                                        \
+    cfs_waitlink_t __wait;                                     \
+                                                            \
+    cfs_waitlink_init(&__wait);                                    \
+       while (TRUE) {                                          \
+               cfs_waitq_add(&wq, &__wait);                        \
+               if (condition)  {                                           \
+                       break;                                                  \
+        }                                                   \
+               cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);        \
+               cfs_waitq_del(&wq, &__wait);                        \
+       }                                                                           \
+       cfs_waitq_del(&wq, &__wait);                                \
+} while(0)
+
+#define wait_event_interruptible(wq, condition, __ret)      \
+do {                                                        \
+    cfs_waitlink_t __wait;                                     \
+                                                            \
+    __ret = 0;                                              \
+    cfs_waitlink_init(&__wait);                                    \
+       while (TRUE) {                                          \
+               cfs_waitq_add(&wq, &__wait);                        \
+               if (condition)  {                                           \
+                       break;                                                  \
+        }                                                   \
+               cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE);    \
+               cfs_waitq_del(&wq, &__wait);                        \
+       }                                                                           \
+       cfs_waitq_del(&wq, &__wait);                                \
+} while(0)
+
+
+/* Task manager and scheduling emulation; implemented outside this header. */
+int     init_task_manager();
+void    cleanup_task_manager();
+cfs_task_t * cfs_current();
+int     schedule_timeout(int64_t time);
+int     schedule();
+int     wake_up_process(cfs_task_t * task);
+/* The state argument is dropped: only the time reaches schedule_timeout(). */
+#define cfs_schedule_timeout(state, time)  schedule_timeout(time)
+void sleep_on(cfs_waitq_t *waitq);
+
+/* Journal hooks: the declaration expands to nothing, push/pop are no-ops. */
+#define CFS_DECL_JOURNAL_DATA  
+#define CFS_PUSH_JOURNAL           do {;} while(0)
+#define CFS_POP_JOURNAL                    do {;} while(0)
+
+
+/* module related definitions */
+
+/* Linux section attributes: defined empty unless already provided. */
+#ifndef __exit
+#define __exit
+#endif
+#ifndef __init
+#define __init
+#endif
+
+/* Module loading always "succeeds" with 0. */
+#define request_module(x) (0)
+
+/* Linux module metadata macros: all of them expand to nothing. */
+#define EXPORT_SYMBOL(s)
+#define MODULE_AUTHOR(s)
+#define MODULE_DESCRIPTION(s)
+#define MODULE_LICENSE(s)
+#define MODULE_PARM(a, b)
+#define MODULE_PARM_DESC(a, b)
+
+/* Define wrapper functions module_<X>() that call the init/exit routine X. */
+#define module_init(X) int  __init module_##X() {return X();}
+#define module_exit(X) void __exit module_##X() {X();}
+
+/* Declare the wrappers generated by module_init / module_exit. */
+#define DECLARE_INIT(X) extern int  __init  module_##X(void)
+#define DECLARE_EXIT(X) extern void __exit  module_##X(void)
+
+/*
+ * Call the init wrapper of X. On a non-zero result this jumps to a label
+ * named `errorout`, which the calling function must provide.
+ */
+#define MODULE_INIT(X) do { int rc = module_##X(); \
+                            if (rc) goto errorout; \
+                          } while(0)
+
+/* Call the exit wrapper of X. */
+#define MODULE_EXIT(X) do { module_##X(); } while(0)
+
+
+/* Module interfaces */
+/* `name` and `version` are not used: only the wrappers are generated. */
+#define cfs_module(name, version, init, fini) \
+module_init(init);                            \
+module_exit(fini)
+
+
+/*
+ *  Linux kernel version definition
+ */
+
+/*
+ * Pack a version triple as a*100 + b*10 + c. Every argument is
+ * parenthesized so that an expression argument (e.g. a conditional) is
+ * not split apart by the surrounding arithmetic.
+ */
+#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+(c))
+/* The version this port reports: 2.6.7 under the packing above. */
+#define LINUX_VERSION_CODE (2*100+6*10+7)
+
+
+/*
+ * Signal
+ */
+/* Expands to nothing in this header. */
+#define SIGNAL_MASK_ASSERT()
+
+/*
+ * Timer
+ */
+
+/* Bits for cfs_timer_t::Flags. */
+#define CFS_TIMER_FLAG_INITED   0x00000001  // Initialized already
+#define CFS_TIMER_FLAG_TIMERED  0x00000002  // KeSetTimer is called
+
+/* Timer object: an NT KTIMER/KDPC pair plus the callback and its argument. */
+typedef struct cfs_timer {
+
+    KSPIN_LOCK      Lock;
+
+    ULONG           Flags;
+
+    KDPC            Dpc;
+    KTIMER          Timer;
+
+    cfs_time_t      deadline;
+
+    void (*proc)(ulong_ptr);
+    void *          arg;
+
+} cfs_timer_t;
+
+
+/* Signature of a timer callback. */
+typedef  void (*timer_func_t)(ulong_ptr);
+
+/* Expands to nothing; cfs_timer_init() below is the function declared for
+ * initializing a timer. */
+#define cfs_init_timer(t)
+
+/* Timer API; implemented outside this header. */
+void cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg);
+void cfs_timer_done(cfs_timer_t *t);
+void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline);
+void cfs_timer_disarm(cfs_timer_t *t);
+int  cfs_timer_is_armed(cfs_timer_t *t);
+cfs_time_t cfs_timer_deadline(cfs_timer_t *t);
+
+
+/* deschedule for a bit... */
+static inline void cfs_pause(cfs_duration_t ticks)
+{
+    /*
+     * The cfs_schedule_timeout() macro drops its state argument, so only
+     * `ticks` reaches schedule_timeout(). Pass this header's own
+     * CFS_TASK_UNINT rather than the Linux name TASK_UNINTERRUPTIBLE,
+     * which is not defined in the visible part of this header.
+     */
+    cfs_schedule_timeout(CFS_TASK_UNINT, ticks);
+}
+
+
+static inline void cfs_enter_debugger(void)
+{
+    /* Break into the debugger: int 3 on x86 builds, KdBreakPoint() otherwise. */
+#if !_X86_
+    KdBreakPoint();
+#else
+    __asm int 3;
+#endif
+}
+
+/*
+ *  libcfs globals initialization/cleanup
+ */
+
+/* Set up the libcfs globals; implemented outside this header. */
+int
+libcfs_arch_init(void);
+
+/* Tear down what libcfs_arch_init() set up; implemented outside this header. */
+void
+libcfs_arch_cleanup(void);
+
+/*
+ * SMP ...
+ */
+
+#define SMP_CACHE_BYTES             128
+/* Alignment attribute: expands to nothing here. */
+#define __cacheline_aligned
+/* NOTE(review): the CPU count is a fixed constant (2), not queried from
+ * the system; smp_num_cpus and num_online_cpus() both resolve to it. */
+#define NR_CPUS                                            (2)
+#define smp_processor_id()                 KeGetCurrentProcessorNumber()
+#define smp_num_cpus                NR_CPUS
+#define num_online_cpus() smp_num_cpus
+/* Cross-CPU calls are not performed: the macro is a no-op. */
+#define smp_call_function(f, a, n, w)          do {} while(0)
+
+/*
+ *  Irp related
+ */
+
+#define NR_IRQS                                    512
+/* Always evaluates to 0. */
+#define in_interrupt()                 (0)
+
+/*
+ *  printk flags: Linux-style log level prefixes, each a "<n>" string
+ *  literal meant to be prepended to the message text.
+ */
+
+#define KERN_EMERG      "<0>"   /* system is unusable                   */
+#define KERN_ALERT      "<1>"   /* action must be taken immediately     */
+#define KERN_CRIT       "<2>"   /* critical conditions                  */
+#define KERN_ERR        "<3>"   /* error conditions                     */
+#define KERN_WARNING    "<4>"   /* warning conditions                   */
+#define KERN_NOTICE     "<5>"   /* normal but significant condition     */
+#define KERN_INFO       "<6>"   /* informational                        */
+#define KERN_DEBUG      "<7>"   /* debug-level messages                 */
+
+/*
+ * Misc
+ */
+
+
+/* Inter-module symbol lookup is routed to the libcfs symbol registry. */
+#define inter_module_get(n)                    cfs_symbol_get(n)
+#define inter_module_put(n)                    cfs_symbol_put(n)
+
+/* Branch hints: plain pass-through of the expression. */
+#ifndef likely
+#define likely(exp) (exp)
+#endif
+#ifndef unlikely
+#define unlikely(exp) (exp)
+#endif
+
+/* The big kernel lock is not emulated: both macros are no-ops. */
+#define lock_kernel()               do {} while(0)
+#define unlock_kernel()             do {} while(0)
+
+#define CAP_SYS_ADMIN                    0
+#define CAP_SYS_ROOT                     1
+
+/* Capability checks always succeed; the argument is ignored. */
+#define capable(a)                             (TRUE)
+
+/* User-mode helpers are not run: the macro evaluates to 0. */
+#define USERMODEHELPER(path, argv, envp)       (0)
+
+
+/* Local interrupt save/restore expand to nothing. */
+#define local_irq_save(x)
+#define local_irq_restore(x)
+
+#define cfs_assert                      ASSERT
+
+/* Expands to nothing. */
+#define THREAD_NAME
+
+#else   /* !__KERNEL__ */
+
+/* User-mode build: page-cache geometry mapped onto the plain page macros. */
+#define PAGE_CACHE_SIZE PAGE_SIZE
+#define PAGE_CACHE_MASK PAGE_MASK
+
+#define getpagesize()   (PAGE_SIZE)
+
+
+/* Placeholder pthread types: each holds a single dummy member. */
+typedef struct {
+    int foo;
+} pthread_mutex_t;
+
+typedef struct {
+    int foo;
+} pthread_cond_t;
+
+/* The pthread operations are no-ops: nothing is locked or signalled. */
+#define pthread_mutex_init(x, y)    do {} while(0)
+#define pthread_cond_init(x, y)     do {} while(0)
+
+#define pthread_mutex_lock(x)       do {} while(0)
+#define pthread_mutex_unlock(x)     do {} while(0)
+
+#define pthread_cond_wait(x,y)      do {} while(0)
+#define pthread_cond_broadcast(x)   do {} while(0)
+
+/* Placeholder file object for user mode. */
+typedef struct file {
+    int foo;
+} cfs_file_t;
+
+/* User-mode proc directory entry: carries only a data pointer. */
+typedef struct cfs_proc_dir_entry{
+       void            *data;
+}cfs_proc_dir_entry_t;
+
+
+
+#include "../user-prim.h"
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+/*
+ * libc shims for the user-mode build.
+ * NOTE(review): strcasecmp/strncasecmp are mapped to the case-SENSITIVE
+ * strcmp/strncmp -- confirm that no caller depends on case folding.
+ */
+#define strcasecmp  strcmp
+#define strncasecmp strncmp
+#define snprintf   _snprintf
+/* Always evaluates to 0. */
+#define getpid()   (0)
+
+
+/* User / group database lookups always yield NULL. */
+#define getpwuid(x) (NULL)
+#define getgrgid(x) (NULL)
+
+int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev);
+
+int gethostname(char * name, int namelen);
+
+/* No-op. */
+#define setlinebuf(x) do {} while(0)
+
+
+/* Imported breakpoint routine, used on non-x86 builds below. */
+NTSYSAPI VOID NTAPI DebugBreak();
+
+
+static inline void cfs_enter_debugger(void)
+{
+    /* Break into the debugger: int 3 on x86 builds, DebugBreak() otherwise. */
+#if !_X86_
+    DebugBreak();
+#else
+    __asm int 3;
+#endif
+}
+
+/* Maximum EA Information Length */
+/* One FILE_FULL_EA_INFORMATION header plus 15 more bytes. */
+#define EA_MAX_LENGTH  (sizeof(FILE_FULL_EA_INFORMATION) + 15)
+
+
+/*
+ *  proc user mode routines
+ */
+
+/* Handle-based access to the proc emulator; implemented outside this header. */
+HANDLE cfs_proc_open (char * filename, int oflag);
+int cfs_proc_close(HANDLE handle);
+int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count);
+int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count);
+int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer);
+
+
+/*
+ * Native API definitions
+ */
+
+//
+//  Disk I/O Routines
+//
+
+/* Native read from a file handle. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtReadFile(HANDLE FileHandle,
+    HANDLE Event OPTIONAL,
+    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    PVOID ApcContext OPTIONAL,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PVOID Buffer,
+    ULONG Length,
+    PLARGE_INTEGER ByteOffset OPTIONAL,
+    PULONG Key OPTIONAL);
+
+/* Native write to a file handle. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtWriteFile(HANDLE FileHandle,
+    HANDLE Event OPTIONAL,
+    PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    PVOID ApcContext OPTIONAL,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PVOID Buffer,
+    ULONG Length,
+    PLARGE_INTEGER ByteOffset OPTIONAL,
+    PULONG Key OPTIONAL);
+
+/* Close a native handle. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtClose(HANDLE Handle);
+
+/* Create or open a file object; the handle is returned via FileHandle. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtCreateFile(PHANDLE FileHandle,
+    ACCESS_MASK DesiredAccess,
+    POBJECT_ATTRIBUTES ObjectAttributes,
+    PIO_STATUS_BLOCK IoStatusBlock,
+    PLARGE_INTEGER AllocationSize OPTIONAL,
+    ULONG FileAttributes,
+    ULONG ShareAccess,
+    ULONG CreateDisposition,
+    ULONG CreateOptions,
+    PVOID EaBuffer OPTIONAL,
+    ULONG EaLength);
+
+
+/*
+ * Native device I/O control.
+ * OutputBufferLength is passed by value and gives the size of
+ * OutputBuffer, so it is annotated IN rather than OUT.
+ */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtDeviceIoControlFile(
+    IN HANDLE  FileHandle,
+    IN HANDLE  Event,
+    IN PIO_APC_ROUTINE  ApcRoutine,
+    IN PVOID  ApcContext,
+    OUT PIO_STATUS_BLOCK  IoStatusBlock,
+    IN ULONG  IoControlCode,
+    IN PVOID  InputBuffer,
+    IN ULONG  InputBufferLength,
+    OUT PVOID  OutputBuffer,
+    IN ULONG  OutputBufferLength
+    ); 
+
+/* Native file system control request. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtFsControlFile(
+    IN HANDLE FileHandle,
+    IN HANDLE Event OPTIONAL,
+    IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
+    IN PVOID ApcContext OPTIONAL,
+    OUT PIO_STATUS_BLOCK IoStatusBlock,
+    IN ULONG FsControlCode,
+    IN PVOID InputBuffer OPTIONAL,
+    IN ULONG InputBufferLength,
+    OUT PVOID OutputBuffer OPTIONAL,
+    IN ULONG OutputBufferLength
+);
+
+
+/* Query information of class FileInformationClass about an open file. */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtQueryInformationFile(
+    IN HANDLE  FileHandle,
+    OUT PIO_STATUS_BLOCK  IoStatusBlock,
+    OUT PVOID  FileInformation,
+    IN ULONG  Length,
+    IN FILE_INFORMATION_CLASS  FileInformationClass
+    );
+
+//
+// Random routines ...
+//
+
+/* Random number routine; Seed is read and updated in place (IN OUT). */
+NTSYSAPI
+ULONG
+NTAPI
+RtlRandom(
+    IN OUT PULONG  Seed
+    ); 
+
+#endif /* __KERNEL__ */
+
+
+//
+// Inode flags (Linux writes these in octal; the hex values below are equivalent)
+//
+
+#undef S_IFMT
+#undef S_IFDIR
+#undef S_IFCHR
+#undef S_IFREG
+#undef S_IREAD
+#undef S_IWRITE
+#undef S_IEXEC
+
+#define S_IFMT   0x0F000            /* 017 0000 */
+#define S_IFSOCK 0x0C000            /* 014 0000 */
+#define S_IFLNK  0x0A000            /* 012 0000 */
+#define S_IFREG  0x08000            /* 010 0000 */
+#define S_IFBLK  0x06000            /* 006 0000 */
+#define S_IFDIR  0x04000            /* 004 0000 */
+#define S_IFCHR  0x02000            /* 002 0000 */
+#define S_IFIFO  0x01000            /* 001 0000 */
+#define S_ISUID  0x00800            /* 000 4000 */
+#define S_ISGID  0x00400            /* 000 2000 */
+#define S_ISVTX  0x00200            /* 000 1000 */
+
+#define S_ISREG(m)      (((m) & S_IFMT) == S_IFREG)
+#define S_ISSOCK(m)     (((m) & S_IFMT) == S_IFSOCK)
+#define S_ISLNK(m)      (((m) & S_IFMT) == S_IFLNK)
+#define S_ISFIL(m)      (((m) & S_IFMT) == S_IFREG)   /* regular-file test: S_IFFIL is not among the constants above, so use S_IFREG */
+#define S_ISBLK(m)      (((m) & S_IFMT) == S_IFBLK)
+#define S_ISDIR(m)      (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m)      (((m) & S_IFMT) == S_IFCHR)
+#define S_ISFIFO(m)     (((m) & S_IFMT) == S_IFIFO)
+
+#define S_IPERMISSION_MASK 0x1FF /* 0 0777: all rwx bits for user, group and other */
+
+#define S_IRWXU  0x1C0              /* 0 0700 */
+#define S_IRUSR  0x100              /* 0 0400 */
+#define S_IWUSR  0x080              /* 0 0200 */
+#define S_IXUSR  0x040              /* 0 0100 */
+
+#define S_IRWXG  0x038              /* 0 0070 */
+#define S_IRGRP  0x020              /* 0 0040 */
+#define S_IWGRP  0x010              /* 0 0020 */
+#define S_IXGRP  0x008              /* 0 0010 */
+
+#define S_IRWXO  0x007              /* 0 0007 */
+#define S_IROTH  0x004              /* 0 0004 */
+#define S_IWOTH  0x002              /* 0 0002 */
+#define S_IXOTH  0x001              /* 0 0001 */
+
+#define S_IRWXUGO   (S_IRWXU|S_IRWXG|S_IRWXO)
+#define S_IALLUGO   (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#define S_IRUGO     (S_IRUSR|S_IRGRP|S_IROTH)
+#define S_IWUGO     (S_IWUSR|S_IWGRP|S_IWOTH)
+#define S_IXUGO     (S_IXUSR|S_IXGRP|S_IXOTH)
+
+/*
+ *  Linux-style ioctl number layout, low to high: nr (8 bits) | type (8) | size (14) | dir (2)
+ */
+#define _IOC_NRBITS 8
+#define _IOC_TYPEBITS   8
+#define _IOC_SIZEBITS   14
+#define _IOC_DIRBITS    2
+
+#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK   ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK   ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK    ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT    0
+#define _IOC_TYPESHIFT  (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT  (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT   (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits (the 2-bit dir field; _IOWR ORs READ and WRITE together).
+ */
+#define _IOC_NONE   0U
+#define _IOC_WRITE  1U
+#define _IOC_READ   2U
+
+#define _IOC(dir,type,nr,size) \
+    (((dir)  << _IOC_DIRSHIFT) | \
+     ((type) << _IOC_TYPESHIFT) | \
+     ((nr)   << _IOC_NRSHIFT) | \
+     ((size) << _IOC_SIZESHIFT))
+
+/* build ioctl numbers; the size argument is passed through sizeof() */
+#define _IO(type,nr)      _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)    _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)    _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* extract the individual fields from an ioctl number */
+#define _IOC_DIR(nr)        (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)       (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)         (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)       (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/*
+ * Io vector: one buffer described by a base pointer and a length
+ */
+
+struct iovec
+{
+    void *iov_base;
+    size_t iov_len;
+};
+
+
+#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF))
+/*
+ * Convert nptr to an unsigned 64-bit integer in the given base (0 = detect
+ * from a 0x/0 prefix).  Overflow yields ULONG_LONG_MAX; a leading '-' negates.
+ * *endptr (if non-NULL) gets the first unparsed char, or nptr if no digits.
+ * Assumes upper/lower case alphabets and digits are each contiguous.
+ */
+static inline __u64
+strtoull(
+       char *nptr,
+       char **endptr,
+       int base)
+{
+       char *s = nptr;
+       __u64 acc, cutoff;
+       int c, neg = 0, any, cutlim;
+
+       /*
+        * Same logic as strtol; chars are read as unsigned char for <ctype.h>.
+        */
+       do {
+               c = (unsigned char)*s++;
+       } while (isspace(c));
+       if (c == '-') {
+               neg = 1;
+               c = (unsigned char)*s++;
+       } else if (c == '+')
+               c = (unsigned char)*s++;
+       if ((base == 0 || base == 16) &&
+           c == '0' && (*s == 'x' || *s == 'X')) {
+               c = (unsigned char)s[1];
+               s += 2;
+               base = 16;
+       }
+       if (base == 0)
+               base = c == '0' ? 8 : 10;
+       cutoff = (__u64)ULONG_LONG_MAX / (__u64)base;
+       cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base);
+       for (acc = 0, any = 0;; c = (unsigned char)*s++) {
+               if (isdigit(c))
+                       c -= '0';
+               else if (isalpha(c))
+                       c -= isupper(c) ? 'A' - 10 : 'a' - 10;
+               else
+                       break;
+               if (c >= base)
+                       break;
+               if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
+                       any = -1;
+               else {
+                       any = 1;
+                       acc *= base;
+                       acc += c;
+               }
+       }
+       if (any < 0) {
+               acc = ULONG_LONG_MAX;
+       } else if (neg)
+               acc = 0 - acc;
+       if (endptr != 0)
+               *endptr = (char *) (any ? s - 1 : nptr);
+       return (acc);
+}
+
+#endif
diff --git a/libcfs/include/libcfs/winnt/winnt-tcpip.h b/libcfs/include/libcfs/winnt/winnt-tcpip.h
new file mode 100644 (file)
index 0000000..a988247
--- /dev/null
@@ -0,0 +1,660 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * TDI-based TCP/IP socket ("ks") definitions for Winnt (kernel only).
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_TCPIP_H__
+#define __LIBCFS_WINNT_TCPIP_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+
+#ifdef __KERNEL__
+
+//
+//  ks definitions
+//
+
+// iovec is defined in libcfs: winnt_prim.h 
+// lnetkiov_t is defined in lnet/types.h
+
+typedef struct socket ksock_tconn_t;    // both names alias the same struct socket
+typedef struct socket cfs_socket_t;
+
+// completion notification callback routine (type of struct socket's kstc_sched_cb)
+
+typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr);
+
+/* completion routine to update tx structure for async sending (type of kstc_update_tx) */
+typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr);
+
+//
+// tdinal definitions
+//
+
+
+#if TDI_LIBCFS_DBG
+#define KsPrint(X)     KsPrintf X      // X must carry its own parentheses: KsPrint((...))
+#else
+#define KsPrint(X)                     // expands to nothing when TDI_LIBCFS_DBG is zero or undefined
+#endif
+
+
+//
+// Socket address constants (IPv4 addresses as ULONG values)
+//
+
+#define            INADDR_ANY              (ULONG)0x00000000
+#define     INADDR_LOOPBACK     (ULONG)0x7f000001
+#define            INADDR_BROADCAST    (ULONG)0xffffffff
+#define            INADDR_NONE             (ULONG)0xffffffff
+
+/*
+ *  TCP / IP options (SOL_TCP / SOL_UDP equal the IP protocol numbers 6 and 17)
+ */
+
+#define     SOL_TCP             6
+#define     SOL_UDP                    17
+
+
+#define TL_INSTANCE             0
+
+#define TCP_SOCKET_NODELAY      1 //  disabling "Nagle"
+#define TCP_SOCKET_KEEPALIVE    2
+#define TCP_SOCKET_OOBINLINE    3
+#define TCP_SOCKET_BSDURGENT    4
+#define TCP_SOCKET_ATMARK       5
+#define TCP_SOCKET_WINDOW       6
+
+
+/* Flags we can use with send and recv.
+   Those added for 1003.1g are not all supported yet.
+ */
+#define MSG_OOB            1
+#define MSG_PEEK        2
+#define MSG_DONTROUTE   4
+#define MSG_TRYHARD     4       /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC      8
+#define MSG_PROBE       0x10   /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC       0x20
+#define MSG_DONTWAIT    0x40   /* Nonblocking io                */
+#define MSG_EOR         0x80   /* End of record */
+#define MSG_WAITALL     0x100  /* Wait for a full request */
+#define MSG_FIN         0x200
+#define MSG_SYN                0x400
+#define MSG_CONFIRM     0x800  /* Confirm path validity */
+#define MSG_RST         0x1000
+#define MSG_ERRQUEUE    0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL    0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE        0x8000 /* Sender will send more */
+
+#define MSG_EOF         MSG_FIN
+
+
+//
+// Maximum TRANSPORT_ADDRESS Length
+//
+// it must be >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
+//               + TDI_ADDRESS_LENGTH_IP
+//
+// It is defined a little larger, as a multiple of 16 bytes, to avoid possible overflow.
+//
+
+#define MAX_ADDRESS_LENGTH              (0x30)
+
+
+//
+// Maximum number of child sockets of a listener
+//
+
+#define MAX_CHILD_LISTENERS             (4)
+
+//
+// Maximum EA Information Length
+//
+
+#define EA_MAX_LENGTH                   ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \
+                                          TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \
+                                          MAX_ADDRESS_LENGTH )
+
+
+#define UDP_DEVICE_NAME L"\\Device\\Udp"
+#define TCP_DEVICE_NAME L"\\Device\\Tcp"
+
+
+/*
+ * TSDU definitions: KS_TSDU is the header of one tsdu buffer
+ */
+
+#define TDINAL_TSDU_DEFAULT_SIZE  (0x10000)  // 65536
+
+#define KS_TSDU_MAGIC       'KSTD'
+
+#define KS_TSDU_ATTACHED    0x00000001  // Attached to the socket receive tsdu list
+
+typedef struct _KS_TSDU {
+
+    ULONG               Magic;          // presumably KS_TSDU_MAGIC - confirm
+    ULONG               Flags;          // presumably KS_TSDU_* bits such as KS_TSDU_ATTACHED - confirm
+
+    struct list_head    Link;
+
+    ULONG               TotalLength;    // Total size of KS_TSDU
+
+    ULONG               StartOffset;    // Start offset of the first Tsdu unit
+    ULONG               LastOffset;     // End offset of the last Tsdu unit
+
+/*
+    union {
+        KS_TSDU_DAT[];
+        KS_TSDU_BUF[];
+        KS_TSDU_MDL[];
+    }
+*/
+
+} KS_TSDU, *PKS_TSDU;
+
+#define TSDU_TYPE_BUF   ((USHORT)0x5401)    // type tags; the three unit structs below
+#define TSDU_TYPE_DAT   ((USHORT)0x5402)    // all start with the same TsduType/TsduFlags/
+#define TSDU_TYPE_MDL   ((USHORT)0x5403)    // DataLength/StartOffset members
+
+#define KS_TSDU_BUF_RECEIVING       0x0001
+typedef struct _KS_TSDU_BUF {
+
+    USHORT              TsduType;
+    USHORT              TsduFlags;
+
+    ULONG               DataLength;
+    ULONG               StartOffset;
+
+    PVOID               UserBuffer;
+
+} KS_TSDU_BUF, *PKS_TSDU_BUF;
+
+#define KS_TSDU_DAT_RECEIVING       0x0001
+
+typedef struct _KS_TSDU_DAT {
+
+    USHORT              TsduType;
+    USHORT              TsduFlags;
+
+    ULONG               DataLength;
+    ULONG               StartOffset;
+
+    ULONG               TotalLength;
+
+    UCHAR               Data[1];        // start of the inline data; KS_TSDU_STRU_SIZE measures from this member's offset
+
+} KS_TSDU_DAT, *PKS_TSDU_DAT;
+
+#define KS_DWORD_ALIGN(x)      (((x) + 0x03) & (~(0x03)))   // round x up to a multiple of 4
+#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data)))   // Len plus the offset of Data, rounded up to 4
+
+typedef struct _KS_TSDU_MDL {
+
+    USHORT              TsduType;
+    USHORT              TsduFlags;
+
+    ULONG               DataLength;
+    ULONG               StartOffset;    
+
+    PMDL                Mdl;
+    PVOID               Descriptor;
+
+} KS_TSDU_MDL, *PKS_TSDU_MDL;
+
+
+typedef struct _KS_TSDUMGR {
+
+    struct list_head    TsduList;       // list head; presumably links KS_TSDU.Link entries - confirm
+    ULONG               NumOfTsdu;
+    ULONG               TotalBytes;
+    KEVENT              Event;
+
+} KS_TSDUMGR, *PKS_TSDUMGR;
+
+
+typedef struct _KS_CHAIN {
+
+    KS_TSDUMGR          Normal;         // one tsdu manager per data class: normal ...
+    KS_TSDUMGR          Expedited;      // ... and expedited
+
+} KS_CHAIN, *PKS_CHAIN;
+
+
+#define TDINAL_SCHED_FACTOR (1)   // right-shift applied to Limit, i.e. Limit / 2
+#define CAN_BE_SCHED(Len, Limit) ((Len) >= ((Limit) >> TDINAL_SCHED_FACTOR))   // non-zero once Len reaches Limit >> TDINAL_SCHED_FACTOR
+
+//
+// Handler Settings Indicator
+//
+
+#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1)
+
+
+typedef struct _KS_EVENT_HANDLERS {
+    BOOLEAN     IsActive[TDI_EVENT_MAXIMUM_HANDLER];   // set to TRUE per slot by SetEventHandler
+    PVOID       Handler [TDI_EVENT_MAXIMUM_HANDLER];   // handler routine for the slot, stored untyped
+} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS;
+
+#define SetEventHandler(ha, ht, hr) do {        \
+            ha.IsActive[ht] = TRUE;             \
+            ha.Handler[ht] = (PVOID) (hr);      \
+        } while(0)
+
+//
+// KSock Internal Structures
+//
+
+typedef struct _KS_ADDRESS {
+
+    union {
+        TRANSPORT_ADDRESS   Tdi;
+        UCHAR               Pading[MAX_ADDRESS_LENGTH];   // padding: reserves MAX_ADDRESS_LENGTH bytes for the address
+    };
+
+    HANDLE                  Handle;
+    PFILE_OBJECT            FileObject;
+
+} KS_ADDRESS, *PKS_ADDRESS;
+
+//
+// Structure for the Disconnect Workitem
+//
+
+typedef struct _KS_DISCONNECT_WORKITEM {
+
+    WORK_QUEUE_ITEM         WorkItem;       // Workitem to perform disconnection
+    ksock_tconn_t *         tconn;          // tdi connection
+    ULONG                   Flags;          // connection broken/disconnection flags
+    KEVENT                  Event;          // sync event
+
+} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM;
+
+
+typedef struct _KS_CONNECTION {
+
+    HANDLE                      Handle;     // Handle of the tdi connection
+    PFILE_OBJECT                FileObject; // FileObject of the conn object
+
+    PTRANSPORT_ADDRESS          Remote;     // remote transport address (presumably the peer's - confirm)
+    PTDI_CONNECTION_INFORMATION ConnectionInfo;   // the ConnectionInfo of this connection
+
+    ULONG                       nagle;      // Tcp options 
+
+} KS_CONNECTION, *PKS_CONNECTION;
+
+
+//
+// type definitions: ksock_* aliases for NT types and for the KS_* structures
+//
+
+typedef MDL                         ksock_mdl_t;
+typedef UNICODE_STRING              ksock_unicode_name_t;
+typedef WORK_QUEUE_ITEM             ksock_workitem_t;
+
+
+typedef KS_CHAIN                    ksock_chain_t;
+typedef KS_ADDRESS                  ksock_tdi_addr_t;
+typedef KS_CONNECTION               ksock_tconn_info_t;
+typedef KS_DISCONNECT_WORKITEM      ksock_disconnect_workitem_t;
+
+
+//
+// Structure for the "transmission done" Workitem
+//
+
+typedef struct _KS_TCPX_FINILIZE {
+    ksock_workitem_t        item;       // the work item itself (a WORK_QUEUE_ITEM)
+    void *                  tx;         // opaque tx pointer; presumably the finished transmit - confirm
+} ksock_tcpx_fini_t;
+
+
+typedef struct ksock_backlogs {
+
+        struct list_head    list;   /* list to link the backlog connections */
+        int                 num;    /* number of backlog connections in the list */
+
+} ksock_backlogs_t;
+
+
+typedef struct ksock_daemon {
+
+    ksock_tconn_t *         tconn;         /* the listener connection object */
+    unsigned short          nbacklogs;     /* number of listening backlog conns */
+    unsigned short          port;          /* listening port number */ 
+    int                     shutdown;      /* daemon threads are to exit */
+    struct list_head        list;          /* to be attached into ksock_nal_data_t */
+
+} ksock_daemon_t ;
+
+
+typedef enum {
+
+    kstt_sender = 0,    // normal sending connection type: an active connection, while
+                        // a child tconn is a passive connection.
+
+    kstt_listener,      // listener daemon type, it just acts as a daemon, and it does
+                        // not have a real connection. It manages child tconns to accept
+                        // or refuse the connecting requests from remote peers.
+
+    kstt_child,         // accepted child connection type, its parent must be a Listener
+    kstt_lasttype       // last enumerator: equals the number of tconn types
+} ksock_tconn_type;
+
+typedef enum {
+
+    ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet
+
+    ksts_inited,        // tconn structure initialized: so it now can be identified as
+                        // a sender, listener or a child
+
+    ksts_bind,          // tconn is bound: the local address object (ip/port) is created.
+                        // after being bound, we must call ksocknal_put_tconn to release
+                        // the tconn objects, it's not safe just to free the memory of tconn.
+
+    ksts_associated,    // the connection object is created and associated with the address
+                        // object. so it's ready for connection. only for child and sender.
+
+    ksts_connecting,    // only used by child tconn: in the ConnectEvent handler routine,
+                        // it indicates the child tconn is busy being connected to the peer.
+
+    ksts_connected,     // the connection is built already: for sender and child
+
+    ksts_listening,     // listener daemon is working, only for listener tconn
+
+    ksts_disconnected,  // disconnected by user
+    ksts_aborted,       // unexpected broken status
+
+    ksts_last           // total number of tconn statuses
+} ksock_tconn_state;
+
+#define KS_TCONN_MAGIC              'KSTM'
+
+#define KS_TCONN_HANDLERS_SET       0x00000001  // Connection handlers are set.
+#define KS_TCONN_DISCONNECT_BUSY    0x00010000  // Disconnect Workitem is queued ...
+#define KS_TCONN_DESTROY_BUSY       0x00020000  // Destroy Workitem is queued ...
+
+#define KS_TCONN_DAEMON_STARTED     0x00100000  // indicates the daemon is started,
+                                                // only valid for listener
+
+struct socket {
+
+        ulong_ptr                   kstc_magic;      /* Magic & Flags */
+        ulong_ptr                   kstc_flags;
+
+        spinlock_t                  kstc_lock;       /* serialise lock*/
+        void *                      kstc_conn;       /* ksock_conn_t */
+
+        ksock_tconn_type            kstc_type;          /* tdi connection Type */
+        ksock_tconn_state           kstc_state;      /* tdi connection state flag */
+
+        ksock_unicode_name_t        kstc_dev;        /* tcp transport device name */
+
+        ksock_tdi_addr_t            kstc_addr;       /* local address handlers / Objects */
+
+        atomic_t                    kstc_refcount;   /* reference count of ksock_tconn */
+
+        struct list_head            kstc_list;       /* linked to global ksocknal_data */
+
+        union {      /* anonymous union: one member per ksock_tconn_type (listener / child / sender) */
+
+            struct {
+                int                 nbacklog;         /* total number of backlog tdi connections */
+                ksock_backlogs_t    kstc_listening;   /* listening backlog child connections */
+                ksock_backlogs_t    kstc_accepted;    /* connected backlog child connections */
+                event_t             kstc_accept_event;   /* Signaled by AcceptedHander, 
+                                                            ksocknal_wait_accpeted_conns waits on */
+                event_t             kstc_destroy_event;  /* Signaled when accepted child is released */
+            } listener; 
+
+            struct  {        /* the first three members match those of 'sender' below */
+                ksock_tconn_info_t  kstc_info;      /* Connection Info if Connected */
+                ksock_chain_t       kstc_recv;      /* tsdu engine for data receiving */
+                ksock_chain_t       kstc_send;      /* tsdu engine for data sending */
+
+                int                 kstc_queued;    /* Attached to Parent->ChildList ... */
+                int                 kstc_queueno;   /* 0: Attached to Listening list 
+                                                       1: Attached to Accepted list */
+
+                int                 kstc_busy;      /* referred by ConnectEventCallback ? */
+                int                 kstc_accepted;  /* the connection is built ready ? */
+
+                struct list_head    kstc_link;      /* linked to parent tdi connection */
+                ksock_tconn_t   *   kstc_parent;    /* pointer to its listener parent */
+            } child;
+
+            struct {
+                ksock_tconn_info_t  kstc_info;      /* Connection Info if Connected */
+                ksock_chain_t       kstc_recv;      /* tsdu engine for data receiving */
+                ksock_chain_t       kstc_send;      /* tsdu engine for data sending */
+            } sender; 
+        };
+
+        ulong_ptr                   kstc_snd_wnd;   /* Sending window size */
+        ulong_ptr                   kstc_rcv_wnd;   /* Receiving window size */
+
+        ksock_workitem_t            kstc_destroy;    /* tconn destruction workitem */
+        ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */
+
+        ksock_schedule_cb           kstc_sched_cb;   /* notification callback routine of completion */
+        ksock_update_tx             kstc_update_tx;  /* async sending callback to update tx */
+};
+
+#define SOCK_WMEM_QUEUED(sock) (0)   /* always 0; the sock argument is ignored */
+
+#define TDINAL_WINDOW_DEFAULT_SIZE  (0x100000)   /* 1048576 */
+
+
+struct _KS_UDP_COMPLETION_CONTEXT;   /* forward declarations: the routine typedefs */
+struct _KS_TCP_COMPLETION_CONTEXT;   /* below take pointers to these structures    */
+
+
+typedef
+NTSTATUS
+(*PKS_UDP_COMPLETION_ROUTINE) (      /* pointer to a routine taking (Irp, UdpContext) and returning NTSTATUS */
+    IN PIRP     Irp,
+    IN struct _KS_UDP_COMPLETION_CONTEXT
+                *UdpContext
+    );
+
+
+typedef
+NTSTATUS
+(*PKS_TCP_COMPLETION_ROUTINE) (      /* pointer to a routine taking (Irp, TcpContext) and returning NTSTATUS */
+    IN PIRP     Irp,
+    IN struct _KS_TCP_COMPLETION_CONTEXT
+                *TcpContext
+    );
+
+//
+// Udp Irp Completion Context
+//
+
+typedef struct _KS_UDP_COMPLETION_CONTEXT {
+
+    PKEVENT                             Event;
+    union {                                             // anonymous union: one slot, either pointer
+        PFILE_OBJECT                    AddressObject;
+        ksock_tconn_t *                 tconn;
+    };
+
+    PKS_UDP_COMPLETION_ROUTINE          CompletionRoutine;
+    PVOID                               CompletionContext;
+
+} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT;
+
+
+//
+// Tcp Irp Completion Context (used by tcp data recv/send)
+//
+
+typedef struct _KS_TCP_COMPLETION_CONTEXT {
+
+    PKEVENT                             Event;      // Event to be waited on by Irp caller ...
+
+    ksock_tconn_t *                     tconn;      // the tdi connection
+
+    PKS_TCP_COMPLETION_ROUTINE          CompletionRoutine;
+    PVOID                               CompletionContext;
+    PVOID                               CompletionContext2;
+
+    PKS_TSDUMGR                         KsTsduMgr;  // Tsdu buffer manager
+
+    //
+    // These two new members are for NON_BLOCKING transmission
+    //
+
+    BOOLEAN                                                        bCounted;    // Indicates that a refcount is needed to
+                                                     // execute CompletionRoutine
+    ULONG                               ReferCount;  // Refer count of this structure
+
+} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT;
+
+typedef KS_TCP_COMPLETION_CONTEXT  ksock_tdi_tx_t, ksock_tdi_rx_t;
+
+
+/*
+ * tdi extensions: TCP information ioctls built with CTL_CODE (function numbers 0 and 1)
+ */
+
+#define IOCTL_TCP_QUERY_INFORMATION_EX        \
+                        CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS)
+#define IOCTL_TCP_SET_INFORMATION_EX        \
+                        CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
+
+
+#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\
+    {   /* bare block, not do/while(0): fills the next IRP stack location */ \
+        PIO_STACK_LOCATION _IRPSP;                                           \
+        if ( CompRoutine != NULL) {                                          \
+            IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
+        } else {                                                             \
+            IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE);   \
+        }                                                                    \
+        _IRPSP = IoGetNextIrpStackLocation (Irp);                            \
+        _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL;                       \
+        _IRPSP->DeviceObject = DevObj;                                       \
+        _IRPSP->FileObject = FileObj;                                        \
+        _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0;           \
+        _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen;    \
+        _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX;  \
+        Irp->AssociatedIrp.SystemBuffer = Buffer;                            \
+    }   /* NOTE(review): the macro arguments are expanded unparenthesised - pass simple expressions */
+
+
+#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\
+    {   /* bare block, not do/while(0): fills the next IRP stack location */ \
+        PIO_STACK_LOCATION _IRPSP;                                           \
+        if ( CompRoutine != NULL) {                                          \
+            IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\
+        } else {                                                             \
+            IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE);   \
+        }                                                                    \
+        _IRPSP = IoGetNextIrpStackLocation (Irp);                            \
+        _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL;                       \
+        _IRPSP->DeviceObject = DevObj;                                       \
+        _IRPSP->FileObject = FileObj;                                        \
+        _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength;           \
+        _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength;    \
+        _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX;  \
+        _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer;    \
+        Irp->UserBuffer = OutBuffer;                            \
+    }   /* NOTE(review): the macro arguments are expanded unparenthesised - pass simple expressions */
+
+
+typedef struct ks_addr_slot {
+    LIST_ENTRY      link;           /* list linkage; presumably on ks_data_t.ksnd_addrs_list - confirm */
+    int             up;
+    char            iface[40];
+    __u32           ip_addr;
+    __u32           netmask;
+    UNICODE_STRING  devname;
+    WCHAR           buffer[1];      /* trailing WCHAR storage; presumably backs devname - confirm */
+} ks_addr_slot_t;
+
+typedef struct {
+
+    /*
+     * Tdi client information
+     */
+
+    UNICODE_STRING    ksnd_client_name; /* tdi client module name */
+    HANDLE            ksnd_pnp_handle;  /* the handle for pnp changes */
+
+    spinlock_t        ksnd_addrs_lock;  /* serialize ip address list access */
+    LIST_ENTRY        ksnd_addrs_list;  /* list of the ip addresses */
+    int               ksnd_naddrs;      /* number of the ip addresses */
+
+    /*
+     *  Tdilnd internal definitions
+     */
+
+    int               ksnd_init;            /* initialisation state */
+
+    TDI_PROVIDER_INFO ksnd_provider;    /* tdi tcp/ip provider's information */
+
+    spinlock_t        ksnd_tconn_lock;      /* tdi connections access serialise */
+
+    int               ksnd_ntconns;         /* number of tconns attached in list */
+    struct list_head  ksnd_tconns;          /* tdi connections list */
+    cfs_mem_cache_t * ksnd_tconn_slab;      /* slabs for ksock_tconn_t allocations */
+    event_t           ksnd_tconn_exit;      /* exit event to be signaled by the last tconn */
+
+    spinlock_t        ksnd_tsdu_lock;       /* tsdu access serialise */
+        
+    int               ksnd_ntsdus;          /* number of tsdu buffers allocated */
+    ulong_ptr     ksnd_tsdu_size;       /* the size of a single tsdu buffer */
+    cfs_mem_cache_t * ksnd_tsdu_slab;       /* slab cache for tsdu buffer allocation */
+
+    int               ksnd_nfreetsdus;      /* number of tsdu buffers in the freed list */
+    struct list_head  ksnd_freetsdus;          /* List of the freed Tsdu buffer. */
+
+    spinlock_t        ksnd_daemon_lock;     /* stabilize daemon ops */
+    int               ksnd_ndaemons;        /* number of listening daemons */
+    struct list_head  ksnd_daemons;         /* listening daemon list */
+    event_t           ksnd_daemon_exit;     /* the last daemon quitting should signal it */
+
+} ks_data_t;
+
+int
+ks_init_tdi_data(void);     /* (void): in C, "()" would leave the parameter list unspecified */
+
+void
+ks_fini_tdi_data(void);     /* presumably the teardown counterpart of ks_init_tdi_data - confirm */
+
+
+#endif /* __KERNEL__ */
+#endif /* __LIBCFS_WINNT_TCPIP_H__ */
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/winnt/winnt-time.h b/libcfs/include/libcfs/winnt/winnt-time.h
new file mode 100644 (file)
index 0000000..a7a570c
--- /dev/null
@@ -0,0 +1,316 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable time API for Winnt (kernel and user-level).
+ *
+ */
+
+#ifndef __LIBCFS_WINNT_LINUX_TIME_H__
+#define __LIBCFS_WINNT_LINUX_TIME_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include <libcfs/libcfs.h> instead
+#endif
+
+/* Portable time API */
+
+/*
+ * Platform provides three opaque data-types:
+ *
+ *  cfs_time_t        represents point in time. This is internal kernel
+ *                    time rather than "wall clock". This time bears no
+ *                    relation to gettimeofday().
+ *
+ *  cfs_duration_t    represents time interval with resolution of internal
+ *                    platform clock
+ *
+ *  cfs_fs_time_t     represents instance in world-visible time. This is
+ *                    used in file-system time-stamps
+ *
+ *  cfs_time_t     cfs_time_current(void);
+ *  cfs_time_t     cfs_time_add    (cfs_time_t, cfs_duration_t);
+ *  cfs_duration_t cfs_time_sub    (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_before (cfs_time_t, cfs_time_t);
+ *  int            cfs_time_beforeq(cfs_time_t, cfs_time_t);
+ *
+ *  cfs_duration_t cfs_duration_build(int64_t);
+ *
+ *  time_t         cfs_duration_sec (cfs_duration_t);
+ *  void           cfs_duration_usec(cfs_duration_t, struct timeval *);
+ *  void           cfs_duration_nsec(cfs_duration_t, struct timespec *);
+ *
+ *  void           cfs_fs_time_current(cfs_fs_time_t *);
+ *  time_t         cfs_fs_time_sec    (cfs_fs_time_t *);
+ *  void           cfs_fs_time_usec   (cfs_fs_time_t *, struct timeval *);
+ *  void           cfs_fs_time_nsec   (cfs_fs_time_t *, struct timespec *);
+ *  int            cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *);
+ *  int            cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *);
+ *
+ *  CFS_TIME_FORMAT
+ *  CFS_DURATION_FORMAT
+ *
+ */
+
+#define ONE_BILLION ((u_int64_t)1000000000)
+#define ONE_MILLION ((u_int64_t)   1000000)
+
+#define HZ (100)
+
+/* Time value split into whole seconds plus a microsecond part. */
+struct timeval {
+       time_t          tv_sec;         /* seconds */
+       suseconds_t     tv_usec;        /* microseconds */
+};
+
+/* Time value split into whole seconds plus a nanosecond part. */
+struct timespec {
+    ulong_ptr tv_sec;     /* seconds */
+    ulong_ptr tv_nsec;    /* nanoseconds */
+};
+
+#ifdef __KERNEL__
+
+#include <libcfs/winnt/portals_compat25.h>
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef struct timeval cfs_fs_time_t;
+
+typedef u_int64_t cfs_time_t;
+typedef int64_t cfs_duration_t;
+
+/*
+ * Emulation of the Linux do_gettimeofday(): stores the current wall-clock
+ * time in *tv as seconds since 1970 plus microseconds.
+ *
+ * KeQuerySystemTime() reports 100ns intervals since January 1, 1601, so the
+ * value is rebased to 1970 before it is split.  Without the rebase the
+ * second count does not fit the 32-bit tv_sec and is not a Unix time.
+ */
+static inline void do_gettimeofday(struct timeval *tv)
+{
+    LARGE_INTEGER Time;
+
+    KeQuerySystemTime(&Time);
+
+    /* 11644473600 seconds separate 1601 from 1970; expressed in 100ns units */
+    Time.QuadPart -= (LONGLONG)116444736 * 1000000000;
+
+    tv->tv_sec  = (time_t) (Time.QuadPart / 10000000);
+    tv->tv_usec = (suseconds_t) ((Time.QuadPart % 10000000) / 10);
+}
+
+/*
+ * Stand-in for the Linux jiffies counter: the tick count is multiplied by
+ * KeQueryTimeIncrement() and the product divided by (10000000 / HZ), which
+ * yields a counter that advances HZ times per second.
+ */
+static inline cfs_time_t JIFFIES()
+{
+    LARGE_INTEGER ticks;
+    LONGLONG      elapsed;
+
+    KeQueryTickCount(&ticks);
+
+    /* total elapsed time in the units of KeQueryTimeIncrement() */
+    elapsed = ticks.QuadPart * KeQueryTimeIncrement();
+
+    return elapsed / (10000000 / HZ);
+}
+
+/* Current point in time as a cfs_time_t; simply the value of JIFFIES(). */
+static inline cfs_time_t cfs_time_current(void)
+{
+    return JIFFIES();
+}
+
+/* Current JIFFIES() value divided by HZ, i.e. expressed in whole seconds. */
+static inline cfs_time_t cfs_time_current_sec(void)
+{
+    return (JIFFIES() / HZ);
+}
+
+/* Point in time t moved by duration d (d is signed, so it may move back). */
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+    return (t + d);
+}
+
+/*
+ * Interval t1 - t2.  The subtraction is done on the unsigned cfs_time_t
+ * values and the result is returned as a signed cfs_duration_t.
+ */
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+    return (t1 - t2);
+}
+
+/*
+ * Non-zero when t1 is earlier than t2.  Decided by the sign of the signed
+ * 64-bit difference rather than by comparing the unsigned values directly.
+ */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+    return ((int64_t)t1 - (int64_t)t2) < 0; 
+}
+
+/*
+ * Non-zero when t1 is earlier than or equal to t2.  Decided by the sign of
+ * the signed 64-bit difference, like cfs_time_before().
+ */
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+    return ((int64_t)t1 - (int64_t)t2) <= 0;
+}
+
+/*
+ * Store the current wall-clock time in *t: tv_sec receives the seconds
+ * produced by RtlTimeToSecondsSince1970(), tv_usec the microseconds within
+ * that second.
+ */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+    ULONG         Linux;
+    LARGE_INTEGER Sys;
+
+    KeQuerySystemTime(&Sys);
+
+    RtlTimeToSecondsSince1970(&Sys, &Linux);
+
+    t->tv_sec  = Linux;
+    /*
+     * The sub-second remainder must come from the full 64-bit count.
+     * LowPart holds only the low 32 bits, and 2^32 is not a multiple of
+     * 10000000, so "LowPart % 10000000" is not the fraction of a second.
+     */
+    t->tv_usec = (suseconds_t) ((Sys.QuadPart % 10000000) / 10);
+}
+
+/* Seconds part (tv_sec) of a file-system time stamp. */
+static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+    return t->tv_sec;
+}
+
+/*
+ * Collapse a time stamp into one 64-bit microsecond count
+ * (tv_sec * ONE_MILLION + tv_usec) so that two stamps can be compared as
+ * plain integers.
+ */
+static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+    return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec;
+}
+
+/* Non-zero when *t1 is strictly earlier than *t2 (microsecond resolution). */
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+    return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2));
+}
+
+/* Non-zero when *t1 is earlier than or equal to *t2 (microsecond resolution). */
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+    return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2));
+}
+
+/*
+ * Convert a number of seconds into a duration in HZ units.  The cast widens
+ * the operand to cfs_duration_t before the multiplication.
+ */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+    return (cfs_duration_t)seconds * HZ;
+}
+
+/* Whole seconds contained in duration d (d / HZ, remainder discarded). */
+static inline cfs_time_t cfs_duration_sec(cfs_duration_t d)
+{
+        return d / HZ;
+}
+
+/*
+ * Split duration d (in HZ units) into whole seconds and the leftover
+ * microseconds, stored in *s.
+ *
+ * Each field is cast to its own type; the casts used to be swapped
+ * (suseconds_t for tv_sec, time_t for tv_usec).
+ */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        s->tv_sec  = (time_t) (d / HZ);
+        /* HZ units left after the whole seconds, scaled to microseconds */
+        s->tv_usec = (suseconds_t)((d - (cfs_duration_t)s->tv_sec * HZ) *
+                                   ONE_MILLION / HZ);
+}
+
+/*
+ * Split duration d (in HZ units) into whole seconds and the leftover
+ * nanoseconds, stored in *s.
+ *
+ * Both fields of struct timespec are ulong_ptr, so the values are cast to
+ * that type instead of the unrelated suseconds_t / time_t used before,
+ * which would truncate where ulong_ptr is wider than 32 bits.
+ */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        s->tv_sec  = (ulong_ptr) (d / HZ);
+        /* HZ units left after the whole seconds, scaled to nanoseconds */
+        s->tv_nsec = (ulong_ptr)((d - (cfs_duration_t)s->tv_sec * HZ) *
+                                 ONE_BILLION / HZ);
+}
+
+/*
+ * Copy the time stamp *t into *v.  cfs_fs_time_t is a typedef of
+ * struct timeval, so a plain structure assignment is sufficient.
+ */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        *v = *t;
+}
+
+/*
+ * Convert the time stamp *t into a struct timespec: seconds are copied,
+ * microseconds are multiplied by 1000 to give nanoseconds.
+ */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        s->tv_sec  = t->tv_sec;
+        s->tv_nsec = t->tv_usec * 1000;
+}
+
+#define cfs_time_current_64 cfs_time_current
+#define cfs_time_add_64     cfs_time_add
+#define cfs_time_shift_64   cfs_time_shift
+#define cfs_time_before_64  cfs_time_before
+#define cfs_time_beforeq_64 cfs_time_beforeq
+
+/*
+ * One jiffy
+ */
+#define CFS_TICK                (1)
+
+#define LTIME_S(t)                     (t)
+
+#define CFS_TIME_T              "%I64u"
+#define CFS_DURATION_T          "%I64d"
+
+#else   /* !__KERNEL__ */
+
+/*
+ * Liblustre. time(2) based implementation.
+ */
+#include <libcfs/user-time.h>
+
+
+//
+// Time routines ...
+//
+
+/*
+ * Native API used by the user-level do_gettimeofday() below to read the
+ * system time into *CurrentTime.  The call reports its result as an
+ * NTSTATUS; it was previously declared as returning CCHAR.
+ */
+NTSYSAPI
+NTSTATUS
+NTAPI
+NtQuerySystemTime(
+    OUT PLARGE_INTEGER  CurrentTime
+    );
+
+
+NTSYSAPI
+BOOLEAN
+NTAPI
+RtlTimeToSecondsSince1970(
+    IN PLARGE_INTEGER  Time,
+    OUT PULONG  ElapsedSeconds
+    );
+
+
+NTSYSAPI
+VOID
+NTAPI
+RtlSecondsSince1970ToTime(
+    IN ULONG  ElapsedSeconds,
+    OUT PLARGE_INTEGER  Time
+    );
+
+NTSYSAPI
+VOID
+NTAPI
+Sleep(
+  DWORD dwMilliseconds   // sleep time in milliseconds
+);
+
+
+/*
+ * sleep() in seconds, built on Sleep(): the argument is multiplied by 1000
+ * to obtain the millisecond count that Sleep() expects.
+ */
+static inline void sleep(int time)
+{
+    Sleep((DWORD)(1000 * time));
+}
+
+
+/*
+ * User-level counterpart of the kernel do_gettimeofday(): stores the current
+ * wall-clock time in *tv as seconds since 1970 plus microseconds.
+ *
+ * NtQuerySystemTime() reports 100ns intervals since January 1, 1601, so the
+ * value is rebased to 1970 before it is split.  Without the rebase tv_sec
+ * would count from 1601 and would not be a Unix time.
+ */
+static inline void do_gettimeofday(struct timeval *tv)
+{
+    LARGE_INTEGER Time;
+
+    NtQuerySystemTime(&Time);
+
+    /* 11644473600 seconds separate 1601 from 1970; expressed in 100ns units */
+    Time.QuadPart -= (LONGLONG)116444736 * 1000000000;
+
+    tv->tv_sec  = (time_t) (Time.QuadPart / 10000000);
+    tv->tv_usec = (suseconds_t) ((Time.QuadPart % 10000000) / 10);
+}
+
+/*
+ * gettimeofday() look-alike on top of do_gettimeofday().  The tz argument
+ * is ignored and the function always returns 0.
+ */
+static inline int gettimeofday(struct timeval *tv, void * tz)
+{
+    do_gettimeofday(tv);
+    return 0;
+}
+
+#endif /* __KERNEL__ */
+
+/* __LIBCFS_WINNT_LINUX_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/include/libcfs/winnt/winnt-types.h b/libcfs/include/libcfs/winnt/winnt-types.h
new file mode 100644 (file)
index 0000000..b50b6bb
--- /dev/null
@@ -0,0 +1,647 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Basic types definitions
+ *
+ */
+
+#ifndef _WINNT_TYPE_H
+#define _WINNT_TYPE_H
+
+#ifdef __KERNEL__
+
+#include <ntifs.h>
+#include <windef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include <tdi.h>
+#include <tdikrnl.h>
+#include <tdiinfo.h>
+
+#else
+
+#include <ntddk.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <time.h>
+#include <io.h>
+#include <string.h>
+#include <assert.h>
+
+#endif
+
+
+#define __LITTLE_ENDIAN
+
+#define inline     __inline
+#define __inline__ __inline
+
+typedef unsigned __int8     __u8;
+typedef signed   __int8     __s8;
+
+typedef signed   __int64    __s64;
+typedef unsigned __int64    __u64;
+
+typedef        signed   __int16        __s16;
+typedef        unsigned __int16        __u16;
+
+typedef        signed   __int32        __s32;
+typedef        unsigned __int32        __u32;
+
+typedef        signed   __int64        __s64;
+typedef        unsigned __int64        __u64;
+
+typedef unsigned long       ULONG;
+
+
+#if defined(_WIN64)
+    #define long_ptr        __int64
+    #define ulong_ptr       unsigned __int64
+    #define BITS_PER_LONG   (64)
+#else
+    #define long_ptr        long
+    #define ulong_ptr       unsigned long
+    #define BITS_PER_LONG   (32)
+
+#endif
+
+/* bsd */
+typedef unsigned char          u_char;
+typedef unsigned short         u_short;
+typedef unsigned int           u_int;
+typedef unsigned long          u_long;
+
+/* sysv */
+typedef unsigned char          unchar;
+typedef unsigned short         ushort;
+typedef unsigned int           uint;
+typedef unsigned long          ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef                __u8            u_int8_t;
+typedef                __s8            int8_t;
+typedef                __u16           u_int16_t;
+typedef                __s16           int16_t;
+typedef                __u32           u_int32_t;
+typedef                __s32           int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef                __u8            uint8_t;
+typedef                __u16           uint16_t;
+typedef                __u32           uint32_t;
+
+typedef                __u64           uint64_t;
+typedef                __u64           u_int64_t;
+typedef                __s64           int64_t;
+
+typedef long            ssize_t;
+
+typedef __u32           suseconds_t;
+
+typedef __u32           pid_t, tid_t;
+
+typedef __u16           uid_t, gid_t;
+
+typedef __u16           mode_t;
+typedef __u16           umode_t;
+
+typedef ulong_ptr       sigset_t;
+
+typedef uint64_t        loff_t;
+typedef HANDLE          cfs_handle_t;
+typedef uint64_t        cycles_t;
+
+#ifndef INVALID_HANDLE_VALUE
+#define INVALID_HANDLE_VALUE ((HANDLE)-1)
+#endif
+
+
+#ifdef __KERNEL__ /* kernel */
+
+typedef __u32           off_t;
+typedef __u32           time_t;
+
+typedef unsigned short  kdev_t;
+
+#else  /* !__KERNEL__ */
+
+typedef int             BOOL;
+typedef __u8            BYTE;
+typedef __u16           WORD;
+typedef __u32           DWORD;
+
+#endif /* __KERNEL__ */
+
+/*
+ * Constant suffixes
+ */
+
+#define ULL i64
+#define ull i64
+
+/*
+ * Winnt kernel has no capabilities.
+ */
+
+typedef __u32 cfs_kernel_cap_t;
+
+#define INT_MAX         ((int)(~0U>>1))
+#define INT_MIN         (-INT_MAX - 1)
+#define UINT_MAX        (~0U)
+
+/* NOTE: the include guard ends here; everything below is outside of it. */
+#endif /* _WINNT_TYPE_H */
+
+
+/*
+ *  Byte order
+ */
+
+//
+// Byte order swapping routines
+//
+
+
+#define ___swab16(x) RtlUshortByteSwap(x)
+#define ___swab32(x) RtlUlongByteSwap(x)
+#define ___swab64(x) RtlUlonglongByteSwap(x)
+
+#define ___constant_swab16(x) \
+       ((__u16)( \
+               (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+               (((__u16)(x) & (__u16)0xff00U) >> 8) ))
+
+#define ___constant_swab32(x) \
+       ((__u32)( \
+               (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+               (((__u32)(x) & (__u32)0x0000ff00UL) <<  8) | \
+               (((__u32)(x) & (__u32)0x00ff0000UL) >>  8) | \
+               (((__u32)(x) & (__u32)0xff000000UL) >> 24) ))
+
+#define ___constant_swab64(x) \
+       ((__u64)( \
+               (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) <<  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >>  8) | \
+               (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \
+               (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \
+               (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) ))
+
+
+#define __swab16(x)  ___constant_swab16(x)
+#define __swab32(x)  ___constant_swab32(x)
+#define __swab64(x)  ___constant_swab64(x)
+
+#define __swab16s(x) do { *(x) = __swab16((USHORT)(*(x)));} while(0)
+#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0)
+#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0)
+
+#define __constant_htonl(x) ___constant_swab32((x))
+#define __constant_ntohl(x) ___constant_swab32((x))
+#define __constant_htons(x) ___constant_swab16((x))
+#define __constant_ntohs(x) ___constant_swab16((x))
+#define __constant_cpu_to_le64(x) ((__u64)(x))
+#define __constant_le64_to_cpu(x) ((__u64)(x))
+#define __constant_cpu_to_le32(x) ((__u32)(x))
+#define __constant_le32_to_cpu(x) ((__u32)(x))
+#define __constant_cpu_to_le16(x) ((__u16)(x))
+#define __constant_le16_to_cpu(x) ((__u16)(x))
+#define __constant_cpu_to_be64(x) ___constant_swab64((x))
+#define __constant_be64_to_cpu(x) ___constant_swab64((x))
+#define __constant_cpu_to_be32(x) ___constant_swab32((x))
+#define __constant_be32_to_cpu(x) ___constant_swab32((x))
+#define __constant_cpu_to_be16(x) ___constant_swab16((x))
+#define __constant_be16_to_cpu(x) ___constant_swab16((x))
+#define __cpu_to_le64(x) ((__u64)(x))
+#define __le64_to_cpu(x) ((__u64)(x))
+#define __cpu_to_le32(x) ((__u32)(x))
+#define __le32_to_cpu(x) ((__u32)(x))
+#define __cpu_to_le16(x) ((__u16)(x))
+#define __le16_to_cpu(x) ((__u16)(x))
+#define __cpu_to_be64(x) __swab64((x))
+#define __be64_to_cpu(x) __swab64((x))
+#define __cpu_to_be32(x) __swab32((x))
+#define __be32_to_cpu(x) __swab32((x))
+#define __cpu_to_be16(x) __swab16((x))
+#define __be16_to_cpu(x) __swab16((x))
+#define __cpu_to_le64p(x) (*(__u64*)(x))
+#define __le64_to_cpup(x) (*(__u64*)(x))
+#define __cpu_to_le32p(x) (*(__u32*)(x))
+#define __le32_to_cpup(x) (*(__u32*)(x))
+#define __cpu_to_le16p(x) (*(__u16*)(x))
+#define __le16_to_cpup(x) (*(__u16*)(x))
+#define __cpu_to_be64p(x) __swab64p((x))
+#define __be64_to_cpup(x) __swab64p((x))
+#define __cpu_to_be32p(x) __swab32p((x))
+#define __be32_to_cpup(x) __swab32p((x))
+#define __cpu_to_be16p(x) __swab16p((x))
+#define __be16_to_cpup(x) __swab16p((x))
+#define __cpu_to_le64s(x) do {} while (0)
+#define __le64_to_cpus(x) do {} while (0)
+#define __cpu_to_le32s(x) do {} while (0)
+#define __le32_to_cpus(x) do {} while (0)
+#define __cpu_to_le16s(x) do {} while (0)
+#define __le16_to_cpus(x) do {} while (0)
+#define __cpu_to_be64s(x) __swab64s((x))
+#define __be64_to_cpus(x) __swab64s((x))
+#define __cpu_to_be32s(x) __swab32s((x))
+#define __be32_to_cpus(x) __swab32s((x))
+#define __cpu_to_be16s(x) __swab16s((x))
+#define __be16_to_cpus(x) __swab16s((x))
+
+#ifndef cpu_to_le64
+#define cpu_to_le64 __cpu_to_le64
+#define le64_to_cpu __le64_to_cpu
+#define cpu_to_le32 __cpu_to_le32
+#define le32_to_cpu __le32_to_cpu
+#define cpu_to_le16 __cpu_to_le16
+#define le16_to_cpu __le16_to_cpu
+#endif
+
+#define cpu_to_be64 __cpu_to_be64
+#define be64_to_cpu __be64_to_cpu
+#define cpu_to_be32 __cpu_to_be32
+#define be32_to_cpu __be32_to_cpu
+#define cpu_to_be16 __cpu_to_be16
+#define be16_to_cpu __be16_to_cpu
+#define cpu_to_le64p __cpu_to_le64p
+#define le64_to_cpup __le64_to_cpup
+#define cpu_to_le32p __cpu_to_le32p
+#define le32_to_cpup __le32_to_cpup
+#define cpu_to_le16p __cpu_to_le16p
+#define le16_to_cpup __le16_to_cpup
+#define cpu_to_be64p __cpu_to_be64p
+#define be64_to_cpup __be64_to_cpup
+#define cpu_to_be32p __cpu_to_be32p
+#define be32_to_cpup __be32_to_cpup
+#define cpu_to_be16p __cpu_to_be16p
+#define be16_to_cpup __be16_to_cpup
+#define cpu_to_le64s __cpu_to_le64s
+#define le64_to_cpus __le64_to_cpus
+#define cpu_to_le32s __cpu_to_le32s
+#define le32_to_cpus __le32_to_cpus
+#define cpu_to_le16s __cpu_to_le16s
+#define le16_to_cpus __le16_to_cpus
+#define cpu_to_be64s __cpu_to_be64s
+#define be64_to_cpus __be64_to_cpus
+#define cpu_to_be32s __cpu_to_be32s
+#define be32_to_cpus __be32_to_cpus
+#define cpu_to_be16s __cpu_to_be16s
+#define be16_to_cpus __be16_to_cpus
+
+
+//
+// Network to host byte swap functions
+//
+
+#define ntohl(x)           ( ( ( ( x ) & 0x000000ff ) << 24 ) | \
+                             ( ( ( x ) & 0x0000ff00 ) << 8 ) | \
+                             ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \
+                             ( ( ( x ) & 0xff000000 ) >> 24 )   )
+
+#define ntohs(x)           ( ( ( ( x ) & 0xff00 ) >> 8 ) | \
+                             ( ( ( x ) & 0x00ff ) << 8 ) )
+
+
+#define htonl(x)           ntohl(x)
+#define htons(x)           ntohs(x)
+
+
+
+#ifndef _I386_ERRNO_H
+#define _I386_ERRNO_H
+
+#define        EPERM            1      /* Operation not permitted */
+#define        ENOENT           2      /* No such file or directory */
+#define        ESRCH            3      /* No such process */
+#define        EINTR            4      /* Interrupted system call */
+#define        EIO                  5  /* I/O error */
+#define        ENXIO            6      /* No such device or address */
+#define        E2BIG            7      /* Arg list too long */
+#define        ENOEXEC          8      /* Exec format error */
+#define        EBADF            9      /* Bad file number */
+#define        ECHILD          10      /* No child processes */
+#define        EAGAIN          11      /* Try again */
+#define        ENOMEM          12      /* Out of memory */
+#define        EACCES          13      /* Permission denied */
+#define        EFAULT          14      /* Bad address */
+#define        ENOTBLK         15      /* Block device required */
+#define        EBUSY           16      /* Device or resource busy */
+#define        EEXIST          17      /* File exists */
+#define        EXDEV           18      /* Cross-device link */
+#define        ENODEV          19      /* No such device */
+#define        ENOTDIR         20      /* Not a directory */
+#define        EISDIR          21      /* Is a directory */
+#define        EINVAL          22      /* Invalid argument */
+#define        ENFILE          23      /* File table overflow */
+#define        EMFILE          24      /* Too many open files */
+#define        ENOTTY          25      /* Not a typewriter */
+#define        ETXTBSY         26      /* Text file busy */
+#define        EFBIG           27      /* File too large */
+#define        ENOSPC          28      /* No space left on device */
+#define        ESPIPE          29      /* Illegal seek */
+#define        EROFS           30      /* Read-only file system */
+#define        EMLINK          31      /* Too many links */
+#define        EPIPE           32      /* Broken pipe */
+#define        EDOM            33      /* Math argument out of domain of func */
+#define        ERANGE          34      /* Math result not representable */
+#undef EDEADLK
+#define        EDEADLK         35      /* Resource deadlock would occur */
+#undef ENAMETOOLONG
+#define        ENAMETOOLONG    36      /* File name too long */
+#undef ENOLCK
+#define        ENOLCK          37      /* No record locks available */
+#undef ENOSYS
+#define        ENOSYS          38      /* Function not implemented */
+#undef ENOTEMPTY
+#define        ENOTEMPTY       39      /* Directory not empty */
+#define        ELOOP           40      /* Too many symbolic links encountered */
+#define        EWOULDBLOCK     EAGAIN  /* Operation would block */
+#define        ENOMSG          42      /* No message of desired type */
+#define        EIDRM           43      /* Identifier removed */
+#define        ECHRNG          44      /* Channel number out of range */
+#define        EL2NSYNC        45      /* Level 2 not synchronized */
+#define        EL3HLT          46      /* Level 3 halted */
+#define        EL3RST          47      /* Level 3 reset */
+#define        ELNRNG          48      /* Link number out of range */
+#define        EUNATCH         49      /* Protocol driver not attached */
+#define        ENOCSI          50      /* No CSI structure available */
+#define        EL2HLT          51      /* Level 2 halted */
+#define        EBADE           52      /* Invalid exchange */
+#define        EBADR           53      /* Invalid request descriptor */
+#define        EXFULL          54      /* Exchange full */
+#define        ENOANO          55      /* No anode */
+#define        EBADRQC         56      /* Invalid request code */
+#define        EBADSLT         57      /* Invalid slot */
+
+#define        EDEADLOCK       EDEADLK
+
+#define        EBFONT          59      /* Bad font file format */
+#define        ENOSTR          60      /* Device not a stream */
+#define        ENODATA         61      /* No data available */
+#define        ETIME           62      /* Timer expired */
+#define        ENOSR           63      /* Out of streams resources */
+#define        ENONET          64      /* Machine is not on the network */
+#define        ENOPKG          65      /* Package not installed */
+#define        EREMOTE         66      /* Object is remote */
+#define        ENOLINK         67      /* Link has been severed */
+#define        EADV            68      /* Advertise error */
+#define        ESRMNT          69      /* Srmount error */
+#define        ECOMM           70      /* Communication error on send */
+#define        EPROTO          71      /* Protocol error */
+#define        EMULTIHOP       72      /* Multihop attempted */
+#define        EDOTDOT         73      /* RFS specific error */
+#define        EBADMSG         74      /* Not a data message */
+#define        EOVERFLOW       75      /* Value too large for defined data type */
+#define        ENOTUNIQ        76      /* Name not unique on network */
+#define        EBADFD          77      /* File descriptor in bad state */
+#define        EREMCHG         78      /* Remote address changed */
+#define        ELIBACC         79      /* Can not access a needed shared library */
+#define        ELIBBAD         80      /* Accessing a corrupted shared library */
+#define        ELIBSCN         81      /* .lib section in a.out corrupted */
+#define        ELIBMAX         82      /* Attempting to link in too many shared libraries */
+#define        ELIBEXEC        83      /* Cannot exec a shared library directly */
+#undef EILSEQ
+#define        EILSEQ          84      /* Illegal byte sequence */
+#define        ERESTART        85      /* Interrupted system call should be restarted */
+#define        ESTRPIPE        86      /* Streams pipe error */
+#define        EUSERS          87      /* Too many users */
+#define        ENOTSOCK        88      /* Socket operation on non-socket */
+#define        EDESTADDRREQ    89      /* Destination address required */
+#define        EMSGSIZE        90      /* Message too long */
+#define        EPROTOTYPE      91      /* Protocol wrong type for socket */
+#define        ENOPROTOOPT     92      /* Protocol not available */
+#define        EPROTONOSUPPORT 93      /* Protocol not supported */
+#define        ESOCKTNOSUPPORT 94      /* Socket type not supported */
+#define        EOPNOTSUPP      95      /* Operation not supported on transport endpoint */
+#define        EPFNOSUPPORT    96      /* Protocol family not supported */
+#define        EAFNOSUPPORT    97      /* Address family not supported by protocol */
+#define        EADDRINUSE      98      /* Address already in use */
+#define        EADDRNOTAVAIL   99      /* Cannot assign requested address */
+#define        ENETDOWN        100     /* Network is down */
+#define        ENETUNREACH     101     /* Network is unreachable */
+#define        ENETRESET       102     /* Network dropped connection because of reset */
+#define        ECONNABORTED    103     /* Software caused connection abort */
+#define        ECONNRESET      104     /* Connection reset by peer */
+#define        ENOBUFS         105     /* No buffer space available */
+#define        EISCONN         106     /* Transport endpoint is already connected */
+#define        ENOTCONN        107     /* Transport endpoint is not connected */
+#define        ESHUTDOWN       108     /* Cannot send after transport endpoint shutdown */
+#define        ETOOMANYREFS    109     /* Too many references: cannot splice */
+#define        ETIMEDOUT       110     /* Connection timed out */
+#define        ECONNREFUSED    111     /* Connection refused */
+#define        EHOSTDOWN       112     /* Host is down */
+#define        EHOSTUNREACH    113     /* No route to host */
+#define        EALREADY        114     /* Operation already in progress */
+#define        EINPROGRESS     115     /* Operation now in progress */
+#define        ESTALE          116     /* Stale NFS file handle */
+#define        EUCLEAN         117     /* Structure needs cleaning */
+#define        ENOTNAM         118     /* Not a XENIX named type file */
+#define        ENAVAIL         119     /* No XENIX semaphores available */
+#define        EISNAM          120     /* Is a named type file */
+#define        EREMOTEIO       121     /* Remote I/O error */
+#define        EDQUOT          122     /* Quota exceeded */
+
+#define        ENOMEDIUM       123     /* No medium found */
+#define        EMEDIUMTYPE     124     /* Wrong medium type */
+
+/* Should never be seen by user programs */
+#define ERESTARTSYS    512
+#define ERESTARTNOINTR 513
+#define ERESTARTNOHAND 514     /* restart if no handler.. */
+#define ENOIOCTLCMD    515     /* No ioctl command */
+
+/* Defined for the NFSv3 protocol */
+#define EBADHANDLE     521     /* Illegal NFS file handle */
+#define ENOTSYNC       522     /* Update synchronization mismatch */
+#define EBADCOOKIE     523     /* Cookie is stale */
+#define ENOTSUPP       524     /* Operation is not supported */
+#define ETOOSMALL      525     /* Buffer or request is too small */
+#define ESERVERFAULT   526     /* An untranslatable error occurred */
+#define EBADTYPE       527     /* Type not supported by server */
+#define EJUKEBOX       528     /* Request initiated, but will not complete before timeout */
+
+
+
+/* open/fcntl - O_SYNC is only implemented on block devices and on files
+   located on an ext2 file system */
+#define O_ACCMODE         0003
+#define O_RDONLY            00
+#define O_WRONLY            01
+#define O_RDWR              02
+#define O_CREAT                   0100 /* not fcntl */
+#define O_EXCL            0200 /* not fcntl */
+#define O_NOCTTY          0400 /* not fcntl */
+#define O_TRUNC                  01000 /* not fcntl */
+#define O_APPEND         02000
+#define O_NONBLOCK       04000
+#define O_NDELAY       O_NONBLOCK
+#define O_SYNC          010000
+#define FASYNC          020000 /* fcntl, for BSD compatibility */
+#define O_DIRECT        040000 /* direct disk access hint */
+#define O_LARGEFILE    0100000
+#define O_DIRECTORY    0200000 /* must be a directory */
+#define O_NOFOLLOW     0400000 /* don't follow links */
+
+#define F_DUPFD                0       /* dup */
+#define F_GETFD                1       /* get close_on_exec */
+#define F_SETFD                2       /* set/clear close_on_exec */
+#define F_GETFL                3       /* get file->f_flags */
+#define F_SETFL                4       /* set file->f_flags */
+#define F_GETLK                5
+#define F_SETLK                6
+#define F_SETLKW       7
+
+#define F_SETOWN       8       /*  for sockets. */
+#define F_GETOWN       9       /*  for sockets. */
+#define F_SETSIG       10      /*  for sockets. */
+#define F_GETSIG       11      /*  for sockets. */
+
+#define F_GETLK64      12      /*  using 'struct flock64' */
+#define F_SETLK64      13
+#define F_SETLKW64     14
+
+/* for F_[GET|SET]FL */
+#define FD_CLOEXEC     1       /* actually anything with low bit set goes */
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK                0
+#define F_WRLCK                1
+#define F_UNLCK                2
+
+/* for old implementation of bsd flock () */
+#define F_EXLCK                4       /* or 3 */
+#define F_SHLCK                8       /* or 4 */
+
+/* for leases */
+#define F_INPROGRESS   16
+
+/* operations for bsd flock(), also used by the kernel implementation */
+#define LOCK_SH                1       /* shared lock */
+#define LOCK_EX                2       /* exclusive lock */
+#define LOCK_NB                4       /* or'd with one of the above to prevent
+                                  blocking */
+#define LOCK_UN                8       /* remove lock */
+
+#define LOCK_MAND      32      /* This is a mandatory flock */
+#define LOCK_READ      64      /* ... Which allows concurrent read operations */
+#define LOCK_WRITE     128     /* ... Which allows concurrent write operations */
+#define LOCK_RW                192     /* ... Which allows concurrent read & write ops */
+
+#endif
+
+
+#ifndef LIBCFS_SIGNAL_H
+#define LIBCFS_SIGNAL_H
+
+/*
+ *  signal values ...
+ */
+
+#define SIGHUP          1
+#define SIGINT          2
+#define SIGQUIT                 3
+#define SIGILL          4
+#define SIGTRAP                 5
+#define SIGABRT                 6
+#define SIGIOT          6
+#define SIGBUS          7
+#define SIGFPE          8
+#define SIGKILL                 9
+#define SIGUSR1                10
+#define SIGSEGV                11
+#define SIGUSR2                12
+#define SIGPIPE                13
+#define SIGALRM                14
+#define SIGTERM                15
+#define SIGSTKFLT      16
+#define SIGCHLD                17
+#define SIGCONT                18
+#define SIGSTOP                19
+#define SIGTSTP                20
+#define SIGTTIN                21
+#define SIGTTOU                22
+#define SIGURG         23
+#define SIGXCPU                24
+#define SIGXFSZ                25
+#define SIGVTALRM      26
+#define SIGPROF                27
+#define SIGWINCH       28
+#define SIGIO          29
+#define SIGPOLL                SIGIO
+/*
+#define SIGLOST                29
+*/
+#define SIGPWR         30
+#define SIGSYS         31
+#define        SIGUNUSED       31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN       32
+#define SIGRTMAX       (_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP   0x00000001
+#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
+#define SA_SIGINFO     0x00000004
+#define SA_ONSTACK     0x08000000
+#define SA_RESTART     0x10000000
+#define SA_NODEFER     0x40000000
+#define SA_RESETHAND   0x80000000
+
+#define SA_NOMASK      SA_NODEFER
+#define SA_ONESHOT     SA_RESETHAND
+#define SA_INTERRUPT   0x20000000 /* dummy -- ignored */
+
+#define SA_RESTORER    0x04000000
+
+/* 
+ * sigaltstack controls
+ */
+#define SS_ONSTACK     1
+#define SS_DISABLE     2
+
+#define MINSIGSTKSZ    2048
+#define SIGSTKSZ       8192
+
+
+#define sigmask(sig)   ((__u32)1 << ((sig) - 1))
+
+#endif // LIBCFS_SIGNAL_H
diff --git a/libcfs/libcfs/.cvsignore b/libcfs/libcfs/.cvsignore
new file mode 100644 (file)
index 0000000..c6f0aa4
--- /dev/null
@@ -0,0 +1,11 @@
+.deps
+Makefile
+link-stamp
+.*.cmd
+autoMakefile.in
+autoMakefile
+*.ko
+*.mod.c
+.*.flags
+.tmp_versions
+.depend
diff --git a/libcfs/libcfs/Info.plist b/libcfs/libcfs/Info.plist
new file mode 100644 (file)
index 0000000..aaf9b2f
--- /dev/null
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+       <key>CFBundleDevelopmentRegion</key>
+       <string>English</string>
+       <key>CFBundleExecutable</key>
+       <string>libcfs</string>
+       <key>CFBundleIconFile</key>
+       <string></string>
+       <key>CFBundleIdentifier</key>
+       <string>com.clusterfs.lustre.libcfs</string>
+       <key>CFBundleInfoDictionaryVersion</key>
+       <string>6.0</string>
+       <key>CFBundlePackageType</key>
+       <string>KEXT</string>
+       <key>CFBundleSignature</key>
+       <string>????</string>
+       <key>CFBundleVersion</key>
+       <string>1.0.1</string>
+       <key>OSBundleCompatibleVersion</key>
+       <string>1.0.0</string>
+       <key>OSBundleLibraries</key>
+       <dict>
+               <key>com.apple.kpi.bsd</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.libkern</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.mach</key>
+               <string>8.0.0b1</string>
+               <key>com.apple.kpi.unsupported</key>
+               <string>8.0.0b1</string>
+       </dict>
+</dict>
+</plist>
diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in
new file mode 100644 (file)
index 0000000..823782a
--- /dev/null
@@ -0,0 +1,33 @@
+MODULES = libcfs
+
+libcfs-linux-objs := linux-tracefile.o linux-debug.o
+libcfs-linux-objs += linux-prim.o linux-mem.o
+libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o
+libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o
+libcfs-linux-objs += linux-utils.o linux-module.o
+
+ifeq ($(PATCHLEVEL),6)
+libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs))
+endif
+
+default: all
+
+ifeq (@linux25@,no)
+sources:
+       @for i in $(libcfs-linux-objs:%.o=%.c) ; do \
+               echo "ln -s @srcdir@/linux/$$i ." ; \
+               ln -sf @srcdir@/linux/$$i . || exit 1 ; \
+       done
+
+else
+sources:
+
+endif
+
+libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o
+
+libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs)
+
+EXTRA_PRE_CFLAGS := -I@LUSTRE@/../libcfs/libcfs
+
+@INCLUDE_RULES@
diff --git a/libcfs/libcfs/autoMakefile.am b/libcfs/libcfs/autoMakefile.am
new file mode 100644 (file)
index 0000000..e70e5ce
--- /dev/null
@@ -0,0 +1,53 @@
+# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+#
+# This code is issued under the GNU General Public License.
+# See the file COPYING in this distribution
+
+SUBDIRS := linux
+if DARWIN
+SUBDIRS += darwin
+endif
+DIST_SUBDIRS := $(SUBDIRS)
+
+if LIBLUSTRE
+noinst_LIBRARIES= libcfs.a
+libcfs_a_SOURCES= debug.c user-prim.c user-lock.c user-tcpip.c user-bitops.c
+libcfs_a_CPPFLAGS = $(LLCPPFLAGS)
+libcfs_a_CFLAGS = $(LLCFLAGS)
+endif
+
+if MODULES
+
+if LINUX
+modulenet_DATA := libcfs$(KMODEXT)
+endif
+
+if DARWIN
+macos_PROGRAMS := libcfs
+
+nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c      \
+       darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \
+       darwin/darwin-tcpip.c darwin/darwin-utils.c                     \
+       darwin/darwin-debug.c darwin/darwin-proc.c                      \
+       darwin/darwin-tracefile.c darwin/darwin-module.c                \
+       debug.c module.c tracefile.c nidstrings.c watchdog.c
+
+libcfs_CFLAGS := $(EXTRA_KCFLAGS)
+libcfs_LDFLAGS := $(EXTRA_KLDFLAGS)
+libcfs_LDADD := $(EXTRA_KLIBS)
+
+plist_DATA := Info.plist
+
+install_data_hook := fix-kext-ownership
+
+endif
+
+endif
+
+install-data-hook: $(install_data_hook)
+
+EXTRA_DIST := Info.plist
+
+MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs
+DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \
+                                           user-lock.c user-tcpip.c user-bitops.c
diff --git a/libcfs/libcfs/darwin/.cvsignore b/libcfs/libcfs/darwin/.cvsignore
new file mode 100644 (file)
index 0000000..282522d
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/libcfs/libcfs/darwin/Makefile.am b/libcfs/libcfs/darwin/Makefile.am
new file mode 100644 (file)
index 0000000..3f2077b
--- /dev/null
@@ -0,0 +1,12 @@
+EXTRA_DIST := \
+       darwin-mem.c \
+       darwin-proc.c \
+       darwin-utils.c \
+       darwin-debug.c \
+       darwin-module.c \
+       darwin-sync.c \
+       darwin-fs.c \
+       darwin-prim.c \
+       darwin-tracefile.c \
+       darwin-curproc.c \
+       darwin-tcpip.c
diff --git a/libcfs/libcfs/darwin/darwin-curproc.c b/libcfs/libcfs/darwin/darwin-curproc.c
new file mode 100644 (file)
index 0000000..e12394e
--- /dev/null
@@ -0,0 +1,164 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API implementation for XNU kernel
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Implementation of cfs_curproc API (see libcfs/include/libcfs/curproc.h)
+ * for XNU kernel.
+ */
+
+/*
+ * Return the credential of the current process.  Darwin8 goes through the
+ * proc_ucred() accessor; older kernels read it from the proc structure.
+ */
+static inline struct ucred *curproc_ucred(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_cred->pc_ucred;
+#else
+        return proc_ucred(current_proc());
+#endif
+}
+
+/* Return cr_uid from the credential of the current process. */
+uid_t  cfs_curproc_uid(void)
+{
+        struct ucred *cred = curproc_ucred();
+
+        return cred->cr_uid;
+}
+
+/*
+ * Return the first entry of the current credential's group list
+ * (cr_groups[0]); the list is asserted to be non-empty.
+ */
+gid_t  cfs_curproc_gid(void)
+{
+        struct ucred *cred = curproc_ucred();
+
+        LASSERT(cred->cr_ngroups > 0);
+        return cred->cr_groups[0];
+}
+
+/*
+ * Return the uid used for file system access checks.  This port reports
+ * the ruid field of the current credential (cr_ruid on Darwin8, p_ruid on
+ * older kernels).
+ */
+uid_t  cfs_curproc_fsuid(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_cred->p_ruid;
+#else
+        return curproc_ucred()->cr_ruid;
+#endif
+}
+
+/*
+ * Return the gid used for file system access checks.  This port reports
+ * the rgid field of the current credential (cr_rgid on Darwin8, p_rgid on
+ * older kernels).
+ */
+gid_t  cfs_curproc_fsgid(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_cred->p_rgid;
+#else
+        return curproc_ucred()->cr_rgid;
+#endif
+}
+
+/*
+ * Return an identifier for the caller.  On Darwin8 this is NOT a real pid:
+ * threads have no pid of their own, so the address of the current thread
+ * structure is returned instead.  Older kernels return p_pid.
+ */
+pid_t  cfs_curproc_pid(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_pid;
+#else
+        /* no pid for each thread, return address of thread struct */
+        return (pid_t)current_thread();
+#endif
+}
+
+/*
+ * Return the number of groups of the current process, not counting
+ * cr_groups[0] (which cfs_curproc_gid() reports).
+ */
+int    cfs_curproc_groups_nr(void)
+{
+        struct ucred *cred = curproc_ucred();
+
+        LASSERT(cred->cr_ngroups > 0);
+        return cred->cr_ngroups - 1;
+}
+
+/*
+ * Return 1 if @gid appears anywhere in the current credential's group
+ * list (including cr_groups[0]), 0 otherwise.
+ */
+int    cfs_curproc_is_in_groups(gid_t gid)
+{
+        struct ucred *cred;
+        int idx;
+
+        cred = curproc_ucred();
+        LASSERT(cred != NULL);
+
+        idx = 0;
+        while (idx < cred->cr_ngroups) {
+                if (cred->cr_groups[idx] == gid)
+                        return 1;
+                idx++;
+        }
+        return 0;
+}
+
+/*
+ * Copy the groups of the current process, except cr_groups[0], into @array.
+ *
+ * At most @size entries are written, and never more than the
+ * cr_ngroups - 1 entries that follow cr_groups[0] (the same count that
+ * cfs_curproc_groups_nr() returns).  Nothing is written when that count or
+ * @size is not positive.
+ */
+void   cfs_curproc_groups_dump(gid_t *array, int size)
+{
+        struct ucred *cr;
+
+        cr = curproc_ucred();
+        LASSERT(cr != NULL);
+        CLASSERT(sizeof array[0] == sizeof (__u32));
+
+        /*
+         * The copy starts at cr_groups[1], so only cr_ngroups - 1 entries
+         * are available; clamping to cr_ngroups would read one entry past
+         * the valid part of the group list.
+         */
+        size = min_t(int, size, cr->cr_ngroups - 1);
+        if (size <= 0)
+                return;
+        memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t));
+}
+
+/*
+ * Return the file creation mask of the current process.  Darwin8 cannot
+ * read it from a kext and returns a constant -1 instead.
+ */
+mode_t cfs_curproc_umask(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_fd->fd_cmask;
+#else
+        /*
+         * XXX Liang:
+         *
+         * fd_cmask is not available in kexts, so we just assume
+         * everything is permitted.
+         *
+         * NOTE(review): confirm that callers interpret -1 as intended.
+         */
+        return -1;
+#endif
+}
+
+/*
+ * Return the command name of the current process.
+ *
+ * On Darwin8 the result is a copy held in a single function-static buffer,
+ * so it is overwritten by the next call and writes through the returned
+ * pointer do not rename the process.  Older kernels return p_comm itself.
+ */
+char  *cfs_curproc_comm(void)
+{
+#ifndef __DARWIN8__
+        return current_proc()->p_comm;
+#else
+        /*
+         * Writing to proc->p_comm is not permitted in Darwin8:
+         * proc_selfname() only returns a copy of proc->p_comm, so a caller
+         * that tries to change the comm of the current process through
+         * this function has no effect.
+         */
+        static char     selfname[MAXCOMLEN+1];
+
+        proc_selfname(selfname, MAXCOMLEN+1);
+        return selfname;
+#endif
+}
+
+/* Capabilities are not tracked in this port: always report -1. */
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+        cfs_kernel_cap_t all_caps = -1;
+
+        return all_caps;
+}
+
+/* Capabilities are not tracked in this port: @cap is ignored. */
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+        /* intentionally empty */
+}
+
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/libcfs/darwin/darwin-debug.c b/libcfs/libcfs/darwin/darwin-debug.c
new file mode 100644 (file)
index 0000000..2152d40
--- /dev/null
@@ -0,0 +1,77 @@
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+
+/* Stack dumping is not implemented in this port; @tsk is ignored. */
+void libcfs_debug_dumpstack(cfs_task_t *tsk)
+{
+        /* intentionally empty */
+}
+
+/* No LBUG upcall is run in this port; all arguments are ignored. */
+void libcfs_run_lbug_upcall(char *file, const char *fn, const int line)
+{
+        return;
+}
+
+/*
+ * Handle an LBUG raised at @file/@func/@line: flag the catastrophe, log
+ * the caller's identity, dump the debug log, run the (stub) upcall and then
+ * park the calling thread forever.  This function never returns.
+ */
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+        libcfs_catastrophe = 1;
+        CEMERG("LBUG: pid: %u thread: %#x\n",
+               (unsigned)cfs_curproc_pid(), (unsigned)current_thread());
+        libcfs_debug_dumplog();
+        libcfs_run_lbug_upcall(file, func, line);
+
+        /* yield forever rather than panic the machine */
+        for (;;)
+                cfs_schedule();
+
+        /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */
+}
+
+#if ENTRY_NESTING_SUPPORT
+
+/*
+ * Return the debug data record stored in the current uthread's uu_nlminfo
+ * slot, or NULL if the slot is empty or does not look like a valid record
+ * (wrong magic numbers, or a nesting level of 1000 or more).
+ */
+static inline struct cfs_debug_data *__current_cdd(void)
+{
+        struct cfs_debug_data *rec;
+
+        rec = (struct cfs_debug_data *)current_uthread()->uu_nlminfo;
+        if (rec == NULL)
+                return NULL;
+        if (rec->magic1 != CDD_MAGIC1 || rec->magic2 != CDD_MAGIC2)
+                return NULL;
+        if (!(rec->nesting_level < 1000))
+                return NULL;
+        return rec;
+}
+
+/* Store @cdd in the current uthread's uu_nlminfo slot. */
+static inline void __current_cdd_set(struct cfs_debug_data *cdd)
+{
+        void *slot_value = (void *)cdd;
+
+        current_uthread()->uu_nlminfo = slot_value;
+}
+
+/*
+ * Make @child the current debug data record.  If a valid record is already
+ * current, @child is chained to it and placed one nesting level deeper;
+ * otherwise @child's parent and nesting_level are left as the caller set
+ * them.
+ */
+void __entry_nesting(struct cfs_debug_data *child)
+{
+        struct cfs_debug_data *outer = __current_cdd();
+
+        if (outer != NULL) {
+                child->nesting_level = outer->nesting_level + 1;
+                child->parent        = outer;
+        }
+        __current_cdd_set(child);
+}
+
+/* Leave @child's scope: its parent becomes the current debug data record. */
+void __exit_nesting(struct cfs_debug_data *child)
+{
+        struct cfs_debug_data *outer = child->parent;
+
+        __current_cdd_set(outer);
+}
+
+/*
+ * Return the nesting level of the current debug data record, or 0 when
+ * there is no valid current record.
+ */
+unsigned int __current_nesting_level(void)
+{
+        struct cfs_debug_data *rec = __current_cdd();
+
+        if (rec == NULL)
+                return 0;
+        return rec->nesting_level;
+}
+/* ENTRY_NESTING_SUPPORT */
+#endif
diff --git a/libcfs/libcfs/darwin/darwin-fs.c b/libcfs/libcfs/darwin/darwin-fs.c
new file mode 100644 (file)
index 0000000..6fce8d5
--- /dev/null
@@ -0,0 +1,451 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+#include <sys/namei.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Kernel APIs for file system in xnu
+ *
+ * Public functions
+ */
+
+#ifdef __DARWIN8__
+#include <sys/vnode.h>
+
+extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t);
+
+/*
+ * vnode_size() is not exported, so provide a local version: query the
+ * va_data_size attribute of @vp and store it in *@sizep.
+ *
+ * Returns 0 on success; otherwise the error from vnode_getattr(), in which
+ * case *@sizep is not written.
+ */
+static errno_t
+vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx)
+{
+        struct vnode_attr       attr;
+        int                     rc;
+
+        VATTR_INIT(&attr);
+        VATTR_WANTED(&attr, va_data_size);
+        rc = vnode_getattr(vp, &attr, ctx);
+        if (rc)
+                return(rc);
+
+        *sizep = attr.va_data_size;
+        return(rc);
+}
+
+/*
+ * XXX Liang:
+ *
+ * The kern_file_*() functions are not yet safe for concurrent use by
+ * multiple threads.  However, they are needed only by tracefiled, so
+ * making them MT-safe is not a priority.
+ */
+/*
+ * Query the size of the file behind @fp.  On success 0 is returned and the
+ * size is stored in *@psize when @psize is not NULL; on failure the error
+ * from vnode_size() is returned and *@psize is not written.
+ */
+int
+kern_file_size(struct cfs_kern_file *fp, off_t *psize)
+{
+        off_t   bytes;
+        int     rc;
+
+        rc = vnode_size(fp->f_vp, &bytes, fp->f_ctxt);
+        if (rc != 0)
+                return rc;
+
+        if (psize != NULL)
+                *psize = bytes;
+        return 0;
+}
+
+/*
+ * Open @filename from inside the kernel.
+ *
+ * @uflags are open(2)-style flags (converted with FFLAGS()), @mode is the
+ * creation mode.  If @err is not NULL it receives 0 on success or a
+ * negative errno on failure.
+ *
+ * Returns a newly allocated file handle that must be released with
+ * kern_file_close(), or NULL on failure.
+ */
+struct cfs_kern_file *
+kern_file_open(const char * filename, int uflags, int mode, int *err)
+{
+        struct cfs_kern_file    *fp;
+        vnode_t         vp;
+        int             error;
+
+        fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK);
+        if (fp == NULL) {
+                if (err != NULL)
+                        *err = -ENOMEM;
+                return NULL;
+        }
+        fp->f_flags = FFLAGS(uflags);
+        fp->f_ctxt = vfs_context_create(NULL);
+
+        error = vnode_open(filename, fp->f_flags, mode, 0, &vp, fp->f_ctxt);
+        if (error != 0) {
+                if (err != NULL)
+                        *err = -error;
+                /*
+                 * Drop the context reference taken above and return NULL:
+                 * the handle is freed here, so it must not be handed back
+                 * to the caller.
+                 */
+                vfs_context_rele(fp->f_ctxt);
+                _FREE(fp, M_TEMP);
+                return NULL;
+        }
+
+        if (err != NULL)
+                *err = 0;
+        fp->f_vp = vp;
+
+        return fp;
+}
+
+/*
+ * Close a handle obtained from kern_file_open(): close the vnode, drop the
+ * vfs context reference and free the handle.  Always returns 0.
+ */
+int
+kern_file_close(struct cfs_kern_file *fp)
+{
+        vfs_context_t ctx = fp->f_ctxt;
+
+        vnode_close(fp->f_vp, fp->f_flags, ctx);
+        vfs_context_rele(ctx);
+        _FREE(fp, M_TEMP);
+
+        return 0;
+}
+
+/*
+ * Read up to @nbytes bytes from @fp at offset *@pos into the kernel buffer
+ * @buf.
+ *
+ * Returns the number of bytes transferred and advances *@pos by that
+ * amount.  On failure a negative errno is returned and *@pos is left
+ * unchanged; a transfer of zero bytes is reported as -EINVAL.
+ */
+int
+kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+        struct proc *p = current_proc();
+        int     resid;
+        int     error;
+
+        assert(buf != NULL);
+        assert(fp != NULL && fp->f_vp != NULL);
+
+        /* vn_rdwr() takes the length as an int; refuse what cannot fit */
+        if (nbytes > INT_MAX)
+                return -EINVAL;
+
+        error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos,
+                        UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
+        /*
+         * Negate the error so that it cannot be mistaken for a byte count
+         * and has the same sign as the -EINVAL returned below.
+         */
+        if (error != 0)
+                return -error;
+        if (nbytes == resid)
+                return -EINVAL;
+
+        *pos += nbytes - resid;
+
+        return (int)(nbytes - resid);
+}
+
+/*
+ * Write up to @nbytes bytes from the kernel buffer @buf to @fp at offset
+ * *@pos.
+ *
+ * Returns the number of bytes transferred and advances *@pos by that
+ * amount.  On failure a negative errno is returned and *@pos is left
+ * unchanged; a transfer of zero bytes is reported as -EINVAL.
+ */
+int
+kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+        struct proc *p = current_proc();
+        int     resid;
+        int     error;
+
+        assert(buf != NULL);
+        assert(fp != NULL && fp->f_vp != NULL);
+
+        /* vn_rdwr() takes the length as an int; refuse what cannot fit */
+        if (nbytes > INT_MAX)
+                return -EINVAL;
+
+        error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos,
+                        UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p);
+        /*
+         * Negate the error so that it cannot be mistaken for a byte count
+         * and has the same sign as the -EINVAL returned below.
+         */
+        if (error != 0)
+                return -error;
+        if (nbytes == resid)
+                return -EINVAL;
+
+        *pos += nbytes - resid;
+
+        return (int)(nbytes - resid);
+}
+
+/*
+ * Flush @fp to stable storage, waiting for completion (MNT_WAIT).
+ * Returns the result of VNOP_FSYNC() unchanged.
+ */
+int
+kern_file_sync (struct cfs_kern_file *fp)
+{
+        int rc;
+
+        rc = VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt);
+        return rc;
+}
+
+#else  /* !__DARWIN8__ */
+
+/*
+ * Query the size of the file behind @fp with vn_stat().  *@size receives
+ * st_size on success and 0 on failure; the vn_stat() result is returned.
+ */
+int
+kern_file_size(struct file *fp, off_t *size)
+{
+        struct stat st;
+        struct vnode *vnode;
+        int     rc;
+
+        vnode = (struct vnode *)fp->f_data;
+        rc = vn_stat(vnode, &st, current_proc());
+        if (rc == 0) {
+                *size = st.st_size;
+                return 0;
+        }
+        *size = 0;
+        return rc;
+}
+
+/*
+ * Open @filename from inside the kernel and wrap the vnode in a private
+ * file structure (reference count 1) that is not on any kernel file list.
+ *
+ * If @err is not NULL it receives 0 on success or the vn_open() error on
+ * failure.  NOTE(review): the error is stored with the sign vn_open()
+ * returned, whereas the Darwin8 variant stores a negated errno - confirm
+ * what callers expect.
+ *
+ * Returns the new file, to be released with kern_file_close(), or NULL.
+ */
+cfs_file_t *
+kern_file_open(const char * filename, int flags, int mode, int *err)
+{
+        struct nameidata nd;
+        cfs_file_t      *fp;
+        register struct vnode   *vp;
+        int                     rc;
+        extern struct fileops   vnops;
+        extern int nfiles;
+        CFS_DECL_CONE_DATA;
+
+        CFS_CONE_IN;
+        nfiles++;
+        MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO);
+        bzero(fp, sizeof(cfs_file_t));
+        fp->f_count = 1;
+        LIST_CIRCLE(fp, f_list);
+        NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc());
+        if ((rc = vn_open(&nd, flags, mode)) != 0){
+                printf("filp_open failed at (%d)\n", rc);
+                if (err != NULL)
+                        *err = rc;
+                FREE_ZONE(fp, sizeof *fp, M_FILE);
+                /*
+                 * Undo the accounting done above: this file is never
+                 * passed to kern_file_close(), which is the only other
+                 * place that decrements nfiles.
+                 */
+                nfiles--;
+                CFS_CONE_EX;
+                return NULL;
+        }
+        vp = nd.ni_vp;
+        fp->f_flag = flags & FMASK;
+        fp->f_type = DTYPE_VNODE;
+        fp->f_ops = &vnops;
+        fp->f_data = (caddr_t)vp;
+        fp->f_cred = current_proc()->p_ucred;
+        /*
+         * Hold cred to increase reference
+         */
+        crhold(fp->f_cred);
+        /*
+         * vnode is locked inside vn_open for lookup,
+         * we should release the lock before return
+         */
+        VOP_UNLOCK(vp, 0, current_proc());
+        CFS_CONE_EX;
+
+        /* give callers that check *err a defined value on success too */
+        if (err != NULL)
+                *err = 0;
+
+        return fp;
+}
+
+/*
+ * Drop one reference on @fp and return the remaining count.  Panics if
+ * @fp carries the 0xffff "already released" marker or if the count would
+ * become negative.
+ */
+static int
+frele_internal(cfs_file_t *fp)
+{
+        if (fp->f_count == (short)0xffff)
+                panic("frele of lustre: stale");
+
+        fp->f_count--;
+        if (fp->f_count < 0)
+                panic("frele of lustre: count < 0");
+
+        return ((int)fp->f_count);
+}
+
+/*
+ * Drop a reference on @fp and, when it was the last one, close the vnode,
+ * release the credential and free the file structure.  A NULL @fp is
+ * accepted.  Always returns 0.
+ */
+int
+kern_file_close (cfs_file_t *fp)
+{
+       struct vnode    *vp;
+        CFS_DECL_CONE_DATA;
+       
+       if (fp == NULL)
+               return 0;
+
+        CFS_CONE_IN;
+       /* other references remain: nothing to tear down yet */
+       if (frele_internal(fp) > 0)
+                goto out;
+       vp = (struct vnode *)fp->f_data;
+       (void )vn_close(vp, fp->f_flag, fp->f_cred, current_proc());
+        /*
+        * ffree(fp);
+         * Do not use ffree() to release fp!
+         * ffree() will call LIST_REMOVE(fp),
+         * but fp is not on any list, so that
+         * would cause a kernel panic.
+         */
+        struct ucred *cred;
+        cred = fp->f_cred;
+        if (cred != NOCRED) {
+                fp->f_cred = NOCRED;
+                crfree(cred);
+        }
+        extern int nfiles;
+        nfiles--;
+        /*
+         * Poison the structure and mark it with the 0xffff count that
+         * frele_internal() recognises as "stale" before freeing it.
+         */
+        memset(fp, 0xff, sizeof *fp);
+        fp->f_count = (short)0xffff;
+        FREE_ZONE(fp, sizeof *fp, M_FILE);
+out:
+        CFS_CONE_EX;
+       return 0;
+}
+
+extern void bwillwrite(void);
+
+/*
+ * Write buffer to filp inside kernel
+ *
+ * Writes up to @nbyte bytes from the kernel buffer @buf to @fp.  When @pos
+ * is not NULL the write happens at offset *@pos and *@pos is advanced by
+ * the number of bytes written; otherwise the file's own offset is used.
+ *
+ * Returns the number of bytes written, or a negative errno on failure (in
+ * which case *@pos is left unchanged).
+ */
+int
+kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
+{
+        struct uio auio;
+        struct iovec aiov;
+        struct proc *p = current_proc();
+        long cnt, error = 0;
+        int flags = 0;
+        CFS_DECL_CONE_DATA;
+
+        /*
+         * Report an oversized request as a negative errno like every other
+         * failure here; a positive EINVAL would look like a byte count.
+         */
+        if (nbyte > INT_MAX)
+                return (-EINVAL);
+
+        aiov.iov_base = (void *)(uintptr_t)buf;
+        aiov.iov_len = nbyte;
+        auio.uio_iov = &aiov;
+        auio.uio_iovcnt = 1;
+        if (pos != NULL) {
+                auio.uio_offset = *pos;
+                /*
+                 * Liang: If FOF_OFFSET is not set, vn_write()
+                 * will use fp->f_offset as the real offset.
+                 * Same in vn_read()
+                 */
+                flags |= FOF_OFFSET;
+        } else
+                auio.uio_offset = (off_t)-1;
+        auio.uio_resid = nbyte;
+        auio.uio_rw = UIO_WRITE;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_procp = p;
+
+        cnt = nbyte;
+        CFS_CONE_IN;
+        if (fp->f_type == DTYPE_VNODE)
+                bwillwrite();   /* empty stuff now */
+        if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
+                /* a partial transfer that was merely interrupted is a success */
+                if (auio.uio_resid != cnt && (error == ERESTART ||
+                    error == EINTR || error == EWOULDBLOCK))
+                        error = 0;
+                /* The socket layer handles SIGPIPE */
+                if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
+                        psignal(p, SIGPIPE);
+        }
+        CFS_CONE_EX;
+        /* on failure do not move *pos by the (negative) error value */
+        if (error != 0)
+                return -error;
+
+        cnt -= auio.uio_resid;
+        if (pos != NULL)
+                *pos += cnt;
+        return cnt;
+}
+
+/*
+ * Read from filp inside kernel
+ *
+ * Reads up to @nbyte bytes from @fp into the kernel buffer @buf.  When
+ * @pos is not NULL the read happens at offset *@pos and *@pos is advanced
+ * by the number of bytes read; otherwise the file's own offset is used.
+ *
+ * Returns the number of bytes read, or a negative errno on failure (in
+ * which case *@pos is left unchanged).
+ */
+int
+kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos)
+{
+        struct uio auio;
+        struct iovec aiov;
+        struct proc *p = current_proc();
+        long cnt, error = 0;
+        int  flags = 0;
+        CFS_DECL_CONE_DATA;
+
+        /*
+         * Report an oversized request as a negative errno like every other
+         * failure here; a positive EINVAL would look like a byte count.
+         */
+        if (nbyte > INT_MAX)
+                return (-EINVAL);
+
+        aiov.iov_base = (caddr_t)buf;
+        aiov.iov_len = nbyte;
+        auio.uio_iov = &aiov;
+        auio.uio_iovcnt = 1;
+        if (pos != NULL) {
+                auio.uio_offset = *pos;
+                /* without FOF_OFFSET the file's own f_offset would be used */
+                flags |= FOF_OFFSET;
+        } else
+                auio.uio_offset = (off_t)-1;
+        auio.uio_resid = nbyte;
+        auio.uio_rw = UIO_READ;
+        auio.uio_segflg = UIO_SYSSPACE;
+        auio.uio_procp = p;
+
+        cnt = nbyte;
+        CFS_CONE_IN;
+        if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) {
+                /* a partial transfer that was merely interrupted is a success */
+                if (auio.uio_resid != cnt && (error == ERESTART ||
+                    error == EINTR || error == EWOULDBLOCK))
+                        error = 0;
+        }
+        CFS_CONE_EX;
+        /* on failure do not move *pos by the (negative) error value */
+        if (error != 0)
+                return -error;
+
+        cnt -= auio.uio_resid;
+        if (pos != NULL)
+                *pos += cnt;
+
+        return cnt;
+}
+
+/*
+ * Flush the vnode behind @fp, waiting for completion (MNT_WAIT).
+ *
+ * Returns -EBADF if a reference on @fp cannot be taken, otherwise the
+ * result of VOP_FSYNC().
+ * NOTE(review): the -EBADF path is negative while the VOP_FSYNC() result
+ * is passed through with whatever sign it has - confirm callers cope.
+ */
+int
+kern_file_sync (cfs_file_t *fp)
+{
+       struct vnode *vp = (struct vnode *)fp->f_data;
+       struct proc *p = current_proc();
+       int error = 0;
+        CFS_DECL_CONE_DATA;
+       
+        CFS_CONE_IN;
+       /* pin the file for the duration of the sync */
+       if (fref(fp) == -1) {
+                CFS_CONE_EX;
+               return (-EBADF);
+        }
+       /* VOP_FSYNC() is called with the vnode locked exclusively */
+       vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+       error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+       VOP_UNLOCK(vp, 0, p);
+       frele(fp);
+        CFS_CONE_EX;
+
+       return error;
+}
+
+#endif /* !__DARWIN8__ */
+
+/*
+ * Placeholder: nothing is allocated.  Every call returns the address of
+ * the same function-static posix_acl object; @count and @flags are
+ * ignored.
+ */
+struct posix_acl *posix_acl_alloc(int count, int flags)
+{
+        static struct posix_acl shared_acl;
+        struct posix_acl *result = &shared_acl;
+
+        return result;
+}
+
+/*
+ * XXX Liang: I've not converted all of them, 
+ * more is needed? 
+ */
+/*
+ * Convert native open(2) flags to the CFS_O_* encoding.  The access mode
+ * bits (O_ACCMODE) are copied as they are; O_CREAT, O_TRUNC, O_EXCL,
+ * O_NONBLOCK, O_APPEND, O_NOFOLLOW and O_SYNC are translated one by one.
+ * Any other flag is dropped.
+ */
+int cfs_oflags2univ(int flags)
+{
+        int univ = flags & O_ACCMODE;
+
+        if (flags & O_CREAT)
+                univ |= CFS_O_CREAT;
+        if (flags & O_TRUNC)
+                univ |= CFS_O_TRUNC;
+        if (flags & O_EXCL)
+                univ |= CFS_O_EXCL;
+        if (flags & O_NONBLOCK)
+                univ |= CFS_O_NONBLOCK;
+        if (flags & O_APPEND)
+                univ |= CFS_O_APPEND;
+        if (flags & O_NOFOLLOW)
+                univ |= CFS_O_NOFOLLOW;
+        if (flags & O_SYNC)
+                univ |= CFS_O_SYNC;
+        return univ;
+}
+
+/*
+ * XXX Liang: we don't need it in OSX.
+ * But it should be implemented anyway.
+ *
+ * Until then this is the identity: @flags is returned unchanged and no
+ * CFS_O_* to native translation takes place.
+ */
+int cfs_univ2oflags(int flags)
+{
+        int native = flags;
+
+        return native;
+}
diff --git a/libcfs/libcfs/darwin/darwin-internal.h b/libcfs/libcfs/darwin/darwin-internal.h
new file mode 100644 (file)
index 0000000..6c83577
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef __LIBCFS_DARWIN_INTERNAL_H__
+#define __LIBCFS_DARWIN_INTERNAL_H__
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+
+int cfs_sysctl_isvalid(void);
+struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
+                                        const char *name, int (*handler) SYSCTL_HANDLER_ARGS);
+struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int n,
+                                       const char *name, int *ptr, int val);
+struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
+                                         const char *name, int *ptr, int val);
+struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
+                                           const char *name, char *ptr, int len);
+struct sysctl_oid * cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
+                                           const char *name, void *ptr, int size);
+
+#endif
diff --git a/libcfs/libcfs/darwin/darwin-mem.c b/libcfs/libcfs/darwin/darwin-mem.c
new file mode 100644 (file)
index 0000000..3079a56
--- /dev/null
@@ -0,0 +1,480 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Liang Zhen <liangzhen@clusterfs.com>
+ *         Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/malloc.h>
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "darwin-internal.h"
+
+#if CFS_INDIVIDUAL_ZONE
+extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *);
+extern void * zalloc(zone_t zone);
+extern void *zalloc_noblock(zone_t zone);
+extern void zfree(zone_t zone, void *addr);
+
+struct cfs_zone_nob {
+        struct list_head       *z_nob;  /* Pointer to z_link */
+        struct list_head        z_link; /* Do NOT access it directly */       
+};
+
+static struct cfs_zone_nob      cfs_zone_nob;
+static spinlock_t               cfs_zone_guard;
+
+/*
+ * Look up an existing cache by @name and @objsize on the zone list
+ * (cfs_zone_nob.z_nob), under cfs_zone_guard.
+ *
+ * Returns the matching cache, or NULL if none matches.
+ */
+cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize)
+{
+        cfs_mem_cache_t         *walker = NULL;
+        cfs_mem_cache_t         *found = NULL;
+
+        LASSERT(cfs_zone_nob.z_nob != NULL);
+
+        spin_lock(&cfs_zone_guard);
+        list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) {
+                if (!strcmp(walker->mc_name, name) &&
+                    walker->mc_size == objsize) {
+                        /*
+                         * Record the hit separately: when the loop runs to
+                         * completion the cursor does not end up NULL, so it
+                         * cannot be used to signal "not found".
+                         */
+                        found = walker;
+                        break;
+                }
+        }
+        spin_unlock(&cfs_zone_guard);
+
+        return found;
+}
+
+/*
+ * our wrapper around kern/zalloc.c:zinit()
+ *
+ * Creates copy of name and calls zinit() to do real work. Needed because zone
+ * survives kext unloading, so that @name cannot be just static string
+ * embedded into kext image.
+ *
+ * Returns the new cache descriptor, or NULL if it cannot be allocated.
+ * The copy of @name handed to zinit() is never freed by this function.
+ */
+cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name)
+{
+       cfs_mem_cache_t *mc = NULL;
+        char *cname;
+
+       MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
+       if (mc == NULL){
+               CERROR("cfs_mem_cache created fail!\n");
+               return NULL;
+       }
+
+        /* heap copy of the name, handed to zinit() below */
+        cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK);
+        LASSERT(cname != NULL);
+        mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name));
+        mc->mc_size = objsize;
+        /*
+         * NOTE(review): the link is only initialised here; nothing in this
+         * function adds mc to the list that mem_cache_find() walks - confirm
+         * where (or whether) that happens.
+         */
+        CFS_INIT_LIST_HEAD(&mc->mc_link);
+        /*
+         * NOTE(review): the length is derived from the source string, not
+         * from the size of mc_name, so this behaves like strcpy() - confirm
+         * that mc_name is large enough for every cache name.
+         */
+        strncpy(mc->mc_name, name, 1 + strlen(name));
+        return mc;
+}
+
+/*
+ * Intentionally a no-op: a zone cannot be destroyed once it has been
+ * created, so the cache is simply left on the list.
+ *
+ * The zone is not lost when libcfs is unloaded; it can be found again
+ * through libcfs.zone.
+ */
+void mem_cache_destroy(cfs_mem_cache_t *mc)
+{
+        /* nothing to release */
+}
+
+#define mem_cache_alloc(mc)     zalloc((mc)->mc_cache)
+#ifdef __DARWIN8__
+# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache)
+#else
+/* XXX Liang: Tiger doesn't export zalloc_noblock() */
+# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache)
+#endif
+#define mem_cache_free(mc, p)   zfree((mc)->mc_cache, p)
+
+#else  /* !CFS_INDIVIDUAL_ZONE */
+
+/*
+ * Without individual zones no list of caches is kept, so a lookup never
+ * finds anything: always returns NULL.
+ */
+cfs_mem_cache_t *
+mem_cache_find(const char *name, size_t objsize)
+{
+        cfs_mem_cache_t *none = NULL;
+
+        return none;
+}
+
+/*
+ * Create a cache descriptor for objects of @size bytes, backed by an
+ * OSMalloc tag named @name.  Returns NULL if the descriptor cannot be
+ * allocated.
+ */
+cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name)
+{
+        cfs_mem_cache_t *cache = NULL;
+
+        MALLOC(cache, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO);
+        if (cache != NULL) {
+                cache->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT);
+                cache->mc_size = size;
+                return cache;
+        }
+
+        CERROR("cfs_mem_cache created fail!\n");
+        return NULL;
+}
+
+/* Release the OSMalloc tag of @mc and then the descriptor itself. */
+void mem_cache_destroy(cfs_mem_cache_t *mc)
+{
+        /* the tag must go first: it is read from the descriptor */
+        OSMalloc_Tagfree(mc->mc_cache);
+
+        FREE(mc, M_TEMP);
+}
+
+#define mem_cache_alloc(mc)     OSMalloc((mc)->mc_size, (mc)->mc_cache)
+#define mem_cache_alloc_nb(mc)  OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache)
+#define mem_cache_free(mc, p)   OSFree(p, (mc)->mc_size, (mc)->mc_cache)
+
+#endif /* !CFS_INDIVIDUAL_ZONE */
+
+/*
+ * Return a cache for objects of @objsize bytes called @name, reusing an
+ * existing one when mem_cache_find() reports a match and creating a new
+ * one otherwise.  @off and @arg1 are not used by this port.
+ */
+cfs_mem_cache_t *
+cfs_mem_cache_create (const char *name,
+                      size_t objsize, size_t off, unsigned long arg1)
+{
+        cfs_mem_cache_t *cache = mem_cache_find(name, objsize);
+
+        if (!cache)
+                cache = mem_cache_create(objsize, name);
+        return cache;
+}
+
+/* Destroy @cachep through mem_cache_destroy().  Always returns 0. */
+int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep)
+{
+        int rc = 0;
+
+        mem_cache_destroy(cachep);
+        return rc;
+}
+
+/*
+ * Allocate one object from @cachep.
+ *
+ * With CFS_ALLOC_ATOMIC in @flags the non-blocking allocator is used;
+ * otherwise the caller is asserted to be at preemption level 0 and the
+ * regular allocator is used.  With CFS_ALLOC_ZERO a successful allocation
+ * is cleared.  Returns NULL if the allocation fails.
+ */
+void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags)
+{
+        void *obj;
+
+        /* zalloc_canblock() is not exported... Emulate it. */
+        if ((flags & CFS_ALLOC_ATOMIC) == 0) {
+                LASSERT(get_preemption_level() == 0);
+                obj = (void *)mem_cache_alloc(cachep);
+        } else {
+                obj = (void *)mem_cache_alloc_nb(cachep);
+        }
+
+        if (obj == NULL)
+                return obj;
+
+        if (flags & CFS_ALLOC_ZERO)
+                memset(obj, 0, cachep->mc_size);
+        return obj;
+}
+
+/* Return @objp, previously allocated from @cachep, to that cache. */
+void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp)
+{
+        /* mem_cache_free() is a macro selected by CFS_INDIVIDUAL_ZONE */
+        mem_cache_free(cachep, objp);
+}
+
+/* ---------------------------------------------------------------------------
+ * Page operations
+ *
+ * --------------------------------------------------------------------------- */
+
+/*
+ * "Raw" pages
+ */
+
+static unsigned int raw_pages = 0;
+static cfs_mem_cache_t  *raw_page_cache = NULL;
+
+static struct xnu_page_ops raw_page_ops;
+static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = {
+        [XNU_PAGE_RAW] = &raw_page_ops
+};
+
+#if defined(LIBCFS_DEBUG)
+/* Return non-zero if @page's type lies in [0, XNU_PAGE_NTYPES). */
+static int page_type_is_valid(cfs_page_t *page)
+{
+        LASSERT(page != NULL);
+        if (!(0 <= page->type))
+                return 0;
+        return page->type < XNU_PAGE_NTYPES;
+}
+
+/* Return non-zero if @page is of type XNU_PAGE_RAW. */
+static int page_is_raw(cfs_page_t *page)
+{
+        return XNU_PAGE_RAW == page->type;
+}
+#endif
+
+/*
+ * Convert a generic page header to the xnu_raw_page that embeds it as its
+ * "header" member.  The page is asserted to be a raw page.
+ */
+static struct xnu_raw_page *as_raw(cfs_page_t *page)
+{
+        struct xnu_raw_page *raw;
+
+        LASSERT(page_is_raw(page));
+        raw = list_entry(page, struct xnu_raw_page, header);
+        return raw;
+}
+
+/* page_address operation for raw pages: return the page's "virtual" field. */
+static void *raw_page_address(cfs_page_t *pg)
+{
+        struct xnu_raw_page *raw = as_raw(pg);
+
+        return (void *)raw->virtual;
+}
+
+/*
+ * page_map operation for raw pages: no mapping work is done here, the
+ * page's "virtual" field is returned as it is.
+ */
+static void *raw_page_map(cfs_page_t *pg)
+{
+        struct xnu_raw_page *raw = as_raw(pg);
+
+        return (void *)raw->virtual;
+}
+
+/* page_unmap operation for raw pages: intentionally does nothing. */
+static void raw_page_unmap(cfs_page_t *pg)
+{
+}
+
+/* Operation table installed for XNU_PAGE_RAW in page_ops[]. */
+static struct xnu_page_ops raw_page_ops = {
+        .page_map       = raw_page_map,
+        .page_unmap     = raw_page_unmap,
+        .page_address   = raw_page_address
+};
+
+extern int get_preemption_level(void);
+
+/*
+ * Raw pages whose last reference was dropped while get_preemption_level()
+ * was non-zero are parked on page_death_row (see free_raw_page()) and
+ * released later by raw_page_death_row_clean().  page_death_row_phylax
+ * guards the list.
+ */
+struct list_head page_death_row;
+spinlock_t page_death_row_phylax;
+
+/*
+ * Really release a raw page: drop the raw_pages counter, give the data
+ * area (if any) back to raw_page_cache and free the descriptor itself.
+ */
+static void raw_page_finish(struct xnu_raw_page *pg)
+{
+        /* NOTE(review): plain decrement, no lock or atomic visible here */
+        -- raw_pages;
+        if (pg->virtual != NULL)
+                cfs_mem_cache_free(raw_page_cache, pg->virtual);
+        cfs_free(pg);
+}
+
+/*
+ * Drain page_death_row: detach one parked page at a time under
+ * page_death_row_phylax, then finish it with the lock dropped.
+ */
+void raw_page_death_row_clean(void)
+{
+        struct xnu_raw_page *victim;
+
+        for (;;) {
+                spin_lock(&page_death_row_phylax);
+                if (list_empty(&page_death_row)) {
+                        spin_unlock(&page_death_row_phylax);
+                        break;
+                }
+                victim = container_of(page_death_row.next,
+                                      struct xnu_raw_page, link);
+                list_del(&victim->link);
+                spin_unlock(&page_death_row_phylax);
+
+                /* the lock is not held while the page is released */
+                raw_page_finish(victim);
+        }
+}
+
+/*
+ * Drop one reference on a raw "page" and release it when that was the
+ * last one.
+ *
+ * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may
+ * block. (raw_page_done()->upl_abort() can block too) On the other
+ * hand, cfs_free_page() may be called in non-blockable context. To
+ * work around this, pages are parked on a global list when the caller
+ * cannot block, and released by a later caller that can.
+ */
+void free_raw_page(struct xnu_raw_page *pg)
+{
+        if (!atomic_dec_and_test(&pg->count))
+                return;
+
+        if (get_preemption_level() <= 0) {
+                /* free to block: release this page and any parked ones */
+                raw_page_finish(pg);
+                raw_page_death_row_clean();
+                return;
+        }
+
+        /* cannot block here: park the page for later */
+        spin_lock(&page_death_row_phylax);
+        list_add(&pg->link, &page_death_row);
+        spin_unlock(&page_death_row_phylax);
+}
+
+/*
+ * Allocate a raw page: a descriptor from cfs_alloc() plus a data area
+ * from raw_page_cache, both using @flags.  The new page starts with a
+ * reference count of 1.  Returns the embedded page header, or NULL when
+ * either allocation fails.
+ */
+cfs_page_t *cfs_alloc_page(u_int32_t flags)
+{
+        struct xnu_raw_page *raw;
+
+        /*
+         * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+         * from here: this will lead to infinite recursion.
+         */
+
+        raw = cfs_alloc(sizeof *raw, flags);
+        if (raw == NULL)
+                return NULL;
+
+        raw->virtual = cfs_mem_cache_alloc(raw_page_cache, flags);
+        if (raw->virtual == NULL) {
+                /* no data area: undo the descriptor allocation */
+                cfs_free(raw);
+                return NULL;
+        }
+
+        ++ raw_pages;
+        raw->header.type = XNU_PAGE_RAW;
+        atomic_set(&raw->count, 1);
+        return &raw->header;
+}
+
+/* Drop a reference on raw page @pages via free_raw_page(). */
+void cfs_free_page(cfs_page_t *pages)
+{
+        free_raw_page(as_raw(pages));
+}
+
+/* Take an extra reference on raw page @p. */
+void cfs_get_page(cfs_page_t *p)
+{
+        atomic_inc(&as_raw(p)->count);
+}
+
+/*
+ * Drop one reference on raw page @p and return the result of
+ * atomic_dec_and_test().  The page itself is not freed here.
+ */
+int cfs_put_page_testzero(cfs_page_t *p)
+{
+       return atomic_dec_and_test(&as_raw(p)->count);
+}
+
+/* Current reference count of raw page @p. */
+int cfs_page_count(cfs_page_t *p)
+{
+        return atomic_read(&as_raw(p)->count);
+}
+
+/*
+ * Generic page operations
+ */
+
+/* Dispatch to the page_address operation registered for pg->type. */
+void *cfs_page_address(cfs_page_t *pg)
+{
+        /*
+         * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+         * from here: this will lead to infinite recursion.
+         */
+        LASSERT(page_type_is_valid(pg));
+        return page_ops[pg->type]->page_address(pg);
+}
+
+/* Dispatch to the page_map operation registered for pg->type. */
+void *cfs_kmap(cfs_page_t *pg)
+{
+        LASSERT(page_type_is_valid(pg));
+        return page_ops[pg->type]->page_map(pg);
+}
+
+/* Dispatch to the page_unmap operation registered for pg->type. */
+void cfs_kunmap(cfs_page_t *pg)
+{
+        LASSERT(page_type_is_valid(pg));
+        /*
+         * page_unmap yields no value, so call it as a plain statement:
+         * ISO C forbids "return <expression>;" in a void function.
+         */
+        page_ops[pg->type]->page_unmap(pg);
+}
+
+/*
+ * Install @ops as the operation table for page type @type.
+ * Asserts that @type is in range, @ops is non-NULL and the slot is
+ * currently empty.
+ */
+void xnu_page_ops_register(int type, struct xnu_page_ops *ops)
+{
+        LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
+        LASSERT(ops != NULL);
+        LASSERT(page_ops[type] == NULL);
+
+        page_ops[type] = ops;
+}
+
+/*
+ * Clear the operation table slot for page type @type.
+ * Asserts that @type is in range and the slot is currently occupied.
+ */
+void xnu_page_ops_unregister(int type)
+{
+        LASSERT(0 <= type && type < XNU_PAGE_NTYPES);
+        LASSERT(page_ops[type] != NULL);
+
+        page_ops[type] = NULL;
+}
+
+/*
+ * Portable memory allocator API
+ */
+/*
+ * Without HAVE_GET_PREEMPTION_LEVEL the preemption level reads as 0 from
+ * this point on, which makes the "may block" assertions below pass.
+ * NOTE(review): code earlier in this file calls get_preemption_level()
+ * through an unconditional extern declaration - confirm that is intended.
+ */
+#ifdef HAVE_GET_PREEMPTION_LEVEL
+extern int get_preemption_level(void);
+#else
+#define get_preemption_level() (0)
+#endif
+
+/*
+ * Allocate @nr_bytes with _MALLOC() under the M_TEMP type.
+ *
+ * CFS_ALLOC_ATOMIC maps to M_NOWAIT; without it M_WAITOK is used and the
+ * caller is asserted to be at preemption level 0.  CFS_ALLOC_ZERO adds
+ * M_ZERO.  Returns whatever _MALLOC() returns.
+ */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+        int malloc_flags;
+
+        if (!(flags & CFS_ALLOC_ATOMIC)) {
+                /* a waiting allocation must come from a blockable context */
+                LASSERT(get_preemption_level() == 0);
+                malloc_flags = M_WAITOK;
+        } else {
+                malloc_flags = M_NOWAIT;
+        }
+
+        if (flags & CFS_ALLOC_ZERO)
+                malloc_flags |= M_ZERO;
+
+        return _MALLOC(nr_bytes, M_TEMP, malloc_flags);
+}
+
+/* Release memory obtained from cfs_alloc() (M_TEMP type). */
+void cfs_free(void *addr)
+{
+        /* plain call: a void function must not "return <expression>;" */
+        _FREE(addr, M_TEMP);
+}
+
+/*
+ * Allocate @nr_bytes with _MALLOC(M_TEMP, M_WAITOK).  The caller is
+ * asserted to be at preemption level 0 because the allocation may wait.
+ */
+void *cfs_alloc_large(size_t nr_bytes)
+{
+        LASSERT(get_preemption_level() == 0);
+        return _MALLOC(nr_bytes, M_TEMP, M_WAITOK);
+}
+
+/*
+ * Release memory obtained from cfs_alloc_large().  The caller is asserted
+ * to be at preemption level 0.
+ */
+void  cfs_free_large(void *addr)
+{
+        LASSERT(get_preemption_level() == 0);
+        /* plain call: a void function must not "return <expression>;" */
+        _FREE(addr, M_TEMP);
+}
+
+/*
+ * Initialise the memory layer.
+ *
+ * With CFS_INDIVIDUAL_ZONE: look up cfs_zone_nob through the sysctl
+ * "libcfs.zone".  If it cannot be found (first load of libcfs since
+ * boot), allocate a new cfs_zone_nob and register it as sysctl "zone".
+ *
+ * In all configurations: set up the page death row list and its lock and
+ * create the "raw-page" cache.
+ *
+ * Returns 0, or -ENOMEM when the sysctl structure cannot be allocated.
+ */
+int cfs_mem_init(void)
+{
+#if     CFS_INDIVIDUAL_ZONE
+        int     rc;
+        size_t  len;
+
+        len = sizeof(struct cfs_zone_nob);
+        rc = sysctlbyname("libcfs.zone",
+                          (void *)&cfs_zone_nob, &len, NULL, 0);
+        if (rc == ENOENT) {
+                /* zone_nob is not registered in libcfs_sysctl yet */
+                struct cfs_zone_nob  *nob;
+                struct sysctl_oid       *oid;
+
+                assert(cfs_sysctl_isvalid());
+
+                nob = _MALLOC(sizeof(struct cfs_zone_nob), 
+                              M_TEMP, M_WAITOK | M_ZERO);
+                CFS_INIT_LIST_HEAD(&nob->z_link);
+                nob->z_nob = &nob->z_link;
+                oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, 
+                                              "zone", nob, sizeof(struct cfs_zone_nob));
+                if (oid == NULL) {
+                        _FREE(nob, M_TEMP);
+                        return -ENOMEM;
+                }
+                sysctl_register_oid(oid);
+
+                /* share the freshly registered list head */
+                cfs_zone_nob.z_nob = nob->z_nob;
+        }
+        spin_lock_init(&cfs_zone_guard);
+#endif
+        CFS_INIT_LIST_HEAD(&page_death_row);
+        spin_lock_init(&page_death_row_phylax);
+        /* NOTE(review): the result is not checked; a NULL cache still yields 0 */
+        raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0);
+        return 0;
+}
+
+/*
+ * Tear down what cfs_mem_init() set up: release parked pages, destroy
+ * the death row lock and the raw page cache and, with
+ * CFS_INDIVIDUAL_ZONE, forget the shared zone list and destroy its guard.
+ */
+void cfs_mem_fini(void)
+{
+        raw_page_death_row_clean();
+        spin_lock_done(&page_death_row_phylax);
+        cfs_mem_cache_destroy(raw_page_cache);
+
+#if     CFS_INDIVIDUAL_ZONE
+        cfs_zone_nob.z_nob = NULL;
+        spin_lock_done(&cfs_zone_guard);
+#endif
+}
diff --git a/libcfs/libcfs/darwin/darwin-module.c b/libcfs/libcfs/darwin/darwin-module.c
new file mode 100644 (file)
index 0000000..10cb7d8
--- /dev/null
@@ -0,0 +1,191 @@
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <miscfs/devfs/devfs.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Assemble an ioctl request in the kernel buffer [@buf, @end).
+ *
+ * The fixed-size libcfs_ioctl_data is copied from @arg, its version and
+ * length are validated, and the two optional inline buffers are then
+ * copied from user space right behind it, with ioc_inlbuf1/ioc_inlbuf2
+ * redirected to the kernel copies.
+ *
+ * Returns 0 on success, -EINVAL on a version or length problem, or the
+ * non-zero result of copy_from_user().
+ */
+int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct libcfs_ioctl_hdr *hdr;
+        struct libcfs_ioctl_data *data;
+        int err = 0;
+        ENTRY;
+
+        hdr = (struct libcfs_ioctl_hdr *)buf;
+        data = (struct libcfs_ioctl_data *)buf;
+        /* libcfs_ioctl_data has been copied in by ioctl of osx */
+        memcpy(buf, arg, sizeof(struct libcfs_ioctl_data));
+
+        if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
+                CERROR("LIBCFS: version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len + buf >= end) {
+                CERROR("LIBCFS: user buffer exceeds kernel buffer\n");
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
+                CERROR("LIBCFS: user buffer too small for ioctl\n");
+                RETURN(-EINVAL);
+        }
+        buf += size_round(sizeof(*data));
+
+        /*
+         * NOTE(review): the inline lengths below come from user space and
+         * are not checked against @end here - confirm a caller bounds them.
+         */
+        if (data->ioc_inllen1) {
+                err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1));
+                if (err)
+                        RETURN(err);
+                data->ioc_inlbuf1 = buf;
+                buf += size_round(data->ioc_inllen1);
+        }
+
+        if (data->ioc_inllen2) {
+                /* keep the result: it used to be dropped, hiding failures */
+                err = copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2));
+                if (err)
+                        RETURN(err);
+                data->ioc_inlbuf2 = buf;
+        }
+
+        RETURN(err);
+}
+
+/*
+ * Copy @size bytes of result @data back into the ioctl argument @arg.
+ * Always returns 0.
+ */
+int libcfs_ioctl_popdata(void *arg, void *data, int size)
+{
+       /* 
+        * system call will copy out ioctl arg to user space
+        */
+       memcpy(arg, data, size);
+       return 0;
+}
+
+/* Open/close/ioctl callbacks the device entry points below forward to. */
+extern struct cfs_psdev_ops            libcfs_psdev_ops;
+/* Per-minor user state, filled in on open and cleared on close. */
+struct libcfs_device_userstate         *mdev_state[16];
+
+/*
+ * d_open entry point: forward to libcfs_psdev_ops.p_open and remember the
+ * returned user state in mdev_state[minor].
+ *
+ * Returns 0 on success, ENXIO for a minor outside mdev_state[], EPERM when
+ * no p_open callback is set, otherwise the negated p_open() result.
+ */
+static int
+libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p)
+{
+       struct  libcfs_device_userstate *mstat = NULL;
+       int     rc = 0;
+       int     devid;
+       devid = minor(dev);
+
+       /* valid slots are 0 .. N-1; the old "> 16" test let index 16 through */
+       if (devid < 0 ||
+           devid >= (int)(sizeof(mdev_state) / sizeof(mdev_state[0])))
+               return (ENXIO);
+
+       if (libcfs_psdev_ops.p_open != NULL)
+               rc = -libcfs_psdev_ops.p_open(0, &mstat);
+       else
+               rc = EPERM;
+       if (rc == 0)
+               mdev_state[devid] = mstat;
+       return rc;
+}
+
+/*
+ * d_close entry point: forward to libcfs_psdev_ops.p_close with the state
+ * stored for this minor, and clear the slot on success.
+ *
+ * Returns 0 on success, ENXIO for a minor outside mdev_state[], EPERM when
+ * no p_close callback is set, otherwise the negated p_close() result.
+ */
+static int
+libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p)
+{
+       int     devid;
+       int     rc = 0;
+
+       devid = minor(dev);
+
+       /* valid slots are 0 .. N-1; the old "> 16" test let index 16 through */
+       if (devid < 0 ||
+           devid >= (int)(sizeof(mdev_state) / sizeof(mdev_state[0])))
+               return (ENXIO);
+
+       if (libcfs_psdev_ops.p_close != NULL)
+               rc = -libcfs_psdev_ops.p_close(0, mdev_state[devid]);
+       else
+               rc = EPERM;
+       if (rc == 0)
+               mdev_state[devid] = NULL;
+       return rc;
+}
+
+/*
+ * d_ioctl entry point: superuser only.  Builds a cfs_psdev_file around the
+ * state stored for this minor and forwards to libcfs_psdev_ops.p_ioctl.
+ *
+ * Returns ENXIO for a minor outside mdev_state[], EPERM for a
+ * non-superuser or a missing p_ioctl callback, otherwise the negated
+ * p_ioctl() result.
+ */
+static int
+libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p)
+{
+       int rc = 0;
+        struct cfs_psdev_file    pfile;
+       int     devid;
+       devid = minor(dev);
+
+       /* valid slots are 0 .. N-1; the old "> 16" test let index 16 through */
+       if (devid < 0 ||
+           devid >= (int)(sizeof(mdev_state) / sizeof(mdev_state[0])))
+               return (ENXIO);
+
+       if (!is_suser())
+               return (EPERM);
+
+       pfile.off = 0;
+       pfile.private_data = mdev_state[devid];
+
+       if (libcfs_psdev_ops.p_ioctl != NULL)
+               rc = -libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+       else
+               rc = EPERM;
+       return rc;
+}
+
+/*
+ * Character device switch for the libcfs device: open, close and ioctl
+ * are implemented above, every other entry uses an eno_* stub (or NULL).
+ */
+static struct cdevsw libcfs_devsw =
+{
+       .d_open     = libcfs_psdev_open,
+       .d_close    = libcfs_psdev_close,
+       .d_read     = eno_rdwrt,
+       .d_write    = eno_rdwrt,
+       .d_ioctl    = libcfs_ioctl,
+       .d_stop     = eno_stop,
+       .d_reset    = eno_reset,
+       .d_ttys     = NULL,
+       .d_select   = eno_select,
+       .d_mmap     = eno_mmap,
+       .d_strategy = eno_strat,
+       .d_getc     = eno_getc,
+       .d_putc     = eno_putc,
+       .d_type     = 0
+};
+
+/*
+ * The "lnet" pseudo device, bound to libcfs_devsw.
+ * NOTE(review): positional initialiser - the cfs_psdev_t field order is
+ * not visible here; presumably index, handle, name, devsw, ... - confirm
+ * against the type definition.
+ */
+cfs_psdev_t libcfs_dev = {
+       -1,
+       NULL,
+       "lnet",
+       &libcfs_devsw,
+       NULL
+};
+
+/* Symbols defined in the other Darwin libcfs source files. */
+extern spinlock_t trace_cpu_serializer;
+extern void cfs_sync_init(void);
+extern void cfs_sync_fini(void);
+extern int cfs_sysctl_init(void);
+extern void cfs_sysctl_fini(void);
+extern int cfs_mem_init(void);
+/* cfs_mem_fini() is defined as returning void; the declaration must agree */
+extern void cfs_mem_fini(void);
+extern void raw_page_death_row_clean(void);
+extern void cfs_thread_agent_init(void);
+extern void cfs_thread_agent_fini(void);
+extern void cfs_symbol_init(void);
+extern void cfs_symbol_fini(void);
+
+/*
+ * Bring up the Darwin-specific parts of libcfs in a fixed order: sync,
+ * sysctl, memory, thread agent, symbols, then the trace serializer lock.
+ * Always returns 0.
+ */
+int libcfs_arch_init(void)
+{
+       cfs_sync_init();
+       /* NOTE(review): the int results of the next two calls are discarded */
+       cfs_sysctl_init();
+       cfs_mem_init();
+       cfs_thread_agent_init();
+       cfs_symbol_init();
+
+       spin_lock_init(&trace_cpu_serializer);
+
+       return 0;
+}
+
+/* Undo libcfs_arch_init() in exactly the reverse order. */
+void libcfs_arch_cleanup(void)
+{
+       spin_lock_done(&trace_cpu_serializer);
+
+       cfs_symbol_fini();
+       cfs_thread_agent_fini();
+       cfs_mem_fini();
+       cfs_sysctl_fini();
+       cfs_sync_fini();
+}
+
diff --git a/libcfs/libcfs/darwin/darwin-prim.c b/libcfs/libcfs/darwin/darwin-prim.c
new file mode 100644 (file)
index 0000000..cdcabd9
--- /dev/null
@@ -0,0 +1,581 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/conf.h>
+#include <sys/uio.h>
+#include <sys/filedesc.h>
+#include <sys/namei.h>
+#include <miscfs/devfs/devfs.h>
+#include <kern/thread.h>
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * cfs pseudo device, actually pseudo char device in darwin
+ */
+#define KLNET_MAJOR  -1
+
+/*
+ * Register @dev as a character device: add its switch table with
+ * cdevsw_add() and create the devfs node named dev->name (minor 0).
+ *
+ * Returns KERN_SUCCESS, or KERN_FAILURE when no major number or no devfs
+ * node could be obtained; on failure nothing stays registered.
+ */
+kern_return_t  cfs_psdev_register(cfs_psdev_t *dev) {
+       dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw);
+       if (dev->index < 0) {
+               printf("libcfs_init: failed to allocate a major number!\n");
+               return KERN_FAILURE;
+       }
+       dev->handle = devfs_make_node(makedev (dev->index, 0),
+                                      DEVFS_CHAR, UID_ROOT,
+                                      GID_WHEEL, 0666, (char *)dev->name, 0);
+       if (dev->handle == NULL) {
+               /* no node was created: give the cdevsw slot back */
+               printf("libcfs_init: failed to create devfs node!\n");
+               cdevsw_remove(dev->index, dev->devsw);
+               dev->index = -1;
+               return KERN_FAILURE;
+       }
+       return KERN_SUCCESS;
+}
+
+/*
+ * Remove the devfs node and the cdevsw entry of @dev.
+ * Always returns KERN_SUCCESS.
+ */
+kern_return_t  cfs_psdev_deregister(cfs_psdev_t *dev) {
+       devfs_remove(dev->handle);
+       cdevsw_remove(dev->index, dev->devsw);
+       return KERN_SUCCESS;
+}
+
+/*
+ * KPortal symbol register / unregister support
+ */
+/* Registered symbols (struct cfs_symbol, linked through sym_list) and the lock guarding them. */
+struct rw_semaphore             cfs_symbol_lock;
+struct list_head                cfs_symbol_list;
+
+/*
+ * Look up the registered symbol @name and take a reference on it.
+ *
+ * Returns the symbol's value, or NULL when no symbol of that name is
+ * registered.  (The old code returned the value of the last list entry
+ * whenever the name was not found on a non-empty list.)
+ */
+void *
+cfs_symbol_get(const char *name)
+{
+        struct list_head    *walker;
+        struct cfs_symbol   *sym;
+        void                *value = NULL;
+
+        down_read(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        /* NOTE(review): ref is modified under the read lock */
+                        sym->ref ++;
+                        value = sym->value;
+                        break;
+                }
+        }
+        up_read(&cfs_symbol_lock);
+
+        return value;
+}
+
+/*
+ * Drop a reference previously taken with cfs_symbol_get() on @name.
+ *
+ * Asserts that the symbol exists and that its reference count does not go
+ * negative.  Always returns 0.
+ */
+kern_return_t
+cfs_symbol_put(const char *name)
+{
+        struct list_head    *walker;
+        struct cfs_symbol   *sym;
+        struct cfs_symbol   *found = NULL;
+
+        down_read(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        sym->ref --;
+                        LASSERT(sym->ref >= 0);
+                        found = sym;
+                        break;
+                }
+        }
+        up_read(&cfs_symbol_lock);
+        /*
+         * Only a real match counts: the loop cursor is non-NULL after any
+         * walk over a non-empty list, so it cannot be used for this test.
+         */
+        LASSERT(found != NULL);
+
+        return 0;
+}
+
+/*
+ * Register @value under @name.
+ *
+ * The new entry is allocated before the write lock is taken and freed
+ * again if the name turns out to be registered already.
+ *
+ * Returns KERN_SUCCESS, or KERN_NAME_EXISTS for a duplicate name.
+ */
+kern_return_t
+cfs_symbol_register(const char *name, const void *value)
+{
+        struct list_head    *walker;
+        struct cfs_symbol   *sym = NULL;
+        struct cfs_symbol   *new = NULL;
+
+        MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO);
+        /*
+         * NOTE(review): strncpy() does not terminate the copy when @name is
+         * CFS_SYMBOL_LEN bytes or longer; whether the zeroed allocation
+         * leaves a terminator behind depends on the size of new->name,
+         * which is not visible here - confirm.
+         */
+        strncpy(new->name, name, CFS_SYMBOL_LEN);
+        new->value = (void *)value;
+        new->ref = 0;
+        CFS_INIT_LIST_HEAD(&new->sym_list);
+
+        down_write(&cfs_symbol_lock);
+        list_for_each(walker, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                if (!strcmp(sym->name, name)) {
+                        /* duplicate: drop the lock and discard the new entry */
+                        up_write(&cfs_symbol_lock);
+                        FREE(new, M_TEMP);
+                        return KERN_NAME_EXISTS;
+                }
+
+        }
+        list_add_tail(&new->sym_list, &cfs_symbol_list);
+        up_write(&cfs_symbol_lock);
+
+        return KERN_SUCCESS;
+}
+
+/*
+ * Remove the first registered symbol called @name, if there is one, and
+ * free it.  The symbol is asserted to be unreferenced.
+ * Always returns KERN_SUCCESS, whether or not a symbol was found.
+ */
+kern_return_t
+cfs_symbol_unregister(const char *name)
+{
+        struct list_head    *pos;
+        struct list_head    *next;
+        struct cfs_symbol   *entry = NULL;
+
+        down_write(&cfs_symbol_lock);
+        list_for_each_safe(pos, next, &cfs_symbol_list) {
+                entry = list_entry (pos, struct cfs_symbol, sym_list);
+                if (strcmp(entry->name, name) != 0)
+                        continue;
+
+                /* found it: unlink, free, and stop looking */
+                LASSERT(entry->ref == 0);
+                list_del (&entry->sym_list);
+                FREE(entry, M_TEMP);
+                break;
+        }
+        up_write(&cfs_symbol_lock);
+
+        return KERN_SUCCESS;
+}
+
+/* Initialise the (empty) symbol list and the lock guarding it. */
+void
+cfs_symbol_init()
+{
+        CFS_INIT_LIST_HEAD(&cfs_symbol_list);
+        init_rwsem(&cfs_symbol_lock);
+}
+
+/*
+ * Free every symbol still registered (each is asserted to be
+ * unreferenced) and destroy the symbol lock.
+ */
+void
+cfs_symbol_fini()
+{
+        struct list_head    *walker;
+        struct list_head    *nxt;
+        struct cfs_symbol   *sym = NULL;
+
+        down_write(&cfs_symbol_lock);
+        /*
+         * Entries are freed inside the loop, so the successor has to be
+         * fetched before the body runs: use the _safe iterator.
+         */
+        list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+                sym = list_entry (walker, struct cfs_symbol, sym_list);
+                LASSERT(sym->ref == 0);
+                list_del (&sym->sym_list);
+                FREE(sym, M_TEMP);
+        }
+        up_write(&cfs_symbol_lock);
+
+        fini_rwsem(&cfs_symbol_lock);
+        return;
+}
+
+/*
+ * Hand-over slot used to pass a thread function and its argument from
+ * cfs_kernel_thread() to the newly started cfs_thread_agent().
+ */
+struct kernel_thread_arg
+{
+       spinlock_t      lock;   /* guards the fields below */
+       atomic_t        inuse;  /* one of the THREAD_ARG_* states */
+       cfs_thread_t    func;   /* function the new thread runs */
+       void            *arg;   /* argument passed to func */
+};
+
+/* The single, global hand-over slot. */
+struct kernel_thread_arg cfs_thread_arg;
+
+/* States of the hand-over slot. */
+#define THREAD_ARG_FREE                        0       /* slot is unused */
+#define THREAD_ARG_HOLD                        1       /* filled in by a creator */
+#define THREAD_ARG_RECV                        2       /* picked up by the new thread */
+
+/* Accessors for the slot state. */
+#define set_targ_stat(a, v)            atomic_set(&(a)->inuse, v)
+#define get_targ_stat(a)               atomic_read(&(a)->inuse)
+
+/*
+ * Hold the thread argument and set the status of thread_status
+ * to THREAD_ARG_HOLD, if the thread argument is held by other
+ * threads (It's THREAD_ARG_HOLD already), current-thread has to wait.
+ */
+#define thread_arg_hold(pta, _func, _arg)                      \
+       do {                                                    \
+               spin_lock(&(pta)->lock);                        \
+               if (get_targ_stat(pta) == THREAD_ARG_FREE) {    \
+                       set_targ_stat((pta), THREAD_ARG_HOLD);  \
+                       (pta)->arg = (void *)_arg;              \
+                       (pta)->func = _func;                    \
+                       spin_unlock(&(pta)->lock);              \
+                       break;                                  \
+               }                                               \
+               spin_unlock(&(pta)->lock);                      \
+               cfs_schedule();                                 \
+       } while(1);                                             \
+
+/*
+ * Release the thread argument if the thread argument has been
+ * received by the child-thread (Status of thread_args is
+ * THREAD_ARG_RECV), otherwise current-thread has to wait.
+ * After release, the thread_args' status will be set to
+ * THREAD_ARG_FREE, and others can re-use the thread_args to
+ * create new kernel_thread.
+ *
+ * Waiting means: drop the lock, call cfs_schedule() and look again; the
+ * loop has no other exit than seeing THREAD_ARG_RECV.
+ */
+#define thread_arg_release(pta)                                        \
+       do {                                                    \
+               spin_lock(&(pta)->lock);                        \
+               if (get_targ_stat(pta) == THREAD_ARG_RECV) {    \
+                       (pta)->arg = NULL;                      \
+                       (pta)->func = NULL;                     \
+                       set_targ_stat(pta, THREAD_ARG_FREE);    \
+                       spin_unlock(&(pta)->lock);              \
+                       break;                                  \
+               }                                               \
+               spin_unlock(&(pta)->lock);                      \
+               cfs_schedule();                                 \
+       } while(1)
+
+/*
+ * Receive thread argument (Used in child thread), set the status
+ * of thread_args to THREAD_ARG_RECV.
+ */
+#define __thread_arg_recv_fin(pta, _func, _arg, fin)           \
+       do {                                                    \
+               spin_lock(&(pta)->lock);                        \
+               if (get_targ_stat(pta) == THREAD_ARG_HOLD) {    \
+                       if (fin)                                \
+                           set_targ_stat(pta, THREAD_ARG_RECV);\
+                       _arg = (pta)->arg;                      \
+                       _func = (pta)->func;                    \
+                       spin_unlock(&(pta)->lock);              \
+                       break;                                  \
+               }                                               \
+               spin_unlock(&(pta)->lock);                      \
+               cfs_schedule();                                 \
+       } while (1);                                            \
+
+/*
+ * Just set the thread_args' status to THREAD_ARG_RECV.
+ * The slot is asserted to be in THREAD_ARG_HOLD when this runs.
+ */
+#define thread_arg_fin(pta)                                    \
+       do {                                                    \
+               spin_lock(&(pta)->lock);                        \
+               assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \
+               set_targ_stat(pta, THREAD_ARG_RECV);            \
+               spin_unlock(&(pta)->lock);                      \
+       } while(0)
+
+/* recv: fetch func/arg and mark the slot received; keep: fetch only. */
+#define thread_arg_recv(pta, f, a)     __thread_arg_recv_fin(pta, f, a, 1)
+#define thread_arg_keep(pta, f, a)     __thread_arg_recv_fin(pta, f, a, 0)
+
+/* Put the global hand-over slot into its initial, free and empty state. */
+void
+cfs_thread_agent_init(void)
+{
+        set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE);
+        spin_lock_init(&cfs_thread_arg.lock);
+        cfs_thread_arg.arg = NULL;
+        cfs_thread_arg.func = NULL;
+}
+
+/* Destroy the hand-over slot's lock; the slot is asserted to be free. */
+void
+cfs_thread_agent_fini(void)
+{
+        assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE);
+
+        spin_lock_done(&cfs_thread_arg.lock);
+}
+
+/*
+ * Entry point of every thread started through cfs_kernel_thread().
+ *
+ * All requests to create kernel thread will create a new
+ * thread instance of cfs_thread_agent, one by one.
+ * cfs_thread_agent will call the caller's thread function
+ * with argument supplied by caller.
+ */
+void
+cfs_thread_agent (void)
+{
+        cfs_thread_t           func = NULL;
+        void                   *arg = NULL;
+
+        /* pick up func/arg and mark the slot as received */
+        thread_arg_recv(&cfs_thread_arg, func, arg);
+        /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */
+        assert(func != NULL);
+        func(arg);
+        /* printf("thread agent exit. (func: %08lx)\n", (void *)func); */
+        /* the thread ends itself once the caller's function returns */
+        (void) thread_terminate(current_thread());
+}
+
+extern thread_t kernel_thread(task_t task, void (*start)(void));
+
+/*
+ * Start a kernel thread that runs @func(@arg).
+ *
+ * The function and its argument are passed through the global hand-over
+ * slot; this call does not return before the new thread has picked them
+ * up.  @flag is not used.
+ *
+ * Returns 0 on success, -1 when the thread could not be created.
+ */
+int
+cfs_kernel_thread(cfs_thread_t  func, void *arg, int flag)
+{
+        int ret = 0;
+        thread_t th = NULL;
+
+        thread_arg_hold(&cfs_thread_arg, func, arg);
+        th = kernel_thread(kernel_task, cfs_thread_agent);
+        if (th == THREAD_NULL) {
+                /*
+                 * No child exists to receive the argument.  Mark the slot
+                 * received ourselves, otherwise the release below would
+                 * wait forever and the slot would stay busy.
+                 */
+                thread_arg_fin(&cfs_thread_arg);
+                ret = -1;
+        }
+        thread_arg_release(&cfs_thread_arg);
+        return ret;
+}
+
+/*
+ * Copy @str (at most CFS_CURPROC_COMM_MAX bytes including the terminator)
+ * into the buffer returned by cfs_curproc_comm().
+ */
+void cfs_daemonize(char *str)
+{
+        snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str);
+        return;
+}
+
+/*
+ * XXX Liang: kexts cannot access sigmask in Darwin8.
+ * it's almost impossible for us to get/set signal mask
+ * without patching kernel.
+ * Should we provide these functions in xnu?
+ *
+ * These signal functions almost do nothing now, we 
+ * need to investigate more about signal in Darwin.
+ */
+/* Placeholder: always reports an empty set of blocked signals. */
+cfs_sigset_t cfs_get_blockedsigs()
+{
+        return (cfs_sigset_t)0;
+}
+
+extern int block_procsigmask(struct proc *p,  int bit);
+
+/*
+ * Block all signals of the current process (no-op on Darwin8).
+ * Always returns 0, not the previous mask.
+ */
+cfs_sigset_t cfs_block_allsigs()
+{
+        cfs_sigset_t    old = 0;
+#ifdef __DARWIN8__
+#else
+        block_procsigmask(current_proc(), -1);
+#endif
+        return old;
+}
+
+/*
+ * Block the signals in @bit for the current process (no-op on Darwin8).
+ * Always returns 0, not the previous mask.
+ */
+cfs_sigset_t cfs_block_sigs(sigset_t bit)
+{
+        cfs_sigset_t    old = 0;
+#ifdef __DARWIN8__
+#else
+        block_procsigmask(current_proc(), bit);
+#endif
+        return old;
+}
+
+/* Placeholder: @old is ignored and nothing is restored. */
+void cfs_restore_sigs(cfs_sigset_t old)
+{
+}
+
+/*
+ * Report whether a signal is pending for the current thread, using
+ * thread_issignal() on Darwin8 and SHOULDissignal() otherwise.
+ */
+int cfs_signal_pending(void)
+{
+#ifdef __DARWIN8__
+        extern int thread_issignal(proc_t, thread_t, sigset_t);
+        return thread_issignal(current_proc(), current_thread(), (sigset_t)-1);
+#else
+        /* the terminating ';' was missing, so this branch did not compile */
+        return SHOULDissignal(current_proc(), current_uthread());
+#endif
+}
+
+/* Clear the current process's signal list (no-op on Darwin8). */
+void cfs_clear_sigpending(void)
+{
+#ifdef __DARWIN8__
+#else
+        clear_procsiglist(current_proc(), -1);
+#endif
+}
+
+#ifdef __DARWIN8__
+
+#else /* !__DARWIN8__ */
+
+/*
+ * Make the current thread run under the kernel funnel.
+ *
+ * *cone receives the funnel held on entry.  If that was the network
+ * funnel it is switched to the kernel funnel; if none was held the kernel
+ * funnel is taken and the previous setting stored in *state.  *state is
+ * left untouched in every other case.
+ */
+void lustre_cone_in(boolean_t *state, funnel_t **cone)
+{
+        *cone = thread_funnel_get();
+        if (*cone == network_flock)
+                thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+        else if (*cone == NULL)
+                *state = thread_funnel_set(kernel_flock, TRUE);
+}
+
+/*
+ * Undo lustre_cone_in(): switch back to the network funnel if that was
+ * held before, or restore the saved kernel funnel setting @state if none
+ * was held.
+ */
+void lustre_cone_ex(boolean_t state, funnel_t *cone)
+{
+        if (cone == network_flock)
+                thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
+        else if (cone == NULL)
+                (void) thread_funnel_set(kernel_flock, state);
+}
+
+/*
+ * Make the current thread run under the network funnel.
+ *
+ * *cone receives the funnel held on entry.  If that was the kernel funnel
+ * it is switched to the network funnel; if none was held the network
+ * funnel is taken and the previous setting stored in *state.  *state is
+ * left untouched in every other case.
+ */
+void lustre_net_in(boolean_t *state, funnel_t **cone)
+{
+        *cone = thread_funnel_get();
+        if (*cone == kernel_flock)
+                thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL);
+        else if (*cone == NULL)
+                *state = thread_funnel_set(network_flock, TRUE);
+}
+
+/*
+ * Undo lustre_net_in(): switch back to the kernel funnel if that was held
+ * before, or restore the saved network funnel setting @state if none was
+ * held.
+ */
+void lustre_net_ex(boolean_t state, funnel_t *cone)
+{
+        if (cone == kernel_flock)
+                thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL);
+        else if (cone == NULL)
+                (void) thread_funnel_set(network_flock, state);
+}
+#endif /* !__DARWIN8__ */
+
+/* Initialise @waitq by initialising the ksleep channel it wraps. */
+void cfs_waitq_init(struct cfs_waitq *waitq)
+{
+       ksleep_chan_init(&waitq->wq_ksleep_chan);
+}
+
+/* Initialise @link by initialising the ksleep link it wraps. */
+void cfs_waitlink_init(struct cfs_waitlink *link)
+{
+       ksleep_link_init(&link->wl_ksleep_link);
+}
+
+/* Attach @link to @waitq and remember the queue in link->wl_waitq. */
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        link->wl_waitq = waitq;
+       ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* Like cfs_waitq_add(), but flags the link KSLEEP_EXCLUSIVE first. */
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+                             struct cfs_waitlink *link)
+{
+        link->wl_waitq = waitq;
+       link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE;
+       ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* Set the forward channel of @link to the channel of @waitq. */
+void cfs_waitq_forward(struct cfs_waitlink *link,
+                       struct cfs_waitq *waitq)
+{
+       link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan;
+}
+
+/* Detach @link from @waitq. */
+void cfs_waitq_del(struct cfs_waitq *waitq,
+                   struct cfs_waitlink *link)
+{
+       ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link);
+}
+
+/* Always returns 1; @waitq is not inspected. */
+int cfs_waitq_active(struct cfs_waitq *waitq)
+{
+       return (1);
+}
+
+/* Wake @waitq through ksleep_wake(). */
+void cfs_waitq_signal(struct cfs_waitq *waitq)
+{
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       ksleep_wake(&waitq->wq_ksleep_chan);
+}
+
+/* Wake @waitq through ksleep_wake_nr(), passing @nr along. */
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
+{
+       ksleep_wake_nr(&waitq->wq_ksleep_chan, nr);
+}
+
+/* Wake @waitq through ksleep_wake_all(). */
+void cfs_waitq_broadcast(struct cfs_waitq *waitq)
+{
+       ksleep_wake_all(&waitq->wq_ksleep_chan);
+}
+
+/*
+ * Sleep in @state on the queue @link was added to.  link->wl_waitq must
+ * have been set by cfs_waitq_add() or cfs_waitq_add_exclusive().
+ */
+void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state)
+{
+        ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state);
+}
+
+/*
+ * Like cfs_waitq_wait(), with @timeout passed on to ksleep_timedwait().
+ * Returns whatever ksleep_timedwait() returns.
+ */
+cfs_duration_t  cfs_waitq_timedwait(struct cfs_waitlink *link,
+                                    cfs_task_state_t state,
+                                    cfs_duration_t timeout)
+{
+        return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan, 
+                                state, timeout);
+}
+
+typedef  void (*ktimer_func_t)(void *);
+/*
+ * Initialise timer @t to call @func with @arg.
+ * Note that @func, declared as taking unsigned long, is cast to the
+ * void * callback type expected by ktimer_init().
+ */
+void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg)
+{
+        ktimer_init(&t->t, (ktimer_func_t)func, arg);
+}
+
+/* Finish with timer @t; forwards to ktimer_done(). */
+void cfs_timer_done(struct cfs_timer *t)
+{
+        ktimer_done(&t->t);
+}
+
+/* Arm timer @t for @deadline; forwards to ktimer_arm(). */
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline)
+{
+        ktimer_arm(&t->t, deadline);
+}
+
+/* Disarm timer @t; forwards to ktimer_disarm(). */
+void cfs_timer_disarm(struct cfs_timer *t)
+{
+        ktimer_disarm(&t->t);
+}
+
+/* Returns the result of ktimer_is_armed() for timer @t. */
+int  cfs_timer_is_armed(struct cfs_timer *t)
+{
+        return ktimer_is_armed(&t->t);
+}
+
+/* Returns the result of ktimer_deadline() for timer @t. */
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t)
+{
+        return ktimer_deadline(&t->t);
+}
+
+/*
+ * Enter the kernel debugger with the reason string "CFS", through
+ * Debugger() on Darwin8 and PE_enter_debugger() otherwise.
+ */
+void cfs_enter_debugger(void)
+{
+#ifdef __DARWIN8__
+        extern void Debugger(const char * reason);
+        Debugger("CFS");
+#else
+        extern void PE_enter_debugger(char *cause);
+        PE_enter_debugger("CFS");
+#endif
+}
+
+/*
+ * Number of CPUs currently available: the "hw.activecpu" sysctl on
+ * Darwin8, host_info()'s avail_cpus otherwise.
+ *
+ * Returns the count, or -EINVAL when it cannot be obtained.
+ */
+int cfs_online_cpus(void)
+{
+#ifdef __DARWIN8__ 
+        int     activecpu = 0;
+        size_t  size = sizeof(activecpu);
+
+        /* a failed lookup used to hand back an uninitialised value */
+        if (sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0) != 0)
+                return(-EINVAL);
+        return activecpu;
+#else
+        host_basic_info_data_t hinfo;
+        kern_return_t kret;
+        int count = HOST_BASIC_INFO_COUNT;
+#define BSD_HOST 1
+        kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count);
+        if (kret == KERN_SUCCESS) 
+                return (hinfo.avail_cpus);
+        return(-EINVAL);
+#endif
+}
+
+/*
+ * Number of CPUs reported by the "hw.ncpu" sysctl.
+ *
+ * Returns the count, or -EINVAL when the sysctl cannot be read (the same
+ * error convention as cfs_online_cpus()).
+ */
+int cfs_ncpus(void)
+{
+        int     ncpu = 0;
+        size_t  size = sizeof(ncpu);
+
+        /* a failed lookup used to hand back an uninitialised value */
+        if (sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0) != 0)
+                return -EINVAL;
+        return ncpu;
+}
diff --git a/libcfs/libcfs/darwin/darwin-proc.c b/libcfs/libcfs/darwin/darwin-proc.c
new file mode 100644 (file)
index 0000000..a001f5b
--- /dev/null
@@ -0,0 +1,467 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/proc.h>
+#include <sys/unistd.h>
+#include <mach/mach_types.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+#define LIBCFS_SYSCTL           "libcfs"
+#define LIBCFS_SYSCTL_SPRITE    "sprite"
+#define LIBCFS_SYSCTL_MAGIC     0xbabeface
+
+/*
+ * Record exchanged through the "libcfs.sprite" sysctl (see
+ * cfs_sysctl_init()).
+ */
+struct libcfs_sysctl_sprite {
+        int                     ss_magic;       /* LIBCFS_SYSCTL_MAGIC once set up */
+        struct sysctl_oid_list  *ss_link;       /* child list of the "libcfs" node */
+};
+
+static struct libcfs_sysctl_sprite libcfs_sysctl_sprite = { 0, NULL };
+
+static cfs_sysctl_table_header_t *libcfs_table_header = NULL;
+extern unsigned int libcfs_debug;
+extern unsigned int libcfs_subsystem_debug;
+extern unsigned int libcfs_printk;
+extern unsigned int libcfs_console_ratelimit;
+extern unsigned int libcfs_catastrophe;
+extern atomic_t libcfs_kmemory;
+
+/*
+ * Write-only sysctl handler: copies the new value in as a string and passes
+ * it to tracefile_dump_all_pages().  A request carrying no new data (a read)
+ * is rejected with -EINVAL.
+ */
+static int sysctl_debug_kernel SYSCTL_HANDLER_ARGS
+{
+#error "Check me"
+        const int  buflen = 1024;
+        char      *buf;
+        int        rc;
+
+        /* read request */
+        if (req->newptr == USER_ADDR_NULL)
+                return -EINVAL;
+
+        /* write request: one extra byte is allocated beyond what is copied in */
+        rc = trace_allocate_string_buffer(&buf, buflen + 1);
+        if (rc != 0)
+                return rc;
+
+        rc = SYSCTL_IN(req, buf, buflen);
+
+        /* NB buf guaranteed terminated */
+        if (rc == 0)
+                rc = tracefile_dump_all_pages(buf);
+
+        trace_free_string_buffer(buf, buflen + 1);
+        return rc;
+}
+
+/*
+ * Sysctl handler for daemon_file.  A read copies out tracefile (with its
+ * terminating '\0') under the tracefile read lock; a write copies the new
+ * string in and passes it to trace_daemon_command().
+ */
+static int sysctl_daemon_file SYSCTL_HANDLER_ARGS
+{
+#error "Check me"
+        char *buf;
+        int   rc;
+
+        if (req->newptr == USER_ADDR_NULL) {
+                /* a read; include terminating '\0' */
+                tracefile_read_lock();
+                rc = SYSCTL_OUT(req, tracefile, strlen(tracefile) + 1);
+                tracefile_read_unlock();
+                return rc;
+        }
+
+        /* write request */
+        rc = trace_allocate_string_buffer(&buf, TRACEFILE_NAME_SIZE);
+        if (rc != 0)
+                return rc;
+
+        rc = SYSCTL_IN(req, buf, TRACEFILE_NAME_SIZE - 1);
+
+        /* NB buf guaranteed terminated */
+        if (rc == 0)
+                rc = trace_daemon_command(buf);
+
+        trace_free_string_buffer(buf, TRACEFILE_NAME_SIZE);
+        return rc;
+}
+
+
+/*
+ * Sysctl handler for debug_mb: a read reports trace_get_debug_mb(), a write
+ * feeds the supplied long to trace_set_debug_mb().
+ */
+static int sysctl_debug_mb SYSCTL_HANDLER_ARGS
+{
+#error "Check me"
+        long mb;
+        int  rc;
+
+        if (req->newptr == USER_ADDR_NULL) {
+                /* read */
+                mb = trace_get_debug_mb();
+                rc = SYSCTL_OUT(req, &mb, sizeof(mb));
+                return rc;
+        }
+
+        /* write */
+        rc = SYSCTL_IN(req, &mb, sizeof(mb));
+        if (rc != 0)
+                return rc;
+
+        rc = trace_set_debug_mb(mb);
+        return rc;
+}
+
+/*
+ * sysctl table for lnet
+ */
+
+SYSCTL_NODE (,                 OID_AUTO,       lnet,   CTLFLAG_RW,
+            0,                 "lnet sysctl top");
+
+SYSCTL_INT(_lnet,                      OID_AUTO,       debug,
+            CTLTYPE_INT | CTLFLAG_RW ,                 &libcfs_debug,
+            0,         "debug");
+SYSCTL_INT(_lnet,                      OID_AUTO,       subsystem_debug,
+            CTLTYPE_INT | CTLFLAG_RW,                  &libcfs_subsystem_debug,
+            0,         "subsystem debug");
+SYSCTL_INT(_lnet,                      OID_AUTO,       printk,
+            CTLTYPE_INT | CTLFLAG_RW,                  &libcfs_printk,
+            0,         "printk");
+SYSCTL_INT(_lnet,                      OID_AUTO,       console_ratelimit,
+            CTLTYPE_INT | CTLFLAG_RW,                  &libcfs_console_ratelimit,
+            0,         "console_ratelimit");
+SYSCTL_STRING(_lnet,                   OID_AUTO,       debug_path,
+            CTLTYPE_STRING | CTLFLAG_RW,               debug_file_path,
+            1024,      "debug path");
+SYSCTL_INT(_lnet,                      OID_AUTO,       memused,
+            CTLTYPE_INT | CTLFLAG_RW,                  (int *)&libcfs_kmemory.counter,
+            0,         "memused");
+SYSCTL_INT(_lnet,                      OID_AUTO,       catastrophe,
+            CTLTYPE_INT | CTLFLAG_RW,                  (int *)&libcfs_catastrophe,
+            0,         "catastrophe");
+
+#error "check me"
+SYSCTL_PROC(_lnet,                     OID_AUTO,       debug_kernel,
+            CTLTYPE_STRING | CTLFLAG_W,                0,
+            0,         &sysctl_debug_kernel,           "A",    "debug_kernel");
+SYSCTL_PROC(_lnet,                     OID_AUTO,       daemon_file,
+            CTLTYPE_STRING | CTLFLAG_RW,               0,
+            0,         &sysctl_daemon_file,            "A",    "daemon_file");
+SYSCTL_PROC(_lnet,                     OID_AUTO,       debug_mb,
+            CTLTYPE_INT | CTLFLAG_RW,                  0,
+            0,         &sysctl_debug_mb,               "L",    "debug_mb");
+
+
+/*
+ * NULL-terminated list of the lnet oids; the lnet node itself comes first,
+ * followed by its entries.  Walked by cfs_register_sysctl_table() and
+ * cfs_unregister_sysctl_table().
+ */
+static cfs_sysctl_table_t top_table[] = {
+        &sysctl__lnet,
+        &sysctl__lnet_debug,
+        &sysctl__lnet_subsystem_debug,
+        &sysctl__lnet_printk,
+        &sysctl__lnet_console_ratelimit,
+        &sysctl__lnet_debug_path,
+        &sysctl__lnet_memused,
+        &sysctl__lnet_catastrophe,
+        &sysctl__lnet_debug_kernel,
+        &sysctl__lnet_daemon_file,
+        &sysctl__lnet_debug_mb,
+        NULL
+};
+
+/*
+ * Register sysctl table
+ */
+/*
+ * Register every oid of the NULL-terminated table, in order, and return the
+ * table itself as the header.  arg is not used.
+ */
+cfs_sysctl_table_header_t *
+cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg)
+{
+        cfs_sysctl_table_t *cursor;
+
+        for (cursor = table; *cursor != NULL; cursor++)
+                sysctl_register_oid(*cursor);
+        return table;
+}
+
+/*
+ * Unregister sysctl table
+ */
+/* Unregister every oid of the NULL-terminated table, in table order. */
+void
+cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) {
+        cfs_sysctl_table_header_t *cursor;
+        cfs_sysctl_table_t         item;
+
+        for (cursor = table; (item = *cursor) != NULL; cursor++)
+                sysctl_unregister_oid(item);
+}
+
+/*
+ * Allocate a sysctl oid. 
+ */
+/*
+ * Allocate and fill in a sysctl oid (it is NOT registered here).
+ *
+ * parent   list to attach to; NULL selects sysctl__children
+ * nbr      oid number
+ * access   CTLTYPE_* | CTLFLAG_* bits, stored as oid_kind
+ * name     oid name; copied, must fit in CTL_MAXNAME including the '\0'
+ * arg1/arg2 handler arguments (ignored for CTLTYPE_NODE, which gets a
+ *          freshly allocated child list as arg1 and 0 as arg2)
+ * fmt      format string; copied
+ * handler  oid handler
+ *
+ * Returns the new oid, or NULL on failure.  Release it with
+ * cfs_free_sysctl().
+ */
+static struct sysctl_oid *
+cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access,
+                 const char *name, void *arg1, int arg2, const char *fmt,
+                 int (*handler) SYSCTL_HANDLER_ARGS)
+{
+        struct sysctl_oid *oid;
+        char    *sname = NULL;
+        char    *sfmt = NULL;
+
+        if (strlen(name) + 1 > CTL_MAXNAME) {
+                printf("libcfs: sysctl name: %s is too long.\n", name);
+                return NULL;
+        }
+        oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid), 
+                                          M_TEMP, M_WAITOK | M_ZERO);
+        if (oid == NULL) 
+                return NULL;
+
+        /*
+         * Size the copies from the strings themselves: sizeof(CTL_MAXNAME)
+         * is only sizeof(int), and a fixed 4-byte fmt buffer is not checked
+         * against the caller's string.
+         */
+        sname = (char *)_MALLOC(strlen(name) + 1, 
+                                M_TEMP, M_WAITOK | M_ZERO);
+        if (sname == NULL) 
+                goto error;
+        strcpy(sname, name);
+
+        sfmt = (char *)_MALLOC(strlen(fmt) + 1, M_TEMP, M_WAITOK | M_ZERO);
+        if (sfmt == NULL) 
+                goto error;
+        strcpy(sfmt, fmt);
+
+        if (parent == NULL)
+                oid->oid_parent = &sysctl__children;
+        else
+                oid->oid_parent = parent;
+        oid->oid_number = nbr;
+        oid->oid_kind = access;
+        oid->oid_name = sname;
+        oid->oid_handler = handler;
+        oid->oid_fmt = sfmt;
+
+        if ((access & CTLTYPE) == CTLTYPE_NODE){
+                /* It's a sysctl node */
+                struct sysctl_oid_list *link;
+
+                link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list), 
+                                                         M_TEMP, M_WAITOK | M_ZERO);
+                if (link == NULL)
+                        goto error;
+                oid->oid_arg1 = link;
+                oid->oid_arg2 = 0;
+        } else {
+                oid->oid_arg1 = arg1;
+                oid->oid_arg2 = arg2;
+        }
+
+        return oid;
+error:
+        if (sfmt != NULL)
+                _FREE(sfmt, M_TEMP);
+        if (sname != NULL)
+                _FREE(sname, M_TEMP);
+        if (oid != NULL)
+                _FREE(oid, M_TEMP);
+        return NULL;
+}
+
+/*
+ * Free an oid built by cfs_alloc_sysctl(): its name and fmt copies, the
+ * child list hanging off oid_arg1 when the oid is a node, and the oid
+ * itself.  A NULL oid is ignored.  The oid must already be unregistered.
+ */
+void cfs_free_sysctl(struct sysctl_oid *oid)
+{
+        if (oid == NULL)
+                return;
+        if (oid->oid_name != NULL)
+                _FREE((void *)oid->oid_name, M_TEMP);
+        if (oid->oid_fmt != NULL)
+                _FREE((void *)oid->oid_fmt, M_TEMP);
+        /*
+         * Only nodes own oid_arg1.  Mask out the type bits before comparing:
+         * "kind & CTLTYPE_NODE != 0" parses as "kind & (CTLTYPE_NODE != 0)"
+         * and matches any kind with bit 0 set.
+         */
+        if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE && oid->oid_arg1 != NULL)
+                /* XXX Liang: need to assert the list is empty */
+                _FREE(oid->oid_arg1, M_TEMP);
+        _FREE(oid, M_TEMP);
+}
+
+#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \
+                            (libcfs_sysctl_sprite.ss_link != NULL))       
+
+/* Return 1 when the sprite carries the magic and a non-NULL link, else 0. */
+int
+cfs_sysctl_isvalid(void)
+{
+        if (!CFS_SYSCTL_ISVALID)
+                return 0;
+        return 1;
+}
+
+/*
+ * Allocate a CTLTYPE_NODE oid with format "N".  With a NULL parent and a
+ * valid sprite the node is placed under the sprite's link.
+ */
+struct sysctl_oid *
+cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access,
+                      const char *name, int (*handler) SYSCTL_HANDLER_ARGS)
+{
+        struct sysctl_oid_list *owner = parent;
+
+        if (owner == NULL && CFS_SYSCTL_ISVALID)
+                owner = libcfs_sysctl_sprite.ss_link;
+
+        return cfs_alloc_sysctl(owner, nbr, CTLTYPE_NODE | access, name,
+                                NULL, 0, "N", handler);
+}
+
+/*
+ * Allocate a CTLTYPE_INT oid (format "I", sysctl_handle_int) backed by ptr.
+ * With a NULL parent and a valid sprite it is placed under the sprite's link.
+ */
+struct sysctl_oid *
+cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access,
+                     const char *name, int *ptr, int val)
+{
+        struct sysctl_oid_list *owner = parent;
+
+        if (owner == NULL && CFS_SYSCTL_ISVALID)
+                owner = libcfs_sysctl_sprite.ss_link;
+
+        return cfs_alloc_sysctl(owner, nbr, CTLTYPE_INT | access, name, 
+                                ptr, val, "I", sysctl_handle_int);
+}
+
+/*
+ * Allocate an oid handled by sysctl_handle_long (format "L"); the kind is
+ * still CTLTYPE_INT.  With a NULL parent and a valid sprite it is placed
+ * under the sprite's link.
+ * NOTE(review): ptr is declared int * although the handler is the long
+ * one -- confirm callers pass long-sized storage.
+ */
+struct sysctl_oid *
+cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access,
+                      const char *name, int *ptr, int val)
+{
+        struct sysctl_oid_list *owner = parent;
+
+        if (owner == NULL && CFS_SYSCTL_ISVALID)
+                owner = libcfs_sysctl_sprite.ss_link;
+
+        return cfs_alloc_sysctl(owner, nbr, CTLTYPE_INT | access, name, 
+                                ptr, val, "L", sysctl_handle_long);
+}
+
+/*
+ * Allocate a CTLTYPE_STRING oid (format "A", sysctl_handle_string) over the
+ * buffer ptr of length len.  With a NULL parent and a valid sprite it is
+ * placed under the sprite's link.
+ */
+struct sysctl_oid *
+cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access,
+                        const char *name, char *ptr, int len)
+{
+        struct sysctl_oid_list *owner = parent;
+
+        if (owner == NULL && CFS_SYSCTL_ISVALID)
+                owner = libcfs_sysctl_sprite.ss_link;
+
+        return cfs_alloc_sysctl(owner, nbr, CTLTYPE_STRING | access, name, 
+                                ptr, len, "A", sysctl_handle_string);
+}
+
+/*
+ * Allocate a CTLTYPE_OPAQUE oid (format "S", sysctl_handle_opaque) exposing
+ * size bytes at ptr.  With a NULL parent and a valid sprite it is placed
+ * under the sprite's link.
+ */
+struct sysctl_oid *
+cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access,
+                        const char *name, void *ptr, int size)
+{
+        struct sysctl_oid_list *owner = parent;
+
+        if (owner == NULL && CFS_SYSCTL_ISVALID)
+                owner = libcfs_sysctl_sprite.ss_link;
+
+        return cfs_alloc_sysctl(owner, nbr, CTLTYPE_OPAQUE | access, name,
+                                ptr, size, "S", sysctl_handle_opaque);
+}
+
+/* no proc in osx */
+/*
+ * Hand back a zero-filled cfs_proc_dir_entry_t; name, mod and parent are
+ * not used.
+ */
+cfs_proc_dir_entry_t *
+cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent)
+{
+        cfs_proc_dir_entry_t *entry;
+
+        MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(*entry), M_TEMP,
+               M_WAITOK|M_ZERO);
+        return entry;
+}
+
+/* Release an entry obtained from cfs_create_proc_entry(). */
+void
+cfs_free_proc_entry(cfs_proc_dir_entry_t *de)
+{
+        FREE(de, M_TEMP);
+}
+
+/* Free entry via cfs_free_proc_entry(); name is not used. */
+void
+cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry)
+{
+        cfs_free_proc_entry(entry);
+}
+
+/*
+ * Register top_table once; later calls are no-ops while the header is set.
+ * Always returns 0.
+ */
+int
+insert_proc(void)
+{
+#if 1
+        if (libcfs_table_header == NULL)
+                libcfs_table_header = cfs_register_sysctl_table(top_table, 0);
+#endif
+        return 0;
+}
+
+/* Unregister the table registered by insert_proc(), if any, and forget it. */
+void
+remove_proc(void)
+{
+#if 1
+        if (libcfs_table_header != NULL) {
+                cfs_unregister_sysctl_table(libcfs_table_header);
+                libcfs_table_header = NULL;
+        }
+#endif
+}
+
+/*
+ * Set up the "libcfs" sysctl node and its "sprite" child.
+ *
+ * If "libcfs.sprite" can already be read, its contents are loaded into
+ * libcfs_sysctl_sprite and validated against LIBCFS_SYSCTL_MAGIC.  Otherwise
+ * a new "libcfs" node and an opaque "sprite" oid describing it are allocated
+ * and registered.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+cfs_sysctl_init(void)
+{
+        struct sysctl_oid               *oid_root;
+        struct sysctl_oid               *oid_sprite;
+        struct libcfs_sysctl_sprite     *sprite;
+        size_t  len; 
+        int     rc;
+
+        len = sizeof(struct libcfs_sysctl_sprite);
+        rc = sysctlbyname("libcfs.sprite", 
+                          (void *)&libcfs_sysctl_sprite, &len, NULL, 0);
+        if (rc == 0) {
+                /* 
+                 * XXX Liang: assert (rc == 0 || rc == ENOENT)
+                 *
+                 * libcfs.sprite has been registered by previous 
+                 * loading of libcfs 
+                 */
+                if (libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) {
+                        /* both values are int-sized: print with %x, not %lx */
+                        printf("libcfs: magic number of libcfs.sprite "
+                               "is not right (%x, %x)\n", 
+                               (unsigned int)libcfs_sysctl_sprite.ss_magic,
+                               (unsigned int)LIBCFS_SYSCTL_MAGIC);
+                        return -1;
+                }
+                assert(libcfs_sysctl_sprite.ss_link != NULL);
+                printf("libcfs: registered libcfs.sprite found.\n");
+                return 0;
+        }
+        oid_root = cfs_alloc_sysctl_node(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN,
+                                         LIBCFS_SYSCTL, 0);
+        if (oid_root == NULL)
+                return -1;
+        sysctl_register_oid(oid_root);
+
+        sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite), 
+                                                        M_TEMP, M_WAITOK | M_ZERO);
+        if (sprite == NULL) {
+                sysctl_unregister_oid(oid_root);
+                cfs_free_sysctl(oid_root);
+                return -1;
+        }
+        sprite->ss_magic = LIBCFS_SYSCTL_MAGIC;
+        sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1;
+        oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1, 
+                                             OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, 
+                                             LIBCFS_SYSCTL_SPRITE, sprite, 
+                                             sizeof(struct libcfs_sysctl_sprite));
+        if (oid_sprite == NULL) {
+                /*
+                 * Nothing to free for oid_sprite (it is NULL); release the
+                 * sprite buffer that would otherwise be leaked.
+                 */
+                _FREE(sprite, M_TEMP);
+                sysctl_unregister_oid(oid_root);
+                cfs_free_sysctl(oid_root);
+                return -1;
+        }
+        sysctl_register_oid(oid_sprite);
+
+        libcfs_sysctl_sprite.ss_magic = sprite->ss_magic;
+        libcfs_sysctl_sprite.ss_link = sprite->ss_link;
+
+        return 0;
+}
+
+/*
+ * Invalidate the local copy of the sprite.  The oids registered by
+ * cfs_sysctl_init() are not touched here.
+ */
+void
+cfs_sysctl_fini(void)
+{
+        libcfs_sysctl_sprite.ss_link = NULL;
+        libcfs_sysctl_sprite.ss_magic = 0;
+}
+
diff --git a/libcfs/libcfs/darwin/darwin-sync.c b/libcfs/libcfs/darwin/darwin-sync.c
new file mode 100644 (file)
index 0000000..8b752e3
--- /dev/null
@@ -0,0 +1,1025 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre Light Super operations
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+/*
+ * xnu_sync.c
+ *
+ * Created by nikita on Sun Jul 18 2004.
+ *
+ * XNU synchronization primitives.
+ */
+
+/*
+ * This file contains very simplistic implementations of (saner) API for
+ * basic synchronization primitives:
+ *
+ *     - spin-lock          (kspin)
+ *
+ *     - semaphore          (ksem)
+ *
+ *     - mutex              (kmut)
+ *
+ *     - condition variable (kcond)
+ *
+ *     - wait-queue         (ksleep_chan and ksleep_link)
+ *
+ *     - timer              (ktimer)
+ *
+ * A lot can be optimized here.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#ifdef __DARWIN8__
+# include <kern/locks.h>
+#else
+# include <mach/mach_types.h>
+# include <sys/types.h>
+# include <kern/simple_lock.h>
+#endif
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e))
+
+#ifdef HAVE_GET_PREEMPTION_LEVEL
+extern int get_preemption_level(void);
+#else
+#define get_preemption_level() (0)
+#endif
+
+#if SMP
+#ifdef __DARWIN8__
+
+static lck_grp_t       *cfs_lock_grp = NULL;
+#warning "Verify definition of lck_spin_t hasn't been changed while building!"
+
+/* hw_lock_* are not exported by Darwin8 */
+/*
+ * Initialise the spin-lock stored in *s in place, accounting it to
+ * cfs_lock_grp (lck_spin_alloc_init() is not used).
+ */
+static inline void xnu_spin_init(xnu_spin_t *s)
+{
+        lck_spin_t *lck = (lck_spin_t *)s;
+
+        SLASSERT(cfs_lock_grp != NULL);
+        lck_spin_init(lck, cfs_lock_grp, LCK_ATTR_NULL);
+}
+
+/*
+ * Destroy the spin-lock stored in *s; counterpart of xnu_spin_init(), so
+ * the storage itself is not freed.
+ */
+static inline void xnu_spin_done(xnu_spin_t *s)
+{
+        lck_spin_t *lck = (lck_spin_t *)s;
+
+        SLASSERT(cfs_lock_grp != NULL);
+        lck_spin_destroy(lck, cfs_lock_grp);
+}
+
+#define xnu_spin_lock(s)        lck_spin_lock((lck_spin_t *)(s))
+#define xnu_spin_unlock(s)      lck_spin_unlock((lck_spin_t *)(s))
+
+#warning "Darwin8 does not export lck_spin_try_lock"
+#define xnu_spin_try(s)         (1)
+
+#else /* DARWIN8 */
+extern void                    hw_lock_init(hw_lock_t);
+extern void                    hw_lock_lock(hw_lock_t);
+extern void                    hw_lock_unlock(hw_lock_t);
+extern unsigned int            hw_lock_to(hw_lock_t, unsigned int);
+extern unsigned int            hw_lock_try(hw_lock_t);
+extern unsigned int            hw_lock_held(hw_lock_t);
+
+#define xnu_spin_init(s)        hw_lock_init(s)
+#define xnu_spin_done(s)        do {} while (0)
+#define xnu_spin_lock(s)        hw_lock_lock(s)
+#define xnu_spin_unlock(s)      hw_lock_unlock(s)
+#define xnu_spin_try(s)         hw_lock_try(s)
+#endif /* DARWIN8 */
+
+#else /* SMP */
+#define xnu_spin_init(s)        do {} while (0)
+#define xnu_spin_done(s)        do {} while (0)
+#define xnu_spin_lock(s)        do {} while (0)
+#define xnu_spin_unlock(s)      do {} while (0)
+#define xnu_spin_try(s)         (1)
+#endif /* SMP */
+
+/*
+ * Warning: low level libcfs debugging code (libcfs_debug_msg(), for
+ * example), uses spin-locks, so debugging output here may lead to nasty
+ * surprises.
+ *
+ * In the uniprocessor build the spin-lock takes no real lock; only the
+ * checks remain.
+ */
+
+/*
+ * Initialise spin: set up the underlying xnu spin-lock and, through
+ * ON_SYNC_DEBUG, stamp the magic and clear the owner.
+ */
+void kspin_init(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       xnu_spin_init(&spin->lock);
+       ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC);
+       ON_SYNC_DEBUG(spin->owner = NULL);
+}
+
+/*
+ * Finalise spin.  The lock is asserted to be initialised and not owned
+ * before the underlying xnu spin-lock is released.
+ */
+void kspin_done(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+       SLASSERT(spin->owner == NULL);
+        xnu_spin_done(&spin->lock);
+}
+
+/*
+ * Acquire spin.  Asserts that the calling thread is not already the
+ * recorded owner; once the lock is held the caller is recorded as owner
+ * (through ON_SYNC_DEBUG).
+ */
+void kspin_lock(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+       SLASSERT(spin->owner != current_thread());
+
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       xnu_spin_lock(&spin->lock);
+       SLASSERT(spin->owner == NULL);
+       ON_SYNC_DEBUG(spin->owner = current_thread());
+}
+
+/*
+ * Release spin.  Asserts that the calling thread is the recorded owner and
+ * clears the owner before the underlying lock is dropped.
+ */
+void kspin_unlock(struct kspin *spin)
+{
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+       SLASSERT(spin->owner == current_thread());
+       ON_SYNC_DEBUG(spin->owner = NULL);
+       xnu_spin_unlock(&spin->lock);
+}
+
+/*
+ * Try to acquire spin without waiting.  Returns 1 (and records the caller
+ * as owner through ON_SYNC_DEBUG) when xnu_spin_try() succeeds, 0 otherwise.
+ * NOTE: in the configurations where xnu_spin_try() is defined as (1) this
+ * always reports success.
+ */
+int  kspin_trylock(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+
+       if (xnu_spin_try(&spin->lock)) {
+               SLASSERT(spin->owner == NULL);
+               ON_SYNC_DEBUG(spin->owner = current_thread());
+               return 1;
+       } else
+               return 0;
+}
+
+#if XNU_SYNC_DEBUG
+/* Return non-zero iff the calling thread is the recorded owner of spin. */
+int kspin_islocked(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+       return spin->owner == current_thread();
+}
+
+/*
+ * Return non-zero iff the calling thread is NOT the recorded owner of spin
+ * (another thread may still hold it).
+ */
+int kspin_isnotlocked(struct kspin *spin)
+{
+       SLASSERT(spin != NULL);
+       SLASSERT(spin->magic == KSPIN_MAGIC);
+       return spin->owner != current_thread();
+}
+#endif
+
+/*
+ * read/write spin-lock
+ */
+/*
+ * Initialise a read/write spin-lock: guard protects count, which starts at
+ * 0 (unlocked).
+ */
+void krw_spin_init(struct krw_spin *rwspin)
+{
+       SLASSERT(rwspin != NULL);
+
+       kspin_init(&rwspin->guard);
+       rwspin->count = 0;
+       ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC);
+}
+
+/* Finalise rwspin; it is asserted to be unlocked (count == 0). */
+void krw_spin_done(struct krw_spin *rwspin)
+{
+       SLASSERT(rwspin != NULL);
+       SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+       SLASSERT(rwspin->count == 0);
+       kspin_done(&rwspin->guard);
+}
+
+/*
+ * Take rwspin for reading.  While count is negative (a writer holds the
+ * lock) the guard is dropped and count is polled without it for a bounded
+ * number of iterations before the guard is retaken and count rechecked.
+ * On success count is incremented under the guard.
+ */
+void krw_spin_down_r(struct krw_spin *rwspin)
+{
+        /* unsigned: counting down from all-ones to 0 must not overflow */
+        unsigned int i;
+
+        SLASSERT(rwspin != NULL);
+        SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+
+        kspin_lock(&rwspin->guard);
+        while(rwspin->count < 0) {
+                i = ~0U;
+                kspin_unlock(&rwspin->guard);
+                while (--i != 0 && rwspin->count < 0)
+                        continue;
+                kspin_lock(&rwspin->guard);
+        }
+        ++ rwspin->count;
+        kspin_unlock(&rwspin->guard);
+}
+
+/*
+ * Take rwspin for writing.  While count is non-zero (readers or a writer
+ * hold the lock) the guard is dropped and count is polled without it for a
+ * bounded number of iterations before the guard is retaken and count
+ * rechecked.  On success count is set to -1 under the guard.
+ */
+void krw_spin_down_w(struct krw_spin *rwspin)
+{
+        /* unsigned: counting down from all-ones to 0 must not overflow */
+        unsigned int i;
+
+        SLASSERT(rwspin != NULL);
+        SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+
+        kspin_lock(&rwspin->guard);
+        while (rwspin->count != 0) {
+                i = ~0U;
+                kspin_unlock(&rwspin->guard);
+                while (--i != 0 && rwspin->count != 0)
+                        continue;
+                kspin_lock(&rwspin->guard);
+        }
+        rwspin->count = -1;
+        kspin_unlock(&rwspin->guard);
+}
+
+/* Drop a read hold: decrement count under the guard. */
+void krw_spin_up_r(struct krw_spin *rwspin)
+{
+       SLASSERT(rwspin != NULL);
+       SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+       SLASSERT(rwspin->count > 0);
+
+       kspin_lock(&rwspin->guard);
+       -- rwspin->count;
+       kspin_unlock(&rwspin->guard);
+}
+
+/* Drop the write hold: reset count from -1 to 0 under the guard. */
+void krw_spin_up_w(struct krw_spin *rwspin)
+{
+       SLASSERT(rwspin != NULL);
+       SLASSERT(rwspin->magic == KRW_SPIN_MAGIC);
+       SLASSERT(rwspin->count == -1);
+
+       kspin_lock(&rwspin->guard);
+       rwspin->count = 0;
+       kspin_unlock(&rwspin->guard);
+}
+
+/*
+ * semaphore 
+ */
+#ifdef __DARWIN8__
+
+#define xnu_waitq_init(q, a)            do {} while (0)
+#define xnu_waitq_done(q)               do {} while (0)
+#define xnu_waitq_wakeup_one(q, e, s)   ({wakeup_one((void *)(e)); KERN_SUCCESS;})
+#define xnu_waitq_wakeup_all(q, e, s)   ({wakeup((void *)(e)); KERN_SUCCESS;})
+#define xnu_waitq_assert_wait(q, e, s)  assert_wait((e), s)
+
+#else /* DARWIN8 */
+
+#define xnu_waitq_init(q, a)            wait_queue_init((q), a)
+#define xnu_waitq_done(q)               do {} while (0)
+#define xnu_waitq_wakeup_one(q, e, s)   wait_queue_wakeup_one((q), (event_t)(e), s)
+#define xnu_waitq_wakeup_all(q, e, s)   wait_queue_wakeup_all((q), (event_t)(e), s)
+#define xnu_waitq_assert_wait(q, e, s)  wait_queue_assert_wait((q), (event_t)(e), s)
+
+#endif /* DARWIN8 */
+/*
+ * Initialise sem with the given starting value; sets up its guard
+ * spin-lock and its wait queue.
+ */
+void ksem_init(struct ksem *sem, int value)
+{
+       SLASSERT(sem != NULL);
+       kspin_init(&sem->guard);
+       xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO);
+       sem->value = value;
+       ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC);
+}
+
+/* Finalise sem by finalising its guard spin-lock. */
+void ksem_done(struct ksem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KSEM_MAGIC);
+       /*
+        * XXX nikita: cannot check that &sem->q is empty because
+        * wait_queue_empty() is Apple private API.
+        */
+       kspin_done(&sem->guard);
+}
+
+/*
+ * Add value to the semaphore under its guard and wake sleepers: a single
+ * one when the resulting value is 0, all of them otherwise.
+ *
+ * Returns 0 when the wakeup reported KERN_SUCCESS, 1 otherwise.
+ */
+int ksem_up(struct ksem *sem, int value)
+{
+       int result;
+
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KSEM_MAGIC);
+       SLASSERT(value >= 0);
+
+       kspin_lock(&sem->guard);
+       sem->value += value;
+       if (sem->value == 0)
+               result = xnu_waitq_wakeup_one(&sem->q, sem,
+                                             THREAD_AWAKENED);
+       else
+               result = xnu_waitq_wakeup_all(&sem->q, sem,
+                                             THREAD_AWAKENED);
+       kspin_unlock(&sem->guard);
+       SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
+       return (result == KERN_SUCCESS) ? 0 : 1;
+}
+
+/*
+ * Subtract value from the semaphore, sleeping (THREAD_UNINT) for as long
+ * as the current value is smaller than value.  The wait is asserted while
+ * the guard is held; the guard is dropped before blocking and retaken
+ * before the value is rechecked.
+ */
+void ksem_down(struct ksem *sem, int value)
+{
+       int result;
+
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KSEM_MAGIC);
+       SLASSERT(value >= 0);
+       SLASSERT(get_preemption_level() == 0);
+
+       kspin_lock(&sem->guard);
+       while (sem->value < value) {
+               result = xnu_waitq_assert_wait(&sem->q, sem,
+                                              THREAD_UNINT);
+               SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
+               kspin_unlock(&sem->guard);
+               if (result == THREAD_WAITING)
+                       thread_block(THREAD_CONTINUE_NULL);
+               kspin_lock(&sem->guard);
+       }
+       sem->value -= value;
+       kspin_unlock(&sem->guard);
+}
+
+/*
+ * Non-blocking ksem_down(): subtracts value and returns 0 if the semaphore
+ * currently holds at least value, otherwise leaves it untouched and
+ * returns -EBUSY.
+ */
+int ksem_trydown(struct ksem *sem, int value)
+{
+       int result;
+
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KSEM_MAGIC);
+       SLASSERT(value >= 0);
+
+       kspin_lock(&sem->guard);
+       if (sem->value >= value) {
+               sem->value -= value;
+               result = 0;
+       } else
+               result = -EBUSY;
+       kspin_unlock(&sem->guard);
+       return result;
+}
+
+/* Initialise mut: a semaphore with initial value 1 and no owner. */
+void kmut_init(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       ksem_init(&mut->s, 1);
+       ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC);
+       ON_SYNC_DEBUG(mut->owner = NULL);
+}
+
+/* Finalise mut; it is asserted to have no owner. */
+void kmut_done(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       SLASSERT(mut->magic == KMUT_MAGIC);
+       SLASSERT(mut->owner == NULL);
+       ksem_done(&mut->s);
+}
+
+/*
+ * Acquire mut, sleeping in ksem_down() if necessary, and record the caller
+ * as owner (through ON_SYNC_DEBUG).  Asserts the caller does not already
+ * own it.
+ */
+void kmut_lock(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       SLASSERT(mut->magic == KMUT_MAGIC);
+       SLASSERT(mut->owner != current_thread());
+       SLASSERT(get_preemption_level() == 0);
+
+       ksem_down(&mut->s, 1);
+       ON_SYNC_DEBUG(mut->owner = current_thread());
+}
+
+/*
+ * Release mut.  Asserts the caller is the recorded owner, clears the owner
+ * and raises the semaphore by one.
+ */
+void kmut_unlock(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       SLASSERT(mut->magic == KMUT_MAGIC);
+       SLASSERT(mut->owner == current_thread());
+
+       ON_SYNC_DEBUG(mut->owner = NULL);
+       ksem_up(&mut->s, 1);
+}
+
+/*
+ * Try to acquire mut without sleeping.
+ *
+ * Returns 0 on success and -EBUSY if the mutex is held (the result of
+ * ksem_trydown()).  On success the caller is recorded as owner, as
+ * kmut_lock() does, so that the ownership assertion in kmut_unlock()
+ * holds in sync-debug builds.
+ */
+int kmut_trylock(struct kmut *mut)
+{
+        int rc;
+
+        SLASSERT(mut != NULL);
+        SLASSERT(mut->magic == KMUT_MAGIC);
+
+        rc = ksem_trydown(&mut->s, 1);
+        if (rc == 0) {
+                ON_SYNC_DEBUG(mut->owner = current_thread());
+        }
+        return rc;
+}
+
+#if XNU_SYNC_DEBUG
+/* Return non-zero iff the calling thread is the recorded owner of mut. */
+int kmut_islocked(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       SLASSERT(mut->magic == KMUT_MAGIC);
+       return mut->owner == current_thread();
+}
+
+/*
+ * Return non-zero iff the calling thread is NOT the recorded owner of mut
+ * (another thread may still hold it).
+ */
+int kmut_isnotlocked(struct kmut *mut)
+{
+       SLASSERT(mut != NULL);
+       SLASSERT(mut->magic == KMUT_MAGIC);
+       return mut->owner != current_thread();
+}
+#endif
+
+
+/* Initialise cond: guard spin-lock plus an empty list of waiters. */
+void kcond_init(struct kcond *cond)
+{
+       SLASSERT(cond != NULL);
+
+       kspin_init(&cond->guard);
+       cond->waiters = NULL;
+       ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC);
+}
+
+/* Finalise cond; it is asserted to have no waiters left. */
+void kcond_done(struct kcond *cond)
+{
+       SLASSERT(cond != NULL);
+       SLASSERT(cond->magic == KCOND_MAGIC);
+       SLASSERT(cond->waiters == NULL);
+       kspin_done(&cond->guard);
+}
+
+/*
+ * Wait on cond.  The caller holds lock; it is released while sleeping and
+ * re-acquired before returning.
+ *
+ * An on-stack link carrying a private semaphore is pushed on the waiters
+ * list; a signaller pops it and ups the semaphore.
+ */
+void kcond_wait(struct kcond *cond, struct kspin *lock)
+{
+        struct kcond_link link;
+
+        SLASSERT(cond != NULL);
+        SLASSERT(lock != NULL);
+        SLASSERT(cond->magic == KCOND_MAGIC);
+        SLASSERT(kspin_islocked(lock));
+
+        ksem_init(&link.sem, 0);
+        kspin_lock(&cond->guard);
+        link.next = cond->waiters;
+        cond->waiters = &link;
+        kspin_unlock(&cond->guard);
+        kspin_unlock(lock);
+
+        ksem_down(&link.sem, 1);
+
+        /*
+         * Pass through the guard: signallers call ksem_up() with the guard
+         * held, so once it has been obtained here they are done with link.
+         */
+        kspin_lock(&cond->guard);
+        kspin_unlock(&cond->guard);
+        /* balance ksem_init() so the semaphore's spin-lock is released */
+        ksem_done(&link.sem);
+        kspin_lock(lock);
+}
+
+/*
+ * Wait on cond using cond->guard itself as the protecting lock.  The caller
+ * holds cond->guard; it is released while sleeping and held again on
+ * return.
+ */
+void kcond_wait_guard(struct kcond *cond)
+{
+        struct kcond_link link;
+
+        SLASSERT(cond != NULL);
+        SLASSERT(cond->magic == KCOND_MAGIC);
+        SLASSERT(kspin_islocked(&cond->guard));
+
+        ksem_init(&link.sem, 0);
+        link.next = cond->waiters;
+        cond->waiters = &link;
+        kspin_unlock(&cond->guard);
+
+        ksem_down(&link.sem, 1);
+
+        kspin_lock(&cond->guard);
+        /*
+         * Signallers call ksem_up() with the guard held, so they are done
+         * with link by now; balance ksem_init() so the semaphore's
+         * spin-lock is released.
+         */
+        ksem_done(&link.sem);
+}
+
+/*
+ * Wake one waiter, if any: pop the head of the waiters list and up its
+ * semaphore.  The caller must hold cond->guard.
+ */
+void kcond_signal_guard(struct kcond *cond)
+{
+       struct kcond_link *link;
+
+       SLASSERT(cond != NULL);
+       SLASSERT(cond->magic == KCOND_MAGIC);
+       SLASSERT(kspin_islocked(&cond->guard));
+
+       link = cond->waiters;
+       if (link != NULL) {
+               cond->waiters = link->next;
+               ksem_up(&link->sem, 1);
+       }
+}
+
+/* Wake one waiter of cond; takes and releases cond->guard itself. */
+void kcond_signal(struct kcond *cond)
+{
+       SLASSERT(cond != NULL);
+       SLASSERT(cond->magic == KCOND_MAGIC);
+
+       kspin_lock(&cond->guard);
+       kcond_signal_guard(cond);
+       kspin_unlock(&cond->guard);
+}
+
+/*
+ * Wake every waiter: up the semaphore of each link on the waiters list and
+ * then empty the list.  The caller must hold cond->guard.
+ */
+void kcond_broadcast_guard(struct kcond *cond)
+{
+       struct kcond_link *link;
+
+       SLASSERT(cond != NULL);
+       SLASSERT(cond->magic == KCOND_MAGIC);
+       SLASSERT(kspin_islocked(&cond->guard));
+
+       for (link = cond->waiters; link != NULL; link = link->next)
+               ksem_up(&link->sem, 1);
+       cond->waiters = NULL;
+}
+
+/* Wake every waiter of cond; takes and releases cond->guard itself. */
+void kcond_broadcast(struct kcond *cond)
+{
+       SLASSERT(cond != NULL);
+       SLASSERT(cond->magic == KCOND_MAGIC);
+
+       kspin_lock(&cond->guard);
+       kcond_broadcast_guard(cond);
+       kspin_unlock(&cond->guard);
+}
+
+/*
+ * Initialise a sleeping read/write semaphore: a condition variable plus a
+ * count that starts at 0 (unlocked).
+ */
+void krw_sem_init(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+
+       kcond_init(&sem->cond);
+       sem->count = 0;
+       ON_SYNC_DEBUG(sem->magic = KRW_MAGIC);
+}
+
+/* Finalise sem; it is asserted to be unlocked (count == 0). */
+void krw_sem_done(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+       SLASSERT(sem->count == 0);
+       kcond_done(&sem->cond);
+}
+
+/*
+ * Take sem for reading: sleep on the condition while count is negative
+ * (write-locked), then increment count.
+ */
+void krw_sem_down_r(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+       SLASSERT(get_preemption_level() == 0);
+
+       kspin_lock(&sem->cond.guard);
+       while (sem->count < 0)
+               kcond_wait_guard(&sem->cond);
+       ++ sem->count;
+       kspin_unlock(&sem->cond.guard);
+}
+
+/*
+ * Non-blocking krw_sem_down_r(): returns 0 after taking a read hold, or
+ * -EBUSY if sem is write-locked (count < 0).
+ */
+int krw_sem_down_r_try(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+
+       kspin_lock(&sem->cond.guard);
+       if (sem->count < 0) {
+               kspin_unlock(&sem->cond.guard);
+                return -EBUSY;
+        }
+       ++ sem->count;
+       kspin_unlock(&sem->cond.guard);
+        return 0;
+}
+
+/*
+ * Acquire @sem for writing, sleeping while anybody (readers or another
+ * writer) holds it (count != 0). May block, hence the preemption-level
+ * assertion.
+ */
+void krw_sem_down_w(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+       SLASSERT(get_preemption_level() == 0);
+
+       kspin_lock(&sem->cond.guard);
+       /*
+        * Re-check after every wakeup: another thread may have won the race.
+        * NOTE(review): kcond_wait_guard() presumably drops the guard while
+        * sleeping and re-takes it before returning -- confirm.
+        */
+       while (sem->count != 0)
+               kcond_wait_guard(&sem->cond);
+       /* -1 marks "held by a writer" */
+       sem->count = -1;
+       kspin_unlock(&sem->cond.guard);
+}
+
+/*
+ * Non-blocking variant of krw_sem_down_w().
+ *
+ * Returns 0 when the write lock was taken, -EBUSY when @sem is held by
+ * readers or by another writer (count != 0).
+ */
+int krw_sem_down_w_try(struct krw_sem *sem)
+{
+       int rc = -EBUSY;
+
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+
+       kspin_lock(&sem->cond.guard);
+       if (sem->count == 0) {
+               /* idle: mark as held by a writer */
+               sem->count = -1;
+               rc = 0;
+       }
+       kspin_unlock(&sem->cond.guard);
+       return rc;
+}
+
+/*
+ * Drop one read hold on @sem. When the last reader leaves (count reaches
+ * 0) every thread sleeping on the semaphore is woken so that it can
+ * re-evaluate the state.
+ */
+void krw_sem_up_r(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+       SLASSERT(sem->count > 0);
+
+       kspin_lock(&sem->cond.guard);
+       if (--sem->count == 0)
+               kcond_broadcast_guard(&sem->cond);
+       kspin_unlock(&sem->cond.guard);
+}
+
+/*
+ * Release the write hold on @sem (count must be -1) and wake every
+ * thread sleeping on the semaphore so that it can re-evaluate the state.
+ *
+ * The broadcast is issued while cond.guard is still held, exactly as
+ * krw_sem_up_r() does; this avoids dropping the guard only to re-take it
+ * immediately inside kcond_broadcast().
+ */
+void krw_sem_up_w(struct krw_sem *sem)
+{
+       SLASSERT(sem != NULL);
+       SLASSERT(sem->magic == KRW_MAGIC);
+       SLASSERT(sem->count == -1);
+
+       kspin_lock(&sem->cond.guard);
+       sem->count = 0;
+       kcond_broadcast_guard(&sem->cond);
+       kspin_unlock(&sem->cond.guard);
+}
+
+/*
+ * Initialize sleep channel @chan: its guard spin-lock and an empty list
+ * of waiters.
+ */
+void ksleep_chan_init(struct ksleep_chan *chan)
+{
+       SLASSERT(chan != NULL);
+
+       kspin_init(&chan->guard);
+       CFS_INIT_LIST_HEAD(&chan->waiters);
+       ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC);
+}
+
+/*
+ * Finalize sleep channel @chan. No link may still be queued on it
+ * (asserted below).
+ */
+void ksleep_chan_done(struct ksleep_chan *chan)
+{
+       SLASSERT(chan != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+       SLASSERT(list_empty(&chan->waiters));
+       kspin_done(&chan->guard);
+}
+
+/*
+ * Initialize @link for the calling thread: the wakeup event is the
+ * current thread, no flags, no pending hits, no forwarding channel, and
+ * the link is not queued anywhere.
+ */
+void ksleep_link_init(struct ksleep_link *link)
+{
+       SLASSERT(link != NULL);
+
+       link->event   = current_thread();
+       link->flags   = 0;
+       link->hits    = 0;
+       link->forward = NULL;
+       CFS_INIT_LIST_HEAD(&link->linkage);
+       ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC);
+}
+
+/*
+ * Finalize @link. It must already have been removed from its channel
+ * (asserted below); nothing else is released here.
+ */
+void ksleep_link_done(struct ksleep_link *link)
+{
+       SLASSERT(link != NULL);
+       SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
+       SLASSERT(list_empty(&link->linkage));
+}
+
+/*
+ * Queue @link on @chan. The link must not be queued anywhere yet.
+ *
+ * Links flagged KSLEEP_EXCLUSIVE are inserted with list_add_tail(), all
+ * others with list_add().
+ * NOTE(review): presumably these follow the Linux list semantics (tail vs.
+ * head insertion), which would place non-exclusive waiters ahead of the
+ * exclusive ones for the scan in ksleep_wake_nr() -- confirm.
+ */
+void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link)
+{
+       SLASSERT(chan != NULL);
+       SLASSERT(link != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+       SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
+       SLASSERT(list_empty(&link->linkage));
+
+       kspin_lock(&chan->guard);
+       if ((link->flags & KSLEEP_EXCLUSIVE) == 0)
+               list_add(&link->linkage, &chan->waiters);
+       else
+               list_add_tail(&link->linkage, &chan->waiters);
+       kspin_unlock(&chan->guard);
+}
+
+/*
+ * Remove @link from @chan under the channel guard. list_del_init() is
+ * used, so the link can afterwards pass the list_empty() checks in
+ * ksleep_add() and ksleep_link_done().
+ */
+void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link)
+{
+       SLASSERT(chan != NULL);
+       SLASSERT(link != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+       SLASSERT(link->magic == KSLEEP_LINK_MAGIC);
+
+       kspin_lock(&chan->guard);
+       list_del_init(&link->linkage);
+       kspin_unlock(&chan->guard);
+}
+
+/*
+ * Look on @chan for a link that belongs to @event and has at least one
+ * recorded hit. If one is found, a single hit is consumed and 1 is
+ * returned; otherwise 0. The caller must hold chan->guard.
+ */
+static int has_hits(struct ksleep_chan *chan, event_t event)
+{
+       struct ksleep_link *scan;
+
+       SLASSERT(kspin_islocked(&chan->guard));
+       list_for_each_entry(scan, &chan->waiters, linkage) {
+               if (scan->event != event || scan->hits <= 0)
+                       continue;
+               /* consume hit */
+               scan->hits--;
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * Record one hit on the first link of @chan that belongs to @event, if
+ * any. The caller must hold chan->guard.
+ */
+static void add_hit(struct ksleep_chan *chan, event_t event)
+{
+       struct ksleep_link *scan;
+
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       SLASSERT(kspin_islocked(&chan->guard));
+       list_for_each_entry(scan, &chan->waiters, linkage) {
+               if (scan->event != event)
+                       continue;
+               scan->hits++;
+               break;
+       }
+}
+
+/*
+ * Put the calling thread to sleep on @chan in task state @state.
+ *
+ * If a hit is already recorded for this thread on @chan, it is consumed
+ * and the function returns without sleeping. Otherwise the wait is
+ * asserted while the channel guard is still held and the thread blocks
+ * after the guard has been dropped.
+ */
+void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state)
+{
+       event_t event;
+       int     result;
+
+       ENTRY;
+
+       SLASSERT(chan != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+       SLASSERT(get_preemption_level() == 0);
+
+       event = current_thread();
+       kspin_lock(&chan->guard);
+       if (has_hits(chan, event)) {
+               /* wakeup already recorded: nothing to wait for */
+               kspin_unlock(&chan->guard);
+               EXIT;
+               return;
+       }
+
+       result = assert_wait(event, state);
+       kspin_unlock(&chan->guard);
+       SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
+       if (result == THREAD_WAITING)
+               thread_block(THREAD_CONTINUE_NULL);
+       EXIT;
+}
+
+/*
+ * Sleep on @chan for no longer than @timeout nano-seconds. A @timeout of 0
+ * arms no timer at all.
+ *
+ * Returns the remaining sleep time in nano-seconds: non-zero only if the
+ * thread was woken before the deadline (wakeup by signal is not currently
+ * implemented), or if the waitq was already in the "signalled" state, in
+ * which case @timeout is returned unchanged. Returns 0 when the wait timed
+ * out, and also when no timer was armed (@timeout == 0).
+ */
+int64_t ksleep_timedwait(struct ksleep_chan *chan,
+                         cfs_task_state_t state,
+                         __u64 timeout)
+{
+       event_t event;
+
+       ENTRY;
+
+       SLASSERT(chan != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+       SLASSERT(get_preemption_level() == 0);
+
+       event = current_thread();
+       kspin_lock(&chan->guard);
+       if (!has_hits(chan, event)) {
+               int   result;
+               /*
+                * Must start at 0: with @timeout == 0 no deadline is ever
+                * computed, yet the remaining-time calculation below still
+                * reads expire. 0 makes that path yield "nothing left".
+                */
+               __u64 expire = 0;
+
+               result = assert_wait(event, state);
+               if (timeout > 0) {
+                       /*
+                        * arm a timer. thread_set_timer()'s first argument is
+                        * uint32_t, so we have to cook deadline ourselves.
+                        */
+                       nanoseconds_to_absolutetime(timeout, &expire);
+                       clock_absolutetime_interval_to_deadline(expire, &expire);
+                       thread_set_timer_deadline(expire);
+               }
+               kspin_unlock(&chan->guard);
+               SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING);
+               if (result == THREAD_WAITING)
+                       result = thread_block(THREAD_CONTINUE_NULL);
+               thread_cancel_timer();
+
+               if (result == THREAD_TIMED_OUT)
+                       timeout = 0;
+               else {
+                       __u64 now;
+
+                       /* woken early: report what is left of the interval */
+                       clock_get_uptime(&now);
+                       if (expire > now)
+                               absolutetime_to_nanoseconds(expire - now, &timeout);
+                       else
+                               timeout = 0;
+               }
+       } else  {
+               /* just return timeout, because I've got event and don't need to wait */
+               kspin_unlock(&chan->guard);
+       }
+
+       RETURN(timeout);
+}
+
+/*
+ * Wake up a single exclusive waiter (plus some arbitrary number of
+ * non-exclusive ones). Shorthand for ksleep_wake_nr(chan, 1).
+ */
+void ksleep_wake(struct ksleep_chan *chan)
+{
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       ksleep_wake_nr(chan, 1);
+}
+
+/*
+ * Wake up all waiters on @chan. Shorthand for ksleep_wake_nr(chan, 0),
+ * where a count of 0 means "no limit".
+ */
+void ksleep_wake_all(struct ksleep_chan *chan)
+{
+       ENTRY;
+       ksleep_wake_nr(chan, 0);
+       EXIT;
+}
+
+/*
+ * Wake up no more than @nr exclusive waiters from @chan, plus some arbitrary
+ * number of non-exclusive ones. If @nr is 0, wake up all waiters.
+ *
+ * A waiter that is not actually sleeping when its wakeup is delivered
+ * (thread_wakeup() reports KERN_NOT_WAITING) gets a "hit" recorded on its
+ * link instead, and also on the channel the link forwards to, if any.
+ * has_hits() consumes such hits later.
+ */
+void ksleep_wake_nr(struct ksleep_chan *chan, int nr)
+{
+       struct ksleep_link *scan;
+       int result;
+
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       SLASSERT(chan != NULL);
+       SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC);
+
+       kspin_lock(&chan->guard);
+       list_for_each_entry(scan, &chan->waiters, linkage) {
+               struct ksleep_chan *forward;
+
+               forward = scan->forward;
+               /* the forward channel's guard nests inside chan->guard */
+               if (forward != NULL)
+                       kspin_lock(&forward->guard);
+               result = thread_wakeup(scan->event);
+               SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING);
+               if (result == KERN_NOT_WAITING) {
+                       /* nobody was sleeping: remember the wakeup */
+                       ++ scan->hits;
+                       if (forward != NULL)
+                               add_hit(forward, scan->event);
+               }
+               if (forward != NULL)
+                       kspin_unlock(&forward->guard);
+               /*
+                * Only exclusive waiters count against @nr. With @nr == 0
+                * the pre-decrement goes negative and never equals 0, so
+                * the whole list is scanned.
+                */
+               if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0)
+                       break;
+       }
+       kspin_unlock(&chan->guard);
+}
+
+/*
+ * Initialize timer @t so that, once armed, it calls @func(@arg) on expiry.
+ *
+ * The timer starts disarmed with a zero deadline; without this,
+ * ktimer_is_armed() and ktimer_deadline() would return whatever happened
+ * to be in the structure before the first ktimer_arm().
+ */
+void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(func != NULL);
+
+       kspin_init(&t->guard);
+       t->func     = func;
+       t->arg      = arg;
+       t->armed    = 0;
+       t->deadline = 0;
+       ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC);
+}
+
+/*
+ * Finalize timer @t: release its guard and, in debug builds, clear the
+ * magic so that later use trips the KTIMER_MAGIC assertions.
+ * Note that this does not cancel a pending callout by itself.
+ */
+void ktimer_done(struct ktimer *t)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+       kspin_done(&t->guard);
+       ON_SYNC_DEBUG(t->magic = 0);
+}
+
+/*
+ * Callout handed to the thread_call machinery; @arg0 is the ktimer,
+ * @arg1 is unused.
+ *
+ * The armed flag is sampled and cleared under the timer guard; the user
+ * function runs afterwards, outside the guard, and only if the timer was
+ * still armed at that point.
+ */
+static void ktimer_actor(void *arg0, void *arg1)
+{
+       struct ktimer *t = arg0;
+       int            was_armed;
+
+       /*
+        * this assumes that ktimer's are never freed.
+        */
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       kspin_lock(&t->guard);
+       was_armed = t->armed;
+       t->armed  = 0;
+       kspin_unlock(&t->guard);
+
+       /*
+        * call actual timer function
+        */
+       if (was_armed)
+               t->func(t->arg);
+}
+
+/*
+ * Kernel thread_call entry points used by the timer code below; they are
+ * declared here by hand.
+ */
+extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t);
+extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, __u64);
+
+/*
+ * Cancel a pending ktimer_actor() callout for @t, if any. Both callers in
+ * this file invoke it with t->guard held. The armed flag is not touched
+ * here; callers maintain it.
+ */
+static void ktimer_disarm_locked(struct ktimer *t)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       thread_call_func_cancel(ktimer_actor, t, FALSE);
+}
+
+/*
+ * (Re-)arm timer @t to fire at @deadline.
+ *
+ * Received deadline is nanoseconds, but time checked by
+ * thread_call is absolute time (The abstime unit is equal to
+ * the length of one bus cycle, so the duration is dependent
+ * on the bus speed of the computer), so we need to convert
+ * nanotime to abstime by nanoseconds_to_absolutetime() and hand the
+ * converted value, not the raw nanoseconds, to thread_call.
+ *
+ * Refer to _delayed_call_timer(...)
+ *
+ * if thread_call_func_delayed is not exported in the future,
+ * we can use timeout() or bsd_timeout() to replace it.
+ */
+void ktimer_arm(struct ktimer *t, u_int64_t deadline)
+{
+       cfs_time_t    abstime;
+
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       kspin_lock(&t->guard);
+       /* drop any callout that is still pending from an earlier arm */
+       ktimer_disarm_locked(t);
+       t->armed = 1;
+       /*
+        * remember the requested deadline (still in nanoseconds) for
+        * ktimer_deadline()
+        */
+       t->deadline = deadline;
+       nanoseconds_to_absolutetime(deadline, &abstime);
+       thread_call_func_delayed(ktimer_actor, t, abstime);
+       kspin_unlock(&t->guard);
+}
+
+/*
+ * Disarm timer @t: clear the armed flag and cancel the pending callout,
+ * both under the timer guard. Because ktimer_actor() re-checks the flag
+ * under the same guard, a callout that has already been dispatched will
+ * not invoke the user function once the flag is cleared.
+ */
+void ktimer_disarm(struct ktimer *t)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       kspin_lock(&t->guard);
+       t->armed = 0;
+       ktimer_disarm_locked(t);
+       kspin_unlock(&t->guard);
+}
+
+/*
+ * Return the current value of @t's armed flag (non-zero while armed).
+ * The flag is read without the guard, so the answer may already be stale
+ * by the time the caller looks at it.
+ */
+int ktimer_is_armed(struct ktimer *t)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       /*
+        * no locking---result is only a hint anyway.
+        */
+       return t->armed;
+}
+
+/*
+ * Return the deadline stored in @t. The field is read without taking the
+ * timer guard.
+ */
+u_int64_t ktimer_deadline(struct ktimer *t)
+{
+       SLASSERT(t != NULL);
+       SLASSERT(t->magic == KTIMER_MAGIC);
+
+       return t->deadline;
+}
+
+/*
+ * Module-wide set-up for the synchronization primitives. On Darwin 8 this
+ * allocates the "libcfs sync" lock group stored in cfs_lock_grp; on other
+ * builds it does nothing.
+ */
+void cfs_sync_init(void) 
+{
+#ifdef __DARWIN8__
+        /* Initialize lock group */
+        cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL);
+#endif
+}
+
+/*
+ * Module-wide tear-down, counterpart of cfs_sync_init(). On Darwin 8 this
+ * drops the reference on the lock group and forgets the pointer.
+ */
+void cfs_sync_fini(void)
+{
+#ifdef __DARWIN8__
+        /*
+         * XXX Liang: destroy lock group. As we haven't called lock_done
+         * for all locks, cfs_lock_grp may not be freed by the kernel
+         * (reference count > 1).
+         */
+        lck_grp_free(cfs_lock_grp);
+        cfs_lock_grp = NULL;
+#endif
+}
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/libcfs/darwin/darwin-tcpip.c b/libcfs/libcfs/darwin/darwin-tcpip.c
new file mode 100644 (file)
index 0000000..c6609a7
--- /dev/null
@@ -0,0 +1,1339 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ * 
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ * 
+ * This file is part of Lustre, http://www.lustre.org.
+ * 
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ * 
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * 
+ * Darwin porting library
+ * Make things easy to port
+ */ 
+
+#include <mach/mach_types.h>
+#include <sys/file.h>
+#include <sys/mount.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/protosw.h>
+#include <net/if.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Return a sockaddr_in template whose first two members are set to
+ * sizeof(struct sockaddr_in) and AF_INET (sin_len and sin_family in the
+ * BSD layout); every remaining member is zero.
+ */
+static __inline__ struct sockaddr_in
+blank_sin()
+{
+        struct sockaddr_in  sin = { sizeof(sin), AF_INET };
+
+        return sin;
+}
+
+/*
+ * Free an interface name array of @n slots as produced by
+ * libcfs_ipif_enumerate(). Slots are released in order up to the first
+ * NULL entry, so a partially filled array can be passed as well; the
+ * array itself is freed last.
+ */
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+        int      i;
+
+        LASSERT (n > 0);
+
+        for (i = 0; i < n; i++) {
+                if (names[i] == NULL)
+                        break;
+                LIBCFS_FREE(names[i], IFNAMSIZ);
+        }
+
+        LIBCFS_FREE(names, n * sizeof(*names));
+}
+
+#ifdef __DARWIN8__
+/*
+ * Darwin 8.x
+ *
+ * No hacking of kernel structures here; everything goes through the KPI.
+ */
+
+/*
+ * Query interface @name through a temporary socket.
+ *
+ * On success returns 0 and sets *up to 1 with *ip and *mask holding the
+ * IPv4 address and netmask in host byte order. An interface that is down
+ * is also reported as success, with *up, *ip and *mask all set to 0.
+ * On failure returns a negative errno and leaves the outputs in an
+ * unspecified state.
+ */
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+        struct ifreq    ifr;
+        socket_t        so;
+        __u32           val;
+        int             nob;
+        int             rc;
+
+        rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
+                          NULL, NULL, &so);
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return rc;
+        }
+
+        nob = strnlen(name, IFNAMSIZ);
+        if (nob == IFNAMSIZ) {
+                /* no NUL within IFNAMSIZ bytes: bound the print, a plain
+                 * %s would run past the name */
+                CERROR("Interface name %.*s too long\n", nob, name);
+                rc = -EINVAL;
+                goto out;
+        }
+
+        /* name is known to fit from here on, so strcpy() is safe */
+        CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+        bzero(&ifr, sizeof(ifr));
+        strcpy(ifr.ifr_name, name);
+        rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr);
+
+        if (rc != 0) {
+                CERROR("Can't get flags for interface %s\n", name);
+                goto out;
+        }
+
+        if ((ifr.ifr_flags & IFF_UP) == 0) {
+                /* not an error: rc is still 0 here */
+                CDEBUG(D_NET, "Interface %s down\n", name);
+                *up = 0;
+                *ip = *mask = 0;
+                goto out;
+        }
+
+        *up = 1;
+
+        bzero(&ifr, sizeof(ifr));
+        strcpy(ifr.ifr_name, name);
+        *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+        rc = -sock_ioctl(so, SIOCGIFADDR, &ifr);
+
+        if (rc != 0) {
+                CERROR("Can't get IP address for interface %s\n", name);
+                goto out;
+        }
+
+        val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+        *ip = ntohl(val);
+
+        bzero(&ifr, sizeof(ifr));
+        strcpy(ifr.ifr_name, name);
+        *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+        rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr);
+
+        if (rc != 0) {
+                CERROR("Can't get netmask for interface %s\n", name);
+                goto out;
+        }
+
+        val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+        *mask = ntohl(val);
+out:
+        sock_close(so);
+        return rc;
+}
+
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+        /* Allocate and fill in 'names', returning # interfaces/error */
+        char           **names;
+        int             toobig;
+        int             nalloc;
+        int             nfound;
+        socket_t        so;
+        struct ifreq   *ifr;
+        struct ifconf   ifc;
+        int             rc;
+        int             nob;
+        int             i;
+
+        rc = -sock_socket(PF_INET, SOCK_STREAM, 0, 
+                          NULL, NULL, &so);
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return (rc);
+        }
+
+        nalloc = 16;    /* first guess at max interfaces */
+        toobig = 0;
+        for (;;) {
+                if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
+                        toobig = 1;
+                        nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
+                        CWARN("Too many interfaces: only enumerating first %d\n",
+                              nalloc);
+                }
+
+                LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+                if (ifr == NULL) {
+                        CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+                                rc = -ENOMEM;
+                        goto out0;
+                }
+                                
+                ifc.ifc_buf = (char *)ifr;
+                ifc.ifc_len = nalloc * sizeof(*ifr);
+                                        
+#if 1
+                /*
+                 * XXX Liang:
+                 * sock_ioctl(..., SIOCGIFCONF, ...) is not supposed to be used in
+                 * kernel space because it always try to copy result to userspace. 
+                 * So we can't get interfaces name by sock_ioctl(...,SIOCGIFCONF,...).
+                 * I've created a bug for Apple, let's wait...
+                 */
+                nfound = 0;
+                for (i = 0; i < 16; i++) {
+                        struct ifreq    en;
+                        bzero(&en, sizeof(en));
+                        snprintf(en.ifr_name, IFNAMSIZ, "en%d", i);
+                        rc = -sock_ioctl (so, SIOCGIFFLAGS, &en);
+                        if (rc != 0)
+                                continue;
+                        strcpy(ifr[nfound++].ifr_name, en.ifr_name);
+                }
+
+#else           /* NOT in using now */
+                rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc);
+                                
+                if (rc < 0) {
+                        CERROR ("Error %d enumerating interfaces\n", rc);
+                        goto out1;
+                }
+
+                nfound = ifc.ifc_len/sizeof(*ifr);
+                LASSERT (nfound <= nalloc);
+#endif
+
+                if (nfound < nalloc || toobig)
+                        break;
+
+                LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+                nalloc *= 2;
+        }
+        if (nfound == 0)
+                goto out1;
+
+        LIBCFS_ALLOC(names, nfound * sizeof(*names));
+        if (names == NULL) {
+                rc = -ENOMEM;
+                goto out1;
+        }
+        /* NULL out all names[i] */
+        memset (names, 0, nfound * sizeof(*names));
+
+        for (i = 0; i < nfound; i++) {
+
+                nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+                if (nob == IFNAMSIZ) {
+                        /* no space for terminating NULL */
+                        CERROR("interface name %.*s too long (%d max)\n",
+                               nob, ifr[i].ifr_name, IFNAMSIZ);
+                        rc = -ENAMETOOLONG;
+                        goto out2;
+                }
+
+                LIBCFS_ALLOC(names[i], IFNAMSIZ);
+                if (names[i] == NULL) {
+                        rc = -ENOMEM;
+                        goto out2;
+                }
+
+                memcpy(names[i], ifr[i].ifr_name, nob);
+                names[i][nob] = 0;
+        }
+
+        *namesp = names;
+        rc = nfound;
+
+out2:
+        if (rc < 0)
+                libcfs_ipif_free_enumeration(names, nfound);
+out1:
+        LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+out0:
+        sock_close(so);
+        return rc;
+
+}
+
+/*
+ * Public entry point of the socket upcall.
+ *
+ * In Darwin 8.0 so_upcall can only be installed when a socket is created
+ * or accepted, so libcfs_sock_upcall() is registered as the upcall of
+ * every socket at create/accept time. It relays to the user-provided
+ * upcall, which can be set up later via libcfs_sock_set_cb().
+ *
+ * @arg is the cfs_socket_t the upcall was registered with.
+ */
+static void libcfs_sock_upcall(socket_t so, void* arg, int waitf)
+{
+        cfs_socket_t    *sock = arg;
+
+        LASSERT(sock->s_magic == CFS_SOCK_MAGIC);
+
+        /* nothing to relay to unless a user upcall is installed */
+        if ((sock->s_flags & CFS_SOCK_UPCALL) == 0 || sock->s_upcall == NULL)
+                return;
+
+        sock->s_upcall(so, sock->s_upcallarg, waitf);
+}
+
+/*
+ * Install @callback with argument @arg as the user upcall of @sock.
+ */
+void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg)
+{
+        /* store callback and argument first, then raise the flag that
+         * libcfs_sock_upcall() tests */
+        sock->s_upcall    = callback;
+        sock->s_upcallarg = arg;
+        sock->s_flags    |= CFS_SOCK_UPCALL;
+}
+
+/*
+ * Remove the user upcall of @sock; the inverse of libcfs_sock_set_cb().
+ */
+void libcfs_sock_reset_cb(cfs_socket_t *sock)
+{
+        /* lower the flag first, then forget callback and argument */
+        sock->s_flags    &= ~CFS_SOCK_UPCALL;
+        sock->s_upcall    = NULL;
+        sock->s_upcallarg = NULL;
+}
+
+/*
+ * Allocate a cfs_socket_t, create its TCP socket with libcfs_sock_upcall()
+ * registered as upcall, set SO_REUSEADDR and optionally bind it to
+ * @local_ip/@local_port (host byte order; 0 means "any"/"none").
+ *
+ * Returns 0 and stores the new socket in *sockp on success. On failure
+ * returns a negative errno, releases everything and sets *sockp to NULL,
+ * so the caller is never left holding a pointer to freed memory.
+ * *fatal is cleared only when the bind failed because the port is in use.
+ */
+static int
+libcfs_sock_create (cfs_socket_t **sockp, int *fatal,
+                    __u32 local_ip, int local_port)
+{
+        struct sockaddr_in  locaddr;
+        cfs_socket_t    *sock;
+        int             option;
+        int             optlen;
+        int             rc;
+
+        /* All errors are fatal except bind failure if the port is in use */
+        *fatal = 1;
+        *sockp = NULL;
+
+        sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
+        if (!sock) {
+                CERROR("Can't allocate cfs_socket.\n");
+                return -ENOMEM;
+        }
+        sock->s_magic = CFS_SOCK_MAGIC;
+
+        rc = -sock_socket(PF_INET, SOCK_STREAM, 0,
+                          libcfs_sock_upcall, sock, &C2B_SOCK(sock));
+        if (rc != 0)
+                goto out;
+        option = 1;
+        optlen = sizeof(option);
+        rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET,
+                              SO_REUSEADDR, &option, optlen);
+        if (rc != 0)
+                goto out;
+
+        /* a local IP may only be given together with a local port */
+        LASSERT (local_ip == 0 || local_port != 0);
+
+        if (local_ip != 0 || local_port != 0) {
+                bzero (&locaddr, sizeof (locaddr));
+                locaddr.sin_len = sizeof(struct sockaddr_in);
+                locaddr.sin_family = AF_INET;
+                locaddr.sin_port = htons (local_port);
+                locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : INADDR_ANY;
+                rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr);
+                if (rc == -EADDRINUSE) {
+                        CDEBUG(D_NET, "Port %d already in use\n", local_port);
+                        *fatal = 0;
+                        goto out;
+                }
+                if (rc != 0) {
+                        CERROR("Error trying to bind to port %d: %d\n",
+                               local_port, rc);
+                        goto out;
+                }
+        }
+
+        /* publish the socket only once it is fully set up */
+        *sockp = sock;
+        return 0;
+out:
+        /* the structure was zero-allocated, so the socket handle is
+         * still NULL if sock_socket() itself failed */
+        if (C2B_SOCK(sock) != NULL)
+                sock_close(C2B_SOCK(sock));
+        FREE(sock, M_TEMP);
+        return rc;
+}
+
+/*
+ * Create a socket bound to @local_ip/@local_port and put it into the
+ * listening state with the given @backlog.
+ *
+ * Returns 0 and stores the socket in *sockp on success; otherwise a
+ * negative errno, with the socket already released.
+ */
+int
+libcfs_sock_listen (cfs_socket_t **sockp,
+                   __u32 local_ip, int local_port, int backlog)
+{
+        cfs_socket_t    *sock;
+        int             fatal;
+        int             rc;
+
+        rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port);
+        if (rc != 0)  {
+                /* a busy port is only logged quietly by the create step,
+                 * but for a listener it is an error worth reporting */
+                if (!fatal)
+                        CERROR("Can't create socket: port %d already in use\n",
+                                local_port);
+                return rc;
+        }
+
+        rc = -sock_listen(C2B_SOCK(sock), backlog);
+        if (rc != 0) {
+                /* undo libcfs_sock_create() */
+                if (C2B_SOCK(sock) != NULL)
+                        sock_close(C2B_SOCK(sock));
+                FREE(sock, M_TEMP);
+                return rc;
+        }
+
+        *sockp = sock;
+        return 0;
+}
+
+/*
+ * Accept one connection on listening socket @sock.
+ *
+ * Returns 0 and stores the new socket in *newsockp on success; otherwise
+ * a negative errno. If the accept failed after @sock was marked
+ * CFS_SOCK_DOWN, -EAGAIN is returned in place of the real error.
+ */
+int
+libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock)
+{
+        cfs_socket_t   *newsock;
+        int             rc;
+
+        newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO);
+        if (!newsock) {
+                CERROR("Can't allocate cfs_socket.\n");
+                return -ENOMEM;
+        }
+        newsock->s_magic = CFS_SOCK_MAGIC;
+        /*
+         * thread will sleep in sock_accept by calling of msleep(),
+         * it can be interrupted because msleep() use PCATCH as argument.
+         */
+        rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0,
+                          libcfs_sock_upcall, newsock, &C2B_SOCK(newsock));
+        if (rc == 0) {
+                *newsockp = newsock;
+                return 0;
+        }
+
+        /* failure: release whatever was set up for the new socket */
+        if (C2B_SOCK(newsock) != NULL)
+                sock_close(C2B_SOCK(newsock));
+        FREE(newsock, M_TEMP);
+
+        /* shutdown by libcfs_sock_abort_accept(), fake
+         * error number for lnet_acceptor() */
+        if ((sock->s_flags & CFS_SOCK_DOWN) != 0)
+                rc = -EAGAIN;
+        return rc;
+}
+
+/*
+ * Make a thread blocked in libcfs_sock_accept() on @sock return: the
+ * socket is flagged CFS_SOCK_DOWN and then shut down in both directions.
+ */
+void
+libcfs_sock_abort_accept (cfs_socket_t *sock)
+{
+        /*
+         * XXX Liang:
+         *
+         * We want to wake up the thread blocked in sock_accept, but we
+         * don't know the address the thread is sleeping on, so we cannot
+         * wake it up directly.
+         * The thread sleeping in sock_accept will be woken up when:
+         * 1. it is interrupted by a signal
+         * 2. a new connection is coming in (sonewconn)
+         * 3. the socket is disconnected (soisconnected)
+         *
+         * Because we can't send a signal to a thread directly (no KPI),
+         * the only thing that can be done here is to disconnect the
+         * socket (by sock_shutdown() or something else?).
+         *
+         * A shutdown request with SHUT_WR or SHUT_RDWR will be issued to
+         * the protocol:
+         * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()->
+         * tcp_close()->soisdisconnected(), which wakes the thread up by
+         * wakeup((caddr_t)&so->so_timeo);
+         */
+        sock->s_flags |= CFS_SOCK_DOWN;
+        sock_shutdown(C2B_SOCK(sock), SHUT_RDWR);
+}
+
+/*
+ * Read exactly @nob bytes from @sock into @buffer, waiting no longer than
+ * @timeout seconds in total.
+ *
+ * Returns 0 once all @nob bytes have arrived, -EAGAIN when the time
+ * budget ran out first, -ECONNRESET when the receive completes
+ * successfully without delivering any data, or the negative errno
+ * reported by the socket layer.
+ */
+int
+libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout)
+{
+        int             rc;
+        cfs_duration_t  to = cfs_time_seconds(timeout);
+        cfs_time_t      then;
+        struct timeval  tv;
+
+        LASSERT(nob > 0);
+
+        for (;;) {
+                /* reset every round so that a failed receive can never
+                 * leave a stale or indeterminate byte count behind */
+                size_t        rcvlen = 0;
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct  msghdr  msg = {
+                        .msg_name       = NULL,
+                        .msg_namelen    = 0,
+                        .msg_iov        = &iov,
+                        .msg_iovlen     = 1,
+                        .msg_control    = NULL,
+                        .msg_controllen = 0,
+                        .msg_flags      = 0,
+                };
+
+                /* the receive timeout is whatever is left of the budget */
+                cfs_duration_usec(to, &tv);
+                rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO,
+                                      &tv, sizeof(tv));
+                if (rc != 0) {
+                        CERROR("Can't set socket recv timeout "
+                                        "%ld.%06d: %d\n",
+                                        (long)tv.tv_sec, (int)tv.tv_usec, rc);
+                        return rc;
+                }
+
+                then = cfs_time_current();
+                rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen);
+                to -= cfs_time_current() - then;
+
+                if (rc != 0 && rc != -EWOULDBLOCK)
+                        return rc;
+                if (rcvlen == nob)
+                        return 0;
+
+                /*
+                 * A successful receive that delivers nothing would otherwise
+                 * be retried in a tight loop until the budget expires.
+                 * NOTE(review): this is taken to mean the peer closed the
+                 * connection (usual BSD socket semantics) -- confirm against
+                 * the sock_receive() KPI.
+                 */
+                if (rc == 0 && rcvlen == 0)
+                        return -ECONNRESET;
+
+                if (to <= 0)
+                        return -EAGAIN;
+
+                /* partial read: continue behind what has arrived so far */
+                buffer = ((char *)buffer) + rcvlen;
+                nob -= rcvlen;
+        }
+}
+
+/*
+ * Write exactly @nob bytes from @buffer to @sock, waiting no longer than
+ * @timeout seconds in total. With @timeout == 0 a single non-blocking
+ * (MSG_DONTWAIT) send is attempted.
+ *
+ * Returns 0 once all @nob bytes have been sent, -EAGAIN when the time
+ * budget ran out first (which includes a non-blocking send that could not
+ * take everything), or the negative errno reported by the socket layer.
+ */
+int
+libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout)
+{
+        int             rc;
+        cfs_duration_t  to = cfs_time_seconds(timeout);
+        cfs_time_t      then;
+        struct timeval  tv;
+
+        LASSERT(nob > 0);
+
+        for (;;) {
+                /* reset every round so that a failed send can never leave
+                 * a stale or indeterminate byte count behind */
+                size_t        sndlen = 0;
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct  msghdr  msg = {
+                        .msg_name       = NULL,
+                        .msg_namelen    = 0,
+                        .msg_iov        = &iov,
+                        .msg_iovlen     = 1,
+                        .msg_control    = NULL,
+                        .msg_controllen = 0,
+                        .msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0,
+                };
+
+                if (timeout != 0) {
+                        /* the send timeout is whatever is left of the budget */
+                        cfs_duration_usec(to, &tv);
+                        rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO,
+                                              &tv, sizeof(tv));
+                        if (rc != 0) {
+                                CERROR("Can't set socket send timeout "
+                                       "%ld.%06d: %d\n",
+                                       (long)tv.tv_sec, (int)tv.tv_usec, rc);
+                                return rc;
+                        }
+                }
+
+                then = cfs_time_current();
+                rc = -sock_send(C2B_SOCK(sock), &msg,
+                                ((timeout == 0) ? MSG_DONTWAIT : 0), &sndlen);
+                to -= cfs_time_current() - then;
+
+                if (rc != 0 && rc != -EWOULDBLOCK)
+                        return rc;
+                if (sndlen == nob)
+                        return 0;
+
+                if (to <= 0)
+                        return -EAGAIN;
+
+                /* partial write: continue behind what has been sent so far */
+                buffer = ((char *)buffer) + sndlen;
+                nob -= sndlen;
+        }
+}
+
+/*
+ * Fetch the peer address (@remote != 0) or the local address
+ * (@remote == 0) of @sock.
+ *
+ * @ip and @port are optional outputs (either may be NULL) and receive the
+ * values in host byte order. Returns 0 on success, negative errno
+ * otherwise.
+ */
+int
+libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port)
+{
+        struct sockaddr_in sin;
+        int                rc;
+
+        rc = (remote != 0) ?
+                -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)) :
+                -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin));
+        if (rc != 0) {
+                CERROR ("Error %d getting sock %s IP/port\n",
+                         rc, remote ? "peer" : "local");
+                return rc;
+        }
+
+        if (ip != NULL)
+                *ip = ntohl (sin.sin_addr.s_addr);
+        if (port != NULL)
+                *port = ntohs (sin.sin_port);
+
+        return 0;
+}
+
+/*
+ * Set the send and/or receive buffer size of 'sock'.
+ *
+ * A size of 0 means "leave that buffer alone".  Returns 0 on success or the
+ * negated error from sock_setsockopt(); a failure on the send buffer stops
+ * before the receive buffer is touched.
+ */
+int
+libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize)
+{
+        int                 value;
+        int                 err;
+
+        if (txbufsize != 0) {
+                value = txbufsize;
+                err = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
+                                       (char *)&value, sizeof (value));
+                if (err != 0) {
+                        CERROR ("Can't set send buffer %d: %d\n",
+                                value, err);
+                        return err;
+                }
+        }
+
+        if (rxbufsize == 0)
+                return 0;
+
+        value = rxbufsize;
+        err = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
+                                (char *)&value, sizeof (value));
+        if (err != 0) {
+                CERROR ("Can't set receive buffer %d: %d\n",
+                        value, err);
+                return err;
+        }
+        return 0;
+}
+
+/*
+ * Read back the send and/or receive buffer size of 'sock'.
+ *
+ * Either output pointer may be NULL to skip that query.  Returns 0 on
+ * success or the negated error from sock_getsockopt(); an output is only
+ * written when its query succeeded.
+ */
+int
+libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize)
+{
+        int                 value;
+        int                 vallen;
+        int                 err;
+
+        if (txbufsize != NULL) {
+                vallen = sizeof(value);
+                err = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF,
+                                       (char *)&value, &vallen);
+                if (err != 0) {
+                        CERROR ("Can't get send buffer size: %d\n", err);
+                        return err;
+                }
+                *txbufsize = value;
+        }
+
+        if (rxbufsize == NULL)
+                return 0;
+
+        vallen = sizeof(value);
+        err = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF,
+                                (char *)&value, &vallen);
+        if (err != 0) {
+                CERROR ("Can't get receive buffer size: %d\n", err);
+                return err;
+        }
+        *rxbufsize = value;
+        return 0;
+}
+
+/*
+ * Tear down a libcfs socket wrapper.
+ *
+ * If the wrapper still refers to an underlying socket (C2B_SOCK(sock) is
+ * non-NULL) that socket is shut down and closed first; the wrapper itself is
+ * then released with FREE(..., M_TEMP).  'sock' must not be used afterwards.
+ */
+void
+libcfs_sock_release (cfs_socket_t *sock)
+{
+        if (C2B_SOCK(sock) != NULL) {
+                /* how == 2: presumably "both directions" (SHUT_RDWR) -- confirm */
+                sock_shutdown(C2B_SOCK(sock), 2);
+                sock_close(C2B_SOCK(sock));
+        }
+        FREE(sock, M_TEMP);
+}
+
+/*
+ * Create a socket bound to local_ip/local_port and connect it to
+ * peer_ip/peer_port (all in host byte order).
+ *
+ * On success the connected socket is stored in *sockp and 0 is returned.
+ * On failure the socket is released, a negative errno is returned and
+ * *fatal says whether retrying is pointless: it is cleared only when
+ * the connect failed with EADDRNOTAVAIL or EADDRINUSE.
+ */
+int
+libcfs_sock_connect (cfs_socket_t **sockp, int *fatal,
+                     __u32 local_ip, int local_port,
+                     __u32 peer_ip, int peer_port)
+{
+        struct sockaddr_in  peer;
+        cfs_socket_t       *newsock;
+        int                 err;
+
+        err = libcfs_sock_create(&newsock, fatal, local_ip, local_port);
+        if (err != 0)
+                return err;
+
+        bzero(&peer, sizeof(peer));
+        peer.sin_family = AF_INET;
+        peer.sin_len = sizeof(struct sockaddr_in);
+        peer.sin_addr.s_addr = htonl(peer_ip);
+        peer.sin_port = htons(peer_port);
+
+        err = -sock_connect(C2B_SOCK(newsock), (struct sockaddr *)&peer, 0);
+        if (err != 0) {
+                /* address clashes are worth a retry; anything else is fatal */
+                *fatal = !(err == -EADDRNOTAVAIL || err == -EADDRINUSE);
+                CDEBUG(*fatal ? D_NETERROR : D_NET,
+                       "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", err,
+                       HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+
+                libcfs_sock_release(newsock);
+                return err;
+        }
+
+        *sockp = newsock;
+        return 0;
+}
+
+#else   /* !__DARWIN8__ */
+
+/*
+ * To use bigger buffer for socket:
+ * 1. Increase nmbclusters (cannot be increased via sysctl because it is read-only,
+ *    so the kernel must be patched).
+ * 2. Increase net.inet.tcp.reass.maxsegments
+ * 3. Increase net.inet.tcp.sendspace
+ * 4. Increase net.inet.tcp.recvspace
+ * 5. Increase kern.ipc.maxsockbuf
+ */
+#define KSOCK_MAX_BUF        (1152*1024)
+
+/*
+ * Look up the state, IPv4 address and netmask of the interface 'name'.
+ *
+ * A temporary PF_INET socket is created only to serve as a handle for the
+ * SIOCGIFFLAGS / SIOCGIFADDR / SIOCGIFNETMASK ioctls; it is closed again
+ * before returning.
+ *
+ *   name  interface name; must be shorter than IFNAMSIZ
+ *   up    set to 1 if IFF_UP is set on the interface, 0 otherwise
+ *   ip    interface address, host byte order (0 if the interface is down)
+ *   mask  interface netmask, host byte order (0 if the interface is down)
+ *
+ * Returns 0 on success (an interface that is down is still "success") or a
+ * negative errno on failure.
+ */
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+        struct socket      *so;
+        struct ifreq       ifr;
+        int                nob;
+        int                rc;         /* positive errno; negated on return */
+        __u32              val;
+        CFS_DECL_FUNNEL_DATA;
+
+        CFS_NET_IN;
+        rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+        CFS_NET_EX;
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return (-rc);
+        }
+        nob = strnlen(name, IFNAMSIZ);
+        if (nob == IFNAMSIZ) {
+                CERROR("Interface name %s too long\n", name);
+                /* 'rc' is negated at 'out', so it must be a positive errno
+                 * here; storing -EINVAL made the function return +EINVAL */
+                rc = EINVAL;
+                goto out;
+        }
+
+        /* the length check above guarantees the strcpy()s below fit */
+        CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+        strcpy(ifr.ifr_name, name);
+        CFS_NET_IN;
+        rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc());
+        CFS_NET_EX;
+
+        if (rc != 0) {
+                CERROR("Can't get flags for interface %s\n", name);
+                goto out;
+        }
+        if ((ifr.ifr_flags & IFF_UP) == 0) {
+                /* not an error: report "down" with a zero address/netmask */
+                CDEBUG(D_NET, "Interface %s down\n", name);
+                *up = 0;
+                *ip = *mask = 0;
+                goto out;
+        }
+
+        *up = 1;
+        strcpy(ifr.ifr_name, name);
+        *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+        CFS_NET_IN;
+        rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc());
+        CFS_NET_EX;
+
+        if (rc != 0) {
+                CERROR("Can't get IP address for interface %s\n", name);
+                goto out;
+        }
+
+        val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+        *ip = ntohl(val);
+
+        strcpy(ifr.ifr_name, name);
+        *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin();
+        CFS_NET_IN;
+        rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc());
+        CFS_NET_EX;
+
+        if (rc != 0) {
+                CERROR("Can't get netmask for interface %s\n", name);
+                goto out;
+        }
+
+        val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr;
+        *mask = ntohl(val);
+out:
+        CFS_NET_IN;
+        soclose(so);
+        CFS_NET_EX;
+        return -rc;
+}
+
+/*
+ * Build a list of the interface names reported by SIOCGIFCONF.
+ *
+ * Returns the number of interfaces found (>= 0) or a negative errno.  When
+ * at least one interface is found, *namesp is set to a freshly allocated
+ * array of that many NUL-terminated names, each in its own IFNAMSIZ-byte
+ * buffer; when none are found *namesp is not written.  On error everything
+ * allocated here is freed again.
+ */
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+        /* Allocate and fill in 'names', returning # interfaces/error */
+        char           **names;
+        int             toobig;
+        int             nalloc;
+        int             nfound;
+        struct socket  *so;
+        struct ifreq   *ifr;
+        struct ifconf   ifc;
+        int             rc;
+        int             nob;
+        int             i;
+        CFS_DECL_FUNNEL_DATA;
+
+        /* the socket is only a handle for the SIOCGIFCONF ioctl */
+        CFS_NET_IN;
+        rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+        CFS_NET_EX;
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return (-rc);
+        }
+
+        nalloc = 16;    /* first guess at max interfaces */
+        toobig = 0;
+        /* retry with a doubled buffer until the result no longer fills it,
+         * or the buffer has reached the one-page cap */
+        for (;;) {
+                if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
+                        toobig = 1;
+                        nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
+                        CWARN("Too many interfaces: only enumerating first %d\n",
+                              nalloc);
+                }
+
+                LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+                if (ifr == NULL) {
+                        CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+                                rc = -ENOMEM;
+                        goto out0;
+                }
+                                
+                ifc.ifc_buf = (char *)ifr;
+                ifc.ifc_len = nalloc * sizeof(*ifr);
+                                        
+                CFS_NET_IN;
+                rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc());
+                CFS_NET_EX;
+                                
+                if (rc < 0) {
+                        CERROR ("Error %d enumerating interfaces\n", rc);
+                        goto out1;
+                }
+
+                /* NOTE(review): assumes the ioctl returns fixed-size
+                 * struct ifreq records -- confirm for this platform */
+                nfound = ifc.ifc_len/sizeof(*ifr);
+                LASSERT (nfound <= nalloc);
+
+                /* a completely full buffer may have been truncated */
+                if (nfound < nalloc || toobig)
+                        break;
+
+                LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+                nalloc *= 2;
+        }
+        /* rc is 0 here (the ioctl succeeded), so "no interfaces" returns 0 */
+        if (nfound == 0)
+                goto out1;
+
+        LIBCFS_ALLOC(names, nfound * sizeof(*names));
+        if (names == NULL) {
+                rc = -ENOMEM;
+                goto out1;
+        }
+        /* NULL out all names[i] */
+        memset (names, 0, nfound * sizeof(*names));
+
+        for (i = 0; i < nfound; i++) {
+
+                nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+                if (nob == IFNAMSIZ) {
+                        /* no space for terminating NULL */
+                        CERROR("interface name %.*s too long (%d max)\n",
+                               nob, ifr[i].ifr_name, IFNAMSIZ);
+                        rc = -ENAMETOOLONG;
+                        goto out2;
+                }
+
+                LIBCFS_ALLOC(names[i], IFNAMSIZ);
+                if (names[i] == NULL) {
+                        rc = -ENOMEM;
+                        goto out2;
+                }
+
+                memcpy(names[i], ifr[i].ifr_name, nob);
+                names[i][nob] = 0;
+        }
+
+        *namesp = names;
+        rc = nfound;
+
+out2:
+        /* reached on success too; only a failure discards the names */
+        if (rc < 0)
+                libcfs_ipif_free_enumeration(names, nfound);
+out1:
+        LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+out0:
+        CFS_NET_IN;
+        soclose(so);
+        CFS_NET_EX;
+        return rc;
+}
+
+/*
+ * Create a TCP socket with SO_REUSEADDR set and, if a local address or port
+ * was given, bind it to local_ip/local_port (host byte order).
+ *
+ * On success the socket is stored in *sockp and 0 is returned.  On failure
+ * the socket is closed and a negative errno is returned; *fatal is left at 1
+ * unless the bind failed with EADDRINUSE, in which case it is cleared so
+ * the caller can retry with another port.
+ */
+static int
+libcfs_sock_create (struct socket **sockp, int *fatal,
+                    __u32 local_ip, int local_port)
+{
+        struct sockaddr_in  locaddr;
+        struct socket      *so;
+        struct sockopt      sopt;
+        int                 option;
+        int                 rc;         /* positive errno; negated on return */
+        CFS_DECL_FUNNEL_DATA;
+
+        *fatal = 1;
+        CFS_NET_IN;
+        rc = socreate(PF_INET, &so, SOCK_STREAM, 0);
+        CFS_NET_EX;
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return (-rc);
+        }
+
+        bzero(&sopt, sizeof sopt);
+        option = 1;
+        /* state the direction explicitly, as libcfs_sock_setbuf() does */
+        sopt.sopt_dir = SOPT_SET;
+        sopt.sopt_level = SOL_SOCKET;
+        sopt.sopt_name = SO_REUSEADDR;
+        sopt.sopt_val = &option;
+        sopt.sopt_valsize = sizeof(option);
+        CFS_NET_IN;
+        rc = sosetopt(so, &sopt);
+        if (rc != 0) {
+                CFS_NET_EX;
+                CERROR ("Can't set sock reuse address: %d\n", rc);
+                goto out;
+        }
+        /* NOTE(review): the original comment read "can't specify a local
+         * port without a local IP", but the assertion actually rejects a
+         * local IP given without a local port -- confirm which is meant */
+        LASSERT (local_ip == 0 || local_port != 0);
+
+        if (local_ip != 0 || local_port != 0) {
+                bzero (&locaddr, sizeof (locaddr));
+                locaddr.sin_len = sizeof(struct sockaddr_in);
+                locaddr.sin_family = AF_INET;
+                locaddr.sin_port = htons (local_port);
+                locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) :
+                                                            INADDR_ANY;
+
+                rc = sobind(so, (struct sockaddr *)&locaddr);
+                if (rc == EADDRINUSE) {
+                        CFS_NET_EX;
+                        CDEBUG(D_NET, "Port %d already in use\n", local_port);
+                        *fatal = 0;
+                        goto out;
+                }
+                if (rc != 0) {
+                        CFS_NET_EX;
+                        CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n",
+                                HIPQUAD(local_ip), rc);
+                        goto out;
+                }
+        }
+        /* balance the CFS_NET_IN taken before sosetopt(); the success path
+         * used to return without it, unlike every failure path */
+        CFS_NET_EX;
+        *sockp = so;
+        return 0;
+out:
+        CFS_NET_IN;
+        soclose(so);
+        CFS_NET_EX;
+        return -rc;
+}
+
+/*
+ * Create a socket bound to local_ip/local_port and put it into the
+ * listening state with the given backlog.
+ *
+ * Returns 0 with the listening socket in *sockp, or a negative errno.  If
+ * solisten() fails the socket is closed again before returning.
+ */
+int
+libcfs_sock_listen (struct socket **sockp,
+                    __u32 local_ip, int local_port, int backlog)
+{
+        int      is_fatal;
+        int      err;
+        CFS_DECL_FUNNEL_DATA;
+
+        err = libcfs_sock_create(sockp, &is_fatal, local_ip, local_port);
+        if (err != 0) {
+                /* a non-fatal create failure means the port was busy */
+                if (!is_fatal)
+                        CERROR("Can't create socket: port %d already in use\n",
+                               local_port);
+                return err;
+        }
+
+        CFS_NET_IN;
+        err = solisten(*sockp, backlog);
+        CFS_NET_EX;
+        if (err != 0) {
+                CERROR("Can't set listen backlog %d: %d\n", backlog, err);
+                CFS_NET_IN;
+                soclose(*sockp);
+                CFS_NET_EX;
+                return -err;
+        }
+
+        return 0;
+}
+
+/*
+ * Accept one pending connection from the listening socket 'sock'.
+ *
+ * Runs inside CFS_NET_IN/CFS_NET_EX at splnet().  Fails with -EINVAL if
+ * 'sock' is not accepting connections and with -EWOULDBLOCK if it is
+ * non-blocking and no completed connection is queued; otherwise sleeps on
+ * so_timeo until a connection completes, the socket reports an error, or
+ * the sleep itself fails.  On success the new socket is stored in
+ * *newsockp and 0 is returned; on failure a negative errno is returned.
+ */
+int
+libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
+{
+        struct socket *so;
+        struct sockaddr *sa;
+        int error, s;
+        CFS_DECL_FUNNEL_DATA;
+
+        CFS_NET_IN;
+        s = splnet();
+        if ((sock->so_options & SO_ACCEPTCONN) == 0) {
+                splx(s);
+                CFS_NET_EX;
+                return (-EINVAL);
+        }
+
+        if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) {
+                splx(s);
+                CFS_NET_EX;
+                return (-EWOULDBLOCK);
+        }
+
+        error = 0;
+        /* wait until the completed-connection queue is non-empty or the
+         * socket has an error pending */
+        while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) {
+                if (sock->so_state & SS_CANTRCVMORE) {
+                        sock->so_error = ECONNABORTED;
+                        break;
+                }
+                /* PCATCH: presumably lets a signal interrupt the sleep -- confirm */
+                error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH,
+                                "accept", 0);
+                if (error) {
+                        splx(s);
+                        CFS_NET_EX;
+                        return (-error);
+                }
+        }
+        if (sock->so_error) {
+                /* consume the pending error and report it to the caller */
+                error = sock->so_error;
+                sock->so_error = 0;
+                splx(s);
+                CFS_NET_EX;
+                return (-error);
+        }
+
+        /*
+         * At this point we know that there is at least one connection
+         * ready to be accepted. Remove it from the queue prior to
+         * allocating the file descriptor for it since falloc() may
+         * block allowing another process to accept the connection
+         * instead.
+         */
+        so = TAILQ_FIRST(&sock->so_comp);
+        TAILQ_REMOVE(&sock->so_comp, so, so_list);
+        sock->so_qlen--;
+
+        so->so_state &= ~SS_COMP;
+        so->so_head = NULL;
+        sa = 0;
+        /* NOTE(review): soaccept()'s result is discarded; the peer address
+         * it hands back is not needed and is freed below */
+        (void) soaccept(so, &sa);
+
+        *newsockp = so;
+        FREE(sa, M_SONAME);
+        splx(s);
+        CFS_NET_EX;
+        /* 'error' is necessarily 0 on this path */
+        return (-error);
+}
+
+/*
+ * Wake any thread sleeping on sock->so_timeo, which is the channel
+ * libcfs_sock_accept() sleeps on while waiting for a connection.
+ * NOTE(review): the woken accept loop re-checks its wait condition and
+ * will sleep again unless the socket state has changed -- confirm callers
+ * also shut the socket down.
+ */
+void
+libcfs_sock_abort_accept (struct socket *sock)
+{
+        wakeup(&sock->so_timeo);
+}
+
+/*
+ * XXX Liang: timeout for write is not supported yet.
+ */
+/*
+ * Send 'nob' bytes from 'buffer' on 'sock'; 'timeout' is ignored.
+ *
+ * A sosend() that fails with ERESTART, EINTR or EWOULDBLOCK after moving
+ * at least some data is resumed from where it stopped; any other failure
+ * is returned as a negative errno.  Returns 0 once sosend() succeeds.
+ */
+int
+libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
+{
+        int            err;
+        int            sent;
+        int            interrupted;
+        CFS_DECL_NET_DATA;
+
+        while (nob > 0) {
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct  uio suio = {
+                        .uio_iov        = &iov,
+                        .uio_iovcnt     = 1,
+                        .uio_offset     = 0,
+                        .uio_resid      = nob,
+                        .uio_segflg     = UIO_SYSSPACE,
+                        .uio_rw         = UIO_WRITE,
+                        .uio_procp      = NULL
+                };
+
+                CFS_NET_IN;
+                err = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0);
+                CFS_NET_EX;
+
+                if (err == 0)
+                        break;
+
+                /* only an interrupted send that made progress is resumed */
+                interrupted = (err == ERESTART || err == EINTR ||
+                               err == EWOULDBLOCK);
+                if (suio.uio_resid == nob || !interrupted)
+                        return -err;
+
+                sent = nob - suio.uio_resid;
+                buffer = ((char *)buffer) + sent;
+                nob = suio.uio_resid;
+        }
+        return (0);
+}
+
+/*
+ * XXX Liang: timeout for read is not supported yet.
+ */
+/*
+ * Receive 'nob' bytes from 'sock' into 'buffer'; 'timeout' is ignored.
+ *
+ * A soreceive() that fails with ERESTART, EINTR or EWOULDBLOCK after
+ * moving at least some data is resumed from where it stopped; any other
+ * failure is returned as a negative errno.  Returns 0 once soreceive()
+ * succeeds.
+ */
+int
+libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
+{
+        int            err;
+        int            got;
+        int            interrupted;
+        CFS_DECL_NET_DATA;
+
+        while (nob > 0) {
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct uio  ruio = {
+                        .uio_iov        = &iov,
+                        .uio_iovcnt     = 1,
+                        .uio_offset     = 0,
+                        .uio_resid      = nob,
+                        .uio_segflg     = UIO_SYSSPACE,
+                        .uio_rw         = UIO_READ,
+                        .uio_procp      = NULL
+                };
+
+                CFS_NET_IN;
+                err = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0);
+                CFS_NET_EX;
+
+                if (err == 0)
+                        break;
+
+                /* only an interrupted receive that made progress is resumed */
+                interrupted = (err == ERESTART || err == EINTR ||
+                               err == EWOULDBLOCK);
+                if (ruio.uio_resid == nob || !interrupted)
+                        return -err;
+
+                got = nob - ruio.uio_resid;
+                buffer = ((char *)buffer) + got;
+                nob = ruio.uio_resid;
+        }
+        return (0);
+}
+
+/*
+ * Set the send and/or receive buffer size of 'sock' via sosetopt().
+ *
+ * A size of 0 leaves that buffer alone.  The send size is capped at
+ * KSOCK_MAX_BUF; the receive size is passed through unchanged.  Returns 0
+ * on success or a negative errno; a send-buffer failure stops before the
+ * receive buffer is touched.
+ */
+int
+libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
+{
+        struct sockopt  opt;
+        int             err = 0;
+        int             size;
+        CFS_DECL_NET_DATA;
+
+        /* one request structure is reused for both options */
+        bzero(&opt, sizeof opt);
+        opt.sopt_level = SOL_SOCKET;
+        opt.sopt_dir = SOPT_SET;
+        opt.sopt_valsize = sizeof(size);
+        opt.sopt_val = &size;
+
+        if (txbufsize != 0) {
+                size = (txbufsize > KSOCK_MAX_BUF) ? KSOCK_MAX_BUF : txbufsize;
+
+                opt.sopt_name = SO_SNDBUF;
+                CFS_NET_IN;
+                err = sosetopt(sock, &opt);
+                CFS_NET_EX;
+                if (err != 0) {
+                        CERROR ("Can't set send buffer %d: %d\n",
+                                size, err);
+                        return -err;
+                }
+        }
+
+        if (rxbufsize == 0)
+                return 0;
+
+        size = rxbufsize;
+        opt.sopt_name = SO_RCVBUF;
+        CFS_NET_IN;
+        err = sosetopt(sock, &opt);
+        CFS_NET_EX;
+        if (err != 0) {
+                CERROR ("Can't set receive buffer %d: %d\n",
+                        size, err);
+                return -err;
+        }
+        return 0;
+}
+
+/*
+ * Report the IPv4 address and/or port of one end of 'sock'.
+ *
+ * remote != 0 asks the protocol for the peer address (pru_peeraddr),
+ * otherwise for the local one (pru_sockaddr).  'ip' and 'port' may each be
+ * NULL; values are returned in host byte order.  The sockaddr handed back
+ * by the protocol is freed here.  Returns 0 on success (also when no
+ * address was handed back, in which case nothing is written) or a negative
+ * errno.
+ */
+int
+libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
+{
+        struct sockaddr_in *in4;
+        struct sockaddr    *addr = NULL;
+        int                err;
+        CFS_DECL_NET_DATA;
+
+        CFS_NET_IN;
+        if (remote != 0)
+                err = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &addr);
+        else
+                err = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &addr);
+        CFS_NET_EX;
+
+        if (err != 0) {
+                if (addr != NULL)
+                        FREE(addr, M_SONAME);
+                if (remote != 0) {
+                        CERROR ("Error %d getting sock peer IP\n", err);
+                } else {
+                        CERROR ("Error %d getting sock local IP\n", err);
+                }
+                return -err;
+        }
+
+        if (addr == NULL)
+                return 0;
+
+        in4 = (struct sockaddr_in *)addr;
+        if (ip != NULL)
+                *ip = ntohl (in4->sin_addr.s_addr);
+        if (port != NULL)
+                *port = ntohs (in4->sin_port);
+        FREE(addr, M_SONAME);
+        return 0;
+}
+
+/*
+ * Read back the send and/or receive buffer size of 'sock' via sogetopt().
+ *
+ * Either output pointer may be NULL to skip that query; the option value
+ * is written straight into the caller's variable.  Returns 0 on success
+ * or a negative errno.
+ */
+int
+libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
+{
+        struct sockopt  opt;
+        int err;
+        CFS_DECL_NET_DATA;
+
+        bzero(&opt, sizeof opt);
+        opt.sopt_level = SOL_SOCKET;
+        opt.sopt_dir = SOPT_GET;
+
+        if (txbufsize != NULL) {
+                opt.sopt_name = SO_SNDBUF;
+                opt.sopt_valsize = sizeof(*txbufsize);
+                opt.sopt_val = txbufsize;
+                CFS_NET_IN;
+                err = sogetopt(sock, &opt);
+                CFS_NET_EX;
+                if (err != 0) {
+                        CERROR ("Can't get send buffer size: %d\n", err);
+                        return -err;
+                }
+        }
+
+        if (rxbufsize == NULL)
+                return 0;
+
+        opt.sopt_name = SO_RCVBUF;
+        opt.sopt_valsize = sizeof(*rxbufsize);
+        opt.sopt_val = rxbufsize;
+        CFS_NET_IN;
+        err = sogetopt(sock, &opt);
+        CFS_NET_EX;
+        if (err != 0) {
+                CERROR ("Can't get receive buffer size: %d\n", err);
+                return -err;
+        }
+        return 0;
+}
+
+/*
+ * Create a socket bound to local_ip/local_port and connect it to
+ * peer_ip/peer_port (all in host byte order), waiting for the connection
+ * attempt to finish.
+ *
+ * 'sockp' and 'fatal' must be non-NULL.  On success the connected socket
+ * is stored in *sockp and 0 is returned.  On failure the socket is shut
+ * down and closed, *sockp is left untouched and a negative errno is
+ * returned; *fatal is then cleared when the failure was EADDRNOTAVAIL or
+ * EADDRINUSE (worth retrying from another local port) and set otherwise,
+ * matching the __DARWIN8__ implementation of this function.
+ */
+int
+libcfs_sock_connect (struct socket **sockp, int *fatal,
+                     __u32 local_ip, int local_port,
+                     __u32 peer_ip, int peer_port)
+{
+        struct sockaddr_in  srvaddr;
+        struct socket      *so;
+        int                 s;
+        int                 rc;         /* positive errno past the create */
+        CFS_DECL_FUNNEL_DATA;
+
+        /* build into a local so that *sockp never refers to a socket this
+         * function has already closed */
+        rc = libcfs_sock_create(&so, fatal, local_ip, local_port);
+        if (rc != 0)
+                return rc;
+
+        bzero(&srvaddr, sizeof(srvaddr));
+        srvaddr.sin_len = sizeof(struct sockaddr_in);
+        srvaddr.sin_family = AF_INET;
+        srvaddr.sin_port = htons (peer_port);
+        srvaddr.sin_addr.s_addr = htonl (peer_ip);
+
+        CFS_NET_IN;
+        rc = soconnect(so, (struct sockaddr *)&srvaddr);
+        if (rc != 0) {
+                CFS_NET_EX;
+                if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) {
+                        CDEBUG(D_NETERROR,
+                               "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+                               HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+                }
+                goto out;
+        }
+        s = splnet();
+        /* sleep in one-second (hz ticks) slices until the connection
+         * attempt has either completed or failed */
+        while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+                CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n");
+                (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz);
+        }
+        if ((rc = so->so_error) != 0) {
+                so->so_error = 0;
+                splx(s);
+                CFS_NET_EX;
+                CDEBUG(D_NETERROR,
+                       "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+                       HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+                goto out;
+        }
+        LASSERT(so->so_state & SS_ISCONNECTED);
+        splx(s);
+        CFS_NET_EX;
+        *sockp = so;
+        return (0);
+out:
+        /* libcfs_sock_create() left *fatal set; address clashes are the
+         * retryable cases, so report them as non-fatal */
+        *fatal = !(rc == EADDRNOTAVAIL || rc == EADDRINUSE);
+        CFS_NET_IN;
+        soshutdown(so, 2);
+        soclose(so);
+        CFS_NET_EX;
+        return (-rc);
+}
+
+/*
+ * Release 'sock': calls soshutdown(sock, 0) inside CFS_NET_IN/CFS_NET_EX.
+ *
+ * NOTE(review): unlike the __DARWIN8__ variant, which shuts the socket
+ * down with how == 2 and then closes it, this only calls soshutdown() with
+ * how == 0 and never calls soclose() -- confirm that the socket is closed
+ * elsewhere, otherwise it is leaked.
+ */
+void
+libcfs_sock_release (struct socket *sock)
+{
+        CFS_DECL_FUNNEL_DATA;
+        CFS_NET_IN;
+        soshutdown(sock, 0);
+        CFS_NET_EX;
+}
+
+#endif
diff --git a/libcfs/libcfs/darwin/darwin-tracefile.c b/libcfs/libcfs/darwin/darwin-tracefile.c
new file mode 100644 (file)
index 0000000..e672ad5
--- /dev/null
@@ -0,0 +1,191 @@
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+/*
+ * We can't support smp tracefile currently.
+ * Everything is put on one cpu.
+ */
+
+/* NOTE(review): not referenced in this file; presumably a cap on pages
+ * held by the trace data -- confirm against tracefile.h users */
+#define M_TCD_MAX_PAGES (128 * 1280)
+
+/* value reported by trace_max_debug_mb() */
+static long max_permit_mb = (64 * 1024);
+
+/* taken in trace_get_tcd() and dropped in __trace_put_tcd().
+ * NOTE(review): no initialisation of this lock is visible in this file --
+ * confirm it is set up elsewhere */
+spinlock_t trace_cpu_serializer;
+
+/*
+ * thread currently executing tracefile code or NULL if none does. Used to
+ * detect recursive calls to libcfs_debug_msg().
+ */
+static thread_t trace_owner = NULL;
+
+/* zero means trace_get_tcd()/__trace_put_tcd() may refill or purge pages */
+extern int get_preemption_level(void);
+extern atomic_t tage_allocated;
+
+/* initialised by tracefile_init_arch(); wrapped by the
+ * tracefile_{read,write}_{lock,unlock}() helpers below */
+struct rw_semaphore tracefile_sem;
+
+/*
+ * Platform set-up for the tracefile code: initialises tracefile_sem and
+ * returns 0.  The #error below deliberately breaks the build until the
+ * per-cpu console buffers are implemented.
+ */
+int tracefile_init_arch() {
+    init_rwsem(&tracefile_sem);
+#error "Todo: initialise per-cpu console buffers"
+    return 0;
+}
+
+/* Platform tear-down for the tracefile code; nothing to undo here. */
+void
+tracefile_fini_arch()
+{
+}
+
+/* Take tracefile_sem for reading; pair with tracefile_read_unlock(). */
+void
+tracefile_read_lock()
+{
+        down_read(&tracefile_sem);
+}
+
+/* Drop the read hold on tracefile_sem taken by tracefile_read_lock(). */
+void
+tracefile_read_unlock()
+{
+        up_read(&tracefile_sem);
+}
+
+/* Take tracefile_sem for writing; pair with tracefile_write_unlock(). */
+void
+tracefile_write_lock()
+{
+        down_write(&tracefile_sem);
+}
+
+/* Drop the write hold on tracefile_sem taken by tracefile_write_lock(). */
+void
+tracefile_write_unlock()
+{
+        up_write(&tracefile_sem);
+}
+
+/*
+ * Unimplemented on this platform: the #error deliberately breaks the build.
+ * Per the todo text, the intended behaviour is to hand out a per-cpu /
+ * interrupt console buffer with pre-emption disabled.
+ */
+char *trace_get_console_buffer(void)
+{
+#error "todo: return a per-cpu/interrupt console buffer and disable pre-emption"
+}
+
+/*
+ * Unimplemented on this platform: the #error deliberately breaks the build.
+ * Per the todo text, the intended behaviour is to re-enable pre-emption
+ * once the caller is done with 'buffer'.
+ */
+void trace_put_console_buffer(char *buffer)
+{
+#error "todo: re-enable pre-emption"
+}
+
+/*
+ * Claim the trace data for the calling thread.
+ *
+ * Always hands out trace_data[0].tcd.  Returns with trace_cpu_serializer
+ * held and trace_owner set to the calling thread; the caller releases both
+ * through __trace_put_tcd().  Stock pages are refilled first, before the
+ * lock is taken, and only when get_preemption_level() is zero.
+ */
+struct trace_cpu_data *trace_get_tcd(void)
+{
+       struct trace_cpu_data *tcd;
+       int nr_pages;
+       struct list_head pages;
+
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+       /*
+        * debugging check for recursive call to libcfs_debug_msg()
+        */
+       if (trace_owner == current_thread()) {
+                /*
+                 * Cannot assert here.
+                 */
+               printk(KERN_EMERG "recursive call to %s", __FUNCTION__);
+               /*
+                 * "The death of God left the angels in a strange position."
+                */
+               cfs_enter_debugger();
+       }
+       tcd = &trace_data[0].tcd;
+        CFS_INIT_LIST_HEAD(&pages);
+       /* gather new pages on a private list before taking the spinlock */
+       if (get_preemption_level() == 0)
+               nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages);
+       else
+               nr_pages = 0;
+       spin_lock(&trace_cpu_serializer);
+       trace_owner = current_thread();
+       /* publish the refilled pages now that the tcd is exclusively ours */
+       tcd->tcd_cur_stock_pages += nr_pages;
+       list_splice(&pages, &tcd->tcd_stock_pages);
+       return tcd;
+}
+
+extern void raw_page_death_row_clean(void);
+
+/*
+ * Release the trace data claimed by trace_get_tcd(): asserts the caller is
+ * the current owner, clears trace_owner, drops trace_cpu_serializer and
+ * then, if get_preemption_level() is zero, calls
+ * raw_page_death_row_clean().
+ */
+void __trace_put_tcd(struct trace_cpu_data *tcd)
+{
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       LASSERT(trace_owner == current_thread());
+       /* clear ownership before unlocking so the next owner starts clean */
+       trace_owner = NULL;
+       spin_unlock(&trace_cpu_serializer);
+       if (get_preemption_level() == 0)
+               /* purge all pending pages */
+               raw_page_death_row_clean();
+}
+
+/*
+ * Tell whether trace page 'tage' belongs to 'tcd'.  Always returns 1:
+ * XNU has a single global tcd, and all pages are owned by it.
+ */
+int
+tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+       /*
+        * Never call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) from here:
+        * it would recurse without end.
+        */
+       return 1;
+}
+
+/*
+ * Fill in the header of one debug record: the caller-supplied subsystem,
+ * mask, line number and stack value, plus the current time, cpu id,
+ * process id and current thread.
+ */
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+                   const int line, unsigned long stack)
+{
+       struct timeval now;
+
+       /*
+        * Never call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) from here:
+        * it would recurse without end.
+        */
+       do_gettimeofday(&now);
+
+       /* values handed in by the caller */
+       header->ph_subsys = subsys;
+       header->ph_mask = mask;
+       header->ph_line_num = line;
+       header->ph_stack = stack;
+
+       /* values sampled here */
+       header->ph_sec = (__u32)now.tv_sec;
+       header->ph_usec = now.tv_usec;
+       header->ph_cpu_id = smp_processor_id();
+       header->ph_pid = cfs_curproc_pid();
+       header->ph_extern_pid = (__u32)current_thread();
+}
+
+/*
+ * Print one debug message on the console via printk().
+ *
+ * The severity bits in 'mask' select the printk level and the "Lustre" /
+ * "LustreError" prefix.  D_CONSOLE messages are printed bare; everything
+ * else is preceded by pid, extern pid, file, line and function taken from
+ * 'hdr', 'file' and 'fn'.  Exactly 'len' bytes of 'buf' are printed at
+ * most, so 'buf' need not be NUL-terminated at 'len'.
+ */
+void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
+                     int len, const char *file, const char *fn)
+{
+       char *prefix = "Lustre", *ptype = KERN_INFO;
+
+       /*
+        * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       if ((mask & D_EMERG) != 0) {
+               prefix = "LustreError";
+               ptype = KERN_EMERG;
+       } else if ((mask & D_ERROR) != 0) {
+               prefix = "LustreError";
+               ptype = KERN_ERR;
+       } else if ((mask & D_WARNING) != 0) {
+               prefix = "Lustre";
+               ptype = KERN_WARNING;
+       } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) {
+               prefix = "Lustre";
+               ptype = KERN_INFO;
+       }
+
+       if ((mask & D_CONSOLE) != 0) {
+               printk("%s%s: %.*s", ptype, prefix, len, buf);
+       } else {
+               /* "%.*s" (precision) bounds the output to 'len' bytes; the
+                * former "%*s" only set a minimum field width and read 'buf'
+                * up to its NUL terminator */
+               printk("%s%s: %d:%d:(%s:%d:%s()) %.*s",
+                      ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid,
+                      file, hdr->ph_line_num, fn, len, buf);
+       }
+}
+
+/* Report the value of max_permit_mb (presumably a debug buffer cap in MB). */
+int
+trace_max_debug_mb(void)
+{
+       return (int)max_permit_mb;
+}
+
+/*
+ * Unimplemented on this platform: the #error deliberately breaks the build.
+ * Going by the name and signature, this is meant to invoke fn(arg) on
+ * every cpu -- confirm against the other platform implementations.
+ */
+void
+trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
+{
+#error "tbd"
+}
diff --git a/libcfs/libcfs/darwin/darwin-utils.c b/libcfs/libcfs/darwin/darwin-utils.c
new file mode 100644 (file)
index 0000000..cfd7a2d
--- /dev/null
@@ -0,0 +1,578 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Darwin porting library
+ * Make things easy to port
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <mach/mach_types.h>
+#include <string.h>
+#include <sys/errno.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <lnet/types.h>
+
+#include <libcfs/kp30.h>
+
+#ifndef isspace
+/*
+ * Fallback isspace() for builds where it is not provided as a macro.
+ * Returns non-zero for the six whitespace characters of the "C" locale:
+ * space, horizontal tab, newline, vertical tab, form feed and carriage
+ * return.
+ */
+inline int
+isspace(char c)
+{
+        return (c == ' ' || c == '\t' || c == '\n' || c == '\v' ||
+                c == '\f' || c == '\r');
+}
+#endif
+
+/*
+ * Return a pointer to the first character of cs that also occurs in ct,
+ * or NULL when cs contains none of the characters of ct.
+ */
+char * strpbrk(const char * cs,const char * ct)
+{
+       const char *hay;
+
+       for (hay = cs; *hay != '\0'; hay++) {
+               const char *set = ct;
+
+               while (*set != '\0') {
+                       if (*set == *hay)
+                               return (char *) hay;
+                       set++;
+               }
+       }
+       return NULL;
+}
+
+/*
+ * Cut the next token off the string *s.
+ *
+ * The token starts at *s and ends at the first character that occurs in
+ * ct; that character is overwritten with '\0' and *s is advanced past it.
+ * When no such character exists the rest of the string is the token and
+ * *s becomes NULL.  Returns the token, or NULL if *s is already NULL.
+ */
+char * strsep(char **s, const char *ct)
+{
+       char *token = *s;
+       char *delim;
+
+       if (token == NULL)
+               return NULL;
+
+       delim = strpbrk(token, ct);
+       if (delim == NULL) {
+               *s = NULL;
+       } else {
+               *delim = '\0';
+               *s = delim + 1;
+       }
+       return token;
+}
+
+/*
+ * Length of the string s, looking at no more than count characters:
+ * returns count when no '\0' is found among the first count characters.
+ */
+size_t strnlen(const char * s, size_t count)
+{
+       size_t n = 0;
+
+       while (n < count && s[n] != '\0')
+               n++;
+       return n;
+}
+
+/*
+ * Return a pointer to the first occurrence of the string str inside the
+ * string in, or a null pointer when there is none.  An empty str matches
+ * at the very beginning of in.
+ */
+char *
+strstr(const char *in, const char *str)
+{
+       const char first = *str;
+       const char *rest;
+       size_t restlen;
+
+       if (first == '\0')
+               return (char *) in;     /* trivial empty string case */
+
+       rest = str + 1;
+       restlen = strlen(rest);
+       for (; *in != '\0'; in++) {
+               /* candidate: first character matches, compare the rest */
+               if (*in == first && strncmp(in + 1, rest, restlen) == 0)
+                       return (char *) in;
+       }
+       return (char *) 0;
+}
+
+/*
+ * Return a pointer to the last occurrence of the character ch (converted
+ * to char) in the string p, or NULL when it does not occur.  The
+ * terminating '\0' counts as part of the string, so looking for 0 yields
+ * a pointer to the terminator.
+ */
+char *
+strrchr(const char *p, int ch)
+{
+        const char *last = NULL;
+
+        /*
+         * Scan forwards and remember the latest hit.  Walking backwards
+         * from the end would have to step to a position before the first
+         * character to terminate, which is not a valid pointer.
+         */
+        do {
+                if (*p == (char)ch)
+                        last = p;
+        } while (*p++ != '\0');
+        return (char *)last;
+}
+
+/*
+ * Format address as a decimal string at the END of the buffer buf of
+ * len bytes and return a pointer to the first digit (inside buf).
+ *
+ * The last byte of buf always receives the terminating '\0'.  If the
+ * number needs more than len - 1 digits, only the low-order digits that
+ * fit are produced.  Zero is formatted as "0".
+ *
+ * Returns NULL when buf is NULL or len is not positive.
+ */
+char *
+ul2dstr(unsigned long address, char *buf, int len)
+{
+        char *pos;
+
+        /* validate before doing any arithmetic on buf */
+        if (len <= 0 || !buf)
+                return NULL;
+        pos = buf + len - 1;
+        *pos = 0;
+        /* do/while: emit at least one digit, so that 0 becomes "0" */
+        do {
+                if (!--len)
+                        break;          /* out of room */
+                *--pos = address % 10 + '0';
+                address /= 10;
+        } while (address);
+        return pos;
+}
+
+/*
+ * miscellaneous libcfs stuff
+ */
+
+/*
+ * Convert server error code to client format.
+ * Linux errno.h.
+ */
+
+/*
+ * Linux errno values under LINUX_-prefixed names, so that they can be
+ * used next to the local <sys/errno.h> constants.  They serve as indices
+ * into errno_xlate[] in convert_server_error().  The list was obtained by
+ *
+ *     cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3
+ *
+ * Note that the values 41 and 58 are not assigned to any name.
+ */
+enum linux_errnos {
+       LINUX_EPERM              = 1,
+       LINUX_ENOENT             = 2,
+       LINUX_ESRCH              = 3,
+       LINUX_EINTR              = 4,
+       LINUX_EIO                = 5,
+       LINUX_ENXIO              = 6,
+       LINUX_E2BIG              = 7,
+       LINUX_ENOEXEC            = 8,
+       LINUX_EBADF              = 9,
+       LINUX_ECHILD             = 10,
+       LINUX_EAGAIN             = 11,
+       LINUX_ENOMEM             = 12,
+       LINUX_EACCES             = 13,
+       LINUX_EFAULT             = 14,
+       LINUX_ENOTBLK            = 15,
+       LINUX_EBUSY              = 16,
+       LINUX_EEXIST             = 17,
+       LINUX_EXDEV              = 18,
+       LINUX_ENODEV             = 19,
+       LINUX_ENOTDIR            = 20,
+       LINUX_EISDIR             = 21,
+       LINUX_EINVAL             = 22,
+       LINUX_ENFILE             = 23,
+       LINUX_EMFILE             = 24,
+       LINUX_ENOTTY             = 25,
+       LINUX_ETXTBSY            = 26,
+       LINUX_EFBIG              = 27,
+       LINUX_ENOSPC             = 28,
+       LINUX_ESPIPE             = 29,
+       LINUX_EROFS              = 30,
+       LINUX_EMLINK             = 31,
+       LINUX_EPIPE              = 32,
+       LINUX_EDOM               = 33,
+       LINUX_ERANGE             = 34,
+       LINUX_EDEADLK            = 35,
+       LINUX_ENAMETOOLONG       = 36,
+       LINUX_ENOLCK             = 37,
+       LINUX_ENOSYS             = 38,
+       LINUX_ENOTEMPTY          = 39,
+       LINUX_ELOOP              = 40,
+       LINUX_ENOMSG             = 42,
+       LINUX_EIDRM              = 43,
+       LINUX_ECHRNG             = 44,
+       LINUX_EL2NSYNC           = 45,
+       LINUX_EL3HLT             = 46,
+       LINUX_EL3RST             = 47,
+       LINUX_ELNRNG             = 48,
+       LINUX_EUNATCH            = 49,
+       LINUX_ENOCSI             = 50,
+       LINUX_EL2HLT             = 51,
+       LINUX_EBADE              = 52,
+       LINUX_EBADR              = 53,
+       LINUX_EXFULL             = 54,
+       LINUX_ENOANO             = 55,
+       LINUX_EBADRQC            = 56,
+       LINUX_EBADSLT            = 57,
+       LINUX_EBFONT             = 59,
+       LINUX_ENOSTR             = 60,
+       LINUX_ENODATA            = 61,
+       LINUX_ETIME              = 62,
+       LINUX_ENOSR              = 63,
+       LINUX_ENONET             = 64,
+       LINUX_ENOPKG             = 65,
+       LINUX_EREMOTE            = 66,
+       LINUX_ENOLINK            = 67,
+       LINUX_EADV               = 68,
+       LINUX_ESRMNT             = 69,
+       LINUX_ECOMM              = 70,
+       LINUX_EPROTO             = 71,
+       LINUX_EMULTIHOP          = 72,
+       LINUX_EDOTDOT            = 73,
+       LINUX_EBADMSG            = 74,
+       LINUX_EOVERFLOW          = 75,
+       LINUX_ENOTUNIQ           = 76,
+       LINUX_EBADFD             = 77,
+       LINUX_EREMCHG            = 78,
+       LINUX_ELIBACC            = 79,
+       LINUX_ELIBBAD            = 80,
+       LINUX_ELIBSCN            = 81,
+       LINUX_ELIBMAX            = 82,
+       LINUX_ELIBEXEC           = 83,
+       LINUX_EILSEQ             = 84,
+       LINUX_ERESTART           = 85,
+       LINUX_ESTRPIPE           = 86,
+       LINUX_EUSERS             = 87,
+       LINUX_ENOTSOCK           = 88,
+       LINUX_EDESTADDRREQ       = 89,
+       LINUX_EMSGSIZE           = 90,
+       LINUX_EPROTOTYPE         = 91,
+       LINUX_ENOPROTOOPT        = 92,
+       LINUX_EPROTONOSUPPORT    = 93,
+       LINUX_ESOCKTNOSUPPORT    = 94,
+       LINUX_EOPNOTSUPP         = 95,
+       LINUX_EPFNOSUPPORT       = 96,
+       LINUX_EAFNOSUPPORT       = 97,
+       LINUX_EADDRINUSE         = 98,
+       LINUX_EADDRNOTAVAIL      = 99,
+       LINUX_ENETDOWN           = 100,
+       LINUX_ENETUNREACH        = 101,
+       LINUX_ENETRESET          = 102,
+       LINUX_ECONNABORTED       = 103,
+       LINUX_ECONNRESET         = 104,
+       LINUX_ENOBUFS            = 105,
+       LINUX_EISCONN            = 106,
+       LINUX_ENOTCONN           = 107,
+       LINUX_ESHUTDOWN          = 108,
+       LINUX_ETOOMANYREFS       = 109,
+       LINUX_ETIMEDOUT          = 110,
+       LINUX_ECONNREFUSED       = 111,
+       LINUX_EHOSTDOWN          = 112,
+       LINUX_EHOSTUNREACH       = 113,
+       LINUX_EALREADY           = 114,
+       LINUX_EINPROGRESS        = 115,
+       LINUX_ESTALE             = 116,
+       LINUX_EUCLEAN            = 117,
+       LINUX_ENOTNAM            = 118,
+       LINUX_ENAVAIL            = 119,
+       LINUX_EISNAM             = 120,
+       LINUX_EREMOTEIO          = 121,
+       LINUX_EDQUOT             = 122,
+       LINUX_ENOMEDIUM          = 123,
+       LINUX_EMEDIUMTYPE        = 124,
+
+       /*
+        * we don't need these, but for completeness..
+        */
+       LINUX_EDEADLOCK          = LINUX_EDEADLK,
+       LINUX_EWOULDBLOCK        = LINUX_EAGAIN
+};
+
+/*
+ * Translate an error code received from a Linux server into the local
+ * errno numbering.
+ *
+ * ecode is truncated to int; its sign is preserved in the result.  A
+ * magnitude that lies inside errno_xlate[] is replaced by the local
+ * constant listed there; Linux errors without a local counterpart are
+ * reported as EINVAL.  Magnitudes beyond the end of the table are passed
+ * through unchanged, and 0 stays 0.
+ */
+int convert_server_error(__u64 ecode)
+{
+       int sign;
+       int code;
+
+       static const int errno_xlate[] = {
+               /* success is always success */
+               [0]                     = 0,
+               [LINUX_EPERM]           = EPERM,
+               [LINUX_ENOENT]          = ENOENT,
+               [LINUX_ESRCH]           = ESRCH,
+               [LINUX_EINTR]           = EINTR,
+               [LINUX_EIO]             = EIO,
+               [LINUX_ENXIO]           = ENXIO,
+               [LINUX_E2BIG]           = E2BIG,
+               [LINUX_ENOEXEC]         = ENOEXEC,
+               [LINUX_EBADF]           = EBADF,
+               [LINUX_ECHILD]          = ECHILD,
+               [LINUX_EAGAIN]          = EAGAIN,
+               [LINUX_ENOMEM]          = ENOMEM,
+               [LINUX_EACCES]          = EACCES,
+               [LINUX_EFAULT]          = EFAULT,
+               [LINUX_ENOTBLK]         = ENOTBLK,
+               [LINUX_EBUSY]           = EBUSY,
+               [LINUX_EEXIST]          = EEXIST,
+               [LINUX_EXDEV]           = EXDEV,
+               [LINUX_ENODEV]          = ENODEV,
+               [LINUX_ENOTDIR]         = ENOTDIR,
+               [LINUX_EISDIR]          = EISDIR,
+               [LINUX_EINVAL]          = EINVAL,
+               [LINUX_ENFILE]          = ENFILE,
+               [LINUX_EMFILE]          = EMFILE,
+               [LINUX_ENOTTY]          = ENOTTY,
+               [LINUX_ETXTBSY]         = ETXTBSY,
+               [LINUX_EFBIG]           = EFBIG,
+               [LINUX_ENOSPC]          = ENOSPC,
+               [LINUX_ESPIPE]          = ESPIPE,
+               [LINUX_EROFS]           = EROFS,
+               [LINUX_EMLINK]          = EMLINK,
+               [LINUX_EPIPE]           = EPIPE,
+               [LINUX_EDOM]            = EDOM,
+               [LINUX_ERANGE]          = ERANGE,
+               [LINUX_EDEADLK]         = EDEADLK,
+               [LINUX_ENAMETOOLONG]    = ENAMETOOLONG,
+               [LINUX_ENOLCK]          = ENOLCK,
+               [LINUX_ENOSYS]          = ENOSYS,
+               [LINUX_ENOTEMPTY]       = ENOTEMPTY,
+               [LINUX_ELOOP]           = ELOOP,
+               [LINUX_ENOMSG]          = ENOMSG,
+               [LINUX_EIDRM]           = EIDRM,
+               [LINUX_ECHRNG]          = EINVAL /* ECHRNG */,
+               [LINUX_EL2NSYNC]        = EINVAL /* EL2NSYNC */,
+               [LINUX_EL3HLT]          = EINVAL /* EL3HLT */,
+               [LINUX_EL3RST]          = EINVAL /* EL3RST */,
+               [LINUX_ELNRNG]          = EINVAL /* ELNRNG */,
+               [LINUX_EUNATCH]         = EINVAL /* EUNATCH */,
+               [LINUX_ENOCSI]          = EINVAL /* ENOCSI */,
+               [LINUX_EL2HLT]          = EINVAL /* EL2HLT */,
+               [LINUX_EBADE]           = EINVAL /* EBADE */,
+               [LINUX_EBADR]           = EBADRPC,
+               [LINUX_EXFULL]          = EINVAL /* EXFULL */,
+               [LINUX_ENOANO]          = EINVAL /* ENOANO */,
+               [LINUX_EBADRQC]         = EINVAL /* EBADRQC */,
+               [LINUX_EBADSLT]         = EINVAL /* EBADSLT */,
+               [LINUX_EBFONT]          = EINVAL /* EBFONT */,
+               [LINUX_ENOSTR]          = EINVAL /* ENOSTR */,
+               [LINUX_ENODATA]         = EINVAL /* ENODATA */,
+               [LINUX_ETIME]           = EINVAL /* ETIME */,
+               [LINUX_ENOSR]           = EINVAL /* ENOSR */,
+               [LINUX_ENONET]          = EINVAL /* ENONET */,
+               [LINUX_ENOPKG]          = EINVAL /* ENOPKG */,
+               [LINUX_EREMOTE]         = EREMOTE,
+               [LINUX_ENOLINK]         = EINVAL /* ENOLINK */,
+               [LINUX_EADV]            = EINVAL /* EADV */,
+               [LINUX_ESRMNT]          = EINVAL /* ESRMNT */,
+               [LINUX_ECOMM]           = EINVAL /* ECOMM */,
+               [LINUX_EPROTO]          = EPROTOTYPE,
+               [LINUX_EMULTIHOP]       = EINVAL /* EMULTIHOP */,
+               [LINUX_EDOTDOT]         = EINVAL /* EDOTDOT */,
+               [LINUX_EBADMSG]         = EINVAL /* EBADMSG */,
+               [LINUX_EOVERFLOW]       = EOVERFLOW,
+               [LINUX_ENOTUNIQ]        = EINVAL /* ENOTUNIQ */,
+               [LINUX_EBADFD]          = EINVAL /* EBADFD */,
+               [LINUX_EREMCHG]         = EINVAL /* EREMCHG */,
+               [LINUX_ELIBACC]         = EINVAL /* ELIBACC */,
+               [LINUX_ELIBBAD]         = EINVAL /* ELIBBAD */,
+               [LINUX_ELIBSCN]         = EINVAL /* ELIBSCN */,
+               [LINUX_ELIBMAX]         = EINVAL /* ELIBMAX */,
+               [LINUX_ELIBEXEC]        = EINVAL /* ELIBEXEC */,
+               [LINUX_EILSEQ]          = EILSEQ,
+               [LINUX_ERESTART]        = EINVAL /* because ERESTART is
+                                                  * negative in XNU */,
+               [LINUX_ESTRPIPE]        = EINVAL /* ESTRPIPE */,
+               [LINUX_EUSERS]          = EUSERS,
+               [LINUX_ENOTSOCK]        = ENOTSOCK,
+               [LINUX_EDESTADDRREQ]    = EDESTADDRREQ,
+               [LINUX_EMSGSIZE]        = EMSGSIZE,
+               [LINUX_EPROTOTYPE]      = EPROTOTYPE,
+               [LINUX_ENOPROTOOPT]     = ENOPROTOOPT,
+               [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT,
+               [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT,
+               [LINUX_EOPNOTSUPP]      = EOPNOTSUPP,
+               [LINUX_EPFNOSUPPORT]    = EPFNOSUPPORT,
+               [LINUX_EAFNOSUPPORT]    = EAFNOSUPPORT,
+               [LINUX_EADDRINUSE]      = EADDRINUSE,
+               [LINUX_EADDRNOTAVAIL]   = EADDRNOTAVAIL,
+               [LINUX_ENETDOWN]        = ENETDOWN,
+               [LINUX_ENETUNREACH]     = ENETUNREACH,
+               [LINUX_ENETRESET]       = ENETRESET,
+               [LINUX_ECONNABORTED]    = ECONNABORTED,
+               [LINUX_ECONNRESET]      = ECONNRESET,
+               [LINUX_ENOBUFS]         = ENOBUFS,
+               [LINUX_EISCONN]         = EISCONN,
+               [LINUX_ENOTCONN]        = ENOTCONN,
+               [LINUX_ESHUTDOWN]       = ESHUTDOWN,
+               [LINUX_ETOOMANYREFS]    = ETOOMANYREFS,
+               [LINUX_ETIMEDOUT]       = ETIMEDOUT,
+               [LINUX_ECONNREFUSED]    = ECONNREFUSED,
+               [LINUX_EHOSTDOWN]       = EHOSTDOWN,
+               [LINUX_EHOSTUNREACH]    = EHOSTUNREACH,
+               [LINUX_EALREADY]        = EALREADY,
+               [LINUX_EINPROGRESS]     = EINPROGRESS,
+               [LINUX_ESTALE]          = ESTALE,
+               [LINUX_EUCLEAN]         = EINVAL /* EUCLEAN */,
+               [LINUX_ENOTNAM]         = EINVAL /* ENOTNAM */,
+               [LINUX_ENAVAIL]         = EINVAL /* ENAVAIL */,
+               [LINUX_EISNAM]          = EINVAL /* EISNAM */,
+               [LINUX_EREMOTEIO]       = EINVAL /* EREMOTEIO */,
+               [LINUX_EDQUOT]          = EDQUOT,
+               [LINUX_ENOMEDIUM]       = EINVAL /* ENOMEDIUM */,
+               [LINUX_EMEDIUMTYPE]     = EINVAL /* EMEDIUMTYPE */,
+       };
+       code = (int)ecode;
+       if (code >= 0) {
+               sign = +1;
+       } else {
+               sign = -1;
+               code = -code;
+       }
+       /*
+        * "code > 0" is tested explicitly so that the comparison with the
+        * (unsigned) element count is only made for a positive value;
+        * zero needs no translation.
+        */
+       if (code > 0 &&
+           (unsigned)code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) {
+               code = errno_xlate[code];
+               /*
+                * Slots without an initializer are 0: report them like any
+                * other error that has no local counterpart instead of
+                * turning the error into success.
+                */
+               if (code == 0)
+                       code = EINVAL;
+               LASSERT(code >= 0);
+       }
+       return sign * code;
+}
+
+/*
+ * Linux open flag values (written in octal), named with a LINUX_ prefix
+ * so that they can be used next to the local O_* constants from
+ * <fcntl.h>.  convert_client_oflag() builds its result from these.
+ * LINUX_O_NDELAY and LINUX_O_FSYNC are aliases of LINUX_O_NONBLOCK and
+ * LINUX_O_SYNC respectively.
+ */
+enum {
+       LINUX_O_RDONLY   =           00,
+       LINUX_O_WRONLY   =           01,
+       LINUX_O_RDWR     =           02,
+       LINUX_O_CREAT    =         0100,
+       LINUX_O_EXCL     =         0200,
+       LINUX_O_NOCTTY   =         0400,
+       LINUX_O_TRUNC    =        01000,
+       LINUX_O_APPEND   =        02000,
+       LINUX_O_NONBLOCK =        04000,
+       LINUX_O_NDELAY   =             LINUX_O_NONBLOCK,
+       LINUX_O_SYNC     =       010000,
+       LINUX_O_FSYNC    =             LINUX_O_SYNC,
+       LINUX_O_ASYNC    =       020000,
+       LINUX_O_DIRECT   =       040000,
+       LINUX_O_NOFOLLOW =      0400000
+};
+
+/*
+ * If any bit of cmask is set in *cflag, set smask in *sflag and clear the
+ * cmask bits from *cflag, so that *cflag ends up holding only the bits
+ * that have not been translated yet.
+ */
+static inline void obit_convert(int *cflag, int *sflag,
+                               unsigned cmask, unsigned smask)
+{
+       /* parentheses are required: "!=" binds tighter than "&" */
+       if ((*cflag & cmask) != 0) {
+               *sflag |= smask;
+               *cflag &= ~cmask;
+       }
+}
+
+/*
+ * convert <fcntl.h> flag from XNU client to Linux _i386_ server.
+ *
+ * On success the translated flags are stored in *result and 0 is
+ * returned.  If cflag carries an access mode or a flag bit that has no
+ * Linux counterpart, -EINVAL is returned and *result is left untouched.
+ */
+int convert_client_oflag(int cflag, int *result)
+{
+       int sflag = 0;
+
+       /*
+        * The access mode is a value inside O_ACCMODE rather than a set of
+        * independent bits (O_RDONLY has no bit of its own to test for),
+        * so it is translated as a whole.
+        */
+       switch (cflag & O_ACCMODE) {
+       case O_RDONLY:
+               sflag |= LINUX_O_RDONLY;
+               break;
+       case O_WRONLY:
+               sflag |= LINUX_O_WRONLY;
+               break;
+       case O_RDWR:
+               sflag |= LINUX_O_RDWR;
+               break;
+       default:
+               return -EINVAL;
+       }
+       cflag &= ~O_ACCMODE;
+
+       obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK);
+       obit_convert(&cflag, &sflag, O_APPEND,   LINUX_O_APPEND);
+       obit_convert(&cflag, &sflag, O_ASYNC,    LINUX_O_ASYNC);
+       obit_convert(&cflag, &sflag, O_FSYNC,    LINUX_O_FSYNC);
+       obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW);
+       obit_convert(&cflag, &sflag, O_CREAT,    LINUX_O_CREAT);
+       obit_convert(&cflag, &sflag, O_TRUNC,    LINUX_O_TRUNC);
+       obit_convert(&cflag, &sflag, O_EXCL,     LINUX_O_EXCL);
+       obit_convert(&cflag, &sflag, O_NDELAY,   LINUX_O_NDELAY);
+       obit_convert(&cflag, &sflag, O_NOCTTY,   LINUX_O_NOCTTY);
+       /*
+        * Some more obscure BSD flags have no Linux counterparts:
+        *
+        * O_SHLOCK     0x0010
+        * O_EXLOCK     0x0020
+        * O_EVTONLY    0x8000
+        * O_POPUP      0x80000000
+        * O_ALERT      0x20000000
+        */
+       /* anything still set in cflag could not be translated */
+       if (cflag != 0)
+               return -EINVAL;
+
+       *result = sflag;
+       return 0;
+}
+
+#ifdef __DARWIN8__
+#else /* !__DARWIN8__ */
+extern int unix_syscall();
+extern int unix_syscall_return();
+
+extern int ktrsysret();
+extern int ktrace();
+
+extern int ast_taken();
+extern int ast_check();
+
+extern int trap();
+extern int syscall_trace();
+
+/* Return 1 when addr lies in the closed interval [start, end], else 0. */
+static int is_addr_in_range(void *addr, void *start, void *end)
+{
+       if (addr < start)
+               return 0;
+       return addr <= end;
+}
+
+extern void cfs_thread_agent (void);
+
+/*
+ * Return 1 when addr tells the stack walker to stop: it is NULL, or it
+ * lies inside one of the address ranges delimited by the symbol pairs
+ * listed below.  Return 0 otherwise.
+ */
+static int is_last_frame(void *addr)
+{
+       return addr == NULL ||
+              is_addr_in_range(addr, unix_syscall, unix_syscall_return) ||
+              is_addr_in_range(addr, ktrsysret, ktrace) ||
+              is_addr_in_range(addr, ast_taken, ast_check) ||
+              is_addr_in_range(addr, trap, syscall_trace) ||
+              is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread);
+}
+
+/*
+ * Return __builtin_return_address(i + 1) for i in 0..19; any other value
+ * of i ends in panic().
+ *
+ * Every level is spelled out as its own case with a literal argument.
+ * NOTE(review): presumably because the builtin only accepts a constant
+ * level -- see the compiler documentation.
+ */
+static void *get_frame(int i)
+{
+       void *result;
+
+/* one switch case per level: the macro argument is the literal level */
+#define CASE(i) case (i): result = __builtin_return_address(i); break
+       switch (i + 1) {
+               CASE(1);
+               CASE(2);
+               CASE(3);
+               CASE(4);
+               CASE(5);
+               CASE(6);
+               CASE(7);
+               CASE(8);
+               CASE(9);
+               CASE(10);
+               CASE(11);
+               CASE(12);
+               CASE(13);
+               CASE(14);
+               CASE(15);
+               CASE(16);
+               CASE(17);
+               CASE(18);
+               CASE(19);
+               CASE(20);
+       default:
+               /* the message reports the caller's i, not the level i + 1 */
+               panic("impossible frame number: %d\n", i);
+               result = NULL;
+       }
+       return result;
+}
+
+/*
+ * Clear *trace and record return addresses in trace->frame[], one per
+ * slot, as delivered by get_frame().  Recording stops after the first
+ * address for which is_last_frame() is true (that address is stored as
+ * well), or when the array is full.
+ */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+       int depth = 0;
+
+       memset(trace, 0, sizeof *trace);
+       while (depth < sizeof_array(trace->frame)) {
+               void *ret = get_frame(depth);
+
+               trace->frame[depth] = ret;
+               if (is_last_frame(ret))
+                       break;
+               depth++;
+       }
+}
+
+/*
+ * Return entry frame_no of trace->frame[], or NULL when frame_no is
+ * outside the array.
+ */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+        if (frame_no < 0 || frame_no >= sizeof_array(trace->frame))
+                return NULL;
+        return trace->frame[frame_no];
+}
+#endif /* !__DARWIN8__ */
diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c
new file mode 100644 (file)
index 0000000..9810bdb
--- /dev/null
@@ -0,0 +1,839 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <stdarg.h>
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+
+/* name of the dump file, built by libcfs_debug_dumplog_internal() */
+static char debug_file_name[1024];
+
+#ifdef __KERNEL__
+/*
+ * Debugging tunables.  Those followed by CFS_MODULE_PARM() are registered
+ * as module parameters with the description given there.
+ *
+ * NOTE(review): libcfs_subsystem_debug and libcfs_debug are unsigned int
+ * but are registered with parameter type "int" -- confirm that
+ * CFS_MODULE_PARM() tolerates the mismatch.
+ */
+unsigned int libcfs_subsystem_debug = ~0;
+CFS_MODULE_PARM(libcfs_subsystem_debug, "i", int, 0644,
+                "Lustre kernel debug subsystem mask");
+EXPORT_SYMBOL(libcfs_subsystem_debug);
+
+unsigned int libcfs_debug = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE |
+                             D_NETERROR | D_HA | D_CONFIG | D_IOCTL);
+CFS_MODULE_PARM(libcfs_debug, "i", int, 0644,
+                "Lustre kernel debug mask");
+EXPORT_SYMBOL(libcfs_debug);
+
+int libcfs_debug_mb = -1;
+CFS_MODULE_PARM(libcfs_debug_mb, "i", int, 0644,
+                "Total debug buffer size.");
+EXPORT_SYMBOL(libcfs_debug_mb);
+
+unsigned int libcfs_printk = D_CANTMASK;
+CFS_MODULE_PARM(libcfs_printk, "i", uint, 0644,
+                "Lustre kernel debug console mask");
+EXPORT_SYMBOL(libcfs_printk);
+
+unsigned int libcfs_console_ratelimit = 1;
+CFS_MODULE_PARM(libcfs_console_ratelimit, "i", uint, 0644,
+                "Lustre kernel debug console ratelimit (0 to disable)");
+EXPORT_SYMBOL(libcfs_console_ratelimit);
+
+cfs_duration_t libcfs_console_max_delay;
+CFS_MODULE_PARM(libcfs_console_max_delay, "l", ulong, 0644,
+                "Lustre kernel debug console max delay (jiffies)");
+EXPORT_SYMBOL(libcfs_console_max_delay);
+
+cfs_duration_t libcfs_console_min_delay;
+CFS_MODULE_PARM(libcfs_console_min_delay, "l", ulong, 0644,
+                "Lustre kernel debug console min delay (jiffies)");
+EXPORT_SYMBOL(libcfs_console_min_delay);
+
+unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
+CFS_MODULE_PARM(libcfs_console_backoff, "i", uint, 0644,
+                "Lustre kernel debug console backoff factor");
+EXPORT_SYMBOL(libcfs_console_backoff);
+
+/* the following are exported but not registered as module parameters */
+unsigned int libcfs_debug_binary = 1;
+EXPORT_SYMBOL(libcfs_debug_binary);
+
+unsigned int libcfs_stack;
+EXPORT_SYMBOL(libcfs_stack);
+
+unsigned int portal_enter_debugger;
+EXPORT_SYMBOL(portal_enter_debugger);
+
+unsigned int libcfs_catastrophe;
+EXPORT_SYMBOL(libcfs_catastrophe);
+
+unsigned int libcfs_panic_on_lbug = 0;
+CFS_MODULE_PARM(libcfs_panic_on_lbug, "i", uint, 0644,
+                "Lustre kernel panic on LBUG");
+EXPORT_SYMBOL(libcfs_panic_on_lbug);
+
+atomic_t libcfs_kmemory = ATOMIC_INIT(0);
+EXPORT_SYMBOL(libcfs_kmemory);
+
+/* wait queue signalled by libcfs_debug_dumplog_thread() after a dump */
+static cfs_waitq_t debug_ctlwq;
+
+/*
+ * Prefix of the dump file name; see libcfs_debug_dumplog_internal().
+ *
+ * NOTE(review): this is a char array but it is registered with parameter
+ * type "charp" -- confirm how CFS_MODULE_PARM() stores a new value.
+ */
+#ifdef __arch_um__
+char debug_file_path[1024] = "/r/tmp/lustre-log";
+#else
+char debug_file_path[1024] = "/tmp/lustre-log";
+#endif
+CFS_MODULE_PARM(debug_file_path, "s", charp, 0644,
+                "Path for dumping debug logs, "
+                "set 'NONE' to prevent log dumping");
+
+int libcfs_panic_in_progress;
+
+/*
+ * Map one S_* subsystem constant to its name; NULL for any other value.
+ * The names must stay lower-case: libcfs_debug_token2mask() relies on it.
+ */
+const char *
+libcfs_debug_subsys2str(int subsys)
+{
+        switch (subsys) {
+        case S_UNDEFINED: return "undefined";
+        case S_MDC:       return "mdc";
+        case S_MDS:       return "mds";
+        case S_OSC:       return "osc";
+        case S_OST:       return "ost";
+        case S_CLASS:     return "class";
+        case S_LOG:       return "log";
+        case S_LLITE:     return "llite";
+        case S_RPC:       return "rpc";
+        case S_LNET:      return "lnet";
+        case S_LND:       return "lnd";
+        case S_PINGER:    return "pinger";
+        case S_FILTER:    return "filter";
+        case S_ECHO:      return "echo";
+        case S_LDLM:      return "ldlm";
+        case S_LOV:       return "lov";
+        case S_LMV:       return "lmv";
+        case S_SEC:       return "sec";
+        case S_GSS:       return "gss";
+        case S_MGC:       return "mgc";
+        case S_MGS:       return "mgs";
+        case S_FID:       return "fid";
+        case S_FLD:       return "fld";
+        default:          return NULL;
+        }
+}
+
+/*
+ * Map one D_* debug constant to its name; NULL for any other value.
+ * The names must stay lower-case: libcfs_debug_token2mask() relies on it.
+ */
+const char *
+libcfs_debug_dbg2str(int debug)
+{
+        switch (debug) {
+        case D_TRACE:    return "trace";
+        case D_INODE:    return "inode";
+        case D_SUPER:    return "super";
+        case D_EXT2:     return "ext2";
+        case D_MALLOC:   return "malloc";
+        case D_CACHE:    return "cache";
+        case D_INFO:     return "info";
+        case D_IOCTL:    return "ioctl";
+        case D_NETERROR: return "neterror";
+        case D_NET:      return "net";
+        case D_WARNING:  return "warning";
+        case D_BUFFS:    return "buffs";
+        case D_OTHER:    return "other";
+        case D_DENTRY:   return "dentry";
+        case D_NETTRACE: return "nettrace";
+        case D_PAGE:     return "page";
+        case D_DLMTRACE: return "dlmtrace";
+        case D_ERROR:    return "error";
+        case D_EMERG:    return "emerg";
+        case D_HA:       return "ha";
+        case D_RPCTRACE: return "rpctrace";
+        case D_VFSTRACE: return "vfstrace";
+        case D_READA:    return "reada";
+        case D_MMAP:     return "mmap";
+        case D_CONFIG:   return "config";
+        case D_CONSOLE:  return "console";
+        case D_QUOTA:    return "quota";
+        case D_SEC:      return "sec";
+        default:         return NULL;
+        }
+}
+
+/*
+ * Render 'mask' as text into the buffer 'str' of 'size' bytes: "0" for
+ * an empty mask, otherwise the names of all set bits that have a name,
+ * separated by single spaces.  Names are taken from
+ * libcfs_debug_subsys2str() when 'is_subsys' is non-zero and from
+ * libcfs_debug_dbg2str() otherwise.
+ *
+ * The output is truncated to fit and is '\0'-terminated whenever
+ * 'size' > 0; with 'size' <= 0 nothing is written at all.
+ *
+ * Returns the length of the complete text (terminator not counted),
+ * which may exceed what fitted into 'str'.
+ */
+int
+libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys)
+{
+        const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+                                                 libcfs_debug_dbg2str;
+        int           len = 0;
+        const char   *token;
+        int           bit;
+        int           i;
+
+        if (mask == 0) {                        /* "0" */
+                if (size > 0)
+                        str[0] = '0';
+                len = 1;
+        } else {                                /* space-separated tokens */
+                for (i = 0; i < 32; i++) {
+                        /* shift as unsigned: 1 << 31 overflows an int */
+                        bit = (int)(1U << i);
+
+                        if ((mask & bit) == 0)
+                                continue;
+
+                        token = fn(bit);
+                        if (token == NULL)              /* unused bit */
+                                continue;
+
+                        if (len > 0) {                  /* separator? */
+                                if (len < size)
+                                        str[len] = ' ';
+                                len++;
+                        }
+
+                        while (*token != 0) {
+                                if (len < size)
+                                        str[len] = *token;
+                                token++;
+                                len++;
+                        }
+                }
+        }
+
+        /* terminate 'str'; without any room there is nothing to terminate */
+        if (len < size)
+                str[len] = 0;
+        else if (size > 0)
+                str[size - 1] = 0;
+
+        return len;
+}
+
+/*
+ * Look up the token of 'len' characters starting at 'str' among the
+ * names returned by libcfs_debug_subsys2str() ('is_subsys' != 0) or
+ * libcfs_debug_dbg2str() ('is_subsys' == 0).  Upper-case letters in
+ * 'str' match their lower-case counterparts in the names.
+ *
+ * Returns 0 and stores the matching bit in '*mask', or -EINVAL when no
+ * name matches ('*mask' is then left untouched).
+ */
+int
+libcfs_debug_token2mask(int *mask, const char *str, int len, int is_subsys)
+{
+        const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str :
+                                                 libcfs_debug_dbg2str;
+        int           i;
+        int           j;
+        int           bit;
+        const char   *token;
+
+        /* match against known tokens */
+        for (i = 0; i < 32; i++) {
+                /* shift as unsigned: 1 << 31 overflows an int */
+                bit = (int)(1U << i);
+
+                token = fn(bit);
+                if (token == NULL)              /* unused? */
+                        continue;
+
+                /* strcasecmp */
+                for (j = 0; ; j++) {
+                        if (j == len) {         /* end of token */
+                                if (token[j] == 0) {
+                                        *mask = bit;
+                                        return 0;
+                                }
+                                break;
+                        }
+
+                        if (token[j] == 0)
+                                break;
+
+                        if (str[j] == token[j])
+                                continue;
+
+                        /* only an upper-case letter may still match */
+                        if (str[j] < 'A' || 'Z' < str[j])
+                                break;
+
+                        if (str[j] - 'A' + 'a' != token[j])
+                                break;
+                }
+        }
+
+        return -EINVAL;                         /* no match */
+}
+
+/*
+ * Parse 'str' into a debug mask ('is_subsys' == 0) or a subsystem mask
+ * ('is_subsys' != 0) and store it in '*mask'.
+ *
+ * Returns 0 on success and -EINVAL when 'str' contains no token, an
+ * unknown token, or ends with an operator; '*mask' is not written on
+ * failure.  For a relative expression the current value of '*mask' is
+ * read as the starting point.
+ */
+int
+libcfs_debug_str2mask(int *mask, const char *str, int is_subsys)
+{
+        int         m = 0;
+        char        op = 0;
+        int         matched;
+        int         n;
+        int         t;
+
+        /* Allow a number for backwards compatibility */
+
+        /* n = length of 'str' without trailing whitespace */
+        for (n = strlen(str); n > 0; n--)
+                if (!isspace(str[n-1]))
+                        break;
+        matched = n;
+
+        /* accepted only if the number is all there is, i.e. the scan
+         * stopped exactly at offset n */
+        if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 &&
+            matched == n) {
+                *mask = m;
+                return 0;
+        }
+
+        /* <str> must be a list of debug tokens separated by whitespace and
+         * optionally an operator ('+' or '-').  If an operator appears
+         * first in <str>, '*mask' is used as the starting point (relative),
+         * otherwise 0 is used (absolute).  An operator applies to all
+         * following tokens up to the next operator. */
+
+        /* from here on 'matched' is a flag: has any token been seen? */
+        matched = 0;
+        while (*str != 0) {
+                while (isspace(*str)) /* skip whitespace */
+                        str++;
+
+                if (*str == 0)
+                        break;
+
+                if (*str == '+' || *str == '-') {
+                        op = *str++;
+
+                        /* op on first token == relative */
+                        if (!matched)
+                                m = *mask;
+
+                        while (isspace(*str)) /* skip whitespace */
+                                str++;
+
+                        if (*str == 0)          /* trailing op */
+                                return -EINVAL;
+                }
+
+                /* find token length */
+                for (n = 0; str[n] != 0 && !isspace(str[n]); n++);
+
+                /* match token; 't' receives the bit of the token */
+                if (libcfs_debug_token2mask(&t, str, n, is_subsys) != 0)
+                        return -EINVAL;
+
+                matched = 1;
+                if (op == '-')
+                        m &= ~t;
+                else
+                        m |= t;
+
+                str += n;
+        }
+
+        if (!matched)
+                return -EINVAL;
+
+        *mask = m;
+        return 0;
+}
+
+/*
+ * Dump all trace pages to "<debug_file_path>.<seconds>.<arg>".
+ * Nothing is dumped when debug_file_path starts with "NONE".
+ * \a arg is an integer tag smuggled through a pointer; it only ends up
+ * in the file name.
+ */
+void libcfs_debug_dumplog_internal(void *arg)
+{
+        long dump_tag = (long)arg;
+        CFS_DECL_JOURNAL_DATA;
+
+        CFS_PUSH_JOURNAL;
+
+        if (strncmp(debug_file_path, "NONE", 4) != 0) {
+                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
+                         "%s.%ld.%ld", debug_file_path,
+                         cfs_time_current_sec(), dump_tag);
+
+                printk(KERN_ALERT "LustreError: dumping log to %s\n",
+                       debug_file_name);
+
+                tracefile_dump_all_pages(debug_file_name);
+        }
+
+        CFS_POP_JOURNAL;
+}
+
+/*
+ * Thread body used by libcfs_debug_dumplog(): detach, dump the log,
+ * then signal debug_ctlwq so that a waiter on that queue is woken.
+ * Always returns 0.
+ */
+int
+libcfs_debug_dumplog_thread(void *arg)
+{
+        cfs_daemonize("");
+
+        libcfs_debug_dumplog_internal(arg);
+
+        /* tell whoever sleeps on debug_ctlwq that the dump is done */
+        cfs_waitq_signal(&debug_ctlwq);
+
+        return 0;
+}
+
+/*
+ * Dump the debug log by starting libcfs_debug_dumplog_thread() and
+ * waiting on debug_ctlwq, which that thread signals when it is done.
+ * The current pid is handed to the thread as its argument.  If the thread
+ * cannot be started an error is printed and no wait takes place.
+ */
+void libcfs_debug_dumplog(void)
+{
+        int            rc;
+        cfs_waitlink_t wait;
+        ENTRY;
+
+        /* we're being careful to ensure that the kernel thread is
+         * able to set our state to running as it exits before we
+         * get to schedule() */
+        cfs_waitlink_init(&wait);
+        set_current_state(TASK_INTERRUPTIBLE);
+        cfs_waitq_add(&debug_ctlwq, &wait);
+
+        /* the pid is passed by value inside the pointer argument */
+        rc = cfs_kernel_thread(libcfs_debug_dumplog_thread,
+                               (void *)(long)cfs_curproc_pid(),
+                               CLONE_VM | CLONE_FS | CLONE_FILES);
+        if (rc < 0)
+                printk(KERN_ERR "LustreError: cannot start log dump thread: "
+                       "%d\n", rc);
+        else
+                cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE);
+
+        /* be sure to teardown if kernel_thread() failed */
+        cfs_waitq_del(&debug_ctlwq, &wait);
+        set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Kernel-side debug initialisation: set up the control wait queue and the
+ * console delay defaults, size the trace buffers from libcfs_debug_mb and
+ * initialise the tracefile.  The panic notifier is registered only when
+ * tracefile_init() succeeds.  \a bufsize is not used here.
+ *
+ * Returns the result of tracefile_init().
+ */
+int libcfs_debug_init(unsigned long bufsize)
+{
+        int    pages = libcfs_debug_mb;
+        int    rc;
+
+        cfs_waitq_init(&debug_ctlwq);
+        libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
+        libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
+
+        /* A usable libcfs_debug_mb lies between one MB per possible CPU
+         * and trace_max_debug_mb(); it is split evenly across the CPUs
+         * and converted from MB to pages.  Anything else (invalid or
+         * unset) falls back to TCD_MAX_PAGES. */
+        if (pages <= trace_max_debug_mb() && pages >= num_possible_cpus()) {
+                int per_cpu_mb = pages / num_possible_cpus();
+
+                pages = per_cpu_mb << (20 - CFS_PAGE_SHIFT);
+        } else {
+                pages = TCD_MAX_PAGES;
+        }
+
+        rc = tracefile_init(pages);
+        if (rc != 0)
+                return rc;
+
+        libcfs_register_panic_notifier();
+        return 0;
+}
+
+/*
+ * Kernel-side debug teardown: drop the panic notifier first, then shut
+ * the tracefile down.  Always returns 0.
+ */
+int
+libcfs_debug_cleanup(void)
+{
+        libcfs_unregister_panic_notifier();
+
+        tracefile_exit();
+
+        return 0;
+}
+
+/* Clear the debug buffer via trace_flush_pages().  Always returns 0. */
+int
+libcfs_debug_clear_buffer(void)
+{
+        trace_flush_pages();
+
+        return 0;
+}
+
+/* Debug markers, although printed by S_LNET,
+ * should not be marked as such. */
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_UNDEFINED
+/*
+ * Write a marker into the debug log: \a text is logged at D_WARNING as
+ * "DEBUG MARKER: <text>", framed by two D_TRACE separator lines.
+ * Always returns 0.
+ */
+int libcfs_debug_mark_buffer(char *text)
+{
+        CDEBUG(D_TRACE,"***************************************************\n");
+        CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text);
+        CDEBUG(D_TRACE,"***************************************************\n");
+
+        return 0;
+}
+#undef DEBUG_SUBSYSTEM
+#define DEBUG_SUBSYSTEM S_LNET
+
+/*
+ * Replace the global debug mask (libcfs_debug) with \a debug_level,
+ * announcing the new value on the console first.
+ */
+void
+libcfs_debug_set_level(unsigned int debug_level)
+{
+        printk(KERN_WARNING
+               "Lustre: Setting portals debug level to %08x\n", debug_level);
+
+        libcfs_debug = debug_level;
+}
+
+EXPORT_SYMBOL(libcfs_debug_dumplog);
+EXPORT_SYMBOL(libcfs_debug_set_level);
+
+
+#else /* !__KERNEL__ */
+
+#include <libcfs/libcfs.h>
+
+#ifdef HAVE_CATAMOUNT_DATA_H
+#include <catamount/data.h>
+#include <catamount/lputs.h>
+
+static char source_nid[16];
+/* 0 indicates no messages to console, 1 is errors, > 1 is all debug messages */
+static int toconsole = 1;
+unsigned int libcfs_console_ratelimit = 1;
+cfs_duration_t libcfs_console_max_delay;
+cfs_duration_t libcfs_console_min_delay;
+unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
+#else /* !HAVE_CATAMOUNT_DATA_H */
+#ifdef HAVE_NETDB_H
+#include <sys/utsname.h>
+#endif /* HAVE_NETDB_H */
+struct utsname *tmp_utsname;
+static char source_nid[sizeof(tmp_utsname->nodename)];
+#endif /* HAVE_CATAMOUNT_DATA_H */
+
+/* pid printed in every message; filled in by libcfs_debug_init() */
+static int source_pid;
+int smp_processor_id = 1;
+/* base name for the log file, taken from LIBLUSTRE_DEBUG_BASE */
+char debug_file_path[1024];
+/* stream debug messages are written to; NULL until libcfs_debug_init() */
+FILE *debug_file_fd;
+
+/*
+ * User-space variant: nothing is dumped, the caller is only pointed at
+ * the debug file.  \a arg is ignored.  Always returns 0.
+ */
+int
+portals_do_debug_dumplog(void *arg)
+{
+        printf("Look in %s\n", debug_file_name);
+
+        return 0;
+}
+
+
+/* Intentionally empty in the user-space build. */
+void
+portals_debug_print(void)
+{
+}
+
+
+/*
+ * User-space variant of the log dump: just tell the user which file
+ * holds the debug output.
+ */
+void
+libcfs_debug_dumplog(void)
+{
+        printf("Look in %s\n", debug_file_name);
+}
+
+/*
+ * User-space debug initialisation, driven by environment variables.
+ *
+ *   LIBLUSTRE_DEBUG_MASK    numeric value for libcfs_debug
+ *   LIBLUSTRE_DEBUG_SUBSYS  numeric value for libcfs_subsystem_debug
+ *   LIBLUSTRE_DEBUG_BASE    base path; the log file name is derived from
+ *                           it as "<base>-<nid>-<time>.log"
+ *   LIBLUSTRE_DEBUG_FILE    explicit log file name; "stdout", "-" and
+ *                           "stderr" select the standard streams
+ *
+ * On Catamount builds the LIBLUSTRE_DEBUG_CONSOLE* variables additionally
+ * configure console output and its rate limiting.
+ *
+ * If no file can be opened, output falls back to stdout.  \a bufsize is
+ * not used here.  Always returns 0.
+ */
+int libcfs_debug_init(unsigned long bufsize)
+{
+        char *debug_mask = NULL;
+        char *debug_subsys = NULL;
+        char *debug_filename;
+
+#ifdef HAVE_CATAMOUNT_DATA_H
+        char *debug_console = NULL;
+        char *debug_ratelimit = NULL;
+        char *debug_max_delay = NULL;
+        char *debug_min_delay = NULL;
+        char *debug_backoff = NULL;
+
+        libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
+        libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
+
+        snprintf(source_nid, sizeof(source_nid) - 1, "%u", _my_pnid);
+        source_pid = _my_pid;
+
+        debug_console = getenv("LIBLUSTRE_DEBUG_CONSOLE");
+        if (debug_console != NULL) {
+                toconsole = strtoul(debug_console, NULL, 0);
+                CDEBUG(D_INFO, "set liblustre toconsole to %u\n", toconsole);
+        }
+        debug_ratelimit = getenv("LIBLUSTRE_DEBUG_CONSOLE_RATELIMIT");
+        if (debug_ratelimit != NULL) {
+                libcfs_console_ratelimit = strtoul(debug_ratelimit, NULL, 0);
+                CDEBUG(D_INFO, "set liblustre console ratelimit to %u\n",
+                                libcfs_console_ratelimit);
+        }
+        debug_max_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY");
+        if (debug_max_delay != NULL)
+                libcfs_console_max_delay =
+                            cfs_time_seconds(strtoul(debug_max_delay, NULL, 0));
+        debug_min_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY");
+        if (debug_min_delay != NULL)
+                libcfs_console_min_delay =
+                            cfs_time_seconds(strtoul(debug_min_delay, NULL, 0));
+        if (debug_min_delay || debug_max_delay) {
+                /* both delays must be non-zero and max >= min, otherwise
+                 * the defaults are restored */
+                if (!libcfs_console_max_delay || !libcfs_console_min_delay ||
+                    libcfs_console_max_delay < libcfs_console_min_delay) {
+                        libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY;
+                        libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY;
+                        CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY "
+                                       "should be greater than "
+                                       "LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY "
+                                       "and both parameters should be non-null"
+                                       ": restore default values\n");
+                } else {
+                        CDEBUG(D_INFO, "set liblustre console max delay to %lus"
+                                       " and min delay to %lus\n",
+                               (cfs_duration_t)
+                                     cfs_duration_sec(libcfs_console_max_delay),
+                               (cfs_duration_t)
+                                    cfs_duration_sec(libcfs_console_min_delay));
+                }
+        }
+        debug_backoff = getenv("LIBLUSTRE_DEBUG_CONSOLE_BACKOFF");
+        if (debug_backoff != NULL) {
+                libcfs_console_backoff = strtoul(debug_backoff, NULL, 0);
+                if (libcfs_console_backoff <= 0) {
+                        libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF;
+                        CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_BACKOFF <= 0: "
+                                       "restore default value\n");
+                } else {
+                        CDEBUG(D_INFO, "set liblustre console backoff to %u\n",
+                               libcfs_console_backoff);
+                }
+        }
+#else
+        struct utsname myname;
+
+        /* source_nid is declared with sizeof(nodename), so this fits */
+        if (uname(&myname) == 0)
+                strcpy(source_nid, myname.nodename);
+        source_pid = getpid();
+#endif
+        /* debug masks: parsed as unsigned so that values with bit 31 set
+         * (e.g. 0xffffffff) are not clamped to LONG_MAX where long is
+         * 32 bits wide */
+        debug_mask = getenv("LIBLUSTRE_DEBUG_MASK");
+        if (debug_mask)
+                libcfs_debug = (unsigned int) strtoul(debug_mask, NULL, 0);
+
+        debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS");
+        if (debug_subsys)
+                libcfs_subsystem_debug =
+                                (unsigned int) strtoul(debug_subsys, NULL, 0);
+
+        /* snprintf() always NUL-terminates, unlike strncpy() when the
+         * source is as long as the destination or longer */
+        debug_filename = getenv("LIBLUSTRE_DEBUG_BASE");
+        if (debug_filename)
+                snprintf(debug_file_path, sizeof(debug_file_path), "%s",
+                         debug_filename);
+
+        debug_filename = getenv("LIBLUSTRE_DEBUG_FILE");
+        if (debug_filename)
+                snprintf(debug_file_name, sizeof(debug_file_name), "%s",
+                         debug_filename);
+
+        /* no explicit file name: derive one from the base path */
+        if (debug_file_name[0] == '\0' && debug_file_path[0] != '\0')
+                snprintf(debug_file_name, sizeof(debug_file_name) - 1,
+                         "%s-%s-"CFS_TIME_T".log", debug_file_path, source_nid, time(0));
+
+        if (strcmp(debug_file_name, "stdout") == 0 ||
+            strcmp(debug_file_name, "-") == 0) {
+                debug_file_fd = stdout;
+        } else if (strcmp(debug_file_name, "stderr") == 0) {
+                debug_file_fd = stderr;
+        } else if (debug_file_name[0] != '\0') {
+                debug_file_fd = fopen(debug_file_name, "w");
+                if (debug_file_fd == NULL)
+                        fprintf(stderr, "%s: unable to open '%s': %s\n",
+                                source_nid, debug_file_name, strerror(errno));
+        }
+
+        /* nothing configured, or fopen() failed: fall back to stdout */
+        if (debug_file_fd == NULL)
+                debug_file_fd = stdout;
+
+        return 0;
+}
+
+/*
+ * User-space debug teardown: close the debug file unless it is one of the
+ * standard streams.  Safe to call when libcfs_debug_init() never ran and
+ * safe to call more than once: a file that was closed is forgotten, so it
+ * is never handed to fclose() twice.  Always returns 0.
+ */
+int libcfs_debug_cleanup(void)
+{
+        if (debug_file_fd != NULL &&
+            debug_file_fd != stdout && debug_file_fd != stderr) {
+                fclose(debug_file_fd);
+                debug_file_fd = NULL;
+        }
+        return 0;
+}
+
+/* Does nothing in the user-space build.  Always returns 0. */
+int
+libcfs_debug_clear_buffer(void)
+{
+        return 0;
+}
+
+/*
+ * Write "DEBUG MARKER: <text>" framed by two separator lines to the debug
+ * file.  When no debug file is open the marker is dropped, as
+ * libcfs_debug_vmsg2() does for ordinary messages.  Always returns 0.
+ */
+int libcfs_debug_mark_buffer(char *text)
+{
+        /* not initialised: nowhere to write the marker */
+        if (debug_file_fd == NULL)
+                return 0;
+
+        fprintf(debug_file_fd, "*******************************************************************************\n");
+        fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
+        fprintf(debug_file_fd, "*******************************************************************************\n");
+
+        return 0;
+}
+
+#ifdef HAVE_CATAMOUNT_DATA_H
+#define CATAMOUNT_MAXLINE (256-4)
+/*
+ * Hand \a buf to lputs() in steps of CATAMOUNT_MAXLINE bytes until
+ * \a size bytes have been covered.
+ */
+void catamount_printline(char *buf, size_t size)
+{
+    char *cursor = buf;
+    int left;
+
+    for (left = size; left > 0; left -= CATAMOUNT_MAXLINE) {
+        lputs(cursor);
+        cursor += CATAMOUNT_MAXLINE;
+    }
+}
+#endif
+
+int
+libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls,
+                   int subsys, int mask,
+                   const char *file, const char *fn, const int line,
+                   const char *format1, va_list args,
+                   const char *format2, ...)
+{
+        struct timeval tv;
+        int            nob;
+        int            remain;
+        va_list        ap;
+        char           buf[CFS_PAGE_SIZE]; /* size 4096 used for compatimble
+                                            * with linux, where message can`t
+                                            * be exceed PAGE_SIZE */
+        int            console = 0;
+        char *prefix = "Lustre";
+
+#ifdef HAVE_CATAMOUNT_DATA_H
+        /* toconsole == 0 - all messages to debug_file_fd
+         * toconsole == 1 - warnings to console, all to debug_file_fd
+         * toconsole >  1 - all debug to console */
+        if (((mask & libcfs_printk) && toconsole == 1) || toconsole > 1)
+                console = 1;
+#endif
+
+        if ((!console) && (!debug_file_fd)) {
+                return 0;
+        }
+
+        if (mask & (D_EMERG | D_ERROR))
+               prefix = "LustreError";
+
+        nob = snprintf(buf, sizeof(buf), "%s: %u-%s:(%s:%d:%s()): ", prefix,
+                       source_pid, source_nid, file, line, fn);
+
+        remain = sizeof(buf) - nob;
+        if (format1) {
+                nob += vsnprintf(&buf[nob], remain, format1, args);
+        }
+
+        remain = sizeof(buf) - nob;
+        if ((format2) && (remain > 0)) {
+                va_start(ap, format2);
+                nob += vsnprintf(&buf[nob], remain, format2, ap);
+                va_end(ap);
+        }
+
+#ifdef HAVE_CATAMOUNT_DATA_H
+        if (console) {
+                /* check rate limit for console */
+                if (cdls != NULL) {
+                        if (libcfs_console_ratelimit &&
+                                cdls->cdls_next != 0 &&     /* not first time ever */
+                                !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
+
+                                /* skipping a console message */
+                                cdls->cdls_count++;
+                                goto out_file;
+                        }
+
+                        if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
+                                           libcfs_console_max_delay +
+                                           cfs_time_seconds(10))) {
+                                /* last timeout was a long time ago */
+                                cdls->cdls_delay /= libcfs_console_backoff * 4;
+                        } else {
+                                cdls->cdls_delay *= libcfs_console_backoff;
+
+                                if (cdls->cdls_delay <
+                                                libcfs_console_min_delay)
+                                        cdls->cdls_delay =
+                                                libcfs_console_min_delay;
+                                else if (cdls->cdls_delay >
+                                                libcfs_console_max_delay)
+                                        cdls->cdls_delay =
+                                                libcfs_console_max_delay;
+                        }
+
+                        /* ensure cdls_next is never zero after it's been seen */
+                        cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
+                }
+
+                if (cdls != NULL && cdls->cdls_count != 0) {
+                        char buf2[100];
+
+                        nob = snprintf(buf2, sizeof(buf2),
+                                       "Skipped %d previous similar message%s\n",
+                                       cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");
+
+                        catamount_printline(buf2, nob);
+                        cdls->cdls_count = 0;
+                        goto out_file;
+                }
+                catamount_printline(buf, nob);
+       }
+out_file:
+        /* return on toconsole > 1, as we don't want the user getting
+        * spammed by the debug data */
+        if (toconsole > 1)
+                return 0;
+#endif
+        if (debug_file_fd == NULL)
+                return 0;
+
+        gettimeofday(&tv, NULL);
+
+        fprintf(debug_file_fd, CFS_TIME_T".%06lu:%u:%s:(%s:%d:%s()): %s",
+                tv.tv_sec, tv.tv_usec, source_pid, source_nid,
+                file, line, fn, buf);
+
+        return 0;
+}
+
+/*
+ * Report a failed assertion at D_EMERG level, naming the expression and
+ * its source location, then abort() the process.
+ */
+void
+libcfs_assertion_failed(const char *expr, const char *file, const char *func,
+                        const int line)
+{
+        libcfs_debug_msg(NULL, 0, D_EMERG,
+                         file, func, line,
+                         "ASSERTION(%s) failed\n", expr);
+
+        abort();
+}
+
+#endif /* __KERNEL__ */
diff --git a/libcfs/libcfs/libcfs.xcode/project.pbxproj b/libcfs/libcfs/libcfs.xcode/project.pbxproj
new file mode 100644 (file)
index 0000000..479c21b
--- /dev/null
@@ -0,0 +1,439 @@
+// !$*UTF8*$!
+{
+       archiveVersion = 1;
+       classes = {
+       };
+       objectVersion = 39;
+       objects = {
+               06AA1262FFB20DD611CA28AA = {
+                       buildRules = (
+                       );
+                       buildSettings = {
+                               COPY_PHASE_STRIP = NO;
+                               GCC_DYNAMIC_NO_PIC = NO;
+                               GCC_ENABLE_FIX_AND_CONTINUE = YES;
+                               GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
+                               GCC_OPTIMIZATION_LEVEL = 0;
+                               OPTIMIZATION_CFLAGS = "-O0";
+                               ZERO_LINK = YES;
+                       };
+                       isa = PBXBuildStyle;
+                       name = Development;
+               };
+               06AA1263FFB20DD611CA28AA = {
+                       buildRules = (
+                       );
+                       buildSettings = {
+                               COPY_PHASE_STRIP = YES;
+                               GCC_ENABLE_FIX_AND_CONTINUE = NO;
+                               ZERO_LINK = NO;
+                       };
+                       isa = PBXBuildStyle;
+                       name = Deployment;
+               };
+//060
+//061
+//062
+//063
+//064
+//080
+//081
+//082
+//083
+//084
+               089C1669FE841209C02AAC07 = {
+                       buildSettings = {
+                       };
+                       buildStyles = (
+                               06AA1262FFB20DD611CA28AA,
+                               06AA1263FFB20DD611CA28AA,
+                       );
+                       hasScannedForEncodings = 1;
+                       isa = PBXProject;
+                       mainGroup = 089C166AFE841209C02AAC07;
+                       projectDirPath = "";
+                       targets = (
+                               32A4FEB80562C75700D090E7,
+                       );
+               };
+               089C166AFE841209C02AAC07 = {
+                       children = (
+                               247142CAFF3F8F9811CA285C,
+                               089C167CFE841241C02AAC07,
+                               19C28FB6FE9D52B211CA2CBB,
+                       );
+                       isa = PBXGroup;
+                       name = libcfs;
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+               089C167CFE841241C02AAC07 = {
+                       children = (
+                               32A4FEC30562C75700D090E7,
+                       );
+                       isa = PBXGroup;
+                       name = Resources;
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+//080
+//081
+//082
+//083
+//084
+//190
+//191
+//192
+//193
+//194
+               19444794072D07AD00DAF9BC = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       path = tracefile.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19444795072D07AD00DAF9BC = {
+                       fileRef = 19444794072D07AD00DAF9BC;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19444796072D08AA00DAF9BC = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       path = debug.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19444797072D08AA00DAF9BC = {
+                       fileRef = 19444796072D08AA00DAF9BC;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19509C03072CD5FF00A958C3 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       path = module.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19509C04072CD5FF00A958C3 = {
+                       fileRef = 19509C03072CD5FF00A958C3;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713B76072E8274004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_prim.c;
+                       path = arch/xnu/cfs_prim.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713B77072E8274004E8469 = {
+                       fileRef = 19713B76072E8274004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713BB7072E8281004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_mem.c;
+                       path = arch/xnu/cfs_mem.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713BB8072E8281004E8469 = {
+                       fileRef = 19713BB7072E8281004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713BF7072E828E004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_proc.c;
+                       path = arch/xnu/cfs_proc.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713BF8072E828E004E8469 = {
+                       fileRef = 19713BF7072E828E004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713C7A072E82B2004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_utils.c;
+                       path = arch/xnu/cfs_utils.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713C7B072E82B2004E8469 = {
+                       fileRef = 19713C7A072E82B2004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713CD6072E8A56004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_module.c;
+                       path = arch/xnu/cfs_module.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713CD7072E8A56004E8469 = {
+                       fileRef = 19713CD6072E8A56004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713D1B072E8E39004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_fs.c;
+                       path = arch/xnu/cfs_fs.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713D1C072E8E39004E8469 = {
+                       fileRef = 19713D1B072E8E39004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713D60072E9109004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = xnu_sync.c;
+                       path = arch/xnu/xnu_sync.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713D61072E9109004E8469 = {
+                       fileRef = 19713D60072E9109004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713DC2072F994D004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_tracefile.c;
+                       path = arch/xnu/cfs_tracefile.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713DC3072F994D004E8469 = {
+                       fileRef = 19713DC2072F994D004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19713E1C072FAFB5004E8469 = {
+                       fileEncoding = 30;
+                       isa = PBXFileReference;
+                       lastKnownFileType = sourcecode.c.c;
+                       name = cfs_debug.c;
+                       path = arch/xnu/cfs_debug.c;
+                       refType = 2;
+                       sourceTree = SOURCE_ROOT;
+               };
+               19713E1D072FAFB5004E8469 = {
+                       fileRef = 19713E1C072FAFB5004E8469;
+                       isa = PBXBuildFile;
+                       settings = {
+                       };
+               };
+               19C28FB6FE9D52B211CA2CBB = {
+                       children = (
+                               32A4FEC40562C75800D090E7,
+                       );
+                       isa = PBXGroup;
+                       name = Products;
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+//190
+//191
+//192
+//193
+//194
+//240
+//241
+//242
+//243
+//244
+               247142CAFF3F8F9811CA285C = {
+                       children = (
+                               19713E1C072FAFB5004E8469,
+                               19713DC2072F994D004E8469,
+                               19713D60072E9109004E8469,
+                               19713D1B072E8E39004E8469,
+                               19713CD6072E8A56004E8469,
+                               19713C7A072E82B2004E8469,
+                               19713BF7072E828E004E8469,
+                               19713BB7072E8281004E8469,
+                               19713B76072E8274004E8469,
+                               19444796072D08AA00DAF9BC,
+                               19444794072D07AD00DAF9BC,
+                               19509C03072CD5FF00A958C3,
+                       );
+                       isa = PBXGroup;
+                       name = Source;
+                       path = "";
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+//240
+//241
+//242
+//243
+//244
+//320
+//321
+//322
+//323
+//324
+               32A4FEB80562C75700D090E7 = {
+                       buildPhases = (
+                               32A4FEB90562C75700D090E7,
+                               32A4FEBA0562C75700D090E7,
+                               32A4FEBB0562C75700D090E7,
+                               32A4FEBD0562C75700D090E7,
+                               32A4FEBF0562C75700D090E7,
+                               32A4FEC00562C75700D090E7,
+                               32A4FEC10562C75700D090E7,
+                       );
+                       buildRules = (
+                       );
+                       buildSettings = {
+                               FRAMEWORK_SEARCH_PATHS = "";
+                               GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
+                               GCC_WARN_UNKNOWN_PRAGMAS = NO;
+                               HEADER_SEARCH_PATHS = ../include;
+                               INFOPLIST_FILE = Info.plist;
+                               INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+                               LIBRARY_SEARCH_PATHS = "";
+                               MODULE_NAME = com.clusterfs.lustre.portals.libcfs;
+                               MODULE_START = libcfs_start;
+                               MODULE_STOP = libcfs_stop;
+                               MODULE_VERSION = 1.0.1;
+                               OTHER_CFLAGS = "-D__KERNEL__";
+                               OTHER_LDFLAGS = "";
+                               OTHER_REZFLAGS = "";
+                               PRODUCT_NAME = libcfs;
+                               SECTORDER_FLAGS = "";
+                               WARNING_CFLAGS = "-Wmost";
+                               WRAPPER_EXTENSION = kext;
+                       };
+                       dependencies = (
+                       );
+                       isa = PBXNativeTarget;
+                       name = libcfs;
+                       productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
+                       productName = libcfs;
+                       productReference = 32A4FEC40562C75800D090E7;
+                       productType = "com.apple.product-type.kernel-extension";
+               };
+               32A4FEB90562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXShellScriptBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+                       shellPath = /bin/sh;
+                       shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n    . \"$script\"\nfi";
+               };
+               32A4FEBA0562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXHeadersBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               32A4FEBB0562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXResourcesBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               32A4FEBD0562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                               19509C04072CD5FF00A958C3,
+                               19444795072D07AD00DAF9BC,
+                               19444797072D08AA00DAF9BC,
+                               19713B77072E8274004E8469,
+                               19713BB8072E8281004E8469,
+                               19713BF8072E828E004E8469,
+                               19713C7B072E82B2004E8469,
+                               19713CD7072E8A56004E8469,
+                               19713D1C072E8E39004E8469,
+                               19713D61072E9109004E8469,
+                               19713DC3072F994D004E8469,
+                               19713E1D072FAFB5004E8469,
+                       );
+                       isa = PBXSourcesBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               32A4FEBF0562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXFrameworksBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               32A4FEC00562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXRezBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+               };
+               32A4FEC10562C75700D090E7 = {
+                       buildActionMask = 2147483647;
+                       files = (
+                       );
+                       isa = PBXShellScriptBuildPhase;
+                       runOnlyForDeploymentPostprocessing = 0;
+                       shellPath = /bin/sh;
+                       shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n    . \"$script\"\nfi";
+               };
+               32A4FEC30562C75700D090E7 = {
+                       isa = PBXFileReference;
+                       lastKnownFileType = text.plist.xml;
+                       path = Info.plist;
+                       refType = 4;
+                       sourceTree = "<group>";
+               };
+               32A4FEC40562C75800D090E7 = {
+                       explicitFileType = wrapper.cfbundle;
+                       includeInIndex = 0;
+                       isa = PBXFileReference;
+                       path = libcfs.kext;
+                       refType = 3;
+                       sourceTree = BUILT_PRODUCTS_DIR;
+               };
+       };
+       rootObject = 089C1669FE841209C02AAC07;
+}
diff --git a/libcfs/libcfs/linux/.cvsignore b/libcfs/libcfs/linux/.cvsignore
new file mode 100644 (file)
index 0000000..2bc4137
--- /dev/null
@@ -0,0 +1,3 @@
+Makefile
+Makefile.in
+*.o.cmd
diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am
new file mode 100644 (file)
index 0000000..8bf35cc
--- /dev/null
@@ -0,0 +1,4 @@
+# Linux-specific libcfs sources, listed in EXTRA_DIST so that they are
+# shipped in the distribution.
+EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \
+       linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c  \
+       linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c
+
diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c
new file mode 100644 (file)
index 0000000..e446169
--- /dev/null
@@ -0,0 +1,133 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Lustre curproc API implementation for Linux kernel
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation. Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+ * Public License for more details. You should have received a copy of the GNU
+ * General Public License along with Lustre; if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/sched.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for Linux kernel.
+ */
+
+/* Return the uid field of the current task. */
+uid_t  cfs_curproc_uid(void)
+{
+        uid_t id = current->uid;
+
+        return id;
+}
+
+/* Return the gid field of the current task. */
+gid_t  cfs_curproc_gid(void)
+{
+        gid_t id = current->gid;
+
+        return id;
+}
+
+/* Return the fsuid field of the current task. */
+uid_t  cfs_curproc_fsuid(void)
+{
+        uid_t id = current->fsuid;
+
+        return id;
+}
+
+/* Return the fsgid field of the current task. */
+gid_t  cfs_curproc_fsgid(void)
+{
+        gid_t id = current->fsgid;
+
+        return id;
+}
+
+/* Return the pid field of the current task. */
+pid_t  cfs_curproc_pid(void)
+{
+        pid_t id = current->pid;
+
+        return id;
+}
+
+/*
+ * Return the number of supplementary groups of the current task.
+ * From 2.6.4 on the count lives in current->group_info and is read under
+ * task_lock(); older kernels keep it directly in the task structure.
+ */
+int    cfs_curproc_groups_nr(void)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+        int count;
+
+        task_lock(current);
+        count = current->group_info->ngroups;
+        task_unlock(current);
+        return count;
+#else
+        return current->ngroups;
+#endif
+}
+
+/*
+ * Copy up to @size supplementary group ids of the current task into @array.
+ * The number copied is capped at the number of groups the task has; a
+ * non-positive @size copies nothing.
+ *
+ * On kernels with struct group_info (>= 2.6.4) the gids are kept in an array
+ * of blocks holding NGROUPS_PER_BLOCK entries each, so they are copied block
+ * by block: one memcpy() starting at blocks[0] would read past the end of
+ * the first block as soon as more than NGROUPS_PER_BLOCK gids are wanted.
+ */
+void   cfs_curproc_groups_dump(gid_t *array, int size)
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)
+        struct group_info *ginfo;
+        int                copied = 0;
+        int                block = 0;
+
+        task_lock(current);
+        ginfo = current->group_info;
+        size = min_t(int, size, ginfo->ngroups);
+        while (copied < size) {
+                int chunk = min_t(int, size - copied, NGROUPS_PER_BLOCK);
+
+                memcpy(array + copied, ginfo->blocks[block],
+                       chunk * sizeof(gid_t));
+                copied += chunk;
+                block++;
+        }
+        task_unlock(current);
+#else
+        LASSERT(size <= NGROUPS);
+        size = min_t(int, size, current->ngroups);
+        /* a negative size would turn into a huge memcpy() length */
+        if (size > 0)
+                memcpy(array, current->groups, size * sizeof(__u32));
+#endif
+}
+
+
+/* Return the result of in_group_p() for @gid. */
+int    cfs_curproc_is_in_groups(gid_t gid)
+{
+        int member = in_group_p(gid);
+
+        return member;
+}
+
+/* Return the umask stored in the current task's fs structure. */
+mode_t cfs_curproc_umask(void)
+{
+        mode_t mask = current->fs->umask;
+
+        return mask;
+}
+
+/* Return a pointer to the comm buffer of the current task (not a copy). */
+char  *cfs_curproc_comm(void)
+{
+        struct task_struct *self = current;
+
+        return self->comm;
+}
+
+/* Return the cap_effective value of the current task. */
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+        cfs_kernel_cap_t caps = current->cap_effective;
+
+        return caps;
+}
+
+/* Overwrite the cap_effective value of the current task with @cap. */
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+        struct task_struct *self = current;
+
+        self->cap_effective = cap;
+}
+
+EXPORT_SYMBOL(cfs_curproc_uid);
+EXPORT_SYMBOL(cfs_curproc_pid);
+EXPORT_SYMBOL(cfs_curproc_gid);
+EXPORT_SYMBOL(cfs_curproc_fsuid);
+EXPORT_SYMBOL(cfs_curproc_fsgid);
+EXPORT_SYMBOL(cfs_curproc_umask);
+EXPORT_SYMBOL(cfs_curproc_comm);
+EXPORT_SYMBOL(cfs_curproc_groups_nr);
+EXPORT_SYMBOL(cfs_curproc_groups_dump);
+EXPORT_SYMBOL(cfs_curproc_is_in_groups);
+EXPORT_SYMBOL(cfs_curproc_cap_get);
+EXPORT_SYMBOL(cfs_curproc_cap_set);
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/libcfs/linux/linux-debug.c b/libcfs/libcfs/linux/linux-debug.c
new file mode 100644 (file)
index 0000000..9b2a9dc
--- /dev/null
@@ -0,0 +1,239 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/notifier.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <linux/completion.h>
+
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <asm/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/version.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <libcfs/linux/portals_compat25.h>
+#include <libcfs/libcfs.h>
+
+#include "tracefile.h"
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+#include <linux/kallsyms.h>
+#endif
+
+/* Path of the helper program that libcfs_run_upcall() installs as argv[0]
+ * and executes. */
+char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall";
+
+/*
+ * Run the LNET upcall helper with the caller's argument vector.
+ *
+ * argv[0] is overwritten with lnet_upcall; the caller provides argv[1] and
+ * onwards plus a terminating NULL (at least argv[1] is required).  A
+ * failure other than -ENOENT is reported with CERROR, every other outcome
+ * is logged with CDEBUG.  Only argv[1]..argv[4] are printed, followed by
+ * ",..." when more arguments exist.
+ */
+void libcfs_run_upcall(char **argv)
+{
+        char *envp[] = {
+                "HOME=/",
+                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
+                NULL};
+        const char *sep2, *arg2;
+        const char *sep3, *arg3;
+        const char *sep4, *arg4;
+        const char *more;
+        int   argc = 1;
+        int   rc;
+        ENTRY;
+
+        argv[0] = lnet_upcall;
+        for (; argv[argc] != NULL; argc++)
+                ;
+
+        LASSERT(argc >= 2);
+
+        rc = USERMODEHELPER(argv[0], argv, envp);
+
+        /* optional arguments and their separators, empty when absent */
+        sep2 = argc < 3 ? "" : ",";
+        arg2 = argc < 3 ? "" : argv[2];
+        sep3 = argc < 4 ? "" : ",";
+        arg3 = argc < 4 ? "" : argv[3];
+        sep4 = argc < 5 ? "" : ",";
+        arg4 = argc < 5 ? "" : argv[4];
+        more = argc < 6 ? "" : ",...";
+
+        if (rc >= 0 || rc == -ENOENT) {
+                CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n",
+                       argv[0], argv[1],
+                       sep2, arg2, sep3, arg3, sep4, arg4, more);
+        } else {
+                CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; "
+                       "check /proc/sys/lnet/upcall\n",
+                       rc, argv[0], argv[1],
+                       sep2, arg2, sep3, arg3, sep4, arg4, more);
+        }
+}
+
+/*
+ * Report an LBUG through the LNET upcall: the helper is passed "LBUG", the
+ * source file, the function name and the line number printed in decimal.
+ */
+void libcfs_run_lbug_upcall(char *file, const char *fn, const int line)
+{
+        char  line_str[32];
+        char *argv[6];
+
+        ENTRY;
+        snprintf(line_str, sizeof(line_str), "%d", line);
+
+        /* argv[0] is filled in by libcfs_run_upcall() */
+        argv[5] = NULL;
+        argv[4] = line_str;
+        argv[3] = (char *)fn;
+        argv[2] = file;
+        argv[1] = "LBUG";
+
+        libcfs_run_upcall(argv);
+}
+
+/*
+ * lbug_with_loc() is the handler for an LBUG raised at @file/@func/@line.
+ * Both variants set libcfs_catastrophe, log the event at D_EMERG, dump the
+ * debug log and run the LBUG upcall; neither returns to its caller.
+ */
+#ifdef __arch_um__
+/* __arch_um__ variant: after the upcall, trap with "int $3" and panic. */
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+        libcfs_catastrophe = 1;
+        libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line,
+                         "LBUG - trying to dump log to %s\n", debug_file_path);
+        libcfs_debug_dumplog();
+        libcfs_run_lbug_upcall(file, func, line);
+        asm("int $3");
+        panic("LBUG");
+}
+#else
+/* Default variant: panics immediately when called in interrupt context;
+ * otherwise it also dumps the stack, and after the upcall either panics
+ * (libcfs_panic_on_lbug set) or parks the task for good. */
+/* coverity[+kill] */
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+        libcfs_catastrophe = 1;
+        libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n");
+
+        if (in_interrupt()) {
+                panic("LBUG in interrupt.\n");
+                /* not reached */
+        }
+
+        libcfs_debug_dumpstack(NULL);
+        libcfs_debug_dumplog();
+        libcfs_run_lbug_upcall(file, func, line);
+        if (libcfs_panic_on_lbug)
+                panic("LBUG");
+        /* no panic requested: mark the task uninterruptible and keep
+         * calling schedule() so this function never returns */
+        set_task_state(current, TASK_UNINTERRUPTIBLE);
+        while (1)
+                schedule();
+}
+#endif /* __arch_um__ */
+
+#ifdef __KERNEL__
+
+/*
+ * Dump the stack of task @tsk (NULL means the current task), using whatever
+ * the build configuration offers:
+ *  - __arch_um__: only log a request to attach gdb (and only for a
+ *    non-NULL @tsk);
+ *  - HAVE_SHOW_TASK: call show_task() on the task;
+ *  - otherwise: dump_stack() for the current task, and a warning that
+ *    nothing can be shown for any other task.
+ */
+void libcfs_debug_dumpstack(struct task_struct *tsk)
+{
+#if defined(__arch_um__)
+        if (tsk != NULL)
+                CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
+                      tsk->pid, UML_PID(tsk));
+        //asm("int $3");
+#elif defined(HAVE_SHOW_TASK)
+        /* this is exported by lustre kernel version 42 */
+        extern void show_task(struct task_struct *);
+
+        if (tsk == NULL)
+                tsk = current;
+        CWARN("showing stack for process %d\n", tsk->pid);
+        show_task(tsk);
+#else
+        /* dump_stack() takes no task argument, so only the caller's own
+         * stack can be shown here */
+        if ((tsk == NULL) || (tsk == current))
+                dump_stack();
+        else
+                CWARN("can't show stack: kernel doesn't export show_task\n");
+#endif
+}
+
+/* Log the address of the current task structure and return it. */
+cfs_task_t *libcfs_current(void)
+{
+        cfs_task_t *self = current;
+
+        CWARN("current task struct is %p\n", self);
+        return self;
+}
+
+/*
+ * Panic notifier callback.  Sets libcfs_panic_in_progress the first time it
+ * runs and does nothing on later calls; all three arguments are unused.
+ * Always returns 0.  The debug-log dump below is compiled in only when
+ * LNET_DUMP_ON_PANIC is defined.
+ */
+static int panic_notifier(struct notifier_block *self, unsigned long unused1,
+                         void *unused2)
+{
+        if (libcfs_panic_in_progress)
+                return 0;
+
+        libcfs_panic_in_progress = 1;
+        /* memory barrier issued right after raising the flag */
+        mb();
+
+#ifdef LNET_DUMP_ON_PANIC
+        /* This is currently disabled because it spews far too much to the
+         * console on the rare cases it is ever triggered. */
+
+        if (in_interrupt()) {
+                trace_debug_print();
+        } else {
+                /* release every level of the big kernel lock held by this
+                 * task before dumping the log */
+                while (current->lock_depth >= 0)
+                        unlock_kernel();
+
+                libcfs_debug_dumplog_internal((void *)(long)cfs_curproc_pid());
+        }
+#endif
+        return 0;
+}
+
+/* Notifier block that hooks panic_notifier() into the panic notifier list. */
+static struct notifier_block libcfs_panic_notifier = {
+        .notifier_call = panic_notifier,
+        .next          = NULL,
+        .priority      = 10000
+};
+
+/*
+ * Add libcfs_panic_notifier to panic_notifier_list, through the atomic
+ * notifier API when HAVE_ATOMIC_PANIC_NOTIFIER is defined.
+ */
+void libcfs_register_panic_notifier(void)
+{
+#ifdef HAVE_ATOMIC_PANIC_NOTIFIER
+        atomic_notifier_chain_register(&panic_notifier_list,
+                                       &libcfs_panic_notifier);
+#else
+        notifier_chain_register(&panic_notifier_list,
+                                &libcfs_panic_notifier);
+#endif
+}
+
+/*
+ * Remove libcfs_panic_notifier from panic_notifier_list, through the atomic
+ * notifier API when HAVE_ATOMIC_PANIC_NOTIFIER is defined.
+ */
+void libcfs_unregister_panic_notifier(void)
+{
+#ifdef HAVE_ATOMIC_PANIC_NOTIFIER
+        atomic_notifier_chain_unregister(&panic_notifier_list,
+                                         &libcfs_panic_notifier);
+#else
+        notifier_chain_unregister(&panic_notifier_list,
+                                  &libcfs_panic_notifier);
+#endif
+}
+
+EXPORT_SYMBOL(libcfs_debug_dumpstack);
+EXPORT_SYMBOL(libcfs_current);
+
+#endif /* __KERNEL__ */
+
+EXPORT_SYMBOL(libcfs_run_upcall);
+EXPORT_SYMBOL(libcfs_run_lbug_upcall);
+EXPORT_SYMBOL(lbug_with_loc);
diff --git a/libcfs/libcfs/linux/linux-fs.c b/libcfs/libcfs/linux/linux-fs.c
new file mode 100644 (file)
index 0000000..3b15576
--- /dev/null
@@ -0,0 +1,100 @@
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/ctype.h>
+#include <asm/uaccess.h>
+
+#include <libcfs/libcfs.h>
+
+/*
+ * Open @name with filp_open() using @flags and @mode unchanged.
+ *
+ * Returns the opened file, or NULL on failure.  On failure the error is
+ * logged and, when @err is not NULL, stored in *@err; *@err is left
+ * untouched on success.
+ */
+cfs_file_t *
+cfs_filp_open (const char *name, int flags, int mode, int *err)
+{
+       cfs_file_t      *file;
+       int              rc;
+
+       /* XXX
+        * Maybe we need to handle flags and mode in the future
+        */
+       file = filp_open(name, flags, mode);
+       if (!IS_ERR(file))
+               return file;
+
+       rc = PTR_ERR(file);
+       printk(KERN_ERR "LustreError: can't open %s file: err %d\n",
+                       name, rc);
+       if (err != NULL)
+               *err = rc;
+       return NULL;
+}
+
+/*
+ * Write @count bytes from @buf to @filp through its ->write() method, with
+ * the address limit switched to KERNEL_DS for the duration of the call.
+ * ->write() is called repeatedly until everything has been written.
+ *
+ * NOTE: this returns 0 on success, not the number of bytes written.  On
+ * failure it returns the negative value from ->write(), or -EIO when
+ * ->write() reports that nothing was written.
+ */
+ssize_t
+cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset)
+{
+       mm_segment_t fs;
+       ssize_t size = 0;
+
+       fs = get_fs();
+       set_fs(KERNEL_DS);
+       while (count > 0) {
+               size = filp->f_op->write(filp, (char *)buf, count, offset);
+               if (size < 0)
+                       break;
+               if (size == 0) {
+                       /* no progress: fail instead of looping forever */
+                       size = -EIO;
+                       break;
+               }
+               /* short write: resume after the bytes already written
+                * rather than writing the start of the buffer again */
+               buf += size;
+               count -= size;
+               size = 0;
+       }
+       set_fs(fs);
+
+       return size;
+}
+
+#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL &&        \
+     CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\
+     CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\
+     CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\
+     CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\
+     CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW)
+
+/*
+ * Translate Linux open flags into the CFS_O_* encoding.  This version is
+ * built when at least one CFS_O_* constant checked above differs from its
+ * Linux counterpart.  The access-mode bits are kept as they are; every
+ * other known flag is mapped individually and unknown bits are dropped.
+ */
+int cfs_oflags2univ(int flags)
+{
+       int univ = flags & O_ACCMODE;
+
+       if (flags & O_CREAT)
+               univ |= CFS_O_CREAT;
+       if (flags & O_EXCL)
+               univ |= CFS_O_EXCL;
+       if (flags & O_NOCTTY)
+               univ |= CFS_O_NOCTTY;
+       if (flags & O_TRUNC)
+               univ |= CFS_O_TRUNC;
+       if (flags & O_APPEND)
+               univ |= CFS_O_APPEND;
+       if (flags & O_NONBLOCK)
+               univ |= CFS_O_NONBLOCK;
+       if (flags & O_SYNC)
+               univ |= CFS_O_SYNC;
+       if (flags & FASYNC)
+               univ |= CFS_O_ASYNC;
+       if (flags & O_DIRECTORY)
+               univ |= CFS_O_DIRECTORY;
+       if (flags & O_DIRECT)
+               univ |= CFS_O_DIRECT;
+       if (flags & O_LARGEFILE)
+               univ |= CFS_O_LARGEFILE;
+       if (flags & O_NOFOLLOW)
+               univ |= CFS_O_NOFOLLOW;
+       if (flags & O_NOATIME)
+               univ |= CFS_O_NOATIME;
+       return univ;
+}
+#else
+
+/*
+ * All CFS_O_* constants checked above equal their Linux counterparts, so
+ * the flags are returned unchanged.
+ */
+int cfs_oflags2univ(int flags)
+{
+       return flags;
+}
+#endif
+
+/*
+ * Reverse direction of cfs_oflags2univ().  Currently an identity mapping.
+ * XXX Liang: we don't need cfs_univ2oflags() now.
+ */
+int cfs_univ2oflags(int flags)
+{
+       int oflags = flags;
+
+       return oflags;
+}
+
+EXPORT_SYMBOL(cfs_filp_open);
+EXPORT_SYMBOL(cfs_user_write);
+EXPORT_SYMBOL(cfs_oflags2univ);
+EXPORT_SYMBOL(cfs_univ2oflags);
diff --git a/libcfs/libcfs/linux/linux-lock.c b/libcfs/libcfs/linux/linux-lock.c
new file mode 100644 (file)
index 0000000..01511d6
--- /dev/null
@@ -0,0 +1,4 @@
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <arch-linux/cfs_lock.h>
+#include <libcfs/libcfs.h>
diff --git a/libcfs/libcfs/linux/linux-lwt.c b/libcfs/libcfs/linux/linux-lwt.c
new file mode 100644 (file)
index 0000000..520c54c
--- /dev/null
@@ -0,0 +1,2 @@
+# define DEBUG_SUBSYSTEM S_LNET
+
diff --git a/libcfs/libcfs/linux/linux-mem.c b/libcfs/libcfs/linux/linux-mem.c
new file mode 100644 (file)
index 0000000..30ecf6a
--- /dev/null
@@ -0,0 +1,145 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <libcfs/libcfs.h>
+
+/*
+ * Convert libcfs CFS_ALLOC_* flags into kernel __GFP_* bits.
+ *
+ * Before 2.5.0: ATOMIC selects __GFP_HIGH, else WAIT selects __GFP_WAIT,
+ * else both are set; IO adds __GFP_IO | __GFP_HIGHIO.
+ * From 2.5.0 on: ATOMIC selects __GFP_HIGH, anything else __GFP_WAIT;
+ * NOWARN and IO add __GFP_NOWARN and __GFP_IO.
+ * On all kernels FS adds __GFP_FS and HIGH adds __GFP_HIGH.
+ */
+static unsigned int cfs_alloc_flags_to_gfp(u_int32_t flags)
+{
+        unsigned int gfp;
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        if (flags & CFS_ALLOC_ATOMIC)
+                gfp = __GFP_HIGH;
+        else if (flags & CFS_ALLOC_WAIT)
+                gfp = __GFP_WAIT;
+        else
+                gfp = (__GFP_HIGH | __GFP_WAIT);
+
+        if (flags & CFS_ALLOC_IO)
+                gfp |= __GFP_IO | __GFP_HIGHIO;
+#else
+        gfp = (flags & CFS_ALLOC_ATOMIC) ? __GFP_HIGH : __GFP_WAIT;
+
+        if (flags & CFS_ALLOC_NOWARN)
+                gfp |= __GFP_NOWARN;
+        if (flags & CFS_ALLOC_IO)
+                gfp |= __GFP_IO;
+#endif
+
+        if (flags & CFS_ALLOC_FS)
+                gfp |= __GFP_FS;
+        if (flags & CFS_ALLOC_HIGH)
+                gfp |= __GFP_HIGH;
+
+        return gfp;
+}
+
+/*
+ * Allocate @nr_bytes with kmalloc(), using the GFP bits derived from @flags.
+ * The memory is zeroed when CFS_ALLOC_ZERO is set.  Returns NULL when the
+ * allocation fails.
+ */
+void *
+cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+       void *mem = kmalloc(nr_bytes, cfs_alloc_flags_to_gfp(flags));
+
+       if (mem == NULL)
+               return NULL;
+
+       if (flags & CFS_ALLOC_ZERO)
+               memset(mem, 0, nr_bytes);
+       return mem;
+}
+
+/* Release memory obtained from cfs_alloc(); hands @addr to kfree(). */
+void cfs_free(void *addr)
+{
+       kfree(addr);
+}
+
+/* Allocate @nr_bytes with vmalloc() and return the result. */
+void *cfs_alloc_large(size_t nr_bytes)
+{
+       void *mem = vmalloc(nr_bytes);
+
+       return mem;
+}
+
+/* Release memory obtained from cfs_alloc_large(); hands @addr to vfree(). */
+void cfs_free_large(void *addr)
+{
+       vfree(addr);
+}
+
+/*
+ * Allocate pages of the given @order with alloc_pages(), using the GFP
+ * bits derived from @flags.
+ *
+ * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+ * from here: this will lead to infinite recursion.
+ */
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order)
+{
+        unsigned int gfp = cfs_alloc_flags_to_gfp(flags);
+
+        return alloc_pages(gfp, order);
+}
+
+/* Release pages of the given @order starting at @page via __free_pages(). */
+void
+__cfs_free_pages(cfs_page_t *page, unsigned int order)
+{
+        __free_pages(page, order);
+}
+
+/*
+ * Create a slab cache with kmem_cache_create().  @name, @size, @offset and
+ * @flags are passed straight through; the trailing callback argument(s)
+ * are NULL (two of them when HAVE_KMEM_CACHE_CREATE_DTOR is defined, one
+ * otherwise).
+ */
+cfs_mem_cache_t *
+cfs_mem_cache_create (const char *name, size_t size, size_t offset,
+                      unsigned long flags)
+{
+        cfs_mem_cache_t *cache;
+
+#ifdef HAVE_KMEM_CACHE_CREATE_DTOR
+        cache = kmem_cache_create(name, size, offset, flags, NULL, NULL);
+#else
+        cache = kmem_cache_create(name, size, offset, flags, NULL);
+#endif
+        return cache;
+}
+
+/*
+ * Destroy a slab cache.  Returns the result of kmem_cache_destroy() when
+ * HAVE_KMEM_CACHE_DESTROY_INT is defined, and 0 otherwise.
+ */
+int
+cfs_mem_cache_destroy (cfs_mem_cache_t * cachep)
+{
+        int rc = 0;
+
+#ifdef HAVE_KMEM_CACHE_DESTROY_INT
+        rc = kmem_cache_destroy(cachep);
+#else
+        kmem_cache_destroy(cachep);
+#endif
+        return rc;
+}
+
+/*
+ * Allocate one object from @cachep with kmem_cache_alloc(), using the GFP
+ * bits derived from the CFS_ALLOC_* @flags.
+ */
+void *
+cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags)
+{
+        unsigned int gfp = cfs_alloc_flags_to_gfp(flags);
+
+        return kmem_cache_alloc(cachep, gfp);
+}
+
+/*
+ * Return @objp to the slab cache @cachep via kmem_cache_free().
+ */
+void
+cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp)
+{
+        /* plain call: a void function must not "return" an expression */
+        kmem_cache_free(cachep, objp);
+}
+
+EXPORT_SYMBOL(cfs_alloc);
+EXPORT_SYMBOL(cfs_free);
+EXPORT_SYMBOL(cfs_alloc_large);
+EXPORT_SYMBOL(cfs_free_large);
+EXPORT_SYMBOL(cfs_alloc_pages);
+EXPORT_SYMBOL(__cfs_free_pages);
+EXPORT_SYMBOL(cfs_mem_cache_create);
+EXPORT_SYMBOL(cfs_mem_cache_destroy);
+EXPORT_SYMBOL(cfs_mem_cache_alloc);
+EXPORT_SYMBOL(cfs_mem_cache_free);
diff --git a/libcfs/libcfs/linux/linux-module.c b/libcfs/libcfs/linux/linux-module.c
new file mode 100644 (file)
index 0000000..6f21853
--- /dev/null
@@ -0,0 +1,151 @@
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define LNET_MINOR 240
+
+/*
+ * Copy an ioctl request from user space into a kernel buffer and validate
+ * it.
+ *
+ * @buf and @end delimit the kernel buffer; @arg is the user-space address
+ * of the request.  The header is fetched first to learn the request length
+ * (ioc_len), then the whole request is copied over it.  On success the
+ * ioc_inlbuf1/ioc_inlbuf2 pointers are set to their data inside ioc_bulk
+ * when the corresponding length is non-zero.
+ *
+ * Returns 0 on success, -EFAULT when user memory cannot be read and
+ * -EINVAL when the request is malformed.
+ */
+int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct libcfs_ioctl_hdr   *hdr;
+        struct libcfs_ioctl_data  *data;
+        size_t len;
+        ENTRY;
+
+        hdr = (struct libcfs_ioctl_hdr *)buf;
+        data = (struct libcfs_ioctl_data *)buf;
+
+        /* copy_from_user() returns the number of bytes it could not copy,
+         * which is not an errno: report any failure as -EFAULT */
+        if (copy_from_user(buf, (void *)arg, sizeof(*hdr)))
+                RETURN(-EFAULT);
+
+        if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
+                CERROR("PORTALS: version mismatch kernel vs application\n");
+                RETURN(-EINVAL);
+        }
+
+        /* compare lengths, not pointers, so that a huge ioc_len cannot
+         * wrap the address computation and slip past the check */
+        len = hdr->ioc_len;
+        if (len >= (size_t)(end - buf)) {
+                CERROR("PORTALS: user buffer exceeds kernel buffer\n");
+                RETURN(-EINVAL);
+        }
+
+        if (len < sizeof(struct libcfs_ioctl_data)) {
+                CERROR("PORTALS: user buffer too small for ioctl\n");
+                RETURN(-EINVAL);
+        }
+
+        if (copy_from_user(buf, (void *)arg, len))
+                RETURN(-EFAULT);
+
+        /* the header was fetched a second time by the copy above: make
+         * sure user space did not change the length in between */
+        if (hdr->ioc_len != len) {
+                CERROR("PORTALS: ioctl length changed during copy\n");
+                RETURN(-EINVAL);
+        }
+
+        if (libcfs_ioctl_is_invalid(data)) {
+                CERROR("PORTALS: ioctl not correctly formatted\n");
+                RETURN(-EINVAL);
+        }
+
+        if (data->ioc_inllen1)
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+        /* the second inline buffer follows the first one, rounded up */
+        if (data->ioc_inllen2)
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                        size_round(data->ioc_inllen1);
+
+        RETURN(0);
+}
+
+/*
+ * Copy @size bytes from kernel buffer @data to user address @arg.
+ * Returns 0 on success and -EFAULT when the copy is incomplete.
+ */
+int libcfs_ioctl_popdata(void *arg, void *data, int size)
+{
+       int failed = copy_to_user((char *)arg, data, size) != 0;
+
+       return failed ? -EFAULT : 0;
+}
+
+extern struct cfs_psdev_ops          libcfs_psdev_ops;
+
+/*
+ * open() handler of the libcfs device: forwards to libcfs_psdev_ops.p_open
+ * with the address of file->private_data.  Returns -EINVAL without an
+ * inode, -EPERM when no p_open handler is installed, and the handler's
+ * result otherwise.
+ */
+static int
+libcfs_psdev_open(struct inode * inode, struct file * file)
+{
+       struct libcfs_device_userstate **state;
+
+       if (inode == NULL)
+               return -EINVAL;
+
+       if (libcfs_psdev_ops.p_open == NULL)
+               return -EPERM;
+
+       state = (struct libcfs_device_userstate **)&file->private_data;
+       return libcfs_psdev_ops.p_open(0, (void *)state);
+}
+
+/*
+ * Called when closing /dev/device: forwards to libcfs_psdev_ops.p_close
+ * with file->private_data.  Returns -EINVAL without an inode, -EPERM when
+ * no p_close handler is installed, and the handler's result otherwise.
+ */
+static int
+libcfs_psdev_release(struct inode * inode, struct file * file)
+{
+       struct libcfs_device_userstate *state;
+
+       if (inode == NULL)
+               return -EINVAL;
+
+       state = file->private_data;
+       if (libcfs_psdev_ops.p_close == NULL)
+               return -EPERM;
+
+       return libcfs_psdev_ops.p_close(0, (void *)state);
+}
+
+/*
+ * ioctl() handler of the libcfs device.
+ *
+ * Rejects callers whose fsuid is not 0 (-EACCES) and commands outside the
+ * libcfs ioctl type/number range (-EINVAL).  IOC_LIBCFS_PANIC is handled
+ * here and needs CAP_SYS_BOOT; IOC_LIBCFS_MEMHOG needs CAP_SYS_ADMIN and,
+ * like every other command, is then passed on to libcfs_psdev_ops.p_ioctl
+ * (-EPERM when no handler is installed).
+ */
+static int
+libcfs_ioctl(struct inode *inode, struct file *file,
+            unsigned int cmd, unsigned long arg)
+{
+       struct cfs_psdev_file    pfile;
+
+       if (current->fsuid != 0)
+               return -EACCES;
+
+       if (_IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+           _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR ||
+           _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR) {
+               CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
+                      _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
+               return -EINVAL;
+       }
+
+       /* Handle platform-dependent IOC requests */
+       if (cmd == IOC_LIBCFS_PANIC) {
+               if (!capable(CAP_SYS_BOOT))
+                       return -EPERM;
+               panic("debugctl-invoked panic");
+               return 0;
+       }
+       /* MEMHOG only needs the capability check here; it then continues
+        * to the generic handler below */
+       if (cmd == IOC_LIBCFS_MEMHOG && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       pfile.off = 0;
+       pfile.private_data = file->private_data;
+       if (libcfs_psdev_ops.p_ioctl == NULL)
+               return -EPERM;
+
+       return libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+}
+
+/* File operations of the libcfs device: ioctl, open and release only. */
+static struct file_operations libcfs_fops = {
+       .ioctl   = libcfs_ioctl,
+       .open    = libcfs_psdev_open,
+       .release = libcfs_psdev_release
+};
+
+/*
+ * Descriptor of the libcfs device, initialized positionally with
+ * LNET_MINOR, the name "lnet" and the file operations above.
+ * NOTE(review): presumably these are the minor number, device name and
+ * fops fields of cfs_psdev_t in that order -- confirm against its
+ * declaration.
+ */
+cfs_psdev_t libcfs_dev = {
+       LNET_MINOR,
+       "lnet",
+       &libcfs_fops
+};
+
+
diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c
new file mode 100644 (file)
index 0000000..cc02829
--- /dev/null
@@ -0,0 +1,154 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <libcfs/libcfs.h>
+
+#if defined(CONFIG_KGDB)
+#include <asm/kgdb.h>
+#endif
+
+/*
+ * Break into the debugger: BREAKPOINT() when CONFIG_KGDB is defined, an
+ * "int $3" trap under __arch_um__, and nothing at all otherwise.
+ */
+void cfs_enter_debugger(void)
+{
+#ifdef CONFIG_KGDB
+        BREAKPOINT();
+#elif defined(__arch_um__)
+        asm("int $3");
+#endif
+        /* no debugger hook on any other configuration */
+}
+
+/*
+ * Turn the calling thread into a daemon named @str and block all of its
+ * signals.  The whole sequence runs with the big kernel lock held.
+ */
+void cfs_daemonize(char *str) {
+        unsigned long flags;
+
+        lock_kernel();
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
+        /* from 2.5.63 on daemonize() takes the thread name itself */
+        daemonize(str);
+#else
+        /* older kernels: daemonize() takes no name, so drop the files,
+         * reparent to init and set the command name by hand */
+        daemonize();
+        exit_files(current);
+        reparent_to_init();
+        snprintf (current->comm, sizeof (current->comm), "%s", str);
+#endif
+        /* block every signal and recompute the pending state, under the
+         * signal mask lock */
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+        unlock_kernel();
+}
+
+/*
+ * Daemonize the calling thread as @str (see cfs_daemonize()) and then give
+ * it a private copy of its fs structure.  Returns 0 on success, or -ENOMEM
+ * when the copy cannot be made, in which case the thread is daemonized but
+ * keeps the fs structure it had.
+ */
+int cfs_daemonize_ctxt(char *str)
+{
+        struct task_struct *self = current;
+        struct fs_struct   *private_fs;
+
+        cfs_daemonize(str);
+
+        private_fs = copy_fs_struct(self->fs);
+        if (private_fs == NULL)
+                return -ENOMEM;
+
+        exit_fs(self);
+        self->fs = private_fs;
+        return 0;
+}
+
+
+/*
+ * Return a copy of the current task's blocked-signal mask, read under the
+ * signal mask lock.
+ */
+sigset_t
+cfs_get_blockedsigs(void)
+{
+        sigset_t        snapshot;
+        unsigned long   lockflags;
+
+        SIGNAL_MASK_LOCK(current, lockflags);
+        snapshot = current->blocked;
+        SIGNAL_MASK_UNLOCK(current, lockflags);
+
+        return snapshot;
+}
+
+/*
+ * Block every signal for the current task and return the mask that was in
+ * effect before, so it can be handed to cfs_restore_sigs() later.
+ */
+sigset_t
+cfs_block_allsigs(void)
+{
+        sigset_t        previous;
+        unsigned long   lockflags;
+
+        SIGNAL_MASK_LOCK(current, lockflags);
+        previous = current->blocked;
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, lockflags);
+
+        return previous;
+}
+
+/*
+ * Replace the current task's blocked-signal mask with @bits and return the
+ * mask that was in effect before.
+ */
+sigset_t
+cfs_block_sigs(sigset_t bits)
+{
+        sigset_t        previous;
+        unsigned long   lockflags;
+
+        SIGNAL_MASK_LOCK(current, lockflags);
+        previous = current->blocked;
+        current->blocked = bits;
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, lockflags);
+
+        return previous;
+}
+
+/*
+ * Install @old as the current task's blocked-signal mask and recompute the
+ * pending state.
+ */
+void
+cfs_restore_sigs (cfs_sigset_t old)
+{
+        unsigned long   lockflags;
+
+        SIGNAL_MASK_LOCK(current, lockflags);
+        current->blocked = old;
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, lockflags);
+}
+
+/* Return the result of signal_pending() for the current task. */
+int
+cfs_signal_pending(void)
+{
+        int pending = signal_pending(current);
+
+        return pending;
+}
+
+/*
+ * Clear the current task's pending-signal indication (CLEAR_SIGPENDING)
+ * under the signal mask lock.
+ */
+void
+cfs_clear_sigpending(void)
+{
+        unsigned long   lockflags;
+
+        SIGNAL_MASK_LOCK(current, lockflags);
+        CLEAR_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, lockflags);
+}
+
+/* Platform-specific initialization hook; nothing to do here, returns 0. */
+int
+libcfs_arch_init(void)
+{
+        int rc = 0;
+
+        return rc;
+}
+
+/* Platform-specific cleanup hook; nothing to do here. */
+void
+libcfs_arch_cleanup(void)
+{
+        /* intentionally empty */
+}
+
+EXPORT_SYMBOL(libcfs_arch_init);
+EXPORT_SYMBOL(libcfs_arch_cleanup);
+EXPORT_SYMBOL(cfs_daemonize);
+EXPORT_SYMBOL(cfs_daemonize_ctxt);
+EXPORT_SYMBOL(cfs_block_allsigs);
+EXPORT_SYMBOL(cfs_block_sigs);
+EXPORT_SYMBOL(cfs_get_blockedsigs);
+EXPORT_SYMBOL(cfs_restore_sigs);
+EXPORT_SYMBOL(cfs_signal_pending);
+EXPORT_SYMBOL(cfs_clear_sigpending);
diff --git a/libcfs/libcfs/linux/linux-proc.c b/libcfs/libcfs/linux/linux-proc.c
new file mode 100644 (file)
index 0000000..ae3312a
--- /dev/null
@@ -0,0 +1,443 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@zabbo.net>
+ *   Author: Peter J. Braam <braam@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <net/sock.h>
+#include <linux/uio.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/stat.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/sysctl.h>
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <asm/div64.h>
+#include "tracefile.h"
+
+/* Handle returned by cfs_register_sysctl_table(); NULL while unregistered */
+static cfs_sysctl_table_header_t *lnet_table_header = NULL;
+/* Backing buffer for the "upcall" sysctl entry; defined elsewhere */
+extern char lnet_upcall[1024];
+
+/* ctl_name of the top-level "lnet" sysctl directory */
+#define PSDEV_LNET  (0x100)
+/*
+ * ctl_name values for the entries below the "lnet" directory.
+ * NOTE(review): PSDEV_DEBUG_DUMP_PATH has no matching entry in lnet_table
+ * in this file -- confirm whether that is intentional.
+ */
+enum {
+        PSDEV_DEBUG = 1,          /* control debugging */
+        PSDEV_SUBSYSTEM_DEBUG,    /* control debugging */
+        PSDEV_PRINTK,             /* force all messages to console */
+        PSDEV_CONSOLE_RATELIMIT,  /* ratelimit console messages */
+        PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */
+        PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */
+        PSDEV_CONSOLE_BACKOFF,    /* delay increase factor */
+        PSDEV_DEBUG_PATH,         /* crashdump log location */
+        PSDEV_DEBUG_DUMP_PATH,    /* crashdump tracelog location */
+        PSDEV_LNET_UPCALL,        /* User mode upcall script  */
+        PSDEV_LNET_MEMUSED,       /* bytes currently PORTAL_ALLOCated */
+        PSDEV_LNET_CATASTROPHE,   /* if we have LBUGged or panic'd */
+        PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */
+        PSDEV_LNET_DUMP_KERNEL,   /* snapshot kernel debug buffer to file */
+        PSDEV_LNET_DAEMON_FILE,   /* spool kernel debug buffer to file */
+        PSDEV_LNET_DEBUG_MB,      /* size of debug buffer */
+};
+
+/*
+ * Adapt a simple (data, write, pos, buffer, len) handler to the
+ * pointer-based position/length bookkeeping used by the sysctl handlers.
+ *
+ * 'handler' returns a negative errno on failure; on a read its
+ * non-negative return value is the number of bytes produced.
+ *
+ * Returns the handler's negative errno, or 0 after updating *ppos (and,
+ * for reads, *lenp).
+ */
+static int
+proc_call_handler(void *data, int write,
+                  loff_t *ppos, void *buffer, size_t *lenp,
+                  int (*handler)(void *data, int write,
+                                 loff_t pos, void *buffer, int len))
+{
+        int nob;
+
+        nob = handler(data, write, *ppos, buffer, *lenp);
+        if (nob < 0)
+                return nob;
+
+        if (!write) {
+                /* read: report how much the handler produced */
+                *lenp = nob;
+                *ppos += nob;
+                return 0;
+        }
+
+        /* write: the whole input is treated as consumed */
+        *ppos += *lenp;
+        return 0;
+}
+
+/*
+ * Define a static sysctl handler called 'name' (signature supplied by
+ * LL_PROC_PROTO) that forwards to the simple handler '__name' through
+ * proc_call_handler(), passing table->data as the handler's 'data'.
+ */
+#define DECLARE_PROC_HANDLER(name)                      \
+static int                                              \
+LL_PROC_PROTO(name)                                     \
+{                                                       \
+        DECLARE_LL_PROC_PPOS_DECL;                      \
+                                                        \
+        return proc_call_handler(table->data, write,    \
+                                 ppos, buffer, lenp,    \
+                                 __##name);             \
+}
+
+/*
+ * Read or write one of the debug bitmasks as a string.
+ *
+ * data:   points at the unsigned int mask to show or modify
+ * write:  non-zero to parse 'buffer' into the mask, zero to format the
+ *         mask into 'buffer'
+ * pos:    read offset into the formatted string
+ * buffer: user buffer, 'nob' bytes long
+ *
+ * Returns a negative errno on failure.  On read, returns what
+ * trace_copyout_string() returns (0 once 'pos' is past the end of the
+ * string); on write, returns what libcfs_debug_str2mask() returns.
+ */
+static int __proc_dobitmasks(void *data, int write, 
+                             loff_t pos, void *buffer, int nob)
+{
+        const int     tmpstrlen = 512;
+        char         *tmpstr;
+        int           rc;
+        unsigned int *mask = data;
+        int           is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
+        int           is_printk = (mask == &libcfs_printk) ? 1 : 0;
+
+        rc = trace_allocate_string_buffer(&tmpstr, tmpstrlen);
+        if (rc < 0)
+                return rc;
+
+        if (!write) {
+                libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
+                rc = strlen(tmpstr);
+
+                if (pos >= rc) {
+                        rc = 0;
+                } else {
+                        rc = trace_copyout_string(buffer, nob,
+                                                  tmpstr + pos, "\n");
+                }
+        } else {
+                rc = trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
+                /* must not return directly: tmpstr still has to be freed */
+                if (rc < 0)
+                        goto out;
+
+                rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
+                /* Always print LBUG/LASSERT to console, so keep this mask */
+                if (is_printk)
+                        *mask |= D_EMERG;
+        }
+
+out:
+        trace_free_string_buffer(tmpstr, tmpstrlen);
+        return rc;
+}
+
+DECLARE_PROC_HANDLER(proc_dobitmasks)
+
+/*
+ * Write-only entry: a write hands the user string to
+ * trace_dump_debug_buffer_usrstr(); a read yields nothing (returns 0).
+ */
+static int __proc_dump_kernel(void *data, int write,
+                              loff_t pos, void *buffer, int nob)
+{
+        return write ? trace_dump_debug_buffer_usrstr(buffer, nob) : 0;
+}
+
+DECLARE_PROC_HANDLER(proc_dump_kernel)
+
+/*
+ * Read: copy out 'tracefile' starting at offset 'pos', followed by a
+ * newline (0 once 'pos' is at or past the end of the string).
+ * Write: pass the user string to trace_daemon_command_usrstr().
+ */
+static int __proc_daemon_file(void *data, int write,
+                              loff_t pos, void *buffer, int nob)
+{
+        int namelen;
+
+        if (write)
+                return trace_daemon_command_usrstr(buffer, nob);
+
+        namelen = strlen(tracefile);
+        if (pos >= namelen)
+                return 0;
+
+        return trace_copyout_string(buffer, nob, tracefile + pos, "\n");
+}
+
+DECLARE_PROC_HANDLER(proc_daemon_file)
+
+/*
+ * Read: format trace_get_debug_mb() as a decimal string and copy it out
+ * from offset 'pos', followed by a newline.
+ * Write: pass the user string to trace_set_debug_mb_usrstr().
+ */
+static int __proc_debug_mb(void *data, int write,
+                           loff_t pos, void *buffer, int nob)
+{
+        char text[32];
+        int  textlen;
+
+        if (write)
+                return trace_set_debug_mb_usrstr(buffer, nob);
+
+        textlen = snprintf(text, sizeof(text), "%d", trace_get_debug_mb());
+        if (pos >= textlen)
+                return 0;
+
+        return trace_copyout_string(buffer, nob, text + pos, "\n");
+}
+
+DECLARE_PROC_HANDLER(proc_debug_mb)
+
+/*
+ * sysctl handler for "console_max_delay_centisecs".
+ *
+ * The value is exchanged with user space as an int in centiseconds via a
+ * private copy of the table entry pointing at a local variable, and is
+ * converted to/from libcfs_console_max_delay with cfs_duration_sec() and
+ * cfs_time_seconds().
+ *
+ * A write is rejected with -EINVAL if the value is not positive, converts
+ * to a zero duration, or is below libcfs_console_min_delay.
+ */
+int LL_PROC_PROTO(proc_console_max_delay_cs)
+{
+        cfs_sysctl_table_t shadow = *table;
+        cfs_duration_t     delay;
+        int                centisecs;
+        int                rc;
+
+        shadow.data = &centisecs;
+        shadow.proc_handler = &proc_dointvec;
+
+        if (write) {
+                centisecs = 0;
+                rc = ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+                if (rc < 0)
+                        return rc;
+                if (centisecs <= 0)
+                        return -EINVAL;
+
+                delay = cfs_time_seconds(centisecs) / 100;
+                if (delay == 0 || delay < libcfs_console_min_delay)
+                        return -EINVAL;
+
+                libcfs_console_max_delay = delay;
+                return rc;
+        }
+
+        /* read */
+        centisecs = cfs_duration_sec(libcfs_console_max_delay * 100);
+        return ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+}
+
+/*
+ * sysctl handler for "console_min_delay_centisecs".
+ *
+ * The value is exchanged with user space as an int in centiseconds via a
+ * private copy of the table entry pointing at a local variable, and is
+ * converted to/from libcfs_console_min_delay with cfs_duration_sec() and
+ * cfs_time_seconds().
+ *
+ * A write is rejected with -EINVAL if the value is not positive, converts
+ * to a zero duration, or exceeds libcfs_console_max_delay.
+ */
+int LL_PROC_PROTO(proc_console_min_delay_cs)
+{
+        cfs_sysctl_table_t shadow = *table;
+        cfs_duration_t     delay;
+        int                centisecs;
+        int                rc;
+
+        shadow.data = &centisecs;
+        shadow.proc_handler = &proc_dointvec;
+
+        if (write) {
+                centisecs = 0;
+                rc = ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+                if (rc < 0)
+                        return rc;
+                if (centisecs <= 0)
+                        return -EINVAL;
+
+                delay = cfs_time_seconds(centisecs) / 100;
+                if (delay == 0 || delay > libcfs_console_max_delay)
+                        return -EINVAL;
+
+                libcfs_console_min_delay = delay;
+                return rc;
+        }
+
+        /* read */
+        centisecs = cfs_duration_sec(libcfs_console_min_delay * 100);
+        return ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+}
+
+/*
+ * sysctl handler for "console_backoff".
+ *
+ * Reads report libcfs_console_backoff; writes accept only strictly
+ * positive integers (-EINVAL otherwise).  The integer is parsed/formatted
+ * through a private copy of the table entry pointing at a local variable.
+ */
+int LL_PROC_PROTO(proc_console_backoff)
+{
+        cfs_sysctl_table_t shadow = *table;
+        int                value;
+        int                rc;
+
+        shadow.data = &value;
+        shadow.proc_handler = &proc_dointvec;
+
+        if (write) {
+                value = 0;
+                rc = ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+                if (rc < 0)
+                        return rc;
+                if (value <= 0)
+                        return -EINVAL;
+
+                libcfs_console_backoff = value;
+                return rc;
+        }
+
+        /* read */
+        value = libcfs_console_backoff;
+        return ll_proc_dointvec(&shadow, write, filp, buffer, lenp, ppos);
+}
+
+/*
+ * Entries that live below the "lnet" sysctl directory (see top_table).
+ *
+ * Entries with a .data pointer are served by proc_dobitmasks,
+ * proc_dointvec or proc_dostring operating on that variable.  Entries
+ * without .data use a dedicated handler that computes or applies the
+ * value itself.  The array ends with a zeroed entry.
+ */
+static cfs_sysctl_table_t lnet_table[] = {
+        /*
+         * NB No .strategy entries have been provided since sysctl(8) prefers
+         * to go via /proc for portability.
+         */
+        {
+                .ctl_name = PSDEV_DEBUG,
+                .procname = "debug",
+                .data     = &libcfs_debug,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dobitmasks
+        },
+        {
+                .ctl_name = PSDEV_SUBSYSTEM_DEBUG,
+                .procname = "subsystem_debug",
+                .data     = &libcfs_subsystem_debug,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dobitmasks
+        },
+        {
+                .ctl_name = PSDEV_PRINTK,
+                .procname = "printk",
+                .data     = &libcfs_printk,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dobitmasks
+        },
+        {
+                .ctl_name = PSDEV_CONSOLE_RATELIMIT,
+                .procname = "console_ratelimit",
+                .data     = &libcfs_console_ratelimit,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        },
+        {
+                .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS,
+                .procname = "console_max_delay_centisecs",
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_console_max_delay_cs
+        },
+        {
+                .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS,
+                .procname = "console_min_delay_centisecs",
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_console_min_delay_cs
+        },
+        {
+                .ctl_name = PSDEV_CONSOLE_BACKOFF,
+                .procname = "console_backoff",
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_console_backoff
+        },
+
+        {
+                .ctl_name = PSDEV_DEBUG_PATH,
+                .procname = "debug_path",
+                .data     = debug_file_path,
+                .maxlen   = sizeof(debug_file_path),
+                .mode     = 0644,
+                .proc_handler = &proc_dostring,
+        },
+
+        {
+                .ctl_name = PSDEV_LNET_UPCALL,
+                .procname = "upcall",
+                .data     = lnet_upcall,
+                .maxlen   = sizeof(lnet_upcall),
+                .mode     = 0644,
+                .proc_handler = &proc_dostring,
+        },
+        {
+                .ctl_name = PSDEV_LNET_MEMUSED,
+                .procname = "memused",
+                .data     = (int *)&libcfs_kmemory.counter,
+                .maxlen   = sizeof(int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        },
+        {
+                .ctl_name = PSDEV_LNET_CATASTROPHE,
+                .procname = "catastrophe",
+                .data     = &libcfs_catastrophe,
+                .maxlen   = sizeof(int),
+                .mode     = 0444,
+                .proc_handler = &proc_dointvec
+        },
+        {
+                .ctl_name = PSDEV_LNET_PANIC_ON_LBUG,
+                .procname = "panic_on_lbug",
+                .data     = &libcfs_panic_on_lbug,
+                .maxlen   = sizeof(int),
+                .mode     = 0644,
+                .proc_handler = &proc_dointvec
+        },
+        {
+                .ctl_name = PSDEV_LNET_DUMP_KERNEL,
+                .procname = "dump_kernel",
+                .mode     = 0200,
+                .proc_handler = &proc_dump_kernel,
+        },
+        {
+                .ctl_name = PSDEV_LNET_DAEMON_FILE,
+                .procname = "daemon_file",
+                .mode     = 0644,
+                .proc_handler = &proc_daemon_file,
+        },
+        {
+                .ctl_name = PSDEV_LNET_DEBUG_MB,
+                .procname = "debug_mb",
+                .mode     = 0644,
+                .proc_handler = &proc_debug_mb,
+        },
+        {0}
+};
+
+/*
+ * Root of the sysctl tree registered by insert_proc(): a single "lnet"
+ * directory whose children are the lnet_table entries, followed by the
+ * zeroed terminator.
+ */
+static cfs_sysctl_table_t top_table[2] = {
+        {
+                .ctl_name = PSDEV_LNET,
+                .procname = "lnet",
+                .data     = NULL,
+                .maxlen   = 0,
+                .mode     = 0555,
+                .child    = lnet_table
+        },
+        {0}
+};
+
+/*
+ * Register the "lnet" sysctl tree if it is not registered yet.
+ *
+ * Compiled to a no-op without CONFIG_SYSCTL.  Always returns 0; the result
+ * of cfs_register_sysctl_table() is stored but not checked.
+ */
+int insert_proc(void)
+{
+#ifdef CONFIG_SYSCTL
+        if (lnet_table_header != NULL)
+                return 0;
+
+        lnet_table_header = cfs_register_sysctl_table(top_table, 0);
+#endif
+        return 0;
+}
+
+/*
+ * Unregister the "lnet" sysctl tree if insert_proc() registered it, and
+ * forget the handle.  Compiled to a no-op without CONFIG_SYSCTL.
+ */
+void remove_proc(void)
+{
+#ifdef CONFIG_SYSCTL
+        if (lnet_table_header != NULL) {
+                cfs_unregister_sysctl_table(lnet_table_header);
+                lnet_table_header = NULL;
+        }
+#endif
+}
diff --git a/libcfs/libcfs/linux/linux-sync.c b/libcfs/libcfs/linux/linux-sync.c
new file mode 100644 (file)
index 0000000..520c54c
--- /dev/null
@@ -0,0 +1,2 @@
+# define DEBUG_SUBSYSTEM S_LNET
+
diff --git a/libcfs/libcfs/linux/linux-tcpip.c b/libcfs/libcfs/linux/linux-tcpip.c
new file mode 100644 (file)
index 0000000..e8ceafd
--- /dev/null
@@ -0,0 +1,683 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/file.h>
+/* For sys_open & sys_close */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+#include <linux/syscalls.h>
+#else
+#include <linux/fs.h>
+#endif
+
+/*
+ * Issue ioctl 'cmd' with argument 'arg' on a temporary PF_INET stream
+ * socket created just for this call.
+ *
+ * The socket is mapped to a file descriptor so the ioctl can be dispatched
+ * through its file operations.  The address limit is switched to KERNEL_DS
+ * around the call and restored afterwards.
+ *
+ * Returns the ioctl's result, or a negative errno if the socket, the
+ * descriptor or the file reference could not be obtained.
+ */
+int
+libcfs_sock_ioctl(int cmd, unsigned long arg)
+{
+        mm_segment_t   oldmm = get_fs();
+        struct socket  *sock;
+        int             fd;
+        int             rc;
+        struct file     *sock_filp;
+
+        rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return rc;
+        }
+
+        fd = sock_map_fd(sock);
+        if (fd < 0) {
+                rc = fd;
+                /* no descriptor refers to the socket, release it directly */
+                sock_release(sock);
+                goto out;
+        }
+
+        sock_filp = fget(fd);
+        if (!sock_filp) {
+                rc = -ENOMEM;
+                goto out_fd;
+        }
+
+        set_fs(KERNEL_DS);
+        /* prefer unlocked_ioctl when available; otherwise fall back to the
+         * legacy ioctl method under the big kernel lock */
+#ifdef HAVE_UNLOCKED_IOCTL
+        if (sock_filp->f_op->unlocked_ioctl)
+                rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg);
+        else
+#endif
+             {
+                lock_kernel();
+                rc =sock_filp->f_op->ioctl(sock_filp->f_dentry->d_inode,
+                                           sock_filp, cmd, arg);
+                unlock_kernel();
+             }
+        set_fs(oldmm);
+
+        fput(sock_filp);
+
+        /* NOTE(review): presumably closing the descriptor also disposes of
+         * the socket, since no sock_release() follows -- confirm */
+ out_fd:
+        sys_close(fd);
+ out:
+        return rc;
+}
+
+/*
+ * Query the state of IPv4 interface 'name'.
+ *
+ * On success returns 0 and sets *up to 1 with *ip and *mask holding the
+ * address and netmask in host byte order, or sets *up, *ip and *mask to 0
+ * if the interface is down.  Returns -EINVAL if 'name' does not fit in
+ * IFNAMSIZ including its terminator, or the error from the failing ioctl.
+ */
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask)
+{
+        struct ifreq         ifr;
+        struct sockaddr_in  *sin;
+        int                  namelen;
+        int                  rc;
+
+        namelen = strnlen(name, IFNAMSIZ);
+        if (namelen == IFNAMSIZ) {
+                CERROR("Interface name %s too long\n", name);
+                return -EINVAL;
+        }
+
+        CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+
+        /* step 1: interface flags */
+        strcpy(ifr.ifr_name, name);
+        rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
+        if (rc != 0) {
+                CERROR("Can't get flags for interface %s\n", name);
+                return rc;
+        }
+
+        if ((ifr.ifr_flags & IFF_UP) == 0) {
+                CDEBUG(D_NET, "Interface %s down\n", name);
+                *up = 0;
+                *mask = 0;
+                *ip = 0;
+                return 0;
+        }
+
+        *up = 1;
+
+        /* step 2: IP address */
+        strcpy(ifr.ifr_name, name);
+        ifr.ifr_addr.sa_family = AF_INET;
+        rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
+        if (rc != 0) {
+                CERROR("Can't get IP address for interface %s\n", name);
+                return rc;
+        }
+
+        sin = (struct sockaddr_in *)&ifr.ifr_addr;
+        *ip = ntohl(sin->sin_addr.s_addr);
+
+        /* step 3: netmask */
+        strcpy(ifr.ifr_name, name);
+        ifr.ifr_addr.sa_family = AF_INET;
+        rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr);
+        if (rc != 0) {
+                CERROR("Can't get netmask for interface %s\n", name);
+                return rc;
+        }
+
+        sin = (struct sockaddr_in *)&ifr.ifr_netmask;
+        *mask = ntohl(sin->sin_addr.s_addr);
+
+        return 0;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_query);
+
+/*
+ * Build an array of interface names using SIOCGIFCONF.
+ *
+ * Returns the number of interfaces found and stores the newly allocated
+ * array in *namesp; each name is a NUL-terminated string in its own
+ * IFNAMSIZ-byte allocation.  Returns 0 without touching *namesp when no
+ * interface is found, or a negative errno on failure.
+ *
+ * A successful non-empty result is released with
+ * libcfs_ipif_free_enumeration(names, n).
+ */
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+        /* Allocate and fill in 'names', returning # interfaces/error */
+        char           **names;
+        int             toobig;
+        int             nalloc;
+        int             nfound;
+        struct ifreq   *ifr;
+        struct ifconf   ifc;
+        int             rc;
+        int             nob;
+        int             i;
+
+
+        nalloc = 16;        /* first guess at max interfaces */
+        toobig = 0;
+        /* Double the ifreq buffer until the result no longer fills it
+         * completely; the buffer is capped at one page */
+        for (;;) {
+                if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) {
+                        toobig = 1;
+                        nalloc = CFS_PAGE_SIZE/sizeof(*ifr);
+                        CWARN("Too many interfaces: only enumerating first %d\n",
+                              nalloc);
+                }
+
+                LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+                if (ifr == NULL) {
+                        CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc);
+                        rc = -ENOMEM;
+                        goto out0;
+                }
+
+                ifc.ifc_buf = (char *)ifr;
+                ifc.ifc_len = nalloc * sizeof(*ifr);
+
+                rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
+
+                if (rc < 0) {
+                        CERROR ("Error %d enumerating interfaces\n", rc);
+                        goto out1;
+                }
+
+                LASSERT (rc == 0);
+
+                nfound = ifc.ifc_len/sizeof(*ifr);
+                LASSERT (nfound <= nalloc);
+
+                /* a completely full buffer may have been truncated: retry
+                 * with a bigger one unless the page cap was already hit */
+                if (nfound < nalloc || toobig)
+                        break;
+
+                LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+                nalloc *= 2;
+        }
+
+        /* rc is 0 here, so "no interfaces" is returned as 0 */
+        if (nfound == 0)
+                goto out1;
+
+        LIBCFS_ALLOC(names, nfound * sizeof(*names));
+        if (names == NULL) {
+                rc = -ENOMEM;
+                goto out1;
+        }
+        /* NULL out all names[i] */
+        memset (names, 0, nfound * sizeof(*names));
+
+        for (i = 0; i < nfound; i++) {
+
+                nob = strnlen (ifr[i].ifr_name, IFNAMSIZ);
+                if (nob == IFNAMSIZ) {
+                        /* no space for terminating NULL */
+                        CERROR("interface name %.*s too long (%d max)\n",
+                               nob, ifr[i].ifr_name, IFNAMSIZ);
+                        rc = -ENAMETOOLONG;
+                        goto out2;
+                }
+
+                LIBCFS_ALLOC(names[i], IFNAMSIZ);
+                if (names[i] == NULL) {
+                        rc = -ENOMEM;
+                        goto out2;
+                }
+
+                memcpy(names[i], ifr[i].ifr_name, nob);
+                names[i][nob] = 0;
+        }
+
+        *namesp = names;
+        rc = nfound;
+
+        /* on error, free whatever part of 'names' was filled in; the
+         * success path falls through here with rc > 0 */
+ out2:
+        if (rc < 0)
+                libcfs_ipif_free_enumeration(names, nfound);
+ out1:
+        LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ out0:
+        return rc;
+}
+
+EXPORT_SYMBOL(libcfs_ipif_enumerate);
+
+/*
+ * Free an array of 'n' interface names as built by
+ * libcfs_ipif_enumerate().  Names are freed up to the first NULL slot
+ * (the array may be only partially filled), then the array itself.
+ * 'n' must be positive.
+ */
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+        int      idx;
+
+        LASSERT (n > 0);
+
+        for (idx = 0; idx < n; idx++) {
+                if (names[idx] == NULL)
+                        break;
+
+                LIBCFS_FREE(names[idx], IFNAMSIZ);
+        }
+
+        LIBCFS_FREE(names, n * sizeof(*names));
+}
+
+EXPORT_SYMBOL(libcfs_ipif_free_enumeration);
+
+/*
+ * Send exactly 'nob' bytes from 'buffer' on 'sock'.
+ *
+ * 'timeout' is scaled by HZ into a tick budget.  With a zero timeout each
+ * send uses MSG_DONTWAIT and no socket timeout is set; otherwise
+ * SO_SNDTIMEO is set to the remaining budget before every send attempt.
+ *
+ * Returns 0 once everything has been sent, the negative errno from
+ * sock_setsockopt()/sock_sendmsg(), -ECONNABORTED if a send returned 0,
+ * or -EAGAIN if the budget ran out after a partial send.
+ */
+int
+libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
+{
+        int            rc;
+        mm_segment_t   oldmm = get_fs();
+        long           ticks = timeout * HZ;
+        unsigned long  then;
+        struct timeval tv;
+
+        LASSERT (nob > 0);
+        /* Caller may pass a zero timeout if she thinks the socket buffer is
+         * empty enough to take the whole message immediately */
+
+        for (;;) {
+                /* rebuilt every pass: 'buffer' and 'nob' advance after a
+                 * partial send */
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct msghdr msg = {
+                        .msg_name       = NULL,
+                        .msg_namelen    = 0,
+                        .msg_iov        = &iov,
+                        .msg_iovlen     = 1,
+                        .msg_control    = NULL,
+                        .msg_controllen = 0,
+                        .msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0
+                };
+
+                if (timeout != 0) {
+                        /* Set send timeout to remaining time */
+                        tv = (struct timeval) {
+                                .tv_sec = ticks / HZ,
+                                .tv_usec = ((ticks % HZ) * 1000000) / HZ
+                        };
+                        set_fs(KERNEL_DS);
+                        rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO,
+                                             (char *)&tv, sizeof(tv));
+                        set_fs(oldmm);
+                        if (rc != 0) {
+                                CERROR("Can't set socket send timeout "
+                                       "%ld.%06d: %d\n",
+                                       (long)tv.tv_sec, (int)tv.tv_usec, rc);
+                                return rc;
+                        }
+                }
+
+                set_fs (KERNEL_DS);
+                then = jiffies;
+                rc = sock_sendmsg (sock, &msg, iov.iov_len);
+                /* charge the time spent in the send against the budget */
+                ticks -= jiffies - then;
+                set_fs (oldmm);
+
+                if (rc == nob)
+                        return 0;
+
+                if (rc < 0)
+                        return rc;
+
+                if (rc == 0) {
+                        CERROR ("Unexpected zero rc\n");
+                        return (-ECONNABORTED);
+                }
+
+                /* partial send: give up if no time is left (always the
+                 * case for a zero timeout) */
+                if (ticks <= 0)
+                        return -EAGAIN;
+
+                buffer = ((char *)buffer) + rc;
+                nob -= rc;
+        }
+
+        /* not reached: every exit from the loop above is a return */
+        return (0);
+}
+EXPORT_SYMBOL(libcfs_sock_write);
+
+/*
+ * Receive exactly 'nob' bytes from 'sock' into 'buffer'.
+ *
+ * 'timeout' is scaled by HZ into a tick budget and must be positive.
+ * SO_RCVTIMEO is set to the remaining budget before every receive.
+ *
+ * Returns 0 once 'nob' bytes have been read, the negative errno from
+ * sock_setsockopt()/sock_recvmsg(), -ECONNRESET if a receive returned 0,
+ * or -ETIMEDOUT if the budget ran out with data still outstanding.
+ */
+int
+libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
+{
+        int            rc;
+        mm_segment_t   oldmm = get_fs();
+        long           ticks = timeout * HZ;
+        unsigned long  then;
+        struct timeval tv;
+
+        LASSERT (nob > 0);
+        LASSERT (ticks > 0);
+
+        for (;;) {
+                /* rebuilt every pass: 'buffer' and 'nob' advance after a
+                 * partial receive */
+                struct iovec  iov = {
+                        .iov_base = buffer,
+                        .iov_len  = nob
+                };
+                struct msghdr msg = {
+                        .msg_name       = NULL,
+                        .msg_namelen    = 0,
+                        .msg_iov        = &iov,
+                        .msg_iovlen     = 1,
+                        .msg_control    = NULL,
+                        .msg_controllen = 0,
+                        .msg_flags      = 0
+                };
+
+                /* Set receive timeout to remaining time */
+                tv = (struct timeval) {
+                        .tv_sec = ticks / HZ,
+                        .tv_usec = ((ticks % HZ) * 1000000) / HZ
+                };
+                set_fs(KERNEL_DS);
+                rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
+                                     (char *)&tv, sizeof(tv));
+                set_fs(oldmm);
+                if (rc != 0) {
+                        CERROR("Can't set socket recv timeout %ld.%06d: %d\n",
+                               (long)tv.tv_sec, (int)tv.tv_usec, rc);
+                        return rc;
+                }
+
+                set_fs(KERNEL_DS);
+                then = jiffies;
+                rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
+                /* charge the time spent in the receive against the budget */
+                ticks -= jiffies - then;
+                set_fs(oldmm);
+
+                if (rc < 0)
+                        return rc;
+
+                if (rc == 0)
+                        return -ECONNRESET;
+
+                buffer = ((char *)buffer) + rc;
+                nob -= rc;
+
+                if (nob == 0)
+                        return 0;
+
+                if (ticks <= 0)
+                        return -ETIMEDOUT;
+        }
+}
+
+EXPORT_SYMBOL(libcfs_sock_read);
+
+/*
+ * Create a PF_INET stream socket with SO_REUSEADDR set and, when
+ * 'local_ip' or 'local_port' is non-zero, bind it to that address
+ * (INADDR_ANY if 'local_ip' is zero).
+ *
+ * sockp: receives the new socket once it has been created
+ * fatal: set to 0 only when bind fails with -EADDRINUSE, 1 otherwise
+ *
+ * Returns 0 on success or a negative errno.  On any failure after
+ * creation the socket has been released and *sockp must not be used.
+ */
+static int
+libcfs_sock_create (struct socket **sockp, int *fatal,
+                    __u32 local_ip, int local_port)
+{
+        struct sockaddr_in  locaddr;
+        struct socket      *sock;
+        int                 rc;
+        int                 option;
+        mm_segment_t        oldmm = get_fs();
+
+        /* All errors are fatal except bind failure if the port is in use */
+        *fatal = 1;
+
+        rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
+        if (rc != 0) {
+                CERROR ("Can't create socket: %d\n", rc);
+                return (rc);
+        }
+        /* only publish the socket once sock_create() has filled it in;
+         * 'sock' is uninitialised if creation failed */
+        *sockp = sock;
+
+        set_fs (KERNEL_DS);
+        option = 1;
+        rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+                             (char *)&option, sizeof (option));
+        set_fs (oldmm);
+        if (rc != 0) {
+                CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
+                goto failed;
+        }
+
+        if (local_ip != 0 || local_port != 0) {
+                memset(&locaddr, 0, sizeof(locaddr));
+                locaddr.sin_family = AF_INET;
+                locaddr.sin_port = htons(local_port);
+                locaddr.sin_addr.s_addr = (local_ip == 0) ?
+                                          INADDR_ANY : htonl(local_ip);
+
+                rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr,
+                                     sizeof(locaddr));
+                if (rc == -EADDRINUSE) {
+                        CDEBUG(D_NET, "Port %d already in use\n", local_port);
+                        *fatal = 0;
+                        goto failed;
+                }
+                if (rc != 0) {
+                        CERROR("Error trying to bind to port %d: %d\n",
+                               local_port, rc);
+                        goto failed;
+                }
+        }
+
+        return 0;
+
+ failed:
+        sock_release(sock);
+        return rc;
+}
+
+/*
+ * Set the send and/or receive buffer size of 'sock' through SO_SNDBUF and
+ * SO_RCVBUF.  A size of zero leaves the corresponding buffer untouched.
+ *
+ * Returns 0 on success or the error from sock_setsockopt(); if setting
+ * the send buffer fails the receive buffer is not attempted.
+ */
+int
+libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize)
+{
+        mm_segment_t        saved_fs = get_fs();
+        int                 size;
+        int                 rc;
+
+        if (txbufsize != 0) {
+                size = txbufsize;
+
+                set_fs (KERNEL_DS);
+                rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
+                                     (char *)&size, sizeof (size));
+                set_fs (saved_fs);
+
+                if (rc != 0) {
+                        CERROR ("Can't set send buffer %d: %d\n",
+                                size, rc);
+                        return rc;
+                }
+        }
+
+        if (rxbufsize != 0) {
+                size = rxbufsize;
+
+                set_fs (KERNEL_DS);
+                rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
+                                      (char *)&size, sizeof (size));
+                set_fs (saved_fs);
+
+                if (rc != 0) {
+                        CERROR ("Can't set receive buffer %d: %d\n",
+                                size, rc);
+                        return rc;
+                }
+        }
+
+        return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_setbuf);
+
+/*
+ * Fetch the peer ('remote' non-zero) or local address of 'sock' via its
+ * getname operation.
+ *
+ * On success returns 0 and stores the IPv4 address and port, both in host
+ * byte order, through 'ip' and 'port'; either pointer may be NULL to skip
+ * that value.  On failure returns the error from getname.
+ */
+int
+libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port)
+{
+        struct sockaddr_in addr;
+        int                addrlen = sizeof (addr);
+        int                rc;
+
+        rc = sock->ops->getname (sock, (struct sockaddr *)&addr, &addrlen,
+                                 remote ? 2 : 0);
+        if (rc != 0) {
+                CERROR ("Error %d getting sock %s IP/port\n",
+                        rc, remote ? "peer" : "local");
+                return rc;
+        }
+
+        if (ip != NULL)
+                *ip = ntohl (addr.sin_addr.s_addr);
+        if (port != NULL)
+                *port = ntohs (addr.sin_port);
+
+        return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_getaddr);
+
+/*
+ * Report the send and receive buffer sizes recorded in sock->sk.
+ * Either output pointer may be NULL to skip that value.  Always returns 0.
+ */
+int
+libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize)
+{
+        if (txbufsize != NULL)
+                *txbufsize = sock->sk->sk_sndbuf;
+
+        if (rxbufsize != NULL)
+                *rxbufsize = sock->sk->sk_rcvbuf;
+
+        return 0;
+}
+
+EXPORT_SYMBOL(libcfs_sock_getbuf);
+
+/*
+ * Create a socket bound to local_ip/local_port with libcfs_sock_create()
+ * and put it into the listening state with the given backlog.
+ *
+ * Returns 0 with the socket in *sockp, or a negative errno; on failure
+ * the socket has already been released.
+ */
+int
+libcfs_sock_listen (struct socket **sockp,
+                    __u32 local_ip, int local_port, int backlog)
+{
+        int      fatal;
+        int      rc;
+
+        rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port);
+        if (rc != 0) {
+                /* the only non-fatal creation failure is a busy port */
+                if (!fatal)
+                        CERROR("Can't create socket: port %d already in use\n",
+                               local_port);
+                return rc;
+        }
+
+        rc = (*sockp)->ops->listen(*sockp, backlog);
+        if (rc != 0) {
+                CERROR("Can't set listen backlog %d: %d\n", backlog, rc);
+                sock_release(*sockp);
+        }
+
+        return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_listen);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12)
+/*
+ * Local sock_create_lite() for kernels older than 2.6.12: allocate a bare
+ * socket and record only its type.  'family' and 'protocol' are ignored.
+ * Returns 0 with the socket in *res, or -ENOMEM.
+ */
+int sock_create_lite(int family, int type, int protocol, struct socket **res)
+{
+        struct socket *fresh = sock_alloc();
+
+        if (fresh == NULL)
+                return -ENOMEM;
+
+        fresh->type = type;
+        *res = fresh;
+
+        return 0;
+}
+#endif
+
+/*
+ * Accept one connection on listening socket 'sock'.
+ *
+ * A new socket is allocated with sock_create_lite() and given the
+ * listener's ops.  A non-blocking accept is tried; if it returns -EAGAIN
+ * the caller sleeps once on the listener's wait queue and tries exactly
+ * once more.
+ *
+ * Returns 0 with the connected socket in *newsockp, or a negative errno
+ * (the new socket is released).  The second attempt's result is returned
+ * as is, so -EAGAIN is still possible -- NOTE(review): presumably this is
+ * what a waiter woken by libcfs_sock_abort_accept() sees; confirm.
+ */
+int
+libcfs_sock_accept (struct socket **newsockp, struct socket *sock)
+{
+        wait_queue_t   wait;
+        struct socket *newsock;
+        int            rc;
+
+        init_waitqueue_entry(&wait, current);
+
+        /* XXX this should add a ref to sock->ops->owner, if
+         * TCP could be a module */
+        rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock);
+        if (rc) {
+                CERROR("Can't allocate socket\n");
+                return rc;
+        }
+
+        newsock->ops = sock->ops;
+
+        /* go on the wait queue before the first accept attempt --
+         * NOTE(review): presumably so a wake-up arriving between the
+         * accept and schedule() is not lost; confirm */
+        set_current_state(TASK_INTERRUPTIBLE);
+        add_wait_queue(sock->sk->sk_sleep, &wait);
+
+        rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+        if (rc == -EAGAIN) {
+                /* Nothing ready, so wait for activity */
+                schedule();
+                rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
+        }
+
+        remove_wait_queue(sock->sk->sk_sleep, &wait);
+        set_current_state(TASK_RUNNING);
+
+        if (rc != 0)
+                goto failed;
+
+        *newsockp = newsock;
+        return 0;
+
+ failed:
+        sock_release(newsock);
+        return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_accept);
+
+/*
+ * Wake every task sleeping on the socket's wait queue, i.e. the queue
+ * libcfs_sock_accept() sleeps on.
+ */
+void
+libcfs_sock_abort_accept (struct socket *sock)
+{
+        wake_up_all(sock->sk->sk_sleep);
+}
+
+EXPORT_SYMBOL(libcfs_sock_abort_accept);
+
+/*
+ * Create a socket (optionally bound to local_ip/local_port) and connect
+ * it to peer_ip/peer_port.  IP addresses and ports are in host byte
+ * order.
+ *
+ * Returns 0 with the connected socket in *sockp, or a negative errno with
+ * the socket released.  After a connect failure *fatal is 0 only for
+ * -EADDRNOTAVAIL; after a creation failure it is as set by
+ * libcfs_sock_create().
+ */
+int
+libcfs_sock_connect (struct socket **sockp, int *fatal,
+                     __u32 local_ip, int local_port,
+                     __u32 peer_ip, int peer_port)
+{
+        struct sockaddr_in  peer;
+        int                 rc;
+
+        rc = libcfs_sock_create(sockp, fatal, local_ip, local_port);
+        if (rc != 0)
+                return rc;
+
+        memset (&peer, 0, sizeof (peer));
+        peer.sin_family = AF_INET;
+        peer.sin_port = htons(peer_port);
+        peer.sin_addr.s_addr = htonl(peer_ip);
+
+        rc = (*sockp)->ops->connect(*sockp,
+                                    (struct sockaddr *)&peer, sizeof(peer),
+                                    0);
+        if (rc != 0) {
+                /* EADDRNOTAVAIL probably means we're already connected to
+                 * the same peer/port on the same local port on a
+                 * differently typed connection.  Let our caller retry with
+                 * a different local port... */
+                *fatal = (rc != -EADDRNOTAVAIL);
+
+                CDEBUG(*fatal ? D_NETERROR : D_NET,
+                       "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
+                       HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port);
+
+                sock_release(*sockp);
+        }
+
+        return rc;
+}
+
+EXPORT_SYMBOL(libcfs_sock_connect);
+
+/* Exported wrapper around sock_release(). */
+void
+libcfs_sock_release (struct socket *sock)
+{
+        sock_release(sock);
+}
+
+EXPORT_SYMBOL(libcfs_sock_release);
diff --git a/libcfs/libcfs/linux/linux-tracefile.c b/libcfs/libcfs/linux/linux-tracefile.c
new file mode 100644 (file)
index 0000000..5956027
--- /dev/null
@@ -0,0 +1,266 @@
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifndef get_cpu
+#define get_cpu() smp_processor_id()
+#define put_cpu() do { } while (0)
+#endif
+
+/*
+ * Three types of trace_data in linux.  The values are used as indices into
+ * pages_factor[] and trace_data[]; trace_get_tcd() chooses one according to
+ * in_irq() / in_softirq().
+ */
+enum {
+       TCD_TYPE_PROC = 0,      /* neither in_irq() nor in_softirq() */
+       TCD_TYPE_SOFTIRQ,       /* in_softirq() */
+       TCD_TYPE_IRQ,           /* in_irq() */
+       TCD_TYPE_MAX            /* number of types, not a type itself */
+};
+
+/* percentage of the total debug memory given to each type; indexed by
+ * TCD_TYPE_* and copied into tcd_pages_factor by tracefile_init_arch() */
+static unsigned int pages_factor[TCD_TYPE_MAX] = {
+       80,  /* 80% pages for TCD_TYPE_PROC */
+       10,  /* 10% pages for TCD_TYPE_SOFTIRQ */
+       10   /* 10% pages for TCD_TYPE_IRQ */
+};
+
+/* console buffers, [cpu][context]; trace_get_console_buffer() picks the
+ * second index: 0 = in_irq(), 1 = in_softirq(), 2 = anything else */
+char *trace_console_buffers[NR_CPUS][3];
+
+/* taken through tracefile_{read,write}_{lock,unlock}() below */
+struct rw_semaphore tracefile_sem;
+
+/*
+ * Set up the Linux-specific tracefile state: tracefile_sem, one kmalloc'ed
+ * array of NR_CPUS trace_data_union per TCD type, and three console buffers
+ * of TRACE_CONSOLE_BUFFER_SIZE bytes for each of the first
+ * num_possible_cpus() CPU slots.
+ *
+ * Returns 0 on success.  If any kmalloc() fails, whatever was allocated so
+ * far is released through tracefile_fini_arch() and -ENOMEM is returned.
+ */
+int tracefile_init_arch()
+{
+       int    i;
+       int    j;
+       struct trace_cpu_data *tcd;
+
+       init_rwsem(&tracefile_sem);
+
+       /* initialize trace_data; zeroing first leaves every slot that is
+        * not allocated below NULL, which tracefile_fini_arch() uses to
+        * decide where to stop freeing */
+       memset(trace_data, 0, sizeof(trace_data));
+       for (i = 0; i < TCD_TYPE_MAX; i++) {
+               trace_data[i]=kmalloc(sizeof(union trace_data_union)*NR_CPUS,
+                                                         GFP_KERNEL);
+               if (trace_data[i] == NULL)
+                       goto out;
+
+       }
+
+       /* arch related info initialized */
+       /* NOTE(review): tcd_for_each is defined elsewhere; presumably it
+        * visits every (type i, cpu j) pair of trace_data - confirm */
+       tcd_for_each(tcd, i, j) {
+               tcd->tcd_pages_factor = pages_factor[i];
+               tcd->tcd_type = i;
+               tcd->tcd_cpu = j;
+       }
+
+       /* NOTE(review): slots 0..num_possible_cpus()-1 are filled here but
+        * trace_get_console_buffer() indexes by get_cpu(); this assumes CPU
+        * ids are dense - confirm */
+       for (i = 0; i < num_possible_cpus(); i++)
+               for (j = 0; j < 3; j++) {
+                       trace_console_buffers[i][j] =
+                               kmalloc(TRACE_CONSOLE_BUFFER_SIZE,
+                                       GFP_KERNEL);
+
+                       if (trace_console_buffers[i][j] == NULL)
+                               goto out;
+               }
+
+       return 0;
+
+out:
+       /* partial failure: fini copes with slots that are still NULL */
+       tracefile_fini_arch();
+       printk(KERN_ERR "lnet: No enough memory\n");
+       return -ENOMEM;
+
+}
+
+/*
+ * Release everything tracefile_init_arch() allocated: the per-CPU console
+ * buffers and the per-type trace_data arrays.  Every freed pointer is reset
+ * to NULL, and NULL slots are skipped, so this is also used to unwind a
+ * partially completed tracefile_init_arch().
+ */
+void tracefile_fini_arch()
+{
+       int    i;
+       int    j;
+
+       for (i = 0; i < num_possible_cpus(); i++)
+               for (j = 0; j < 3; j++)
+                       if (trace_console_buffers[i][j] != NULL) {
+                               kfree(trace_console_buffers[i][j]);
+                               trace_console_buffers[i][j] = NULL;
+                       }
+
+       /* tracefile_init_arch() only ever fills slots below TCD_TYPE_MAX and
+        * fills them in order, so stop at the first NULL - but never walk
+        * past TCD_TYPE_MAX looking for a terminator that a fully populated
+        * table does not have */
+       for (i = 0; i < TCD_TYPE_MAX && trace_data[i] != NULL; i++) {
+               kfree(trace_data[i]);
+               trace_data[i] = NULL;
+       }
+}
+
+/* Take tracefile_sem for reading; pair with tracefile_read_unlock(). */
+void tracefile_read_lock()
+{
+       down_read(&tracefile_sem);
+}
+
+/* Drop the read hold on tracefile_sem taken by tracefile_read_lock(). */
+void tracefile_read_unlock()
+{
+       up_read(&tracefile_sem);
+}
+
+/* Take tracefile_sem for writing; pair with tracefile_write_unlock(). */
+void tracefile_write_lock()
+{
+       down_write(&tracefile_sem);
+}
+
+/* Drop the write hold on tracefile_sem taken by tracefile_write_lock(). */
+void tracefile_write_unlock()
+{
+       up_write(&tracefile_sem);
+}
+
+/*
+ * Return the console buffer belonging to the current CPU and execution
+ * context (slot 0 for in_irq(), 1 for in_softirq(), 2 otherwise).
+ * The get_cpu() done here is balanced by trace_put_console_buffer().
+ */
+char *
+trace_get_console_buffer(void)
+{
+       int  this_cpu = get_cpu();
+       int  slot = 2;          /* default: neither irq nor softirq */
+
+       if (in_irq())
+               slot = 0;
+       else if (in_softirq())
+               slot = 1;
+
+       return trace_console_buffers[this_cpu][slot];
+}
+
+/*
+ * Counterpart of trace_get_console_buffer(): only issues put_cpu();
+ * 'buffer' itself is not used.
+ */
+void
+trace_put_console_buffer(char *buffer)
+{
+       put_cpu();
+}
+
+/*
+ * Return the trace_cpu_data of the current CPU for the current execution
+ * context: TCD_TYPE_IRQ when in_irq(), TCD_TYPE_SOFTIRQ when in_softirq(),
+ * TCD_TYPE_PROC otherwise.  The get_cpu() done here is balanced by
+ * trace_put_tcd().
+ */
+struct trace_cpu_data *
+trace_get_tcd(void)
+{
+       int type = TCD_TYPE_PROC;
+       int cpu = get_cpu();
+
+       if (in_irq())
+               type = TCD_TYPE_IRQ;
+       else if (in_softirq())
+               type = TCD_TYPE_SOFTIRQ;
+
+       return &(*trace_data[type])[cpu].tcd;
+}
+
+/* Counterpart of trace_get_tcd(): only issues put_cpu(); 'tcd' is unused. */
+void
+trace_put_tcd (struct trace_cpu_data *tcd)
+{
+       put_cpu();
+}
+
+/*
+ * Protect 'tcd' according to its type: local interrupts are disabled for an
+ * IRQ tcd, bottom halves for a SOFTIRQ tcd, nothing for a PROC tcd.
+ * Always returns 1.
+ */
+int trace_lock_tcd(struct trace_cpu_data *tcd)
+{
+       __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+
+       switch (tcd->tcd_type) {
+       case TCD_TYPE_IRQ:
+               local_irq_disable();
+               break;
+       case TCD_TYPE_SOFTIRQ:
+               local_bh_disable();
+               break;
+       default:
+               /* TCD_TYPE_PROC: nothing to do */
+               break;
+       }
+
+       return 1;
+}
+
+/*
+ * Undo trace_lock_tcd(): re-enable local interrupts for an IRQ tcd, bottom
+ * halves for a SOFTIRQ tcd; a PROC tcd needs nothing.
+ */
+void trace_unlock_tcd(struct trace_cpu_data *tcd)
+{
+       __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+
+       switch (tcd->tcd_type) {
+       case TCD_TYPE_IRQ:
+               local_irq_enable();
+               break;
+       case TCD_TYPE_SOFTIRQ:
+               local_bh_enable();
+               break;
+       default:
+               /* TCD_TYPE_PROC: nothing to do */
+               break;
+       }
+}
+
+/* Return non-zero iff 'tage' is tagged with the same CPU as 'tcd'
+ * (tcd->tcd_cpu == tage->cpu). */
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+       return tcd->tcd_cpu == tage->cpu;
+}
+
+/*
+ * Fill in 'header' for one debug message: the caller-supplied subsys, mask,
+ * line and stack values plus the current time (do_gettimeofday), the current
+ * CPU id and the pid of 'current'.
+ */
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+                   const int line, unsigned long stack)
+{
+       struct timeval tv;
+
+       do_gettimeofday(&tv);
+
+       header->ph_subsys = subsys;
+       header->ph_mask = mask;
+       header->ph_cpu_id = smp_processor_id();
+       /* seconds are stored truncated to 32 bits */
+       header->ph_sec = (__u32)tv.tv_sec;
+       header->ph_usec = tv.tv_usec;
+       header->ph_stack = stack;
+       header->ph_pid = current->pid;
+       header->ph_line_num = line;
+       /* ph_extern_pid is only meaningful on user-mode-linux builds of old
+        * kernels (where the thread struct layout differs by version);
+        * everywhere else it is 0 */
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+       header->ph_extern_pid = current->thread.extern_pid;
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+       header->ph_extern_pid = current->thread.mode.tt.extern_pid;
+#else
+       header->ph_extern_pid = 0;
+#endif
+       return;
+}
+
+/*
+ * Emit one debug message on the kernel console via printk().
+ *
+ * 'mask' selects the printk level and the prefix: D_EMERG and D_ERROR
+ * messages are tagged "LustreError", everything else "Lustre".  D_CONSOLE
+ * messages are printed bare; all others are preceded by the pid, extern pid
+ * and line number from 'hdr' together with 'file' and 'fn'.  At most 'len'
+ * bytes of 'buf' are printed.
+ */
+void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
+                            int len, const char *file, const char *fn)
+{
+       /* Default to KERN_INFO so that a mask matching none of the tests
+        * below never passes a NULL level string to the "%s" in printk() */
+       char *prefix = "Lustre", *ptype = KERN_INFO;
+
+       if ((mask & D_EMERG) != 0) {
+               prefix = "LustreError";
+               ptype = KERN_EMERG;
+       } else if ((mask & D_ERROR) != 0) {
+               prefix = "LustreError";
+               ptype = KERN_ERR;
+       } else if ((mask & D_WARNING) != 0) {
+               prefix = "Lustre";
+               ptype = KERN_WARNING;
+       } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) {
+               prefix = "Lustre";
+               ptype = KERN_INFO;
+       }
+
+       if ((mask & D_CONSOLE) != 0) {
+               printk("%s%s: %.*s", ptype, prefix, len, buf);
+       } else {
+               printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
+                      hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
+       }
+       return;
+}
+
+/*
+ * Upper limit for the debug buffer size in MB: 80% of physical memory
+ * (derived from num_physpages), but never reported as less than 512.
+ */
+int trace_max_debug_mb(void)
+{
+       int  ram_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
+       int  limit_mb = (ram_mb * 80)/100;
+
+       return MAX(512, limit_mb);
+}
+
+/*
+ * Call fn(arg) once on every online CPU, from the calling thread, by
+ * temporarily restricting the thread's allowed-CPU mask to one CPU at a
+ * time.  The original mask is put back after each call.
+ */
+void
+trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
+{
+        cpumask_t cpus_allowed = current->cpus_allowed;
+       /* use cpus_allowed to quiet 2.4 UP kernel warning only */
+        cpumask_t m = cpus_allowed;
+        int       cpu;
+
+       /* Run the given routine on every CPU in thread context */
+       /* NOTE(review): ids 0..num_possible_cpus()-1 are tried; this assumes
+        * CPU ids are dense - confirm */
+        for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
+                if (!cpu_online(cpu))
+                       continue;
+
+               /* pin ourselves to 'cpu' only */
+               cpus_clear(m);
+               cpu_set(cpu, m);
+               set_cpus_allowed(current, m);
+
+               fn(arg);
+
+               set_cpus_allowed(current, cpus_allowed);
+        }
+}
diff --git a/libcfs/libcfs/linux/linux-utils.c b/libcfs/libcfs/linux/linux-utils.c
new file mode 100644 (file)
index 0000000..60f7cb8
--- /dev/null
@@ -0,0 +1,60 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/*
+ * miscellaneous libcfs stuff
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+#include <lnet/types.h>
+
+/*
+ * Convert server error code to client format. Error codes are from
+ * Linux errno.h, so for Linux client---identity.
+ *
+ * NOTE(review): the __u64 argument is returned as int, so the value is
+ * narrowed by the implicit conversion; presumably callers only pass values
+ * that fit - confirm.
+ */
+int convert_server_error(__u64 ecode)
+{
+       return ecode;
+}
+EXPORT_SYMBOL(convert_server_error);
+
+/*
+ * convert <fcntl.h> flag from client to server.
+ *
+ * Here the flag is passed through unchanged: *result receives cflag and
+ * the function always returns 0.
+ */
+int convert_client_oflag(int cflag, int *result)
+{
+        *result = cflag;
+       return 0;
+}
+EXPORT_SYMBOL(convert_client_oflag);
+
+/* No-op in this implementation: 'trace' is left untouched. */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{}
+
+EXPORT_SYMBOL(cfs_stack_trace_fill);
+
+/* Stub: returns NULL for every 'trace' and 'frame_no'. */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+        return NULL;
+}
+EXPORT_SYMBOL(cfs_stack_trace_frame);
+
diff --git a/libcfs/libcfs/lwt.c b/libcfs/libcfs/lwt.c
new file mode 100644 (file)
index 0000000..6455ece
--- /dev/null
@@ -0,0 +1,270 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2003 Cluster File Systems, Inc.
+ *   Author: Eric Barton <eeb@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#ifndef AUTOCONF_INCLUDED
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/kernel.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/errno.h>
+#include <linux/smp_lock.h>
+#include <linux/unistd.h>
+#include <linux/interrupt.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+
+#if LWT_SUPPORT
+
+#if !KLWT_SUPPORT
+/* non-zero while tracing is switched on; set by lwt_init() and toggled by
+ * lwt_control() */
+int         lwt_enabled;
+/* per-CPU trace state; lwtc_current_page points into that CPU's list of
+ * trace pages built by lwt_init() */
+lwt_cpu_t   lwt_cpus[NR_CPUS];
+#endif
+
+/* number of trace pages per CPU; computed in lwt_init() */
+int         lwt_pages_per_cpu;
+
+/* NB only root is allowed to retrieve LWT info; it's an open door into the
+ * kernel... */
+
+/*
+ * Report the length of the kernel string at 'knl_ptr' and optionally copy
+ * it out to user space.
+ *
+ * size      - receives the number of bytes considered, including the
+ *             terminator position; capped at min(128, user_size) when
+ *             user_size is positive, else at 128
+ * knl_ptr   - kernel address of the string (trusted, see below)
+ * user_ptr  - user buffer to copy into, or NULL to only query *size
+ * user_size - size of the user buffer; must be at least 4 when user_ptr is
+ *             given
+ *
+ * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL for a user
+ * buffer smaller than 4 bytes, -EFAULT if any copy to user space fails.
+ */
+int
+lwt_lookup_string (int *size, char *knl_ptr,
+                   char *user_ptr, int user_size)
+{
+        int   maxsize = 128;
+
+        /* knl_ptr was retrieved from an LWT snapshot and the caller wants to
+         * turn it into a string.  NB we can crash with an access violation
+         * trying to determine the string length, so we're trusting our
+         * caller... */
+
+        if (!capable(CAP_SYS_ADMIN))
+                return (-EPERM);
+
+        if (user_size > 0 &&
+            maxsize > user_size)
+                maxsize = user_size;
+
+        *size = strnlen (knl_ptr, maxsize - 1) + 1;
+
+        if (user_ptr != NULL) {
+                if (user_size < 4)
+                        return (-EINVAL);
+
+                if (copy_to_user (user_ptr, knl_ptr, *size))
+                        return (-EFAULT);
+
+                /* Did I truncate the string?  If so, end what the user sees
+                 * with "..." (3 dots plus the terminating NUL) and report a
+                 * failure of that copy instead of silently ignoring it */
+                if (knl_ptr[*size - 1] != 0 &&
+                    copy_to_user (user_ptr + *size - 4, "...", 4))
+                        return (-EFAULT);
+        }
+
+        return (0);
+}
+
+/*
+ * Switch tracing on or off and optionally clear the trace pages.
+ *
+ * enable - non-zero to (re)enable tracing once done, zero to disable it
+ *          first
+ * clear  - non-zero to zero the events of every trace page
+ *
+ * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -ENODATA if a CPU has
+ * no trace pages.
+ */
+int
+lwt_control (int enable, int clear)
+{
+        lwt_page_t  *p;
+        int          i;
+        int          j;
+
+        if (!capable(CAP_SYS_ADMIN))
+                return (-EPERM);
+
+        if (!enable) {
+                LWT_EVENT(0,0,0,0);
+                lwt_enabled = 0;
+                mb();
+                /* give people some time to stop adding traces.  The task
+                 * state must be set first: called while TASK_RUNNING,
+                 * schedule_timeout() does not wait for the timeout */
+                set_current_state(TASK_UNINTERRUPTIBLE);
+                schedule_timeout(10);
+        }
+
+        for (i = 0; i < num_online_cpus(); i++) {
+                p = lwt_cpus[i].lwtc_current_page;
+
+                if (p == NULL)
+                        return (-ENODATA);
+
+                if (!clear)
+                        continue;
+
+                /* walk this CPU's page list, wiping each page's events */
+                for (j = 0; j < lwt_pages_per_cpu; j++) {
+                        memset (p->lwtp_events, 0, CFS_PAGE_SIZE);
+
+                        p = list_entry (p->lwtp_list.next,
+                                        lwt_page_t, lwtp_list);
+                }
+        }
+
+        if (enable) {
+                lwt_enabled = 1;
+                mb();
+                LWT_EVENT(0,0,0,0);
+        }
+
+        return (0);
+}
+
+/*
+ * Report the size of, and optionally copy out, all trace pages.
+ *
+ * now        - receives get_cycles() at the time of the call
+ * ncpu       - receives num_online_cpus()
+ * total_size - receives the number of bytes a full snapshot occupies
+ * user_ptr   - user buffer for the snapshot, or NULL to only query the
+ *              values above
+ * user_size  - size of the user buffer in bytes
+ *
+ * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL if the user
+ * buffer is smaller than *total_size, -ENODATA if a CPU has no trace pages,
+ * -EFAULT if a copy to user space fails.
+ */
+int
+lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+              void *user_ptr, int user_size)
+{
+        /* only whole events are copied from each page */
+        const int    events_per_page = CFS_PAGE_SIZE / sizeof(lwt_event_t);
+        const int    bytes_per_page = events_per_page * sizeof(lwt_event_t);
+        lwt_page_t  *p;
+        int          i;
+        int          j;
+
+        if (!capable(CAP_SYS_ADMIN))
+                return (-EPERM);
+
+        *ncpu = num_online_cpus();
+        *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
+        *now = get_cycles();
+
+        if (user_ptr == NULL)
+                return (0);
+
+        /* the loop below writes *total_size bytes; refuse to run past the
+         * end of the buffer the caller says it has */
+        if (user_size < *total_size)
+                return (-EINVAL);
+
+        for (i = 0; i < num_online_cpus(); i++) {
+                p = lwt_cpus[i].lwtc_current_page;
+
+                if (p == NULL)
+                        return (-ENODATA);
+
+                for (j = 0; j < lwt_pages_per_cpu; j++) {
+                        if (copy_to_user(user_ptr, p->lwtp_events,
+                                         bytes_per_page))
+                                return (-EFAULT);
+
+                        user_ptr = ((char *)user_ptr) + bytes_per_page;
+                        p = list_entry(p->lwtp_list.next,
+                                       lwt_page_t, lwtp_list);
+
+                }
+        }
+
+        return (0);
+}
+
+/*
+ * Allocate the trace pages for every online CPU and switch tracing on.
+ *
+ * LWT_MEMORY is divided evenly between the online CPUs.  Each page gets a
+ * lwt_page_t descriptor; the first one of a CPU becomes lwtc_current_page
+ * and the rest are linked onto its list.
+ *
+ * Returns 0 on success, -EALREADY if any CPU already has pages, -ENOMEM if
+ * an allocation fails (everything allocated so far is torn down again by
+ * lwt_fini()).
+ */
+int
+lwt_init () 
+{
+       int     i;
+        int     j;
+
+        for (i = 0; i < num_online_cpus(); i++)
+                if (lwt_cpus[i].lwtc_current_page != NULL)
+                        return (-EALREADY);
+        
+        LASSERT (!lwt_enabled);
+
+       /* NULL pointers, zero scalars */
+       memset (lwt_cpus, 0, sizeof (lwt_cpus));
+        lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * CFS_PAGE_SIZE);
+
+       for (i = 0; i < num_online_cpus(); i++)
+               for (j = 0; j < lwt_pages_per_cpu; j++) {
+                       struct page *page = alloc_page (GFP_KERNEL);
+                       lwt_page_t  *lwtp;
+
+                       if (page == NULL) {
+                               CERROR ("Can't allocate page\n");
+                                lwt_fini ();
+                               return (-ENOMEM);
+                       }
+
+                        LIBCFS_ALLOC(lwtp, sizeof (*lwtp));
+                       if (lwtp == NULL) {
+                               CERROR ("Can't allocate lwtp\n");
+                               /* 'page' is not on any list yet, so
+                                * lwt_fini() cannot find it: free it here */
+                                __free_page(page);
+                               lwt_fini ();
+                               return (-ENOMEM);
+                       }
+
+                        lwtp->lwtp_page = page;
+                        lwtp->lwtp_events = page_address(page);
+                       memset (lwtp->lwtp_events, 0, CFS_PAGE_SIZE);
+
+                       if (j == 0) {
+                               /* first page of this CPU anchors the list */
+                               INIT_LIST_HEAD (&lwtp->lwtp_list);
+                               lwt_cpus[i].lwtc_current_page = lwtp;
+                       } else {
+                               list_add (&lwtp->lwtp_list,
+                                   &lwt_cpus[i].lwtc_current_page->lwtp_list);
+                       }
+                }
+
+        /* publish the enabled flag before logging the first event */
+        lwt_enabled = 1;
+        mb();
+
+        LWT_EVENT(0,0,0,0);
+
+        return (0);
+}
+
+/*
+ * Disable tracing (via lwt_control(0, 0)) and free every trace page and its
+ * lwt_page_t descriptor for all online CPUs.  Also used by lwt_init() to
+ * unwind after a failed allocation.
+ */
+void
+lwt_fini () 
+{
+        int    i;
+
+        lwt_control(0, 0);
+        
+        for (i = 0; i < num_online_cpus(); i++)
+                while (lwt_cpus[i].lwtc_current_page != NULL) {
+                        lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page;
+                        
+                        if (list_empty (&lwtp->lwtp_list)) {
+                                /* last page of this CPU: ends the loop */
+                                lwt_cpus[i].lwtc_current_page = NULL;
+                        } else {
+                                /* step to the next page before unlinking
+                                 * the one about to be freed */
+                                lwt_cpus[i].lwtc_current_page =
+                                        list_entry (lwtp->lwtp_list.next,
+                                                    lwt_page_t, lwtp_list);
+
+                                list_del (&lwtp->lwtp_list);
+                        }
+                        
+                        __free_page (lwtp->lwtp_page);
+                        LIBCFS_FREE (lwtp, sizeof (*lwtp));
+                }
+}
+
+EXPORT_SYMBOL(lwt_enabled);
+EXPORT_SYMBOL(lwt_cpus);
+
+EXPORT_SYMBOL(lwt_init);
+EXPORT_SYMBOL(lwt_fini);
+EXPORT_SYMBOL(lwt_lookup_string);
+EXPORT_SYMBOL(lwt_control);
+EXPORT_SYMBOL(lwt_snapshot);
+#endif
diff --git a/libcfs/libcfs/module.c b/libcfs/libcfs/module.c
new file mode 100644 (file)
index 0000000..5e273cb
--- /dev/null
@@ -0,0 +1,423 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <lnet/lib-lnet.h>
+#include <lnet/lnet.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+/*
+ * Free every page held by the memhog of 'ldu'.
+ *
+ * The pages form a three-level tree: the root page holds pointers to
+ * level-1 pages, each of which holds pointers to level-2 pages.  Each table
+ * is walked until its first NULL entry or the end of the page.
+ * ldu_memhog_pages is decremented for every page freed and must be back at
+ * zero when the walk is done.
+ */
+void
+kportal_memhog_free (struct libcfs_device_userstate *ldu)
+{
+        cfs_page_t **level0p = &ldu->ldu_memhog_root_page;
+        cfs_page_t **level1p;
+        cfs_page_t **level2p;
+        int           count1;
+        int           count2;
+
+        if (*level0p != NULL) {
+
+                level1p = (cfs_page_t **)cfs_page_address(*level0p);
+                count1 = 0;
+
+                while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
+                       *level1p != NULL) {
+
+                        level2p = (cfs_page_t **)cfs_page_address(*level1p);
+                        count2 = 0;
+
+                        while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) &&
+                               *level2p != NULL) {
+
+                                cfs_free_page(*level2p);
+                                ldu->ldu_memhog_pages--;
+                                level2p++;
+                                count2++;
+                        }
+
+                        /* children are gone: now the level-1 page itself */
+                        cfs_free_page(*level1p);
+                        ldu->ldu_memhog_pages--;
+                        level1p++;
+                        count1++;
+                }
+
+                cfs_free_page(*level0p);
+                ldu->ldu_memhog_pages--;
+
+                *level0p = NULL;
+        }
+
+        LASSERT (ldu->ldu_memhog_pages == 0);
+}
+
+/*
+ * Allocate pages for the memhog of 'ldu' until ldu_memhog_pages reaches
+ * 'npages' or the three-level page tree is full.  The root and level-1
+ * index pages count towards ldu_memhog_pages too.
+ *
+ * ldu    - per-open state; must hold no memhog pages on entry
+ * npages - number of pages wanted; 0 allocates nothing
+ * flags  - passed unchanged to cfs_alloc_page()
+ *
+ * Returns 0 on success, -EINVAL for negative npages, -ENOMEM if a page
+ * allocation fails, -EINTR if a signal is pending.  Pages allocated before
+ * a failure are NOT released here; the caller has to call
+ * kportal_memhog_free().
+ */
+int
+kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags)
+{
+        cfs_page_t **level0p;
+        cfs_page_t **level1p;
+        cfs_page_t **level2p;
+        int           count1;
+        int           count2;
+
+        LASSERT (ldu->ldu_memhog_pages == 0);
+        LASSERT (ldu->ldu_memhog_root_page == NULL);
+
+        if (npages < 0)
+                return -EINVAL;
+
+        if (npages == 0)
+                return 0;
+
+        level0p = &ldu->ldu_memhog_root_page;
+        *level0p = cfs_alloc_page(flags);
+        if (*level0p == NULL)
+                return -ENOMEM;
+        ldu->ldu_memhog_pages++;
+
+        level1p = (cfs_page_t **)cfs_page_address(*level0p);
+        count1 = 0;
+        /* zeroed tables let kportal_memhog_free() stop at the first NULL */
+        memset(level1p, 0, CFS_PAGE_SIZE);
+
+        while (ldu->ldu_memhog_pages < npages &&
+               count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
+
+                if (cfs_signal_pending())
+                        return (-EINTR);
+
+                *level1p = cfs_alloc_page(flags);
+                if (*level1p == NULL)
+                        return -ENOMEM;
+                ldu->ldu_memhog_pages++;
+
+                level2p = (cfs_page_t **)cfs_page_address(*level1p);
+                count2 = 0;
+                memset(level2p, 0, CFS_PAGE_SIZE);
+
+                while (ldu->ldu_memhog_pages < npages &&
+                       count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) {
+
+                        if (cfs_signal_pending())
+                                return (-EINTR);
+
+                        *level2p = cfs_alloc_page(flags);
+                        if (*level2p == NULL)
+                                return (-ENOMEM);
+                        ldu->ldu_memhog_pages++;
+
+                        level2p++;
+                        count2++;
+                }
+
+                level1p++;
+                count1++;
+        }
+
+        return 0;
+}
+
+/*
+ * Called when opening /dev/device.
+ *
+ * Takes a module reference and stores a freshly allocated, empty
+ * libcfs_device_userstate through 'args'.  A failed allocation is not an
+ * error here: NULL is stored and 0 is still returned.
+ */
+static int libcfs_psdev_open(unsigned long flags, void *args)
+{
+        struct libcfs_device_userstate **statep = args;
+        struct libcfs_device_userstate  *ldu;
+        ENTRY;
+
+        PORTAL_MODULE_USE;
+
+        LIBCFS_ALLOC(ldu, sizeof(*ldu));
+        if (ldu != NULL) {
+                /* no memhog pages yet */
+                ldu->ldu_memhog_root_page = NULL;
+                ldu->ldu_memhog_pages = 0;
+        }
+
+        *statep = ldu;
+        RETURN(0);
+}
+
+/*
+ * Called when closing /dev/device.
+ *
+ * 'args' is the per-open libcfs_device_userstate (may be NULL): its memhog
+ * pages and the state itself are freed, then the module reference is
+ * dropped.  Always returns 0.
+ */
+static int libcfs_psdev_release(unsigned long flags, void *args)
+{
+        struct libcfs_device_userstate *ldu = args;
+        ENTRY;
+
+        if (ldu != NULL) {
+                kportal_memhog_free(ldu);
+                LIBCFS_FREE(ldu, sizeof(*ldu));
+        }
+
+        PORTAL_MODULE_UNUSE;
+
+        RETURN(0);
+}
+
+/* ioctl_list holds the handlers added by libcfs_register_ioctl();
+ * ioctl_list_sem is taken for write to change the list and for read while
+ * libcfs_ioctl() walks it.  Both are initialised in init_libcfs_module(). */
+static struct rw_semaphore ioctl_list_sem;
+static struct list_head ioctl_list;
+
+/*
+ * Append 'hand' to the list of extra ioctl handlers.
+ * Returns 0 on success, or -EBUSY if hand->item is already linked.
+ */
+int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
+{
+        int rc;
+
+        down_write(&ioctl_list_sem);
+        if (list_empty(&hand->item)) {
+                list_add_tail(&hand->item, &ioctl_list);
+                rc = 0;
+        } else {
+                rc = -EBUSY;
+        }
+        up_write(&ioctl_list_sem);
+
+        return rc;
+}
+EXPORT_SYMBOL(libcfs_register_ioctl);
+
+/*
+ * Unlink 'hand' from the list of extra ioctl handlers and re-initialise
+ * its list node.
+ * Returns 0 on success, or -ENOENT if hand->item is not linked.
+ */
+int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
+{
+        int rc;
+
+        down_write(&ioctl_list_sem);
+        if (!list_empty(&hand->item)) {
+                list_del_init(&hand->item);
+                rc = 0;
+        } else {
+                rc = -ENOENT;
+        }
+        up_write(&ioctl_list_sem);
+
+        return rc;
+}
+EXPORT_SYMBOL(libcfs_deregister_ioctl);
+
+/*
+ * Generic ioctl entry point of the libcfs device.
+ *
+ * pfile - per-open file state; private_data is the
+ *         libcfs_device_userstate used by IOC_LIBCFS_MEMHOG
+ * cmd   - ioctl command
+ * arg   - user pointer to the ioctl data
+ *
+ * The user data is first fetched into an on-stack buffer with
+ * libcfs_ioctl_getdata().  Commands not handled by the switch below are
+ * offered to each handler registered through libcfs_register_ioctl() until
+ * one returns something other than -EINVAL.
+ *
+ * Returns 0 or a negative errno.
+ */
+static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
+{
+        char    buf[1024];
+        int err = -EINVAL;
+        struct libcfs_ioctl_data *data;
+        ENTRY;
+
+        /* 'cmd' and permissions get checked in our arch-specific caller */
+
+        /* NOTE(review): buf + 800 is handed over as the end of the usable
+         * area although buf is 1024 bytes - confirm the intent in
+         * libcfs_ioctl_getdata() */
+        if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) {
+                CERROR("PORTALS ioctl: data error\n");
+                RETURN(-EINVAL);
+        }
+        data = (struct libcfs_ioctl_data *)buf;
+
+        switch (cmd) {
+        case IOC_LIBCFS_CLEAR_DEBUG:
+                libcfs_debug_clear_buffer();
+                RETURN(0);
+        /*
+         * case IOC_LIBCFS_PANIC:
+         * Handled in arch/cfs_module.c
+         */
+        case IOC_LIBCFS_MARK_DEBUG:
+                /* the marker text must be present and NUL terminated */
+                if (data->ioc_inlbuf1 == NULL ||
+                    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
+                        RETURN(-EINVAL);
+                libcfs_debug_mark_buffer(data->ioc_inlbuf1);
+                RETURN(0);
+#if LWT_SUPPORT
+        case IOC_LIBCFS_LWT_CONTROL:
+                /* ioc_flags bit 0: enable, bit 1: clear */
+                err = lwt_control ((data->ioc_flags & 1) != 0, 
+                                   (data->ioc_flags & 2) != 0);
+                break;
+
+        case IOC_LIBCFS_LWT_SNAPSHOT: {
+                cycles_t   now;
+                int        ncpu;
+                int        total_size;
+
+                err = lwt_snapshot (&now, &ncpu, &total_size,
+                                    data->ioc_pbuf1, data->ioc_plen1);
+                data->ioc_u64[0] = now;
+                data->ioc_u32[0] = ncpu;
+                data->ioc_u32[1] = total_size;
+
+                /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */
+                data->ioc_u32[2] = sizeof(lwt_event_t);
+                data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where);
+
+                /* results only go back to the user on success */
+                if (err == 0 &&
+                    libcfs_ioctl_popdata(arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+        }
+
+        case IOC_LIBCFS_LWT_LOOKUP_STRING:
+                err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
+                                         data->ioc_pbuf2, data->ioc_plen2);
+                if (err == 0 &&
+                    libcfs_ioctl_popdata(arg, data, sizeof (*data)))
+                        err = -EFAULT;
+                break;
+#endif
+        case IOC_LIBCFS_MEMHOG:
+                if (pfile->private_data == NULL) {
+                        err = -EINVAL;
+                } else {
+                        /* drop whatever an earlier request still holds */
+                        kportal_memhog_free(pfile->private_data);
+                        /* XXX The ioc_flags is not GFP flags now, need to be fixed */
+                        err = kportal_memhog_alloc(pfile->private_data,
+                                                   data->ioc_count,
+                                                   data->ioc_flags);
+                        /* alloc does not unwind by itself on failure */
+                        if (err != 0)
+                                kportal_memhog_free(pfile->private_data);
+                }
+                break;
+
+        case IOC_LIBCFS_PING_TEST: {
+                extern void (kping_client)(struct libcfs_ioctl_data *);
+                void (*ping)(struct libcfs_ioctl_data *);
+
+                CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
+                       data->ioc_count, libcfs_nid2str(data->ioc_nid),
+                       libcfs_nid2str(data->ioc_nid));
+                ping = PORTAL_SYMBOL_GET(kping_client);
+                if (!ping)
+                        CERROR("PORTAL_SYMBOL_GET failed\n");
+                else {
+                        ping(data);
+                        PORTAL_SYMBOL_PUT(kping_client);
+                }
+                /* 0 is returned even when kping_client was not found */
+                RETURN(0);
+        }
+
+        default: {
+                struct libcfs_ioctl_handler *hand;
+                err = -EINVAL;
+                down_read(&ioctl_list_sem);
+                /* -EINVAL from a handler means "not mine": try the next;
+                 * any other result ends the search */
+                list_for_each_entry(hand, &ioctl_list, item) {
+                        err = hand->handle_ioctl(cmd, data);
+                        if (err != -EINVAL) {
+                                if (err == 0)
+                                        err = libcfs_ioctl_popdata(arg, 
+                                                        data, sizeof (*data));
+                                break;
+                        }
+                }
+                up_read(&ioctl_list_sem);
+                break;
+        }
+        }
+
+        RETURN(err);
+}
+
+/*
+ * Operations table of the libcfs device.  The initialiser is positional, so
+ * it must follow the member order of struct cfs_psdev_ops (declared
+ * elsewhere).
+ * NOTE(review): presumably the order is open, release, read, write, ioctl -
+ * confirm against the struct declaration.
+ */
+struct cfs_psdev_ops libcfs_psdev_ops = {
+        libcfs_psdev_open,
+        libcfs_psdev_release,
+        NULL,
+        NULL,
+        libcfs_ioctl
+};
+
+extern int insert_proc(void);
+extern void remove_proc(void);
+MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
+MODULE_DESCRIPTION("Portals v3.1");
+MODULE_LICENSE("GPL");
+
+extern cfs_psdev_t libcfs_dev;
+extern struct rw_semaphore tracefile_sem;
+extern struct semaphore trace_thread_sem;
+
+extern void libcfs_init_nidstrings(void);
+extern int libcfs_arch_init(void);
+extern void libcfs_arch_cleanup(void);
+
+/*
+ * Module initialisation: arch-specific setup, nidstrings, the locks and
+ * ioctl handler list used in this file, the debug subsystem, LWT (when
+ * compiled in), the libcfs device and the proc entries - in that order.
+ *
+ * Returns 0 on success.  On failure everything set up so far is undone in
+ * reverse order, including libcfs_arch_init() (mirroring what
+ * exit_libcfs_module() does), and the error is returned.
+ */
+static int init_libcfs_module(void)
+{
+        int rc;
+
+        /* NOTE(review): libcfs_arch_init() is declared to return int but
+         * its result is not checked - confirm it cannot fail */
+        libcfs_arch_init();
+        libcfs_init_nidstrings();
+        init_rwsem(&tracefile_sem);
+        init_mutex(&trace_thread_sem);
+        init_rwsem(&ioctl_list_sem);
+        CFS_INIT_LIST_HEAD(&ioctl_list);
+
+        rc = libcfs_debug_init(5 * 1024 * 1024);
+        if (rc < 0) {
+                /* CERROR is not used here: the debug subsystem is what
+                 * just failed to come up */
+                printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc);
+                goto cleanup_arch;
+        }
+
+#if LWT_SUPPORT
+        rc = lwt_init();
+        if (rc != 0) {
+                CERROR("lwt_init: error %d\n", rc);
+                goto cleanup_debug;
+        }
+#endif
+        rc = cfs_psdev_register(&libcfs_dev);
+        if (rc) {
+                CERROR("misc_register: error %d\n", rc);
+                goto cleanup_lwt;
+        }
+
+        rc = insert_proc();
+        if (rc) {
+                CERROR("insert_proc: error %d\n", rc);
+                goto cleanup_deregister;
+        }
+
+        CDEBUG (D_OTHER, "portals setup OK\n");
+        return (0);
+
+ cleanup_deregister:
+        cfs_psdev_deregister(&libcfs_dev);
+ cleanup_lwt:
+#if LWT_SUPPORT
+        lwt_fini();
+ cleanup_debug:
+#endif
+        libcfs_debug_cleanup();
+ cleanup_arch:
+        /* undo libcfs_arch_init(), as exit_libcfs_module() does */
+        libcfs_arch_cleanup();
+        return rc;
+}
+
+/*
+ * Module teardown; undoes init_libcfs_module() in reverse order: proc
+ * entries, the libcfs device, LWT (when compiled in), the debug subsystem
+ * and finally the arch-specific state.  Failures are only logged.
+ */
+static void exit_libcfs_module(void)
+{
+        int rc;
+
+        remove_proc();
+
+        CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
+               atomic_read(&libcfs_kmemory));
+
+        rc = cfs_psdev_deregister(&libcfs_dev);
+        if (rc)
+                CERROR("misc_deregister error %d\n", rc);
+
+#if LWT_SUPPORT
+        lwt_fini();
+#endif
+
+        /* anything still accounted in libcfs_kmemory at this point is
+         * reported as a leak */
+        if (atomic_read(&libcfs_kmemory) != 0)
+                CERROR("Portals memory leaked: %d bytes\n",
+                       atomic_read(&libcfs_kmemory));
+
+        /* printk rather than CERROR: this reports on the debug subsystem
+         * itself */
+        rc = libcfs_debug_cleanup();
+        if (rc)
+                printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc);
+        libcfs_arch_cleanup();
+}
+
+cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module);
diff --git a/libcfs/libcfs/nidstrings.c b/libcfs/libcfs/nidstrings.c
new file mode 100644 (file)
index 0000000..5f17f5a
--- /dev/null
@@ -0,0 +1,540 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2002 Cluster File Systems, Inc.
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <lnet/lnet.h>
+#include <libcfs/kp30.h>
+#ifndef __KERNEL__
+#ifdef HAVE_GETHOSTBYNAME
+# include <netdb.h>
+#endif
+#endif
+
+/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids
+ * consistent in all conversion functions.  Some code fragments are copied
+ * around for the sake of clarity...
+ */
+
+/* CAVEAT EMPTOR! Racy temporary buffer allocation!
+ * Choose the number of nidstrings to support the MAXIMUM expected number of
+ * concurrent users.  If there are more, the returned string will be volatile.
+ * NB this number must allow for a process to be descheduled for a timeslice
+ * between getting its string and using it.
+ */
+
+#define LNET_NIDSTR_COUNT  128     /* # of nidstrings */
+#define LNET_NIDSTR_SIZE   32      /* size of each one (see below for usage) */
+
+static char      libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE];
+static int       libcfs_nidstring_idx = 0;
+
+#ifdef __KERNEL__
+static spinlock_t libcfs_nidstring_lock;
+
+/* Kernel only: set up the spinlock that guards the nidstring ring index. */
+void
+libcfs_init_nidstrings(void)
+{
+        spin_lock_init(&libcfs_nidstring_lock);
+}
+
+# define NIDSTR_LOCK(f)   spin_lock_irqsave(&libcfs_nidstring_lock, f)
+# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f)
+#else
+# define NIDSTR_LOCK(f)   (f=0)                 /* avoid unused var warnings */
+# define NIDSTR_UNLOCK(f) (f=0)
+#endif
+
+/*
+ * Hand out the next buffer from the static libcfs_nidstrings ring.
+ *
+ * The ring index is read and advanced between NIDSTR_LOCK/NIDSTR_UNLOCK and
+ * wraps back to 0 after the last slot, so a buffer is handed out again once
+ * every other slot has been used.
+ */
+static char *
+libcfs_next_nidstring (void)
+{
+        const int      nslots = sizeof(libcfs_nidstrings) /
+                                sizeof(libcfs_nidstrings[0]);
+        unsigned long  flags;
+        char          *slot;
+
+        NIDSTR_LOCK(flags);
+
+        slot = libcfs_nidstrings[libcfs_nidstring_idx];
+        libcfs_nidstring_idx++;
+        if (libcfs_nidstring_idx == nslots)
+                libcfs_nidstring_idx = 0;
+
+        NIDSTR_UNLOCK(flags);
+
+        return slot;
+}
+
+static int  libcfs_lo_str2addr(const char *str, int nob, __u32 *addr);
+static void libcfs_ip_addr2str(__u32 addr, char *str);
+static int  libcfs_ip_str2addr(const char *str, int nob, __u32 *addr);
+static void libcfs_decnum_addr2str(__u32 addr, char *str);
+static void libcfs_hexnum_addr2str(__u32 addr, char *str);
+static int  libcfs_num_str2addr(const char *str, int nob, __u32 *addr);
+
+struct netstrfns {
+        int          nf_type;           /* LND type; negative marks an unused entry */
+        char        *nf_name;           /* net name used when printing/parsing */
+        char        *nf_modname;        /* string returned by libcfs_lnd2modname() */
+        void       (*nf_addr2str)(__u32 addr, char *str);       /* format addr into str */
+        int        (*nf_str2addr)(const char *str, int nob, __u32 *addr); /* parse; non-zero on success */
+};
+
+static struct netstrfns  libcfs_netstrfns[] = {
+        {/* .nf_type      */  LOLND,
+         /* .nf_name      */  "lo",
+         /* .nf_modname   */  "klolnd",
+         /* .nf_addr2str  */  libcfs_decnum_addr2str,
+         /* .nf_str2addr  */  libcfs_lo_str2addr},
+        {/* .nf_type      */  SOCKLND,
+         /* .nf_name      */  "tcp",
+         /* .nf_modname   */  "ksocklnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  O2IBLND,
+         /* .nf_name      */  "o2ib",
+         /* .nf_modname   */  "ko2iblnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  CIBLND,
+         /* .nf_name      */  "cib",
+         /* .nf_modname   */  "kciblnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  OPENIBLND,
+         /* .nf_name      */  "openib",
+         /* .nf_modname   */  "kopeniblnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  IIBLND,
+         /* .nf_name      */  "iib",
+         /* .nf_modname   */  "kiiblnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  VIBLND,
+         /* .nf_name      */  "vib",
+         /* .nf_modname   */  "kviblnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  RALND,
+         /* .nf_name      */  "ra",
+         /* .nf_modname   */  "kralnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  QSWLND,
+         /* .nf_name      */  "elan",
+         /* .nf_modname   */  "kqswlnd",
+         /* .nf_addr2str  */  libcfs_decnum_addr2str,
+         /* .nf_str2addr  */  libcfs_num_str2addr},
+        {/* .nf_type      */  GMLND,
+         /* .nf_name      */  "gm",
+         /* .nf_modname   */  "kgmlnd",
+         /* .nf_addr2str  */  libcfs_hexnum_addr2str,
+         /* .nf_str2addr  */  libcfs_num_str2addr},
+        {/* .nf_type      */  MXLND,
+         /* .nf_name      */  "mx",
+         /* .nf_modname   */  "kmxlnd",
+         /* .nf_addr2str  */  libcfs_ip_addr2str,
+         /* .nf_str2addr  */  libcfs_ip_str2addr},
+        {/* .nf_type      */  PTLLND,
+         /* .nf_name      */  "ptl",
+         /* .nf_modname   */  "kptllnd",
+         /* .nf_addr2str  */  libcfs_decnum_addr2str,
+         /* .nf_str2addr  */  libcfs_num_str2addr},
+        /* placeholder for net0 alias.  It MUST BE THE LAST ENTRY: the kernel
+         * libcfs_setnet0alias() fills in the final slot of this table, and
+         * lookups skip it while its nf_type is still negative. */
+        {/* .nf_type      */  -1},
+};
+
+const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]);
+
+/*
+ * Address parser for the "lo" net: the text is not examined at all, the
+ * address is always 0 and the parse always succeeds (returns 1).
+ */
+int
+libcfs_lo_str2addr(const char *str, int nob, __u32 *addr)
+{
+        const __u32 loopback_addr = 0;
+
+        *addr = loopback_addr;
+        return 1;
+}
+
+/*
+ * Format 'addr' as a dotted quad into 'str', most significant byte first.
+ * At most LNET_NIDSTR_SIZE bytes are written.  The reverse hostname lookup
+ * below is compiled out ("never lookup").
+ */
+void
+libcfs_ip_addr2str(__u32 addr, char *str)
+{
+        const __u32 octet3 = (addr >> 24) & 0xff;
+        const __u32 octet2 = (addr >> 16) & 0xff;
+        const __u32 octet1 = (addr >> 8) & 0xff;
+        const __u32 octet0 = addr & 0xff;
+#if 0   /* never lookup */
+#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME
+        __u32           netip = htonl(addr);
+        struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET);
+
+        if (he != NULL) {
+                snprintf(str, LNET_NIDSTR_SIZE, "%s", he->h_name);
+                return;
+        }
+#endif
+#endif
+        snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u",
+                 octet3, octet2, octet1, octet0);
+}
+
+/* CAVEAT EMPTOR XscanfX
+ * I use "%n" at the end of a sscanf format to detect trailing junk.  However
+ * sscanf may return immediately if it sees the terminating '\0' in a string, so
+ * I initialise the %n variable to the expected length.  If sscanf sets it;
+ * fine, if it doesn't, then the scan ended at the end of the string, which is
+ * fine too :) */
+
+int
+libcfs_ip_str2addr(const char *str, int nob, __u32 *addr)
+{
+        int   a;
+        int   b;
+        int   c;
+        int   d;
+        int   n = nob;                          /* XscanfX */
+
+        /* numeric IP? */
+        if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 &&
+            n == nob &&
+            (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
+            (c & ~0xff) == 0 && (d & ~0xff) == 0) {
+                *addr = ((a<<24)|(b<<16)|(c<<8)|d);
+                return 1;
+        }
+
+#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME
+        /* known hostname? */
+        if (('a' <= str[0] && str[0] <= 'z') ||
+            ('A' <= str[0] && str[0] <= 'Z')) {
+                char *tmp;
+
+                LIBCFS_ALLOC(tmp, nob + 1);
+                if (tmp != NULL) {
+                        struct hostent *he;
+
+                        memcpy(tmp, str, nob);
+                        tmp[nob] = 0;
+
+                        he = gethostbyname(tmp);
+
+                        LIBCFS_FREE(tmp, nob);
+
+                        if (he != NULL) {
+                                __u32 ip = *(__u32 *)he->h_addr;
+
+                                *addr = ntohl(ip);
+                                return 1;
+                        }
+                }
+        }
+#endif
+        return 0;
+}
+
+/* Format 'addr' as an unsigned decimal number into 'str' (at most
+ * LNET_NIDSTR_SIZE bytes are written). */
+void
+libcfs_decnum_addr2str(__u32 addr, char *str)
+{
+        const int bufsize = LNET_NIDSTR_SIZE;
+
+        snprintf(str, bufsize, "%u", addr);
+}
+
+/* Format 'addr' as "0x"-prefixed lower-case hex into 'str' (at most
+ * LNET_NIDSTR_SIZE bytes are written). */
+void
+libcfs_hexnum_addr2str(__u32 addr, char *str)
+{
+        const int bufsize = LNET_NIDSTR_SIZE;
+
+        snprintf(str, bufsize, "0x%x", addr);
+}
+
+/*
+ * Parse the first 'nob' bytes of 'str' as a number: "0x" hex, then "0X"
+ * hex, then unsigned decimal are tried in that order.  A form is accepted
+ * only if the scan consumed exactly 'nob' bytes (see the XscanfX caveat).
+ * Returns 1 with the value in '*addr' on success, 0 otherwise.
+ */
+int
+libcfs_num_str2addr(const char *str, int nob, __u32 *addr)
+{
+        int     consumed;
+        int     ok;
+
+        consumed = nob;
+        ok = sscanf(str, "0x%x%n", addr, &consumed) >= 1 && consumed == nob;
+
+        if (!ok) {
+                consumed = nob;
+                ok = sscanf(str, "0X%x%n", addr, &consumed) >= 1 &&
+                     consumed == nob;
+        }
+
+        if (!ok) {
+                consumed = nob;
+                ok = sscanf(str, "%u%n", addr, &consumed) >= 1 &&
+                     consumed == nob;
+        }
+
+        return ok;
+}
+
+/*
+ * Look up the table entry whose nf_type equals 'lnd'.  Negative types never
+ * match, which keeps the unused placeholder entry invisible.  Returns NULL
+ * when there is no such entry.
+ */
+struct netstrfns *
+libcfs_lnd2netstrfns(int lnd)
+{
+        struct netstrfns *entry;
+        int               idx;
+
+        if (lnd < 0)
+                return NULL;
+
+        for (idx = 0; idx < libcfs_nnetstrfns; idx++) {
+                entry = &libcfs_netstrfns[idx];
+                if (entry->nf_type == lnd)
+                        return entry;
+        }
+
+        return NULL;
+}
+
+/*
+ * Look up the table entry whose nf_name is exactly 'name'.  Entries with a
+ * negative nf_type are skipped before their name is examined.  Returns NULL
+ * when nothing matches.
+ */
+struct netstrfns *
+libcfs_name2netstrfns(const char *name)
+{
+        struct netstrfns *entry;
+        int               idx;
+
+        for (idx = 0; idx < libcfs_nnetstrfns; idx++) {
+                entry = &libcfs_netstrfns[idx];
+
+                if (entry->nf_type < 0)
+                        continue;
+
+                if (strcmp(entry->nf_name, name) == 0)
+                        return entry;
+        }
+
+        return NULL;
+}
+
+/* Return non-zero iff 'type' has an entry in the netstrfns table. */
+int
+libcfs_isknown_lnd(int type)
+{
+        struct netstrfns *entry = libcfs_lnd2netstrfns(type);
+
+        return entry != NULL;
+}
+
+/* Return the nf_modname registered for 'lnd', or NULL if 'lnd' is unknown. */
+char *
+libcfs_lnd2modname(int lnd)
+{
+        struct netstrfns *entry = libcfs_lnd2netstrfns(lnd);
+
+        if (entry == NULL)
+                return NULL;
+
+        return entry->nf_modname;
+}
+
+/*
+ * Return the name of 'lnd'.  Known types yield the table's nf_name; unknown
+ * ones are rendered as "?<number>?" into a buffer taken from the nidstring
+ * ring.
+ */
+char *
+libcfs_lnd2str(int lnd)
+{
+        struct netstrfns *entry = libcfs_lnd2netstrfns(lnd);
+        char             *buf;
+
+        if (entry == NULL) {
+                buf = libcfs_next_nidstring();
+                snprintf(buf, LNET_NIDSTR_SIZE, "?%u?", lnd);
+                return buf;
+        }
+
+        return entry->nf_name;
+}
+
+/* Return the LND type whose name is exactly 'str', or -1 if there is none. */
+int
+libcfs_str2lnd(const char *str)
+{
+        struct netstrfns *entry = libcfs_name2netstrfns(str);
+
+        return (entry == NULL) ? -1 : entry->nf_type;
+}
+
+/*
+ * Render 'net' into a buffer from the nidstring ring.
+ *
+ * Known LND types print as their name, followed by the net number unless
+ * that number is 0; unknown types print as "<type:number>".
+ */
+char *
+libcfs_net2str(__u32 net)
+{
+        int               lnd = LNET_NETTYP(net);
+        int               num = LNET_NETNUM(net);
+        struct netstrfns *entry = libcfs_lnd2netstrfns(lnd);
+        char             *buf = libcfs_next_nidstring();
+
+        if (entry != NULL && num == 0)
+                snprintf(buf, LNET_NIDSTR_SIZE, "%s", entry->nf_name);
+        else if (entry != NULL)
+                snprintf(buf, LNET_NIDSTR_SIZE, "%s%u", entry->nf_name, num);
+        else
+                snprintf(buf, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num);
+
+        return buf;
+}
+
+/*
+ * Render 'nid' as "<address>@<net>".
+ *
+ * LNET_NID_ANY yields the constant string "LNET_NID_ANY" without using a
+ * ring buffer.  Otherwise the result lives in a buffer from the nidstring
+ * ring: the address is formatted by the LND's nf_addr2str and the net part
+ * is appended after it, omitting the net number when it is 0.  NIDs of an
+ * unknown LND type print as "<hex addr>@<type:number>".
+ */
+char *
+libcfs_nid2str(lnet_nid_t nid)
+{
+        __u32             addr = LNET_NIDADDR(nid);
+        __u32             net = LNET_NIDNET(nid);
+        int               lnd = LNET_NETTYP(net);
+        int               nnum = LNET_NETNUM(net);
+        struct netstrfns *entry;
+        char             *buf;
+        char             *tail;
+        int               used;
+
+        if (nid == LNET_NID_ANY)
+                return "LNET_NID_ANY";
+
+        entry = libcfs_lnd2netstrfns(lnd);
+        buf = libcfs_next_nidstring();
+
+        if (entry == NULL) {
+                snprintf(buf, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum);
+                return buf;
+        }
+
+        /* address first, then append "@net" in whatever room is left */
+        entry->nf_addr2str(addr, buf);
+        used = strlen(buf);
+        tail = buf + used;
+
+        if (nnum != 0)
+                snprintf(tail, LNET_NIDSTR_SIZE - used, "@%s%u",
+                         entry->nf_name, nnum);
+        else
+                snprintf(tail, LNET_NIDSTR_SIZE - used, "@%s",
+                         entry->nf_name);
+
+        return buf;
+}
+
+/*
+ * Parse a net string of the form "<name>[<number>]".
+ *
+ * The first table entry (with a non-negative type) whose name is a prefix of
+ * 'str' is selected.  A bare name means net number 0; otherwise the rest of
+ * the string must be a decimal number with no trailing junk (see the XscanfX
+ * caveat), and a number is not accepted at all for LOLND.
+ *
+ * On success the net is stored in '*net' and the matching entry is returned;
+ * NULL is returned on any parse failure.
+ */
+static struct netstrfns *
+libcfs_str2net_internal(const char *str, __u32 *net)
+{
+        struct netstrfns *nf = NULL;
+        int               nob;
+        unsigned int      netnum;       /* "%u" stores through unsigned int * */
+        int               i;
+
+        for (i = 0; i < libcfs_nnetstrfns; i++) {
+                nf = &libcfs_netstrfns[i];
+                if (nf->nf_type >= 0 &&
+                    !strncmp(str, nf->nf_name, strlen(nf->nf_name)))
+                        break;
+        }
+
+        if (i == libcfs_nnetstrfns)
+                return NULL;
+
+        nob = strlen(nf->nf_name);
+
+        if (strlen(str) == (unsigned int)nob) {
+                netnum = 0;
+        } else {
+                if (nf->nf_type == LOLND) /* net number not allowed */
+                        return NULL;
+
+                str += nob;
+                /* preset the "%n" result to the expected length (XscanfX) */
+                i = strlen(str);
+                if (sscanf(str, "%u%n", &netnum, &i) < 1 ||
+                    i != (int)strlen(str))
+                        return NULL;
+        }
+
+        /* NOTE(review): netnum is not range-checked before being packed into
+         * the net -- confirm what LNET_MKNET does with oversized numbers */
+        *net = LNET_MKNET(nf->nf_type, netnum);
+        return nf;
+}
+
+/* Parse 'str' as a net; returns the net part of LNET_NID_ANY when 'str'
+ * cannot be parsed. */
+__u32
+libcfs_str2net(const char *str)
+{
+        __u32  parsed;
+
+        if (libcfs_str2net_internal(str, &parsed) == NULL)
+                return LNET_NIDNET(LNET_NID_ANY);
+
+        return parsed;
+}
+
+/*
+ * Parse "<address>[@<net>]" into a NID.
+ *
+ * Without an '@' the whole string is the address and the net defaults to
+ * SOCKLND net 0.  The address part is parsed by the selected LND's
+ * nf_str2addr.  Returns LNET_NID_ANY if either part fails to parse.
+ */
+lnet_nid_t
+libcfs_str2nid(const char *str)
+{
+        const char       *at = strchr(str, '@');
+        const char       *addr_end;
+        struct netstrfns *entry;
+        __u32             net;
+        __u32             addr;
+
+        if (at == NULL) {
+                /* no net given: address runs to the end of the string */
+                addr_end = str + strlen(str);
+                net = LNET_MKNET(SOCKLND, 0);
+                entry = libcfs_lnd2netstrfns(SOCKLND);
+                LASSERT (entry != NULL);
+        } else {
+                entry = libcfs_str2net_internal(at + 1, &net);
+                if (entry == NULL)
+                        return LNET_NID_ANY;
+                addr_end = at;
+        }
+
+        if (entry->nf_str2addr(str, addr_end - str, &addr))
+                return LNET_MKNID(net, addr);
+
+        return LNET_NID_ANY;
+}
+
+/*
+ * Render a process id as "<pid>-<nid>" into a buffer from the nidstring
+ * ring.  LNET_PID_ANY prints as "LNET_PID_ANY"; otherwise the pid is printed
+ * with LNET_PID_USERFLAG masked off and a "U" prefix when that flag is set.
+ * The NID text comes from libcfs_nid2str(), which is called after this
+ * function has taken its own ring buffer.
+ */
+char *
+libcfs_id2str(lnet_process_id_t id)
+{
+        char *buf = libcfs_next_nidstring();
+        char *nidstr = libcfs_nid2str(id.nid);
+
+        if (id.pid == LNET_PID_ANY)
+                snprintf(buf, LNET_NIDSTR_SIZE,
+                         "LNET_PID_ANY-%s", nidstr);
+        else
+                snprintf(buf, LNET_NIDSTR_SIZE, "%s%u-%s",
+                         ((id.pid & LNET_PID_USERFLAG) != 0) ? "U" : "",
+                         (id.pid & ~LNET_PID_USERFLAG), nidstr);
+
+        return buf;
+}
+
+/*
+ * Like libcfs_str2nid(), but "*" is accepted as an explicit wildcard.
+ * '*nidp' is always written.  Returns 1 for "*" or a successfully parsed
+ * NID, and 0 when parsing failed (in which case '*nidp' is LNET_NID_ANY).
+ */
+int
+libcfs_str2anynid(lnet_nid_t *nidp, const char *str)
+{
+        int wildcard = (strcmp(str, "*") == 0);
+
+        if (wildcard)
+                *nidp = LNET_NID_ANY;
+        else
+                *nidp = libcfs_str2nid(str);
+
+        return wildcard || *nidp != LNET_NID_ANY;
+}
+
+#ifdef __KERNEL__
+/*
+ * Kernel only: turn the last (placeholder) table entry into an alias for
+ * net type 0 that parses and prints addresses like 'lnd' does.
+ *
+ * 'lnd' must be a known type and the placeholder must still be unused; both
+ * are asserted.  The alias is always named "zero" rather than taking the
+ * LND's own name.
+ */
+void
+libcfs_setnet0alias(int lnd)
+{
+        struct netstrfns *src = libcfs_lnd2netstrfns(lnd);
+        struct netstrfns *alias = &libcfs_netstrfns[libcfs_nnetstrfns - 1];
+
+        /* Ghastly hack to allow LNET to inter-operate with portals.
+         * NET type 0 becomes an alias for whatever local network we have, and
+         * this assignment here means we can parse and print its NIDs */
+
+        LASSERT (src != NULL);
+        LASSERT (alias->nf_type < 0);
+
+        alias->nf_name = "zero";
+        alias->nf_modname = src->nf_modname;
+        alias->nf_addr2str = src->nf_addr2str;
+        alias->nf_str2addr = src->nf_str2addr;
+        /* fill in every other field before the type makes the entry visible
+         * to lookups, which skip negative types */
+        mb();
+        alias->nf_type = 0;
+}
+
+EXPORT_SYMBOL(libcfs_isknown_lnd);
+EXPORT_SYMBOL(libcfs_lnd2modname);
+EXPORT_SYMBOL(libcfs_lnd2str);
+EXPORT_SYMBOL(libcfs_str2lnd);
+EXPORT_SYMBOL(libcfs_net2str);
+EXPORT_SYMBOL(libcfs_nid2str);
+EXPORT_SYMBOL(libcfs_str2net);
+EXPORT_SYMBOL(libcfs_str2nid);
+EXPORT_SYMBOL(libcfs_id2str);
+EXPORT_SYMBOL(libcfs_str2anynid);
+EXPORT_SYMBOL(libcfs_setnet0alias);
+#else  /* __KERNEL__ */
+/* Userspace build: the net0 alias is not supported, so just report the
+ * misconfiguration on the console; 'lnd' is ignored. */
+void
+libcfs_setnet0alias(int lnd)
+{
+        LCONSOLE_ERROR_MSG(0x125,
+                           "Liblustre cannot interoperate with old "
+                           "Portals.\nportals_compatibility must be set to "
+                           "'none'.\n");
+}
+#endif
diff --git a/libcfs/libcfs/tracefile.c b/libcfs/libcfs/tracefile.c
new file mode 100644 (file)
index 0000000..4a5cf52
--- /dev/null
@@ -0,0 +1,1114 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *   Author: Zach Brown <zab@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+#include "tracefile.h"
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+
+/* XXX move things up to the top, comment */
+union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned;
+
+char tracefile[TRACEFILE_NAME_SIZE];
+long long tracefile_size = TRACEFILE_SIZE;
+static struct tracefiled_ctl trace_tctl;
+struct semaphore trace_thread_sem;
+static int thread_running = 0;
+
+atomic_t tage_allocated = ATOMIC_INIT(0);
+
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+                                         struct trace_cpu_data *tcd);
+
+/* Map a list node back to the trace_page that embeds it as 'linkage'. */
+static inline struct trace_page *tage_from_list(struct list_head *list)
+{
+        struct trace_page *tage = list_entry(list, struct trace_page, linkage);
+
+        return tage;
+}
+
+/*
+ * Allocate a trace_page descriptor together with its backing page, using
+ * 'gfp' plus CFS_ALLOC_NOWARN for both allocations.  Bumps tage_allocated on
+ * success.  Returns NULL, with nothing left allocated, if either allocation
+ * fails.
+ */
+static struct trace_page *tage_alloc(int gfp)
+{
+        /*
+         * Don't spam console with allocation failures: they will be reported
+         * by upper layer anyway.
+         */
+        int                quiet_gfp = gfp | CFS_ALLOC_NOWARN;
+        struct trace_page *tage;
+        cfs_page_t        *pg;
+
+        pg = cfs_alloc_page(quiet_gfp);
+        if (pg == NULL)
+                return NULL;
+
+        tage = cfs_alloc(sizeof(*tage), quiet_gfp);
+        if (tage != NULL) {
+                tage->page = pg;
+                atomic_inc(&tage_allocated);
+                return tage;
+        }
+
+        /* descriptor allocation failed: give the page back */
+        cfs_free_page(pg);
+        return NULL;
+}
+
+/* Release a trace_page: its backing page first, then the descriptor, and
+ * drop the tage_allocated count.  Both pointers are asserted non-NULL. */
+static void tage_free(struct trace_page *tage)
+{
+        cfs_page_t *pg;
+
+        __LASSERT(tage != NULL);
+        pg = tage->page;
+        __LASSERT(pg != NULL);
+
+        cfs_free_page(pg);
+        cfs_free(tage);
+        atomic_dec(&tage_allocated);
+}
+
+/* Unlink 'tage' from whatever list it is on and append it to 'queue'. */
+static void tage_to_tail(struct trace_page *tage, struct list_head *queue)
+{
+        struct list_head *node;
+
+        __LASSERT(tage != NULL);
+        __LASSERT(queue != NULL);
+
+        node = &tage->linkage;
+        list_move_tail(node, queue);
+}
+
+/*
+ * Allocate trace pages onto 'stock' until the new pages plus the tcd's
+ * current stock would reach TCD_STOCK_PAGES, or an allocation fails.
+ * tcd->tcd_cur_stock_pages itself is not updated here.  Returns the number
+ * of pages added to 'stock'.
+ */
+int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
+                       struct list_head *stock)
+{
+        struct trace_page *tage;
+        int                added = 0;
+
+        /*
+         * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+         * from here: this will lead to infinite recursion.
+         */
+
+        while (added + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES) {
+                tage = tage_alloc(gfp);
+                if (tage == NULL)
+                        break;
+
+                list_add_tail(&tage->linkage, stock);
+                added++;
+        }
+
+        return added;
+}
+
+/*
+ * Try to find a page with 'len' bytes free at its end.
+ *
+ * The last page on tcd_pages is used if it has room.  Otherwise, while the
+ * tcd is below tcd_max_pages, a fresh page is taken from the stock list or
+ * allocated atomically and appended to tcd_pages; tracefiled is signalled
+ * once more than 8 pages are queued and the thread is running.  Returns NULL
+ * if the tcd is already at its limit or the allocation fails.
+ */
+static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd,
+                                             unsigned long len)
+{
+        struct trace_page *tage;
+
+        if (tcd->tcd_cur_pages > 0) {
+                __LASSERT(!list_empty(&tcd->tcd_pages));
+                tage = tage_from_list(tcd->tcd_pages.prev);
+                if (tage->used + len <= CFS_PAGE_SIZE)
+                        return tage;
+        }
+
+        /* the last page is full (or absent): may we add another one? */
+        if (tcd->tcd_cur_pages >= tcd->tcd_max_pages)
+                return NULL;
+
+        if (tcd->tcd_cur_stock_pages > 0) {
+                tage = tage_from_list(tcd->tcd_stock_pages.prev);
+                -- tcd->tcd_cur_stock_pages;
+                list_del_init(&tage->linkage);
+        } else {
+                tage = tage_alloc(CFS_ALLOC_ATOMIC);
+                if (tage == NULL) {
+                        printk(KERN_WARNING
+                               "failure to allocate a tage (%ld)\n",
+                               tcd->tcd_cur_pages);
+                        return NULL;
+                }
+        }
+
+        tage->used = 0;
+        tage->cpu = smp_processor_id();
+        tage->type = tcd->tcd_type;
+        list_add_tail(&tage->linkage, &tcd->tcd_pages);
+        tcd->tcd_cur_pages++;
+
+        if (tcd->tcd_cur_pages > 8 && thread_running) {
+                /*
+                 * wake up tracefiled to process some pages.
+                 */
+                cfs_waitq_signal(&trace_tctl.tctl_waitq);
+        }
+
+        return tage;
+}
+
+/*
+ * Move the oldest tenth of the tcd's pages (tcd_cur_pages / 10, taken from
+ * the head of tcd_pages) onto a temporary collection and pass that to
+ * put_pages_on_tcd_daemon_list().
+ *
+ * NOTE(review): the warning reports "quota + 1" pages while the loop moves
+ * at most "quota" -- confirm which of the two is intended.
+ */
+static void tcd_shrink(struct trace_cpu_data *tcd)
+{
+        int quota = tcd->tcd_cur_pages / 10;
+        struct page_collection pc;
+        struct trace_page *tage;
+        struct trace_page *next;
+
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+        printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
+               " 10%% of pages (%d of %ld)\n", quota + 1, tcd->tcd_cur_pages);
+
+        CFS_INIT_LIST_HEAD(&pc.pc_pages);
+        spin_lock_init(&pc.pc_lock);
+
+        list_for_each_entry_safe(tage, next, &tcd->tcd_pages, linkage) {
+                if (quota == 0)
+                        break;
+                quota--;
+
+                list_move_tail(&tage->linkage, &pc.pc_pages);
+                tcd->tcd_cur_pages--;
+        }
+
+        put_pages_on_tcd_daemon_list(&pc, tcd);
+}
+
+/*
+ * Return a page that has 'len' bytes left at the end.
+ *
+ * Requests larger than a page are refused.  If trace_get_tage_try() cannot
+ * supply a page, the tcd is shrunk (only while tracefiled is running) and
+ * the oldest remaining page is emptied and moved to the tail for reuse.
+ * Returns NULL when the request is too large or the tcd holds no pages.
+ */
+static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd,
+                                         unsigned long len)
+{
+        struct trace_page *tage;
+
+       /*
+        * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT)
+        * from here: this will lead to infinite recursion.
+        */
+
+        if (len > CFS_PAGE_SIZE) {
+                printk(KERN_ERR
+                       "cowardly refusing to write %lu bytes in a page\n", len);
+                return NULL;
+        }
+
+        tage = trace_get_tage_try(tcd, len);
+        if (tage == NULL) {
+                if (thread_running)
+                        tcd_shrink(tcd);
+
+                if (tcd->tcd_cur_pages > 0) {
+                        /* recycle the oldest page, discarding its contents */
+                        tage = tage_from_list(tcd->tcd_pages.next);
+                        tage->used = 0;
+                        tage_to_tail(tage, &tcd->tcd_pages);
+                }
+        }
+
+        return tage;
+}
+/*
+ * Format one debug message into a trace page obtained through the
+ * trace_cpu_data returned by trace_get_tcd() and, when 'mask' matches
+ * libcfs_printk, also print it on the console.
+ *
+ * The text is 'format1' expanded with 'args', followed by 'format2' expanded
+ * with the trailing variadic arguments; either format may be NULL.  'file'
+ * is reduced to its last path component.  'cdls', when non-NULL, holds the
+ * console rate-limiting state that is consulted and updated here.
+ *
+ * Returns 1 when console output was not requested or was skipped by the
+ * rate limiter, 0 otherwise.
+ */
+int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask,
+                       const char *file, const char *fn, const int line,
+                       const char *format1, va_list args,
+                       const char *format2, ...)                      
+{
+        struct trace_cpu_data   *tcd = NULL;
+        struct ptldebug_header   header;
+        struct trace_page       *tage;
+        /* string_buf is used only if tcd != NULL, and is always set then */
+        char                    *string_buf = NULL;
+        char                    *debug_buf;
+        int                      known_size;
+        int                      needed = 85; /* average message length */
+        int                      max_nob;
+        va_list                  ap;
+        int                      depth;
+        int                      i;
+        int                      remain;
+
+        if (strchr(file, '/'))
+                file = strrchr(file, '/') + 1;
+
+
+        set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK());
+
+        tcd = trace_get_tcd();
+        if (tcd == NULL)                /* arch may not log in IRQ context */
+                goto console;
+
+        if (tcd->tcd_shutting_down) {
+                trace_put_tcd(tcd);
+                tcd = NULL;
+                goto console;
+        }
+
+        depth = __current_nesting_level();
+        known_size = strlen(file) + 1 + depth;
+        if (fn)
+                known_size += strlen(fn) + 1;
+
+        if (libcfs_debug_binary)
+                known_size += sizeof(header);
+
+        /*
+         * Loop at most twice: vsnprintf returns the real size required for
+         * the output _without_ the terminating NUL, so if the initial guess
+         * in 'needed' is too small for this format, the second pass asks for
+         * a page with room for the size reported by the first.
+         */
+        for (i=0;i<2;i++) {
+                tage = trace_get_tage(tcd, needed + known_size + 1);
+                if (tage == NULL) {
+                        if (needed + known_size > CFS_PAGE_SIZE)
+                                mask |= D_ERROR;
+
+                        trace_put_tcd(tcd);
+                        tcd = NULL;
+                        goto console;
+                }
+
+                string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size;
+
+                max_nob = CFS_PAGE_SIZE - tage->used - known_size;
+                if (max_nob <= 0) {
+                        printk(KERN_EMERG "negative max_nob: %i\n", max_nob);
+                        mask |= D_ERROR;
+                        trace_put_tcd(tcd);
+                        tcd = NULL;
+                        goto console;
+                }
+
+                needed = 0;
+                if (format1) {
+                        va_copy(ap, args);
+                        needed = vsnprintf(string_buf, max_nob, format1, ap);
+                        va_end(ap);
+                }
+               
+
+                if (format2) {
+                       remain = max_nob - needed;
+                        if (remain < 0)
+                                remain = 0;
+               
+                        va_start(ap, format2);
+                        needed += vsnprintf(string_buf+needed, remain, format2, ap);
+                        va_end(ap);
+                }
+
+                if (needed < max_nob) /* well. printing ok.. */
+                        break;
+        }
+        /* NOTE(review): this reads string_buf[needed - 1]; it looks like
+         * needed == 0 (both formats NULL or empty) or a message still
+         * truncated after the second pass would index outside the formatted
+         * text -- confirm callers cannot hit either case. */
+        if (*(string_buf+needed-1) != '\n')
+                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
+                       file, line, fn);
+       
+        header.ph_len = known_size + needed;
+        debug_buf = (char *)cfs_page_address(tage->page) + tage->used;
+
+        if (libcfs_debug_binary) {
+                memcpy(debug_buf, &header, sizeof(header));
+                tage->used += sizeof(header);
+                debug_buf += sizeof(header);
+        }
+
+        /* indent message according to the nesting level */
+        while (depth-- > 0) {
+                *(debug_buf++) = '.';
+                ++ tage->used;
+        }
+
+        strcpy(debug_buf, file);
+        tage->used += strlen(file) + 1;
+        debug_buf += strlen(file) + 1;
+
+        if (fn) {
+                strcpy(debug_buf, fn);
+                tage->used += strlen(fn) + 1;
+                debug_buf += strlen(fn) + 1;
+        }
+
+        __LASSERT(debug_buf == string_buf);
+
+        tage->used += needed;
+        __LASSERT (tage->used <= CFS_PAGE_SIZE);
+
+console:
+        if ((mask & libcfs_printk) == 0) {
+                /* no console output requested */
+                if (tcd != NULL)
+                        trace_put_tcd(tcd);
+                return 1;
+        }
+
+        if (cdls != NULL) {
+                if (libcfs_console_ratelimit &&
+                    cdls->cdls_next != 0 &&     /* not first time ever */
+                    !cfs_time_after(cfs_time_current(), cdls->cdls_next)) {
+                        /* skipping a console message */
+                        cdls->cdls_count++;
+                        if (tcd != NULL)
+                                trace_put_tcd(tcd);
+                        return 1;
+                }
+
+                if (cfs_time_after(cfs_time_current(), cdls->cdls_next +
+                                                       libcfs_console_max_delay
+                                                       + cfs_time_seconds(10))) {
+                        /* last timeout was a long time ago */
+                        cdls->cdls_delay /= libcfs_console_backoff * 4;
+                } else {
+                        cdls->cdls_delay *= libcfs_console_backoff;
+
+                        if (cdls->cdls_delay < libcfs_console_min_delay)
+                                cdls->cdls_delay = libcfs_console_min_delay;
+                        else if (cdls->cdls_delay > libcfs_console_max_delay)
+                                cdls->cdls_delay = libcfs_console_max_delay;
+                }
+
+                /* ensure cdls_next is never zero after it's been seen */
+                cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1;
+        }
+
+        if (tcd != NULL) {
+                print_to_console(&header, mask, string_buf, needed, file, fn);
+                trace_put_tcd(tcd);
+        } else {
+                string_buf = trace_get_console_buffer();
+
+                needed = 0;
+                if (format1 != NULL) {
+                        va_copy(ap, args);
+                        needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap);
+                        va_end(ap);
+                }
+                if (format2 != NULL) {
+                        remain = TRACE_CONSOLE_BUFFER_SIZE - needed;
+                        if (remain > 0) {
+                                va_start(ap, format2);
+                                needed += vsnprintf(string_buf+needed, remain, format2, ap);
+                                va_end(ap);
+                        }
+                }
+                print_to_console(&header, mask,
+                                 string_buf, needed, file, fn);
+
+                trace_put_console_buffer(string_buf);
+        }
+
+        if (cdls != NULL && cdls->cdls_count != 0) {
+                string_buf = trace_get_console_buffer();
+
+                needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE,
+                         "Skipped %d previous similar message%s\n",
+                         cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : "");
+
+                print_to_console(&header, mask,
+                                 string_buf, needed, file, fn);
+
+                trace_put_console_buffer(string_buf);
+                cdls->cdls_count = 0;
+        }
+
+        return 0;
+}
+EXPORT_SYMBOL(libcfs_debug_vmsg2);
+
+/*
+ * Report a failed assertion: log "ASSERTION(<expr>) failed" at D_EMERG for
+ * the given source location, then invoke LBUG().
+ */
+void
+libcfs_assertion_failed(const char *expr, const char *file,
+                        const char *func, const int line)
+{
+        libcfs_debug_msg(NULL, 0, D_EMERG,
+                         file, func, line,
+                         "ASSERTION(%s) failed\n", expr);
+
+        LBUG();
+}
+EXPORT_SYMBOL(libcfs_assertion_failed);
+
+/*
+ * Assertion-failure path used by __LASSERT() inside the debug system itself.
+ * Flags a panic in progress, writes 'str' straight to the console (tagged
+ * with 'file', 'fn' and 'line') and then panics.
+ */
+void trace_assertion_failed(const char *str, const char *fn,
+                            const char *file, int line)
+{
+        struct ptldebug_header header;
+        int                    len;
+
+        /* raise both flags, then issue a barrier before going any further */
+        libcfs_panic_in_progress = 1;
+        libcfs_catastrophe = 1;
+        mb();
+
+        set_ptldebug_header(&header, DEBUG_SUBSYSTEM, D_EMERG, line,
+                            CDEBUG_STACK());
+
+        len = strlen(str);
+        print_to_console(&header, D_EMERG, str, len, file, fn);
+
+        LIBCFS_PANIC("Lustre debug assertion failure\n");
+
+        /* not reached */
+}
+
+/*
+ * Panic-time variant of page collection.  Everything is done from the
+ * calling CPU: this assumes that all other CPUs have been stopped during a
+ * panic.  If that isn't true for some arch, it will have to be implemented
+ * separately in each arch.
+ */
+static void panic_collect_pages(struct page_collection *pc)
+{
+        struct trace_cpu_data *tcd;
+        int                    type_idx;
+        int                    cpu;
+
+        CFS_INIT_LIST_HEAD(&pc->pc_pages);
+
+        tcd_for_each(tcd, type_idx, cpu) {
+                /* pending pages are always taken */
+                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+                tcd->tcd_cur_pages = 0;
+
+                /* daemon pages only on request */
+                if (!pc->pc_want_daemon_pages)
+                        continue;
+
+                list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages);
+                tcd->tcd_cur_daemon_pages = 0;
+        }
+}
+
+/*
+ * Per-CPU callback for collect_pages(): under pc->pc_lock, splice the pages
+ * of every trace type owned by the current CPU onto pc->pc_pages, and the
+ * daemon pages too when pc->pc_want_daemon_pages is set.
+ */
+static void collect_pages_on_cpu(void *info)
+{
+        struct page_collection *pc = info;
+        struct trace_cpu_data  *tcd;
+        int                     type_idx;
+
+        spin_lock(&pc->pc_lock);
+        tcd_for_each_type_lock(tcd, type_idx) {
+                list_splice_init(&tcd->tcd_pages, &pc->pc_pages);
+                tcd->tcd_cur_pages = 0;
+
+                if (pc->pc_want_daemon_pages) {
+                        list_splice_init(&tcd->tcd_daemon_pages,
+                                         &pc->pc_pages);
+                        tcd->tcd_cur_daemon_pages = 0;
+                }
+        }
+        spin_unlock(&pc->pc_lock);
+}
+
+/*
+ * Gather trace pages from all CPUs into pc->pc_pages.  During a panic the
+ * work is done locally by panic_collect_pages(); otherwise each CPU is asked
+ * to hand over its own pages.
+ */
+static void collect_pages(struct page_collection *pc)
+{
+        CFS_INIT_LIST_HEAD(&pc->pc_pages);
+
+        if (!libcfs_panic_in_progress) {
+                trace_call_on_all_cpus(collect_pages_on_cpu, pc);
+                return;
+        }
+
+        panic_collect_pages(pc);
+}
+
+/*
+ * Per-CPU callback for put_pages_back(): return to the current CPU's
+ * tcd_pages lists those pages on pc->pc_pages that belong to this CPU.
+ *
+ * 'info' is the struct page_collection being drained.  pc->pc_lock is held
+ * for the whole walk.
+ */
+static void put_pages_back_on_cpu(void *info)
+{
+        struct page_collection *pc = info;
+        struct trace_cpu_data *tcd;
+        struct list_head *cur_head;
+        struct trace_page *tage;
+        struct trace_page *tmp;
+        int i;
+
+        spin_lock(&pc->pc_lock);
+        tcd_for_each_type_lock(tcd, i) {
+                /* remember the first entry currently on tcd_pages; returned
+                 * pages are linked relative to it.
+                 * NOTE(review): presumably tage_to_tail() inserts before
+                 * cur_head so returned pages precede newer ones - confirm */
+                cur_head = tcd->tcd_pages.next;
+
+                list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) {
+
+                        __LASSERT_TAGE_INVARIANT(tage);
+
+                        /* only take pages owned by this CPU and this type */
+                        if (tage->cpu != smp_processor_id() || tage->type != i)
+                                continue;
+
+                        tage_to_tail(tage, cur_head);
+                        tcd->tcd_cur_pages++;
+                }
+        }
+        spin_unlock(&pc->pc_lock);
+}
+
+/*
+ * Hand collected pages back to the CPUs that own them.  Nothing is done
+ * while a panic is in progress.
+ */
+static void put_pages_back(struct page_collection *pc)
+{
+        if (libcfs_panic_in_progress)
+                return;
+
+        trace_call_on_all_cpus(put_pages_back_on_cpu, pc);
+}
+
+/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
+ * we have a good amount of data at all times for dumping during an LBUG, even
+ * if we have been steadily writing (and otherwise discarding) pages via the
+ * debug daemon.
+ *
+ * Only pages on pc->pc_pages that belong to the current CPU and to the type
+ * of 'tcd' are moved.  Whenever the daemon list exceeds tcd->tcd_max_pages,
+ * the page at its head is freed. */
+static void put_pages_on_tcd_daemon_list(struct page_collection *pc,
+                                         struct trace_cpu_data *tcd)
+{
+        struct trace_page *tage;
+        struct trace_page *next;
+
+        spin_lock(&pc->pc_lock);
+        list_for_each_entry_safe(tage, next, &pc->pc_pages, linkage) {
+                struct trace_page *victim;
+
+                __LASSERT_TAGE_INVARIANT(tage);
+
+                /* skip pages owned by another CPU or another type */
+                if (tage->cpu != smp_processor_id())
+                        continue;
+                if (tage->type != tcd->tcd_type)
+                        continue;
+
+                tage_to_tail(tage, &tcd->tcd_daemon_pages);
+                tcd->tcd_cur_daemon_pages++;
+
+                if (tcd->tcd_cur_daemon_pages <= tcd->tcd_max_pages)
+                        continue;
+
+                /* over the limit: drop the entry at the head of the list */
+                __LASSERT(!list_empty(&tcd->tcd_daemon_pages));
+                victim = tage_from_list(tcd->tcd_daemon_pages.next);
+
+                __LASSERT_TAGE_INVARIANT(victim);
+
+                list_del(&victim->linkage);
+                tage_free(victim);
+                tcd->tcd_cur_daemon_pages--;
+        }
+        spin_unlock(&pc->pc_lock);
+}
+
+/*
+ * Per-CPU callback: move this CPU's share of the collection 'info' onto the
+ * daemon list of each of its trace types.
+ */
+static void put_pages_on_daemon_list_on_cpu(void *info)
+{
+        struct page_collection *pc = info;
+        struct trace_cpu_data  *tcd;
+        int                     type_idx;
+
+        tcd_for_each_type_lock(tcd, type_idx) {
+                put_pages_on_tcd_daemon_list(pc, tcd);
+        }
+}
+
+/* Run put_pages_on_daemon_list_on_cpu() for 'pc' on every CPU. */
+static void put_pages_on_daemon_list(struct page_collection *pc)
+{
+        trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu,
+                               pc);
+}
+
+/*
+ * Collect all trace pages (daemon pages included), print every record they
+ * contain to the console at D_EMERG, and free the pages.
+ *
+ * Each record is laid out as a struct ptldebug_header, followed by the
+ * NUL-terminated file name, the NUL-terminated function name and the message
+ * text; hdr->ph_len is the length of the whole record.
+ */
+void trace_debug_print(void)
+{
+        struct page_collection pc;
+        struct trace_page *tage;
+        struct trace_page *next;
+
+        spin_lock_init(&pc.pc_lock);
+        pc.pc_want_daemon_pages = 1;
+        collect_pages(&pc);
+
+        list_for_each_entry_safe(tage, next, &pc.pc_pages, linkage) {
+                cfs_page_t *page;
+                char       *cursor;
+
+                __LASSERT_TAGE_INVARIANT(tage);
+
+                page = tage->page;
+                cursor = cfs_page_address(page);
+                while (cursor < ((char *)cfs_page_address(page) + tage->used)) {
+                        struct ptldebug_header *hdr = (void *)cursor;
+                        char *file = cursor + sizeof(*hdr);
+                        char *fn = file + strlen(file) + 1;
+                        char *text = fn + strlen(fn) + 1;
+                        /* message length = record length minus what precedes it */
+                        int len = hdr->ph_len - (text - (char *)hdr);
+
+                        print_to_console(hdr, D_EMERG, text, len, file, fn);
+
+                        cursor = text + len;
+                }
+
+                list_del(&tage->linkage);
+                tage_free(tage);
+        }
+}
+
+/*
+ * Dump every collected trace page (daemon pages included) to a newly created
+ * file 'filename'.  The file is opened with O_EXCL, so an existing file is
+ * never overwritten.
+ *
+ * Returns 0 on success or when there was nothing to dump, otherwise a
+ * negative errno: the open error, the first write error (-EIO for a short
+ * write) or, if all writes succeeded, the fsync error.
+ */
+int tracefile_dump_all_pages(char *filename)
+{
+        struct page_collection pc;
+        cfs_file_t *filp;
+        struct trace_page *tage;
+        struct trace_page *tmp;
+        int rc;
+        int nob;
+        int sync_rc;
+
+        CFS_DECL_MMSPACE;
+
+        tracefile_write_lock();
+
+        filp = cfs_filp_open(filename,
+                             O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc);
+        if (!filp) {
+                /* an already existing dump file is not worth a message */
+                if (rc != -EEXIST)
+                        printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
+                               filename, rc);
+                goto out;
+        }
+
+        spin_lock_init(&pc.pc_lock);
+        pc.pc_want_daemon_pages = 1;
+        collect_pages(&pc);
+
+        rc = 0;
+        if (list_empty(&pc.pc_pages))
+                goto close;
+
+        /* ok, for now, just write the pages.  in the future we'll be building
+         * iobufs with the pages and calling generic_direct_IO */
+        CFS_MMSPACE_OPEN;
+        list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+
+                __LASSERT_TAGE_INVARIANT(tage);
+
+                nob = cfs_filp_write(filp, cfs_page_address(tage->page),
+                                     tage->used, cfs_filp_poff(filp));
+                if (nob != (int)tage->used) {
+                        printk(KERN_WARNING "wanted to write %u but wrote "
+                               "%d\n", tage->used, nob);
+                        /* remember the failure so the caller sees it; a short
+                         * write carries no errno of its own */
+                        rc = (nob < 0) ? nob : -EIO;
+                        put_pages_back(&pc);
+                        __LASSERT(list_empty(&pc.pc_pages));
+                        break;
+                }
+                list_del(&tage->linkage);
+                tage_free(tage);
+        }
+        CFS_MMSPACE_CLOSE;
+
+        sync_rc = cfs_filp_fsync(filp);
+        if (sync_rc) {
+                printk(KERN_ERR "sync returns %d\n", sync_rc);
+                /* do not let the sync result hide an earlier write error */
+                if (rc == 0)
+                        rc = sync_rc;
+        }
+ close:
+        cfs_filp_close(filp);
+ out:
+        tracefile_write_unlock();
+        return rc;
+}
+
+/*
+ * Discard all buffered trace data: collect every page (daemon pages
+ * included) and free them.
+ */
+void trace_flush_pages(void)
+{
+        struct page_collection pc;
+        struct trace_page *tage;
+        struct trace_page *next;
+
+        spin_lock_init(&pc.pc_lock);
+        pc.pc_want_daemon_pages = 1;
+
+        collect_pages(&pc);
+
+        list_for_each_entry_safe(tage, next, &pc.pc_pages, linkage) {
+                __LASSERT_TAGE_INVARIANT(tage);
+
+                list_del(&tage->linkage);
+                tage_free(tage);
+        }
+}
+
+/*
+ * Copy a string of at most 'usr_buffer_nob' bytes from user space into
+ * 'knl_buffer' (capacity 'knl_buffer_nob' bytes), strip trailing whitespace
+ * and NUL-terminate the result.
+ *
+ * Returns 0 on success, or:
+ *   -EOVERFLOW  the user data, plus its terminator, does not fit
+ *   -EFAULT     the copy from user space failed
+ *   -EINVAL     the string is empty or consists only of whitespace
+ */
+int trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+                        const char *usr_buffer, int usr_buffer_nob)
+{
+        int    nob;
+
+        if (usr_buffer_nob > knl_buffer_nob)
+                return -EOVERFLOW;
+
+        if (copy_from_user((void *)knl_buffer,
+                           (void *)usr_buffer, usr_buffer_nob))
+                return -EFAULT;
+
+        /* 'nob' ends up as the index of the last non-whitespace character,
+         * or -1 if there is none.  Pre-decrement so that knl_buffer[-1] is
+         * never examined. */
+        nob = strnlen(knl_buffer, usr_buffer_nob);
+        while (--nob >= 0)                      /* strip trailing whitespace */
+                if (!isspace(knl_buffer[nob]))
+                        break;
+
+        if (nob < 0)                            /* empty string */
+                return -EINVAL;
+
+        /* the terminator goes at index nob + 1, which must be in bounds */
+        if (nob + 1 >= knl_buffer_nob)          /* no space to terminate */
+                return -EOVERFLOW;
+
+        knl_buffer[nob + 1] = 0;                /* terminate */
+        return 0;
+}
+
+/*
+ * Copy the NUL-terminated kernel string 'knl_buffer' out to 'usr_buffer',
+ * truncating it to 'usr_buffer_nob' bytes.
+ *
+ * NB if 'append' != NULL, it's a single character to append to the
+ * copied out string - usually "\n", for /proc entries and "" (i.e. a
+ * terminating zero byte) for sysctl entries.  It is only appended when
+ * there is room left for it.
+ *
+ * Returns the number of bytes copied out, or -EFAULT.
+ */
+int trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+                         const char *knl_buffer, char *append)
+{
+        int copied = strlen(knl_buffer);
+
+        if (copied > usr_buffer_nob)
+                copied = usr_buffer_nob;
+
+        if (copy_to_user(usr_buffer, knl_buffer, copied))
+                return -EFAULT;
+
+        /* nothing to append, or no room for it */
+        if (append == NULL || copied >= usr_buffer_nob)
+                return copied;
+
+        if (copy_to_user(usr_buffer + copied, append, 1))
+                return -EFAULT;
+
+        return copied + 1;
+}
+
+/*
+ * Allocate a zeroed buffer of 'nob' bytes and store it in '*str'.
+ * Requests larger than two pages are refused with -EINVAL; allocation
+ * failure yields -ENOMEM (and '*str' set to NULL).  Returns 0 on success.
+ */
+int trace_allocate_string_buffer(char **str, int nob)
+{
+        char *buf;
+
+        if (nob > 2 * CFS_PAGE_SIZE)            /* string must be "sensible" */
+                return -EINVAL;
+
+        buf = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO);
+        *str = buf;
+
+        return (buf == NULL) ? -ENOMEM : 0;
+}
+
+/*
+ * Release a buffer obtained from trace_allocate_string_buffer().
+ */
+void trace_free_string_buffer(char *str, int nob)
+{
+        /* 'nob' is not needed to free the buffer */
+        cfs_free(str);
+}
+
+int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob)
+{
+        char         *str;
+        int           rc;
+
+        rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
+        if (rc != 0)
+                return rc;
+
+        rc = trace_copyin_string(str, usr_str_nob + 1,
+                                 usr_str, usr_str_nob);
+        if (rc != 0)
+                goto out;
+
+#if !defined(__WINNT__)
+        if (str[0] != '/') {
+                rc = -EINVAL;
+                goto out;
+        }
+#endif
+        rc = tracefile_dump_all_pages(str);
+out:
+        trace_free_string_buffer(str, usr_str_nob + 1);
+        return rc;
+}
+
+int trace_daemon_command(char *str)
+{
+        int       rc = 0;
+        
+       tracefile_write_lock();
+
+       if (strcmp(str, "stop") == 0) {
+               trace_stop_thread();
+                memset(tracefile, 0, sizeof(tracefile));
+
+       } else if (strncmp(str, "size=", 5) == 0) {
+               tracefile_size = simple_strtoul(str + 5, NULL, 0);
+               if (tracefile_size < 10 || tracefile_size > 20480)
+                       tracefile_size = TRACEFILE_SIZE;
+               else
+                       tracefile_size <<= 20;
+
+       } else if (strlen(str) >= sizeof(tracefile)) {
+                rc = -ENAMETOOLONG;
+#ifndef __WINNT__
+        } else if (str[0] != '/') {
+                rc = -EINVAL;
+#endif
+        } else {
+                strcpy(tracefile, str);
+
+                printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
+                       "to %s (%lukB max)\n", tracefile,
+                       (long)(tracefile_size >> 10));
+
+                trace_start_thread();
+        }
+
+       tracefile_write_unlock();
+       return rc;
+}
+
+/*
+ * Copy a daemon command of 'usr_str_nob' bytes in from user space and pass
+ * it to trace_daemon_command().  Returns 0 or a negative errno.
+ */
+int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob)
+{
+        char *str;
+        int   rc;
+
+        /* one extra byte for the terminating NUL */
+        rc = trace_allocate_string_buffer(&str, usr_str_nob + 1);
+        if (rc != 0)
+                return rc;
+
+        rc = trace_copyin_string(str, usr_str_nob + 1,
+                                 usr_str, usr_str_nob);
+        if (rc != 0)
+                goto out_free;
+
+        rc = trace_daemon_command(str);
+
+out_free:
+        trace_free_string_buffer(str, usr_str_nob + 1);
+        return rc;
+}
+
+/*
+ * Set the total debug buffer size to 'mb' megabytes, split evenly between
+ * the possible CPUs and then between trace types according to each
+ * tcd_pages_factor (a percentage).
+ *
+ * Returns 0, or -EINVAL when 'mb' is below one megabyte per possible CPU or
+ * above trace_max_debug_mb().
+ */
+int trace_set_debug_mb(int mb)
+{
+        struct trace_cpu_data *tcd;
+        int type_idx;
+        int cpu;
+        int pages;
+        int limit = trace_max_debug_mb();
+
+        if (mb < num_possible_cpus())
+                return -EINVAL;
+
+        if (mb > limit) {
+                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
+                       "%dMB - limit is %d\n", mb, limit);
+                return -EINVAL;
+        }
+
+        /* per-CPU share, converted from megabytes to pages */
+        mb /= num_possible_cpus();
+        pages = mb << (20 - CFS_PAGE_SHIFT);
+
+        tracefile_write_lock();
+
+        tcd_for_each(tcd, type_idx, cpu) {
+                tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100;
+        }
+
+        tracefile_write_unlock();
+
+        return 0;
+}
+
+/*
+ * Parse a debug buffer size (in MB) from a user-space string of
+ * 'usr_str_nob' bytes and apply it with trace_set_debug_mb().
+ */
+int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob)
+{
+        char          str[32];
+        unsigned long mb;
+        int           rc;
+
+        rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob);
+        if (rc < 0)
+                return rc;
+
+        mb = simple_strtoul(str, NULL, 0);
+        return trace_set_debug_mb(mb);
+}
+
+/*
+ * Return the configured debug buffer size in megabytes: the sum of every
+ * tcd_max_pages converted to MB (rounded down), plus one.
+ */
+int trace_get_debug_mb(void)
+{
+        struct trace_cpu_data *tcd;
+        int type_idx;
+        int cpu;
+        int total_pages = 0;
+
+        tracefile_read_lock();
+
+        tcd_for_each(tcd, type_idx, cpu) {
+                total_pages += tcd->tcd_max_pages;
+        }
+
+        tracefile_read_unlock();
+
+        return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1;
+}
+
+/*
+ * Body of the debug daemon thread ("ktracefiled").  'arg' is the struct
+ * tracefiled_ctl used to synchronise with trace_start_thread() and
+ * trace_stop_thread().
+ *
+ * The thread wakes up on tctl_waitq or after a one second timeout, collects
+ * the not-yet-processed pages from all CPUs and appends them to 'tracefile',
+ * wrapping to offset 0 once 'tracefile_size' is reached.  Written pages are
+ * then kept on the per-CPU daemon lists.  If no trace file is configured or
+ * it cannot be opened, the pages go straight to the daemon lists.
+ *
+ * Always returns 0.
+ */
+static int tracefiled(void *arg)
+{
+        struct page_collection pc;
+        struct tracefiled_ctl *tctl = arg;
+        struct trace_page *tage;
+        struct trace_page *tmp;
+        struct ptldebug_header *hdr;
+        cfs_file_t *filp;
+        int rc;
+
+        CFS_DECL_MMSPACE;
+
+        /* we're started late enough that we pick up init's fs context */
+        /* this is so broken in uml?  what on earth is going on? */
+        cfs_daemonize("ktracefiled");
+
+        spin_lock_init(&pc.pc_lock);
+        /* let trace_start_thread() know we are up */
+        complete(&tctl->tctl_start);
+
+        while (1) {
+                cfs_waitlink_t __wait;
+
+                cfs_waitlink_init(&__wait);
+                cfs_waitq_add(&tctl->tctl_waitq, &__wait);
+                set_current_state(TASK_INTERRUPTIBLE);
+                cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE,
+                                    cfs_time_seconds(1));
+                cfs_waitq_del(&tctl->tctl_waitq, &__wait);
+
+                if (atomic_read(&tctl->tctl_shutdown))
+                        break;
+
+                /* only pages not yet seen by the daemon */
+                pc.pc_want_daemon_pages = 0;
+                collect_pages(&pc);
+                if (list_empty(&pc.pc_pages))
+                        continue;
+
+                filp = NULL;
+                tracefile_read_lock();
+                if (tracefile[0] != 0) {
+                        filp = cfs_filp_open(tracefile,
+                                             O_CREAT | O_RDWR | O_LARGEFILE,
+                                             0600, &rc);
+                        if (!(filp))
+                                printk(KERN_WARNING "couldn't open %s: %d\n",
+                                       tracefile, rc);
+                }
+                tracefile_read_unlock();
+                if (filp == NULL) {
+                        /* nowhere to write: just keep the pages for an LBUG
+                         * dump */
+                        put_pages_on_daemon_list(&pc);
+                        __LASSERT(list_empty(&pc.pc_pages));
+                        continue;
+                }
+
+                CFS_MMSPACE_OPEN;
+
+                /* mark the first header, so we can sort in chunks */
+                tage = tage_from_list(pc.pc_pages.next);
+                __LASSERT_TAGE_INVARIANT(tage);
+
+                hdr = cfs_page_address(tage->page);
+                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
+
+                list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) {
+                        /* write position, remembered across wakeups */
+                        static loff_t f_pos;
+
+                        __LASSERT_TAGE_INVARIANT(tage);
+
+                        if (f_pos >= (off_t)tracefile_size)
+                                f_pos = 0;
+                        else if (f_pos > cfs_filp_size(filp))
+                                f_pos = cfs_filp_size(filp);
+
+                        rc = cfs_filp_write(filp, cfs_page_address(tage->page),
+                                            tage->used, &f_pos);
+                        if (rc != (int)tage->used) {
+                                printk(KERN_WARNING "wanted to write %u but "
+                                       "wrote %d\n", tage->used, rc);
+                                put_pages_back(&pc);
+                                __LASSERT(list_empty(&pc.pc_pages));
+                                /* the pages have just been moved off
+                                 * pc.pc_pages, so 'tmp' is no longer on the
+                                 * list being walked: stop iterating */
+                                break;
+                        }
+                }
+                CFS_MMSPACE_CLOSE;
+
+                cfs_filp_close(filp);
+                put_pages_on_daemon_list(&pc);
+                __LASSERT(list_empty(&pc.pc_pages));
+        }
+        /* let trace_stop_thread() know we are done */
+        complete(&tctl->tctl_stop);
+        return 0;
+}
+
+/*
+ * Start the debug daemon thread unless it is already running, and wait
+ * until it has signalled that it is up.  Serialised by trace_thread_sem.
+ * Returns 0 on success (or if already running), -ECHILD if the thread could
+ * not be created.
+ */
+int trace_start_thread(void)
+{
+        struct tracefiled_ctl *tctl = &trace_tctl;
+        int rc = 0;
+
+        mutex_down(&trace_thread_sem);
+
+        if (!thread_running) {
+                init_completion(&tctl->tctl_start);
+                init_completion(&tctl->tctl_stop);
+                cfs_waitq_init(&tctl->tctl_waitq);
+                atomic_set(&tctl->tctl_shutdown, 0);
+
+                if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) {
+                        rc = -ECHILD;
+                } else {
+                        wait_for_completion(&tctl->tctl_start);
+                        thread_running = 1;
+                }
+        }
+
+        mutex_up(&trace_thread_sem);
+        return rc;
+}
+
+/*
+ * Ask the debug daemon thread to shut down and wait until it has finished.
+ * Does nothing if the thread is not running.  Serialised by
+ * trace_thread_sem.
+ */
+void trace_stop_thread(void)
+{
+        struct tracefiled_ctl *tctl = &trace_tctl;
+
+        mutex_down(&trace_thread_sem);
+        if (!thread_running)
+                goto out;
+
+        printk(KERN_INFO "Lustre: shutting down debug daemon thread...\n");
+        atomic_set(&tctl->tctl_shutdown, 1);
+        wait_for_completion(&tctl->tctl_stop);
+        thread_running = 0;
+out:
+        mutex_up(&trace_thread_sem);
+}
+
+/*
+ * Initialise the trace buffers: run the arch-specific setup, then reset
+ * every trace_cpu_data to empty lists and give it a page limit of
+ * 'max_pages' scaled by its tcd_pages_factor (a percentage).
+ * Returns 0, or the error from tracefile_init_arch().
+ */
+int tracefile_init(int max_pages)
+{
+        struct trace_cpu_data *tcd;
+        int                    type_idx;
+        int                    cpu;
+        int                    rc;
+
+        rc = tracefile_init_arch();
+        if (rc != 0)
+                return rc;
+
+        tcd_for_each(tcd, type_idx, cpu) {
+                CFS_INIT_LIST_HEAD(&tcd->tcd_pages);
+                CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages);
+                CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
+
+                tcd->tcd_cur_pages = 0;
+                tcd->tcd_cur_stock_pages = 0;
+                tcd->tcd_cur_daemon_pages = 0;
+
+                /* tcd_pages_factor is initialized in tracefile_init_arch. */
+                tcd->tcd_max_pages = (max_pages * tcd->tcd_pages_factor) / 100;
+                LASSERT(tcd->tcd_max_pages > 0);
+
+                tcd->tcd_shutting_down = 0;
+        }
+
+        return 0;
+}
+
+/*
+ * Per-CPU cleanup callback: mark each of this CPU's trace_cpu_data as
+ * shutting down and free every page on its tcd_pages list.
+ */
+static void trace_cleanup_on_cpu(void *info)
+{
+        struct trace_cpu_data *tcd;
+        struct trace_page *tage;
+        struct trace_page *next;
+        int type_idx;
+
+        tcd_for_each_type_lock(tcd, type_idx) {
+                tcd->tcd_shutting_down = 1;
+
+                list_for_each_entry_safe(tage, next, &tcd->tcd_pages,
+                                         linkage) {
+                        __LASSERT_TAGE_INVARIANT(tage);
+
+                        list_del(&tage->linkage);
+                        tage_free(tage);
+                }
+
+                tcd->tcd_cur_pages = 0;
+        }
+}
+
+/*
+ * Free the pending trace pages on every CPU, then run the arch-specific
+ * teardown.
+ */
+static void trace_cleanup(void)
+{
+        struct page_collection pc;
+
+        spin_lock_init(&pc.pc_lock);
+        CFS_INIT_LIST_HEAD(&pc.pc_pages);
+
+        trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc);
+
+        tracefile_fini_arch();
+}
+
+/*
+ * Shut down tracing: stop the daemon thread first, then release the trace
+ * buffers.
+ */
+void tracefile_exit(void)
+{
+        trace_stop_thread();
+
+        trace_cleanup();
+}
diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h
new file mode 100644 (file)
index 0000000..7d43392
--- /dev/null
@@ -0,0 +1,248 @@
+#ifndef __LIBCFS_TRACEFILE_H__
+#define __LIBCFS_TRACEFILE_H__
+
+#include <libcfs/libcfs.h>
+
+/* name of the file the debug daemon writes to (empty when unset) and the
+ * size limit, in bytes, at which writing wraps around */
+
+#define TRACEFILE_NAME_SIZE 1024
+extern char      tracefile[TRACEFILE_NAME_SIZE];
+extern long long tracefile_size;
+
+/* arch-specific setup and teardown, called from tracefile_init() and from
+ * the cleanup path of tracefile_exit() */
+int  tracefile_init_arch(void);
+void tracefile_fini_arch(void);
+
+/* trace file lock routines */
+void tracefile_read_lock(void);
+void tracefile_read_unlock(void);
+void tracefile_write_lock(void);
+void tracefile_write_unlock(void);
+
+/* dumping, printing and flushing of the trace buffers; daemon thread
+ * control; module init and exit */
+int tracefile_dump_all_pages(char *filename);
+void trace_debug_print(void);
+void trace_flush_pages(void);
+int trace_start_thread(void);
+void trace_stop_thread(void);
+int tracefile_init(int max_pages);
+void tracefile_exit(void);
+
+
+
+/* helpers for moving strings between user and kernel space, and the
+ * user-string front ends built on them */
+int trace_copyin_string(char *knl_buffer, int knl_buffer_nob,
+                        const char *usr_buffer, int usr_buffer_nob);
+int trace_copyout_string(char *usr_buffer, int usr_buffer_nob,
+                         const char *knl_str, char *append);
+int trace_allocate_string_buffer(char **str, int nob);
+void trace_free_string_buffer(char *str, int nob);
+int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob);
+int trace_daemon_command(char *str);
+int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob);
+int trace_set_debug_mb(int mb);
+int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob);
+int trace_get_debug_mb(void);
+
+extern void libcfs_debug_dumplog_internal(void *arg);
+extern void libcfs_register_panic_notifier(void);
+extern void libcfs_unregister_panic_notifier(void);
+/* non-zero once a panic has started; page collection then runs on the
+ * calling CPU only */
+extern int  libcfs_panic_in_progress;
+extern int  trace_max_debug_mb(void);
+
+/* 5MB worth of pages */
+#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
+/* default trace file size limit: 500MB, in bytes */
+#define TRACEFILE_SIZE (500 << 20)
+
+#ifdef LUSTRE_TRACEFILE_PRIVATE
+
+/*
+ * Private declarations for tracefile.
+ *
+ * NOTE(review): the three macros below repeat, with identical values, the
+ * definitions made unconditionally earlier in this header.
+ */
+#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT))
+#define TCD_STOCK_PAGES (TCD_MAX_PAGES)
+
+#define TRACEFILE_SIZE (500 << 20)
+
+/* Size of a buffer for formatting console messages if we can't get a page
+ * from the system */
+#define TRACE_CONSOLE_BUFFER_SIZE   1024
+
+/*
+ * Per-CPU, per-type trace state, padded out to a multiple of the L1 cache
+ * line size.
+ */
+union trace_data_union {
+       struct trace_cpu_data {
+               /*
+                * pages with trace records not yet processed by tracefiled.
+                */
+               struct list_head        tcd_pages;
+               /* number of pages on ->tcd_pages */
+               unsigned long           tcd_cur_pages;
+
+               /*
+                * pages with trace records already processed by
+                * tracefiled. These pages are kept in memory, so that some
+                * portion of log can be written in the event of LBUG. This
+                * list is maintained in LRU order.
+                *
+                * Pages are moved to ->tcd_daemon_pages by tracefiled()
+                * (put_pages_on_daemon_list()). LRU pages from this list are
+                * discarded when list grows too large.
+                */
+               struct list_head        tcd_daemon_pages;
+               /* number of pages on ->tcd_daemon_pages */
+               unsigned long           tcd_cur_daemon_pages;
+
+               /*
+                * Maximal number of pages allowed on ->tcd_pages and
+                * ->tcd_daemon_pages each.
+                * Set by tracefile_init() to max_pages * tcd_pages_factor /
+                * 100 and recomputed by trace_set_debug_mb().
+                */
+               unsigned long           tcd_max_pages;
+
+               /*
+                * preallocated pages to write trace records into. Pages from
+                * ->tcd_stock_pages are moved to ->tcd_pages by
+                * portals_debug_msg().
+                *
+                * This list is necessary, because on some platforms it's
+                * impossible to perform efficient atomic page allocation in a
+                * non-blockable context.
+                *
+                * Such platforms fill ->tcd_stock_pages "on occasion", when
+                * tracing code is entered in blockable context.
+                *
+                * trace_get_tage_try() tries to get a page from
+                * ->tcd_stock_pages first and resorts to atomic page
+                * allocation only if this queue is empty. ->tcd_stock_pages
+                * is replenished when tracing code is entered in blocking
+                * context (darwin-tracefile.c:trace_get_tcd()). We try to
+                * maintain TCD_STOCK_PAGES (defined as TCD_MAX_PAGES in this
+                * header) pages in this queue. Atomic allocation is only
+                * required if more than TCD_STOCK_PAGES pagesful are consumed
+                * by trace records all emitted in non-blocking contexts.
+                * Which is quite unlikely.
+                */
+               struct list_head        tcd_stock_pages;
+               /* number of pages on ->tcd_stock_pages */
+               unsigned long           tcd_cur_stock_pages;
+
+               /* cleared by tracefile_init(), set when the buffers are
+                * being torn down */
+               unsigned short          tcd_shutting_down;
+               /* presumably the CPU this entry belongs to - TODO confirm */
+               unsigned short          tcd_cpu;
+               /* trace type (context) of this entry; matched against
+                * trace_page::type */
+               unsigned short          tcd_type;
+               /* The factors to share debug memory. */
+               unsigned short          tcd_pages_factor;
+       } tcd;
+       char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))];
+};
+
+/* trace_data[type] points to an array of per-CPU entries for that type;
+ * unused type slots are NULL */
+#define TCD_MAX_TYPES      8
+extern union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS];
+
+/*
+ * Visit every trace_cpu_data: 'i' runs over the types until the first NULL
+ * slot of trace_data, 'j' over the possible CPUs.  No locking is done.
+ * NOTE(review): the outer loop relies on a NULL slot to terminate; it does
+ * not check 'i' against TCD_MAX_TYPES.
+ */
+#define tcd_for_each(tcd, i, j)                                       \
+    for (i = 0; trace_data[i] != NULL; i++)                           \
+        for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd);               \
+             j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd)
+
+/*
+ * Visit the current CPU's trace_cpu_data of every type.  Each entry is
+ * locked with trace_lock_tcd() before the loop body and unlocked with
+ * trace_unlock_tcd() after it.  The loop stops at the first NULL type slot
+ * or as soon as trace_lock_tcd() returns zero.
+ */
+#define tcd_for_each_type_lock(tcd, i)                                \
+    for (i = 0; trace_data[i] &&                                      \
+         (tcd = &(*trace_data[i])[smp_processor_id()].tcd) &&         \
+         trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++)
+
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+/* A temporary list of trace pages gathered from the per-CPU lists. */
+struct page_collection {
+       /* the collected pages, linked through trace_page::linkage */
+       struct list_head        pc_pages;
+       /*
+        * spin-lock protecting ->pc_pages. It is taken by smp_call_function()
+        * call-back functions. XXX nikita: Which is horrible: all processors
+        * receive NMI at the same time only to be serialized by this
+        * lock. Probably ->pc_pages should be replaced with an array of
+        * NR_CPUS elements accessed locklessly.
+        */
+       spinlock_t              pc_lock;
+       /*
+        * if this flag is set, collect_pages() will spill both
+        * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise,
+        * only ->tcd_pages are spilled.
+        */
+       int                     pc_want_daemon_pages;
+};
+
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+/* Control block shared between the debug daemon thread and the code that
+ * starts and stops it. */
+struct tracefiled_ctl {
+       /* completed by the daemon thread once it is running */
+       struct completion       tctl_start;
+       /* completed by the daemon thread just before it exits */
+       struct completion       tctl_stop;
+       /* the daemon thread sleeps here between passes */
+       cfs_waitq_t             tctl_waitq;
+       /* presumably the pid of the daemon thread - TODO confirm */
+       pid_t                   tctl_pid;
+       /* set non-zero to ask the daemon thread to exit */
+       atomic_t                tctl_shutdown;
+};
+
+/*
+ * small data-structure for each page owned by tracefiled.
+ */
+/* XXX nikita: this declaration is internal to tracefile.c and should probably
+ * be moved there */
+struct trace_page {
+       /*
+        * page itself
+        */
+       cfs_page_t      *page;
+       /*
+        * linkage into one of the lists in trace_data_union or
+        * page_collection
+        */
+       struct list_head linkage;
+       /*
+        * number of bytes used within this page; never more than
+        * CFS_PAGE_SIZE (checked by __LASSERT_TAGE_INVARIANT())
+        */
+       unsigned int     used;
+       /*
+        * cpu that owns this page
+        */
+       unsigned short   cpu;
+       /*
+        * type (context) of this page; compared against
+        * trace_cpu_data::tcd_type
+        */
+       unsigned short   type;
+};
+
+/* fill in a record header, and print one record to the console */
+extern void set_ptldebug_header(struct ptldebug_header *header,
+                          int subsys, int mask, const int line,
+                          unsigned long stack);
+extern void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
+                            int len, const char *file, const char *fn);
+
+/* access to, and locking of, the trace_cpu_data entries, plus the buffer
+ * used for console messages */
+extern struct trace_cpu_data *trace_get_tcd(void);
+extern void trace_put_tcd(struct trace_cpu_data *tcd);
+extern int trace_lock_tcd(struct trace_cpu_data *tcd);
+extern void trace_unlock_tcd(struct trace_cpu_data *tcd);
+extern char *trace_get_console_buffer(void);
+extern void trace_put_console_buffer(char *buffer);
+
+/* run fn(arg) on every CPU */
+extern void trace_call_on_all_cpus(void (*fn)(void *arg), void *arg);
+
+int trace_refill_stock(struct trace_cpu_data *tcd, int gfp,
+                      struct list_head *stock);
+
+
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage);
+
+/* called by __LASSERT() below; prints 'str' to the console and panics */
+extern void trace_assertion_failed(const char *str, const char *fn,
+                                  const char *file, int line);
+
+/* ASSERTION that is safe to use within the debug system: on failure it calls
+ * trace_assertion_failed() with the stringized condition and the current
+ * function, file and line.  Because the condition is stringized, the text
+ * of the message depends on how the argument is spelled at the call site. */
+#define __LASSERT(cond)                                                                \
+({                                                                             \
+       if (unlikely(!(cond))) {                                                \
+                trace_assertion_failed("ASSERTION("#cond") failed",            \
+                                      __FUNCTION__, __FILE__, __LINE__);       \
+       }                                                                       \
+})
+
+/* Sanity checks on a trace_page: it is non-NULL, has a page with a positive
+ * count, and uses no more than CFS_PAGE_SIZE bytes.  'tage' is used
+ * unparenthesized, so pass a plain identifier. */
+#define __LASSERT_TAGE_INVARIANT(tage)                 \
+({                                                     \
+        __LASSERT(tage != NULL);                       \
+        __LASSERT(tage->page != NULL);                 \
+        __LASSERT(tage->used <= CFS_PAGE_SIZE);                \
+        __LASSERT(cfs_page_count(tage->page) > 0);     \
+})
+
+#endif /* LUSTRE_TRACEFILE_PRIVATE */
+
+#endif /* __LIBCFS_TRACEFILE_H__ */
diff --git a/libcfs/libcfs/user-bitops.c b/libcfs/libcfs/user-bitops.c
new file mode 100644 (file)
index 0000000..8f94593
--- /dev/null
@@ -0,0 +1,98 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2007 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __KERNEL__
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include <libcfs/user-bitops.h>
+
+/* Index of the unsigned long word that contains bit number \a start. */
+#define OFF_BY_START(start)     ((start)/BITS_PER_LONG)
+
+/*
+ * Find the first set bit at position >= \a offset in the bitmap \a addr,
+ * which holds \a size bits.
+ *
+ * Returns the bit number, or \a size when no set bit exists in
+ * [offset, size).
+ */
+unsigned long find_next_bit(unsigned long *addr,
+                            unsigned long size, unsigned long offset)
+{
+        unsigned long *word, *last;
+        unsigned long first_bit, bit, base, tmp;
+
+        /* check first: size - 1 below would wrap for an empty bitmap */
+        if (offset >= size)
+                return size;
+
+        word = addr + OFF_BY_START(offset);
+        last = addr + OFF_BY_START(size - 1);
+        first_bit = offset % BITS_PER_LONG;
+        base = offset - first_bit;
+
+        if (first_bit != 0) {
+                /* ignore the bits below offset in the first, partial word;
+                 * keep the full word width (an int would drop the top half
+                 * of a 64-bit long) */
+                tmp = *word & (~0UL << first_bit);
+                if (tmp != 0UL) {
+                        bit = __ffs(tmp);
+                        goto found;
+                }
+                /* advance exactly one word, in step with base */
+                word++;
+                base += BITS_PER_LONG;
+        }
+        while (word <= last) {
+                if (*word != 0UL) {
+                        bit = __ffs(*word);
+                        goto found;
+                }
+                word++;
+                base += BITS_PER_LONG;
+        }
+        return size;
+found:
+        bit += base;
+        /* a hit in the unused tail of the last word is not a match */
+        return bit < size ? bit : size;
+}
+
+/*
+ * Find the first clear bit at position >= \a offset in the bitmap \a addr,
+ * which holds \a size bits.
+ *
+ * Returns the bit number, or \a size when no clear bit exists in
+ * [offset, size).
+ */
+unsigned long find_next_zero_bit(unsigned long *addr,
+                                 unsigned long size, unsigned long offset)
+{
+        unsigned long *word, *last;
+        unsigned long first_bit, bit, base, tmp;
+
+        /* check first: size - 1 below would wrap for an empty bitmap */
+        if (offset >= size)
+                return size;
+
+        word = addr + OFF_BY_START(offset);
+        last = addr + OFF_BY_START(size - 1);
+        first_bit = offset % BITS_PER_LONG;
+        base = offset - first_bit;
+
+        if (first_bit != 0) {
+                /* pretend the bits below offset are set so that they can
+                 * never be reported as the first zero bit */
+                tmp = *word | ~(~0UL << first_bit);
+                if (tmp != ~0UL) {
+                        bit = __ffz(tmp);
+                        goto found;
+                }
+                /* advance exactly one word, in step with base */
+                word++;
+                base += BITS_PER_LONG;
+        }
+        while (word <= last) {
+                if (*word != ~0UL) {
+                        bit = __ffz(*word);
+                        goto found;
+                }
+                word++;
+                base += BITS_PER_LONG;
+        }
+        return size;
+found:
+        bit += base;
+        /* a hit in the unused tail of the last word is not a match */
+        return bit < size ? bit : size;
+}
+
+#endif
diff --git a/libcfs/libcfs/user-lock.c b/libcfs/libcfs/user-lock.c
new file mode 100644 (file)
index 0000000..c521dc7
--- /dev/null
@@ -0,0 +1,343 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable synchronization APIs for user-level.
+ *
+ */
+
+/* Implementations of portable synchronization APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ *
+ * XXX Liang: Several branches share lnet with b_hd_newconfig; if we define
+ * the lock APIs here, they will conflict with liblustre in those other
+ * branches.
+ */
+
+#ifndef __KERNEL__
+
+#include <stdlib.h>
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+/*
+ * Optional debugging (magic stamping and checking ownership) can be added.
+ */
+
+#if 0
+/*
+ * NOTE: everything from here to the matching #endif is compiled out by the
+ * "#if 0" above.  The definitions are no-op placeholders for the primitives
+ * listed in each section header below.
+ */
+
+/*
+ * spin_lock
+ *
+ * - spin_lock_init(x)
+ * - spin_lock(x)
+ * - spin_unlock(x)
+ * - spin_trylock(x)
+ *
+ * - spin_lock_irqsave(x, f)
+ * - spin_unlock_irqrestore(x, f)
+ *
+ * No-op implementation.
+ */
+
+void spin_lock_init(spinlock_t *lock)
+{
+        LASSERT(lock != NULL);
+        (void)lock;
+}
+
+void spin_lock(spinlock_t *lock)
+{
+        (void)lock;
+}
+
+void spin_unlock(spinlock_t *lock)
+{
+        (void)lock;
+}
+
+/* Always reports success (returns 1). */
+int spin_trylock(spinlock_t *lock)
+{
+        (void)lock;
+       return 1;
+}
+
+void spin_lock_bh_init(spinlock_t *lock)
+{
+        LASSERT(lock != NULL);
+        (void)lock;
+}
+
+void spin_lock_bh(spinlock_t *lock)
+{
+        LASSERT(lock != NULL);
+        (void)lock;
+}
+
+void spin_unlock_bh(spinlock_t *lock)
+{
+        LASSERT(lock != NULL);
+        (void)lock;
+}
+
+/*
+ * Semaphore
+ *
+ * - sema_init(x, v)
+ * - __down(x)
+ * - __up(x)
+ */
+struct semaphore {};
+
+void sema_init(struct semaphore *s, int val)
+{
+        LASSERT(s != NULL);
+        (void)s;
+        (void)val;
+}
+
+void __down(struct semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+void __up(struct semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+/*
+ * Mutex:
+ *
+ * - init_mutex(x)
+ * - init_mutex_locked(x)
+ * - mutex_up(x)
+ * - mutex_down(x)
+ */
+
+#define mutex_up(s)                    __up(s)
+#define mutex_down(s)                  __down(s)
+
+#define init_mutex(x)                  sema_init(x, 1)
+#define init_mutex_locked(x)           sema_init(x, 0)
+
+/*
+ * Completion:
+ *
+ * - init_completion(c)
+ * - complete(c)
+ * - wait_for_completion(c)
+ */
+struct completion {};
+
+void init_completion(struct completion *c)
+{
+        LASSERT(c != NULL);
+        (void)c;
+}
+
+void complete(struct completion *c)
+{
+        LASSERT(c != NULL);
+        (void)c;
+}
+
+void wait_for_completion(struct completion *c)
+{
+        LASSERT(c != NULL);
+        (void)c;
+}
+
+/*
+ * rw_semaphore:
+ *
+ * - DECLARE_RWSEM(x)
+ * - init_rwsem(x)
+ * - down_read(x)
+ * - up_read(x)
+ * - down_write(x)
+ * - up_write(x)
+ */
+struct rw_semaphore {};
+
+void init_rwsem(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+void down_read(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+/* Always reports success (returns 1). */
+int down_read_trylock(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+       return 1;
+}
+
+void down_write(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+/* Always reports success (returns 1). */
+int down_write_trylock(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+       return 1;
+}
+
+void up_read(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+
+void up_write(struct rw_semaphore *s)
+{
+        LASSERT(s != NULL);
+        (void)s;
+}
+#endif
+
+#ifdef HAVE_LIBPTHREAD
+
+/*
+ * Completion
+ */
+
+/*
+ * Prepare completion \a c for use: no completions are pending, and the
+ * mutex and condition variable get default attributes.
+ */
+void cfs_init_completion(struct cfs_completion *c)
+{
+        LASSERT(c != NULL);
+
+        pthread_mutex_init(&c->c_mut, NULL);
+        pthread_cond_init(&c->c_cond, NULL);
+
+        /* nothing has been signalled yet */
+        c->c_done = 0;
+}
+
+/* Release the pthread objects embedded in completion \a c. */
+void cfs_fini_completion(struct cfs_completion *c)
+{
+        pthread_mutex_t *mut;
+        pthread_cond_t  *cond;
+
+        LASSERT(c != NULL);
+
+        mut = &c->c_mut;
+        cond = &c->c_cond;
+
+        pthread_mutex_destroy(mut);
+        pthread_cond_destroy(cond);
+}
+
+/*
+ * Record one completion on \a c and signal the condition variable, all
+ * under the completion's mutex.
+ */
+void cfs_complete(struct cfs_completion *c)
+{
+        pthread_mutex_t *mut;
+
+        LASSERT(c != NULL);
+        mut = &c->c_mut;
+
+        pthread_mutex_lock(mut);
+        c->c_done += 1;
+        pthread_cond_signal(&c->c_cond);
+        pthread_mutex_unlock(mut);
+}
+
+/*
+ * Block until at least one completion has been recorded on \a c, then
+ * consume it.
+ */
+void cfs_wait_for_completion(struct cfs_completion *c)
+{
+        pthread_mutex_t *mut;
+
+        LASSERT(c != NULL);
+        mut = &c->c_mut;
+
+        pthread_mutex_lock(mut);
+        for (;;) {
+                /* re-check after every wake-up */
+                if (c->c_done != 0)
+                        break;
+                pthread_cond_wait(&c->c_cond, mut);
+        }
+        c->c_done -= 1;
+        pthread_mutex_unlock(mut);
+}
+
+/*
+ * atomic primitives
+ */
+
+/* One process-wide mutex serialises every access to any cfs_atomic_t. */
+static pthread_mutex_t atomic_guard_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Return the current counter value of \a a. */
+int cfs_atomic_read(cfs_atomic_t *a)
+{
+        int snapshot;
+
+        pthread_mutex_lock(&atomic_guard_lock);
+        snapshot = a->counter;
+        pthread_mutex_unlock(&atomic_guard_lock);
+
+        return snapshot;
+}
+
+/* Store \a b into the counter of \a a. */
+void cfs_atomic_set(cfs_atomic_t *a, int b)
+{
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter = b;
+        pthread_mutex_unlock(&atomic_guard_lock);
+}
+
+/* Decrement \a a; return non-zero iff the counter became zero. */
+int cfs_atomic_dec_and_test(cfs_atomic_t *a)
+{
+        int remaining;
+
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter--;
+        remaining = a->counter;
+        pthread_mutex_unlock(&atomic_guard_lock);
+
+        return remaining == 0;
+}
+
+/* Increment \a a by one. */
+void cfs_atomic_inc(cfs_atomic_t *a)
+{
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter++;
+        pthread_mutex_unlock(&atomic_guard_lock);
+}
+
+/* Decrement \a a by one. */
+void cfs_atomic_dec(cfs_atomic_t *a)
+{
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter--;
+        pthread_mutex_unlock(&atomic_guard_lock);
+}
+
+/* Add \a b to \a a. */
+void cfs_atomic_add(int b, cfs_atomic_t *a)
+{
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter = a->counter + b;
+        pthread_mutex_unlock(&atomic_guard_lock);
+}
+
+/* Subtract \a b from \a a. */
+void cfs_atomic_sub(int b, cfs_atomic_t *a)
+{
+        pthread_mutex_lock(&atomic_guard_lock);
+        a->counter = a->counter - b;
+        pthread_mutex_unlock(&atomic_guard_lock);
+}
+
+#endif /* HAVE_LIBPTHREAD */
+
+
+/* !__KERNEL__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/libcfs/user-prim.c b/libcfs/libcfs/user-prim.c
new file mode 100644 (file)
index 0000000..ffa32c1
--- /dev/null
@@ -0,0 +1,399 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov <nikita@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable APIs for user-level.
+ *
+ */
+
+/* Implementations of portable APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#include <sys/mman.h>
+#ifndef  __CYGWIN__
+#include <stdint.h>
+#ifdef HAVE_ASM_PAGE_H
+#include <asm/page.h>
+#endif
+#ifdef HAVE_SYS_USER_H
+#include <sys/user.h>
+#endif
+#else
+#include <sys/types.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_VFS_H
+#include <sys/vfs.h>
+#endif
+
+/*
+ * Sleep channel. No-op implementation.
+ */
+
+/* Set up a wait queue; there is no state to initialise. */
+void cfs_waitq_init(struct cfs_waitq *waitq)
+{
+        (void)waitq;
+        LASSERT(waitq != NULL);
+}
+
+/* Set up a wait link; there is no state to initialise. */
+void cfs_waitlink_init(struct cfs_waitlink *link)
+{
+        (void)link;
+        LASSERT(link != NULL);
+}
+
+/* Queue \a link on \a waitq: only validates the arguments. */
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        (void)waitq;
+        (void)link;
+        LASSERT(waitq != NULL);
+        LASSERT(link != NULL);
+}
+
+/* Exclusive variant of cfs_waitq_add(): only validates the arguments. */
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        (void)waitq;
+        (void)link;
+        LASSERT(waitq != NULL);
+        LASSERT(link != NULL);
+}
+
+/* Move \a link to \a waitq: only validates the arguments. */
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq)
+{
+        (void)waitq;
+        (void)link;
+        LASSERT(waitq != NULL);
+        LASSERT(link != NULL);
+}
+
+/* Remove \a link from \a waitq: only validates the arguments. */
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link)
+{
+        (void)waitq;
+        (void)link;
+        LASSERT(waitq != NULL);
+        LASSERT(link != NULL);
+}
+
+/* Always reports that nobody is waiting (returns 0). */
+int cfs_waitq_active(struct cfs_waitq *waitq)
+{
+        (void)waitq;
+        LASSERT(waitq != NULL);
+
+        return 0;
+}
+
+/* Wake one waiter: only validates the argument. */
+void cfs_waitq_signal(struct cfs_waitq *waitq)
+{
+        (void)waitq;
+        LASSERT(waitq != NULL);
+}
+
+/* Wake up to \a nr waiters: only validates the queue argument. */
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr)
+{
+        (void)waitq;
+        LASSERT(waitq != NULL);
+}
+
+/* Wake all waiters: only validates the argument. */
+void cfs_waitq_broadcast(struct cfs_waitq *waitq)
+{
+        (void)waitq;
+        LASSERT(waitq != NULL);
+}
+
+/* Wait on \a link: returns immediately after validating the argument. */
+void cfs_waitq_wait(struct cfs_waitlink *link, int state)
+{
+        (void)link;
+        LASSERT(link != NULL);
+}
+
+/* Timed wait on \a link: returns 0 immediately. */
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout)
+{
+        (void)link;
+        LASSERT(link != NULL);
+
+        return 0;
+}
+
+#ifdef HAVE_LIBPTHREAD
+
+/*
+ * Threads
+ */
+
+/*
+ * Start-up parameters passed from cfs_create_thread() to
+ * cfs_thread_helper() through the pthread start-routine argument.
+ */
+struct lustre_thread_arg {
+        /* function the new thread calls */
+        cfs_thread_t f; 
+        /* argument passed to f */
+        void *arg;
+};
+/*
+ * pthread start routine: copies the call descriptor out of \a data, frees
+ * the descriptor, then runs the requested function.  The function's return
+ * value is discarded.
+ */
+static void *cfs_thread_helper(void *data)
+{
+        struct lustre_thread_arg call = *(struct lustre_thread_arg *)data;
+
+        /* the descriptor is no longer needed once copied */
+        free(data);
+
+        (void)call.f(call.arg);
+        return NULL;
+}
+/*
+ * Start a detached thread that runs func(arg).
+ *
+ * Returns 0 on success, -ENOMEM when the call descriptor cannot be
+ * allocated, or the negated pthread error code otherwise.  On any failure
+ * no thread is running and the descriptor has been released.
+ */
+int cfs_create_thread(cfs_thread_t func, void *arg)
+{
+        pthread_t tid;
+        pthread_attr_t tattr;
+        int rc;
+        struct lustre_thread_arg *targ_p = malloc(sizeof(*targ_p));
+
+        if (targ_p == NULL)
+                return -ENOMEM;
+
+        targ_p->f = func;
+        targ_p->arg = arg;
+
+        rc = pthread_attr_init(&tattr);
+        if (rc != 0) {
+                free(targ_p);
+                return -rc;
+        }
+
+        pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED);
+        rc = pthread_create(&tid, &tattr, cfs_thread_helper, targ_p);
+        pthread_attr_destroy(&tattr);
+
+        /* the new thread frees targ_p itself; if it never started, the
+         * descriptor would otherwise leak */
+        if (rc != 0)
+                free(targ_p);
+
+        return -rc;
+}
+#endif
+
+/* Real user ID of the calling process, as reported by getuid(). */
+uid_t cfs_curproc_uid(void)
+{
+        uid_t uid = getuid();
+
+        return uid;
+}
+
+/*
+ * Read an integer tunable from the environment variable \a name.
+ *
+ * The number may be decimal, octal (leading 0) or hexadecimal (leading 0x).
+ * Returns 0 when the variable is unset (\a *value untouched) or was parsed
+ * completely (\a *value updated).  Returns -EINVAL, leaving \a *value
+ * untouched, when the text is empty, has trailing garbage or is out of
+ * range for strtoull().
+ */
+int cfs_parse_int_tunable(int *value, char *name)
+{
+        char    *env = getenv(name);
+        char    *end;
+        unsigned long long parsed;
+
+        if (env == NULL)
+                return 0;
+
+        errno = 0;
+        parsed = strtoull(env, &end, 0);
+        /* end == env means no digits were consumed at all */
+        if (end != env && *end == 0 && errno == 0) {
+                /* NOTE(review): values wider than int are truncated by this
+                 * conversion, as before */
+                *value = parsed;
+                return 0;
+        }
+
+        CERROR("Can't parse tunable %s=%s\n", name, env);
+        return -EINVAL;
+}
+
+/*
+ * Allocator
+ */
+
+/*
+ * Allocate a page descriptor plus CFS_PAGE_SIZE bytes of backing storage
+ * with malloc().  \a flags is not used.  Returns NULL if either allocation
+ * fails.
+ */
+cfs_page_t *cfs_alloc_page(unsigned int flags)
+{
+        cfs_page_t *page;
+
+        page = malloc(sizeof(*page));
+        if (page != NULL) {
+                page->addr = malloc(CFS_PAGE_SIZE);
+                if (page->addr == NULL) {
+                        /* no backing store: undo the descriptor */
+                        free(page);
+                        page = NULL;
+                }
+        }
+
+        return page;
+}
+
+/* Release the backing storage of \a pg and then the descriptor itself. */
+void cfs_free_page(cfs_page_t *pg)
+{
+        void *backing = pg->addr;
+
+        free(backing);
+        free(pg);
+}
+
+/* Address of the storage that backs page \a pg. */
+void *cfs_page_address(cfs_page_t *pg)
+{
+        void *addr = pg->addr;
+
+        return addr;
+}
+
+/* "Map" page \a pg: simply returns the address of its backing storage. */
+void *cfs_kmap(cfs_page_t *pg)
+{
+        void *addr = pg->addr;
+
+        return addr;
+}
+
+/* Counterpart of cfs_kmap(); there is nothing to undo. */
+void cfs_kunmap(cfs_page_t *pg)
+{
+        /* intentionally empty */
+}
+
+/*
+ * SLAB allocator
+ */
+
+/*
+ * Create a "slab cache" descriptor that only remembers the object size.
+ * \a off and \a flags are not used.  Returns NULL when the descriptor
+ * cannot be allocated.
+ */
+cfs_mem_cache_t *
+cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags)
+{
+        cfs_mem_cache_t *cache = malloc(sizeof(*cache));
+
+        if (cache == NULL)
+                return NULL;
+
+        cache->size = objsize;
+        CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n",
+               name, cache, (int)objsize);
+
+        return cache;
+}
+
+/* Log and free the cache descriptor \a c.  Always returns 0. */
+int cfs_mem_cache_destroy(cfs_mem_cache_t *c)
+{
+        int rc = 0;
+
+        CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size);
+        free(c);
+
+        return rc;
+}
+
+/* Allocate one object of the cache's object size through cfs_alloc(). */
+void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp)
+{
+        void *obj;
+
+        obj = cfs_alloc(c->size, gfp);
+        return obj;
+}
+
+/* Return object \a addr via cfs_free(); the cache \a c is not consulted. */
+void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr)
+{
+        void *obj = addr;
+
+        cfs_free(obj);
+}
+
+/* Placeholder: does nothing for now. */
+void cfs_enter_debugger(void)
+{
+        /* intentionally empty */
+}
+
+/* No-op: \a str is ignored. */
+void cfs_daemonize(char *str)
+{
+        /* nothing to do */
+}
+
+/* No-op: \a str is ignored and 0 is always returned. */
+int cfs_daemonize_ctxt(char *str)
+{
+        int rc = 0;
+
+        return rc;
+}
+
+/*
+ * Install a signal mask with every signal set and return the mask that was
+ * in effect before.
+ */
+cfs_sigset_t cfs_block_allsigs(void)
+{
+        cfs_sigset_t   everything;
+        cfs_sigset_t   previous;
+        int            rc;
+
+        sigfillset(&everything);
+
+        rc = sigprocmask(SIG_SETMASK, &everything, &previous);
+        LASSERT(rc == 0);
+
+        return previous;
+}
+
+/*
+ * Replace the signal mask with \a blocks (SIG_SETMASK) and return the mask
+ * that was in effect before.
+ */
+cfs_sigset_t cfs_block_sigs(cfs_sigset_t blocks)
+{
+        cfs_sigset_t   previous;
+        int            rc;
+
+        rc = sigprocmask(SIG_SETMASK, &blocks, &previous);
+        LASSERT (rc == 0);
+
+        return previous;
+}
+
+/* Reinstall the signal mask \a old. */
+void cfs_restore_sigs(cfs_sigset_t old)
+{
+        int   rc;
+
+        rc = sigprocmask(SIG_SETMASK, &old, NULL);
+        LASSERT (rc == 0);
+}
+
+/*
+ * Return non-zero iff at least one signal is pending for the caller.
+ *
+ * The pending set is compared bytewise against an empty set.  Both sets are
+ * cleared with sigemptyset() first, because sigpending() is not guaranteed
+ * to write every byte of the object and stale bytes would make the
+ * comparison meaningless.
+ */
+int cfs_signal_pending(void)
+{
+        cfs_sigset_t    empty;
+        cfs_sigset_t    set;
+        int  rc;
+
+        sigemptyset(&empty);
+        sigemptyset(&set);
+
+        rc = sigpending(&set);
+        LASSERT (rc == 0);
+
+        /* the sets differ exactly when something is pending */
+        return memcmp(&empty, &set, sizeof(set)) != 0;
+}
+
+/* No-op. */
+void cfs_clear_sigpending(void)
+{
+        /* nothing to do */
+}
+
+#ifdef __linux__
+
+/*
+ * Stack back-tracing is implemented with the backtrace() function, which is
+ * provided by glibc rather than by Linux itself, so the __linux__ check
+ * above is not strictly the right test.
+ */
+#include <execinfo.h>
+
+/* Capture the current call stack into trace->frame using backtrace(). */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+        /* backtrace()'s return value (frames stored) is not recorded */
+        (void)backtrace(trace->frame, sizeof_array(trace->frame));
+}
+
+/*
+ * Return entry \a frame_no of trace->frame, or NULL when the index lies
+ * outside the array.
+ */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+        if (frame_no < 0 || frame_no >= sizeof_array(trace->frame))
+                return NULL;
+
+        return trace->frame[frame_no];
+}
+
+#else
+
+/* Stub used when __linux__ is not defined: records nothing. */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+        /* nothing to do */
+}
+
+/* Stub used when __linux__ is not defined: there are no frames. */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+        void *none = NULL;
+
+        return none;
+}
+
+/* __linux__ */
+#endif
+
+/*
+ * Report an LBUG at the given source location with D_EMERG severity and
+ * then abort() the process.
+ */
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+        /* No libcfs_catastrophe in userspace! */
+        libcfs_debug_msg(NULL, 0, D_EMERG,
+                         file, func, line,
+                         "LBUG\n");
+
+        abort();
+}
+
+/* !__KERNEL__ */
+#endif
+
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/libcfs/libcfs/user-tcpip.c b/libcfs/libcfs/user-tcpip.c
new file mode 100644 (file)
index 0000000..e0cedb9
--- /dev/null
@@ -0,0 +1,606 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2005 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if !defined(__KERNEL__) || !defined(REDSTORM)
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#include <sys/socket.h>
+#ifdef HAVE_NETINET_IN_H
+#include <netinet/in.h>
+#endif
+#include <netinet/tcp.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <string.h>
+#include <unistd.h>
+#include <poll.h>
+#include <net/if.h>
+#include <arpa/inet.h>
+#include <errno.h>
+#if defined(__sun__) || defined(__sun)
+#include <sys/sockio.h>
+#endif
+#ifndef __CYGWIN__
+#include <sys/syscall.h>
+#endif
+
+/*
+ * Functions to get network interfaces info
+ */
+
+/*
+ * Issue ioctl \a cmd with argument \a arg on a temporary AF_INET stream
+ * socket.
+ *
+ * Returns the ioctl result on success, or a negative errno value when
+ * either socket() or ioctl() fails.
+ */
+int
+libcfs_sock_ioctl(int cmd, unsigned long arg)
+{
+        int fd, rc;
+
+        fd = socket(AF_INET, SOCK_STREAM, 0);
+
+        if (fd < 0) {
+                rc = -errno;
+                CERROR("socket() failed: errno==%d\n", errno);
+                return rc;
+        }
+
+        rc = ioctl(fd, cmd, arg);
+        /* translate before close(), which may overwrite errno */
+        if (rc < 0)
+                rc = -errno;
+
+        close(fd);
+        return rc;
+}
+
+/*
+ * Query interface \a name: store in \a *up whether it is up and in \a *ip
+ * its IPv4 address in host byte order (0 when the interface is down).
+ *
+ * Returns 0 on success, -EINVAL for an over-long name, or the failing
+ * libcfs_sock_ioctl() result.
+ */
+int
+libcfs_ipif_query (char *name, int *up, __u32 *ip)
+{
+        struct ifreq        ifr;
+        struct sockaddr_in *sin;
+        int                 namelen;
+        int                 rc;
+
+        CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ);
+
+        namelen = strlen(name);
+        if (namelen >= IFNAMSIZ) {
+                CERROR("Interface name %s too long\n", name);
+                return -EINVAL;
+        }
+
+        /* step 1: interface flags */
+        strcpy(ifr.ifr_name, name);
+        rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr);
+        if (rc != 0) {
+                CERROR("Can't get flags for interface %s\n", name);
+                return rc;
+        }
+
+        if (!(ifr.ifr_flags & IFF_UP)) {
+                CDEBUG(D_NET, "Interface %s down\n", name);
+                *up = 0;
+                *ip = 0;
+                return 0;
+        }
+
+        *up = 1;
+
+        /* step 2: IPv4 address */
+        strcpy(ifr.ifr_name, name);
+        ifr.ifr_addr.sa_family = AF_INET;
+        rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr);
+        if (rc != 0) {
+                CERROR("Can't get IP address for interface %s\n", name);
+                return rc;
+        }
+
+        sin = (struct sockaddr_in *)&ifr.ifr_addr;
+        *ip = ntohl(sin->sin_addr.s_addr);
+
+        return 0;
+}
+
+/*
+ * Free a name array of \a n slots: every name up to the first NULL slot,
+ * then the array itself.
+ */
+void
+libcfs_ipif_free_enumeration (char **names, int n)
+{
+        int      i = 0;
+
+        LASSERT (n > 0);
+
+        while (i < n && names[i] != NULL) {
+                LIBCFS_FREE(names[i], IFNAMSIZ);
+                i++;
+        }
+
+        LIBCFS_FREE(names, n * sizeof(*names));
+}
+
+/*
+ * Allocate and fill in 'names', returning # interfaces/error.
+ *
+ * On success with at least one interface, \a *namesp points to an array of
+ * that many NUL-terminated names, each in an IFNAMSIZ-byte buffer
+ * (presumably to be released with libcfs_ipif_free_enumeration()).  When no
+ * interface is found, 0 is returned and \a *namesp is left untouched.  On
+ * failure a negative errno value is returned.
+ */
+int
+libcfs_ipif_enumerate (char ***namesp)
+{
+        char          **names;
+        int             nalloc;
+        int             nfound;
+        struct ifreq   *ifr;
+        struct ifconf   ifc;
+        int             rc;
+        int             nob;
+        int             i;
+
+
+        nalloc = 16;        /* first guess at max interfaces */
+        for (;;) {
+                LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr));
+                if (ifr == NULL) {
+                        CERROR ("ENOMEM enumerating up to %d interfaces\n",
+                                nalloc);
+                        rc = -ENOMEM;
+                        goto out0;
+                }
+
+                ifc.ifc_buf = (char *)ifr;
+                ifc.ifc_len = nalloc * sizeof(*ifr);
+
+                rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc);
+
+                if (rc < 0) {
+                        CERROR ("Error %d enumerating interfaces\n", rc);
+                        goto out1;
+                }
+
+                LASSERT (rc == 0);
+
+                nfound = ifc.ifc_len/sizeof(*ifr);
+                LASSERT (nfound <= nalloc);
+
+                /* a completely full buffer may have been truncated: retry
+                 * with twice the room */
+                if (nfound < nalloc)
+                        break;
+
+                LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+                nalloc *= 2;
+        }
+
+        if (nfound == 0)
+                goto out1;
+
+        LIBCFS_ALLOC(names, nfound * sizeof(*names));
+        if (names == NULL) {
+                rc = -ENOMEM;
+                goto out1;
+        }
+        /* NULL out all names[i] */
+        memset (names, 0, nfound * sizeof(*names));
+
+        for (i = 0; i < nfound; i++) {
+                /* ifr_name is a fixed IFNAMSIZ-byte field that need not be
+                 * NUL-terminated, so never scan past its end */
+                char *nul = memchr(ifr[i].ifr_name, 0, IFNAMSIZ);
+
+                if (nul == NULL) {
+                        /* no space for terminating NULL */
+                        CERROR("interface name %.*s too long (%d max)\n",
+                               IFNAMSIZ, ifr[i].ifr_name, IFNAMSIZ);
+                        rc = -ENAMETOOLONG;
+                        goto out2;
+                }
+                nob = nul - ifr[i].ifr_name;
+
+                LIBCFS_ALLOC(names[i], IFNAMSIZ);
+                if (names[i] == NULL) {
+                        rc = -ENOMEM;
+                        goto out2;
+                }
+
+                memcpy(names[i], ifr[i].ifr_name, nob);
+                names[i][nob] = 0;
+        }
+
+        *namesp = names;
+        rc = nfound;
+
+ out2:
+        /* the success path falls through here with rc > 0 */
+        if (rc < 0)
+                libcfs_ipif_free_enumeration(names, nfound);
+ out1:
+        LIBCFS_FREE(ifr, nalloc * sizeof(*ifr));
+ out0:
+        return rc;
+}
+
+/*
+ * Network functions used by user-land lnet acceptor
+ */
+
+/*
+ * Create a TCP socket with SO_REUSEADDR set, optionally bind it to
+ * \a local_ip (host byte order, 0 = any address) and \a local_port, and put
+ * it into the listening state with the given \a backlog.
+ *
+ * Returns 0 with the descriptor in \a *sockp, or a negative errno value; on
+ * failure the socket has been closed.
+ */
+int
+libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog)
+{
+        int                rc;
+        int                option;
+        struct sockaddr_in locaddr;
+
+        *sockp = socket(AF_INET, SOCK_STREAM, 0);
+        if (*sockp < 0) {
+                rc = -errno;
+                CERROR("socket() failed: errno==%d\n", errno);
+                return rc;
+        }
+
+        option = 1;
+        if ( setsockopt(*sockp, SOL_SOCKET, SO_REUSEADDR,
+                        (char *)&option, sizeof (option)) ) {
+                rc = -errno;
+                CERROR("setsockopt(SO_REUSEADDR) failed: errno==%d\n", -rc);
+                goto failed;
+        }
+
+        if (local_ip != 0 || local_port != 0) {
+                memset(&locaddr, 0, sizeof(locaddr));
+                locaddr.sin_family = AF_INET;
+                locaddr.sin_port = htons(local_port);
+                locaddr.sin_addr.s_addr = (local_ip == 0) ?
+                                          INADDR_ANY : htonl(local_ip);
+
+                if ( bind(*sockp, (struct sockaddr *)&locaddr, sizeof(locaddr)) ) {
+                        rc = -errno;
+                        /* errno values are positive, so rc holds the negated
+                         * code; a busy port is only worth a debug message */
+                        if ( rc == -EADDRINUSE )
+                                CDEBUG(D_NET, "Port %d already in use\n",
+                                       local_port);
+                        else
+                                CERROR("bind() to port %d failed: errno==%d\n",
+                                       local_port, -rc);
+                        goto failed;
+                }
+        }
+
+        if ( listen(*sockp, backlog) ) {
+                rc = -errno;
+                CERROR("listen() with backlog==%d failed: errno==%d\n",
+                       backlog, -rc);
+                goto failed;
+        }
+
+        return 0;
+
+  failed:
+        close(*sockp);
+        return rc;
+}
+
+/*
+ * Accept one connection on listening socket \a sock.
+ *
+ * On success returns 0, stores the new descriptor in \a *newsockp and the
+ * peer's IPv4 address and port (both host byte order) in \a *peer_ip and
+ * \a *peer_port.  On failure returns a negative errno value.
+ */
+int
+libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port)
+{
+        int rc;
+        struct sockaddr_in accaddr;
+        socklen_t accaddr_len = sizeof(struct sockaddr_in);
+
+        *newsockp = accept(sock, (struct sockaddr *)&accaddr, &accaddr_len);
+
+        if ( *newsockp < 0 ) {
+                /* capture errno before logging can overwrite it */
+                rc = -errno;
+                CERROR("accept() failed: errno==%d\n", -rc);
+                return rc;
+        }
+
+        *peer_ip = ntohl(accaddr.sin_addr.s_addr);
+        *peer_port = ntohs(accaddr.sin_port);
+
+        return 0;
+}
+
+/*
+ * Read exactly \a nob bytes from \a sock into \a buffer, waiting at most
+ * \a timeout seconds in total.
+ *
+ * Returns 0 when all bytes arrived, -ETIMEDOUT when the time budget ran
+ * out, -EIO on end-of-file or an unexpected poll event, or another negative
+ * errno value from poll()/read().
+ */
+int
+libcfs_sock_read (int sock, void *buffer, int nob, int timeout)
+{
+        int rc;
+        int remaining_ms;
+        struct pollfd pfd;
+        cfs_time_t start_time = cfs_time_current();
+
+        pfd.fd = sock;
+        pfd.events = POLLIN;
+        pfd.revents = 0;
+
+        /* poll(2) measures timeout in msec */
+        timeout *= 1000;
+        remaining_ms = timeout;
+
+        while (nob != 0 && remaining_ms > 0) {
+                rc = poll(&pfd, 1, remaining_ms);
+                if (rc < 0)
+                        return -errno;
+                if (rc == 0)
+                        return -ETIMEDOUT;
+                if ((pfd.revents & POLLIN) == 0)
+                        return -EIO;
+
+                rc = read(sock, buffer, nob);
+                if (rc < 0)
+                        return -errno;
+                if (rc == 0)
+                        return -EIO;
+
+                buffer = ((char *)buffer) + rc;
+                nob -= rc;
+
+                /* Recompute what is left of the budget from the total time
+                 * elapsed since the start.  cfs_duration_sec() presumably
+                 * yields whole seconds (TODO confirm), hence the scaling to
+                 * msec. */
+                remaining_ms = timeout - 1000 *
+                        cfs_duration_sec(cfs_time_sub(cfs_time_current(),
+                                                      start_time));
+        }
+
+        if (nob == 0)
+                return 0;
+        else
+                return -ETIMEDOUT;
+}
+
+/*
+ * Wake up anybody sleeping in accept() on \a port by making a throw-away
+ * connection to 127.0.0.1.  Failures are only logged.
+ */
+void
+libcfs_sock_abort_accept(__u16 port)
+{
+        struct sockaddr_in loopback;
+        int                fd;
+
+        fd = socket(AF_INET, SOCK_STREAM, 0);
+        if ( fd < 0 ) {
+                CERROR("socket() failed: errno==%d\n", errno);
+                return;
+        }
+
+        memset(&loopback, 0, sizeof(loopback));
+        loopback.sin_family = AF_INET;
+        loopback.sin_port = htons(port);
+        loopback.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+        if ( connect(fd, (struct sockaddr *)&loopback, sizeof(loopback)) != 0 ) {
+                /* a refused connection just means nobody is listening */
+                if ( errno == ECONNREFUSED )
+                        CDEBUG(D_NET, "Nobody to wake up at %d\n", port);
+                else
+                        CERROR("connect() failed: errno==%d\n", errno);
+        }
+
+        close(fd);
+}
+
+/*
+ * Network functions of common use
+ */
+
+/*
+ * Look up the peer of connected socket \a sock_fd.  Each output pointer is
+ * optional (may be NULL); address and port are stored in host byte order.
+ *
+ * Returns 0 on success or a negative errno value from getpeername().
+ */
+int
+libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p)
+{
+        struct sockaddr_in peer_addr;
+        socklen_t          peer_addr_len = sizeof(peer_addr);
+
+        if (getpeername(sock_fd, (struct sockaddr *)&peer_addr,
+                        &peer_addr_len) != 0)
+                return -errno;
+
+        if (ipaddr_p != NULL)
+                *ipaddr_p = ntohl(peer_addr.sin_addr.s_addr);
+
+        if (port_p != NULL)
+                *port_p = ntohs(peer_addr.sin_port);
+
+        return 0;
+}
+
+/*
+ * Create a pair of connected AF_UNIX stream sockets in fdp[0] and fdp[1]
+ * and switch both to non-blocking mode.
+ *
+ * Returns 0 on success or a negative errno value; if switching either end
+ * fails, both descriptors are closed.
+ */
+int
+libcfs_socketpair(int *fdp)
+{
+        int rc;
+
+        if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdp) != 0) {
+                rc = -errno;
+                CERROR ("Cannot create socket pair\n");
+                return rc;
+        }
+
+        rc = libcfs_fcntl_nonblock(fdp[0]);
+        if (rc == 0)
+                rc = libcfs_fcntl_nonblock(fdp[1]);
+
+        if (rc != 0) {
+                close(fdp[0]);
+                close(fdp[1]);
+        }
+
+        return rc;
+}
+
+/*
+ * Add O_NONBLOCK to the file status flags of \a fd.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int
+libcfs_fcntl_nonblock(int fd)
+{
+        int cur_flags;
+        int rc;
+
+        cur_flags = fcntl(fd, F_GETFL, 0);
+        if (cur_flags == -1) {
+                rc = -errno;
+                CERROR ("Cannot get socket flags\n");
+                return rc;
+        }
+
+        if (fcntl(fd, F_SETFL, cur_flags | O_NONBLOCK) != 0) {
+                rc = -errno;
+                CERROR ("Cannot set socket flags\n");
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * Set TCP_NODELAY on \a fd to the inverse of \a nagle: a non-zero \a nagle
+ * clears the option, zero sets it.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+int
+libcfs_sock_set_nagle(int fd, int nagle)
+{
+#if defined(__sun__) || defined(__sun)
+        const int level = IPPROTO_TCP;
+#else
+        const int level = SOL_TCP;
+#endif
+        int nodelay = nagle ? 0 : 1;
+        int rc;
+
+        if (setsockopt(fd, level, TCP_NODELAY, &nodelay, sizeof(nodelay)) != 0) {
+                rc = -errno;
+                CERROR ("Cannot set NODELAY socket option\n");
+                return rc;
+        }
+
+        return 0;
+}
+
+/*
+ * Set both the send and the receive buffer size of fd to bufsiz bytes.
+ * bufsiz must be non-zero (asserted).
+ *
+ * Returns 0 on success or -errno of the first setsockopt() that fails.
+ */
+int
+libcfs_sock_set_bufsiz(int fd, int bufsiz)
+{
+        int size;
+        int err;
+
+        LASSERT (bufsiz != 0);
+
+        size = bufsiz;
+        if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)) != 0) {
+                err = -errno;
+                CERROR ("Cannot set SNDBUF socket option\n");
+                return err;
+        }
+
+        size = bufsiz;
+        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)) != 0) {
+                err = -errno;
+                CERROR ("Cannot set RCVBUF socket option\n");
+                return err;
+        }
+
+        return 0;
+}
+
+/*
+ * Create an AF_INET stream socket with SO_REUSEADDR set and store its
+ * descriptor in *fdp.
+ *
+ * Returns 0 on success or -errno on failure; *fdp is written only on
+ * success and the socket is closed if SO_REUSEADDR cannot be set.
+ */
+int
+libcfs_sock_create(int *fdp)
+{
+        int reuse = 1;
+        int sock;
+        int err;
+
+        sock = socket(AF_INET, SOCK_STREAM, 0);
+        if (sock < 0) {
+                err = -errno;
+                CERROR ("Cannot create socket\n");
+                return err;
+        }
+
+        if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
+                       &reuse, sizeof(reuse)) != 0) {
+                err = -errno;
+                CERROR  ("Cannot set SO_REUSEADDR for socket\n");
+                close(sock);
+                return err;
+        }
+
+        *fdp = sock;
+        return 0;
+}
+
+/*
+ * Bind fd to the given port (host byte order) on INADDR_ANY.
+ *
+ * Returns 0 on success or -errno if bind() fails.
+ */
+int
+libcfs_sock_bind_to_port(int fd, __u16 port)
+{
+        struct sockaddr_in local;
+        int                err;
+
+        memset(&local, 0, sizeof(local));
+        local.sin_family      = AF_INET;
+        local.sin_port        = htons(port);
+        local.sin_addr.s_addr = INADDR_ANY;
+
+        if (bind(fd, (struct sockaddr *)&local, sizeof(local)) == 0)
+                return 0;
+
+        err = -errno;
+        CERROR  ("Cannot bind to port %d\n", port);
+        return err;
+}
+
+/*
+ * Start connecting fd to ip:port (both in host byte order).
+ *
+ * A connect() that fails with EINPROGRESS is reported as success.
+ * Returns 0 in that case and on immediate success, otherwise -errno.
+ * EADDRINUSE and EADDRNOTAVAIL are returned without logging.
+ */
+int
+libcfs_sock_connect(int fd, __u32 ip, __u16 port)
+{
+        struct sockaddr_in peer;
+        int                err;
+
+        memset(&peer, 0, sizeof(peer));
+        peer.sin_family      = AF_INET;
+        peer.sin_port        = htons(port);
+        peer.sin_addr.s_addr = htonl(ip);
+
+        if (connect(fd, (struct sockaddr *)&peer,
+                    sizeof(struct sockaddr_in)) == 0)
+                return 0;
+
+        /* connection still being established: not an error */
+        if (errno == EINPROGRESS)
+                return 0;
+
+        err = -errno;
+        if (err != -EADDRINUSE && err != -EADDRNOTAVAIL)
+                CERROR ("Cannot connect to %u.%u.%u.%u:%d (err=%d)\n",
+                        HIPQUAD(ip), port, errno);
+        return err;
+}
+
+/*
+ * Gather-write count iovecs to fd.
+ *
+ * Returns the number of bytes written, 0 if nothing was written, or
+ * -errno on a fatal error.
+ *
+ * NB: EPIPE and ECONNRESET are considered as non-fatal
+ * because:
+ * 1) it still makes sense to continue reading &&
+ * 2) anyway, poll() will set up POLLHUP|POLLERR flags */
+int libcfs_sock_writev(int fd, const struct iovec *vector, int count)
+{
+        int nob = syscall(SYS_writev, fd, vector, count);
+
+        /* zero means "wrote nothing", positive is the byte count */
+        if (nob >= 0)
+                return nob;
+
+        switch (errno) {
+        case EAGAIN:     /* write nothing   */
+        case EPIPE:      /* non-fatal error */
+        case ECONNRESET: /* non-fatal error */
+                return 0;
+        default:
+                return -errno;
+        }
+}
+
+/*
+ * Scatter-read from fd into count iovecs.
+ *
+ * Returns the number of bytes read, 0 if nothing was available (EAGAIN),
+ * -EIO on end of file, or -errno on any other error.
+ */
+int libcfs_sock_readv(int fd, const struct iovec *vector, int count)
+{
+        int nob = syscall(SYS_readv, fd, vector, count);
+
+        if (nob > 0)
+                return nob;
+
+        if (nob == 0) /* EOF */
+                return -EIO;
+
+        /* read nothing */
+        if (errno == EAGAIN)
+                return 0;
+
+        return -errno;
+}
+
+#endif /* !__KERNEL__ || !defined(REDSTORM) */
diff --git a/libcfs/libcfs/watchdog.c b/libcfs/libcfs/watchdog.c
new file mode 100644 (file)
index 0000000..89d757c
--- /dev/null
@@ -0,0 +1,427 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ *   Author: Jacob Berkman <jacob@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+/*
+ * State of one watchdog.  lc_watchdog_add() allocates it for the calling
+ * thread; when the timer fires, lcw_cb() marks it expired and queues it on
+ * lcw_pending_timers so the dispatcher thread can run lcw_callback.
+ */
+struct lc_watchdog {
+        cfs_timer_t       lcw_timer; /* kernel timer */
+        struct list_head  lcw_list; /* link on lcw_pending_timers */
+        struct timeval    lcw_last_touched; /* set by lcw_update_time() */
+        cfs_task_t       *lcw_task; /* task that called lc_watchdog_add() */
+
+        void            (*lcw_callback)(pid_t, void *); /* run by dispatcher */
+        void             *lcw_data; /* second argument of lcw_callback */
+
+        pid_t             lcw_pid; /* pid of the creating thread */
+        cfs_duration_t    lcw_time; /* time until watchdog fires, jiffies */
+
+        enum {
+                LC_WATCHDOG_DISABLED, /* a firing timer is ignored */
+                LC_WATCHDOG_ENABLED,  /* set by lc_watchdog_touch_ms() */
+                LC_WATCHDOG_EXPIRED   /* set by lcw_cb() when the timer fired */
+        } lcw_state;
+};
+
+#ifdef WITH_WATCHDOG
+/*
+ * The dispatcher will complete lcw_start_completion when it starts,
+ * and lcw_stop_completion when it exits.
+ * Wake lcw_event_waitq to signal timer callback dispatches.
+ */
+static struct completion lcw_start_completion;
+static struct completion lcw_stop_completion;
+static wait_queue_head_t lcw_event_waitq;
+
+/*
+ * Set this and wake lcw_event_waitq to stop the dispatcher.
+ * LCW_FLAG_STOP is a bit number within lcw_flags.
+ */
+enum {
+        LCW_FLAG_STOP = 0
+};
+static unsigned long lcw_flags = 0;
+
+/*
+ * Number of outstanding watchdogs; modified under lcw_refcount_sem.
+ * When it hits 1, we start the dispatcher.
+ * When it hits 0, we stop the dispatcher.
+ */
+static __u32         lcw_refcount = 0;
+static DECLARE_MUTEX(lcw_refcount_sem);
+
+/*
+ * List of timers that have fired that need their callbacks run by the
+ * dispatcher.
+ */
+static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! */
+static struct list_head lcw_pending_timers = \
+        LIST_HEAD_INIT(lcw_pending_timers);
+
+#ifdef HAVE_TASKLIST_LOCK
+/*
+ * Dump the stack of the task that owns lcw.
+ *
+ * The task is looked up by pid under tasklist_lock; the stack is dumped
+ * only when that pid still resolves to the task recorded in lcw_task,
+ * otherwise a warning is logged instead.
+ */
+static void
+lcw_dump(struct lc_watchdog *lcw)
+{
+        cfs_task_t *owner;
+        ENTRY;
+
+        read_lock(&tasklist_lock);
+        owner = find_task_by_pid(lcw->lcw_pid);
+
+        if (owner != NULL && owner == lcw->lcw_task) {
+                libcfs_debug_dumpstack(owner);
+        } else if (owner == NULL) {
+                CWARN("Process %d was not found in the task list; "
+                      "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
+        } else {
+                /* the pid now belongs to a different task */
+                CWARN("The current process %d did not set the watchdog; "
+                      "watchdog callback may be incomplete\n", (int)lcw->lcw_pid);
+        }
+
+        read_unlock(&tasklist_lock);
+        EXIT;
+}
+#else
+/*
+ * Fallback used when HAVE_TASKLIST_LOCK is not defined: no stack dump is
+ * attempted, an error is logged instead.
+ */
+static void lcw_dump(struct lc_watchdog *lcw)
+{
+        CERROR("unable to dump stack because of missing export\n");
+}
+#endif
+/*
+ * Timer callback for a watchdog; data is the struct lc_watchdog pointer
+ * stored in lcw_timer.data by lc_watchdog_add().
+ *
+ * Does nothing unless the watchdog is ENABLED.  Otherwise marks it
+ * EXPIRED, logs a warning, dumps the owner's stack and queues the watchdog
+ * on lcw_pending_timers (if not already queued), waking the dispatcher.
+ */
+static void lcw_cb(unsigned long data)
+{
+        struct lc_watchdog *lcw = (struct lc_watchdog *)data;
+
+        ENTRY;
+
+        if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
+                EXIT;
+                return;
+        }
+
+        lcw->lcw_state = LC_WATCHDOG_EXPIRED;
+
+        /* NB this warning should appear on the console, but may not get into
+         * the logs since we're running in a softirq handler */
+
+        CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n",
+              (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time));
+        lcw_dump(lcw);
+
+        spin_lock_bh(&lcw_pending_timers_lock);
+
+        /* an empty lcw_list means the watchdog is not queued yet */
+        if (list_empty(&lcw->lcw_list)) {
+                list_add(&lcw->lcw_list, &lcw_pending_timers);
+                wake_up(&lcw_event_waitq);
+        }
+
+        spin_unlock_bh(&lcw_pending_timers_lock);
+
+        EXIT;
+}
+
+/*
+ * Wake-up condition for the dispatcher thread.
+ *
+ * Returns non-zero when LCW_FLAG_STOP is set or when at least one
+ * watchdog is queued on lcw_pending_timers.
+ */
+static int is_watchdog_fired(void)
+{
+        int fired = 1;
+
+        if (!test_bit(LCW_FLAG_STOP, &lcw_flags)) {
+                spin_lock_bh(&lcw_pending_timers_lock);
+                fired = !list_empty(&lcw_pending_timers);
+                spin_unlock_bh(&lcw_pending_timers_lock);
+        }
+
+        return fired;
+}
+/*
+ * Body of the "lc_watchdogd" dispatcher thread (data is unused).
+ *
+ * Blocks all signals, completes lcw_start_completion, then loops: sleep
+ * until is_watchdog_fired(), exit if LCW_FLAG_STOP is set, otherwise pop
+ * every watchdog off lcw_pending_timers and run its callback unless it
+ * has been disabled in the meantime.  Completes lcw_stop_completion on
+ * exit.  Returns 1 if timers were still pending at shutdown, else 0.
+ *
+ * NOTE(review): lcw is used after lcw_pending_timers_lock is dropped;
+ * nothing visible here stops lc_watchdog_delete() from freeing it
+ * concurrently - confirm callers serialise this.
+ */
+static int lcw_dispatch_main(void *data)
+{
+        int                 rc = 0;
+        unsigned long       flags;
+        struct lc_watchdog *lcw;
+
+        ENTRY;
+
+        cfs_daemonize("lc_watchdogd");
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        complete(&lcw_start_completion);
+
+        while (1) {
+                wait_event_interruptible(lcw_event_waitq, is_watchdog_fired());
+                CDEBUG(D_INFO, "Watchdog got woken up...\n");
+                if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
+                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
+
+                        spin_lock_bh(&lcw_pending_timers_lock);
+                        rc = !list_empty(&lcw_pending_timers);
+                        spin_unlock_bh(&lcw_pending_timers_lock);
+                        if (rc) {
+                                CERROR("pending timers list was not empty at "
+                                       "time of watchdog dispatch shutdown\n");
+                        }
+                        break;
+                }
+
+                spin_lock_bh(&lcw_pending_timers_lock);
+                while (!list_empty(&lcw_pending_timers)) {
+
+                        lcw = list_entry(lcw_pending_timers.next,
+                                         struct lc_watchdog,
+                                         lcw_list);
+                        list_del_init(&lcw->lcw_list);
+                        spin_unlock_bh(&lcw_pending_timers_lock);
+
+                        CDEBUG(D_INFO, "found lcw for pid %d: inactive for "
+                               "%lds\n", (int)lcw->lcw_pid,
+                               cfs_duration_sec(lcw->lcw_time));
+
+                        /* the callback runs with the list lock dropped */
+                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED)
+                                lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data);
+
+                        spin_lock_bh(&lcw_pending_timers_lock);
+                }
+                spin_unlock_bh(&lcw_pending_timers_lock);
+        }
+
+        complete(&lcw_stop_completion);
+
+        RETURN(rc);
+}
+
+/*
+ * Start the dispatcher thread and wait until it is running.
+ *
+ * Called with lcw_refcount_sem held when the first watchdog is added
+ * (lcw_refcount == 1).  (Re)initialises the completions and the wait
+ * queue, clears a stale stop request and spawns lcw_dispatch_main().
+ * If the thread cannot be created an error is logged and the function
+ * returns without a dispatcher running.
+ */
+static void lcw_dispatch_start(void)
+{
+        int rc;
+
+        ENTRY;
+        LASSERT(lcw_refcount == 1);
+
+        init_completion(&lcw_stop_completion);
+        init_completion(&lcw_start_completion);
+        init_waitqueue_head(&lcw_event_waitq);
+
+        /* lcw_dispatch_stop() leaves LCW_FLAG_STOP set; without clearing it
+         * a dispatcher restarted after a stop would exit immediately and no
+         * watchdog callback would ever run again */
+        clear_bit(LCW_FLAG_STOP, &lcw_flags);
+
+        CDEBUG(D_INFO, "starting dispatch thread\n");
+        rc = kernel_thread(lcw_dispatch_main, NULL, 0);
+        if (rc < 0) {
+                CERROR("error spawning watchdog dispatch thread: %d\n", rc);
+                EXIT;
+                return;
+        }
+        wait_for_completion(&lcw_start_completion);
+        CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
+
+        EXIT;
+}
+/*
+ * Stop the dispatcher thread and wait for it to exit.
+ *
+ * Called when the last watchdog is deleted (lcw_refcount == 0).  Sets
+ * LCW_FLAG_STOP, wakes the dispatcher and blocks on lcw_stop_completion.
+ * Nothing in this function clears LCW_FLAG_STOP again.
+ */
+static void lcw_dispatch_stop(void)
+{
+        ENTRY;
+        LASSERT(lcw_refcount == 0);
+
+        CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
+
+        set_bit(LCW_FLAG_STOP, &lcw_flags);
+        wake_up(&lcw_event_waitq);
+
+        wait_for_completion(&lcw_stop_completion);
+
+        CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
+
+        EXIT;
+}
+/*
+ * Allocate a watchdog for the calling thread.
+ *
+ * timeout_ms is converted to jiffies and stored in lcw_time; callback
+ * defaults to lc_watchdog_dumplog when NULL; data is passed back to the
+ * callback.  The watchdog starts DISABLED, so no timer is armed until
+ * lc_watchdog_touch_ms()/lc_watchdog_touch() is called.  Adding the first
+ * watchdog starts the dispatcher thread.
+ *
+ * Returns the new watchdog, or ERR_PTR(-ENOMEM) if allocation fails.
+ *
+ * NOTE(review): the timer fields are assigned before init_timer() is
+ * called - confirm init_timer() does not reset function/data/expires.
+ */
+struct lc_watchdog *lc_watchdog_add(int timeout_ms,
+                                    void (*callback)(pid_t, void *),
+                                    void *data)
+{
+        struct lc_watchdog *lcw = NULL;
+        ENTRY;
+
+        LIBCFS_ALLOC(lcw, sizeof(*lcw));
+        if (lcw == NULL) {
+                CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
+                RETURN(ERR_PTR(-ENOMEM));
+        }
+
+        lcw->lcw_task     = cfs_current();
+        lcw->lcw_pid      = cfs_curproc_pid();
+        lcw->lcw_time     = cfs_time_seconds(timeout_ms) / 1000;
+        lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog;
+        lcw->lcw_data     = data;
+        lcw->lcw_state    = LC_WATCHDOG_DISABLED;
+
+        INIT_LIST_HEAD(&lcw->lcw_list);
+
+        lcw->lcw_timer.function = lcw_cb;
+        lcw->lcw_timer.data = (unsigned long)lcw;
+        lcw->lcw_timer.expires = jiffies + lcw->lcw_time;
+        init_timer(&lcw->lcw_timer);
+
+        /* the first watchdog brings up the dispatcher thread */
+        down(&lcw_refcount_sem);
+        if (++lcw_refcount == 1)
+                lcw_dispatch_start();
+        up(&lcw_refcount_sem);
+
+        /* Keep this working in case we enable them by default */
+        if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
+                do_gettimeofday(&lcw->lcw_last_touched);
+                add_timer(&lcw->lcw_timer);
+        }
+
+        RETURN(lcw);
+}
+EXPORT_SYMBOL(lc_watchdog_add);
+
+/*
+ * Record "now" as the last time lcw was touched.
+ *
+ * If the watchdog had already expired, first log how long ago it was
+ * last touched; message names the operation being performed ("touched",
+ * "disabled", "deleted").
+ */
+static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
+{
+        struct timeval newtime;
+        struct timeval timediff;
+
+        do_gettimeofday(&newtime);
+        if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
+                cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff);
+                /* cast explicitly so the arguments match the conversion
+                 * specifiers whatever pid_t and the timeval fields are */
+                CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n",
+                      (int)lcw->lcw_pid,
+                      message,
+                      (unsigned long)timediff.tv_sec,
+                      (unsigned long)timediff.tv_usec / 100);
+        }
+        lcw->lcw_last_touched = newtime;
+}
+/*
+ * Re-arm lcw so that it fires timeout_ms milliseconds from now.
+ *
+ * Removes lcw from the pending list, updates its last-touched time
+ * (logging if it had expired), marks it ENABLED and (re)starts its timer.
+ */
+void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
+{
+        ENTRY;
+        LASSERT(lcw != NULL);
+
+        spin_lock_bh(&lcw_pending_timers_lock);
+        list_del_init(&lcw->lcw_list);
+        spin_unlock_bh(&lcw_pending_timers_lock);
+
+        lcw_update_time(lcw, "touched");
+        lcw->lcw_state = LC_WATCHDOG_ENABLED;
+
+        mod_timer(&lcw->lcw_timer, jiffies +
+                  cfs_time_seconds(timeout_ms) / 1000);
+
+        EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_touch_ms);
+
+/*
+ * deprecated - use lc_watchdog_touch_ms() instead
+ *
+ * Re-arm lcw with the timeout it was created with.  lcw_time is kept in
+ * jiffies, so it is converted back to milliseconds here; the whole-second
+ * and sub-second parts are converted separately so that timeouts shorter
+ * than one second are not truncated to 0 and long timeouts do not
+ * overflow the intermediate product.
+ */
+void lc_watchdog_touch(struct lc_watchdog *lcw)
+{
+        cfs_duration_t ticks_per_sec = cfs_time_seconds(1);
+        cfs_duration_t whole_sec;
+        cfs_duration_t frac_ticks;
+
+        LASSERT(lcw != NULL);
+
+        whole_sec  = lcw->lcw_time / ticks_per_sec;
+        frac_ticks = lcw->lcw_time % ticks_per_sec;
+
+        lc_watchdog_touch_ms(lcw, (int)(whole_sec * 1000 +
+                                        frac_ticks * 1000 / ticks_per_sec));
+}
+EXPORT_SYMBOL(lc_watchdog_touch);
+/*
+ * Disable lcw without deleting it.
+ *
+ * Removes lcw from the pending list if queued, updates its last-touched
+ * time (logging if it had expired) and marks it DISABLED.  The timer is
+ * not stopped here; lcw_cb() ignores a timer that fires while the
+ * watchdog is not ENABLED.
+ */
+void lc_watchdog_disable(struct lc_watchdog *lcw)
+{
+        ENTRY;
+        LASSERT(lcw != NULL);
+
+        spin_lock_bh(&lcw_pending_timers_lock);
+        if (!list_empty(&lcw->lcw_list))
+                list_del_init(&lcw->lcw_list);
+        spin_unlock_bh(&lcw_pending_timers_lock);
+
+        lcw_update_time(lcw, "disabled");
+        lcw->lcw_state = LC_WATCHDOG_DISABLED;
+
+        EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_disable);
+
+/*
+ * Destroy a watchdog created by lc_watchdog_add().
+ *
+ * Stops the timer, removes lcw from the pending list, drops the watchdog
+ * refcount (stopping the dispatcher thread when it reaches zero) and
+ * frees lcw.  May sleep; must not be called from the timer callback.
+ */
+void lc_watchdog_delete(struct lc_watchdog *lcw)
+{
+        ENTRY;
+        LASSERT(lcw != NULL);
+
+        /* del_timer() alone does not wait for an lcw_cb() that is already
+         * running on another CPU; that callback could still touch lcw or
+         * queue it on lcw_pending_timers after it has been freed below */
+        del_timer_sync(&lcw->lcw_timer);
+
+        lcw_update_time(lcw, "deleted");
+
+        spin_lock_bh(&lcw_pending_timers_lock);
+        if (!list_empty(&lcw->lcw_list))
+                list_del_init(&lcw->lcw_list);
+        spin_unlock_bh(&lcw_pending_timers_lock);
+
+        down(&lcw_refcount_sem);
+        if (--lcw_refcount == 0)
+                lcw_dispatch_stop();
+        up(&lcw_refcount_sem);
+
+        LIBCFS_FREE(lcw, sizeof(*lcw));
+
+        EXIT;
+}
+EXPORT_SYMBOL(lc_watchdog_delete);
+
+/*
+ * Provided watchdog handlers
+ */
+/*
+ * Default watchdog callback (used by lc_watchdog_add() when the caller
+ * passes none): hands pid, cast to a pointer, to
+ * libcfs_debug_dumplog_internal().  data is unused.
+ */
+void lc_watchdog_dumplog(pid_t pid, void *data)
+{
+        libcfs_debug_dumplog_internal((void *)((unsigned long)pid));
+}
+EXPORT_SYMBOL(lc_watchdog_dumplog);
+
+#else   /* !defined(WITH_WATCHDOG) */
+/*
+ * Stub used when WITH_WATCHDOG is not defined: ignores its arguments and
+ * returns the address of a single static placeholder watchdog.
+ */
+struct lc_watchdog *lc_watchdog_add(int timeout_ms,
+                                    void (*callback)(pid_t pid, void *),
+                                    void *data)
+{
+        static struct lc_watchdog      watchdog;
+        return &watchdog;
+}
+EXPORT_SYMBOL(lc_watchdog_add);
+/* Stub used when WITH_WATCHDOG is not defined: does nothing. */
+void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_touch_ms);
+/* Stub used when WITH_WATCHDOG is not defined: does nothing. */
+void lc_watchdog_touch(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_touch);
+/* Stub used when WITH_WATCHDOG is not defined: does nothing. */
+void lc_watchdog_disable(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_disable);
+/*
+ * Stub used when WITH_WATCHDOG is not defined: does nothing (the
+ * placeholder returned by the stub lc_watchdog_add() is static).
+ */
+void lc_watchdog_delete(struct lc_watchdog *lcw)
+{
+}
+EXPORT_SYMBOL(lc_watchdog_delete);
+
+#endif
+
diff --git a/libcfs/libcfs/winnt/winnt-curproc.c b/libcfs/libcfs/winnt/winnt-curproc.c
new file mode 100644 (file)
index 0000000..e21c5c9
--- /dev/null
@@ -0,0 +1,453 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ *
+ * Implementation of winnt curproc routines.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h)
+ * for the Windows NT kernel.
+ */
+/*
+ * Template task: most cfs_curproc_*() accessors read it directly and
+ * init_task_slot() copies it into every new task slot.
+ * NOTE(review): the initialiser is positional and depends on the layout
+ * of cfs_task_t, which is not visible here; "sysetm" looks like a typo
+ * for "system" - confirm before changing.
+ */
+cfs_task_t this_task = 
+    { 0, 0, 0, 0, 0, 0, 0, 
+      0, 0, 0, 0,  1, 0,  0, 0, 0,
+      "sysetm\0" };
+
+/* Return the uid stored in the global this_task template. */
+uid_t  cfs_curproc_uid(void)
+{
+    return this_task.uid;
+}
+/* Return the gid stored in the global this_task template. */
+gid_t  cfs_curproc_gid(void)
+{
+    return this_task.gid;
+}
+/* Return the fsuid stored in the global this_task template. */
+uid_t  cfs_curproc_fsuid(void)
+{
+    return this_task.fsuid;
+}
+/* Return the fsgid stored in the global this_task template. */
+gid_t cfs_curproc_fsgid(void)
+{
+    return this_task.fsgid;
+}
+
+/*
+ * Return the pid of the calling thread.
+ *
+ * The pid normally comes from the caller's task slot.  cfs_current()
+ * returns NULL when no slot can be allocated; in that case fall back to
+ * the current thread id, which is the value init_task_slot() stores in
+ * task.pid, instead of dereferencing NULL.
+ */
+pid_t cfs_curproc_pid(void)
+{
+    cfs_task_t *task = cfs_current();
+
+    if (task == NULL)
+        return (pid_t)PsGetCurrentThreadId();
+
+    return task->pid;
+}
+/* Return the ngroups count stored in the global this_task template. */
+int cfs_curproc_groups_nr(void)
+{
+    return this_task.ngroups;
+}
+
+/*
+ * Copy up to size supplementary group ids from this_task into array.
+ *
+ * size must not exceed NGROUPS (asserted) and is further clamped to
+ * this_task.ngroups.  Nothing is copied when array is NULL or the
+ * resulting count is not positive, so a negative size can no longer turn
+ * into a huge memcpy() length.
+ */
+void cfs_curproc_groups_dump(gid_t *array, int size)
+{
+    LASSERT(size <= NGROUPS);
+    size = min_t(int, size, this_task.ngroups);
+    if (array == NULL || size <= 0)
+        return;
+    memcpy(array, this_task.groups, size * sizeof(__u32));
+}
+/* Return the result of in_group_p() for gid. */
+int cfs_curproc_is_in_groups(gid_t gid)
+{
+    return in_group_p(gid);
+}
+/* Return the umask stored in the global this_task template. */
+mode_t cfs_curproc_umask(void)
+{
+    return this_task.umask;
+}
+/* Return the comm (command name) buffer of the global this_task template. */
+char  *cfs_curproc_comm(void)
+{
+    return this_task.comm;
+}
+/* Return cap_effective of the global this_task template. */
+cfs_kernel_cap_t cfs_curproc_cap_get(void)
+{
+    return this_task.cap_effective;
+}
+/*
+ * Store cap as cap_effective of the global this_task template; task
+ * slots that already copied the template are not updated.
+ */
+void cfs_curproc_cap_set(cfs_kernel_cap_t cap)
+{
+    this_task.cap_effective = cap;
+}
+
+
+/*
+ * Implementation of linux task management routines
+ */
+
+
+/* the single global task manager: magic, spinlock, sorted task slot list,
+ * slot count and the slab cache the slots are allocated from */
+
+TASK_MAN TaskMan;
+
+
+/*
+ *  task slot routines
+ */
+
+/*
+ * Allocate an uninitialised task slot: from the TaskMan slab cache when
+ * one exists, otherwise with cfs_alloc().  Returns whatever the
+ * allocator returned.
+ */
+PTASK_SLOT
+alloc_task_slot()
+{
+    if (!TaskMan.slab)
+        return cfs_alloc(sizeof(TASK_SLOT), 0);
+
+    return cfs_mem_cache_alloc(TaskMan.slab, 0);
+}
+/*
+ * Initialise a freshly allocated task slot: zero it, tag it with
+ * TASKSLT_MAGIC, copy the this_task template into it, set the task pid to
+ * the current thread id and initialise the slot's event.
+ */
+void
+init_task_slot(PTASK_SLOT task)
+{
+    memset(task, 0, sizeof(TASK_SLOT));
+    task->Magic = TASKSLT_MAGIC;
+    task->task  = this_task;
+    task->task.pid = (pid_t)PsGetCurrentThreadId();
+    cfs_init_event(&task->Event, TRUE, FALSE);
+}
+
+
+/*
+ * Release a task slot to wherever alloc_task_slot() takes slots from:
+ * the TaskMan slab cache when one exists, cfs_free() otherwise.
+ */
+void
+cleanup_task_slot(PTASK_SLOT task)
+{
+    if (!TaskMan.slab) {
+        cfs_free(task);
+        return;
+    }
+
+    cfs_mem_cache_free(TaskMan.slab, task);
+}
+
+/*
+ *  task manager related routines
+ */
+
+/*
+ * Thread create/exit notification, registered by init_task_manager()
+ * through PsSetCreateThreadNotifyRoutine().
+ *
+ * On thread exit (Create == FALSE) every task slot matching
+ * ProcessId/ThreadId is unlinked from TaskMan.TaskList and freed.  On
+ * thread creation a matching slot (a reused thread id) is left alone.
+ */
+VOID
+task_manager_notify(
+    IN HANDLE   ProcessId,
+    IN HANDLE   ThreadId,
+    IN BOOLEAN  Create
+    )
+{
+    PLIST_ENTRY ListEntry = NULL;
+    PLIST_ENTRY NextEntry = NULL;
+    PTASK_SLOT  TaskSlot  = NULL;
+
+    spin_lock(&(TaskMan.Lock));
+
+    ListEntry = TaskMan.TaskList.Flink;
+
+    while (ListEntry != (&(TaskMan.TaskList))) {
+
+        /* fetch the successor first: the current entry lives inside the
+           task slot and may be freed below */
+        NextEntry = ListEntry->Flink;
+
+        TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+        if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) {
+
+            if (!Create) {
+                /* remove the taskslot */
+                RemoveEntryList(&(TaskSlot->Link));
+                TaskMan.NumOfTasks--;
+
+                /* now free the task slot */
+                cleanup_task_slot(TaskSlot);
+            }
+            /* else: the Pid/Tid pair was reused by a new thread; the slot
+               is kept and cfs_current() refreshes it on first use */
+        }
+
+        ListEntry = NextEntry;
+    }
+
+    spin_unlock(&(TaskMan.Lock));
+}
+
+/*
+ * Set up the global task manager: clear and tag TaskMan, initialise its
+ * spinlock, slab cache and task list, then register
+ * task_manager_notify() for thread create/exit notifications.
+ *
+ * Always returns 0; a failed registration only breaks into the debugger.
+ */
+int
+init_task_manager()
+{
+    NTSTATUS    rc;
+
+    /* start from a zeroed manager carrying its magic */
+    memset(&TaskMan, 0, sizeof(TASK_MAN));
+    TaskMan.Magic = TASKMAN_MAGIC;
+
+    /* lock protecting the task list */
+    spin_lock_init(&TaskMan.Lock);
+
+    /* slab cache the task slots are carved from */
+    TaskMan.slab = cfs_mem_cache_create("TSLT", sizeof(TASK_SLOT), 0, 0);
+
+    /* empty task list */
+    InitializeListHead(&(TaskMan.TaskList));
+
+    /* hook thread creation/destruction */
+    rc = PsSetCreateThreadNotifyRoutine(task_manager_notify);
+    if (NT_SUCCESS(rc))
+        return 0;
+
+    cfs_enter_debugger();
+    return 0;
+}
+/*
+ * Tear down the global task manager: free every task slot still on
+ * TaskMan.TaskList, destroy the slab cache and zero TaskMan.
+ *
+ * NOTE(review): nothing here unregisters task_manager_notify(); the
+ * unconditional cfs_enter_debugger() below flags that this path is not
+ * expected to run - confirm.
+ */
+void
+cleanup_task_manager()
+{
+    PLIST_ENTRY ListEntry = NULL; 
+    PTASK_SLOT  TaskSlot  = NULL;
+
+    /* the driver must stay resident once the CreateThreadNotifyRoutine
+       (task_manager_notify) has been registered successfully */
+    cfs_enter_debugger();
+
+
+    /* cleanup all the taskslots attached to the list */
+    spin_lock(&(TaskMan.Lock));
+
+    while (!IsListEmpty(&(TaskMan.TaskList))) {
+
+        ListEntry = TaskMan.TaskList.Flink;
+        TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+        RemoveEntryList(ListEntry);
+        cleanup_task_slot(TaskSlot);
+    }
+
+    spin_unlock(&TaskMan.Lock);
+
+    /* destroy the taskslot cache slab */
+    cfs_mem_cache_destroy(TaskMan.slab);
+    memset(&TaskMan, 0, sizeof(TASK_MAN));
+}
+
+
+/*
+ * schedule routines (task slot list)
+ */
+
+/*
+ * Return the cfs_task_t of the calling thread, creating its task slot on
+ * first use.
+ *
+ * TaskMan.TaskList is kept sorted by (Pid, Tid).  The list is searched
+ * under TaskMan.Lock; a matching slot is reused (its thread pointer is
+ * refreshed if the Tid was recycled), otherwise a new slot is allocated
+ * and inserted at its sorted position.  Every call then re-walks the
+ * whole list to verify the ordering, entering the debugger on a
+ * violation.
+ *
+ * Returns NULL (after entering the debugger) if a slot cannot be
+ * allocated.
+ *
+ * NOTE(review): the (ULONG) casts on Pid/Tid would truncate the handles
+ * if HANDLE is wider than ULONG - confirm for 64-bit builds.
+ */
+cfs_task_t *
+cfs_current()
+{
+    HANDLE      Pid = PsGetCurrentProcessId();
+    HANDLE      Tid = PsGetCurrentThreadId();
+    PETHREAD    Tet = PsGetCurrentThread();
+
+    PLIST_ENTRY ListEntry = NULL; 
+    PTASK_SLOT  TaskSlot  = NULL;
+
+    spin_lock(&(TaskMan.Lock));
+
+    ListEntry = TaskMan.TaskList.Flink;
+
+    while (ListEntry != (&(TaskMan.TaskList))) {
+
+        TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+
+        if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) {
+            if (TaskSlot->Tet != Tet) {
+
+/*
+                DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n",
+                         Pid, Tid, Tet, TaskSlot->Tet);
+*/
+                //
+                // The old thread has already exited. This must be a
+                // new thread which got the same Tid as the previous one.
+                //
+
+                TaskSlot->Tet = Tet;
+            }
+            break;
+
+        } else {
+
+            if ((ULONG)TaskSlot->Pid > (ULONG)Pid) {
+                TaskSlot = NULL;
+                break;
+            } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) {
+                if ((ULONG)TaskSlot->Tid > (ULONG)Tid) {
+                    TaskSlot = NULL;
+                    break;
+                }
+            }
+
+            TaskSlot =  NULL;
+        }
+
+        ListEntry = ListEntry->Flink;
+    }
+
+    if (!TaskSlot) {
+
+        TaskSlot = alloc_task_slot();
+
+        if (!TaskSlot) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+        init_task_slot(TaskSlot);
+
+        TaskSlot->Pid = Pid;
+        TaskSlot->Tid = Tid;
+        TaskSlot->Tet = Tet;
+
+        if (ListEntry == (&(TaskMan.TaskList))) {
+            //
+            // Empty case or the biggest case, put it to the tail.
+            //
+            InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link));
+        } else {
+            //
+            // ListEntry is the first slot with a bigger (Pid, Tid):
+            // put the new slot just before it.
+            //
+            InsertHeadList(ListEntry->Blink, &(TaskSlot->Link));
+        }
+
+        TaskMan.NumOfTasks++;
+    }
+
+    //
+    // Check whether the task structures are arranged in the expected order.
+    //
+
+    {
+        PTASK_SLOT  Prev = NULL, Curr = NULL;
+        
+        ListEntry = TaskMan.TaskList.Flink;
+
+        while (ListEntry != (&(TaskMan.TaskList))) {
+
+            Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link);
+            ListEntry = ListEntry->Flink;
+
+            if (Prev) {
+                if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) {
+                    cfs_enter_debugger();
+                } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) {
+                    if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) {
+                        cfs_enter_debugger();
+                    }
+                }
+            }
+
+            Prev = Curr;
+        }
+    }
+
+errorout:
+
+    spin_unlock(&(TaskMan.Lock));
+
+    if (!TaskSlot) {
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    return (&(TaskSlot->task));
+}
+
+/*
+ * Block the calling thread on its task slot's event.
+ *
+ * time is handed to cfs_wait_event(); MAX_SCHEDULE_TIMEOUT is translated
+ * to 0 first.  Returns 1 if cfs_wait_event() returned non-zero, else 0;
+ * also returns 0 (after entering the debugger) when the caller has no
+ * task slot.
+ */
+int
+schedule_timeout(int64_t time)
+{
+    cfs_task_t * self = cfs_current();
+    PTASK_SLOT   slot = NULL;
+    int64_t      wait = time;
+
+    if (self == NULL) {
+        cfs_enter_debugger();
+        return 0;
+    }
+
+    slot = CONTAINING_RECORD(self, TASK_SLOT, task);
+    cfs_assert(slot->Magic == TASKSLT_MAGIC);
+
+    if (wait == MAX_SCHEDULE_TIMEOUT)
+        wait = 0;
+
+    return (cfs_wait_event(&(slot->Event), wait) != 0);
+}
+/* Same as schedule_timeout() with a time argument of 0. */
+int
+schedule()
+{
+    return schedule_timeout(0);
+}
+
+/*
+ * Wake the thread that owns task by signalling its task slot's event.
+ *
+ * Returns TRUE after signalling, or 0 (after entering the debugger) when
+ * task is NULL.
+ */
+int
+wake_up_process(
+    cfs_task_t * task
+    )
+{
+    PTASK_SLOT   owner;
+
+    if (task == NULL) {
+        cfs_enter_debugger();
+        return 0;
+    }
+
+    /* the cfs_task_t is embedded in its task slot */
+    owner = CONTAINING_RECORD(task, TASK_SLOT, task);
+    cfs_assert(owner->Magic == TASKSLT_MAGIC);
+
+    cfs_wake_event(&(owner->Event));
+
+    return TRUE;
+}
+/*
+ * Sleep on waitq: queue a wait link that lives on the caller's stack,
+ * wait on it in CFS_TASK_INTERRUPTIBLE state, then unlink it again.
+ */
+void
+sleep_on(
+    cfs_waitq_t *waitq
+    )
+{
+       cfs_waitlink_t link;
+       
+       cfs_waitlink_init(&link);
+       cfs_waitq_add(waitq, &link);
+       cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
+       cfs_waitq_del(waitq, &link);
+}
+/* cfs_curproc accessors exported through EXPORT_SYMBOL */
+EXPORT_SYMBOL(cfs_curproc_uid);
+EXPORT_SYMBOL(cfs_curproc_pid);
+EXPORT_SYMBOL(cfs_curproc_gid);
+EXPORT_SYMBOL(cfs_curproc_fsuid);
+EXPORT_SYMBOL(cfs_curproc_fsgid);
+EXPORT_SYMBOL(cfs_curproc_umask);
+EXPORT_SYMBOL(cfs_curproc_comm);
+EXPORT_SYMBOL(cfs_curproc_groups_nr);
+EXPORT_SYMBOL(cfs_curproc_groups_dump);
+EXPORT_SYMBOL(cfs_curproc_is_in_groups);
+EXPORT_SYMBOL(cfs_curproc_cap_get);
+EXPORT_SYMBOL(cfs_curproc_cap_set);
diff --git a/libcfs/libcfs/winnt/winnt-debug.c b/libcfs/libcfs/winnt/winnt-debug.c
new file mode 100644 (file)
index 0000000..9e94f84
--- /dev/null
@@ -0,0 +1,1057 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/kp30.h>
+#include <libcfs/libcfs.h>
+#include "tracefile.h"
+
+/*
+ * lnet_debug_dumpstack
+ *   Empty stub: returns immediately without doing anything
+ *
+ * Arguments:
+ *   tsk - ignored
+ *
+ * Return Value:
+ *   N/A
+ */
+void lnet_debug_dumpstack(cfs_task_t *tsk)
+{ 
+       return;
+}
+
+/*
+ * lnet_current
+ *   Thin wrapper around cfs_current()
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   cfs_task_t * - whatever cfs_current() returns
+ */
+cfs_task_t *lnet_current(void)
+{ 
+       return cfs_current();
+}
+
+/*
+ * lnet_arch_debug_init
+ *   Stub: performs no initialization
+ *
+ * Arguments:
+ *   bufsize - ignored
+ *
+ * Return Value:
+ *   int - always 0
+ */
+int lnet_arch_debug_init(unsigned long bufsize)
+{
+       return 0;
+}
+
+/*
+ * lnet_arch_debug_cleanup
+ *   Stub: performs no cleanup
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   int - always 0
+ */
+int lnet_arch_debug_cleanup(void)
+{
+       return 0;
+}
+
+/*
+ * lnet_run_lbug_upcall
+ *   Empty stub: no upcall is run
+ *
+ * Arguments:
+ *   file - ignored
+ *   fn   - ignored
+ *   line - ignored
+ *
+ * Return Value:
+ *   N/A
+ */
+void lnet_run_lbug_upcall(char *file, const char *fn, const int line)
+{
+}
+
+/*
+ * lbug_with_loc
+ *   Flag a catastrophe and emit an emergency "LBUG" message
+ *
+ * Arguments:
+ *   file - not used
+ *   func - not used
+ *   line - not used
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   Sets libcfs_catastrophe to 1, then reports the current pid and
+ *   thread through CEMERG. The log dump and the LBUG upcall are left
+ *   disabled here:
+ *     portals_debug_dumplog();
+ *     portals_run_lbug_upcall(file, func, line);
+ */
+void lbug_with_loc(char *file, const char *func, const int line)
+{
+    unsigned cur_pid;
+    unsigned cur_thread;
+
+    libcfs_catastrophe = 1;
+
+    cur_pid    = (unsigned)cfs_curproc_pid();
+    cur_thread = (unsigned)PsGetCurrentThread();
+
+    CEMERG("LBUG: pid: %u thread: %#x\n", cur_pid, cur_thread);
+}
+
+#if TDI_LIBCFS_DBG
+
+/*
+ * Definitions
+ */
+
+/* Only compiled when TDI_LIBCFS_DBG is non-zero. NOTE(review): the
+ * meaning of 0x5 and the readers of this variable are not visible
+ * here - confirm. */
+LONG  KsDebugLevel = 0x5;
+
+
+/*
+ * Routines
+ */
+
+
+/*
+ * KsNtStatusToString
+ *   Get the error message for a specified nt status
+ *
+ * Arguments:
+ *   Status - nt status code
+ *
+ * Return Value:
+ *   PUCHAR - message string for the status code
+ *
+ * NOTES: 
+ *   N/A
+ */
+
+PUCHAR
+KsNtStatusToString (IN NTSTATUS Status)
+{
+    switch (Status) {
+
+    case 0x00000000: return "STATUS_SUCCESS";
+    case 0x00000001: return "STATUS_WAIT_1";
+    case 0x00000002: return "STATUS_WAIT_2";
+    case 0x00000003: return "STATUS_WAIT_3";
+    case 0x0000003F: return "STATUS_WAIT_63";
+    case 0x00000080: return "STATUS_ABANDONED_WAIT_0";
+    case 0x000000BF: return "STATUS_ABANDONED_WAIT_63";
+    case 0x000000C0: return "STATUS_USER_APC";
+    case 0x00000100: return "STATUS_KERNEL_APC";
+    case 0x00000101: return "STATUS_ALERTED";
+    case 0x00000102: return "STATUS_TIMEOUT";
+    case 0x00000103: return "STATUS_PENDING";
+    case 0x00000104: return "STATUS_REPARSE";
+    case 0x00000105: return "STATUS_MORE_ENTRIES";
+    case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED";
+    case 0x00000107: return "STATUS_SOME_NOT_MAPPED";
+    case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS";
+    case 0x00000109: return "STATUS_VOLUME_MOUNTED";
+    case 0x0000010A: return "STATUS_RXACT_COMMITTED";
+    case 0x0000010B: return "STATUS_NOTIFY_CLEANUP";
+    case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR";
+    case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT";
+    case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED";
+    case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION";
+    case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO";
+    case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE";
+    case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE";
+    case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE";
+    case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED";
+    case 0x00000116: return "STATUS_CRASH_DUMP";
+    case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS";
+    case 0x00000118: return "STATUS_REPARSE_OBJECT";
+    case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED";
+    case 0x00000120: return "STATUS_TRANSLATION_COMPLETE";
+    case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY";
+    case 0x00010001: return "DBG_EXCEPTION_HANDLED";
+    case 0x00010002: return "DBG_CONTINUE";
+    case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS";
+    case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED";
+    case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE";
+    case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE";
+    case 0x40000004: return "STATUS_RXACT_STATE_CREATED";
+    case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION";
+    case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY";
+    case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY";
+    case 0x40000008: return "STATUS_SERIAL_MORE_WRITES";
+    case 0x40000009: return "STATUS_REGISTRY_RECOVERED";
+    case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP";
+    case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY";
+    case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT";
+    case 0x4000000D: return "STATUS_NULL_LM_PASSWORD";
+    case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH";
+    case 0x4000000F: return "STATUS_RECEIVE_PARTIAL";
+    case 0x40000010: return "STATUS_RECEIVE_EXPEDITED";
+    case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED";
+    case 0x40000012: return "STATUS_EVENT_DONE";
+    case 0x40000013: return "STATUS_EVENT_PENDING";
+    case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM";
+    case 0x40000015: return "STATUS_FATAL_APP_EXIT";
+    case 0x40000016: return "STATUS_PREDEFINED_HANDLE";
+    case 0x40000017: return "STATUS_WAS_UNLOCKED";
+    case 0x40000018: return "STATUS_SERVICE_NOTIFICATION";
+    case 0x40000019: return "STATUS_WAS_LOCKED";
+    case 0x4000001A: return "STATUS_LOG_HARD_ERROR";
+    case 0x4000001B: return "STATUS_ALREADY_WIN32";
+    case 0x4000001C: return "STATUS_WX86_UNSIMULATE";
+    case 0x4000001D: return "STATUS_WX86_CONTINUE";
+    case 0x4000001E: return "STATUS_WX86_SINGLE_STEP";
+    case 0x4000001F: return "STATUS_WX86_BREAKPOINT";
+    case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE";
+    case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE";
+    case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN";
+    case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE";
+    case 0x40000024: return "STATUS_NO_YIELD_PERFORMED";
+    case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED";
+    case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED";
+    case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED";
+    case 0x40000028: return "STATUS_WX86_CREATEWX86TIB";
+    case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH";
+    case 0x40010001: return "DBG_REPLY_LATER";
+    case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE";
+    case 0x40010003: return "DBG_TERMINATE_THREAD";
+    case 0x40010004: return "DBG_TERMINATE_PROCESS";
+    case 0x40010005: return "DBG_CONTROL_C";
+    case 0x40010006: return "DBG_PRINTEXCEPTION_C";
+    case 0x40010007: return "DBG_RIPEXCEPTION";
+    case 0x40010008: return "DBG_CONTROL_BREAK";
+    case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION";
+    case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT";
+    case 0x80000003: return "STATUS_BREAKPOINT";
+    case 0x80000004: return "STATUS_SINGLE_STEP";
+    case 0x80000005: return "STATUS_BUFFER_OVERFLOW";
+    case 0x80000006: return "STATUS_NO_MORE_FILES";
+    case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER";
+    case 0x8000000A: return "STATUS_HANDLES_CLOSED";
+    case 0x8000000B: return "STATUS_NO_INHERITANCE";
+    case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE";
+    case 0x8000000D: return "STATUS_PARTIAL_COPY";
+    case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY";
+    case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF";
+    case 0x80000010: return "STATUS_DEVICE_OFF_LINE";
+    case 0x80000011: return "STATUS_DEVICE_BUSY";
+    case 0x80000012: return "STATUS_NO_MORE_EAS";
+    case 0x80000013: return "STATUS_INVALID_EA_NAME";
+    case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT";
+    case 0x80000015: return "STATUS_INVALID_EA_FLAG";
+    case 0x80000016: return "STATUS_VERIFY_REQUIRED";
+    case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION";
+    case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY";
+    case 0x8000001A: return "STATUS_NO_MORE_ENTRIES";
+    case 0x8000001B: return "STATUS_FILEMARK_DETECTED";
+    case 0x8000001C: return "STATUS_MEDIA_CHANGED";
+    case 0x8000001D: return "STATUS_BUS_RESET";
+    case 0x8000001E: return "STATUS_END_OF_MEDIA";
+    case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA";
+    case 0x80000020: return "STATUS_MEDIA_CHECK";
+    case 0x80000021: return "STATUS_SETMARK_DETECTED";
+    case 0x80000022: return "STATUS_NO_DATA_DETECTED";
+    case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES";
+    case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES";
+    case 0x80000025: return "STATUS_ALREADY_DISCONNECTED";
+    case 0x80000026: return "STATUS_LONGJUMP";
+    case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED";
+    case 0xC0000001: return "STATUS_UNSUCCESSFUL";
+    case 0xC0000002: return "STATUS_NOT_IMPLEMENTED";
+    case 0xC0000003: return "STATUS_INVALID_INFO_CLASS";
+    case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH";
+    case 0xC0000005: return "STATUS_ACCESS_VIOLATION";
+    case 0xC0000006: return "STATUS_IN_PAGE_ERROR";
+    case 0xC0000007: return "STATUS_PAGEFILE_QUOTA";
+    case 0xC0000008: return "STATUS_INVALID_HANDLE";
+    case 0xC0000009: return "STATUS_BAD_INITIAL_STACK";
+    case 0xC000000A: return "STATUS_BAD_INITIAL_PC";
+    case 0xC000000B: return "STATUS_INVALID_CID";
+    case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED";
+    case 0xC000000D: return "STATUS_INVALID_PARAMETER";
+    case 0xC000000E: return "STATUS_NO_SUCH_DEVICE";
+    case 0xC000000F: return "STATUS_NO_SUCH_FILE";
+    case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST";
+    case 0xC0000011: return "STATUS_END_OF_FILE";
+    case 0xC0000012: return "STATUS_WRONG_VOLUME";
+    case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE";
+    case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA";
+    case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR";
+    case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED";
+    case 0xC0000017: return "STATUS_NO_MEMORY";
+    case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES";
+    case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW";
+    case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM";
+    case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION";
+    case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE";
+    case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION";
+    case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE";
+    case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE";
+    case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION";
+    case 0xC0000021: return "STATUS_ALREADY_COMMITTED";
+    case 0xC0000022: return "STATUS_ACCESS_DENIED";
+    case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL";
+    case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH";
+    case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION";
+    case 0xC0000026: return "STATUS_INVALID_DISPOSITION";
+    case 0xC0000027: return "STATUS_UNWIND";
+    case 0xC0000028: return "STATUS_BAD_STACK";
+    case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET";
+    case 0xC000002A: return "STATUS_NOT_LOCKED";
+    case 0xC000002B: return "STATUS_PARITY_ERROR";
+    case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM";
+    case 0xC000002D: return "STATUS_NOT_COMMITTED";
+    case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES";
+    case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG";
+    case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX";
+    case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER";
+    case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR";
+    case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID";
+    case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND";
+    case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION";
+    case 0xC0000037: return "STATUS_PORT_DISCONNECTED";
+    case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED";
+    case 0xC0000039: return "STATUS_OBJECT_PATH_INVALID";
+    case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND";
+    case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD";
+    case 0xC000003C: return "STATUS_DATA_OVERRUN";
+    case 0xC000003D: return "STATUS_DATA_LATE_ERROR";
+    case 0xC000003E: return "STATUS_DATA_ERROR";
+    case 0xC000003F: return "STATUS_CRC_ERROR";
+    case 0xC0000040: return "STATUS_SECTION_TOO_BIG";
+    case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED";
+    case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE";
+    case 0xC0000043: return "STATUS_SHARING_VIOLATION";
+    case 0xC0000044: return "STATUS_QUOTA_EXCEEDED";
+    case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION";
+    case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED";
+    case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED";
+    case 0xC0000048: return "STATUS_PORT_ALREADY_SET";
+    case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE";
+    case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED";
+    case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING";
+    case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT";
+    case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP";
+    case 0xC000004E: return "STATUS_SECTION_PROTECTION";
+    case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED";
+    case 0xC0000050: return "STATUS_EA_TOO_LARGE";
+    case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY";
+    case 0xC0000052: return "STATUS_NO_EAS_ON_FILE";
+    case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR";
+    case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT";
+    case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED";
+    case 0xC0000056: return "STATUS_DELETE_PENDING";
+    case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED";
+    case 0xC0000058: return "STATUS_UNKNOWN_REVISION";
+    case 0xC0000059: return "STATUS_REVISION_MISMATCH";
+    case 0xC000005A: return "STATUS_INVALID_OWNER";
+    case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP";
+    case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN";
+    case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY";
+    case 0xC000005E: return "STATUS_NO_LOGON_SERVERS";
+    case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION";
+    case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE";
+    case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD";
+    case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME";
+    case 0xC0000063: return "STATUS_USER_EXISTS";
+    case 0xC0000064: return "STATUS_NO_SUCH_USER";
+    case 0xC0000065: return "STATUS_GROUP_EXISTS";
+    case 0xC0000066: return "STATUS_NO_SUCH_GROUP";
+    case 0xC0000067: return "STATUS_MEMBER_IN_GROUP";
+    case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP";
+    case 0xC0000069: return "STATUS_LAST_ADMIN";
+    case 0xC000006A: return "STATUS_WRONG_PASSWORD";
+    case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD";
+    case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION";
+    case 0xC000006D: return "STATUS_LOGON_FAILURE";
+    case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION";
+    case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS";
+    case 0xC0000070: return "STATUS_INVALID_WORKSTATION";
+    case 0xC0000071: return "STATUS_PASSWORD_EXPIRED";
+    case 0xC0000072: return "STATUS_ACCOUNT_DISABLED";
+    case 0xC0000073: return "STATUS_NONE_MAPPED";
+    case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED";
+    case 0xC0000075: return "STATUS_LUIDS_EXHAUSTED";
+    case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY";
+    case 0xC0000077: return "STATUS_INVALID_ACL";
+    case 0xC0000078: return "STATUS_INVALID_SID";
+    case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR";
+    case 0xC000007A: return "STATUS_PROCEDURE_NOT_FOUND";
+    case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT";
+    case 0xC000007C: return "STATUS_NO_TOKEN";
+    case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL";
+    case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED";
+    case 0xC000007F: return "STATUS_DISK_FULL";
+    case 0xC0000080: return "STATUS_SERVER_DISABLED";
+    case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED";
+    case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED";
+    case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED";
+    case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY";
+    case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED";
+    case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL";
+    case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED";
+    case 0xC0000088: return "STATUS_NOT_MAPPED_DATA";
+    case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND";
+    case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND";
+    case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND";
+    case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED";
+    case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND";
+    case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO";
+    case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT";
+    case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION";
+    case 0xC0000091: return "STATUS_FLOAT_OVERFLOW";
+    case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK";
+    case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW";
+    case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO";
+    case 0xC0000095: return "STATUS_INTEGER_OVERFLOW";
+    case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION";
+    case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES";
+    case 0xC0000098: return "STATUS_FILE_INVALID";
+    case 0xC0000099: return "STATUS_ALLOTTED_SPACE_EXCEEDED";
+    case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES";
+    case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND";
+    case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR";
+    case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED";
+    case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE";
+    case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE";
+    case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED";
+    case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA";
+    case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED";
+    case 0xC00000A3: return "STATUS_DEVICE_NOT_READY";
+    case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES";
+    case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL";
+    case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS";
+    case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS";
+    case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE";
+    case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD";
+    case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT";
+    case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE";
+    case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE";
+    case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE";
+    case 0xC00000AE: return "STATUS_PIPE_BUSY";
+    case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION";
+    case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED";
+    case 0xC00000B1: return "STATUS_PIPE_CLOSING";
+    case 0xC00000B2: return "STATUS_PIPE_CONNECTED";
+    case 0xC00000B3: return "STATUS_PIPE_LISTENING";
+    case 0xC00000B4: return "STATUS_INVALID_READ_MODE";
+    case 0xC00000B5: return "STATUS_IO_TIMEOUT";
+    case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED";
+    case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED";
+    case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED";
+    case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET";
+    case 0xC00000BA: return "STATUS_FILE_IS_A_DIRECTORY";
+    case 0xC00000BB: return "STATUS_NOT_SUPPORTED";
+    case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING";
+    case 0xC00000BD: return "STATUS_DUPLICATE_NAME";
+    case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH";
+    case 0xC00000BF: return "STATUS_NETWORK_BUSY";
+    case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST";
+    case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS";
+    case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR";
+    case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE";
+    case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR";
+    case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER";
+    case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL";
+    case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE";
+    case 0xC00000C8: return "STATUS_PRINT_CANCELLED";
+    case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED";
+    case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED";
+    case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE";
+    case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME";
+    case 0xC00000CD: return "STATUS_TOO_MANY_NAMES";
+    case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS";
+    case 0xC00000CF: return "STATUS_SHARING_PAUSED";
+    case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED";
+    case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED";
+    case 0xC00000D2: return "STATUS_NET_WRITE_FAULT";
+    case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT";
+    case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE";
+    case 0xC00000D5: return "STATUS_FILE_RENAMED";
+    case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED";
+    case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT";
+    case 0xC00000D8: return "STATUS_CANT_WAIT";
+    case 0xC00000D9: return "STATUS_PIPE_EMPTY";
+    case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO";
+    case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF";
+    case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE";
+    case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE";
+    case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE";
+    case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN";
+    case 0xC00000E0: return "STATUS_DOMAIN_EXISTS";
+    case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED";
+    case 0xC00000E2: return "STATUS_OPLOCK_NOT_GRANTED";
+    case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL";
+    case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION";
+    case 0xC00000E5: return "STATUS_INTERNAL_ERROR";
+    case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED";
+    case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT";
+    case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER";
+    case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR";
+    case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR";
+    case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR";
+    case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR";
+    case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS";
+    case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS";
+    case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1";
+    case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2";
+    case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3";
+    case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4";
+    case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5";
+    case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6";
+    case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7";
+    case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8";
+    case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9";
+    case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10";
+    case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11";
+    case 0xC00000FA: return "STATUS_INVALID_PARAMETER_12";
+    case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED";
+    case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED";
+    case 0xC00000FD: return "STATUS_STACK_OVERFLOW";
+    case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE";
+    case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE";
+    case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND";
+    case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY";
+    case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR";
+    case 0xC0000103: return "STATUS_NOT_A_DIRECTORY";
+    case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE";
+    case 0xC0000105: return "STATUS_LOGON_SESSION_COLLISION";
+    case 0xC0000106: return "STATUS_NAME_TOO_LONG";
+    case 0xC0000107: return "STATUS_FILES_OPEN";
+    case 0xC0000108: return "STATUS_CONNECTION_IN_USE";
+    case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND";
+    case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING";
+    case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE";
+    case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION";
+    case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE";
+    case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED";
+    case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT";
+    case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST";
+    case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED";
+    case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER";
+    case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND";
+    case 0xC0000114: return "STATUS_ABIOS_INVALID_LID";
+    case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE";
+    case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR";
+    case 0xC0000117: return "STATUS_NO_LDT";
+    case 0xC0000118: return "STATUS_INVALID_LDT_SIZE";
+    case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET";
+    case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR";
+    case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT";
+    case 0xC000011C: return "STATUS_RXACT_INVALID_STATE";
+    case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE";
+    case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO";
+    case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES";
+    case 0xC0000120: return "STATUS_CANCELLED";
+    case 0xC0000121: return "STATUS_CANNOT_DELETE";
+    case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME";
+    case 0xC0000123: return "STATUS_FILE_DELETED";
+    case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT";
+    case 0xC0000125: return "STATUS_SPECIAL_GROUP";
+    case 0xC0000126: return "STATUS_SPECIAL_USER";
+    case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP";
+    case 0xC0000128: return "STATUS_FILE_CLOSED";
+    case 0xC0000129: return "STATUS_TOO_MANY_THREADS";
+    case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS";
+    case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE";
+    case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED";
+    case 0xC000012D: return "STATUS_COMMITMENT_LIMIT";
+    case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT";
+    case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ";
+    case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT";
+    case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16";
+    case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT";
+    case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC";
+    case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED";
+    case 0xC0000135: return "STATUS_DLL_NOT_FOUND";
+    case 0xC0000136: return "STATUS_OPEN_FAILED";
+    case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED";
+    case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND";
+    case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND";
+    case 0xC000013A: return "STATUS_CONTROL_C_EXIT";
+    case 0xC000013B: return "STATUS_LOCAL_DISCONNECT";
+    case 0xC000013C: return "STATUS_REMOTE_DISCONNECT";
+    case 0xC000013D: return "STATUS_REMOTE_RESOURCES";
+    case 0xC000013E: return "STATUS_LINK_FAILED";
+    case 0xC000013F: return "STATUS_LINK_TIMEOUT";
+    case 0xC0000140: return "STATUS_INVALID_CONNECTION";
+    case 0xC0000141: return "STATUS_INVALID_ADDRESS";
+    case 0xC0000142: return "STATUS_DLL_INIT_FAILED";
+    case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE";
+    case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION";
+    case 0xC0000145: return "STATUS_APP_INIT_FAILURE";
+    case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED";
+    case 0xC0000147: return "STATUS_NO_PAGEFILE";
+    case 0xC0000148: return "STATUS_INVALID_LEVEL";
+    case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE";
+    case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT";
+    case 0xC000014B: return "STATUS_PIPE_BROKEN";
+    case 0xC000014C: return "STATUS_REGISTRY_CORRUPT";
+    case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED";
+    case 0xC000014E: return "STATUS_NO_EVENT_PAIR";
+    case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME";
+    case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED";
+    case 0xC0000151: return "STATUS_NO_SUCH_ALIAS";
+    case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS";
+    case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS";
+    case 0xC0000154: return "STATUS_ALIAS_EXISTS";
+    case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED";
+    case 0xC0000156: return "STATUS_TOO_MANY_SECRETS";
+    case 0xC0000157: return "STATUS_SECRET_TOO_LONG";
+    case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR";
+    case 0xC0000159: return "STATUS_FULLSCREEN_MODE";
+    case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS";
+    case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED";
+    case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE";
+    case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED";
+    case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR";
+    case 0xC000015F: return "STATUS_FT_MISSING_MEMBER";
+    case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY";
+    case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER";
+    case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER";
+    case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER";
+    case 0xC0000164: return "STATUS_FLOPPY_VOLUME";
+    case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND";
+    case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER";
+    case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR";
+    case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS";
+    case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED";
+    case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED";
+    case 0xC000016B: return "STATUS_DISK_RESET_FAILED";
+    case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY";
+    case 0xC000016D: return "STATUS_FT_ORPHANING";
+    case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT";
+    case 0xC0000172: return "STATUS_PARTITION_FAILURE";
+    case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH";
+    case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED";
+    case 0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA";
+    case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA";
+    case 0xC0000177: return "STATUS_EOM_OVERFLOW";
+    case 0xC0000178: return "STATUS_NO_MEDIA";
+    case 0xC000017A: return "STATUS_NO_SUCH_MEMBER";
+    case 0xC000017B: return "STATUS_INVALID_MEMBER";
+    case 0xC000017C: return "STATUS_KEY_DELETED";
+    case 0xC000017D: return "STATUS_NO_LOG_SPACE";
+    case 0xC000017E: return "STATUS_TOO_MANY_SIDS";
+    case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED";
+    case 0xC0000180: return "STATUS_KEY_HAS_CHILDREN";
+    case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE";
+    case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR";
+    case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR";
+    case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE";
+    case 0xC0000185: return "STATUS_IO_DEVICE_ERROR";
+    case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR";
+    case 0xC0000187: return "STATUS_BACKUP_CONTROLLER";
+    case 0xC0000188: return "STATUS_LOG_FILE_FULL";
+    case 0xC0000189: return "STATUS_TOO_LATE";
+    case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET";
+    case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT";
+    case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE";
+    case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE";
+    case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT";
+    case 0xC000018F: return "STATUS_EVENTLOG_CANT_START";
+    case 0xC0000190: return "STATUS_TRUST_FAILURE";
+    case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED";
+    case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED";
+    case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED";
+    case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK";
+    case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT";
+    case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT";
+    case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED";
+    case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT";
+    case 0xC0000199: return "STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT";
+    case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT";
+    case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT";
+    case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED";
+    case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY";
+    case 0xC0000203: return "STATUS_USER_SESSION_DELETED";
+    case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND";
+    case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES";
+    case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE";
+    case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT";
+    case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD";
+    case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES";
+    case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS";
+    case 0xC000020B: return "STATUS_ADDRESS_CLOSED";
+    case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED";
+    case 0xC000020D: return "STATUS_CONNECTION_RESET";
+    case 0xC000020E: return "STATUS_TOO_MANY_NODES";
+    case 0xC000020F: return "STATUS_TRANSACTION_ABORTED";
+    case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT";
+    case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE";
+    case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH";
+    case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED";
+    case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID";
+    case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE";
+    case 0xC0000216: return "STATUS_NOT_SERVER_SESSION";
+    case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION";
+    case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE";
+    case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED";
+    case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED";
+    case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED";
+    case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND";
+    case 0xC000021D: return "STATUS_VDM_HARD_ERROR";
+    case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT";
+    case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH";
+    case 0xC0000220: return "STATUS_MAPPED_ALIGNMENT";
+    case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH";
+    case 0xC0000222: return "STATUS_LOST_WRITEBEHIND_DATA";
+    case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID";
+    case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE";
+    case 0xC0000225: return "STATUS_NOT_FOUND";
+    case 0xC0000226: return "STATUS_NOT_TINY_STREAM";
+    case 0xC0000227: return "STATUS_RECOVERY_FAILURE";
+    case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ";
+    case 0xC0000229: return "STATUS_FAIL_CHECK";
+    case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID";
+    case 0xC000022B: return "STATUS_OBJECTID_EXISTS";
+    case 0xC000022C: return "STATUS_CONVERT_TO_LARGE";
+    case 0xC000022D: return "STATUS_RETRY";
+    case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE";
+    case 0xC000022F: return "STATUS_ALLOCATE_BUCKET";
+    case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND";
+    case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW";
+    case 0xC0000232: return "STATUS_INVALID_VARIANT";
+    case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND";
+    case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT";
+    case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE";
+    case 0xC0000236: return "STATUS_CONNECTION_REFUSED";
+    case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT";
+    case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED";
+    case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED";
+    case 0xC000023A: return "STATUS_CONNECTION_INVALID";
+    case 0xC000023B: return "STATUS_CONNECTION_ACTIVE";
+    case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE";
+    case 0xC000023D: return "STATUS_HOST_UNREACHABLE";
+    case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE";
+    case 0xC000023F: return "STATUS_PORT_UNREACHABLE";
+    case 0xC0000240: return "STATUS_REQUEST_ABORTED";
+    case 0xC0000241: return "STATUS_CONNECTION_ABORTED";
+    case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER";
+    case 0xC0000243: return "STATUS_USER_MAPPED_FILE";
+    case 0xC0000244: return "STATUS_AUDIT_FAILED";
+    case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET";
+    case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT";
+    case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION";
+    case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION";
+    case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH";
+    case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO";
+    case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT";
+    case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT";
+    case 0xC0000253: return "STATUS_LPC_REPLY_LOST";
+    case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1";
+    case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2";
+    case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT";
+    case 0xC0000257: return "STATUS_PATH_NOT_COVERED";
+    case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE";
+    case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED";
+    case 0xC000025A: return "STATUS_PWD_TOO_SHORT";
+    case 0xC000025B: return "STATUS_PWD_TOO_RECENT";
+    case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT";
+    case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE";
+    case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION";
+    case 0xC0000260: return "STATUS_INVALID_HW_PROFILE";
+    case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH";
+    case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND";
+    case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND";
+    case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED";
+    case 0xC0000265: return "STATUS_TOO_MANY_LINKS";
+    case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT";
+    case 0xC0000267: return "STATUS_FILE_IS_OFFLINE";
+    case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION";
+    case 0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION";
+    case 0xC000026A: return "STATUS_LICENSE_VIOLATION";
+    case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF";
+    case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD";
+    case 0xC000026D: return "STATUS_DFS_UNAVAILABLE";
+    case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED";
+    case 0xC000026F: return "STATUS_WX86_INTERNAL_ERROR";
+    case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK";
+    case 0xC0000271: return "STATUS_VALIDATE_CONTINUE";
+    case 0xC0000272: return "STATUS_NO_MATCH";
+    case 0xC0000273: return "STATUS_NO_MORE_MATCHES";
+    case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT";
+    case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID";
+    case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH";
+    case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID";
+    case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED";
+    case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED";
+    case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT";
+    case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT";
+    case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY";
+    case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL";
+    case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS";
+    case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT";
+    case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED";
+    case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING";
+    case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN";
+    case 0xC000028A: return "STATUS_ENCRYPTION_FAILED";
+    case 0xC000028B: return "STATUS_DECRYPTION_FAILED";
+    case 0xC000028C: return "STATUS_RANGE_NOT_FOUND";
+    case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY";
+    case 0xC000028E: return "STATUS_NO_EFS";
+    case 0xC000028F: return "STATUS_WRONG_EFS";
+    case 0xC0000290: return "STATUS_NO_USER_KEYS";
+    case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED";
+    case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT";
+    case 0xC0000293: return "STATUS_FILE_ENCRYPTED";
+    case 0x40000294: return "STATUS_WAKE_SYSTEM";
+    case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND";
+    case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND";
+    case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND";
+    case 0xC0000298: return "STATUS_WMI_TRY_AGAIN";
+    case 0xC0000299: return "STATUS_SHARED_POLICY";
+    case 0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND";
+    case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS";
+    case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED";
+    case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE";
+    case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR";
+    case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE";
+    case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH";
+    case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE";
+    case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX";
+    case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED";
+    case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS";
+    case 0xC00002A5: return "STATUS_DS_BUSY";
+    case 0xC00002A6: return "STATUS_DS_UNAVAILABLE";
+    case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED";
+    case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS";
+    case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER";
+    case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR";
+    case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION";
+    case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF";
+    case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN";
+    case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS";
+    case 0xC00002AF: return "STATUS_DS_CROSS_DOM_MOVE_FAILED";
+    case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE";
+    case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED";
+    case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT";
+    case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY";
+    case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS";
+    case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS";
+    case 0xC00002B6: return "STATUS_DEVICE_REMOVED";
+    case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS";
+    case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE";
+    case 0xC00002B9: return "STATUS_NOINTERFACE";
+    case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED";
+    case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP";
+    case 0xC00002C3: return "STATUS_MUTUAL_AUTHENTICATION_FAILED";
+    case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE";
+    case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR";
+    case 0xC00002C6: return "STATUS_WMI_READ_ONLY";
+    case 0xC00002C7: return "STATUS_WMI_SET_FAILURE";
+    case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM";
+    case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION";
+    case 0xC00002CA: return "STATUS_TRANSPORT_FULL";
+    case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE";
+    case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED";
+    case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION";
+    case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION";
+    case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED";
+    case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID";
+    case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE";
+    case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED";
+    case 0xC00002D3: return "STATUS_POWER_STATE_INVALID";
+    case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE";
+    case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN";
+    case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN";
+    case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER";
+    case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER";
+    case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER";
+    case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER";
+    case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER";
+    case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS";
+    case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED";
+    case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER";
+    case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD";
+    case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY";
+    case 0xC00002E1: return "STATUS_DS_CANT_START";
+    case 0xC00002E2: return "STATUS_DS_INIT_FAILURE";
+    case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE";
+    case 0xC00002E4: return "STATUS_DS_GC_REQUIRED";
+    case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY";
+    case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS";
+    case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED";
+    case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION";
+    case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS";
+    case 0xC0009898: return "STATUS_WOW_ASSERTION";
+    case 0xC0010001: return "DBG_NO_STATE_CHANGE";
+    case 0xC0010002: return "DBG_APP_NOT_IDLE";
+    case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING";
+    case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING";
+    case 0xC0020003: return "RPC_NT_INVALID_BINDING";
+    case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED";
+    case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ";
+    case 0xC0020006: return "RPC_NT_INVALID_STRING_UUID";
+    case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT";
+    case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR";
+    case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND";
+    case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT";
+    case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND";
+    case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED";
+    case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED";
+    case 0xC002000E: return "RPC_NT_ALREADY_LISTENING";
+    case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED";
+    case 0xC0020010: return "RPC_NT_NOT_LISTENING";
+    case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE";
+    case 0xC0020012: return "RPC_NT_UNKNOWN_IF";
+    case 0xC0020013: return "RPC_NT_NO_BINDINGS";
+    case 0xC0020014: return "RPC_NT_NO_PROTSEQS";
+    case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT";
+    case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES";
+    case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE";
+    case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY";
+    case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS";
+    case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE";
+    case 0xC002001B: return "RPC_NT_CALL_FAILED";
+    case 0xC002001C: return "RPC_NT_CALL_FAILED_DNE";
+    case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR";
+    case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN";
+    case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE";
+    case 0xC0020022: return "RPC_NT_INVALID_TAG";
+    case 0xC0020023: return "RPC_NT_INVALID_BOUND";
+    case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME";
+    case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX";
+    case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX";
+    case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS";
+    case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT";
+    case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE";
+    case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL";
+    case 0xC002002C: return "RPC_NT_STRING_TOO_LONG";
+    case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND";
+    case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE";
+    case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH";
+    case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE";
+    case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL";
+    case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY";
+    case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE";
+    case 0xC0020034: return "EPT_NT_INVALID_ENTRY";
+    case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP";
+    case 0xC0020036: return "EPT_NT_NOT_REGISTERED";
+    case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT";
+    case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME";
+    case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION";
+    case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS";
+    case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED";
+    case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND";
+    case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS";
+    case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND";
+    case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE";
+    case 0xC0020040: return "RPC_NT_INVALID_NAF_ID";
+    case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT";
+    case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE";
+    case 0xC0020043: return "RPC_NT_INTERNAL_ERROR";
+    case 0xC0020044: return "RPC_NT_ZERO_DIVIDE";
+    case 0xC0020045: return "RPC_NT_ADDRESS_ERROR";
+    case 0xC0020046: return "RPC_NT_FP_DIV_ZERO";
+    case 0xC0020047: return "RPC_NT_FP_UNDERFLOW";
+    case 0xC0020048: return "RPC_NT_FP_OVERFLOW";
+    case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES";
+    case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL";
+    case 0xC0030003: return "RPC_NT_SS_CHAR_TRANS_SHORT_FILE";
+    case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT";
+    case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH";
+    case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED";
+    case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH";
+    case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE";
+    case 0xC0030009: return "RPC_NT_NULL_REF_POINTER";
+    case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE";
+    case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL";
+    case 0xC003000C: return "RPC_NT_BAD_STUB_DATA";
+    case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS";
+    case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS";
+    case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND";
+    case 0xC002004C: return "EPT_NT_CANT_CREATE";
+    case 0xC002004D: return "RPC_NT_INVALID_OBJECT";
+    case 0xC002004F: return "RPC_NT_NO_INTERFACES";
+    case 0xC0020050: return "RPC_NT_CALL_CANCELLED";
+    case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE";
+    case 0xC0020052: return "RPC_NT_COMM_FAILURE";
+    case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL";
+    case 0xC0020054: return "RPC_NT_NO_PRINC_NAME";
+    case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR";
+    case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY";
+    case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR";
+    case 0xC0020058: return "RPC_NT_NOT_CANCELLED";
+    case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION";
+    case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION";
+    case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION";
+    case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT";
+    case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION";
+    case 0xC003005E: return "RPC_NT_WRONG_PIPE_VERSION";
+    case 0xC003005F: return "RPC_NT_PIPE_CLOSED";
+    case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR";
+    case 0xC0030061: return "RPC_NT_PIPE_EMPTY";
+    case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE";
+    case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL";
+    case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE";
+    case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE";
+    case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW";
+    case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED";
+    case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX";
+    case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT";
+    case 0xC0140006: return "STATUS_ACPI_FATAL";
+    case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME";
+    case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE";
+    case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE";
+    case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE";
+    case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT";
+    case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED";
+    case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE";
+    case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION";
+    case 0xC014000F: return "STATUS_ACPI_INVALID_DATA";
+    case 0xC0140010: return "STATUS_ACPI_INVALID_REGION";
+    case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE";
+    case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK";
+    case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED";
+    case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED";
+    case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL";
+    case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED";
+    case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER";
+    case 0xC0140018: return "STATUS_ACPI_RS_ACCESS";
+    case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE";
+    case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED";
+    case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED";
+    case 0xC00A0001: return "STATUS_CTX_WINSTATION_NAME_INVALID";
+    case 0xC00A0002: return "STATUS_CTX_INVALID_PD";
+    case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND";
+    case 0x400A0004: return "STATUS_CTX_CDM_CONNECT";
+    case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT";
+    case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING";
+    case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF";
+    case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND";
+    case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME";
+    case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR";
+    case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT";
+    case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER";
+    case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE";
+    case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY";
+    case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE";
+    case 0xC00A0010: return "STATUS_CTX_TD_ERROR";
+    case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID";
+    case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE";
+    case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED";
+    case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND";
+    case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION";
+    case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY";
+    case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE";
+    case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID";
+    case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE";
+    case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT";
+    case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT";
+    case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT";
+    case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED";
+    case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED";
+    case 0xC00A002E: return "STATUS_CTX_INVALID_WD";
+    case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND";
+    case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID";
+    case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED";
+    case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR";
+    case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET";
+    case 0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE";
+    case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE";
+    case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED";
+    case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED";
+    default:         return "STATUS_UNKNOWN";
+    }
+}
+
+
+/*
+ * KsPrintf
+ *   Variable-argument, level-sensitive debug print routine.  The message
+ *   is emitted only when DebugPrintLevel is lower than or equal to the
+ *   current KsDebugLevel; otherwise the call does nothing.
+ *
+ * Arguments:
+ *   DebugPrintLevel - Specifies at which debugging level the string should
+ *                     be printed
+ *   DebugMessage - printf-style ascii c format string, followed by the
+ *                  arguments it consumes
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES: 
+ *   The text is formatted into a fixed 0x200-byte stack buffer.  Output
+ *   that does not fit is truncated instead of overrunning the buffer.
+ */
+
+VOID
+KsPrintf(
+    LONG  DebugPrintLevel,
+    PCHAR DebugMessage,
+    ...
+    )
+{
+    va_list  ap;
+    CHAR     buffer[0x200];
+
+    /* Filter first: suppressed messages need no formatting work at all. */
+    if (DebugPrintLevel > KsDebugLevel)
+    {
+        return;
+    }
+
+    va_start(ap, DebugMessage);
+
+    /*
+     * Bounded formatting: _vsnprintf() writes at most the given number of
+     * characters and does not NUL-terminate when the output is truncated,
+     * so the last byte is reserved and terminated explicitly.
+     */
+    _vsnprintf(buffer, sizeof(buffer) - 1, DebugMessage, ap);
+    buffer[sizeof(buffer) - 1] = '\0';
+
+    va_end(ap);
+
+    KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer));
+
+} // KsPrintf()
+
+#endif
diff --git a/libcfs/libcfs/winnt/winnt-fs.c b/libcfs/libcfs/winnt/winnt-fs.c
new file mode 100644 (file)
index 0000000..128781b
--- /dev/null
@@ -0,0 +1,541 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+const CHAR *dos_file_prefix = "\\??\\"; /* DOS-device prefix; cfs_filp_open() prepends it to names not starting with '\\' */
+
+/*
+ * cfs_filp_open
+ *     To open or create a file in kernel mode
+ *
+ * Arguments:
+ *   name:  name of the file to be opened or created.  When it does not
+ *          begin with '\\' the dos_file_prefix is prepended before the
+ *          name is handed to ZwCreateFile.
+ *   flags: open/creation attribute options (O_WRONLY, O_RDWR, O_CREAT,
+ *          O_EXCL, O_TRUNC, O_DIRECTORY, O_SYNC, O_DIRECT are examined)
+ *   mode:  access mode/permission; only recorded in the returned
+ *          cfs_file_t, it is not passed to ZwCreateFile
+ *   err:   optional; when not NULL it receives 0 on success or an error
+ *          code on failure
+ *
+ * Return Value:
+ *   the pointer to the cfs_file_t or NULL if it fails
+ *
+ * Notes: 
+ *   The caller releases the returned object with cfs_filp_close().
+ */
+
+cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err)
+{
+    cfs_file_t *        fp = NULL;
+    int                 rc = 0;
+
+    NTSTATUS            Status;
+
+    OBJECT_ATTRIBUTES   ObjectAttributes;
+    HANDLE              FileHandle;
+    IO_STATUS_BLOCK     IoStatus;
+    ACCESS_MASK         DesiredAccess;
+    ULONG               CreateDisposition;
+    ULONG               ShareAccess;
+    ULONG               CreateOptions;
+
+    USHORT              NameLength = 0;
+    USHORT              PrefixLength = 0;
+
+    UNICODE_STRING      UnicodeName;
+    PWCHAR              UnicodeString = NULL;
+
+    ANSI_STRING         AnsiName;
+    PUCHAR              AnsiString = NULL;
+
+    /* Translate the access-mode flags into NT access and share masks */
+
+    if (cfs_is_flag_set(flags, O_WRONLY)) {
+        DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = 0;
+    }  else if (cfs_is_flag_set(flags, O_RDWR)) {
+        DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
+    } else {
+        DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ;
+    }
+
+    /* Translate O_CREAT / O_EXCL into the create disposition */
+
+    if (cfs_is_flag_set(flags, O_CREAT)) {
+        if (cfs_is_flag_set(flags, O_EXCL)) {
+            CreateDisposition = FILE_CREATE;
+        } else {
+            CreateDisposition = FILE_OPEN_IF;
+        }
+    } else {
+        CreateDisposition = FILE_OPEN;
+    }
+
+    /* O_TRUNC overrides whatever disposition was chosen above */
+
+    if (cfs_is_flag_set(flags, O_TRUNC)) {
+        if (cfs_is_flag_set(flags, O_EXCL)) {
+            CreateDisposition = FILE_OVERWRITE;
+        } else {
+            CreateDisposition = FILE_OVERWRITE_IF;
+        }
+    }
+
+    CreateOptions = 0;
+
+    if (cfs_is_flag_set(flags, O_DIRECTORY)) {
+        cfs_set_flag(CreateOptions,  FILE_DIRECTORY_FILE);
+    }
+
+    if (cfs_is_flag_set(flags, O_SYNC)) {
+         cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
+    }
+
+    if (cfs_is_flag_set(flags, O_DIRECT)) {
+         cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
+    }
+
+    /* Initialize the unicode path name for the specified file */
+
+    NameLength = (USHORT)strlen(name);
+
+    if (name[0] != '\\') {
+        PrefixLength = (USHORT)strlen(dos_file_prefix);
+    }
+
+    AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1),
+                            CFS_ALLOC_ZERO);
+    if (NULL == AnsiString) {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1),
+                               CFS_ALLOC_ZERO);
+    if (NULL == UnicodeString) {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    if (PrefixLength) {
+        RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength);
+    }
+
+    RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength);
+    NameLength += PrefixLength;
+
+    AnsiName.MaximumLength = NameLength + 1;
+    AnsiName.Length = NameLength;
+    AnsiName.Buffer = AnsiString;
+
+    UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR);
+    UnicodeName.Length = 0;
+    UnicodeName.Buffer = (PWSTR)UnicodeString;
+
+    /* Convert into the preallocated buffer; do not open a half-built name */
+
+    Status = RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE);
+    if (!NT_SUCCESS(Status)) {
+        rc = cfs_error_code(Status);
+        goto out;
+    }
+
+    /* Setup the object attributes structure for the file. */
+
+    InitializeObjectAttributes(
+            &ObjectAttributes,
+            &UnicodeName,
+            OBJ_CASE_INSENSITIVE |
+            OBJ_KERNEL_HANDLE,
+            NULL,
+            NULL );
+
+    /* Now open or create the file */
+
+    Status = ZwCreateFile(
+            &FileHandle,
+            DesiredAccess,
+            &ObjectAttributes,
+            &IoStatus,
+            0,
+            FILE_ATTRIBUTE_NORMAL,
+            ShareAccess,
+            CreateDisposition,
+            CreateOptions,
+            NULL,
+            0 );
+
+    /*
+     * Judge the outcome by the value ZwCreateFile returned: IoStatus is
+     * a local that is never initialised here, so it must not be read
+     * when the call itself reports failure.
+     */
+
+    if (!NT_SUCCESS(Status)) {
+        rc = cfs_error_code(Status);
+        goto out;
+    }
+
+    /* Allocate the cfs_file_t: libcfs file object */
+
+    fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO);
+
+    if (NULL == fp) {
+        Status = ZwClose(FileHandle);
+        ASSERT(NT_SUCCESS(Status));
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    fp->f_handle = FileHandle;
+    strcpy(fp->f_name, name);
+    fp->f_flags = flags;
+    fp->f_mode  = (mode_t)mode;
+    fp->f_count = 1;
+
+out:
+
+    /* free the memory of temporary name strings (only what was allocated) */
+    if (NULL != UnicodeString) {
+        cfs_free(UnicodeString);
+    }
+    if (NULL != AnsiString) {
+        cfs_free(AnsiString);
+    }
+
+    /* err is optional on every path, success included */
+    if (NULL != err) {
+        *err = rc;
+    }
+
+    return fp;
+}
+
+
+/*
+ * cfs_filp_close
+ *     To close the opened file and release the filp structure
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t structure; must not be NULL and
+ *         must carry a non-NULL f_handle (both are only ASSERTed)
+ *
+ * Return Value:
+ *   ZERO: always, as currently written -- a ZwClose failure is only
+ *         caught by the ASSERT below and is not reported to the caller
+ *
+ * Notes: 
+ *   fp is freed here and must not be used after this call returns.
+ */
+
+int cfs_filp_close(cfs_file_t *fp)
+{
+    NTSTATUS    Status;
+
+    ASSERT(fp != NULL);
+    ASSERT(fp->f_handle != NULL);
+
+    /* release the file handle */
+    Status = ZwClose(fp->f_handle);
+    ASSERT(NT_SUCCESS(Status));
+
+    /* free the file filp structure, whatever ZwClose returned */
+    cfs_free(fp);
+    return 0;
+}
+
+
+/*
+ * cfs_filp_read
+ *     To read data from the opened file
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t structure
+ *   buf:  pointer to the buffer to contain the data
+ *   nbytes: size in bytes to be read from the file
+ *   pos:  offset in file where reading starts, if pos
+ *         NULL, then read from current file offset (fp->f_pos)
+ *
+ * Return Value:
+ *   Actual size read into the buffer in success case
+ *   Error code (from cfs_error_code) in failure case
+ *
+ * Notes: 
+ *   On success fp->f_pos is advanced past the bytes read and, when pos
+ *   is not NULL, *pos is updated to the same value.
+ *
+ *   NOTE(review): cfs_filp_open does not request FILE_SYNCHRONOUS_IO_*,
+ *   so ZwReadFile could presumably return STATUS_PENDING here; that case
+ *   is not waited for -- confirm against the callers.
+ */
+
+int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+    LARGE_INTEGER   address;
+    NTSTATUS        Status;
+    IO_STATUS_BLOCK IoStatus;
+
+    int             rc = 0;
+
+    /* Pick the starting offset: caller supplied, or the file's own */
+
+    if (pos != NULL) {
+        address.QuadPart = *pos;
+    } else {
+        address.QuadPart = fp->f_pos;
+    }
+
+    /* Read data from the file into the specified buffer */
+
+    Status = ZwReadFile( fp->f_handle,
+                         0,
+                         NULL,
+                         NULL,
+                         &IoStatus,
+                         buf,
+                         nbytes,
+                         &address,
+                         NULL );
+
+    /*
+     * IoStatus is an uninitialised local; only trust it once ZwReadFile
+     * itself reports that the request was accepted.
+     */
+
+    if (NT_SUCCESS(Status)) {
+        Status = IoStatus.Status;
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        return cfs_error_code(Status);
+    }
+
+    rc = (int)IoStatus.Information;
+    fp->f_pos = address.QuadPart + rc;
+    if (pos != NULL) {
+        *pos = fp->f_pos;
+    }
+
+    return rc;
+}
+
+
+/*
+ * cfs_filp_write
+ *     To write specified data to the opened file
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t strcture
+ *   buf:  pointer to the buffer containing the data
+ *   nbytes: size in bytes to be written to the file
+ *   pos:  offset in file where writing starts, if pos
+ *         NULL, then write to current file offset
+ *
+ * Return Value:
+ *   Actual number of bytes written to the file in success case
+ *   Error code in failure case
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos)
+{
+    IO_STATUS_BLOCK iosb;
+    LARGE_INTEGER   offset;
+    NTSTATUS        status;
+    int             written;
+
+    /* an explicit offset wins over the position kept in the file object */
+    if (pos == NULL) {
+        offset.QuadPart = fp->f_pos;
+    } else {
+        offset.QuadPart = *pos;
+    }
+
+    /* push the caller's data to the file */
+    status = ZwWriteFile(fp->f_handle, 0, NULL, NULL, &iosb,
+                         buf, nbytes, &offset, NULL);
+
+    if (!NT_SUCCESS(status)) {
+        /* translate the NT status into a libcfs error code */
+        return cfs_error_code(status);
+    }
+
+    /* success: record how far we got and move the file position along */
+    written = (int)iosb.Information;
+    fp->f_pos = offset.QuadPart + written;
+
+    if (pos != NULL) {
+        *pos = fp->f_pos;
+    }
+
+    return written;
+}
+
+
+/*
+ * CompletionRoutine
+ *     I/O completion callback installed by cfs_filp_fsync() on the Irp
+ *     it builds by hand. DeviceObject and Context are not used.
+ *
+ * Return Value:
+ *   Always STATUS_MORE_PROCESSING_REQUIRED.
+ *
+ * Notes:
+ *   The order of the three steps matters: once the event is signalled
+ *   the waiter may resume, so the status must already have been copied,
+ *   and the Irp must not be touched after it is freed.
+ */
+NTSTATUS
+CompletionRoutine(
+    PDEVICE_OBJECT DeviceObject,
+    PIRP Irp,
+    PVOID Context)
+{
+    /* copy the IoStatus result */
+    *Irp->UserIosb = Irp->IoStatus;
+    
+    /* signal the event we set */
+    KeSetEvent(Irp->UserEvent, 0, FALSE);
+   
+    /* free the Irp we allocated */
+    IoFreeIrp(Irp);
+    
+    /* the Irp is gone, so completion processing must stop here
+       (per the WDK contract for this status value) */
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * cfs_filp_fsync
+ *     To sync the dirty data of the file to disk
+ *
+ * Arguments:
+ *   fp: the pointer of the cfs_file_t strcture
+ *
+ * Return Value:
+ *   Zero:  in success case
+ *   Error code: in failure case
+ *
+ * Notes: 
+ *   Nt kernel doesn't export such a routine to flush a file,
+ *   we must allocate our own Irp and issue it to the file
+ *   system driver.
+ */
+
+int cfs_filp_fsync(cfs_file_t *fp)
+{
+
+    PFILE_OBJECT            FileObject;
+    PDEVICE_OBJECT          DeviceObject;
+
+    NTSTATUS                Status;
+    PIRP                    Irp;
+    KEVENT                  Event;
+    IO_STATUS_BLOCK         IoSb;
+    PIO_STACK_LOCATION      IrpSp;
+
+    /* get the FileObject and the DeviceObject */
+
+    Status = ObReferenceObjectByHandle(
+                fp->f_handle,
+                FILE_WRITE_DATA,
+                NULL,
+                KernelMode,
+                (PVOID*)&FileObject,
+                NULL );
+
+    if (!NT_SUCCESS(Status)) {
+        return cfs_error_code(Status);
+    }
+
+    DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+    /* allocate a new Irp */
+
+    Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
+
+    if (!Irp) {
+
+        /* give back the reference taken above before bailing out */
+        ObDereferenceObject(FileObject);
+        return -ENOMEM;
+    }
+
+    /* initialize the event */
+    KeInitializeEvent(&Event, SynchronizationEvent, FALSE);
+
+    /* setup the Irp: CompletionRoutine() copies the final status into
+       IoSb and signals Event, both of which live on this stack frame */
+    Irp->UserEvent = &Event;
+    Irp->UserIosb = &IoSb;
+    Irp->RequestorMode = KernelMode;
+
+    Irp->Tail.Overlay.Thread = PsGetCurrentThread();
+    Irp->Tail.Overlay.OriginalFileObject = FileObject;
+
+    /* setup the Irp stack location */
+    IrpSp = IoGetNextIrpStackLocation(Irp);
+
+    IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS;
+    IrpSp->DeviceObject = DeviceObject;
+    IrpSp->FileObject = FileObject;
+
+    IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE);
+
+
+    /* issue the Irp to the underlying file system driver */
+    IoCallDriver(DeviceObject, Irp);
+
+    /* wait until it is finished. The wait must not be alertable: Event
+       and IoSb are locals, so returning early would let the completion
+       routine write into a stack frame that no longer exists and would
+       make the IoSb.Status read below meaningless. */
+    KeWaitForSingleObject(&Event, Executive, KernelMode, FALSE, NULL);
+
+    /* cleanup our reference on it */
+    ObDereferenceObject(FileObject);
+
+    /* the completion routine has stored the result of the flush in IoSb */
+    Status = IoSb.Status;
+
+    return cfs_error_code(Status);
+}
+
+/*
+ * cfs_get_file
+ *     To increase the reference of the file object
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t strcture
+ *
+ * Return Value:
+ *   Zero:  in success case
+ *   Non-Zero: in failure case
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_get_file(cfs_file_t *fp)
+{
+    /* interlocked increment of the reference count; the new value is
+       not inspected and the function always reports success */
+    InterlockedIncrement(&(fp->f_count));
+    return 0;
+}
+
+
+/*
+ * cfs_put_file
+ *     To decrease the reference of the file object
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t strcture
+ *
+ * Return Value:
+ *   Zero:  in success case
+ *   Non-Zero: in failure case
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_put_file(cfs_file_t *fp)
+{
+    /* other holders remain: nothing more to do */
+    if (InterlockedDecrement(&(fp->f_count)) != 0) {
+        return 0;
+    }
+
+    /* that was the last reference: close the handle and free fp */
+    cfs_filp_close(fp);
+
+    return 0;
+}
+
+
+/*
+ * cfs_file_count
+ *   To query the reference count of the file object
+ *
+ * Arguments:
+ *   fp:   the pointer of the cfs_file_t strcture
+ *
+ * Return Value:
+ *   the reference count of the file object
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_file_count(cfs_file_t *fp)
+{
+    /* plain read of the counter (no interlocked operation), narrowed
+       to int for the caller */
+    return (int)(fp->f_count);
+}
diff --git a/libcfs/libcfs/winnt/winnt-lock.c b/libcfs/libcfs/winnt/winnt-lock.c
new file mode 100644 (file)
index 0000000..12dbc67
--- /dev/null
@@ -0,0 +1,353 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+
+#if _X86_
+
+/*
+ * atomic_add (x86 build)
+ *   Add i to v->counter with a lock-prefixed add instruction.
+ *   The function is declared naked, so its body is exactly the
+ *   assembly below, including the explicit ret.
+ */
+void __declspec (naked) FASTCALL
+atomic_add(
+    int i,
+    atomic_t *v
+    )
+{
+    // ECX = i
+    // EDX = v ; [EDX][0] = v->counter
+
+    __asm {
+        lock add dword ptr [edx][0], ecx
+        ret
+    }
+}
+
+/*
+ * atomic_sub (x86 build)
+ *   Subtract i from v->counter with a lock-prefixed sub instruction.
+ *   Naked function: the assembly below is the whole body.
+ */
+void __declspec (naked) FASTCALL
+atomic_sub(
+    int i,
+    atomic_t *v
+   ) 
+{
+    // ECX = i
+    // EDX = v ; [EDX][0] = v->counter
+
+    __asm {
+        lock sub dword ptr [edx][0], ecx
+        ret
+    }
+}
+
+/*
+ * atomic_inc (x86 build)
+ *   Increment v->counter by one with a lock-prefixed inc instruction.
+ *   Naked function: the assembly below is the whole body.
+ */
+void __declspec (naked) FASTCALL
+atomic_inc(
+    atomic_t *v
+    )
+{
+    //InterlockedIncrement((PULONG)(&((v)->counter)));
+
+    // ECX = v ; [ECX][0] = v->counter
+
+    __asm {
+        lock inc dword ptr [ecx][0]
+        ret
+    }
+}
+
+/*
+ * atomic_dec (x86 build)
+ *   Decrement v->counter by one with a lock-prefixed dec instruction.
+ *   Naked function: the assembly below is the whole body.
+ */
+void __declspec (naked) FASTCALL
+atomic_dec(
+    atomic_t *v
+    )
+{
+    // ECX = v ; [ECX][0] = v->counter
+
+    __asm {
+        lock dec dword ptr [ecx][0]
+        ret
+    }
+}
+
+/*
+ * atomic_sub_and_test (x86 build)
+ *   Subtract i from v->counter with a lock-prefixed sub and return 1
+ *   when the result is zero, 0 otherwise.
+ *   Naked function: the assembly below is the whole body.
+ */
+int __declspec (naked) FASTCALL 
+atomic_sub_and_test(
+    int i,
+    atomic_t *v
+    )
+{
+
+    // ECX = i
+    // EDX = v ; [EDX][0] = v->counter
+
+    __asm {
+        // clear EAX first: sete below only writes the low byte (AL)
+        xor eax, eax
+        lock sub dword ptr [edx][0], ecx
+        // AL = 1 if the subtraction left zero (ZF set)
+        sete al
+        ret
+    }
+}
+
+/*
+ * atomic_inc_and_test (x86 build)
+ *   Increment v->counter with a lock-prefixed inc and return 1 when
+ *   the result is zero, 0 otherwise.
+ *   Naked function: the assembly below is the whole body.
+ */
+int __declspec (naked) FASTCALL
+atomic_inc_and_test(
+    atomic_t *v
+    )
+{
+    // ECX = v ; [ECX][0] = v->counter
+
+    __asm {
+        // clear EAX first: sete below only writes the low byte (AL)
+        xor eax, eax
+        lock inc dword ptr [ecx][0]
+        // AL = 1 if the increment left zero (ZF set)
+        sete al
+        ret
+    }
+}
+
+/*
+ * atomic_dec_and_test (x86 build)
+ *   Decrement v->counter with a lock-prefixed dec and return 1 when
+ *   the result is zero, 0 otherwise.
+ *   Naked function: the assembly below is the whole body.
+ */
+int __declspec (naked) FASTCALL
+atomic_dec_and_test(
+    atomic_t *v
+    )
+{
+    // ECX = v ; [ECX][0] = v->counter
+
+    __asm {
+        // clear EAX first: sete below only writes the low byte (AL)
+        xor eax, eax
+        lock dec dword ptr [ecx][0]
+        // AL = 1 if the decrement left zero (ZF set)
+        sete al
+        ret
+    }
+}
+
+#else
+
+/*
+ * atomic_add (generic build)
+ *   Add i to v->counter through InterlockedExchangeAdd; the previous
+ *   value that call returns is discarded.
+ */
+void FASTCALL
+atomic_add(int i, atomic_t *v)
+{
+    PULONG target = (PULONG)(&((v)->counter));
+
+    InterlockedExchangeAdd(target, (LONG)(i));
+}
+
+/*
+ * atomic_sub (generic build)
+ *   Subtract i from v->counter by adding its negation through
+ *   InterlockedExchangeAdd.
+ */
+void FASTCALL
+atomic_sub(int i, atomic_t *v)
+{
+    PULONG target = (PULONG)(&((v)->counter));
+
+    InterlockedExchangeAdd(target, (LONG)(-i));
+}
+
+/*
+ * atomic_inc (generic build)
+ *   Increment v->counter by one through InterlockedIncrement.
+ */
+void FASTCALL
+atomic_inc(atomic_t *v)
+{
+    PULONG target = (PULONG)(&((v)->counter));
+
+    InterlockedIncrement(target);
+}
+
+/*
+ * atomic_dec (generic build)
+ *   Decrement v->counter by one through InterlockedDecrement.
+ */
+void FASTCALL
+atomic_dec(atomic_t *v)
+{
+    PULONG target = (PULONG)(&((v)->counter));
+
+    InterlockedDecrement(target);
+}
+
+/*
+ * atomic_sub_and_test (generic build)
+ *   Subtract i from v->counter and return non-zero when the new value
+ *   is zero. Implemented as a compare-exchange retry loop.
+ */
+int FASTCALL 
+atomic_sub_and_test(int i, atomic_t *v)
+{
+    int seen;
+    int wanted;
+
+    for (;;) {
+        /* snapshot the counter and work out the value to install */
+        seen = v->counter;
+        wanted = seen - i;
+
+        /* the swap only happens if nobody changed the counter meanwhile */
+        if (InterlockedCompareExchange(&(v->counter), wanted, seen) == seen) {
+            break;
+        }
+    }
+
+    return (wanted == 0);
+}
+
+/*
+ * atomic_inc_and_test (generic build)
+ *   Increment v->counter and return non-zero when the new value is
+ *   zero. Implemented as a compare-exchange retry loop.
+ */
+int FASTCALL
+atomic_inc_and_test(atomic_t *v)
+{
+    int seen;
+    int wanted;
+
+    for (;;) {
+        /* snapshot the counter and work out the value to install */
+        seen = v->counter;
+        wanted = seen + 1;
+
+        /* the swap only happens if nobody changed the counter meanwhile */
+        if (InterlockedCompareExchange(&(v->counter), wanted, seen) == seen) {
+            break;
+        }
+    }
+
+    return (wanted == 0);
+}
+
+/*
+ * atomic_dec_and_test (generic build)
+ *   Decrement v->counter by one and return non-zero when the new value
+ *   is zero, zero otherwise. Implemented as a compare-exchange retry
+ *   loop, like the other *_and_test routines in this branch.
+ */
+int FASTCALL
+atomic_dec_and_test(
+    atomic_t *v
+    )
+{
+    int counter, result;
+
+    do {
+
+        counter = v->counter;
+        /* a decrement: this used to be "counter + 1", which made the
+           routine count upwards, so reference counts never hit zero */
+        result = counter - 1;
+
+    } while ( InterlockedCompareExchange(
+                &(v->counter),
+                result,
+                counter) !=  counter);
+
+    return (result == 0);
+}
+
+#endif
+
+
+/*
+ * rw spinlock
+ */
+
+
+/*
+ * rwlock_init
+ *   Put the rwlock into its idle state: a count of zero (no readers,
+ *   no writer) and an initialized guard spinlock.
+ */
+void
+rwlock_init(rwlock_t * rwlock)
+{
+    rwlock->count = 0;
+    spin_lock_init(&rwlock->guard);
+}
+
+/*
+ * rwlock_fini
+ *   Counterpart of rwlock_init(); the body is empty, so nothing is
+ *   released or checked here.
+ */
+void
+rwlock_fini(rwlock_t * rwlock)
+{
+}
+
+/*
+ * read_lock
+ *   Take the rwlock for shared access. Raises the IRQL to dispatch
+ *   level (the previous level is parked in the caller's task slot for
+ *   read_unlock to restore), then spins until no writer holds the
+ *   lock (count >= 0) and registers one more reader.
+ */
+void
+read_lock(rwlock_t * rwlock)
+{
+    cfs_task_t * self = cfs_current();
+    PTASK_SLOT   slot;
+
+    if (self == NULL) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    slot = CONTAINING_RECORD(self, TASK_SLOT, task);
+    ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+    slot->irql = KeRaiseIrqlToDpcLevel();
+
+    for (;;) {
+        spin_lock(&rwlock->guard);
+
+        if (rwlock->count >= 0) {
+            /* no writer inside: count ourselves in and leave */
+            rwlock->count++;
+            spin_unlock(&rwlock->guard);
+            return;
+        }
+
+        /* a writer owns the lock: let go of the guard and retry */
+        spin_unlock(&rwlock->guard);
+    }
+}
+
+/*
+ * read_unlock
+ *   Release a shared hold taken with read_lock(): drop the reader
+ *   count under the guard spinlock and restore the IRQL that
+ *   read_lock() saved in the caller's task slot.
+ */
+void
+read_unlock(rwlock_t * rwlock)
+{
+    cfs_task_t * task = cfs_current();
+    PTASK_SLOT   slot = NULL;
+
+    if (!task) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    slot = CONTAINING_RECORD(task, TASK_SLOT, task);
+    ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+    spin_lock(&rwlock->guard);
+    ASSERT(rwlock->count > 0);
+    rwlock->count--;
+    /* sanity check on the counter itself; the previous code compared
+       the rwlock pointer with zero, which could never trigger */
+    if (rwlock->count < 0) {
+        cfs_enter_debugger();
+    }
+    spin_unlock(&rwlock->guard);
+
+    KeLowerIrql(slot->irql);
+}
+
+/*
+ * write_lock
+ *   Take the rwlock exclusively. Raises the IRQL to dispatch level
+ *   (the previous level is parked in the caller's task slot for
+ *   write_unlock to restore), then spins until the lock is completely
+ *   idle (count == 0) and marks it writer-owned with a count of -1.
+ */
+void
+write_lock(rwlock_t * rwlock)
+{
+    cfs_task_t * self = cfs_current();
+    PTASK_SLOT   slot;
+
+    if (self == NULL) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    slot = CONTAINING_RECORD(self, TASK_SLOT, task);
+    ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+    slot->irql = KeRaiseIrqlToDpcLevel();
+
+    for (;;) {
+        spin_lock(&rwlock->guard);
+
+        if (rwlock->count == 0) {
+            /* nobody inside: claim the lock for writing and leave */
+            rwlock->count = -1;
+            spin_unlock(&rwlock->guard);
+            return;
+        }
+
+        /* readers or another writer are inside: release and retry */
+        spin_unlock(&rwlock->guard);
+    }
+}
+
+/*
+ * write_unlock
+ *   Release an exclusive hold taken with write_lock(): reset the
+ *   count from -1 to 0 under the guard spinlock and restore the IRQL
+ *   that write_lock() saved in the caller's task slot.
+ */
+void
+write_unlock(rwlock_t * rwlock)
+{
+    cfs_task_t * self = cfs_current();
+    PTASK_SLOT   slot;
+
+    if (self == NULL) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    slot = CONTAINING_RECORD(self, TASK_SLOT, task);
+    ASSERT(slot->Magic == TASKSLT_MAGIC);
+
+    spin_lock(&rwlock->guard);
+    /* only the writer that set -1 should get here */
+    ASSERT(rwlock->count == -1);
+    rwlock->count = 0;
+    spin_unlock(&rwlock->guard);
+
+    KeLowerIrql(slot->irql);
+}
diff --git a/libcfs/libcfs/winnt/winnt-lwt.c b/libcfs/libcfs/winnt/winnt-lwt.c
new file mode 100644 (file)
index 0000000..272cbcf
--- /dev/null
@@ -0,0 +1,20 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+# define DEBUG_SUBSYSTEM S_LNET
+
diff --git a/libcfs/libcfs/winnt/winnt-mem.c b/libcfs/libcfs/winnt/winnt-mem.c
new file mode 100644 (file)
index 0000000..6b66a95
--- /dev/null
@@ -0,0 +1,332 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+
+
+/* cache the cfs_page_t descriptors are taken from in cfs_alloc_page() */
+cfs_mem_cache_t *cfs_page_t_slab = NULL;
+/* cache the page buffers (pg->addr) are taken from in cfs_alloc_page() */
+/* NOTE(review): both start out NULL and are not created in this file;
+   presumably set up during libcfs initialization - confirm */
+cfs_mem_cache_t *cfs_page_p_slab = NULL;
+
+/*
+ * cfs_alloc_page
+ *   To allocate the cfs_page_t and also 1 page of memory
+ *
+ * Arguments:
+ *   flags:  the allocation options
+ *
+ * Return Value:
+ *   pointer to the cfs_page_t strcture in success or
+ *   NULL in failure case
+ *
+ * Notes: 
+ *   N/A
+ */
+
+cfs_page_t * cfs_alloc_page(int flags)
+{
+    cfs_page_t *page = cfs_mem_cache_alloc(cfs_page_t_slab, 0);
+
+    /* no descriptor: stop in the debugger and report failure */
+    if (!page) {
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    /* fresh descriptor: clear it, attach a page buffer, one reference */
+    memset(page, 0, sizeof(cfs_page_t));
+    page->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0);
+    atomic_set(&page->count, 1);
+
+    /* no page buffer: give the descriptor back and report failure */
+    if (!page->addr) {
+        cfs_enter_debugger();
+        cfs_mem_cache_free(cfs_page_t_slab, page);
+        return NULL;
+    }
+
+    /* the buffer is only cleared when the caller asked for it */
+    if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) {
+        memset(page->addr, 0, CFS_PAGE_SIZE);
+    }
+
+    return page;
+}
+
+/*
+ * cfs_free_page
+ *   To free the cfs_page_t including the page
+ *
+ * Arguments:
+ *   pg:  pointer to the cfs_page_t strcture
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+void cfs_free_page(cfs_page_t *pg)
+{
+    /* a page may only be freed once nobody else holds a reference */
+    ASSERT(NULL != pg);
+    ASSERT(NULL != pg->addr);
+    ASSERT(atomic_read(&pg->count) <= 1);
+
+    /* page buffer first, while the descriptor that names it is alive */
+    cfs_mem_cache_free(cfs_page_p_slab, pg->addr);
+
+    /* then the descriptor */
+    cfs_mem_cache_free(cfs_page_t_slab, pg);
+}
+
+
+/*
+ * cfs_alloc
+ *   To allocate memory from system pool
+ *
+ * Arguments:
+ *   nr_bytes:  length in bytes of the requested buffer
+ *   flags:     flags indiction
+ *
+ * Return Value:
+ *   NULL: if there's no enough memory space in system
+ *   the address of the allocated memory in success.
+ *
+ * Notes: 
+ *   This operation can be treated as atomic.
+ */
+
+void *
+cfs_alloc(size_t nr_bytes, u_int32_t flags)
+{
+    void *mem;
+
+    /* Pool type is fixed: everything comes from NonPagedPool, whatever
+       the flags say */
+    mem = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs');
+
+    if (mem == NULL) {
+        /* allocation failure: stop in the debugger, caller gets NULL */
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    /* CFS_ALLOC_ZERO is the only flag that is honoured here */
+    if (flags & CFS_ALLOC_ZERO) {
+        memset(mem, 0, nr_bytes);
+    }
+
+    return mem;
+}
+
+/*
+ * cfs_free
+ *   To free the sepcified memory to system pool
+ *
+ * Arguments:
+ *   addr:   pointer to the buffer to be freed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *    This operation can be treated as atomic.
+ */
+
+void
+cfs_free(void *addr)
+{
+       /* addr is handed to ExFreePool() as is; no NULL check is made here */
+       ExFreePool(addr);
+}
+
+/*
+ * cfs_alloc_large
+ *   To allocate large block of memory from system pool
+ *
+ * Arguments:
+ *   nr_bytes:  length in bytes of the requested buffer
+ *
+ * Return Value:
+ *   NULL: if there's no enough memory space in system
+ *   the address of the allocated memory in success.
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void *
+cfs_alloc_large(size_t nr_bytes)
+{
+       /* no separate large-block path: same allocator as cfs_alloc(),
+          with no flags, so the memory is not zeroed */
+       return cfs_alloc(nr_bytes, 0);
+}
+
+/*
+ * cfs_free_large
+ *   To free the sepcified memory to system pool
+ *
+ * Arguments:
+ *   addr:   pointer to the buffer to be freed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void
+cfs_free_large(void *addr)
+{
+       /* counterpart of cfs_alloc_large(): simply forwards to cfs_free() */
+       cfs_free(addr);
+}
+
+
+/*
+ * cfs_mem_cache_create
+ *   To create a SLAB cache
+ *
+ * Arguments:
+ *   name:   name string of the SLAB cache to be created
+ *   size:   size in bytes of SLAB entry buffer
+ *   offset: offset in the page
+ *   flags:  SLAB creation flags
+*
+ * Return Value:
+ *   The poitner of cfs_memory_cache structure in success.
+ *   NULL pointer in failure case.
+ *
+ * Notes: 
+ *   1, offset won't be used here.
+ *   2, it could be better to induce a lock to protect the access of the
+ *       SLAB structure on SMP if there's not outside lock protection.
+ *   3, parameters C/D are removed.
+ */
+
+cfs_mem_cache_t *
+cfs_mem_cache_create(
+    const char * name,
+    size_t size,
+    size_t offset,
+    unsigned long flags
+    )
+{
+    cfs_mem_cache_t * cache;
+
+    /* a name, when given, must be shorter than 20 characters */
+    if (name != NULL && strlen(name) >= 20) {
+        return NULL;
+    }
+
+    /* the cache descriptor itself lives in pool memory */
+    cache = cfs_alloc (sizeof(cfs_mem_cache_t), 0);
+    if (cache == NULL) {
+        return NULL;
+    }
+
+    memset(cache, 0, sizeof(cfs_mem_cache_t));
+    cache->flags = flags;
+
+    /* without a name the (zeroed) name field is left empty */
+    if (name != NULL) {
+        strcpy(&cache->name[0], name);
+    }
+
+    /* the objects come from an NT non-paged lookaside list whose entry
+       size is the requested object size; offset is not used */
+    ExInitializeNPagedLookasideList(&(cache->npll), NULL, NULL, 0,
+                                    size, 'pnmk', 0);
+
+    return cache;
+}
+
+/*
+ * cfs_mem_cache_destroy
+ *   To destroy the unused SLAB cache
+ *
+ * Arguments:
+ *   kmc: the SLAB cache to be destroied.
+ *
+ * Return Value:
+ *   0: in success case.
+ *   1: in failure case.
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc)
+{
+    ASSERT(NULL != kmc);
+
+    /* tear down the lookaside list before releasing the descriptor
+       that embeds it */
+    ExDeleteNPagedLookasideList(&kmc->npll);
+    cfs_free(kmc);
+
+    /* there is no failure path: the result is always 0 */
+    return 0;
+}
+
+/*
+ * cfs_mem_cache_alloc
+ *   To allocate an object (LookAside entry) from the SLAB
+ *
+ * Arguments:
+ *   kmc:   the SLAB cache to be allocated from.
+ *   flags: flags for allocation options
+ *
+ * Return Value:
+ *   object buffer address: in success case.
+ *   NULL: in failure case.
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags)
+{
+    /* flags is accepted for interface compatibility but not consulted;
+       the entry comes straight from the cache's lookaside list */
+    return ExAllocateFromNPagedLookasideList(&(kmc->npll));
+}
+
+/*
+ * cfs_mem_cache_free
+ *   To free an object (LookAside entry) to the SLAB cache
+ *
+ * Arguments:
+ *   kmc: the SLAB cache to be freed to.
+ *   buf: the pointer to the object to be freed.
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf)
+{
+    /* return the entry to the lookaside list embedded in the cache */
+    ExFreeToNPagedLookasideList(&(kmc->npll), buf);
+}
diff --git a/libcfs/libcfs/winnt/winnt-module.c b/libcfs/libcfs/winnt/winnt-module.c
new file mode 100644 (file)
index 0000000..2b6b008
--- /dev/null
@@ -0,0 +1,160 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+#define LIBCFS_MINOR 240
+
+/*
+ * libcfs_ioctl_getdata
+ *   Copy an ioctl request from user space into a kernel buffer and
+ *   validate it.
+ *
+ * Arguments:
+ *   buf:  start of the kernel buffer receiving the request
+ *   end:  end of that kernel buffer
+ *   arg:  user-space address of the request
+ *
+ * Return Value:
+ *   0 on success, the copy_from_user() result if a copy fails, or
+ *   -EINVAL when the request is malformed.
+ */
+int libcfs_ioctl_getdata(char *buf, char *end, void *arg)
+{
+        struct libcfs_ioctl_hdr *hdr;
+        struct libcfs_ioctl_data *data;
+        int err;
+        ENTRY;
+
+        /* two views of the same kernel buffer */
+        hdr = (struct libcfs_ioctl_hdr *)buf;
+        data = (struct libcfs_ioctl_data *)buf;
+
+        /* fetch only the header first, so that the version and length
+         * fields can be checked before anything else is copied */
+        err = copy_from_user(buf, (void *)arg, sizeof(*hdr));
+        if (err)
+                RETURN(err);
+
+        if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) {
+                CERROR(("LIBCFS: version mismatch kernel vs application\n"));
+                RETURN(-EINVAL);
+        }
+
+        /* Compare lengths, not pointers: ioc_len comes from user space,
+         * and "buf + ioc_len" could wrap around the address space for a
+         * huge value, slip under "end" and let the copy below overrun
+         * the kernel buffer. */
+        if ((size_t)hdr->ioc_len >= (size_t)(end - buf)) {
+                CERROR(("LIBCFS: user buffer exceeds kernel buffer\n"));
+                RETURN(-EINVAL);
+        }
+
+        if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) {
+                CERROR(("LIBCFS: user buffer too small for ioctl\n"));
+                RETURN(-EINVAL);
+        }
+
+        /* now copy the whole request, header included */
+        err = copy_from_user(buf, (void *)arg, hdr->ioc_len);
+        if (err)
+                RETURN(err);
+
+        if (libcfs_ioctl_is_invalid(data)) {
+                CERROR(("LIBCFS: ioctl not correctly formatted\n"));
+                RETURN(-EINVAL);
+        }
+
+        /* point the inline buffer pointers into the bulk area: buffer 1
+         * starts at its beginning, buffer 2 follows the rounded-up
+         * length of buffer 1 */
+        if (data->ioc_inllen1)
+                data->ioc_inlbuf1 = &data->ioc_bulk[0];
+
+        if (data->ioc_inllen2)
+                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
+                        size_round(data->ioc_inllen1);
+
+        RETURN(0);
+}
+                                                                                                                                                                        
+extern struct cfs_psdev_ops          libcfs_psdev_ops;
+
+/* called when opening the device: hands the address of the file's
+ * private_data slot to the registered p_open handler */
+static int 
+libcfs_psdev_open(cfs_file_t * file)
+{
+        struct libcfs_device_userstate **slot;
+
+        /* no open handler registered */
+        if (libcfs_psdev_ops.p_open == NULL)
+                return (-EPERM);
+
+        slot = (struct libcfs_device_userstate **)&file->private_data;
+
+        return libcfs_psdev_ops.p_open(0, (void *)slot);
+}
+
+/* called when closing /dev/device: passes the per-open state kept in
+ * file->private_data to the registered p_close handler.
+ * Returns the handler's result, or -EPERM when none is registered. */
+static int 
+libcfs_psdev_release(cfs_file_t * file)
+{
+       /* same struct tag as in libcfs_psdev_open(); it used to be
+        * misspelled "libcfss_device_userstate", which silently declared
+        * an unrelated incomplete type */
+       struct libcfs_device_userstate *pdu;
+       int    rc = 0;
+
+       pdu = file->private_data;
+       if (libcfs_psdev_ops.p_close != NULL)
+               rc = libcfs_psdev_ops.p_close(0, (void *)pdu);
+       else
+               rc = -EPERM;
+       return rc;
+}
+
+/*
+ * libcfs_ioctl
+ *   ioctl entry point of the libcfs device. Rejects commands outside
+ *   the libcfs ioctl range, handles the platform-dependent requests
+ *   and forwards everything else to the registered p_ioctl handler.
+ *
+ * Return Value:
+ *   the handler's result, -EINVAL for an unknown command, or -EPERM
+ *   when a capability is missing or no handler is registered
+ */
+static int 
+libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr arg)
+{
+        struct cfs_psdev_file    pfile;
+
+        if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE ||
+             _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR  ||
+             _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) {
+                CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n",
+                       _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)));
+                return (-EINVAL);
+        }
+
+        /* Handle platform-dependent IOC requests */
+        if (cmd == IOC_LIBCFS_PANIC) {
+                if (!capable (CAP_SYS_BOOT))
+                        return (-EPERM);
+
+                /* deliberate crash requested from user space */
+                CERROR(("debugctl-invoked panic"));
+                KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL);
+
+                return (0);
+        }
+
+        /* MEMHOG is only gated here; the request itself is forwarded */
+        if (cmd == IOC_LIBCFS_MEMHOG && !capable (CAP_SYS_ADMIN))
+                return -EPERM;
+
+        pfile.off = 0;
+        pfile.private_data = file->private_data;
+
+        if (libcfs_psdev_ops.p_ioctl == NULL)
+                return (-EPERM);
+
+        return libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg);
+}
+
+/* File operations of the libcfs device. The initializer is positional;
+ * only ioctl, open and release are provided, the other slots are NULL. */
+static struct file_operations libcfs_fops = {
+    /* lseek: */  NULL,
+    /* read: */   NULL,
+    /* write: */  NULL,
+    /* ioctl: */  libcfs_ioctl,
+    /* open: */   libcfs_psdev_open,
+    /* release:*/ libcfs_psdev_release
+};
+
+/* Descriptor of the libcfs pseudo device, initialized positionally with
+ * LIBCFS_MINOR, the string "lnet" and the file operations table above.
+ * NOTE(review): the field names of cfs_psdev_t are not visible here;
+ * presumably minor number, device name and fops - confirm. */
+cfs_psdev_t libcfs_dev = { 
+       LIBCFS_MINOR, 
+       "lnet", 
+       &libcfs_fops
+};
+
diff --git a/libcfs/libcfs/winnt/winnt-prim.c b/libcfs/libcfs/winnt/winnt-prim.c
new file mode 100644 (file)
index 0000000..064b071
--- /dev/null
@@ -0,0 +1,650 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ *  Thread routines
+ */
+
+/*
+ * cfs_thread_proc
+ *   Lustre thread procedure wrapper routine (It's an internal routine)
+ *
+ * Arguments:
+ *   context:  a structure of cfs_thread_context_t, containing
+ *             all the necessary parameters
+ *
+ * Return Value:
+ *   void: N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void
+cfs_thread_proc(
+    void * context
+    )
+{
+    cfs_thread_context_t * ctx = (cfs_thread_context_t *) context;
+
+    /* Run the payload, if the creator supplied one; its return value
+       is not looked at */
+
+    if (ctx->func != NULL) {
+        ctx->func(ctx->arg);
+    }
+
+    /* The context belongs to this thread once it is running: release it */
+
+    cfs_free(context);
+
+    /* End the system thread, always reporting STATUS_SUCCESS */
+
+    PsTerminateSystemThread(STATUS_SUCCESS);
+}
+
+/*
+ * cfs_kernel_thread
+ *   Create a system thread to execute the routine specified
+ *
+ * Arguments:
+ *   func:  function to be executed in the thread
+ *   arg:   argument transferred to func function
+ *   flag:  thread creation flags.
+ *
+ * Return Value:
+ *   int:   0 on success or error codes
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int cfs_kernel_thread(int (*func)(void *), void *arg, int flag)
+{
+    cfs_thread_context_t * ctx;
+    cfs_handle_t           handle = NULL;
+    NTSTATUS               rc;
+
+    /* Package func/arg for the new thread; flag is not used */
+
+    ctx = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO);
+    if (ctx == NULL) {
+        return -ENOMEM;
+    }
+
+    ctx->func = func;
+    ctx->arg  = arg;
+
+    /* Start a system thread that enters through cfs_thread_proc */
+
+    rc = PsCreateSystemThread(&handle, (ACCESS_MASK)0L, 0, 0, 0,
+                              cfs_thread_proc, ctx);
+
+    if (NT_SUCCESS(rc)) {
+        /* The thread now owns ctx and frees it in cfs_thread_proc;
+           the handle is not needed any further */
+        ZwClose(handle);
+        return 0;
+    }
+
+    /* No thread was created, so the context is still ours to free;
+       the NT status is translated to a libcfs error code */
+
+    cfs_free(ctx);
+
+    return cfs_error_code(rc);
+}
+
+
+/*
+ * Symbols routines
+ */
+
+
+/* read/write semaphore taken by the cfs_symbol_* routines around every
+   walk or change of cfs_symbol_list */
+static CFS_DECL_RWSEM(cfs_symbol_lock);
+/* list of registered struct cfs_symbol entries, linked via sym_list */
+CFS_LIST_HEAD(cfs_symbol_list);
+
+/* NOTE(review): not referenced in the code visible here; presumably
+   set when running on a multi-processor system - confirm */
+int MPSystem = FALSE;
+
+/*
+ * cfs_symbol_get
+ *   To query the specified symbol form the symbol table
+ *
+ * Arguments:
+ *   name:  the symbol name to be queried
+ *
+ * Return Value:
+ *   If the symbol is in the table, return the address of it.
+ *   If not, return NULL.
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void *
+cfs_symbol_get(const char *name)
+{
+    struct list_head    *walker;
+    struct cfs_symbol   *sym = NULL;
+    void                *value = NULL;
+
+    /* The reference count of the matching entry is modified, so the
+       list is locked for writing: under the read lock two concurrent
+       callers could both update ref and lose one increment. */
+    down_write(&cfs_symbol_lock);
+    list_for_each(walker, &cfs_symbol_list) {
+        sym = list_entry (walker, struct cfs_symbol, sym_list);
+        if (!strcmp(sym->name, name)) {
+            sym->ref ++;
+            /* Remember the value only on a real match. Testing sym
+               after the loop is not enough: when nothing matches it
+               still points at the last entry that was examined. */
+            value = sym->value;
+            break;
+        } 
+    } 
+    up_write(&cfs_symbol_lock);
+
+    /* NULL when the symbol is not registered */
+    return value;
+}
+
+/*
+ * cfs_symbol_put
+ *   To decrease the reference of  the specified symbol
+ *
+ * Arguments:
+ *   name:  the symbol name to be dereferred
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   N/A
+ */
+
+void
+cfs_symbol_put(const char *name)
+{
+    struct list_head    *walker;
+    struct cfs_symbol   *sym = NULL;
+    int                  found = 0;
+
+    /* The reference count of the matching entry is modified, so the
+       list is locked for writing rather than for reading. */
+    down_write(&cfs_symbol_lock);
+    list_for_each(walker, &cfs_symbol_list) {
+        sym = list_entry (walker, struct cfs_symbol, sym_list);
+        if (!strcmp(sym->name, name)) {
+            LASSERT(sym->ref > 0);
+            sym->ref--;
+            found = 1;
+            break;
+        } 
+    } 
+    up_write(&cfs_symbol_lock);
+
+    /* Putting a symbol that was never registered is a caller bug. An
+       explicit flag is needed to see it: sym is non-NULL after walking
+       any non-empty list, whether or not an entry matched. */
+    LASSERT(found);
+}
+
+
+/*
+ * cfs_symbol_register
+ *   To register the specified symbol information
+ *
+ * Arguments:
+ *   name:  the symbol name to be registered
+ *   value: the value that the symbol stands for
+ *
+ * Return Value:
+ *   Zero: Succeed to register
+ *   Non-Zero: Fail to register the symbol
+ *
+ * Notes: 
+ *   N/A
+ */
+
+int
+cfs_symbol_register(const char *name, const void *value)
+{
+    struct list_head    *walker;
+    struct cfs_symbol   *sym = NULL;
+    struct cfs_symbol   *new = NULL;
+
+    /* Build the new entry before taking the lock; it is simply thrown
+       away again if the name turns out to be registered already */
+    new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO);
+    if (!new) {
+        return (-ENOMEM);
+    }
+
+    /* Copy at most one byte less than the name field holds: the entry
+       was zero-filled, so the last byte stays NUL and the stored name
+       is always terminated, even for an over-long input.
+       (assumes cfs_symbol.name is a char array - its declaration is
+       not visible here) */
+    strncpy(new->name, name, sizeof(new->name) - 1);
+    new->value = (void *)value;
+    new->ref = 0;
+    CFS_INIT_LIST_HEAD(&new->sym_list);
+
+    down_write(&cfs_symbol_lock);
+    list_for_each(walker, &cfs_symbol_list) {
+        sym = list_entry (walker, struct cfs_symbol, sym_list);
+        if (!strcmp(sym->name, name)) {
+            up_write(&cfs_symbol_lock);
+            cfs_free(new);
+            return 0; // already registered
+        }
+    }
+    list_add_tail(&new->sym_list, &cfs_symbol_list);
+    up_write(&cfs_symbol_lock);
+
+    return 0;
+}
+
+/*
+ * cfs_symbol_unregister
+ *   To unregister/remove the specified symbol
+ *
+ * Arguments:
+ *   name:  the name of the symbol to be removed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Asserts that nobody holds a reference on the symbol any more.
+ *   An unknown name is silently ignored.
+ */
+
+void
+cfs_symbol_unregister(const char *name)
+{
+    struct list_head    *pos;
+    struct list_head    *next;
+    struct cfs_symbol   *victim = NULL;
+
+    down_write(&cfs_symbol_lock);
+
+    /* locate the first symbol registered under this name */
+    list_for_each_safe(pos, next, &cfs_symbol_list) {
+        struct cfs_symbol *cur = list_entry (pos, struct cfs_symbol, sym_list);
+
+        if (strcmp(cur->name, name) == 0) {
+            victim = cur;
+            break;
+        }
+    }
+
+    /* unlink and release it while the list is still write-locked */
+    if (victim != NULL) {
+        LASSERT(victim->ref == 0);
+        list_del (&victim->sym_list);
+        cfs_free(victim);
+    }
+
+    up_write(&cfs_symbol_lock);
+}
+
+/*
+ * cfs_symbol_clean
+ *   To remove and free all the registered symbols
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Asserts that no symbol is referenced any more.
+ */
+
+void
+cfs_symbol_clean()
+{
+    struct list_head    *walker;
+    struct list_head    *nxt;
+    struct cfs_symbol   *sym = NULL;
+
+    down_write(&cfs_symbol_lock);
+    /* every node is freed inside the loop, so the successor has to be
+     * fetched before the body runs: use the _safe iterator */
+    list_for_each_safe(walker, nxt, &cfs_symbol_list) {
+        sym = list_entry (walker, struct cfs_symbol, sym_list);
+        LASSERT(sym->ref == 0);
+        list_del (&sym->sym_list);
+        cfs_free(sym);
+    }
+    up_write(&cfs_symbol_lock);
+    return;
+}
+
+
+
+/*
+ * Timer routines
+ */
+
+
+/* DPC routine attached to the kernel timer of a cfs_timer_t */
+static void
+cfs_timer_dpc_proc (
+    IN PKDPC Dpc,
+    IN PVOID DeferredContext,
+    IN PVOID SystemArgument1,
+    IN PVOID SystemArgument2)
+{
+    /* the deferred context is the cfs_timer_t this DPC belongs to */
+    cfs_timer_t *   expired = (cfs_timer_t *) DeferredContext;
+    KIRQL           OldIrql;
+
+    /* drop the "timered" mark under the timer lock */
+    KeAcquireSpinLock(&(expired->Lock), &OldIrql);
+    cfs_clear_flag(expired->Flags, CFS_TIMER_FLAG_TIMERED);
+    KeReleaseSpinLock(&(expired->Lock), OldIrql);
+
+    /* then run the user specified timer procedure with its argument */
+    expired->proc((unsigned long)(expired->arg));
+}
+
+/*
+ * cfs_timer_init
+ *   To initialize a cfs_timer_t
+ *
+ * Arguments:
+ *   timer:  the cfs_timer to be initialized
+ *   func:   the timer callback procedure
+ *   arg:    argument handed to the callback procedure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The whole structure is zeroed before it is set up.
+ */
+
+void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg)
+{
+    /* start from a clean structure */
+    memset(timer, 0, sizeof(cfs_timer_t));
+
+    /* kernel objects: lock, timer and the DPC bound to this timer */
+    KeInitializeSpinLock(&(timer->Lock));
+    KeInitializeTimer(&timer->Timer);
+    KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer);
+
+    /* user callback and its argument */
+    timer->arg  = arg;
+    timer->proc = func;
+
+    cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED);
+}
+
+/*
+ * cfs_timer_done
+ *   To finalize the cfs_timer_t (unused)
+ *
+ * Arguments:
+ *   timer:  the cfs_timer to be cleaned up
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Stub: performs no work.
+ */
+
+void cfs_timer_done(cfs_timer_t *timer)
+{
+    /* nothing to release */
+}
+
+/*
+ * cfs_timer_arm
+ *   To schedule the timer to fire at @deadline
+ *
+ * Arguments:
+ *   timer:    the cfs_timer to be scheduled
+ *   deadline: timeout value to wake up the timer
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Arming a timer that is already marked as armed is a no-op.
+ *   NOTE(review): deadline is scaled from HZ ticks to 100ns units and
+ *   handed over as a negative due time - confirm that callers pass an
+ *   interval rather than an absolute time.
+ */
+
+void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline)
+{
+    LARGE_INTEGER   timeout;
+    KIRQL           Irql;
+
+    KeAcquireSpinLock(&(timer->Lock), &Irql);
+    if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){
+
+        timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline;
+
+        /* KeSetTimer returns TRUE only when the timer object was already
+         * queued; it is not a success indicator. The timer is queued by
+         * the call in either case, so mark it armed unconditionally. */
+        KeSetTimer(&timer->Timer, timeout, &timer->Dpc);
+        cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
+
+        timer->deadline = deadline;
+    }
+
+    KeReleaseSpinLock(&(timer->Lock), Irql);
+}
+
+/*
+ * cfs_timer_disarm
+ *   To cancel a scheduled timer
+ *
+ * Arguments:
+ *   timer:  the cfs_timer to be cancelled
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The armed mark is cleared whether or not the timer was pending.
+ */
+
+void cfs_timer_disarm(cfs_timer_t *timer)
+{
+    KIRQL   OldIrql;
+
+    KeAcquireSpinLock(&timer->Lock, &OldIrql);
+
+    /* pull the kernel timer out of the queue, then drop the mark */
+    KeCancelTimer(&timer->Timer);
+    cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED);
+
+    KeReleaseSpinLock(&timer->Lock, OldIrql);
+}
+
+
+/*
+ * cfs_timer_is_armed
+ *   To check whether the timer is scheduled or not
+ *
+ * Arguments:
+ *   timer:  the cfs_timer to be checked
+ *
+ * Return Value:
+ *   1:  if it's armed.
+ *   0:  if it's not.
+ *
+ * Notes: 
+ *   The flag is sampled under the timer lock.
+ */
+
+int cfs_timer_is_armed(cfs_timer_t *timer)
+{
+    int     armed;
+    KIRQL   OldIrql;
+
+    KeAcquireSpinLock(&(timer->Lock), &OldIrql);
+    armed = cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED) ? 1 : 0;
+    KeReleaseSpinLock(&(timer->Lock), OldIrql);
+
+    return armed;
+}
+
+/*
+ * cfs_timer_deadline
+ *   To query the deadline of the timer
+ *
+ * Arguments:
+ *   timer:  the cfs_timer to be queried
+ *
+ * Return Value:
+ *   the deadline value stored in the timer
+ *
+ * Notes: 
+ *   The value is read without taking the timer lock.
+ */
+
+cfs_time_t cfs_timer_deadline(cfs_timer_t * timer)
+{
+    cfs_time_t  when = timer->deadline;
+
+    return when;
+}
+
+/*
+ * daemonize routine stub: performs no work
+ */
+
+void cfs_daemonize(char *str)
+{
+    /* intentionally empty */
+}
+
+/*
+ *  routines related to signals
+ *
+ *  All of them are stubs: the getters/blockers return 0 and the
+ *  others do nothing.
+ */
+
+cfs_sigset_t cfs_get_blockedsigs()
+{
+    return 0;
+}
+
+cfs_sigset_t cfs_block_allsigs()
+{
+    return 0;
+}
+
+cfs_sigset_t cfs_block_sigs(sigset_t bit)
+{
+    return 0;
+}
+
+void cfs_restore_sigs(cfs_sigset_t old)
+{
+    /* intentionally empty */
+}
+
+int cfs_signal_pending(void)
+{
+    return 0;
+}
+
+void cfs_clear_sigpending(void)
+{
+    /* intentionally empty */
+}
+
+/**
+ **  Initialize routines 
+ **/
+
+/*
+ * libcfs_arch_init
+ *   To set up the platform layer: page slab caches, the task manager,
+ *   the proc fs emulation and the tdi data, in that order.
+ *
+ * Return Value:
+ *   Zero on success, otherwise the error of the step that failed
+ *   (-ENOMEM when a slab cache cannot be created).
+ *
+ * Notes:
+ *   On failure everything set up so far is torn down again and the
+ *   global slab pointers are reset to NULL.
+ */
+int
+libcfs_arch_init(void)
+{ 
+    int         rc;
+
+    spinlock_t  lock;
+    /* Workaround to check whether the system is an MP build or UP build */
+    spin_lock_init(&lock);
+    spin_lock(&lock);
+    MPSystem = (int)lock.lock;
+    /* MP build system: it's a real spin, for UP build system, it
+       only raises the IRQL to DISPATCH_LEVEL */
+    spin_unlock(&lock);
+
+    /* create slab memory caches for page allocators */
+    cfs_page_t_slab = cfs_mem_cache_create(
+        "CPGT", sizeof(cfs_page_t), 0, 0 );
+
+    cfs_page_p_slab = cfs_mem_cache_create(
+        "CPGP", CFS_PAGE_SIZE, 0, 0 );
+
+    if ( cfs_page_t_slab == NULL ||
+         cfs_page_p_slab == NULL ){
+        rc = -ENOMEM;
+        goto errorout;
+    }    
+
+    rc = init_task_manager();
+
+    if (rc != 0) {
+        cfs_enter_debugger();
+        KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n"));
+        goto errorout;
+    }
+
+    /* initialize the proc file system */
+    rc = proc_init_fs();
+
+    if (rc != 0) {
+        cfs_enter_debugger();
+        KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n"));
+        cleanup_task_manager();
+        goto errorout;
+    }
+
+    /* initialize the tdi data */
+    rc = ks_init_tdi_data();
+
+    if (rc != 0) {
+        cfs_enter_debugger();
+        KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n"));
+        proc_destroy_fs();
+        cleanup_task_manager();
+        goto errorout;
+    }
+
+errorout:
+
+    if (rc != 0) {
+        /* destroy the page slab caches and forget the stale pointers,
+           so that a later cleanup can not destroy them a second time */
+        if (cfs_page_t_slab) {
+            cfs_mem_cache_destroy(cfs_page_t_slab);
+            cfs_page_t_slab = NULL;
+        }
+        if (cfs_page_p_slab) {
+            cfs_mem_cache_destroy(cfs_page_p_slab);
+            cfs_page_p_slab = NULL;
+        }
+    }
+
+    return rc;
+}
+
+/*
+ * libcfs_arch_cleanup
+ *   To tear down what libcfs_arch_init set up, in reverse order:
+ *   tdi data, proc fs, task manager and finally the page slab caches.
+ */
+void
+libcfs_arch_cleanup(void)
+{
+    /* finalize the tdi data */
+    ks_fini_tdi_data();
+
+    /* destroy the whole proc fs tree and nodes */
+    proc_destroy_fs();
+
+    /* release the task manager started by libcfs_arch_init; its error
+       path does the same right after proc_destroy_fs() */
+    cleanup_task_manager();
+
+    /* destroy the page slab caches and reset the global pointers */
+    if (cfs_page_t_slab) {
+        cfs_mem_cache_destroy(cfs_page_t_slab);
+        cfs_page_t_slab = NULL;
+    }
+
+    if (cfs_page_p_slab) {
+        cfs_mem_cache_destroy(cfs_page_p_slab);
+        cfs_page_p_slab = NULL;
+    }
+
+    return;
+}
+
+EXPORT_SYMBOL(libcfs_arch_init);
+EXPORT_SYMBOL(libcfs_arch_cleanup);
diff --git a/libcfs/libcfs/winnt/winnt-proc.c b/libcfs/libcfs/winnt/winnt-proc.c
new file mode 100644 (file)
index 0000000..cfb8d38
--- /dev/null
@@ -0,0 +1,2039 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+
+# define DEBUG_SUBSYSTEM S_LNET
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifdef __KERNEL__
+
+
+/*
+ *  /proc emulator routines ...
+ */
+
+/* The root node of the proc fs emulation: /proc */
+cfs_proc_entry_t *              proc_fs_root = NULL;
+
+
+/* The sys root: /proc/sys */
+cfs_proc_entry_t *              proc_sys_root = NULL;
+
+
+/* The dev root: /proc/dev | to implement misc device */
+
+cfs_proc_entry_t *              proc_dev_root = NULL;
+
+
+/* SLAB object for cfs_proc_entry_t allocation */
+
+cfs_mem_cache_t *               proc_entry_cache = NULL;
+
+/* root node for sysctl table */
+
+cfs_sysctl_table_header_t       root_table_header;
+
+/* The global lock to protect all the access */
+
+/* LIBCFS_PROCFS_SPINLOCK selects at build time whether the proc fs
+ * lock is a spinlock or a mutex; both variants expose the same
+ * INIT_PROCFS_LOCK / LOCK_PROCFS / UNLOCK_PROCFS macros */
+#if LIBCFS_PROCFS_SPINLOCK
+spinlock_t                      proc_fs_lock;
+
+#define INIT_PROCFS_LOCK()      spin_lock_init(&proc_fs_lock)
+#define LOCK_PROCFS()           spin_lock(&proc_fs_lock)
+#define UNLOCK_PROCFS()         spin_unlock(&proc_fs_lock)
+
+#else
+
+mutex_t                         proc_fs_lock;
+
+#define INIT_PROCFS_LOCK()      init_mutex(&proc_fs_lock)
+#define LOCK_PROCFS()           mutex_down(&proc_fs_lock)
+#define UNLOCK_PROCFS()         mutex_up(&proc_fs_lock)
+
+#endif
+
+/*
+ * Read handler for generic proc entries.
+ *
+ * Repeatedly calls the entry's read_proc callback into a temporary
+ * buffer of CFS_PAGE_SIZE bytes and copies the produced data to buf,
+ * advancing *ppos, until nbytes are delivered, the callback signals
+ * eof or produces nothing more.
+ *
+ * Returns the number of bytes copied, -ENOMEM when the temporary
+ * buffer cannot be allocated, -EFAULT when nothing at all could be
+ * copied out, or the negative value of read_proc when it fails before
+ * any data was delivered.
+ */
+static ssize_t
+proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos)
+{
+    char    *page;
+    ssize_t retval=0;
+    int eof=0;
+    ssize_t n, count;
+    char    *start;
+    cfs_proc_entry_t * dp;
+
+    dp = (cfs_proc_entry_t  *) file->private_data;
+    if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0)))
+        return -ENOMEM;
+
+    while ((nbytes > 0) && !eof) {
+
+        /* never ask the callback for more than PROC_BLOCK_SIZE at once */
+        count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
+
+        start = NULL;
+        if (dp->read_proc) {
+            n = dp->read_proc( page, &start, (long)*ppos,
+                               count, &eof, dp->data);
+        } else
+            break;
+
+        if (!start) {
+            /*
+             * For proc files that are less than 4k
+             */
+            /* the callback left start unset: page is treated as the whole
+             * file content, so the wanted data begins at offset *ppos */
+            start = page + *ppos;
+            n -= (ssize_t)(*ppos);
+            if (n <= 0)
+                break;
+            if (n > count)
+                n = count;
+        }
+        if (n == 0)
+            break;  /* End of file */
+        if (n < 0) {
+            /* report the error only if nothing was delivered yet */
+            if (retval == 0)
+                retval = n;
+            break;
+        }
+        
+        /* the result of copy_to_user is subtracted from n, i.e. it is
+         * used as the count of bytes that could not be copied */
+        n -= copy_to_user((void *)buf, start, n);
+        if (n == 0) {
+            if (retval == 0)
+                retval = -EFAULT;
+            break;
+        }
+
+        *ppos += n;
+        nbytes -= n;
+        buf += n;
+        retval += n;
+    }
+    cfs_free(page);
+
+    return retval;
+}
+
+/*
+ * Write handler for generic proc entries: forwards the request to the
+ * write_proc callback of the entry, or fails with -EIO when the entry
+ * has none.
+ */
+static ssize_t
+proc_file_write(struct file * file, const char * buffer,
+                size_t count, loff_t *ppos)
+{
+    cfs_proc_entry_t  * entry = (cfs_proc_entry_t *) file->private_data;
+
+    /* entries without a write callback can not be written to */
+    if (entry->write_proc == NULL) {
+        return -EIO;
+    }
+
+    /* FIXME: does this routine need ppos?  probably... */
+    return entry->write_proc(file, buffer, count, entry->data);
+}
+
+/* File operations installed on every entry by proc_alloc_entry: only
+ * read and write are provided, the other slots are left empty */
+struct file_operations proc_file_operations = {
+    /*lseek:*/      NULL, //proc_file_lseek,
+    /*read:*/       proc_file_read,
+    /*write:*/      proc_file_write,
+    /*ioctl:*/      NULL,
+    /*open:*/       NULL,
+    /*release:*/    NULL
+};
+
+/*
+ * Allocate a proc entry from proc_entry_cache and give it its default
+ * state: zeroed, tagged with the magic, detached splay links and the
+ * generic proc file operations. Returns NULL when the cache is empty.
+ */
+
+cfs_proc_entry_t *
+proc_alloc_entry()
+{
+    cfs_proc_entry_t * fresh = cfs_mem_cache_alloc(proc_entry_cache, 0);
+
+    if (fresh != NULL) {
+        memset(fresh, 0, sizeof(cfs_proc_entry_t));
+
+        fresh->magic = CFS_PROC_ENTRY_MAGIC;
+        RtlInitializeSplayLinks(&(fresh->s_link));
+        fresh->proc_fops = &proc_file_operations;
+    }
+
+    return fresh;
+}
+
+/* hand a proc entry block back to proc_entry_cache */
+
+void
+proc_free_entry(cfs_proc_entry_t * entry)
+{
+    /* only blocks carrying the proc entry magic may be released here */
+    ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC);
+    cfs_mem_cache_free(proc_entry_cache, entry);
+}
+
+/*
+ * Split a proc path into its first component and the rest.
+ *
+ *   path:      the path string, leading '/' characters are skipped
+ *   first:     set to the start of the first component inside path,
+ *              or NULL when path holds no component at all
+ *   first_len: set to the length of that component (0 when none)
+ *   remain:    set to the character following the separator that ends
+ *              the first component, or NULL when nothing follows it
+ *
+ * The path itself is not modified; first/remain point into it.
+ */
+
+void
+proc_dissect_name(
+    char *path,
+    char **first,
+    int  *first_len,
+    char **remain
+    )
+{
+    char *cur = path;
+    char *end = path + strlen(path);
+
+    *first = NULL;
+    *remain = NULL;
+    *first_len = 0;
+
+    /* skip the leading separators */
+    while (cur < end && *cur == '/') {
+        cur++;
+    }
+
+    if (cur == end) {
+        /* nothing but separators (or an empty string) */
+        return;
+    }
+
+    /* the component runs up to the next separator or the end */
+    *first = cur;
+    while (cur < end && *cur != '/') {
+        cur++;
+    }
+    *first_len = (int)(cur - *first);
+
+    /* something is left only if characters follow the separator */
+    if (end - cur > 1) {
+        *remain = cur + 1;
+    }
+}
+
+/*
+ * Look up the child called name in the splay tree of a directory
+ * entry. Names are compared case-insensitively. A hit is splayed to
+ * the root of the tree before it is returned; NULL means no child
+ * carries that name.
+ */
+
+cfs_proc_entry_t *
+proc_search_splay (
+    cfs_proc_entry_t *  parent,
+    char *              name
+    )
+{
+    PRTL_SPLAY_LINKS    cursor;
+
+    ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+    ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+
+    for (cursor = parent->root; cursor != NULL; ) {
+
+        cfs_proc_entry_t *  candidate;
+        ANSI_STRING         wanted, current;
+        long                order;
+
+        candidate = CONTAINING_RECORD(cursor, cfs_proc_entry_t, s_link);
+
+        ASSERT(candidate->magic == CFS_PROC_ENTRY_MAGIC);
+
+        /* compare the name stored in the tree with the wanted one */
+
+        RtlInitAnsiString(&wanted, name);
+        RtlInitAnsiString(&current, candidate->name);
+
+        order = RtlCompareString(&current, &wanted, TRUE);
+
+        if (order == 0) {
+
+            /* found it: make it the root node of the tree */
+
+            parent->root = RtlSplay(cursor);
+
+            return candidate;
+        }
+
+        /* a greater tree name sends us to the left child,
+           a lesser one to the right child */
+
+        if (order > 0) {
+            cursor = RtlLeftChild(cursor);
+        } else {
+            cursor = RtlRightChild(cursor);
+        }
+    }
+
+    /* the expected entry is not in the splay tree */
+
+    return NULL;
+}
+
+/*
+ * Attach child to the splay tree of the directory entry parent.
+ *
+ * Returns TRUE when the child is attached (its ATTACHED flag is set
+ * and the link count of parent is raised), FALSE when another entry
+ * with the same name (compared case-insensitively) is already there.
+ */
+int
+proc_insert_splay (
+    cfs_proc_entry_t * parent,
+    cfs_proc_entry_t * child
+    )
+{
+    PRTL_SPLAY_LINKS   at;
+    PRTL_SPLAY_LINKS   next;
+
+    ASSERT(parent != NULL && child != NULL);
+    ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+    ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
+    ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+
+    at = parent->root;
+
+    if (at == NULL) {
+        /* empty directory: the child becomes the tree root */
+        parent->root = &(child->s_link);
+    }
+
+    /* walk down until a free slot on the proper side is found */
+    while (at != NULL) {
+        cfs_proc_entry_t * node;
+        ANSI_STRING        nname, cname;
+        long               order;
+
+        node = CONTAINING_RECORD(at, cfs_proc_entry_t, s_link);
+        ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC);
+
+        RtlInitAnsiString(&nname, node->name);
+        RtlInitAnsiString(&cname, child->name);
+
+        order = RtlCompareString(&nname, &cname, TRUE);
+
+        if (order == 0) {
+            /* same name: tolerated only for the very same entry */
+            cfs_enter_debugger();
+            if (node != child) {
+                return FALSE;
+            }
+            break;
+        }
+
+        if (order > 0) {
+            next = RtlLeftChild(at);
+            if (next == NULL) {
+                RtlInsertAsLeftChild(at, &child->s_link);
+            }
+        } else {
+            next = RtlRightChild(at);
+            if (next == NULL) {
+                RtlInsertAsRightChild(at, &child->s_link);
+            }
+        }
+
+        /* NULL here means the child was just linked in: done */
+        at = next;
+    }
+
+    cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED);
+    parent->nlink++;
+
+    return TRUE;
+}
+
+
+/*
+ * Detach child from the splay tree of parent and lower the link count
+ * of parent. Returns TRUE on success, FALSE when no entry with the
+ * name of child is found below parent.
+ */
+int
+proc_remove_splay (
+    cfs_proc_entry_t *  parent,
+    cfs_proc_entry_t *  child
+    )
+{
+    cfs_proc_entry_t * hit;
+
+    ASSERT(parent != NULL && child != NULL);
+    ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC);
+    ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC);
+    ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY));
+    ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED));
+
+    hit = proc_search_splay(parent, child->name);
+
+    if (hit == NULL) {
+        cfs_enter_debugger();
+        return FALSE;
+    }
+
+    /* the lookup by name must lead to the very entry to be removed */
+    ASSERT(hit == child);
+    parent->root = RtlDelete(&(hit->s_link));
+    parent->nlink--;
+
+    return TRUE;
+}
+
+
+/*
+ * Resolve a (possibly multi-component) path below root, one component
+ * at a time. Returns the entry of the last component, or NULL when a
+ * component is missing, is 0x20 characters or longer, or the scratch
+ * buffer can not be allocated.
+ */
+
+cfs_proc_entry_t *
+proc_search_entry(
+    char *              name,
+    cfs_proc_entry_t *  root
+    )
+{
+    cfs_proc_entry_t *  found = NULL;
+    cfs_proc_entry_t *  dir = root;
+    char *head, *tail;
+    int   hlen;
+    char *scratch;
+
+    scratch = cfs_alloc(0x21, CFS_ALLOC_ZERO);
+    if (scratch == NULL) {
+        return NULL;
+    }
+
+    for (;;) {
+
+        /* dissect the file name string */
+        proc_dissect_name(name, &head, &hlen, &tail);
+
+        if (!head) {
+            break;
+        }
+
+        if (hlen >= 0x20) {
+            cfs_enter_debugger();
+            found = NULL;
+            break;
+        }
+
+        /* isolate the component as a NUL terminated string */
+        memset(scratch, 0, 0x20);
+        memcpy(scratch, head, hlen);
+
+        found = proc_search_splay(dir, scratch);
+
+        if (!found || !tail) {
+            break;
+        }
+
+        /* descend into the directory just found */
+        name = tail;
+        dir = found;
+    }
+
+    cfs_free(scratch);
+
+    return found;
+}
+
+/*
+ * Insert the path nodes to the proc fs tree.
+ *
+ * Walks the path below root component by component and creates every
+ * component that does not exist yet; all components but the last are
+ * turned into directories. Returns the entry of the last component,
+ * or NULL when a component is 0x20 characters or longer, an entry can
+ * not be allocated or the insertion into the splay tree fails.
+ */
+
+cfs_proc_entry_t *
+proc_insert_entry(
+    char *              name,
+    cfs_proc_entry_t *  root
+    )
+{
+    cfs_proc_entry_t *entry;
+    cfs_proc_entry_t *parent;
+    char *first, *remain;
+    int flen;
+    char ename[0x20];
+
+    parent = root;
+    entry = NULL;
+
+again:
+
+    proc_dissect_name(name, &first, &flen, &remain);
+
+    if (first) {
+
+        if (flen >= 0x20) {
+            return NULL;
+        }
+
+        memset(ename, 0, 0x20);
+        memcpy(ename, first, flen);
+
+        entry = proc_search_splay(parent, ename);
+
+        if (!entry) {
+            entry = proc_alloc_entry();
+            if (!entry) {
+                /* allocation failed: the name must not be written */
+                return NULL;
+            }
+
+            memcpy(entry->name, ename, flen);
+
+            if(!proc_insert_splay(parent, entry)) {
+                proc_free_entry(entry);
+                return NULL;
+            }
+        }
+
+        if (remain) {
+            /* more components follow, so this one is a directory */
+            entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO;
+            cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
+            name = remain;
+            parent = entry;
+            goto again;
+        }
+    }
+
+    return entry;   
+}
+
+/*
+ * Remove the path nodes from the proc fs tree.
+ *
+ * Recurses down the path below root and, on the way back, removes and
+ * frees every entry of the path whose link count is zero. Components
+ * of 0x20 characters or more can not exist in the tree and are
+ * rejected.
+ */
+
+void
+proc_remove_entry(
+    char *              name,
+    cfs_proc_entry_t *  root
+    )
+{
+    cfs_proc_entry_t *entry;
+    char *first, *remain;
+    int  flen;
+    char ename[0x20];
+
+    entry  = NULL;
+
+    proc_dissect_name(name, &first, &flen, &remain);
+
+    if (first) {
+
+        if (flen >= 0x20) {
+            /* would overflow ename; proc_search_entry and
+               proc_insert_entry reject such components as well */
+            cfs_enter_debugger();
+            return;
+        }
+
+        memset(ename, 0, 0x20);
+        memcpy(ename, first, flen);
+
+        entry = proc_search_splay(root, ename);
+
+        if (entry) {
+
+            if (remain) {
+                ASSERT(S_ISDIR(entry->mode));
+                proc_remove_entry(remain, entry);
+            }
+
+            /* only entries nobody links to any more are dropped */
+            if (!entry->nlink) {
+                proc_remove_splay(root, entry);
+                proc_free_entry(entry);
+            }
+        }
+    } else {
+        cfs_enter_debugger();
+    }
+}
+
+/*
+ * Create the proc entry name below root (below /proc when root is
+ * NULL) and return it; an entry that exists already is returned as it
+ * is. Missing file type / permission bits of mode get defaults:
+ * directories become readable and searchable, everything else a
+ * readable regular file. Returns NULL when the entry can not be
+ * inserted.
+ */
+
+cfs_proc_entry_t *
+create_proc_entry (
+    char *              name,
+    mode_t              mode,
+    cfs_proc_entry_t *  root
+    )
+{
+    cfs_proc_entry_t *dir = root;
+    cfs_proc_entry_t *entry;
+    int               is_dir = S_ISDIR(mode);
+
+    /* fill in the defaults for whatever the caller left out */
+    if (!is_dir && (mode & S_IFMT) == 0) {
+        mode |= S_IFREG;
+    }
+    if ((mode & S_IALLUGO) == 0) {
+        mode |= is_dir ? (S_IRUGO | S_IXUGO) : S_IRUGO;
+    }
+
+    LOCK_PROCFS();
+
+    ASSERT(NULL != proc_fs_root);
+
+    if (dir == NULL) {
+        dir = proc_fs_root;
+    }
+
+    entry = proc_search_entry(name, dir);
+
+    if (entry == NULL) {
+        entry = proc_insert_entry(name, dir);
+        if (entry != NULL) {
+            /* Initializing entry ... */
+            entry->mode = mode;
+
+            if (S_ISDIR(mode)) {
+                cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY);
+            }
+        } else {
+            /* Failed to create/insert the splay node ... */
+            cfs_enter_debugger();
+        }
+    }
+
+    UNLOCK_PROCFS();
+
+    return entry;
+}
+
+
+/*
+ * Search the specified entry from the proc fs: locked wrapper around
+ * proc_search_entry. A NULL root means the /proc root.
+ */
+
+cfs_proc_entry_t *
+search_proc_entry(
+    char *              name,
+    cfs_proc_entry_t *  root
+    )
+{
+    cfs_proc_entry_t * hit;
+
+    LOCK_PROCFS();
+    hit = proc_search_entry(name, (root == NULL) ? proc_fs_root : root);
+    UNLOCK_PROCFS();
+
+    return hit;
+}
+
+/*
+ * Remove the entry from the proc fs: locked wrapper around
+ * proc_remove_entry. A NULL parent means the /proc root.
+ */
+
+void
+remove_proc_entry(
+    char *              name,
+    cfs_proc_entry_t *  parent
+    )
+{
+    LOCK_PROCFS();
+    proc_remove_entry(name, (parent == NULL) ? proc_fs_root : parent);
+    UNLOCK_PROCFS();
+}
+
+
+/*
+ * Free entry and, when it is a directory, everything below it: the
+ * current tree root is unlinked and destroyed recursively until the
+ * tree is empty.
+ */
+void proc_destroy_splay(cfs_proc_entry_t * entry)
+{
+    while (S_ISDIR(entry->mode) && entry->root) {
+        cfs_proc_entry_t * top;
+
+        top = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link);
+
+        /* RtlDelete hands back the new root of the remaining tree */
+        entry->root = RtlDelete(&(top->s_link));
+        proc_destroy_splay(top);
+    }
+
+    proc_free_entry(entry);
+}
+
+
+/*
+ * Destroy the whole proc fs tree and the entry cache.
+ *
+ * The global pointers are reset afterwards, so stale pointers are not
+ * left behind and a repeated call finds nothing left to destroy.
+ */
+
+void proc_destroy_fs()
+{
+    LOCK_PROCFS();
+
+    if (proc_fs_root) {
+        proc_destroy_splay(proc_fs_root);
+
+        /* /proc/sys and /proc/dev are created below the root and were
+           freed together with it */
+        proc_fs_root  = NULL;
+        proc_sys_root = NULL;
+        proc_dev_root = NULL;
+    }
+
+    if (proc_entry_cache) {
+        cfs_mem_cache_destroy(proc_entry_cache);
+        proc_entry_cache = NULL;
+    }
+   
+    UNLOCK_PROCFS();
+}
+
+/*
+ * Initialize / build the proc fs tree: the sysctl root header, the
+ * lock, the entry cache, the /proc root and its "sys" and "dev"
+ * directories. Returns 0 on success and -ENOMEM when any of the
+ * allocations fails; whatever was built until then is released.
+ */
+
+int proc_init_fs()
+{
+    cfs_proc_entry_t * top;
+
+    memset(&(root_table_header), 0, sizeof(struct ctl_table_header));
+    INIT_LIST_HEAD(&(root_table_header.ctl_entry));
+
+    INIT_PROCFS_LOCK();
+
+    proc_entry_cache = cfs_mem_cache_create(NULL, sizeof(cfs_proc_entry_t),
+                                            0, 0);
+    if (proc_entry_cache == NULL) {
+        return (-ENOMEM);
+    }
+
+    top = proc_alloc_entry();
+    if (top == NULL) {
+        proc_destroy_fs();
+        return (-ENOMEM);
+    }
+
+    /* the root is a directory called "proc" */
+    top->magic = CFS_PROC_ENTRY_MAGIC;
+    top->flags = CFS_PROC_FLAG_DIRECTORY;
+    top->mode  = S_IFDIR | S_IRUGO | S_IXUGO;
+    top->nlink = 3; // root should never be deleted.
+    memcpy(top->name, "proc", 4);
+
+    proc_fs_root = top;
+
+    /* /proc/sys */
+    proc_sys_root = create_proc_entry("sys", S_IFDIR, top);
+    if (proc_sys_root == NULL) {
+        proc_free_entry(top);
+        proc_fs_root = NULL;
+        proc_destroy_fs();
+        return (-ENOMEM);
+    }
+    proc_sys_root->nlink = 1;
+
+    /* /proc/dev */
+    proc_dev_root = create_proc_entry("dev", S_IFDIR, top);
+    if (proc_dev_root == NULL) {
+        proc_free_entry(proc_sys_root);
+        proc_sys_root = NULL;
+        proc_free_entry(proc_fs_root);
+        proc_fs_root = NULL;
+        proc_destroy_fs();
+        return (-ENOMEM);
+    }
+    proc_dev_root->nlink = 1;
+
+    return 0;
+}
+
+
+/*
+ * Common worker of proc_readsys / proc_writesys: hands the request to
+ * the proc_handler of the sysctl table stored in the entry's data.
+ * Returns the length reported back by the handler, the handler's
+ * error when it is non-zero, or -ENOTDIR when the file carries no
+ * entry, table or handler.
+ */
+static ssize_t do_rw_proc(int write, struct file * file, char * buf,
+              size_t count, loff_t *ppos)
+{
+    cfs_proc_entry_t *entry = (cfs_proc_entry_t *) file->proc_dentry;
+    struct ctl_table *tbl;
+    size_t done = count;
+    ssize_t rc;
+
+    if (entry == NULL || entry->data == NULL) {
+        return -ENOTDIR;
+    }
+
+    tbl = (struct ctl_table *) entry->data;
+    if (tbl == NULL || tbl->proc_handler == NULL) {
+        return -ENOTDIR;
+    }
+
+    /* no permission check is made here (the ctl_perm call is disabled) */
+
+    /*
+     * FIXME: we need to pass on ppos to the handler.
+     */
+
+    rc = (*tbl->proc_handler) (tbl, write, file, buf, &done);
+
+    return rc ? rc : (ssize_t) done;
+}
+
+/* read handler of the sysctl proc files: do_rw_proc with write == 0 */
+static ssize_t proc_readsys(struct file * file, char * buf,
+                size_t count, loff_t *ppos)
+{
+    return do_rw_proc(0, file, buf, count, ppos);
+}
+
+/* write handler of the sysctl proc files: do_rw_proc with write == 1 */
+static ssize_t proc_writesys(struct file * file, const char * buf,
+                 size_t count, loff_t *ppos)
+{
+    return do_rw_proc(1, file, (char *) buf, count, ppos);
+}
+
+
+/* File operations that register_proc_table installs on entries whose
+ * sysctl table has a proc_handler: only read and write are provided */
+struct file_operations proc_sys_file_operations = {
+    /*lseek:*/      NULL,
+    /*read:*/       proc_readsys,
+    /*write:*/      proc_writesys,
+    /*ioctl:*/      NULL,
+    /*open:*/       NULL,
+    /*release:*/    NULL
+};
+
+
+/*
+ * Scan the sysctl entries in table and add them all into /proc.
+ *
+ * Entries with a proc_handler become regular files served by
+ * proc_sys_file_operations; entries that only have a child table
+ * become directories (an existing directory of that name is reused)
+ * and their children are registered recursively. Entries without a
+ * proc name, or with neither handler nor child, are skipped.
+ */
+void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root)
+{
+    cfs_proc_entry_t * de;
+    mode_t mode;
+    
+    for (; table->ctl_name; table++) {
+        /* Can't do anything without a proc name. */
+        if (!table->procname)
+            continue;
+        /* Maybe we can't do anything with it... */
+        if (!table->proc_handler && !table->child) {
+            printk(KERN_WARNING "SYSCTL: Can't register %s\n",
+                table->procname);
+            continue;
+        }
+
+        mode = table->mode;
+
+        de = NULL;
+        if (table->proc_handler) {
+            mode |= S_IFREG;
+        } else {
+            /* no handler but a child table: this is a directory */
+            mode |= S_IFDIR;
+
+            /* If the subdir exists already, de is non-NULL and is
+             * reused; the scan of the table must go on either way */
+            de = search_proc_entry(table->procname, root);
+        }
+
+        if (!de) {
+
+            de = create_proc_entry((char *)table->procname, mode, root);
+            if (!de)
+                continue;
+            de->data = (void *) table;
+            if (table->proc_handler) {
+                de->proc_fops = &proc_sys_file_operations;
+            }
+        }
+        table->de = de;
+        if ((de->mode & S_IFDIR) && table->child)
+            register_proc_table(table->child, de);
+    }
+}
+
+
+/*
+ * Unregister a /proc sysctl table and any subdirectories.
+ *
+ * Children are unregistered first; an entry is removed only once its
+ * link count has dropped to zero.
+ */
+void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root)
+{
+    for (; table->ctl_name; table++) {
+        cfs_proc_entry_t *de = table->de;
+
+        /* never registered */
+        if (de == NULL)
+            continue;
+
+        if (de->mode & S_IFDIR) {
+            if (table->child == NULL) {
+                printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
+                continue;
+            }
+            unregister_proc_table(table->child, de);
+        }
+
+        /* Don't unregister directories which still have entries, nor
+           proc entries that are still being used.. */
+        if (de->nlink == 0) {
+            table->de = NULL;
+            remove_proc_entry((char *)table->procname, root);
+        }
+    }
+}
+
+/*
+ * The generic string strategy routine.
+ *
+ * When oldval/oldlenp are given, copies the current string of
+ * table->data out to oldval (bounded by the length read from oldlenp,
+ * the string length and table->maxlen), terminates it and stores the
+ * copied length back to oldlenp. When newval/newlen are given, copies
+ * at most table->maxlen bytes from newval into table->data and
+ * terminates the result inside the buffer.
+ *
+ * Returns 0 on success, -ENOTDIR when the table has no data or maxlen,
+ * -EFAULT when one of the user copies fails.
+ */
+int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen,
+          void *oldval, size_t *oldlenp,
+          void *newval, size_t newlen, void **context)
+{
+    int l, len;
+    
+    if (!table->data || !table->maxlen) 
+        return -ENOTDIR;
+    
+    if (oldval && oldlenp) {
+        if(get_user(len, oldlenp))
+            return -EFAULT;
+        if (len) {
+            /* clamp to the current string length and to maxlen */
+            l = strlen(table->data);
+            if (len > l) len = l;
+            if (len >= table->maxlen)
+                len = table->maxlen;
+            if(copy_to_user(oldval, table->data, len))
+                return -EFAULT;
+            /* terminate the copy right behind the last byte written */
+            if(put_user(0, ((char *) oldval) + len))
+                return -EFAULT;
+            if(put_user(len, oldlenp))
+                return -EFAULT;
+        }
+    }
+    if (newval && newlen) {
+        len = newlen;
+        if (len > table->maxlen)
+            len = table->maxlen;
+        if(copy_from_user(table->data, newval, len))
+            return -EFAULT;
+        /* a completely filled buffer gives up its last byte for the
+         * terminator */
+        if (len == table->maxlen)
+            len--;
+        ((char *) table->data)[len] = 0;
+    }
+    return 0;
+}
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ *
+ * A @base of 0 selects the base from the prefix: "0x" followed by a
+ * hex digit means 16, a leading "0" means 8, anything else means 10.
+ * Parsing stops at the first character that is not a digit of the
+ * base; no overflow check is made.
+ */
+unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base)
+{
+    unsigned long result = 0, value;
+
+    if (!base) {
+        base = 10;
+        if (*cp == '0') {
+            base = 8;
+            cp++;
+            if ((*cp == 'x') && isxdigit((unsigned char)cp[1])) {
+                cp++;
+                base = 16;
+            }
+        }
+    }
+
+    /* the character classifiers are only defined for values that fit
+     * an unsigned char, so plain (possibly negative) chars are
+     * converted before they are classified */
+    while (isxdigit((unsigned char)*cp)) {
+        unsigned char c = (unsigned char)*cp;
+
+        if (isdigit(c))
+            value = (unsigned long)(c - '0');
+        else
+            value = (unsigned long)(toupper(c) - 'A' + 10);
+
+        if (value >= base)
+            break;
+
+        result = result*base + value;
+        cp++;
+    }
+    if (endp)
+        *endp = (char *)cp;
+    return result;
+}
+
+/* how do_proc_dointvec combines a parsed value with the stored one */
+#define OP_SET  0
+#define OP_AND  1
+#define OP_OR   2
+#define OP_MAX  3
+#define OP_MIN  4
+
+
+/*
+ * Read or write the vector of ints behind table->data as text.
+ *
+ * On write, whitespace separated decimal/octal/hex numbers are parsed
+ * from buffer, multiplied by conv and combined with the stored ints
+ * according to op. On read, the stored ints divided by conv are
+ * printed tab separated and terminated by a newline.
+ *
+ * *lenp holds the size of buffer on entry and the number of bytes
+ * consumed/produced on return; filp->f_pos is set to that count.
+ * Returns 0 on success, -EFAULT on a failed user copy and -EINVAL
+ * when a write did not yield a single number.
+ *
+ * NOTE(review): buffer is advanced through casts used as lvalues,
+ * which is a compiler extension - confirm before porting.
+ */
+static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+          void *buffer, size_t *lenp, int conv, int op)
+{
+    int *i, vleft, first=1, neg, val;
+    size_t left, len;
+    
+    #define TMPBUFLEN 20
+    char buf[TMPBUFLEN], *p;
+    
+    if (!table->data || !table->maxlen || !*lenp)
+    {
+        *lenp = 0;
+        return 0;
+    }
+    
+    i = (int *) table->data;
+    /* number of ints that fit the table, and bytes left in buffer */
+    vleft = table->maxlen / sizeof(int);
+    left = *lenp;
+    
+    for (; left && vleft--; i++, first=0) {
+        if (write) {
+            /* skip the whitespace in front of the next number */
+            while (left) {
+                char c;
+                if(get_user(c,(char *) buffer))
+                    return -EFAULT;
+                if (!isspace(c))
+                    break;
+                left--;
+                ((char *) buffer)++;
+            }
+            if (!left)
+                break;
+            neg = 0;
+            /* pull at most TMPBUFLEN-1 bytes in and parse them locally */
+            len = left;
+            if (len > TMPBUFLEN-1)
+                len = TMPBUFLEN-1;
+            if(copy_from_user(buf, buffer, len))
+                return -EFAULT;
+            buf[len] = 0;
+            p = buf;
+            if (*p == '-' && left > 1) {
+                neg = 1;
+                left--, p++;
+            }
+            if (*p < '0' || *p > '9')
+                break;
+            val = simple_strtoul(p, &p, 0) * conv;
+            len = p-buf;
+            /* the number must be followed by whitespace or the end */
+            if ((len < left) && *p && !isspace(*p))
+                break;
+            if (neg)
+                val = -val;
+            (char *)buffer += len;
+            left -= len;
+            switch(op) {
+            case OP_SET:    *i = val; break;
+            case OP_AND:    *i &= val; break;
+            case OP_OR: *i |= val; break;
+            case OP_MAX:    if(*i < val)
+                        *i = val;
+                    break;
+            case OP_MIN:    if(*i > val)
+                        *i = val;
+                    break;
+            }
+        } else {
+            /* every value but the first is preceded by a tab */
+            p = buf;
+            if (!first)
+                *p++ = '\t';
+            sprintf(p, "%d", (*i) / conv);
+            len = strlen(buf);
+            if (len > left)
+                len = left;
+            if(copy_to_user(buffer, buf, len))
+                return -EFAULT;
+            left -= len;
+            (char *)buffer += len;
+        }
+    }
+
+    /* close the output line if something was printed and room is left */
+    if (!write && !first && left) {
+        if(put_user('\n', (char *) buffer))
+            return -EFAULT;
+        left--, ((char *)buffer)++;
+    }
+    if (write) {
+        /* swallow the whitespace that trails the last number */
+        p = (char *) buffer;
+        while (left) {
+            char c;
+            if(get_user(c, p++))
+                return -EFAULT;
+            if (!isspace(c))
+                break;
+            left--;
+        }
+    }
+    /* first is still set when not a single value was processed */
+    if (write && first)
+        return -EINVAL;
+    *lenp -= left;
+    memset(&(filp->f_pos) , 0, sizeof(loff_t));
+    filp->f_pos += (loff_t)(*lenp);
+    return 0;
+}
+
+/**
+ * proc_dointvec - proc handler for a vector of plain integers
+ * @table: the sysctl table entry describing the integer array
+ * @write: non-zero if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer holding or receiving the ASCII text
+ * @lenp: the size of the user buffer
+ *
+ * Forwards to do_proc_dointvec() with a conversion factor of 1 and the
+ * OP_SET operation, i.e. values are stored and reported unscaled.
+ *
+ * Returns the result of do_proc_dointvec() (0 on success).
+ */
+int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp,
+             void *buffer, size_t *lenp)
+{
+    const int conv = 1;
+    int       rc;
+
+    rc = do_proc_dointvec(table, write, filp, buffer, lenp, conv, OP_SET);
+
+    return rc;
+}
+
+
+/**
+ * proc_dostring - proc handler for a string sysctl
+ * @table: the sysctl table entry; data/maxlen describe the kernel buffer
+ * @write: non-zero if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ *
+ * On write, the user text up to the first NUL or newline is copied into
+ * the kernel buffer, truncated to maxlen - 1 characters and
+ * NUL-terminated.  On read, the string is copied out, truncated to the
+ * user buffer size, and followed by a '\n' when there is room.  A read
+ * at a non-zero file position yields nothing.
+ *
+ * Returns 0 on success, -EFAULT if user memory cannot be accessed.
+ */
+int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp,
+          void *buffer, size_t *lenp)
+{
+    size_t  n;
+    size_t  limit;
+    char   *cursor;
+    char    ch;
+
+    if (!table->data || !table->maxlen || !*lenp ||
+        (filp->f_pos && !write)) {
+        *lenp = 0;
+        return 0;
+    }
+
+    limit = (size_t)table->maxlen;
+
+    if (!write) {
+        /* read: copy out as much of the string as fits */
+        n = (size_t)strlen(table->data);
+        if (n > limit)
+            n = limit;
+        if (n > *lenp)
+            n = *lenp;
+        if (n && copy_to_user(buffer, table->data, n))
+            return -EFAULT;
+        if (n < *lenp) {
+            if (put_user('\n', ((char *) buffer) + n))
+                return -EFAULT;
+            n++;
+        }
+        *lenp = n;
+        filp->f_pos += n;
+        return 0;
+    }
+
+    /* write: measure the text up to NUL, newline or end of buffer */
+    cursor = buffer;
+    for (n = 0; n < *lenp; n++) {
+        if (get_user(ch, cursor++))
+            return -EFAULT;
+        if (ch == 0 || ch == '\n')
+            break;
+    }
+    /* leave room for the terminating NUL */
+    if (n >= limit)
+        n = limit - 1;
+    if (copy_from_user(table->data, buffer, n))
+        return -EFAULT;
+    ((char *) table->data)[n] = 0;
+    filp->f_pos += *lenp;
+
+    return 0;
+}
+
+/*
+ * Perform the actual read/write of a sysctl table entry.
+ *
+ * The entry's strategy routine, if any, is called first: a negative
+ * result is returned as the error, a positive result means the routine
+ * handled the request (0 is returned), and zero falls through to the
+ * automatic copy below.  The automatic copy moves at most table->maxlen
+ * bytes between table->data and the user's old/new value buffers.
+ *
+ * Returns 0 on success or a negative errno (-EFAULT on a failed user
+ * memory access).
+ */
+int do_sysctl_strategy (cfs_sysctl_table_t *table,
+            int *name, int nlen,
+            void *oldval, size_t *oldlenp,
+            void *newval, size_t newlen, void **context)
+{
+    int rc;
+    size_t len;
+
+    if (table->strategy) {
+        rc = table->strategy(table, name, nlen, oldval, oldlenp,
+                     newval, newlen, context);
+        if (rc < 0)
+            return rc;
+        if (rc > 0)
+            return 0;
+    }
+
+    /* If there is no strategy routine, or if the strategy returns
+     * zero, proceed with automatic r/w */
+    if (table->data && table->maxlen) {
+        if (oldval && oldlenp) {
+            /* a failed fetch would leave "len" uninitialised */
+            if (get_user(len, oldlenp))
+                return -EFAULT;
+            if (len) {
+                if (len > (size_t)table->maxlen)
+                    len = (size_t)table->maxlen;
+                if(copy_to_user(oldval, table->data, len))
+                    return -EFAULT;
+                /* report how many bytes were actually returned */
+                if(put_user(len, oldlenp))
+                    return -EFAULT;
+            }
+        }
+        if (newval && newlen) {
+            len = newlen;
+            if (len > (size_t)table->maxlen)
+                len = (size_t)table->maxlen;
+            if(copy_from_user(table->data, newval, len))
+                return -EFAULT;
+        }
+    }
+    return 0;
+}
+
+/*
+ * parse_table - resolve a numeric sysctl name against a table tree
+ *
+ * Each level of @name is matched against the ctl_name of the entries in
+ * the current table (CTL_ANY matches everything).  A matching directory
+ * entry (one with a child table) has its strategy routine consulted and
+ * the walk descends into the child; a matching leaf is handed to
+ * do_sysctl_strategy().
+ *
+ * Returns -ENOTDIR when the name runs out or nothing matches, -EFAULT
+ * when @name cannot be read, otherwise the strategy / leaf result.
+ */
+static int parse_table(int *name, int nlen,
+               void *oldval, size_t *oldlenp,
+               void *newval, size_t newlen,
+               cfs_sysctl_table_t *table, void **context)
+{
+    for (;;) {
+        cfs_sysctl_table_t *entry;
+        int                 n;
+        int                 error;
+
+        if (!nlen)
+            return -ENOTDIR;
+        if (get_user(n, name))
+            return -EFAULT;
+
+        /* find the first entry matching this name component */
+        for (entry = table; entry->ctl_name; entry++) {
+            if (n == entry->ctl_name || entry->ctl_name == CTL_ANY)
+                break;
+        }
+        if (!entry->ctl_name)
+            return -ENOTDIR;
+
+        /* leaf: perform the actual read/write */
+        if (!entry->child)
+            return do_sysctl_strategy(entry, name, nlen,
+                                      oldval, oldlenp,
+                                      newval, newlen, context);
+
+        /* directory: the ctl_perm(table, 001) check is disabled here */
+        if (entry->strategy) {
+            error = entry->strategy(entry, name, nlen,
+                                    oldval, oldlenp,
+                                    newval, newlen, context);
+            if (error)
+                return error;
+        }
+
+        /* descend one level */
+        name++;
+        nlen--;
+        table = entry->child;
+    }
+}
+
+/*
+ * do_sysctl - look up a numeric sysctl name in every registered table
+ *
+ * Walks the circular list anchored at root_table_header and lets
+ * parse_table() try each registered table in turn.  The first result
+ * other than -ENOTDIR is returned; any context allocated during a
+ * lookup is freed before moving on.
+ *
+ * Returns -ENOTDIR for an out-of-range @nlen or when no table matches,
+ * -EFAULT when @oldval is given without a readable @oldlenp.
+ */
+int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
+           void *newval, size_t newlen)
+{
+    struct list_head *anchor = &root_table_header.ctl_entry;
+    struct list_head *pos;
+
+    if (nlen <= 0 || nlen >= CTL_MAXNAME)
+        return -ENOTDIR;
+
+    if (oldval) {
+        int probe;
+
+        /* only checks that the length word is readable */
+        if (!oldlenp || get_user(probe, oldlenp))
+            return -EFAULT;
+    }
+
+    for (pos = anchor; ; ) {
+        struct ctl_table_header *hdr =
+            list_entry(pos, struct ctl_table_header, ctl_entry);
+        void *context = NULL;
+        int   rc;
+
+        rc = parse_table(name, nlen, oldval, oldlenp,
+                         newval, newlen, hdr->ctl_table,
+                         &context);
+        if (context)
+            cfs_free(context);
+        if (rc != -ENOTDIR)
+            return rc;
+
+        pos = pos->next;
+        if (pos == anchor)
+            break;
+    }
+
+    return -ENOTDIR;
+}
+
+/**
+ * register_sysctl_table - register a sysctl hierarchy
+ * @table: the top-level table structure
+ * @insert_at_head: whether the entry should be inserted in front or at the end
+ *
+ * Register a sysctl table hierarchy. @table should be a filled in ctl_table
+ * array. An entry with a ctl_name of 0 terminates the table. 
+ *
+ * The members of the &ctl_table structure are used as follows:
+ *
+ * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
+ *            must be unique within that level of sysctl
+ *
+ * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
+ *            enter a sysctl file
+ *
+ * data - a pointer to data for use by proc_handler
+ *
+ * maxlen - the maximum size in bytes of the data
+ *
+ * mode - the file permissions for the /proc/sys file, and for sysctl(2)
+ *
+ * child - a pointer to the child sysctl table if this entry is a directory, or
+ *         %NULL.
+ *
+ * proc_handler - the text handler routine (described below)
+ *
+ * strategy - the strategy routine (described below)
+ *
+ * de - for internal use by the sysctl routines
+ *
+ * extra1, extra2 - extra pointers usable by the proc handler routines
+ *
+ * Leaf nodes in the sysctl tree will be represented by a single file
+ * under /proc; non-leaf nodes will be represented by directories.
+ *
+ * sysctl(2) can automatically manage read and write requests through
+ * the sysctl table.  The data and maxlen fields of the ctl_table
+ * struct enable minimal validation of the values being written to be
+ * performed, and the mode field allows minimal authentication.
+ *
+ * More sophisticated management can be enabled by the provision of a
+ * strategy routine with the table entry.  This will be called before
+ * any automatic read or write of the data is performed.
+ *
+ * The strategy routine may return
+ *
+ * < 0 - Error occurred (error is passed to user process)
+ *
+ * 0   - OK - proceed with automatic read or write.
+ *
+ * > 0 - OK - read or write has been done by the strategy routine, so
+ *       return immediately.
+ *
+ * There must be a proc_handler routine for any terminal nodes
+ * mirrored under /proc/sys (non-terminals are handled by a built-in
+ * directory handler).  Several default handlers are available to
+ * cover common cases -
+ *
+ * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
+ * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(),
+ * proc_doulongvec_minmax()
+ *
+ * It is the handler's job to read the input buffer from user memory
+ * and process it. The handler should return 0 on success.
+ *
+ * This routine returns %NULL on a failure to register, and a pointer
+ * to the table header on success.
+ */
+struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, 
+                           int insert_at_head)
+{
+    struct ctl_table_header *tmp;
+    tmp = cfs_alloc(sizeof(struct ctl_table_header), 0);
+    if (!tmp)
+        return NULL;
+    tmp->ctl_table = table;
+
+    INIT_LIST_HEAD(&tmp->ctl_entry);
+    if (insert_at_head)
+        list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
+    else
+        list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
+#ifdef CONFIG_PROC_FS
+    register_proc_table(table, proc_sys_root);
+#endif
+    return tmp;
+}
+
+/**
+ * unregister_sysctl_table - unregister a sysctl table hierarchy
+ * @header: the header returned from register_sysctl_table
+ *
+ * Unlinks @header from the list of registered tables, removes the
+ * table's proc entries when CONFIG_PROC_FS is defined, and frees
+ * @header.  The header must not be used afterwards.
+ */
+void unregister_sysctl_table(struct ctl_table_header * header)
+{
+    struct list_head *link = &header->ctl_entry;
+
+    list_del(link);
+
+#ifdef CONFIG_PROC_FS
+    unregister_proc_table(header->ctl_table, proc_sys_root);
+#endif
+
+    cfs_free(header);
+}
+
+
+/*
+ * cfs_psdev_register - publish a pseudo device as a proc entry
+ *
+ * Creates a regular-file entry named psdev->name under proc_dev_root,
+ * flags it as a misc device and attaches the device's file operations
+ * and the device itself as private data.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be created.
+ */
+int cfs_psdev_register(cfs_psdev_t * psdev)
+{
+    cfs_proc_entry_t *node;
+
+    node = create_proc_entry((char *)psdev->name, S_IFREG, proc_dev_root);
+    if (node == NULL)
+        return -ENOMEM;
+
+    node->data      = (void *)psdev;
+    node->proc_fops = psdev->fops;
+    node->flags    |= CFS_PROC_FLAG_MISCDEV;
+
+    return 0;
+}
+
+/*
+ * cfs_psdev_deregister - remove the proc entry of a pseudo device
+ *
+ * Looks up psdev->name under proc_dev_root; if the entry exists it is
+ * asserted to belong to @psdev and to be a misc device, then removed.
+ * A missing entry is not an error.
+ *
+ * Always returns 0.
+ */
+int cfs_psdev_deregister(cfs_psdev_t * psdev)
+{
+    cfs_proc_entry_t *node;
+
+    node = search_proc_entry((char *)psdev->name, proc_dev_root);
+    if (node == NULL)
+        return 0;
+
+    ASSERT(node->data == (void *)psdev);
+    ASSERT(node->flags & CFS_PROC_FLAG_MISCDEV);
+
+    remove_proc_entry((char *)psdev->name, proc_dev_root);
+
+    return 0;
+}
+
+/* Defined elsewhere; exposed below as the "debug_path" sysctl entry. */
+extern char debug_file_path[1024];
+
+/* ctl_name of the top-level "lnet" directory entry (see top_table). */
+#define PSDEV_LNET  (0x100)
+/* ctl_name values used by the entries of lnet_table. */
+enum {
+        PSDEV_DEBUG = 1,          /* control debugging */
+        PSDEV_SUBSYSTEM_DEBUG,    /* control debugging */
+        PSDEV_PRINTK,             /* force all messages to console */
+        PSDEV_CONSOLE_RATELIMIT,  /* rate limit console messages */
+        PSDEV_DEBUG_PATH,         /* crashdump log location */
+        PSDEV_DEBUG_DUMP_PATH,    /* crashdump tracelog location */
+        PSDEV_LIBCFS_MEMUSED,     /* bytes currently PORTAL_ALLOCated */
+};
+
+/*
+ * Entries of the "lnet" sysctl directory.  The positional initializers
+ * presumably follow the member order ctl_name, procname, data, maxlen,
+ * mode, child, proc_handler, strategy -- TODO confirm against the
+ * ctl_table declaration.  The table ends with an all-zero entry.
+ * NOTE(review): PSDEV_DEBUG_DUMP_PATH has no entry in this table.
+ */
+static struct ctl_table lnet_table[] = {
+        {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL,
+         &proc_dointvec},
+        {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL,
+         &proc_dointvec},
+        {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, 
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
+         sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
+/*
+        {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
+         sizeof(portals_upcall), 0644, NULL, &proc_dostring,
+         &sysctl_string},
+*/
+        /* NOTE(review): mode 0644 makes the memory counter writable */
+        {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter,
+         sizeof(int), 0644, NULL, &proc_dointvec},
+        {0}
+};
+
+/* Top level: a single "lnet" directory whose child table is lnet_table. */
+static struct ctl_table top_table[2] = {
+        {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table},
+        {0}
+};
+
+
+/*
+ * Write handler of the "dump_kernel" entry: passes the user string to
+ * trace_dump_debug_buffer_usrstr().  Returns its negative error code,
+ * or @count when the call succeeded.
+ */
+int trace_write_dump_kernel(struct file *file, const char *buffer,
+                             unsigned long count, void *data)
+{
+        int status;
+
+        status = trace_dump_debug_buffer_usrstr(buffer, count);
+        if (status < 0)
+                return status;
+
+        return (int)count;
+}
+
+/*
+ * Write handler of the "daemon_file" entry: passes the user string to
+ * trace_daemon_command_usrstr().  Returns its negative error code, or
+ * @count when the call succeeded.
+ */
+int trace_write_daemon_file(struct file *file, const char *buffer,
+                            unsigned long count, void *data)
+{
+        int status;
+
+        status = trace_daemon_command_usrstr(buffer, count);
+        if (status < 0)
+                return status;
+
+        return (int)count;
+}
+
+/*
+ * Read handler of the "daemon_file" entry: copies the current tracefile
+ * name, followed by a newline, into @page while holding the tracefile
+ * read lock.  Returns the result of trace_copyout_string().
+ */
+int trace_read_daemon_file(char *page, char **start, off_t off, int count,
+                           int *eof, void *data)
+{
+        int copied;
+
+        tracefile_read_lock();
+        copied = trace_copyout_string(page, count, tracefile, "\n");
+        tracefile_read_unlock();
+
+        return copied;
+}
+
+/*
+ * Write handler of the "debug_mb" entry: passes the user string to
+ * trace_set_debug_mb_userstr().  Returns its negative error code, or
+ * @count when the call succeeded.
+ */
+int trace_write_debug_mb(struct file *file, const char *buffer,
+                         unsigned long count, void *data)
+{
+        int status;
+
+        status = trace_set_debug_mb_userstr(buffer, count);
+        if (status < 0)
+                return status;
+
+        return (int)count;
+}
+
+/*
+ * Read handler of the "debug_mb" entry: formats the value returned by
+ * trace_get_debug_mb() as a decimal line and copies it into @page.
+ * Returns the result of trace_copyout_string().
+ */
+int trace_read_debug_mb(char *page, char **start, off_t off, int count,
+                        int *eof, void *data)
+{
+        char text[32];
+        int  mb;
+
+        mb = trace_get_debug_mb();
+        snprintf(text, sizeof(text), "%d\n", mb);
+
+        return trace_copyout_string(page, count, text, NULL);
+}
+
+/*
+ * insert_proc - create the tracing control entries under "sys/lnet"
+ *
+ * Creates dump_kernel (write only), daemon_file and debug_mb (read and
+ * write) and hooks up their handlers.  Returns 0 on success, -1 as soon
+ * as one entry cannot be created; entries created before the failure
+ * are left in place.
+ */
+int insert_proc(void)
+{
+        cfs_proc_entry_t *node;
+
+        node = create_proc_entry("sys/lnet/dump_kernel", 0, NULL);
+        if (!node) {
+                CERROR(("couldn't register dump_kernel\n"));
+                return -1;
+        }
+        node->write_proc = trace_write_dump_kernel;
+
+        node = create_proc_entry("sys/lnet/daemon_file", 0, NULL);
+        if (!node) {
+                CERROR(("couldn't register daemon_file\n"));
+                return -1;
+        }
+        node->read_proc  = trace_read_daemon_file;
+        node->write_proc = trace_write_daemon_file;
+
+        node = create_proc_entry("sys/lnet/debug_mb", 0, NULL);
+        if (!node) {
+                CERROR(("couldn't register debug_mb\n"));
+                return -1;
+        }
+        node->read_proc  = trace_read_debug_mb;
+        node->write_proc = trace_write_debug_mb;
+
+        return 0;
+}
+
+/*
+ * remove_proc - undo insert_proc() and drop the sysctl table
+ *
+ * Removes the three tracing entries using the same "sys/lnet/..." paths
+ * under which insert_proc() creates them, then (with CONFIG_SYSCTL)
+ * unregisters portals_table_header if it is set and clears it.
+ */
+void remove_proc(void)
+{
+        remove_proc_entry("sys/lnet/dump_kernel", NULL);
+        remove_proc_entry("sys/lnet/daemon_file", NULL);
+        remove_proc_entry("sys/lnet/debug_mb", NULL);
+
+#ifdef CONFIG_SYSCTL
+        if (portals_table_header)
+                unregister_sysctl_table(portals_table_header);
+        portals_table_header = NULL;
+#endif
+}
+
+
+/*
+ *  proc process routines of kernel space
+ */
+
+/*
+ * lustre_open_file - open a proc entry by name
+ *
+ * Looks @filename up under proc_fs_root, allocates a zeroed file object
+ * bound to the entry and its file operations, and calls the entry's
+ * open method.  Entries without an open method just get their link
+ * count raised.
+ *
+ * Returns the file object, or NULL if the entry does not exist, the
+ * allocation fails or the open method reports an error.
+ */
+cfs_file_t *
+lustre_open_file(char * filename)
+{
+    cfs_proc_entry_t *entry;
+    cfs_file_t       *file;
+    int               rc = 0;
+
+    entry = search_proc_entry(filename, proc_fs_root);
+    if (entry == NULL)
+        return NULL;
+
+    file = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO);
+    if (file == NULL)
+        return NULL;
+
+    file->private_data = (void *)entry;
+    file->f_op = entry->proc_fops;
+
+    if (file->f_op->open)
+        rc = (file->f_op->open)(file);
+    else
+        entry->nlink++;
+
+    if (rc != 0) {
+        cfs_free(file);
+        return NULL;
+    }
+
+    return file;
+}
+
+/*
+ * lustre_close_file - close a file object from lustre_open_file()
+ *
+ * Calls the release method if there is one, otherwise drops the link
+ * count of the underlying proc entry, then frees the file object.
+ * Returns the release method's result, or 0 when there is none.
+ */
+int
+lustre_close_file(cfs_file_t * fh)
+{
+    cfs_proc_entry_t *entry = (cfs_proc_entry_t *) fh->private_data;
+    int               rc = 0;
+
+    if (fh->f_op->release)
+        rc = (fh->f_op->release)(fh);
+    else
+        entry->nlink--;
+
+    cfs_free(fh);
+
+    return rc;
+}
+
+/*
+ * lustre_do_ioctl - forward an ioctl to the file's ioctl method
+ *
+ * Returns the method's result, or 0 when the file has no ioctl method.
+ * A non-zero result is logged together with the command and argument.
+ */
+int
+lustre_do_ioctl( cfs_file_t * fh,
+                 unsigned long cmd,
+                 ulong_ptr arg )
+{
+    int rc = 0;
+
+    if (fh->f_op->ioctl) {
+        rc = (fh->f_op->ioctl)(fh, cmd, arg);
+    }
+
+    if (rc != 0) {
+        /* cmd is an unsigned long and arg is pointer sized, so neither
+         * may be printed with a plain %x */
+        printk("lustre_do_ioctl: failed: cmd = 0x%lx arg = %p rc = %d\n",
+                cmd, (void *)arg, rc);
+    }
+
+    return rc;
+}
+    
+/*
+ * lustre_ioctl_file - dispatch an ioctl delivered as a CFS_PROC_IOCTL
+ *
+ * The ioctl payload is taken to start immediately after the
+ * CFS_PROC_IOCTL header of @devctl; its address and devctl->cmd are
+ * passed to lustre_do_ioctl(), whose result is returned.
+ */
+int
+lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl)
+{
+    int         rc = 0;
+    ulong_ptr   data;
+
+    /* payload follows the header in the same buffer */
+    data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL);
+
+    /* obd ioctl code */
+    /* NOTE(review): the pointer fix-up below is compiled out, so this
+     * branch currently does nothing */
+    if (_IOC_TYPE(devctl->cmd) == 'f') {
+#if 0
+        struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data;
+
+        if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
+             devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+            unsigned long off = obd->ioc_len;
+
+            if (obd->ioc_pbuf1) {
+                obd->ioc_pbuf1 = (char *)(data + off);
+                off += size_round(obd->ioc_plen1);
+            }
+
+            if (obd->ioc_pbuf2) {
+                obd->ioc_pbuf2 = (char *)(data + off);
+            }
+        }
+ #endif
+   }
+
+    rc = lustre_do_ioctl(fh, devctl->cmd, data);
+
+    return rc;
+} 
+
+
+/*
+ * lustre_read_file - read from a file object through its read method
+ *
+ * @off is passed to the method by address, so any update the method
+ * makes to it is local to this call.  Returns the method's result, or
+ * 0 when the file has no read method.
+ */
+size_t
+lustre_read_file(
+    cfs_file_t *    fh,
+    loff_t          off,
+    size_t          size,
+    char *          buf
+    )
+{
+    if (!fh->f_op->read)
+        return 0;
+
+    return (fh->f_op->read) (fh, buf, size, &off);
+}
+
+/*
+ * lustre_write_file - write to a file object through its write method
+ *
+ * @off is passed to the method by address, so any update the method
+ * makes to it is local to this call.  Returns the method's result, or
+ * 0 when the file has no write method.
+ */
+size_t
+lustre_write_file(
+    cfs_file_t *    fh,
+    loff_t          off,
+    size_t          size,
+    char *          buf
+    )
+{
+    if (!fh->f_op->write)
+        return 0;
+
+    return (fh->f_op->write)(fh, buf, size, &off);
+}
+
+#else /* !__KERNEL__ */
+
+#include <lnet/api-support.h>
+#include <liblustre.h>
+#include <lustre_lib.h>
+
+/*
+ * proc process routines of user space
+ */
+
+/*
+ * cfs_proc_open - open a libcfs proc or device entry from user space
+ * @filename: "/proc/<entry>" (the "/proc/" prefix is stripped) or a
+ *            name starting with "/dev/" (passed on unchanged)
+ * @oflag:    O_* open flags, translated to NT access, share,
+ *            disposition and create options
+ *
+ * The entry name is not used as the NT object name; the file opened is
+ * always LUSTRE_PROC_SYMLNK and the entry name travels in an extended
+ * attribute built in an on-stack buffer.
+ *
+ * Returns the handle filled in by ZwCreateFile().  When the arguments
+ * are rejected the function returns INVALID_HANDLE_VALUE; the error
+ * code kept in the local "rc" is not reported to the caller.
+ */
+HANDLE cfs_proc_open (char * filename, int oflag)
+{
+    NTSTATUS            status;
+    IO_STATUS_BLOCK     iosb;
+    int                 rc;
+
+    HANDLE              FileHandle = INVALID_HANDLE_VALUE;
+    OBJECT_ATTRIBUTES   ObjectAttributes;
+    ACCESS_MASK         DesiredAccess;
+    ULONG               CreateDisposition;
+    ULONG               ShareAccess;
+    ULONG               CreateOptions;
+    UNICODE_STRING      UnicodeName;
+    USHORT              NameLength;
+
+    PFILE_FULL_EA_INFORMATION Ea = NULL;
+    ULONG               EaLength;
+    UCHAR               EaBuffer[EA_MAX_LENGTH];
+
+    /* Check the filename: should start with "/proc" or "/dev" */
+    NameLength = (USHORT)strlen(filename);
+    if (NameLength > 0x05) {
+        if (_strnicmp(filename, "/proc/", 6) == 0) {
+            filename += 6;
+            NameLength -=6;
+            if (NameLength <= 0) {
+                rc = -EINVAL;
+                goto errorout;
+            }
+        } else if (_strnicmp(filename, "/dev/", 5) == 0) {
+        } else {
+            rc = -EINVAL;
+            goto errorout;
+        }
+    } else {
+        rc = -EINVAL;
+        goto errorout;
+    }
+
+    /* The name and its NUL are copied into EaBuffer below and the length
+     * is stored in a UCHAR field: reject names that would overflow the
+     * buffer or be truncated. */
+    if (NameLength > 0xFF ||
+        sizeof(FILE_FULL_EA_INFORMATION) + NameLength > sizeof(EaBuffer)) {
+        rc = -EINVAL;
+        goto errorout;
+    }
+
+    /* Analyze the flags settings */
+
+    if (cfs_is_flag_set(oflag, O_WRONLY)) {
+        DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = 0;
+    }  else if (cfs_is_flag_set(oflag, O_RDWR)) {
+        DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE;
+    } else {
+        DesiredAccess = (GENERIC_READ | SYNCHRONIZE);
+        ShareAccess = FILE_SHARE_READ;
+    }
+
+    if (cfs_is_flag_set(oflag, O_CREAT)) {
+        if (cfs_is_flag_set(oflag, O_EXCL)) {
+            /* exclusive creation is rejected */
+            CreateDisposition = FILE_CREATE;
+            rc = -EINVAL;
+            goto errorout;
+        } else {
+            CreateDisposition = FILE_OPEN_IF;
+        }
+    } else {
+        CreateDisposition = FILE_OPEN;
+    }
+
+    if (cfs_is_flag_set(oflag, O_TRUNC)) {
+        if (cfs_is_flag_set(oflag, O_EXCL)) {
+            CreateDisposition = FILE_OVERWRITE;
+        } else {
+            CreateDisposition = FILE_OVERWRITE_IF;
+        }
+    }
+
+    CreateOptions = 0;
+
+    if (cfs_is_flag_set(oflag, O_DIRECTORY)) {
+        cfs_set_flag(CreateOptions,  FILE_DIRECTORY_FILE);
+    }
+
+    if (cfs_is_flag_set(oflag, O_SYNC)) {
+         cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH);
+    }
+
+    if (cfs_is_flag_set(oflag, O_DIRECT)) {
+         cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING);
+    }
+
+    /* Initialize the unicode path name for the specified file */
+    RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK);
+
+    /* Setup the object attributes structure for the file. */
+    InitializeObjectAttributes(
+            &ObjectAttributes,
+            &UnicodeName,
+            OBJ_CASE_INSENSITIVE,
+            NULL,
+            NULL );
+
+    /* building EA for the proc entry ...  */
+    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = (UCHAR)NameLength;
+    Ea->EaValueLength = 0;
+    RtlCopyMemory(
+        &(Ea->EaName),
+        filename,
+        NameLength + 1
+        );
+    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+                               Ea->EaNameLength + 1;
+
+    /* Now to open or create the file now */
+    status = ZwCreateFile(
+                &FileHandle,
+                DesiredAccess,
+                &ObjectAttributes,
+                &iosb,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                ShareAccess,
+                CreateDisposition,
+                CreateOptions,
+                Ea,
+                EaLength );
+
+    /* Check the returned status of Iosb ... */
+
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        goto errorout;
+    }
+
+errorout:
+
+    return FileHandle;
+}
+
+/*
+ * cfs_proc_close - close a handle obtained from cfs_proc_open()
+ *
+ * NULL and INVALID_HANDLE_VALUE (the value cfs_proc_open() starts with
+ * and returns when it rejects its arguments) are ignored.
+ * Always returns 0.
+ */
+int cfs_proc_close(HANDLE handle)
+{
+    if (handle && handle != INVALID_HANDLE_VALUE) {
+        NtClose((HANDLE)handle);
+    }
+
+    return 0;
+}
+
+/*
+ * cfs_proc_read - read up to @count bytes from a proc handle
+ *
+ * Issues NtReadFile() at byte offset 0.  Returns iosb.Information on
+ * success; on failure the status is printed and converted with
+ * cfs_error_code().
+ */
+int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count)
+{
+    IO_STATUS_BLOCK     iosb;
+    LARGE_INTEGER       offset;
+    NTSTATUS            status;
+
+    offset.QuadPart = 0;
+
+    status = NtReadFile((HANDLE)handle, 0, NULL, NULL, &iosb,
+                        buffer, count, &offset, NULL);
+
+    if (!NT_SUCCESS(status)) {
+        printf("NtReadFile request failed 0x%0x\n", status);
+        return cfs_error_code(status);
+    }
+
+    return iosb.Information;
+}
+
+
+/*
+ * cfs_proc_write - write @count bytes to a proc handle
+ *
+ * Issues NtWriteFile() with a byte offset of -1.  Returns
+ * iosb.Information on success; on failure the status is printed and
+ * converted with cfs_error_code().
+ */
+int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count)
+{
+    IO_STATUS_BLOCK     iosb;
+    LARGE_INTEGER       offset;
+    NTSTATUS            status;
+
+    offset.QuadPart = -1;
+
+    status = NtWriteFile((HANDLE)handle, 0, NULL, NULL, &iosb,
+                         buffer, count, &offset, NULL);
+
+    if (!NT_SUCCESS(status)) {
+        printf("NtWriteFile request failed 0x%0x\n", status);
+        return cfs_error_code(status);
+    }
+
+    return iosb.Information;
+}
+
+/*
+ * cfs_proc_ioctl - send an ioctl to the libcfs driver from user space
+ * @handle: handle obtained from cfs_proc_open()
+ * @cmd:    ioctl command; its _IOC_TYPE selects how @buffer is sized
+ * @buffer: request structure; on success it is overwritten with the
+ *          data returned by the driver
+ *
+ * The request is repacked into one heap buffer laid out as
+ * [CFS_PROC_IOCTL header][copy of @buffer][copies of pbuf1/pbuf2] and
+ * submitted with IOCTL_LIBCFS_ENTRY.
+ *
+ * Returns cfs_error_code() of the final NTSTATUS.
+ */
+int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer)
+{
+    PUCHAR          procdat = NULL;
+    CFS_PROC_IOCTL  procctl;
+    ULONG           length = 0;
+    ULONG           extra = 0;
+
+    NTSTATUS        status;
+    IO_STATUS_BLOCK iosb;
+
+    procctl.cmd = cmd;
+
+    /* size of the request body (length) and of out-of-line data (extra) */
+    if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) {
+        struct libcfs_ioctl_data * portal;
+        portal = (struct libcfs_ioctl_data *) buffer;
+        length = portal->ioc_len;
+    } else if (_IOC_TYPE(cmd) == 'f') {
+        struct obd_ioctl_data * obd;
+        obd = (struct obd_ioctl_data *) buffer;
+        length = obd->ioc_len;
+        extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2);
+    } else if(_IOC_TYPE(cmd) == 'u') {
+        length = 4;
+        extra  = 0;
+    } else {
+        printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n");
+        cfs_enter_debugger();
+        status = STATUS_INVALID_PARAMETER;
+        goto errorout;
+    }
+
+    /* NOTE(review): length and extra come from the caller's structure
+     * and are summed without an overflow check */
+    procctl.len = length + extra;
+    procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL));
+
+    if (NULL == procdat) {
+        printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n");
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        cfs_enter_debugger();
+        goto errorout;
+    }
+    memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL));
+    memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL));
+    memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length);
+    /* from here on "length" is the offset of the first free byte */
+    length += sizeof(CFS_PROC_IOCTL);
+
+    if (_IOC_TYPE(cmd) == 'f') {
+
+        char *ptr;
+        struct obd_ioctl_data * data;
+        struct obd_ioctl_data * obd;
+
+        /* data: the caller's structure, obd: its copy inside procdat */
+        data = (struct obd_ioctl_data *) buffer;
+        obd  = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL));
+        ptr = obd->ioc_bulk;
+
+        /* presumably LOGL copies the inline buffer to ptr and advances
+         * ptr -- TODO confirm against the macro definition */
+        if (data->ioc_inlbuf1) {
+                obd->ioc_inlbuf1 = ptr;
+                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr);
+        }
+
+        if (data->ioc_inlbuf2) {
+                obd->ioc_inlbuf2 = ptr;
+                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr);
+        }
+        if (data->ioc_inlbuf3) {
+                obd->ioc_inlbuf3 = ptr;
+                LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr);
+        }
+        if (data->ioc_inlbuf4) {
+                obd->ioc_inlbuf4 = ptr;
+                LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr);
+        }
+    
+        /* append the out-of-line buffers, except for bulk read/write */
+        if ( cmd != (ULONG)OBD_IOC_BRW_WRITE  &&
+             cmd != (ULONG)OBD_IOC_BRW_READ ) {
+
+            if (data->ioc_pbuf1 && data->ioc_plen1) {
+                obd->ioc_pbuf1 = &procdat[length];
+                memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, data->ioc_plen1); 
+                length += size_round(data->ioc_plen1);
+            }
+
+            if (data->ioc_pbuf2 && data->ioc_plen2) {
+                obd->ioc_pbuf2 = &procdat[length];
+                memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2);
+                length += size_round(data->ioc_plen2);
+            }
+        }
+
+        if (obd_ioctl_is_invalid(obd)) {
+            cfs_enter_debugger();
+        }
+    }
+
+    /* procdat serves as both input and output buffer */
+    status = NtDeviceIoControlFile(
+                (HANDLE)handle,
+                NULL, NULL, NULL, &iosb,
+                IOCTL_LIBCFS_ENTRY,
+                procdat, length,
+                procdat, length );
+
+
+    /* NOTE(review): procctl.len includes "extra", so for obd ioctls more
+     * than the original ioc_len bytes are copied back into buffer, and
+     * the copied structure carries pointers into procdat, which is freed
+     * below -- confirm callers size and re-unpack the buffer accordingly */
+    if (NT_SUCCESS(status)) {
+        memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); 
+    }
+
+errorout:
+
+    if (procdat) {
+        free(procdat);
+    }
+
+    return cfs_error_code(status);
+}
+
+#endif /* __KERNEL__ */
diff --git a/libcfs/libcfs/winnt/winnt-sync.c b/libcfs/libcfs/winnt/winnt-sync.c
new file mode 100644 (file)
index 0000000..5094bef
--- /dev/null
@@ -0,0 +1,449 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+
+
+/*
+ * Wait queue routines
+ */
+
+/*
+ * cfs_waitq_init
+ *   Put a wait queue into its initial (empty) state
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure to set up
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The waiter list and the guard lock are set up first, then the
+ *   flags are cleared and the magic is stamped.
+ */
+
+void cfs_waitq_init(cfs_waitq_t *waitq)
+{
+    /* empty waiter list protected by its own spinlock */
+    spin_lock_init(&waitq->guard);
+    INIT_LIST_HEAD(&waitq->waiters);
+
+    waitq->flags = 0;
+    waitq->magic = CFS_WAITQ_MAGIC;
+}
+
+/*
+ * cfs_waitlink_init
+ *   Initialize a wait link node and bind it to the current task
+ *
+ * Arguments:
+ *   link:  pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The link borrows the event and the hit counter of the task slot
+ *   that owns the current task, and takes a count on that slot.
+ *   If there is no current task the link is left untouched.
+ */
+
+void cfs_waitlink_init(cfs_waitlink_t *link)
+{
+    cfs_task_t * current_task = cfs_current();
+    PTASK_SLOT   task_slot;
+
+    if (current_task == NULL) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    /* the task is embedded in its slot: step back to the container */
+    task_slot = CONTAINING_RECORD(current_task, TASK_SLOT, task);
+    cfs_assert(task_slot->Magic == TASKSLT_MAGIC);
+
+    memset(link, 0, sizeof(*link));
+    link->magic = CFS_WAITLINK_MAGIC;
+    link->flags = 0;
+
+    /* wakeups for this link are delivered through the slot */
+    link->event = &task_slot->Event;
+    link->hits  = &task_slot->hits;
+    atomic_inc(&task_slot->count);
+
+    /* both channels start unlinked and point back to their owner */
+    INIT_LIST_HEAD(&link->waitq[0].link);
+    link->waitq[0].waitl = link;
+
+    INIT_LIST_HEAD(&link->waitq[1].link);
+    link->waitq[1].waitl = link;
+}
+
+
+/*
+ * cfs_waitlink_fini
+ *   Finalize a wait link node
+ *
+ * Arguments:
+ *   link:  pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The link must no longer be queued on any wait queue. The count
+ *   taken on the current task's slot is dropped again.
+ */
+
+void cfs_waitlink_fini(cfs_waitlink_t *link)
+{
+    cfs_task_t * current_task = cfs_current();
+    PTASK_SLOT   task_slot;
+
+    if (current_task == NULL) {
+        /* should bugchk here */
+        cfs_enter_debugger();
+        return;
+    }
+
+    task_slot = CONTAINING_RECORD(current_task, TASK_SLOT, task);
+
+    /* sanity: valid slot, valid link, and neither channel still queued */
+    cfs_assert(task_slot->Magic == TASKSLT_MAGIC);
+    cfs_assert(link->magic == CFS_WAITLINK_MAGIC);
+    cfs_assert(link->waitq[0].waitq == NULL);
+    cfs_assert(link->waitq[1].waitq == NULL);
+
+    atomic_dec(&task_slot->count);
+}
+
+
+/*
+ * cfs_waitq_add_internal
+ *   Queue one channel of the wait link node on the wait queue
+ *
+ * Arguments:
+ *   waitq:    pointer to the cfs_waitq_t structure
+ *   link:     pointer to the cfs_waitlink_t structure
+ *   waitqid:  channel of the link to use (normal or forward),
+ *             must be below CFS_WAITQ_CHANNELS
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Exclusive links are appended at the tail of the waiter list,
+ *   all other links are inserted at its head.
+ */
+
+void cfs_waitq_add_internal(cfs_waitq_t *waitq,
+                            cfs_waitlink_t *link,
+                            __u32 waitqid )
+{
+    cfs_waitlink_channel_t *chan;
+
+    LASSERT(waitq != NULL);
+    LASSERT(link != NULL);
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+    LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+    LASSERT(waitqid < CFS_WAITQ_CHANNELS);
+
+    chan = &link->waitq[waitqid];
+
+    spin_lock(&waitq->guard);
+
+    /* a channel can sit on one wait queue at a time only */
+    LASSERT(chan->waitq == NULL);
+    chan->waitq = waitq;
+
+    if (!(link->flags & CFS_WAITQ_EXCLUSIVE)) {
+        list_add(&chan->link, &waitq->waiters);
+    } else {
+        list_add_tail(&chan->link, &waitq->waiters);
+    }
+
+    spin_unlock(&waitq->guard);
+}
+/*
+ * cfs_waitq_add
+ *   Queue the wait link node on the wait queue, using the
+ *   normal channel of the link
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *   link:   pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Thin wrapper around cfs_waitq_add_internal().
+ */
+
+void cfs_waitq_add(cfs_waitq_t *waitq, cfs_waitlink_t *link)
+{
+    cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL);
+}
+
+/*
+ * cfs_waitq_add_exclusive
+ *   Mark the wait link node as exclusive and queue it on the
+ *   wait queue
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *   link:   pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   The CFS_WAITQ_EXCLUSIVE flag is set on the link before it is
+ *   handed to cfs_waitq_add().
+ */
+
+void cfs_waitq_add_exclusive( cfs_waitq_t *waitq,
+                              cfs_waitlink_t *link)
+{
+    LASSERT(waitq != NULL);
+    LASSERT(link != NULL);
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+    LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+    link->flags = link->flags | CFS_WAITQ_EXCLUSIVE;
+
+    cfs_waitq_add(waitq, link);
+}
+
+/*
+ * cfs_waitq_forward
+ *   Queue the wait link node on the wait queue, using the
+ *   forward channel of the link
+ *
+ * Arguments:
+ *   link:   pointer to the cfs_waitlink_t structure
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Note the argument order: the link comes first here, unlike
+ *   in cfs_waitq_add().
+ */
+
+void cfs_waitq_forward( cfs_waitlink_t *link, cfs_waitq_t *waitq)
+{
+    cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD);
+}
+
+/*
+ * cfs_waitq_del
+ *   Remove the wait link node from the wait queue
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *   link:   pointer to the cfs_waitlink_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Only the first channel of the link that is queued on waitq is
+ *   removed. If no channel is queued on waitq the debugger is
+ *   entered.
+ */
+
+void cfs_waitq_del( cfs_waitq_t *waitq,
+                    cfs_waitlink_t *link)
+{
+    int chan;
+    int found = 0;
+
+    LASSERT(waitq != NULL);
+    LASSERT(link != NULL);
+
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+    LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+    spin_lock(&waitq->guard);
+
+    for (chan = 0; chan < CFS_WAITQ_CHANNELS; chan++) {
+
+        if (link->waitq[chan].waitq != waitq)
+            continue;
+
+        /* detach this channel from the queue */
+        link->waitq[chan].waitq = NULL;
+        list_del_init(&link->waitq[chan].link);
+        found = 1;
+        break;
+    }
+
+    if (!found) {
+        cfs_enter_debugger();
+    }
+
+    spin_unlock(&waitq->guard);
+}
+
+/*
+ * cfs_waitq_active
+ *   Is the waitq active (not empty) ?
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ *   Always 1 (see Notes)
+ *
+ * Notes: 
+ *   The waiter list is not inspected: this implementation always
+ *   reports the queue as active, the same as Darwin.
+ */
+
+int cfs_waitq_active(cfs_waitq_t *waitq)
+{
+    LASSERT(waitq != NULL);
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+    return 1;
+}
+
+/*
+ * cfs_waitq_signal_nr
+ *   Walk the waiter list and wake the waiters, stopping once nr
+ *   exclusive waiters have been woken
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *   nr:     number of exclusive tasks to be woken up
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   nr is only decremented for exclusive waiters and the walk stops
+ *   when it reaches exactly zero, so passing nr == 0 never stops
+ *   the walk early.
+ */
+
+
+void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr)
+{
+    cfs_waitlink_channel_t * chan;
+    int                      woken;
+
+    LASSERT(waitq != NULL);
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+    spin_lock(&waitq->guard);
+
+    list_for_each_entry(chan, &waitq->waiters, cfs_waitlink_channel_t, link) {
+
+        cfs_waitlink_t *owner = chan->waitl;
+
+        woken = cfs_wake_event(owner->event);
+        LASSERT( woken == FALSE || woken == TRUE );
+
+        /* account for the wakeup in the owner's hit counter */
+        if (woken) {
+            atomic_inc(owner->hits);
+        }
+
+        if (owner->flags & CFS_WAITQ_EXCLUSIVE) {
+            nr--;
+            if (nr == 0)
+                break;
+        }
+    }
+
+    spin_unlock(&waitq->guard);
+}
+
+/*
+ * cfs_waitq_signal
+ *   Wake the waiters of the waitq up to and including the first
+ *   exclusive one
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Same as cfs_waitq_signal_nr() with nr == 1.
+ */
+
+void cfs_waitq_signal(cfs_waitq_t *waitq)
+{
+    const int one_exclusive = 1;
+
+    cfs_waitq_signal_nr(waitq, one_exclusive);
+}
+
+
+/*
+ * cfs_waitq_broadcast
+ *   Wake up all the tasks in the waitq
+ *
+ * Arguments:
+ *   waitq:  pointer to the cfs_waitq_t structure
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   Implemented as cfs_waitq_signal_nr() with nr == 0.
+ */
+
+void cfs_waitq_broadcast(cfs_waitq_t *waitq)
+{
+    const int no_limit = 0;
+
+    LASSERT(waitq != NULL);
+    LASSERT(waitq->magic == CFS_WAITQ_MAGIC);
+
+    cfs_waitq_signal_nr(waitq, no_limit);
+}
+
+/*
+ * cfs_waitq_wait
+ *   Wait on the link node until it is signaled
+ *
+ * Arguments:
+ *   link:   pointer to the cfs_waitlink_t structure
+ *   state:  task state, not used by this implementation
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes: 
+ *   If a wakeup is already recorded in the hit counter it is
+ *   consumed and the call returns without waiting on the event.
+ */
+
+void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state)
+{
+    LASSERT(link != NULL);
+    LASSERT(link->magic == CFS_WAITLINK_MAGIC);
+
+    if (atomic_read(link->hits) <= 0) {
+        /* nothing pending: wait on the event, timeout argument 0 */
+        cfs_wait_event(link->event, 0);
+        return;
+    }
+
+    /* consume one pending wakeup; the counter must not underflow */
+    atomic_dec(link->hits);
+    LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
+}
+
+/*
+ * cfs_waitq_timedwait
+ *   Wait for the link node to be signaled, with a timeout limit
+ *
+ * Arguments:
+ *   link:    pointer to the cfs_waitlink_t structure
+ *   state:   task state, not used by this implementation
+ *   timeout: the timeout limit handed to cfs_wait_event()
+ *
+ * Return Value:
+ *   TRUE when an already recorded wakeup was consumed without
+ *   waiting; otherwise the result of cfs_wait_event() converted
+ *   to cfs_duration_t.
+ *
+ * Notes: 
+ *   NOTE(review): the meaning of the cfs_wait_event() result
+ *   (e.g. 0 on timeout) is not visible here - confirm against
+ *   its definition.
+ *   What if it happens to be woken up exactly at the timeout !?
+ */
+
+cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link,
+                                    cfs_task_state_t state,
+                                    cfs_duration_t timeout)
+{
+    cfs_duration_t rc;
+
+    if (atomic_read(link->hits) > 0) {
+        /* consume one pending wakeup; the counter must not underflow */
+        atomic_dec(link->hits);
+        LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00);
+        rc = TRUE;
+    } else {
+        rc = (cfs_duration_t)cfs_wait_event(link->event, timeout);
+    }
+
+    return rc;
+}
+
+
diff --git a/libcfs/libcfs/winnt/winnt-tcpip.c b/libcfs/libcfs/winnt/winnt-tcpip.c
new file mode 100644 (file)
index 0000000..d0c725c
--- /dev/null
@@ -0,0 +1,6706 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LIBCFS
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include <lnet/lnet.h>
+
+#define TDILND_MODULE_NAME L"Tdilnd"
+
+ks_data_t ks_data;
+
+/*
+ * ks_tdi_send_flags
+ *   Translate socket send flags into tdi send flags
+ *
+ * Arguments:
+ *   SockFlags:  socket flags (MSG_OOB, MSG_MORE, MSG_DONTWAIT
+ *               are recognized, anything else is ignored)
+ *
+ * Return Value:
+ *   ULONG:  the matching TDI_SEND_* flags
+ *
+ * Notes:
+ *   MSG_OOB      -> TDI_SEND_EXPEDITED
+ *   MSG_MORE     -> TDI_SEND_PARTIAL
+ *   MSG_DONTWAIT -> TDI_SEND_NON_BLOCKING
+ */
+
+ULONG
+ks_tdi_send_flags(ULONG SockFlags)
+{
+    ULONG   Translated = 0;
+
+    if (cfs_is_flag_set(SockFlags, MSG_OOB))
+        cfs_set_flag(Translated, TDI_SEND_EXPEDITED);
+
+    if (cfs_is_flag_set(SockFlags, MSG_MORE))
+        cfs_set_flag(Translated, TDI_SEND_PARTIAL);
+
+    if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT))
+        cfs_set_flag(Translated, TDI_SEND_NON_BLOCKING);
+
+    return Translated;
+}
+
+/*
+ * KsIrpCompletionRoutine
+ *   Irp completion routine: signal the event passed as context
+ *
+ * Arguments:
+ *   DeviceObject:  not used
+ *   Irp:           not used
+ *   Context:       PKEVENT to be set, or NULL for no notification
+ *
+ * Return Value:
+ *   NTSTATUS:      always STATUS_MORE_PROCESSING_REQUIRED
+ *
+ * NOTES:
+ *   N/A
+ */
+
+NTSTATUS
+KsIrpCompletionRoutine(
+    IN PDEVICE_OBJECT    DeviceObject,
+    IN PIRP              Irp,
+    IN PVOID             Context
+    )
+{
+    /* must precede the return, otherwise the statements are unreachable */
+    UNREFERENCED_PARAMETER(DeviceObject);
+    UNREFERENCED_PARAMETER(Irp);
+
+    if (NULL != Context) {
+        KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE);
+    }
+
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * KsBuildTdiIrp
+ *   Allocate a new IRP and prepare it to be issued to tdi
+ *
+ * Arguments:
+ *   DeviceObject:  device object created by the underlying
+ *                  TDI transport driver
+ *
+ * Return Value:
+ *   PIRP:   the allocated Irp in success or NULL in failure.
+ *
+ * NOTES:
+ *   The next stack location is preset to
+ *   IRP_MJ_INTERNAL_DEVICE_CONTROL with a zero control code.
+ */
+
+PIRP
+KsBuildTdiIrp(
+    IN PDEVICE_OBJECT    DeviceObject
+    )
+{
+    PIO_STACK_LOCATION  NextSp;
+    PIRP                NewIrp;
+
+    /* one stack location per driver below us */
+    NewIrp = IoAllocateIrp(DeviceObject->StackSize, FALSE);
+
+    if (NULL == NewIrp) {
+        return NULL;
+    }
+
+    /* preset the stack location the transport driver will see */
+    NextSp = IoGetNextIrpStackLocation(NewIrp);
+    NextSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL;
+    NextSp->Parameters.DeviceIoControl.IoControlCode = 0;
+
+    return NewIrp;
+}
+
+/*
+ * KsSubmitTdiIrp
+ *   Issue the Irp to the underlying tdi driver
+ *
+ * Arguments:
+ *   DeviceObject:  the device object created by TDI driver
+ *   Irp:           the I/O request packet to be processed
+ *   bSynchronous:  synchronous or not. If true, we wait until
+ *                  the Irp is completed and then free it here.
+ *   Information:   optional; in the synchronous case it receives
+ *                  Irp->IoStatus.Information. Not written in the
+ *                  asynchronous case.
+ *
+ * Return Value:
+ *   NTSTATUS:      synchronous: the final Irp->IoStatus.Status
+ *                  asynchronous: the result of IoCallDriver
+ *
+ * NOTES:
+ *   In the asynchronous case no completion routine is installed
+ *   here and the Irp is not freed by this routine.
+ */
+
+NTSTATUS
+KsSubmitTdiIrp(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN BOOLEAN          bSynchronous,
+    OUT PULONG          Information
+    )
+{
+    NTSTATUS            Status;
+    KEVENT              Event;
+
+    if (bSynchronous) {
+
+        /* event to be set by KsIrpCompletionRoutine on completion */
+        KeInitializeEvent(
+            &Event,
+            SynchronizationEvent,
+            FALSE
+            );
+
+
+        /* invoke the routine on success, error and cancel alike */
+        IoSetCompletionRoutine(
+            Irp,
+            KsIrpCompletionRoutine,
+            &Event,
+            TRUE,
+            TRUE,
+            TRUE
+            );
+    }
+
+    Status = IoCallDriver(DeviceObject, Irp);
+
+    if (bSynchronous) {
+
+        if (STATUS_PENDING == Status) {
+
+            /* the wait result is overwritten by the Irp status below */
+            Status = KeWaitForSingleObject(
+                        &Event,
+                        Executive,
+                        KernelMode,
+                        FALSE,
+                        NULL
+                        );
+        }
+
+        /* the Irp is finished now: pick up its final status */
+        Status = Irp->IoStatus.Status;
+
+        if (Information) {
+            *Information = (ULONG)(Irp->IoStatus.Information);
+        }
+
+        /* detach the Mdl before freeing the Irp; the Mdl itself is not released here */
+        Irp->MdlAddress = NULL;
+        IoFreeIrp(Irp);
+    }
+
+    if (!NT_SUCCESS(Status)) {
+
+        KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n",
+                    Status, KsNtStatusToString(Status)));
+    }
+
+    return (Status);
+}
+
+
+
+/*
+ * KsOpenControl
+ *   Open the Control Channel Object ...
+ *
+ * Arguments:
+ *   DeviceName:   the device name to be opened
+ *   Handle:       opened handle in success case
+ *   FileObject:   the fileobject of the device
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   The device is opened without any extended attributes. If the
+ *   file object cannot be referenced the handle is closed again,
+ *   but the value stored in *Handle is left as it is.
+ */
+
+NTSTATUS
+KsOpenControl(
+    IN PUNICODE_STRING      DeviceName,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
+   )
+{
+    NTSTATUS          Status;
+    IO_STATUS_BLOCK   IoStatus;
+    OBJECT_ATTRIBUTES ObjectAttributes;
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    /* kernel handle, case insensitive name lookup */
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    /* open the control channel: no EA buffer is passed */
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                FILE_SHARE_READ | FILE_SHARE_WRITE,
+                FILE_OPEN,
+                0,
+                NULL,
+                0
+                );
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        return (Status);
+    }
+
+    /* translate the handle into a referenced file object */
+    Status = ObReferenceObjectByHandle(
+                *Handle,
+                FILE_ANY_ACCESS,
+                NULL,
+                KernelMode,
+                FileObject,
+                NULL
+                );
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        ZwClose(*Handle);
+    }
+
+    return (Status);
+}
+
+
+/*
+ * KsCloseControl
+ *   Release the Control Channel Handle and FileObject
+ *
+ * Arguments:
+ *   Handle:       the channel handle to be released (may be NULL)
+ *   FileObject:   the fileobject to be released (may be NULL)
+ *
+ * Return Value:
+ *   NTSTATUS:     result of ZwClose, or STATUS_SUCCESS when
+ *                 there was no handle to close
+ *
+ * Notes:
+ *   The file object is dereferenced before the handle is closed.
+ */
+
+NTSTATUS
+KsCloseControl(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+   )
+{
+    NTSTATUS  CloseStatus = STATUS_SUCCESS;
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    if (NULL != FileObject) {
+        ObDereferenceObject(FileObject);
+    }
+
+    if (NULL != Handle) {
+        CloseStatus = ZwClose(Handle);
+    }
+
+    ASSERT(NT_SUCCESS(CloseStatus));
+
+    return (CloseStatus);
+}
+
+
+/*
+ * KsOpenAddress
+ *   Open the tdi address object
+ *
+ * Arguments:
+ *   DeviceName:   device name of the address object
+ *   pAddress:     tdi address of the address object
+ *   AddressLength: length in bytes of the tdi address
+ *   Handle:       the newly opened handle
+ *   FileObject:   the newly opened fileobject
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code). STATUS_INVALID_PARAMETER
+ *                 is returned when the address is too long for
+ *                 the on-stack EA buffer or for the 16-bit
+ *                 EaValueLength field.
+ *
+ * Notes:
+ *   The address is handed to the transport as the value of the
+ *   TdiTransportAddress extended attribute. If the file object
+ *   cannot be referenced the handle is closed again, but the
+ *   value stored in *Handle is left as it is.
+ */
+
+NTSTATUS
+KsOpenAddress(
+    IN PUNICODE_STRING      DeviceName,
+    IN PTRANSPORT_ADDRESS   pAddress,
+    IN ULONG                AddressLength,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
+   )
+{
+    NTSTATUS          Status = STATUS_SUCCESS;
+
+    PFILE_FULL_EA_INFORMATION Ea = NULL;
+    ULONG             EaLength;
+    UCHAR             EaBuffer[EA_MAX_LENGTH];
+
+    OBJECT_ATTRIBUTES ObjectAttributes;
+    IO_STATUS_BLOCK   IoStatus;
+
+    //
+    // Checking that the EA (header + name + address) fits into
+    // EaBuffer and that the length survives the USHORT cast ...
+    //
+
+    EaLength =  sizeof(FILE_FULL_EA_INFORMATION) +
+                TDI_TRANSPORT_ADDRESS_LENGTH + AddressLength;
+
+    if (AddressLength > 0xFFFF || EaLength > sizeof(EaBuffer)) {
+
+        cfs_enter_debugger();
+        return (STATUS_INVALID_PARAMETER);
+    }
+
+    //
+    // Building EA for the Address Object to be Opened ...
+    //
+
+    Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH;
+    Ea->EaValueLength = (USHORT)AddressLength;
+
+    /* the name is copied together with its terminating zero */
+    RtlCopyMemory(
+        &(Ea->EaName),
+        TdiTransportAddress,
+        Ea->EaNameLength + 1
+        );
+
+    /* the value (the address) follows right behind the name */
+    RtlMoveMemory(
+        &(Ea->EaName[Ea->EaNameLength + 1]),
+        pAddress,
+        AddressLength
+        );
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+
+    //
+    // Initializing ...
+    //
+
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE |
+        OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    //
+    // Creating the Transport Address Object ...
+    //
+
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                0,
+                FILE_ATTRIBUTE_NORMAL,
+                FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */
+                FILE_OPEN,
+                0,
+                Ea,
+                EaLength
+                );
+
+
+    if (NT_SUCCESS(Status)) {
+
+        //
+        // Now Obtaining the FileObject of the Transport Address ...
+        //
+
+        Status = ObReferenceObjectByHandle(
+                    *Handle,
+                    FILE_ANY_ACCESS,
+                    NULL,
+                    KernelMode,
+                    FileObject,
+                    NULL
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+
+            cfs_enter_debugger();
+            ZwClose(*Handle);
+        }
+
+    } else {
+
+        cfs_enter_debugger();
+    }
+
+    return (Status);
+}
+
+/*
+ * KsCloseAddress
+ *   Release the Handle and FileObject of an opened tdi
+ *   address object
+ *
+ * Arguments:
+ *   Handle:       the handle to be released (may be NULL)
+ *   FileObject:   the fileobject to be released (may be NULL)
+ *
+ * Return Value:
+ *   NTSTATUS:     result of ZwClose, or STATUS_SUCCESS when
+ *                 there was no handle to close
+ *
+ * Notes:
+ *   The file object is dereferenced before the handle is closed.
+ */
+
+NTSTATUS
+KsCloseAddress(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+)
+{
+    NTSTATUS  CloseStatus = STATUS_SUCCESS;
+
+    if (NULL != FileObject) {
+        ObDereferenceObject(FileObject);
+    }
+
+    if (NULL != Handle) {
+        CloseStatus = ZwClose(Handle);
+    }
+
+    ASSERT(NT_SUCCESS(CloseStatus));
+
+    return (CloseStatus);
+}
+
+
+/*
+ * KsOpenConnection
+ *   Open a tdi connection object
+ *
+ * Arguments:
+ *   DeviceName:   device name of the connection object
+ *   ConnectionContext: the connection context
+ *   Handle:       the newly opened handle
+ *   FileObject:   the newly opened fileobject
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   The connection context is handed to the transport as the
+ *   value of the TdiConnectionContext extended attribute. If the
+ *   file object cannot be referenced the handle is closed again,
+ *   but the value stored in *Handle is left as it is.
+ */
+
+NTSTATUS
+KsOpenConnection(
+    IN PUNICODE_STRING      DeviceName,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    OUT HANDLE *            Handle,
+    OUT PFILE_OBJECT *      FileObject
+   )
+{
+    NTSTATUS            Status;
+
+    UCHAR               EaBuffer[EA_MAX_LENGTH];
+    PFILE_FULL_EA_INFORMATION Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer;
+    ULONG               EaLength;
+
+    OBJECT_ATTRIBUTES   ObjectAttributes;
+    IO_STATUS_BLOCK     IoStatus;
+
+    /* EA header: a single entry named TdiConnectionContext */
+    Ea->NextEntryOffset = 0;
+    Ea->Flags = 0;
+    Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH;
+    Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT);
+
+    /* the name is copied together with its terminating zero */
+    RtlCopyMemory(
+        &(Ea->EaName),
+        TdiConnectionContext,
+        Ea->EaNameLength + 1
+        );
+
+    /* the value (the context pointer itself) follows the name */
+    RtlMoveMemory(
+        &(Ea->EaName[Ea->EaNameLength + 1]),
+        &ConnectionContext,
+        sizeof(CONNECTION_CONTEXT)
+        );
+
+    EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 +
+               Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT);
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    /* kernel handle, case insensitive name lookup */
+    InitializeObjectAttributes(
+        &ObjectAttributes,
+        DeviceName,
+        OBJ_CASE_INSENSITIVE | OBJ_KERNEL_HANDLE,
+        NULL,
+        NULL
+        );
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    /* create the connection object; it is opened without sharing */
+    Status = ZwCreateFile(
+                Handle,
+                FILE_READ_DATA | FILE_WRITE_DATA,
+                &ObjectAttributes,
+                &IoStatus,
+                NULL,
+                FILE_ATTRIBUTE_NORMAL,
+                0,
+                FILE_OPEN,
+                0,
+                Ea,
+                EaLength
+                );
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        return (Status);
+    }
+
+    /* translate the handle into a referenced file object */
+    Status = ObReferenceObjectByHandle(
+                *Handle,
+                FILE_ANY_ACCESS,
+                NULL,
+                KernelMode,
+                FileObject,
+                NULL
+                );
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        ZwClose(*Handle);
+    }
+
+    return (Status);
+}
+
+/*
+ * KsCloseConnection
+ *   Release the Handle and FileObject of an opened tdi
+ *   connection object
+ *
+ * Arguments:
+ *   Handle:       the handle to be released (may be NULL)
+ *   FileObject:   the fileobject to be released (may be NULL)
+ *
+ * Return Value:
+ *   NTSTATUS:     result of ZwClose, or STATUS_SUCCESS when
+ *                 there was no handle to close
+ *
+ * Notes:
+ *   The file object is dereferenced before the handle is closed.
+ */
+
+NTSTATUS
+KsCloseConnection(
+    IN HANDLE             Handle,
+    IN PFILE_OBJECT       FileObject
+    )
+{
+    NTSTATUS  CloseStatus = STATUS_SUCCESS;
+
+    if (NULL != FileObject) {
+        ObDereferenceObject(FileObject);
+    }
+
+    if (NULL != Handle) {
+        CloseStatus = ZwClose(Handle);
+    }
+
+    ASSERT(NT_SUCCESS(CloseStatus));
+
+    return (CloseStatus);
+}
+
+
+/*
+ * KsAssociateAddress
+ *   Associate an address object with a connection object
+ *
+ * Arguments:
+ *   AddressHandle:  the handle of the address object
+ *   ConnectionObject:  the FileObject of the connection
+ *
+ * Return Value:
+ *   NTSTATUS:     STATUS_INSUFFICIENT_RESOURCES when no Irp
+ *                 could be allocated, otherwise the status
+ *                 returned by KsSubmitTdiIrp
+ *
+ * Notes:
+ *   The request is submitted synchronously.
+ */
+
+NTSTATUS
+KsAssociateAddress(
+    IN HANDLE           AddressHandle,
+    IN PFILE_OBJECT     ConnectionObject
+    )
+{
+    PDEVICE_OBJECT      DeviceObject;
+    PIRP                Irp;
+
+    /* the transport device sitting behind the connection object */
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+        return (STATUS_INSUFFICIENT_RESOURCES);
+    }
+
+    /* no completion routine here: KsSubmitTdiIrp installs its own */
+    TdiBuildAssociateAddress(
+        Irp,
+        DeviceObject,
+        ConnectionObject,
+        NULL,
+        NULL,
+        AddressHandle
+        );
+
+    /* hand the prepared Irp to the transport driver and wait for it */
+    return (KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL));
+}
+
+
+/*
+ * KsDisassociateAddress
+ *   Disassociate the connection object (its relationship with
+ *   the corresponding address object will be dismissed)
+ *
+ * Arguments:
+ *   ConnectionObject:  the FileObject of the connection
+ *
+ * Return Value:
+ *   NTSTATUS:     STATUS_INSUFFICIENT_RESOURCES when no Irp
+ *                 could be allocated, otherwise the status
+ *                 returned by KsSubmitTdiIrp
+ *
+ * Notes:
+ *   The request is submitted synchronously.
+ */
+
+NTSTATUS
+KsDisassociateAddress(
+    IN PFILE_OBJECT     ConnectionObject
+    )
+{
+    PDEVICE_OBJECT      DeviceObject;
+    PIRP                Irp;
+
+    /* the transport device sitting behind the connection object */
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+        return (STATUS_INSUFFICIENT_RESOURCES);
+    }
+
+    /* no completion routine here: KsSubmitTdiIrp installs its own */
+    TdiBuildDisassociateAddress(
+        Irp,
+        DeviceObject,
+        ConnectionObject,
+        NULL,
+        NULL
+        );
+
+    /* hand the prepared Irp to the transport driver and wait for it */
+    return (KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL));
+}
+
+
+/*
+
+//
+// Connection Control Event Callbacks
+//
+
+TDI_EVENT_CONNECT
+TDI_EVENT_DISCONNECT
+TDI_EVENT_ERROR
+
+//
+// Tcp Event Callbacks
+//
+
+TDI_EVENT_RECEIVE
+TDI_EVENT_RECEIVE_EXPEDITED
+TDI_EVENT_CHAINED_RECEIVE
+TDI_EVENT_CHAINED_RECEIVE_EXPEDITED
+
+//
+// Udp Event Callbacks
+//
+
+TDI_EVENT_RECEIVE_DATAGRAM
+TDI_EVENT_CHAINED_RECEIVE_DATAGRAM
+
+*/
+
+
+/*
+ * KsSetEventHandlers
+ *   Set the tdi event callbacks on an address object
+ *
+ * Arguments:
+ *   AddressObject: the FileObject of the address object
+ *   EventContext:  the parameter for the callbacks
+ *   Handlers:      the handlers indicator array; only entries
+ *                  with IsActive[] set are registered
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * NOTES:
+ *   Registration stops at the first failure. Failures for
+ *   TDI_EVENT_SEND_POSSIBLE and
+ *   TDI_EVENT_CHAINED_RECEIVE_EXPEDITED are treated as success.
+ */
+
+NTSTATUS
+KsSetEventHandlers(
+    IN PFILE_OBJECT                         AddressObject,  // Address File Object
+    IN PVOID                                EventContext,   // Context for Handlers
+    IN PKS_EVENT_HANDLERS                   Handlers        // Handlers Indicator
+   )
+{
+    NTSTATUS             Status = STATUS_SUCCESS;
+    PDEVICE_OBJECT       DeviceObject = IoGetRelatedDeviceObject(AddressObject);
+    USHORT               i;
+
+    for (i = 0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) {
+
+        PIRP    Irp;
+
+        /* skip the event types nobody asked for */
+        if (!Handlers->IsActive[i]) {
+            continue;
+        }
+
+        Irp = KsBuildTdiIrp(DeviceObject);
+
+        if (NULL != Irp) {
+
+            TdiBuildSetEventHandler(
+                Irp,
+                DeviceObject,
+                AddressObject,
+                NULL,
+                NULL,
+                i,                      /* tdi event type */
+                Handlers->Handler[i],   /* tdi event handler */
+                EventContext            /* context for the handler */
+                );
+
+            /* synchronous call: the Irp is freed by KsSubmitTdiIrp */
+            Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL);
+
+            /* tcp/ip tdi does not support these two event callbacks */
+            if (!NT_SUCCESS(Status) &&
+                (i == TDI_EVENT_SEND_POSSIBLE ||
+                 i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED)) {
+                cfs_enter_debugger();
+                Status = STATUS_SUCCESS;
+            }
+
+        } else {
+
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+        }
+
+        if (!NT_SUCCESS(Status)) {
+            cfs_enter_debugger();
+            break;
+        }
+    }
+
+    if (!NT_SUCCESS(Status)) {
+
+        KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n",
+                    Status, KsNtStatusToString(Status) ));
+    }
+
+    return (Status);
+}
+
+
+
+/*
+ * KsQueryAddressInfo
+ *   Query the address of the FileObject specified
+ *
+ * Arguments:
+ *   FileObject:  the FileObject to be queried
+ *   AddressInfo: buffer to contain the address info
+ *   AddressSize: length of the AddressInfo buffer
+ *
+ * Return Value:
+ *   NTSTATUS:     kernel status code (STATUS_SUCCESS
+ *                 or other error code)
+ *
+ * Notes:
+ *   N/A
+ */
+
+NTSTATUS
+KsQueryAddressInfo(
+    PFILE_OBJECT            FileObject,
+    PTDI_ADDRESS_INFO       AddressInfo,
+    PULONG                  AddressSize
+   )
+{
+    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
+    PIRP              Irp = NULL;
+    PMDL              Mdl;
+    PDEVICE_OBJECT    DeviceObject;
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+    RtlZeroMemory(AddressInfo, *(AddressSize));
+
+    //
+    // Allocating the Tdi Setting Irp ...
+    //
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+
+    } else {
+
+        //
+        // Locking the User Buffer / Allocating a MDL for it
+        //
+
+        Status = KsLockUserBuffer(
+                    AddressInfo,
+                    FALSE,
+                    *(AddressSize),
+                    IoModifyAccess,
+                    &Mdl
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+
+            IoFreeIrp(Irp);
+            Irp = NULL;
+        }
+    }
+
+    if (Irp) {
+
+        LASSERT(NT_SUCCESS(Status));
+
+        TdiBuildQueryInformation(
+                    Irp,
+                    DeviceObject,
+                    FileObject,
+                    NULL,
+                    NULL,
+                    TDI_QUERY_ADDRESS_INFO,
+                    Mdl
+                    );
+
+        Status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    AddressSize
+                    );
+
+        KsReleaseMdl(Mdl, FALSE);
+    }
+
+    if (!NT_SUCCESS(Status)) {
+
+        cfs_enter_debugger();
+        //TDI_BUFFER_OVERFLOW
+    }
+
+    return (Status);
+}
+
+/*
+ * KsQueryProviderInfo
+ *   Open a control channel to the named transport device, send it a
+ *   TDI_QUERY_PROVIDER_INFO request and close the channel again.
+ *
+ * Arguments:
+ *   TdiDeviceName:  the transport device's name string
+ *   ProviderInfo:   TDI_PROVIDER_INFO structure to be filled in; it
+ *                   is zeroed before the query is built
+ *
+ * Return Value:
+ *   NTSTATUS:       the KsOpenControl status when the channel cannot
+ *                   be opened, STATUS_INSUFFICIENT_RESOURCES when no
+ *                   Irp could be built, otherwise the status of
+ *                   KsLockUserBuffer or of KsSubmitTdiIrp
+ *
+ * NOTES:
+ *   Failures after the channel was opened break into the debugger
+ *   through cfs_enter_debugger().
+ */
+
+NTSTATUS
+KsQueryProviderInfo(
+    PWSTR               TdiDeviceName,
+    PTDI_PROVIDER_INFO  ProviderInfo
+   )
+{
+    NTSTATUS            Status;
+    UNICODE_STRING      ChannelName;
+    HANDLE              ChannelHandle;
+    PFILE_OBJECT        ChannelObject;
+    PDEVICE_OBJECT      TdiDevice;
+    PIRP                QueryIrp;
+    PMDL                BufferMdl = NULL;
+    ULONG               InfoSize = sizeof(TDI_PROVIDER_INFO);
+
+    RtlInitUnicodeString(&ChannelName, TdiDeviceName);
+
+    /* the query travels over a tdi control channel */
+    Status = KsOpenControl(&ChannelName, &ChannelHandle, &ChannelObject);
+    if (!NT_SUCCESS(Status)) {
+        KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n"));
+        return (Status);
+    }
+
+    TdiDevice = IoGetRelatedDeviceObject(ChannelObject);
+
+    RtlZeroMemory(ProviderInfo, InfoSize);
+
+    /* the Irp that will carry the query */
+    QueryIrp = KsBuildTdiIrp(TdiDevice);
+    if (NULL == QueryIrp) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto close;
+    }
+
+    /* describe the output buffer with a MDL */
+    Status = KsLockUserBuffer(ProviderInfo, FALSE, InfoSize,
+                              IoModifyAccess, &BufferMdl);
+    if (!NT_SUCCESS(Status)) {
+        /* the Irp was never submitted, so it is released here */
+        IoFreeIrp(QueryIrp);
+        goto close;
+    }
+
+    LASSERT(NT_SUCCESS(Status));
+
+    TdiBuildQueryInformation(QueryIrp, TdiDevice, ChannelObject,
+                             NULL, NULL,
+                             TDI_QUERY_PROVIDER_INFO, BufferMdl);
+
+    Status = KsSubmitTdiIrp(TdiDevice, QueryIrp, TRUE, &InfoSize);
+
+    KsReleaseMdl(BufferMdl, FALSE);
+
+close:
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+    }
+
+    /* the control channel is closed on every path that opened it */
+    KsCloseControl(ChannelHandle, ChannelObject);
+
+    return (Status);
+}
+
+/*
+ * KsQueryConnectionInfo
+ *   Query the connection info of the FileObject specified
+ *   (some statistics data of the traffic) by sending a
+ *   TDI_QUERY_CONNECTION_INFO request to its transport.
+ *
+ * Arguments:
+ *   ConnectionObject: the connection FileObject to be queried
+ *   ConnectionInfo:   buffer to contain the connection info; it is
+ *                     zeroed before the query is built
+ *   ConnectionSize:   length of the ConnectionInfo buffer; the same
+ *                     pointer is handed to KsSubmitTdiIrp
+ *
+ * Return Value:
+ *   NTSTATUS:     STATUS_INSUFFICIENT_RESOURCES when no Irp could be
+ *                 built, otherwise the status of KsLockUserBuffer or
+ *                 of KsSubmitTdiIrp
+ *
+ * NOTES:
+ *   Asserts that the caller runs below DISPATCH_LEVEL.
+ */
+
+NTSTATUS
+KsQueryConnectionInfo(
+    PFILE_OBJECT            ConnectionObject,
+    PTDI_CONNECTION_INFO    ConnectionInfo,
+    PULONG                  ConnectionSize
+   )
+{
+    NTSTATUS          Status = STATUS_UNSUCCESSFUL;
+    PIRP              Irp = NULL;
+    PMDL              Mdl = NULL;
+    PDEVICE_OBJECT    DeviceObject;
+
+    LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL );
+
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    RtlZeroMemory(ConnectionInfo, *(ConnectionSize));
+
+    //
+    // Allocating the Tdi Query Irp ...
+    //
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+
+    } else {
+
+        //
+        // Locking the User Buffer / Allocating a MDL for it
+        //
+
+        Status = KsLockUserBuffer(
+                    ConnectionInfo,
+                    FALSE,
+                    *(ConnectionSize),
+                    IoModifyAccess,
+                    &Mdl
+                    );
+
+        //
+        // The Irp must only be dropped when the buffer could NOT be
+        // locked. The test used to be inverted, which discarded the
+        // Irp on success and submitted it without a valid Mdl on
+        // failure.
+        //
+
+        if (!NT_SUCCESS(Status)) {
+
+            IoFreeIrp(Irp);
+            Irp = NULL;
+        }
+    }
+
+    if (Irp) {
+
+        LASSERT(NT_SUCCESS(Status));
+
+        TdiBuildQueryInformation(
+                    Irp,
+                    DeviceObject,
+                    ConnectionObject,
+                    NULL,
+                    NULL,
+                    TDI_QUERY_CONNECTION_INFO,
+                    Mdl
+                    );
+
+        Status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    ConnectionSize
+                    );
+
+        KsReleaseMdl(Mdl, FALSE);
+    }
+
+    return (Status);
+}
+
+
+/*
+ * KsInitializeTdiAddress
+ *   Fill in a TA_IP_ADDRESS so that it holds exactly one entry of
+ *   type TDI_ADDRESS_TYPE_IP.
+ *
+ * Arguments:
+ *   pTransportAddress: tdi address to be initialized
+ *   IpAddress:         the ip address, stored unchanged in in_addr
+ *   IpPort:            the ip port, stored unchanged in sin_port
+ *
+ * Return Value:
+ *   ULONG: the offset of the first entry's address payload inside a
+ *          TRANSPORT_ADDRESS plus TDI_ADDRESS_LENGTH_IP
+ *
+ * NOTES:
+ *   No byte order conversion is done here.
+ */
+
+ULONG
+KsInitializeTdiAddress(
+    IN OUT PTA_IP_ADDRESS   pTransportAddress,
+    IN ULONG                IpAddress,
+    IN USHORT               IpPort
+    )
+{
+    PTDI_ADDRESS_IP Ip = &pTransportAddress->Address[ 0 ].Address[ 0 ];
+    ULONG           Size;
+
+    /* a single entry, typed and sized as an ip address */
+    pTransportAddress->TAAddressCount = 1;
+    pTransportAddress->Address[ 0 ].AddressType   = TDI_ADDRESS_TYPE_IP;
+    pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP;
+
+    Ip->in_addr  = IpAddress;
+    Ip->sin_port = IpPort;
+
+    Size = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP;
+
+    return Size;
+}
+
+/*
+ * KsQueryTdiAddressLength
+ *   Compute the number of bytes occupied by a TRANSPORT_ADDRESS and
+ *   all the TA_ADDRESS entries it carries.
+ *
+ * Arguments:
+ *   pTransportAddress: tdi address to be measured (must not be NULL)
+ *
+ * Return Value:
+ *   ULONG: the TRANSPORT_ADDRESS header, plus one TA_ADDRESS header
+ *          per entry, plus the sum of the entries' AddressLength
+ *
+ * NOTES:
+ *   The entries are walked through an UNALIGNED pointer because each
+ *   one starts right after the payload of the previous one.
+ */
+
+ULONG
+KsQueryTdiAddressLength(
+    PTRANSPORT_ADDRESS      pTransportAddress
+    )
+{
+    PTA_ADDRESS UNALIGNED   Entry = NULL;
+    ULONG                   Size = 0;
+    LONG                    Left;
+
+    ASSERT (NULL != pTransportAddress);
+
+    /* fixed part: the outer header and one entry header per address */
+    Size = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) +
+           FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount;
+
+    /* variable part: the payload of every entry */
+    Entry = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address;
+
+    for (Left = pTransportAddress->TAAddressCount; Left > 0; Left--) {
+
+        USHORT Payload = Entry->AddressLength;
+
+        Size += Payload;
+
+        /* the next entry follows this entry's header and payload */
+        Entry = (TA_ADDRESS UNALIGNED *)((PCHAR)Entry +
+                                         FIELD_OFFSET(TA_ADDRESS,Address) +
+                                         Payload );
+    }
+
+    return Size;
+}
+
+
+/*
+ * KsQueryIpAddress
+ *   Query the address of a tdi object into a temporary
+ *   TDI_ADDRESS_INFO and copy its Address member out to the caller.
+ *
+ * Arguments:
+ *   FileObject:    tdi object to be queried
+ *   TdiAddress:    caller's buffer receiving the copied address
+ *   AddressLength: in:  size of the TdiAddress buffer
+ *                  out: the length reported by KsQueryAddressInfo
+ *                       (only written when the copy was done)
+ *
+ * Return Value:
+ *   NTSTATUS: STATUS_SUCCESS when the address was copied,
+ *             STATUS_BUFFER_TOO_SMALL when *AddressLength is smaller
+ *             than the reported length,
+ *             STATUS_INSUFFICIENT_RESOURCES when the temporary
+ *             buffer cannot be allocated, otherwise the failure
+ *             status of KsQueryAddressInfo
+ *
+ * NOTES:
+ *   NOTE(review): the number of bytes copied is the length reported
+ *   for the whole TDI_ADDRESS_INFO, while the copy starts at its
+ *   Address member -- confirm this is the intended size.
+ */
+
+NTSTATUS
+KsQueryIpAddress(
+    PFILE_OBJECT    FileObject,
+    PVOID           TdiAddress,
+    ULONG*          AddressLength
+    )
+{
+    NTSTATUS            Status;
+    PTDI_ADDRESS_INFO   Info;
+    ULONG               InfoSize = MAX_ADDRESS_LENGTH;
+
+    /* room for a TDI_ADDRESS_INFO of the maximum supported length */
+    Info = (PTDI_ADDRESS_INFO)
+                ExAllocatePoolWithTag(NonPagedPool, InfoSize, 'KSAI');
+
+    if (NULL == Info) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    Status = KsQueryAddressInfo(FileObject, Info, &InfoSize);
+
+    if (NT_SUCCESS(Status)) {
+
+        if (*AddressLength >= InfoSize) {
+
+            /* hand the address and its length back to the caller */
+            *AddressLength = InfoSize;
+            RtlCopyMemory(TdiAddress, &(Info->Address), InfoSize);
+
+            Status = STATUS_SUCCESS;
+
+        } else {
+
+            Status = STATUS_BUFFER_TOO_SMALL;
+        }
+    }
+
+    ExFreePool(Info);
+
+    return Status;
+}
+
+
+/*
+ * KsErrorEventHandler
+ *   the common error event handler callback; it is the handler
+ *   installed for TDI_EVENT_ERROR by ks_set_handlers
+ *
+ * Arguments:
+ *   TdiEventContext: event context (not used here)
+ *   Status:          the error code (not used here)
+ *
+ * Return Value:
+ *   NTSTATUS: always STATUS_SUCCESS
+ *
+ * NOTES:
+ *   Nothing is recovered here: the handler only logs the current
+ *   Irql and breaks into the debugger.
+ */
+
+NTSTATUS
+KsErrorEventHandler(
+    IN PVOID        TdiEventContext,
+    IN NTSTATUS     Status
+   )
+{
+    UNREFERENCED_PARAMETER(TdiEventContext);
+    UNREFERENCED_PARAMETER(Status);
+
+    KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n", KeGetCurrentIrql()));
+    cfs_enter_debugger();
+
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * ks_set_handlers
+ *   install the event handler callbacks on the address object of a
+ *   tdi connection
+ *
+ * Arguments:
+ *   tconn: the tdi connection object
+ *
+ * Return Value:
+ *   int: cfs_error_code() of the KsSetEventHandlers status, or of
+ *        STATUS_SUCCESS when nothing had to be installed
+ *
+ * NOTES:
+ *   Nothing is installed when the address object is not opened yet
+ *   or when tconn is a child connection. Listeners additionally get
+ *   the connect event handler.
+ */
+
+int
+ks_set_handlers(
+    ksock_tconn_t *     tconn
+    )
+{
+    NTSTATUS            rc = STATUS_SUCCESS;
+    KS_EVENT_HANDLERS   table;
+
+    /* no address object yet, or a child connection: nothing to do */
+    if (tconn->kstc_addr.FileObject == NULL ||
+        tconn->kstc_type == kstt_child) {
+        return cfs_error_code(rc);
+    }
+
+    /* start with an empty table, then enable the common callbacks */
+    memset(&table, 0, sizeof(table));
+
+    SetEventHandler(table, TDI_EVENT_ERROR, KsErrorEventHandler);
+    SetEventHandler(table, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler);
+    SetEventHandler(table, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler);
+    SetEventHandler(table, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler);
+    SetEventHandler(table, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler);
+
+    /* only a listener is told about incoming connections */
+    if (tconn->kstc_type == kstt_listener) {
+        SetEventHandler(table, TDI_EVENT_CONNECT, KsConnectEventHandler);
+    }
+
+    /* hand the whole table over, with tconn as the event context */
+    rc = KsSetEventHandlers(tconn->kstc_addr.FileObject, tconn, &table);
+
+    return cfs_error_code(rc);
+}
+
+
+/*
+ * ks_reset_handlers
+ *   disable the event handler callbacks of a tdi connection by
+ *   registering NULL for each of them
+ *
+ * Arguments:
+ *   tconn: the tdi connection object
+ *
+ * Return Value:
+ *   int: cfs_error_code() of the KsSetEventHandlers status, or of
+ *        STATUS_SUCCESS when nothing had to be reset
+ *
+ * NOTES:
+ *   Nothing is reset when the address object is not opened yet or
+ *   when tconn is a child connection. For listeners the connect
+ *   event handler is reset as well.
+ */
+
+int
+ks_reset_handlers(
+    ksock_tconn_t *     tconn
+    )
+{
+    NTSTATUS            rc = STATUS_SUCCESS;
+    KS_EVENT_HANDLERS   table;
+
+    /* no address object yet, or a child connection: nothing to do */
+    if (tconn->kstc_addr.FileObject == NULL ||
+        tconn->kstc_type == kstt_child) {
+        return cfs_error_code(rc);
+    }
+
+    /* start with an empty table, then mark the callbacks to clear */
+    memset(&table, 0, sizeof(table));
+
+    SetEventHandler(table, TDI_EVENT_ERROR, NULL);
+    SetEventHandler(table, TDI_EVENT_DISCONNECT, NULL);
+    SetEventHandler(table, TDI_EVENT_RECEIVE, NULL);
+    SetEventHandler(table, TDI_EVENT_RECEIVE_EXPEDITED, NULL);
+    SetEventHandler(table, TDI_EVENT_CHAINED_RECEIVE, NULL);
+
+    /* a listener also has a connect callback to clear */
+    if (tconn->kstc_type == kstt_listener) {
+        SetEventHandler(table, TDI_EVENT_CONNECT, NULL);
+    }
+
+    /* hand the whole table over, with tconn as the event context */
+    rc = KsSetEventHandlers(tconn->kstc_addr.FileObject, tconn, &table);
+
+    return cfs_error_code(rc);
+}
+
+
+/*
+ * KsAcceptCompletionRoutine
+ *   Irp completion routine for TdiBuildAccept (KsConnectEventHandler)
+ *
+ *   Here the system gives us a chance to check whether the
+ *   connection has been established or not.
+ *
+ * Arguments:
+ *   DeviceObject:  the device object of the transport driver
+ *   Irp:           the Irp that is being completed; it is freed here
+ *   Context:       the context we specified when issuing the Irp,
+ *                  i.e. the child tconn
+ *
+ * Return Value:
+ *   Always STATUS_MORE_PROCESSING_REQUIRED (the Irp has already been
+ *   freed by this routine)
+ *
+ * Notes:
+ *   Drops the child reference taken by KsConnectEventHandler.
+ */
+
+NTSTATUS
+KsAcceptCompletionRoutine(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
+    )
+{
+    ksock_tconn_t * child = (ksock_tconn_t *) Context;
+    ksock_tconn_t * parent = child->child.kstc_parent;
+
+    KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n",
+                KeGetCurrentIrql() ));
+
+    KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n",
+                 Context, Irp->IoStatus.Status));
+
+    LASSERT(child->kstc_type == kstt_child);
+
+    /* the child's state is only changed under the child's own lock */
+    spin_lock(&(child->kstc_lock));
+
+    LASSERT(parent->kstc_state == ksts_listening);
+    LASSERT(child->kstc_state == ksts_connecting);
+
+    if (NT_SUCCESS(Irp->IoStatus.Status)) {
+
+        /* the accept succeeded: the child is now a connected socket */
+        child->child.kstc_accepted = TRUE;
+
+        child->kstc_state = ksts_connected;
+
+        /* wake up the daemon thread which waits on this event */
+        KeSetEvent(
+            &(parent->listener.kstc_accept_event),
+            0,
+            FALSE
+            );
+
+        spin_unlock(&(child->kstc_lock));
+
+        KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent));
+
+    } else {
+
+        /* the accept failed: make this child connection available
+           again by clearing its busy mark and putting it back into
+           the associated state */
+        child->child.kstc_accepted = FALSE;
+        child->child.kstc_busy = FALSE;
+        child->kstc_state = ksts_associated;
+
+        spin_unlock(&(child->kstc_lock));
+    }
+
+    /* now free the Irp */
+    IoFreeIrp(Irp);
+
+    /* drop the reference count of the child */
+    ks_put_tconn(child);
+
+    return (STATUS_MORE_PROCESSING_REQUIRED);
+}
+
+
+/*
+ * ks_get_vacancy_backlog
+ *   Pick the first child on the listener's listening list that is
+ *   not busy and mark it busy.
+ *
+ * Arguments:
+ *   parent: the listener daemon connection
+ *
+ * Return Value:
+ *   the reserved child connection, or NULL when the list is empty
+ *   or every child on it is busy
+ *
+ * Notes
+ *   Parent's lock should be acquired before calling. Each child is
+ *   examined and marked under its own lock.
+ */
+
+ksock_tconn_t *
+ks_get_vacancy_backlog(
+    ksock_tconn_t *  parent
+    )
+{
+    ksock_tconn_t *     vacant = NULL;
+    struct list_head *  pos;
+
+    LASSERT(parent->kstc_type == kstt_listener);
+    LASSERT(parent->kstc_state == ksts_listening);
+
+    /* an empty listening queue simply yields no iteration */
+    list_for_each(pos, &(parent->listener.kstc_listening.list)) {
+
+        ksock_tconn_t * candidate;
+
+        candidate = list_entry (pos, ksock_tconn_t, child.kstc_link);
+
+        spin_lock(&(candidate->kstc_lock));
+
+        if (candidate->child.kstc_busy) {
+            /* already handed out, try the next one */
+            spin_unlock(&(candidate->kstc_lock));
+            continue;
+        }
+
+        /* reserve it for the caller */
+        LASSERT(candidate->kstc_state == ksts_associated);
+        candidate->child.kstc_busy = TRUE;
+        spin_unlock(&(candidate->kstc_lock));
+
+        vacant = candidate;
+        break;
+    }
+
+    return vacant;
+}
+
+/*
+ * KsSearchIpAddress
+ *   Look up the address slot whose device name equals DeviceName
+ *   (compared case-insensitively).
+ *
+ * Return Value:
+ *   the matching slot, or NULL when no slot matches. The address
+ *   list lock is held for the walk only; it is released before the
+ *   slot is returned.
+ */
+
+ks_addr_slot_t *
+KsSearchIpAddress(PUNICODE_STRING  DeviceName)
+{
+    ks_addr_slot_t * match = NULL;
+    PLIST_ENTRY      entry;
+
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    for (entry = ks_data.ksnd_addrs_list.Flink;
+         entry != &ks_data.ksnd_addrs_list;
+         entry = entry->Flink) {
+
+        ks_addr_slot_t * candidate;
+
+        candidate = CONTAINING_RECORD(entry, ks_addr_slot_t, link);
+
+        if (RtlCompareUnicodeString(DeviceName, &candidate->devname, TRUE) == 0) {
+            match = candidate;
+            break;
+        }
+    }
+
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+
+    return match;
+}
+
+/*
+ * KsCleanupIpAddresses
+ *   Unlink and free every address slot on ks_data.ksnd_addrs_list,
+ *   decrementing ks_data.ksnd_naddrs for each one. The whole
+ *   operation runs under ks_data.ksnd_addrs_lock and asserts that
+ *   the counter ends up at zero.
+ */
+
+void
+KsCleanupIpAddresses()
+{
+    PLIST_ENTRY head = &ks_data.ksnd_addrs_list;
+
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    for (; !IsListEmpty(head); ks_data.ksnd_naddrs--) {
+
+        PLIST_ENTRY      entry  = RemoveHeadList(head);
+        ks_addr_slot_t * victim = CONTAINING_RECORD(entry, ks_addr_slot_t, link);
+
+        cfs_free(victim);
+    }
+
+    cfs_assert(ks_data.ksnd_naddrs == 0);
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+}
+
+/*
+ * KsAddAddressHandler
+ *   Add-address pnp callback (registered by KsRegisterPnpHandlers).
+ *   Only addresses of type TDI_ADDRESS_TYPE_IP with length
+ *   TDI_ADDRESS_LENGTH_IP are handled; anything else is ignored.
+ *
+ *   When a slot already exists for DeviceName it is marked up and
+ *   its ip address is refreshed. Otherwise a new slot named
+ *   "eth<N>" is allocated and appended to the address list; an
+ *   allocation failure is silently ignored.
+ *
+ * Arguments:
+ *   Address:    the address being added
+ *   DeviceName: name of the device the address belongs to
+ *   Context:    pnp context (only logged)
+ */
+
+VOID
+KsAddAddressHandler(
+    IN  PTA_ADDRESS      Address,
+    IN  PUNICODE_STRING  DeviceName,
+    IN  PTDI_PNP_CONTEXT Context
+    )
+{
+    PTDI_ADDRESS_IP  Ip;
+    ks_addr_slot_t * entry;
+
+    /* only plain ip addresses are tracked */
+    if ( Address->AddressType != TDI_ADDRESS_TYPE_IP ||
+         Address->AddressLength != TDI_ADDRESS_LENGTH_IP ) {
+        return;
+    }
+
+    Ip = (PTDI_ADDRESS_IP) &Address->Address[0];
+    KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
+              DeviceName, Context, Ip->in_addr,
+               (Ip->in_addr & 0xFF000000) >> 24,
+               (Ip->in_addr & 0x00FF0000) >> 16,
+               (Ip->in_addr & 0x0000FF00) >> 8,
+               (Ip->in_addr & 0x000000FF) >> 0 ));
+
+    entry = KsSearchIpAddress(DeviceName);
+
+    if (entry != NULL) {
+        /* known device: bring it up with the new address */
+        entry->up = TRUE;
+        entry->ip_addr = ntohl(Ip->in_addr);
+        return;
+    }
+
+    /* unknown device: the name is stored right behind the slot */
+    entry = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO);
+    if (entry == NULL) {
+        return;
+    }
+
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    InsertTailList(&ks_data.ksnd_addrs_list, &entry->link);
+    sprintf(entry->iface, "eth%d", ks_data.ksnd_naddrs++);
+    entry->ip_addr = ntohl(Ip->in_addr);
+    entry->up = TRUE;
+
+    RtlMoveMemory(&entry->buffer[0], DeviceName->Buffer, DeviceName->Length);
+    entry->devname.Length = DeviceName->Length;
+    entry->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR);
+    entry->devname.Buffer = entry->buffer;
+
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+}
+
+/*
+ * KsDelAddressHandler
+ *   Delete-address pnp callback (registered by
+ *   KsRegisterPnpHandlers). Only addresses of type
+ *   TDI_ADDRESS_TYPE_IP with length TDI_ADDRESS_LENGTH_IP are
+ *   handled. The slot found for DeviceName, if any, is marked down;
+ *   it is neither unlinked nor freed here.
+ *
+ * Arguments:
+ *   Address:    the address being removed
+ *   DeviceName: name of the device the address belonged to
+ *   Context:    pnp context (only logged)
+ */
+
+VOID
+KsDelAddressHandler(
+    IN  PTA_ADDRESS      Address,
+    IN  PUNICODE_STRING  DeviceName,
+    IN  PTDI_PNP_CONTEXT Context
+    )
+{
+    PTDI_ADDRESS_IP  Ip;
+    ks_addr_slot_t * entry;
+
+    /* only plain ip addresses are tracked */
+    if ( Address->AddressType != TDI_ADDRESS_TYPE_IP ||
+         Address->AddressLength != TDI_ADDRESS_LENGTH_IP ) {
+        return;
+    }
+
+    entry = KsSearchIpAddress(DeviceName);
+    if (entry != NULL) {
+        entry->up = FALSE;
+    }
+
+    Ip = (PTDI_ADDRESS_IP) &Address->Address[0];
+    KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n",
+              DeviceName, Context, Ip->in_addr,
+               (Ip->in_addr & 0xFF000000) >> 24,
+               (Ip->in_addr & 0x00FF0000) >> 16,
+               (Ip->in_addr & 0x0000FF00) >> 8,
+               (Ip->in_addr & 0x000000FF) >> 0 ));
+}
+
+/*
+ * KsRegisterPnpHandlers
+ *   Initialize the address bookkeeping in ks_data (client name,
+ *   lock and list head) and register KsAddAddressHandler /
+ *   KsDelAddressHandler as pnp address handlers.
+ *
+ * Return Value:
+ *   NTSTATUS: the status of TdiRegisterPnPHandlers; the registration
+ *             handle is stored in ks_data.ksnd_pnp_handle
+ */
+
+NTSTATUS
+KsRegisterPnpHandlers()
+{
+    TDI20_CLIENT_INTERFACE_INFO Interface;
+    NTSTATUS                    Status;
+
+    /* the global state the address handlers work on */
+    InitializeListHead(&ks_data.ksnd_addrs_list);
+    spin_lock_init(&ks_data.ksnd_addrs_lock);
+    RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME);
+
+    /* describe ourselves; handlers left zero are not registered */
+    RtlZeroMemory(&Interface, sizeof(Interface));
+
+    Interface.TdiVersion          = TDI_CURRENT_VERSION;
+    Interface.ClientName          = &ks_data.ksnd_client_name;
+    Interface.AddAddressHandlerV2 = KsAddAddressHandler;
+    Interface.DelAddressHandlerV2 = KsDelAddressHandler;
+
+    Status = TdiRegisterPnPHandlers(&Interface, sizeof(Interface),
+                                    &ks_data.ksnd_pnp_handle);
+
+    return Status;
+}
+
+/*
+ * KsDeregisterPnpHandlers
+ *   Undo KsRegisterPnpHandlers: deregister the pnp handlers, clear
+ *   the stored handle and free all the ip address slots. Does
+ *   nothing when no handle is stored.
+ */
+
+VOID
+KsDeregisterPnpHandlers()
+{
+    if (!ks_data.ksnd_pnp_handle) {
+        return;
+    }
+
+    /* stop the callbacks first, then drop what they collected */
+    TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle);
+    ks_data.ksnd_pnp_handle = NULL;
+
+    KsCleanupIpAddresses();
+}
+
+/*
+ * KsConnectEventHandler
+ *   Connect event handler, called by the underlying TDI transport
+ *   in response to an incoming request to the listening daemon.
+ *
+ *   It grabs a vacant backlog child from the listener's children
+ *   list, builds an accept Irp for it and hands that Irp back to
+ *   the TDI driver.
+ *
+ * Arguments:
+ *   TdiEventContext:     the tdi connection object of the listening
+ *                        daemon
+ *   RemoteAddressLength: length of RemoteAddress
+ *   RemoteAddress:       the peer's address; it is copied into a
+ *                        newly allocated TDI_CONNECTION_INFORMATION
+ *   UserDataLength,
+ *   UserData,
+ *   OptionsLength,
+ *   Options:             stored in the connection information as
+ *                        passed in (the pointers are NOT copied)
+ *   ConnectionContext:   out: the child tconn, or NULL on failure
+ *   AcceptIrp:           out: the accept Irp, or NULL on failure
+ *
+ * Return Value:
+ *   STATUS_MORE_PROCESSING_REQUIRED when an accept Irp is returned,
+ *   STATUS_CONNECTION_REFUSED when the parent is not listening,
+ *   STATUS_INSUFFICIENT_RESOURCES when the connection information or
+ *   the Irp cannot be allocated or no vacant child is available
+ *
+ * Notes:
+ *   A reference on the child is taken for the accept Irp; it is
+ *   dropped by KsAcceptCompletionRoutine.
+ */
+
+NTSTATUS
+KsConnectEventHandler(
+    IN PVOID                    TdiEventContext,
+    IN LONG                     RemoteAddressLength,
+    IN PVOID                    RemoteAddress,
+    IN LONG                     UserDataLength,
+    IN PVOID                    UserData,
+    IN LONG                     OptionsLength,
+    IN PVOID                    Options,
+    OUT CONNECTION_CONTEXT *    ConnectionContext,
+    OUT PIRP *                  AcceptIrp
+    )
+{
+    ksock_tconn_t *             parent;
+    ksock_tconn_t *             child = NULL;
+
+    PFILE_OBJECT                FileObject;
+    PDEVICE_OBJECT              DeviceObject;
+    NTSTATUS                    Status;
+
+    PIRP                        Irp = NULL;
+    PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL;
+
+    KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql()));
+    parent = (ksock_tconn_t *) TdiEventContext;
+
+    LASSERT(parent->kstc_type == kstt_listener);
+
+    spin_lock(&(parent->kstc_lock));
+
+    if (parent->kstc_state == ksts_listening) {
+
+        /* allocate a new ConnectionInfo to backup the peer's info;
+           the remote address is stored right behind the structure */
+
+        ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+                NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) +
+                RemoteAddressLength, 'iCsK' );
+
+        if (NULL == ConnectionInfo) {
+
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            cfs_enter_debugger();
+            goto errorout;
+        }
+
+        /* initializing ConnectionInfo structure ... */
+
+        ConnectionInfo->UserDataLength = UserDataLength;
+        ConnectionInfo->UserData = UserData;
+        ConnectionInfo->OptionsLength = OptionsLength;
+        ConnectionInfo->Options = Options;
+        ConnectionInfo->RemoteAddressLength = RemoteAddressLength;
+        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
+
+        RtlCopyMemory(
+                ConnectionInfo->RemoteAddress,
+                RemoteAddress,
+                RemoteAddressLength
+                );
+
+        /* get the vacancy listening child tdi connections */
+
+        child = ks_get_vacancy_backlog(parent);
+
+        if (child) {
+
+            spin_lock(&(child->kstc_lock));
+            child->child.kstc_info.ConnectionInfo = ConnectionInfo;
+            child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress;
+            child->kstc_state = ksts_connecting;
+            spin_unlock(&(child->kstc_lock));
+
+        } else {
+
+            KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent));
+
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+
+            goto errorout;
+        }
+
+        FileObject = child->child.kstc_info.FileObject;
+        DeviceObject = IoGetRelatedDeviceObject (FileObject);
+
+        Irp = KsBuildTdiIrp(DeviceObject);
+
+        if (NULL == Irp) {
+
+            /* no Irp to accept with: hand the reserved child back to
+               the backlog (the same state the accept completion
+               routine restores on failure) and detach the
+               ConnectionInfo, which is freed below */
+
+            spin_lock(&(child->kstc_lock));
+            child->child.kstc_info.ConnectionInfo = NULL;
+            child->child.kstc_info.Remote = NULL;
+            child->child.kstc_busy = FALSE;
+            child->kstc_state = ksts_associated;
+            spin_unlock(&(child->kstc_lock));
+
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+
+            goto errorout;
+        }
+
+        TdiBuildAccept(
+                Irp,
+                DeviceObject,
+                FileObject,
+                KsAcceptCompletionRoutine,
+                child,
+                NULL,
+                NULL
+                );
+
+        IoSetNextIrpStackLocation(Irp);
+
+        /* grab a reference of the child tdi connection for the Irp */
+        ks_get_tconn(child);
+
+        Status = STATUS_MORE_PROCESSING_REQUIRED;
+
+        *AcceptIrp = Irp;
+        *ConnectionContext = child;
+
+    } else {
+
+        Status = STATUS_CONNECTION_REFUSED;
+        goto errorout;
+    }
+
+    spin_unlock(&(parent->kstc_lock));
+
+    return Status;
+
+errorout:
+
+    spin_unlock(&(parent->kstc_lock));
+
+    {
+        *AcceptIrp = NULL;
+        *ConnectionContext = NULL;
+
+        if (ConnectionInfo) {
+
+            ExFreePool(ConnectionInfo);
+        }
+
+        if (Irp) {
+
+            IoFreeIrp (Irp);
+        }
+    }
+
+    return Status;
+}
+
+/*
+ * KsDisconectCompletionRoutine
+ *   the Irp completion routine for TdiBuildDisconnect
+ *
+ *   We just signal the event and return
+ *   STATUS_MORE_PROCESSING_REQUIRED to let the caller take the
+ *   responsibility of the Irp.
+ *
+ * Arguments:
+ *   DeviceObject:  the device object of the transport (not used)
+ *   Irp:           the Irp that is being completed (not used)
+ *   Context:       the event specified by the caller (a PKEVENT)
+ *
+ * Return Value:
+ *   Always STATUS_MORE_PROCESSING_REQUIRED
+ *
+ * Notes:
+ *   N/A
+ */
+
+NTSTATUS
+KsDisconectCompletionRoutine (
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
+    )
+{
+    /* these used to be marked after the return statement, where the
+       statement could never be reached */
+    UNREFERENCED_PARAMETER(DeviceObject);
+    UNREFERENCED_PARAMETER(Irp);
+
+    KeSetEvent((PKEVENT) Context, 0, FALSE);
+
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+
+/*
+ * KsDisconnectHelper
+ *   the routine to be executed in the WorkItem procedure;
+ *   this routine disconnects a tdi connection
+ *
+ * Arguments:
+ *   WorkItem:  the context transferred to the workitem; it carries
+ *              the tconn, the disconnect flags and an event
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   The tconn was already referenced by whoever queued the work
+ *   item (see KsDisconnectEventHandler); that reference is dropped
+ *   at the end of this routine.
+ */
+
+VOID
+KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem)
+{
+    ksock_tconn_t * tconn = WorkItem->tconn;
+
+    DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn);
+    ks_disconnect_tconn(tconn, WorkItem->Flags);
+
+    /* signal the work item's event now that the disconnect call
+       has returned */
+    KeSetEvent(&(WorkItem->Event), 0, FALSE);
+
+    /* allow the next disconnect work item to be queued */
+    spin_lock(&(tconn->kstc_lock));
+    cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+    spin_unlock(&(tconn->kstc_lock));
+
+    /* drop the reference held on behalf of this work item */
+    ks_put_tconn(tconn);
+}
+
+
+/*
+ * KsDisconnectEventHandler
+ *   Disconnect event handler, called by the underlying TDI transport
+ *   in response to an incoming disconnection notification from a
+ *   remote node.
+ *
+ *   For a connected tconn a work item running KsDisconnectHelper is
+ *   queued, unless one is already pending (KS_TCONN_DISCONNECT_BUSY).
+ *
+ * Arguments:
+ *   TdiEventContext:    event context (not used here)
+ *   ConnectionContext:  tdi connection object
+ *   DisconnectFlags:    specifies the nature of the disconnection
+ *   ......
+ *
+ * Return Value:
+ *   STATUS_REMOTE_DISCONNECT for an abortive disconnect and
+ *   STATUS_GRACEFUL_DISCONNECT for an orderly release of a connected
+ *   tconn; STATUS_SUCCESS otherwise
+ *
+ * Notes:
+ *   The queued work item owns an extra reference on the tconn,
+ *   which KsDisconnectHelper drops.
+ */
+
+
+NTSTATUS
+KsDisconnectEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN LONG                 DisconnectDataLength,
+    IN PVOID                DisconnectData,
+    IN LONG                 DisconnectInformationLength,
+    IN PVOID                DisconnectInformation,
+    IN ULONG                DisconnectFlags
+    )
+{
+    ksock_tconn_t *         tconn;
+    /* pre-set so that a connected tconn with neither the ABORT nor
+       the RELEASE flag no longer returns an uninitialized status */
+    NTSTATUS                Status = STATUS_SUCCESS;
+    PKS_DISCONNECT_WORKITEM WorkItem;
+
+    tconn = (ksock_tconn_t *)ConnectionContext;
+
+    KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n",
+                KeGetCurrentIrql() ));
+
+    KsPrint((2, "tconn = %x DisconnectFlags= %xh\n",
+                 tconn, DisconnectFlags));
+
+    /* keep the tconn alive while this handler works on it */
+    ks_get_tconn(tconn);
+    spin_lock(&(tconn->kstc_lock));
+
+    WorkItem = &(tconn->kstc_disconnect);
+
+    if (tconn->kstc_state != ksts_connected) {
+
+        Status = STATUS_SUCCESS;
+
+    } else {
+
+        if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) {
+
+            Status = STATUS_REMOTE_DISCONNECT;
+
+        } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) {
+
+            Status = STATUS_GRACEFUL_DISCONNECT;
+        }
+
+        if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
+
+            /* this reference belongs to the work item */
+            ks_get_tconn(tconn);
+
+            WorkItem->Flags = DisconnectFlags;
+            WorkItem->tconn = tconn;
+
+            cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+
+            /* queue the workitem to call */
+            ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
+        }
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+    ks_put_tconn(tconn);
+
+    return  (Status);
+}
+
+/*
+ * KsTcpReceiveCompletionRoutine
+ *   completion handling for a receive Irp
+ *
+ *   On success the RECEIVING flag of the tsdu unit is cleared, the
+ *   context's event is signaled and the connection's scheduler
+ *   callback (if any) is invoked. On failure the connection is
+ *   aborted. In both cases the Context and the Irp are freed here.
+ *
+ * Arguments:
+ *   Irp:      the Irp that is being completed
+ *   Context:  the completion context of the Irp
+ *
+ * Return Value:
+ *   the completion status taken from Irp->IoStatus.Status
+ *
+ * Notes:
+ *   Both Irp and Context are dereferenced before the NULL checks
+ *   near the end of the routine, so neither may be NULL.
+ */
+
+NTSTATUS
+KsTcpReceiveCompletionRoutine(
+    IN PIRP                         Irp,
+    IN PKS_TCP_COMPLETION_CONTEXT   Context
+    )
+{
+    NTSTATUS Status = Irp->IoStatus.Status;
+
+    if (NT_SUCCESS(Status)) {
+
+        ksock_tconn_t *tconn = Context->tconn;
+
+        /* the same CompletionContext pointer viewed as either tsdu
+           unit type; TsduType is checked below to pick the view */
+        PKS_TSDU_DAT  KsTsduDat = Context->CompletionContext;
+        PKS_TSDU_BUF  KsTsduBuf = Context->CompletionContext;
+
+        KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n",
+                   Context->KsTsduMgr->TotalBytes ));
+
+        spin_lock(&(tconn->kstc_lock));
+
+        /* the unit is expected to be marked as receiving; clear the
+           mark, or break into the debugger if it is not set */
+        if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+            if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
+                cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
+            } else {
+                cfs_enter_debugger();
+            }
+        } else {
+            ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+            if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
+                cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
+            } else {
+                cfs_enter_debugger();
+            }
+        }
+
+        spin_unlock(&(tconn->kstc_lock));
+
+        /* wake up the thread waiting for the completion of this Irp */
+        KeSetEvent(Context->Event, 0, FALSE);
+
+        /* re-activate the ks connection and wake up the scheduler */
+        if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+            tconn->kstc_sched_cb( tconn, FALSE, NULL,
+                                  Context->KsTsduMgr->TotalBytes );
+        }
+
+    } else {
+
+        /* unexpected errors occurred, we must abort the connection;
+           note that Context->Event is not signaled on this path */
+        ks_abort_tconn(Context->tconn);
+    }
+
+    if (Context) {
+
+        /* Freeing the Context structure... */
+        ExFreePool(Context);
+        Context = NULL;
+    }
+
+
+    /* free the Irp */
+    if (Irp) {
+        IoFreeIrp(Irp);
+    }
+
+    return (Status);
+}
+
+
+/*
+ * KsTcpCompletionRoutine
+ *   the Irp completion routine for TdiBuildSend and TdiBuildReceive ...
+ *   We need to call the user's own CompletionRoutine if one is specified.
+ *   Otherwise it's the synchronous case and we need to signal the event.
+ *
+ * Arguments:
+ *   DeviceObject:  the device object of the transport
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
+ *
+ * Return Value:
+ *   Nt status code
+ *
+ * Notes:
+ *   N/A
+ */
+
+NTSTATUS
+KsTcpCompletionRoutine(
+    IN PDEVICE_OBJECT   DeviceObject,
+    IN PIRP             Irp,
+    IN PVOID            Context
+    )
+{
+    PKS_TCP_COMPLETION_CONTEXT  Ctx = (PKS_TCP_COMPLETION_CONTEXT) Context;
+    ksock_tconn_t *             tconn;
+
+    /* an Irp without our context should never show up here */
+    if (!Ctx) {
+        cfs_enter_debugger();
+        return STATUS_MORE_PROCESSING_REQUIRED;
+    }
+
+    /* remember the tconn before a user routine gets hold of the context */
+    tconn = Ctx->tconn;
+
+    /* release the chained mdl */
+    KsReleaseMdl(Irp->MdlAddress, FALSE);
+    Irp->MdlAddress = NULL;
+
+    if (!Ctx->CompletionRoutine) {
+
+        /* synchronous case: just signal the event */
+        KeSetEvent(Ctx->Event, 0, FALSE);
+
+    } else {
+
+        /* a counted context is only completed by the Irp that drops
+           the count to zero; the others leave right away */
+        if ( Ctx->bCounted &&
+             InterlockedDecrement(&Ctx->ReferCount) != 0 ) {
+            return STATUS_MORE_PROCESSING_REQUIRED;
+        }
+
+        /* give control to the user specified CompletionRoutine */
+        Ctx->CompletionRoutine(Irp, Ctx);
+    }
+
+    /* drop the reference count of the tconn object */
+    ks_put_tconn(tconn);
+
+    return STATUS_MORE_PROCESSING_REQUIRED;
+}
+
+/*
+ * KsTcpSendCompletionRoutine
+ *   the user specified Irp completion routine for asynchronous
+ *   data transmission requests.
+ *
+ *   It will do the cleanup job of the ksock_tx_t and wake up the
+ *   ks scheduler thread
+ *
+ *   Successful completion is handled in one of two ways:
+ *     - Context->bCounted set: CompletionContext is passed to
+ *       tconn->kstc_update_tx together with the number of bytes sent,
+ *       TotalBytes is reduced and the scheduler callback is invoked.
+ *     - otherwise (buffered sending): CompletionContext is the KS_TSDU
+ *       and CompletionContext2 the record inside it that was sent; the
+ *       record is consumed and the Tsdu is returned to the free list
+ *       once it is empty. A short send aborts the connection.
+ *   A failed Irp aborts the connection.
+ *   The Context and the Irp are always freed before returning.
+ *
+ * Arguments:
+ *   Irp:           the Irp is being completed.
+ *   Context:       the context we specified when issuing the Irp
+ *
+ * Return Value:
+ *   Nt status code (the completion status taken from the Irp)
+ *
+ * Notes:
+ *   NOTE(review): Irp and Context are dereferenced in the initializers
+ *   below, so the NULL checks at the end of the routine cannot be the
+ *   first thing to catch a NULL pointer.
+ */
+
+NTSTATUS
+KsTcpSendCompletionRoutine(
+    IN PIRP                         Irp,
+    IN PKS_TCP_COMPLETION_CONTEXT   Context
+    )
+{
+    NTSTATUS        Status = Irp->IoStatus.Status;
+    ULONG           rc = Irp->IoStatus.Information; /* bytes reported by the Irp */
+    ksock_tconn_t * tconn = Context->tconn;
+    PKS_TSDUMGR     KsTsduMgr = Context->KsTsduMgr;
+
+    ENTRY;
+
+    LASSERT(tconn) ;
+
+    if (NT_SUCCESS(Status)) {
+
+        if (Context->bCounted) {
+            PVOID   tx = Context->CompletionContext;
+
+            ASSERT(tconn->kstc_update_tx != NULL);
+
+            /* update the tx, rebasing the kiov or iov pointers */
+            tx = tconn->kstc_update_tx(tconn, tx, rc);
+
+            /* update the KsTsduMgr total bytes */
+            spin_lock(&tconn->kstc_lock);
+            KsTsduMgr->TotalBytes -= rc;
+            spin_unlock(&tconn->kstc_lock);
+
+            /*
+             * now it's time to re-queue the conns into the
+             * scheduler queue and wake the scheduler thread.
+             */
+
+            if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+                tconn->kstc_sched_cb( tconn, TRUE, tx, 0);
+            }
+
+        } else {
+
+            /* KsTsduBuf and KsTsduDat are two views of the same record;
+               TsduType is checked below to pick the valid one */
+            PKS_TSDU            KsTsdu = Context->CompletionContext;
+            PKS_TSDU_BUF        KsTsduBuf = Context->CompletionContext2;
+            PKS_TSDU_DAT        KsTsduDat = Context->CompletionContext2;
+
+            spin_lock(&tconn->kstc_lock);
+            /* This is buffered sending ... */
+            ASSERT(KsTsduBuf->StartOffset == 0);
+
+            if (KsTsduBuf->DataLength > Irp->IoStatus.Information) {
+                /* not fully sent .... we have to abort the connection
+                   (the lock is dropped first, then we leave through
+                   errorout so Context and Irp still get freed) */
+                spin_unlock(&tconn->kstc_lock);
+                ks_abort_tconn(tconn);
+                goto errorout;
+            }
+
+            if (KsTsduBuf->TsduType  == TSDU_TYPE_BUF) {
+                /* free the buffer */
+                ExFreePool(KsTsduBuf->UserBuffer);
+                KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength;
+                KsTsdu->StartOffset   += sizeof(KS_TSDU_BUF);
+            } else if (KsTsduDat->TsduType  == TSDU_TYPE_DAT) {
+                KsTsduMgr->TotalBytes -= KsTsduDat->DataLength;
+                KsTsdu->StartOffset   += KsTsduDat->TotalLength;
+            } else {
+                cfs_enter_debugger(); /* should not get here */
+            }
+
+            /* every record of this Tsdu has been consumed: unlink it
+               and put it on the free tsdu list */
+            if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+                list_del(&KsTsdu->Link);
+                KsTsduMgr->NumOfTsdu--;
+                KsPutKsTsdu(KsTsdu);
+            }
+
+            spin_unlock(&tconn->kstc_lock);
+        }
+
+    } else {
+
+        /* cfs_enter_debugger(); */
+
+        /*
+         *  for the case that the transmission is unsuccessful,
+         *  we need abort the tdi connection, but not destroy it.
+         *  the socknal conn will drop the refer count, then the
+         *  tdi connection will be freed.
+         */
+
+        ks_abort_tconn(tconn);
+    }
+
+errorout:
+
+    /* freeing the Context structure... */
+
+    if (Context) {
+        ExFreePool(Context);
+        Context = NULL;
+    }
+
+    /* it's our duty to free the Irp. */
+
+    if (Irp) {
+        IoFreeIrp(Irp);
+        Irp = NULL;
+    }
+
+    EXIT;
+
+    return Status;
+}
+
+/*
+ *  KsTcpReceiveEventHandler
+ *    Normal receive event handler
+ *
+ *    It will move data from system Tsdu to our TsduList. A new record
+ *    (TSDU_TYPE_DAT inside a Tsdu, or TSDU_TYPE_BUF with its own pool
+ *    buffer when the data does not fit a Tsdu) is appended for the
+ *    incoming data. If the whole payload was indicated it is copied
+ *    at once; otherwise a receive Irp targeting the record's buffer is
+ *    built and handed back to the transport.
+ *
+ *  Arguments:
+ *    TdiEventContext:    the event context (not used here)
+ *    ConnectionContext:  the ksock_tconn_t of the connection
+ *    ReceiveFlags:       TDI_RECEIVE_* flags of this indication
+ *    BytesIndicated:     bytes readable at Tsdu
+ *    BytesAvailable:     bytes of the whole payload
+ *    BytesTaken:         out: bytes consumed by this handler
+ *    Tsdu:               the indicated data
+ *    IoRequestPacket:    out: the Irp to receive the remaining data
+ *
+ *  Return Value:
+ *    STATUS_MORE_PROCESSING_REQUIRED when an Irp is returned through
+ *    IoRequestPacket, STATUS_SUCCESS otherwise (including the cases
+ *    where the data is dropped or the connection is aborted).
+ *
+ *  Notes:
+ *    On any failure the record appended for this indication is taken
+ *    back and its pool buffer is freed before the connection is
+ *    aborted, so no half-built record is left in the TsduList.
+ */
+
+NTSTATUS
+KsTcpReceiveEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                BytesIndicated,
+    IN ULONG                BytesAvailable,
+    OUT ULONG *             BytesTaken,
+    IN PVOID                Tsdu,
+    OUT PIRP *              IoRequestPacket
+   )
+{
+    NTSTATUS            Status;
+
+    ksock_tconn_t *     tconn;
+
+    PKS_CHAIN           KsChain;
+    PKS_TSDUMGR         KsTsduMgr;
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_DAT        KsTsduDat;
+    PKS_TSDU_BUF        KsTsduBuf;
+
+    BOOLEAN             bIsExpedited;
+    BOOLEAN             bIsCompleteTsdu;
+
+    BOOLEAN             bNewTsdu = FALSE;
+    BOOLEAN             bNewBuff = FALSE;
+
+    PCHAR               Buffer = NULL;
+
+    PIRP                Irp = NULL;
+    PMDL                Mdl = NULL;
+    PFILE_OBJECT        FileObject;
+    PDEVICE_OBJECT      DeviceObject;
+
+    ULONG               BytesReceived = 0;
+
+    /* size of the record appended to KsTsdu; 0 until it is appended.
+       Needed to take the record back on the error path. */
+    ULONG               RecordSize = 0;
+
+    PKS_TCP_COMPLETION_CONTEXT context = NULL;
+
+
+    tconn = (ksock_tconn_t *) ConnectionContext;
+
+    ks_get_tconn(tconn);
+
+    /* check whether the whole body of payload is received or not */
+    if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) &&
+         (BytesIndicated == BytesAvailable) ) {
+        bIsCompleteTsdu = TRUE;
+    } else {
+        bIsCompleteTsdu = FALSE;
+    }
+
+    bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
+
+    KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable));
+    KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited ));
+
+    spin_lock(&(tconn->kstc_lock));
+
+    /* check whether we are connected and not a listener; if not, the
+       indicated data is just dropped */
+    if ( !((tconn->kstc_state == ksts_connected) &&
+           (tconn->kstc_type == kstt_sender ||
+            tconn->kstc_type == kstt_child))) {
+
+        *BytesTaken = BytesIndicated;
+
+        spin_unlock(&(tconn->kstc_lock));
+        ks_put_tconn(tconn);
+
+        return (STATUS_SUCCESS);
+    }
+
+    if (tconn->kstc_type == kstt_sender) {
+        KsChain = &(tconn->sender.kstc_recv);
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        KsChain = &(tconn->child.kstc_recv);
+    }
+
+    if (bIsExpedited) {
+        KsTsduMgr = &(KsChain->Expedited);
+    } else {
+        KsTsduMgr = &(KsChain->Normal);
+    }
+
+    /* if the Tsdu is even larger than the biggest Tsdu, we have
+       to allocate new buffer and use TSDU_TYPE_BUF to store it */
+
+    if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size -
+         KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
+        bNewBuff = TRUE;
+    }
+
+    /* retrieve the latest Tsdu buffer from TsduMgr
+       list if the list is not empty. */
+
+    if (list_empty(&(KsTsduMgr->TsduList))) {
+
+        LASSERT(KsTsduMgr->NumOfTsdu == 0);
+        KsTsdu = NULL;
+
+    } else {
+
+        LASSERT(KsTsduMgr->NumOfTsdu > 0);
+        KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+
+        /* if this Tsdu does not contain enough space, we need
+           allocate a new Tsdu queue. */
+
+        if (bNewBuff) {
+            if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) >
+                 KsTsdu->TotalLength )  {
+                KsTsdu = NULL;
+            }
+        } else {
+            if ( KS_TSDU_STRU_SIZE(BytesAvailable) >
+                 KsTsdu->TotalLength - KsTsdu->LastOffset ) {
+                KsTsdu = NULL;
+            }
+        }
+    }
+
+    /* allocating the buffer for TSDU_TYPE_BUF */
+    if (bNewBuff) {
+        Buffer = ExAllocatePool(NonPagedPool, BytesAvailable);
+        if (NULL == Buffer) {
+            /* there's no enough memory for us. We just try to
+               receive maximum bytes with a new Tsdu */
+            bNewBuff = FALSE;
+            KsTsdu = NULL;
+        }
+    }
+
+    /* allocate a new Tsdu in case we are not satisfied. */
+
+    if (NULL == KsTsdu) {
+
+        KsTsdu = KsAllocateKsTsdu();
+
+        if (NULL == KsTsdu) {
+            goto errorout;
+        } else {
+            bNewTsdu = TRUE;
+        }
+    }
+
+    /* the new record starts at LastOffset; both pointers alias it and
+       bNewBuff decides which layout is used */
+    KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+    KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+    if (bNewBuff) {
+
+        /* setup up the KS_TSDU_BUF record */
+
+        KsTsduBuf->TsduType     = TSDU_TYPE_BUF;
+        KsTsduBuf->TsduFlags    = 0;
+        KsTsduBuf->StartOffset  = 0;
+        KsTsduBuf->UserBuffer   = Buffer;
+        KsTsduBuf->DataLength   = BytesReceived = BytesAvailable;
+
+        RecordSize = sizeof(KS_TSDU_BUF);
+
+    } else {
+
+        /* setup the KS_TSDU_DATA to contain all the messages */
+
+        KsTsduDat->TsduType     =  TSDU_TYPE_DAT;
+        KsTsduDat->TsduFlags    = 0;
+
+        if ( KsTsdu->TotalLength - KsTsdu->LastOffset >=
+            KS_TSDU_STRU_SIZE(BytesAvailable) ) {
+            BytesReceived = BytesAvailable;
+        } else {
+            /* take as much as the rest of the Tsdu can hold,
+               rounded down to a multiple of 4 bytes */
+            BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset -
+                            FIELD_OFFSET(KS_TSDU_DAT, Data);
+            BytesReceived &= (~((ULONG)3));
+        }
+        KsTsduDat->DataLength   =  BytesReceived;
+        KsTsduDat->TotalLength  =  KS_TSDU_STRU_SIZE(BytesReceived);
+        KsTsduDat->StartOffset  = 0;
+
+        Buffer = &KsTsduDat->Data[0];
+
+        RecordSize = KsTsduDat->TotalLength;
+    }
+
+    /* append the record */
+    KsTsdu->LastOffset     +=  RecordSize;
+    KsTsduMgr->TotalBytes  +=  BytesReceived;
+
+    if (bIsCompleteTsdu) {
+
+        /* It's a complete receive, we just move all
+           the data from system to our Tsdu */
+
+        RtlMoveMemory(
+            Buffer,
+            Tsdu,
+            BytesReceived
+            );
+
+        *BytesTaken = BytesReceived;
+        Status = STATUS_SUCCESS;
+
+        if (bNewTsdu) {
+            list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+            KsTsduMgr->NumOfTsdu++;
+        }
+
+        KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+        /* re-activate the ks connection and wake up the scheduler */
+        if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+            tconn->kstc_sched_cb( tconn, FALSE, NULL,
+                                  KsTsduMgr->TotalBytes );
+        }
+
+    } else {
+
+        /* there's still data in tdi internal queue, we need issue a new
+           Irp to receive all of them. first allocate the tcp context */
+
+        context = ExAllocatePoolWithTag(
+                        NonPagedPool,
+                        sizeof(KS_TCP_COMPLETION_CONTEXT),
+                        'cTsK');
+
+        if (!context) {
+
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto errorout;
+        }
+
+        /* setup the context */
+        RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT));
+
+        /* KsTcpReceiveCompletionRoutine reads CompletionContext as the
+           record (BUF or DAT) that is being received into */
+        context->tconn             = tconn;
+        context->CompletionRoutine = KsTcpReceiveCompletionRoutine;
+        context->CompletionContext = bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat;
+        context->KsTsduMgr         = KsTsduMgr;
+        context->Event             = &(KsTsduMgr->Event);
+
+        if (tconn->kstc_type == kstt_sender) {
+            FileObject = tconn->sender.kstc_info.FileObject;
+        } else {
+            FileObject = tconn->child.kstc_info.FileObject;
+        }
+
+        DeviceObject = IoGetRelatedDeviceObject(FileObject);
+
+        /* build new tdi Irp and setup it. */
+        Irp = KsBuildTdiIrp(DeviceObject);
+
+        if (NULL == Irp) {
+            goto errorout;
+        }
+
+        Status = KsLockUserBuffer(
+                    Buffer,
+                    FALSE,
+                    BytesReceived,
+                    IoModifyAccess,
+                    &Mdl
+                    );
+
+        if (!NT_SUCCESS(Status)) {
+            goto errorout;
+        }
+
+        TdiBuildReceive(
+            Irp,
+            DeviceObject,
+            FileObject,
+            KsTcpCompletionRoutine,
+            context,
+            Mdl,
+            ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED),
+            BytesReceived
+          );
+
+        IoSetNextIrpStackLocation(Irp);
+
+        /* return the newly built Irp to transport driver,
+           it will process it to receive all the data */
+
+        *IoRequestPacket = Irp;
+        *BytesTaken = 0;
+
+        if (bNewTsdu) {
+
+            list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+            KsTsduMgr->NumOfTsdu++;
+        }
+
+        /* mark the record as still being filled; the completion
+           routine clears the flag */
+        if (bNewBuff) {
+            cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING);
+        } else {
+            cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING);
+        }
+
+        /* this reference is dropped by KsTcpCompletionRoutine */
+        ks_get_tconn(tconn);
+        Status = STATUS_MORE_PROCESSING_REQUIRED;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+    ks_put_tconn(tconn);
+
+    return (Status);
+
+errorout:
+
+    /* kstc_lock is still held here: take back the record appended
+       above, so that no reader ever finds a record whose data was
+       never received and TotalBytes stays accurate */
+    if (RecordSize != 0) {
+        KsTsdu->LastOffset    -= RecordSize;
+        KsTsduMgr->TotalBytes -= BytesReceived;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+
+    /* bNewBuff is only left TRUE when Buffer came from the pool; no
+       record refers to it any more, so it has to be freed here */
+    if (bNewBuff && (Buffer != NULL)) {
+        ExFreePool(Buffer);
+    }
+
+    if (bNewTsdu && (KsTsdu != NULL)) {
+        KsFreeKsTsdu(KsTsdu);
+    }
+
+    if (Mdl) {
+        KsReleaseMdl(Mdl, FALSE);
+    }
+
+    if (Irp) {
+        IoFreeIrp(Irp);
+    }
+
+    if (context) {
+        ExFreePool(context);
+    }
+
+    ks_abort_tconn(tconn);
+    ks_put_tconn(tconn);
+
+    /* tell the transport everything was consumed; the connection
+       is being aborted anyway */
+    *BytesTaken = BytesAvailable;
+    Status = STATUS_SUCCESS;
+
+    return (Status);
+}
+
+/*
+ *  Expedited receive event handler
+ */
+
+NTSTATUS
+KsTcpReceiveExpeditedEventHandler(
+    IN PVOID                TdiEventContext,
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                BytesIndicated,
+    IN ULONG                BytesAvailable,
+    OUT ULONG *             BytesTaken,
+    IN PVOID                Tsdu,
+    OUT PIRP *              IoRequestPacket
+    )
+{
+    /* same as the normal handler, with the expedited flag forced on */
+    ULONG   ExpeditedFlags = ReceiveFlags | TDI_RECEIVE_EXPEDITED;
+
+    return KsTcpReceiveEventHandler(TdiEventContext, ConnectionContext,
+                                    ExpeditedFlags,
+                                    BytesIndicated, BytesAvailable,
+                                    BytesTaken, Tsdu, IoRequestPacket);
+}
+
+
+/*
+ *  Bulk receive event handler
+ *
+ *  It will queue all the system Tsdus to our TsduList.
+ *  Then later ks_recv_mdl will release them.
+ *
+ *  The data is not copied: a KS_TSDU_MDL record holding the Mdl chain,
+ *  the start offset, the length and the Tsdu descriptor is appended to
+ *  the last Tsdu of the matching TsduMgr (Normal or Expedited).
+ *
+ *  Returns STATUS_PENDING when the Mdl was queued. Returns
+ *  STATUS_SUCCESS when the connection is not a connected sender/child
+ *  or when no Tsdu could be allocated (the connection is then aborted).
+ */
+
+NTSTATUS
+KsTcpChainedReceiveEventHandler (
+    IN PVOID TdiEventContext,       // the event context
+    IN CONNECTION_CONTEXT ConnectionContext,
+    IN ULONG ReceiveFlags,
+    IN ULONG ReceiveLength,
+    IN ULONG StartingOffset,        // offset of start of client data in TSDU
+    IN PMDL  Tsdu,                  // TSDU data chain
+    IN PVOID TsduDescriptor         // for call to TdiReturnChainedReceives
+    )
+{
+
+    NTSTATUS            Status;
+
+    ksock_tconn_t *     tconn;
+
+    PKS_CHAIN           KsChain;
+    PKS_TSDUMGR         KsTsduMgr;
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_MDL        KsTsduMdl;
+
+    BOOLEAN             bIsExpedited;
+    BOOLEAN             bNewTsdu = FALSE;
+
+    tconn = (ksock_tconn_t *) ConnectionContext;
+
+    bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED);
+
+    KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited));
+
+    ks_get_tconn(tconn);
+    spin_lock(&(tconn->kstc_lock));
+
+    /* check whether we are connected and not a listener */
+    if ( !((tconn->kstc_state == ksts_connected) &&
+         (tconn->kstc_type == kstt_sender ||
+          tconn->kstc_type == kstt_child))) {
+
+        spin_unlock(&(tconn->kstc_lock));
+        ks_put_tconn(tconn);
+
+        return (STATUS_SUCCESS);
+    }
+
+    /* get the latest Tsdu buffer from TsduMgr list.
+       just set NULL if the list is empty. */
+
+    if (tconn->kstc_type == kstt_sender) {
+        KsChain = &(tconn->sender.kstc_recv);
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        KsChain = &(tconn->child.kstc_recv);
+    }
+
+    if (bIsExpedited) {
+        KsTsduMgr = &(KsChain->Expedited);
+    } else {
+        KsTsduMgr = &(KsChain->Normal);
+    }
+
+    if (list_empty(&(KsTsduMgr->TsduList))) {
+
+        LASSERT(KsTsduMgr->NumOfTsdu == 0);
+        KsTsdu = NULL;
+
+    } else {
+
+        LASSERT(KsTsduMgr->NumOfTsdu > 0);
+        KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+        if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) {
+            KsTsdu = NULL;
+        }
+    }
+
+    /* if there's no Tsdu or the free size is not enough for this
+       KS_TSDU_MDL structure. We need re-allocate a new Tsdu.  */
+
+    if (NULL == KsTsdu) {
+
+        KsTsdu = KsAllocateKsTsdu();
+
+        if (NULL == KsTsdu) {
+            goto errorout;
+        } else {
+            bNewTsdu = TRUE;
+        }
+    }
+
+    /* just queue the KS_TSDU_MDL to the Tsdu buffer */
+
+    KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+    KsTsduMdl->TsduType     =  TSDU_TYPE_MDL;
+    KsTsduMdl->DataLength   =  ReceiveLength;
+    KsTsduMdl->StartOffset  =  StartingOffset;
+    KsTsduMdl->Mdl          =  Tsdu;
+    KsTsduMdl->Descriptor   =  TsduDescriptor;
+
+    KsTsdu->LastOffset     += sizeof(KS_TSDU_MDL);
+    KsTsduMgr->TotalBytes  += ReceiveLength;
+
+    KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n",
+                KsTsduMgr->TotalBytes ));
+
+    Status = STATUS_PENDING;
+
+    /* attach it to the TsduMgr list if the Tsdu is newly created. */
+    if (bNewTsdu) {
+
+        list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+        KsTsduMgr->NumOfTsdu++;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+
+    /* wake up the threads waiting in ks_recv_mdl
+       NOTE(review): from here on TotalBytes is read without holding
+       kstc_lock - confirm that a stale value is acceptable for the
+       scheduler callback. */
+    KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+    if (tconn->kstc_conn && tconn->kstc_sched_cb) {
+        tconn->kstc_sched_cb( tconn, FALSE, NULL,
+                              KsTsduMgr->TotalBytes );
+    }
+
+    ks_put_tconn(tconn);
+
+    /* Return STATUS_PENDING to system because we are still
+       owning the MDL resources. ks_recv_mdl is expected
+       to free the MDL resources. */
+
+    return (Status);
+
+errorout:
+
+    spin_unlock(&(tconn->kstc_lock));
+
+    /* NOTE(review): the only jump to errorout happens while KsTsdu is
+       NULL and bNewTsdu is FALSE, so this branch looks unreachable. */
+    if (bNewTsdu && (KsTsdu != NULL)) {
+        KsFreeKsTsdu(KsTsdu);
+    }
+
+    /* abort the tdi connection */
+    ks_abort_tconn(tconn);
+    ks_put_tconn(tconn);
+
+
+    Status = STATUS_SUCCESS;
+
+    return (Status);
+}
+
+
+/*
+ *  Expedited & Bulk receive event handler
+ */
+
+NTSTATUS
+KsTcpChainedReceiveExpeditedEventHandler (
+    IN PVOID                TdiEventContext,       // the event context
+    IN CONNECTION_CONTEXT   ConnectionContext,
+    IN ULONG                ReceiveFlags,
+    IN ULONG                ReceiveLength,
+    IN ULONG                StartingOffset,        // offset of start of client data in TSDU
+    IN PMDL                 Tsdu,                  // TSDU data chain
+    IN PVOID                TsduDescriptor         // for call to TdiReturnChainedReceives
+    )
+{
+    /* same as the bulk handler, with the expedited flag forced on */
+    ULONG   ExpeditedFlags = ReceiveFlags | TDI_RECEIVE_EXPEDITED;
+
+    return KsTcpChainedReceiveEventHandler(TdiEventContext,
+                                           ConnectionContext,
+                                           ExpeditedFlags,
+                                           ReceiveLength,
+                                           StartingOffset,
+                                           Tsdu,
+                                           TsduDescriptor);
+}
+
+
+/*
+ * KsPrintProviderInfo
+ *   Dump the fields of a TDI provider info structure, one line per
+ *   field and one line per service flag that is set.
+ *
+ * Arguments:
+ *   DeviceName:   name of the transport device, printed as the title
+ *   ProviderInfo: the provider info to be dumped
+ *
+ * Return Value:
+ *   N/A
+ */
+
+VOID
+KsPrintProviderInfo(
+   PWSTR DeviceName,
+   PTDI_PROVIDER_INFO ProviderInfo
+   )
+{
+    ULONG   Flags = ProviderInfo->ServiceFlags;
+
+    KsPrint((2, "%ws ProviderInfo:\n", DeviceName));
+
+    KsPrint((2, "  Version              : 0x%4.4X\n", ProviderInfo->Version ));
+    KsPrint((2, "  MaxSendSize          : %d\n", ProviderInfo->MaxSendSize ));
+    KsPrint((2, "  MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData ));
+    KsPrint((2, "  MaxDatagramSize      : %d\n", ProviderInfo->MaxDatagramSize ));
+    KsPrint((2, "  ServiceFlags         : 0x%8.8X\n", Flags ));
+
+    /* one line for every service flag that is set */
+    if (Flags & TDI_SERVICE_CONNECTION_MODE) {
+        KsPrint((2, "  CONNECTION_MODE\n"));
+    }
+    if (Flags & TDI_SERVICE_ORDERLY_RELEASE) {
+        KsPrint((2, "  ORDERLY_RELEASE\n"));
+    }
+    if (Flags & TDI_SERVICE_CONNECTIONLESS_MODE) {
+        KsPrint((2, "  CONNECTIONLESS_MODE\n"));
+    }
+    if (Flags & TDI_SERVICE_ERROR_FREE_DELIVERY) {
+        KsPrint((2, "  ERROR_FREE_DELIVERY\n"));
+    }
+    if (Flags & TDI_SERVICE_SECURITY_LEVEL) {
+        KsPrint((2, "  SECURITY_LEVEL\n"));
+    }
+    if (Flags & TDI_SERVICE_BROADCAST_SUPPORTED) {
+        KsPrint((2, "  BROADCAST_SUPPORTED\n"));
+    }
+    if (Flags & TDI_SERVICE_MULTICAST_SUPPORTED) {
+        KsPrint((2, "  MULTICAST_SUPPORTED\n"));
+    }
+    if (Flags & TDI_SERVICE_DELAYED_ACCEPTANCE) {
+        KsPrint((2, "  DELAYED_ACCEPTANCE\n"));
+    }
+    if (Flags & TDI_SERVICE_EXPEDITED_DATA) {
+        KsPrint((2, "  EXPEDITED_DATA\n"));
+    }
+    if (Flags & TDI_SERVICE_INTERNAL_BUFFERING) {
+        KsPrint((2, "  INTERNAL_BUFFERING\n"));
+    }
+    if (Flags & TDI_SERVICE_ROUTE_DIRECTED) {
+        KsPrint((2, "  ROUTE_DIRECTED\n"));
+    }
+    if (Flags & TDI_SERVICE_NO_ZERO_LENGTH) {
+        KsPrint((2, "  NO_ZERO_LENGTH\n"));
+    }
+    if (Flags & TDI_SERVICE_POINT_TO_POINT) {
+        KsPrint((2, "  POINT_TO_POINT\n"));
+    }
+    if (Flags & TDI_SERVICE_MESSAGE_MODE) {
+        KsPrint((2, "  MESSAGE_MODE\n"));
+    }
+    if (Flags & TDI_SERVICE_HALF_DUPLEX) {
+        KsPrint((2, "  HALF_DUPLEX\n"));
+    }
+
+    KsPrint((2, "  MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData ));
+    KsPrint((2, "  MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData ));
+    KsPrint((2, "  NumberOfResources    : %d\n", ProviderInfo->NumberOfResources ));
+}
+
+
+/*
+ * KsAllocateKsTsdu
+ *   Reuse a Tsdu from the freelist or allocate a new Tsdu
+ *   from the LookAsideList table or the NonPagedPool
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   PKS_Tsdu: the new Tsdu or NULL if it fails
+ *
+ * Notes:
+ *   N/A
+ */
+
+PKS_TSDU
+KsAllocateKsTsdu()
+{
+    PKS_TSDU    Tsdu;
+
+    spin_lock(&ks_data.ksnd_tsdu_lock);
+
+    if (list_empty(&ks_data.ksnd_freetsdus)) {
+
+        /* nothing to reuse: get a fresh one from the slab */
+        Tsdu = (PKS_TSDU) cfs_mem_cache_alloc(ks_data.ksnd_tsdu_slab, 0);
+
+    } else {
+
+        /* reuse the first Tsdu of the free list */
+        LASSERT(ks_data.ksnd_nfreetsdus > 0);
+
+        Tsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link);
+        list_del(&Tsdu->Link);
+        ks_data.ksnd_nfreetsdus--;
+    }
+
+    spin_unlock(&ks_data.ksnd_tsdu_lock);
+
+    if (NULL == Tsdu) {
+        return NULL;
+    }
+
+    /* reused or new, the Tsdu always starts out re-initialized */
+    KsInitializeKsTsdu(Tsdu, ks_data.ksnd_tsdu_size);
+
+    return Tsdu;
+}
+
+
+/*
+ * KsPutKsTsdu
+ *   Move the Tsdu to the free tsdu list in ks_data.
+ *
+ * Arguments:
+ *   KsTsdu: Tsdu to be moved.
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
+
+VOID
+KsPutKsTsdu(
+    PKS_TSDU  KsTsdu
+    )
+{
+    /* the free list and its counter are both protected by ksnd_tsdu_lock */
+    spin_lock(&ks_data.ksnd_tsdu_lock);
+
+    ks_data.ksnd_nfreetsdus++;
+    list_add_tail(&KsTsdu->Link, &ks_data.ksnd_freetsdus);
+
+    spin_unlock(&ks_data.ksnd_tsdu_lock);
+}
+
+
+/*
+ * KsFreeKsTsdu
+ *   Release a Tsdu: uninitialize then free it.
+ *
+ * Arguments:
+ *   KsTsdu: Tsdu to be freed.
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
+
+VOID
+KsFreeKsTsdu(
+    PKS_TSDU  KsTsdu
+    )
+{
+    /* hand the Tsdu back to the slab it was allocated from */
+    cfs_mem_cache_free(ks_data.ksnd_tsdu_slab, KsTsdu);
+}
+
+
+/*
+ * KsInitializeKsTsdu
+ *   Initialize the Tsdu buffer header
+ *
+ * Arguments:
+ *   KsTsdu: the Tsdu to be initialized
+ *   Length: the total length of the Tsdu
+ *
+ * Return Value:
+ *   VOID
+ *
+ * NOTES:
+ *   N/A
+ */
+
+VOID
+KsInitializeKsTsdu(
+    PKS_TSDU    KsTsdu,
+    ULONG       Length
+    )
+{
+    /* wipe the whole buffer, header included */
+    RtlZeroMemory(KsTsdu, Length);
+
+    KsTsdu->Magic       = KS_TSDU_MAGIC;
+    KsTsdu->TotalLength = Length;
+
+    /* an empty Tsdu: both offsets sit right behind the aligned header */
+    KsTsdu->LastOffset  = KS_DWORD_ALIGN(sizeof(KS_TSDU));
+    KsTsdu->StartOffset = KsTsdu->LastOffset;
+}
+
+
+/*
+ * KsInitializeKsTsduMgr
+ *   Initialize the management structure of
+ *   Tsdu buffers
+ *
+ * Arguments:
+ *   TsduMgr: the TsduMgr to be initialized
+ *
+ * Return Value:
+ *   VOID
+ *
+ * NOTES:
+ *   N/A
+ */
+
+VOID
+KsInitializeKsTsduMgr(
+    PKS_TSDUMGR     TsduMgr
+    )
+{
+    /* no Tsdu and no data queued yet */
+    TsduMgr->TotalBytes = 0;
+    TsduMgr->NumOfTsdu  = 0;
+    CFS_INIT_LIST_HEAD(&(TsduMgr->TsduList));
+
+    /* notification event, initially not signaled */
+    KeInitializeEvent(&(TsduMgr->Event), NotificationEvent, FALSE);
+}
+
+
+/*
+ * KsInitializeKsChain
+ *   Initialize the Chain structure used for receiving
+ *   or transmitting
+ *
+ * Arguments:
+ *   KsChain: the KsChain to be initialized
+ *
+ * Return Value:
+ *   VOID
+ *
+ * NOTES:
+ *   N/A
+ */
+
+VOID
+KsInitializeKsChain(
+    PKS_CHAIN       KsChain
+    )
+{
+    /* a chain is made of two Tsdu managers: normal and expedited data */
+    PKS_TSDUMGR     NormalMgr    = &(KsChain->Normal);
+    PKS_TSDUMGR     ExpeditedMgr = &(KsChain->Expedited);
+
+    KsInitializeKsTsduMgr(NormalMgr);
+    KsInitializeKsTsduMgr(ExpeditedMgr);
+}
+
+
+/*
+ * KsCleanupTsduMgr
+ *   Clean up all the Tsdus in the TsduMgr list
+ *
+ *   The manager's event is signaled first. Then every Tsdu in the
+ *   list is walked record by record: the pool buffer of a
+ *   TSDU_TYPE_BUF record is freed, the chained receive of a
+ *   TSDU_TYPE_MDL record is returned to the transport, and a
+ *   TSDU_TYPE_DAT record is simply skipped. An emptied Tsdu is
+ *   unlinked and freed.
+ *
+ * Arguments:
+ *   KsTsduMgr: the Tsdu list manager
+ *
+ * Return Value:
+ *   NTSTATUS:  nt status code (always STATUS_SUCCESS)
+ *
+ * NOTES:
+ *   A record of an unknown type stops the walk of its Tsdu: the rest
+ *   of that Tsdu is discarded, so a corrupted record cannot make the
+ *   loop spin forever.
+ */
+
+NTSTATUS
+KsCleanupTsduMgr(
+    PKS_TSDUMGR     KsTsduMgr
+    )
+{
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
+
+    LASSERT(NULL != KsTsduMgr);
+
+    KeSetEvent(&(KsTsduMgr->Event), 0, FALSE);
+
+    while (!list_empty(&KsTsduMgr->TsduList)) {
+
+        KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+        if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+            //
+            // KsTsdu is empty now, we need free it ...
+            //
+
+            list_del(&(KsTsdu->Link));
+            KsTsduMgr->NumOfTsdu--;
+
+            KsFreeKsTsdu(KsTsdu);
+
+        } else {
+
+            /* three views of the record at StartOffset; its TsduType
+               tells which one is valid */
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+
+            if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+                /* the data lives inside the Tsdu, nothing to release */
+                KsTsdu->StartOffset += KsTsduDat->TotalLength;
+
+            } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) {
+
+                ASSERT(KsTsduBuf->UserBuffer != NULL);
+
+                if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) {
+                    ExFreePool(KsTsduBuf->UserBuffer);
+                } else {
+                    cfs_enter_debugger();
+                }
+
+                KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+
+            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+
+                //
+                // MDL Tsdu Unit ...
+                //
+
+                TdiReturnChainedReceives(
+                    &(KsTsduMdl->Descriptor),
+                    1 );
+
+                KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+
+            } else {
+
+                /* unknown record: its size cannot be told, so the
+                   offset could never advance. Give up on the rest of
+                   this Tsdu; the next pass frees it as empty. */
+                cfs_enter_debugger();
+                KsTsdu->StartOffset = KsTsdu->LastOffset;
+            }
+        }
+    }
+
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * KsCleanupKsChain
+ *   Clean up the TsduMgrs of the KsChain
+ *
+ * Arguments:
+ *   KsChain: the chain managing TsduMgr
+ *
+ * Return Value:
+ *   NTSTATUS:  nt status code
+ *
+ * NOTES:
+ *   N/A
+ */
+
+NTSTATUS
+KsCleanupKsChain(
+    PKS_CHAIN   KsChain
+    )
+{
+    NTSTATUS    Status;
+
+    LASSERT(NULL != KsChain);
+
+    /* normal data first; expedited data only if that went well */
+    Status = KsCleanupTsduMgr(&(KsChain->Normal));
+
+    if (NT_SUCCESS(Status)) {
+        Status = KsCleanupTsduMgr(&(KsChain->Expedited));
+    }
+
+    /* whichever step failed, stop in the debugger before reporting it */
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+    }
+
+    return Status;
+}
+
+
+/*
+ * KsCleanupTsdu
+ *   Clean up all the Tsdus of a tdi connected object
+ *
+ * Arguments:
+ *   tconn: the tdi connection which is connected already.
+ *
+ * Return Value:
+ *   Nt status code
+ *
+ * NOTES:
+ *   N/A
+ */
+
+NTSTATUS
+KsCleanupTsdu(
+    ksock_tconn_t * tconn
+    )
+{
+    NTSTATUS        Status;
+    PKS_CHAIN       RecvChain;
+    PKS_CHAIN       SendChain;
+
+    /* only sender and child connections carry Tsdu chains */
+    if (tconn->kstc_type == kstt_sender) {
+
+        RecvChain = &(tconn->sender.kstc_recv);
+        SendChain = &(tconn->sender.kstc_send);
+
+    } else if (tconn->kstc_type == kstt_child) {
+
+        RecvChain = &(tconn->child.kstc_recv);
+        SendChain = &(tconn->child.kstc_send);
+
+    } else {
+
+        return (STATUS_SUCCESS);
+    }
+
+    /* receive chain first; the send chain only if that went well */
+    Status = KsCleanupKsChain(RecvChain);
+
+    if (NT_SUCCESS(Status)) {
+        Status = KsCleanupKsChain(SendChain);
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+    }
+
+    return (Status);
+}
+
+
+/*
+ * KsCopyMdlChainToMdlChain
+ *   Copy data from a [chained] Mdl to another [chained] Mdl.
+ *   Tdi library does not provide this function, so we have to
+ *   implement it ourselves.
+ *
+ * Arguments:
+ *   SourceMdlChain: the source mdl
+ *   SourceOffset:   start offset of the source
+ *   DestinationMdlChain: the dst mdl
+ *   DestinationOffset: the offset where data are to be copied.
+ *   BytesTobecopied:   the expected bytes to be copied
+ *   BytesCopied:    to store the really copied data length
+ *                   (set to 0 whenever an error is returned)
+ *
+ * Return Value:
+ *   NTSTATUS: STATUS_SUCCESS or other error code
+ *     STATUS_INVALID_PARAMETER:      an offset runs past the end
+ *                                    of its mdl chain
+ *     STATUS_INSUFFICIENT_RESOURCES: an mdl could not be mapped
+ *
+ * NOTES:
+ *   The length of source mdl must be >= SourceOffset + BytesTobecopied
+ */
+
+NTSTATUS
+KsCopyMdlChainToMdlChain(
+    IN PMDL     SourceMdlChain,
+    IN ULONG    SourceOffset,
+    IN PMDL     DestinationMdlChain,
+    IN ULONG    DestinationOffset,
+    IN ULONG    BytesTobecopied,
+    OUT PULONG  BytesCopied
+    )
+{
+    PMDL        SrcMdl = SourceMdlChain;
+    PMDL        DstMdl = DestinationMdlChain;
+
+    PUCHAR      SrcBuf = NULL;
+    PUCHAR      DstBuf = NULL;
+
+    ULONG       dwBytes = 0;
+
+    NTSTATUS    Status = STATUS_SUCCESS;
+
+
+    while (dwBytes < BytesTobecopied) {
+
+        ULONG   Length = 0;
+
+        /* skip the source mdls lying completely before SourceOffset,
+           making the offset relative to the current mdl */
+
+        while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) {
+
+            SourceOffset -= MmGetMdlByteCount(SrcMdl);
+
+            SrcMdl = SrcMdl->Next;
+
+            if (NULL == SrcMdl) {
+
+                Status = STATUS_INVALID_PARAMETER;
+                goto errorout;
+            }
+        }
+
+        /* same walk for the destination chain */
+
+        while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) {
+
+            DestinationOffset -= MmGetMdlByteCount(DstMdl);
+
+            DstMdl = DstMdl->Next;
+
+            if (NULL == DstMdl) {
+
+                Status = STATUS_INVALID_PARAMETER;
+                goto errorout;
+            }
+        }
+
+        DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl);
+
+        if ((NULL == DstBuf)) {
+            Status = STATUS_INSUFFICIENT_RESOURCES;
+            goto errorout;
+        }
+
+        //
+        // When the remaining source is larger than what is left of the
+        // current destination mdl, TdiCopyMdlToBuffer would report
+        // an OVERFLOW: copy the bounded piece with RtlCopyMemory :-(
+        //
+
+        if ( KsQueryMdlsSize(SrcMdl) - SourceOffset >
+             MmGetMdlByteCount(DstMdl) - DestinationOffset ) {
+
+            /* clamp to: bytes still wanted, bytes left in the source,
+               bytes left in the current destination mdl */
+
+            Length = BytesTobecopied - dwBytes;
+
+            if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) {
+                Length = KsQueryMdlsSize(SrcMdl) - SourceOffset;
+            }
+
+            if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) {
+                Length = MmGetMdlByteCount(DstMdl) - DestinationOffset;
+            }
+
+            SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl);
+
+            /* it is the source mapping that was just requested and
+               that may have failed; DstBuf was validated above */
+
+            if ((NULL == SrcBuf)) {
+                Status = STATUS_INSUFFICIENT_RESOURCES;
+                goto errorout;
+            }
+
+            RtlCopyMemory(
+                DstBuf + DestinationOffset,
+                SrcBuf + SourceOffset,
+                Length
+                );
+
+        } else {
+
+            /* the rest of the source fits into the current
+               destination mdl: let the Tdi helper walk the source */
+
+            Status = TdiCopyMdlToBuffer(
+                        SrcMdl,
+                        SourceOffset,
+                        DstBuf,
+                        DestinationOffset,
+                        MmGetMdlByteCount(DstMdl),
+                        &Length
+                        );
+
+            if (STATUS_BUFFER_OVERFLOW == Status) {
+                cfs_enter_debugger();
+            } else if (!NT_SUCCESS(Status)) {
+                cfs_enter_debugger();
+                goto errorout;
+            }
+        }
+
+        SourceOffset += Length;
+        DestinationOffset += Length;
+        dwBytes += Length;
+    }
+
+errorout:
+
+    if (NT_SUCCESS(Status)) {
+        *BytesCopied = dwBytes;
+    } else {
+        *BytesCopied = 0;
+    }
+
+    return Status;
+}
+
+
+
+/*
+ * KsQueryMdlsSize
+ *   Sum up the byte counts of every MDL of a (possibly chained) MDL
+ *
+ * Arguments:
+ *   Mdl:  head of the Mdl chain to be measured; NULL yields 0
+ *
+ * Return Value:
+ *   ULONG: the total size in bytes of all the mdls in the chain
+ *
+ * NOTES:
+ *   N/A
+ */
+
+ULONG
+KsQueryMdlsSize (PMDL Mdl)
+{
+    PMDL    Cur;
+    ULONG   Total = 0;
+
+    /* follow the Next links until the end of the chain */
+    for (Cur = Mdl; Cur != NULL; Cur = Cur->Next) {
+        Total += MmGetMdlByteCount(Cur);
+    }
+
+    return (Total);
+}
+
+
+/*
+ * KsLockUserBuffer
+ *   Allocate an MDL describing the buffer and either lock its
+ *   pages (paged buffer) or build the MDL for nonpaged pool
+ *
+ * Arguments:
+ *   UserBuffer:  the buffer to be described, must not be NULL
+ *   bPaged:      TRUE:  the pages are probed and locked with
+ *                       MmProbeAndLockPages (KernelMode)
+ *                FALSE: MmBuildMdlForNonPagedPool is used instead
+ *   Length:      length in bytes of the buffer
+ *   Operation:   read or write access, only used when bPaged
+ *   pMdl:        receives the created mdl, NULL on failure
+ *
+ * Return Value:
+ *   NTSTATUS:     STATUS_SUCCESS
+ *                 STATUS_INSUFFICIENT_RESOURCES: no mdl allocated
+ *                 STATUS_INVALID_USER_BUFFER: an exception was
+ *                 raised while locking / building the mdl
+ *
+ * NOTES:
+ *   N/A
+ */
+
+NTSTATUS
+KsLockUserBuffer (
+    IN PVOID            UserBuffer,
+    IN BOOLEAN          bPaged,
+    IN ULONG            Length,
+    IN LOCK_OPERATION   Operation,
+    OUT PMDL *          pMdl
+    )
+{
+    NTSTATUS    rc;
+    PMDL        NewMdl;
+
+    LASSERT(UserBuffer != NULL);
+
+    /* the caller never sees a stale mdl pointer on failure */
+    *pMdl = NULL;
+
+    NewMdl = IoAllocateMdl(UserBuffer, Length, FALSE, FALSE, NULL);
+
+    if (NULL == NewMdl) {
+        return STATUS_INSUFFICIENT_RESOURCES;
+    }
+
+    __try {
+
+        if (!bPaged) {
+            MmBuildMdlForNonPagedPool(NewMdl);
+        } else {
+            MmProbeAndLockPages(NewMdl, KernelMode, Operation);
+        }
+
+        rc = STATUS_SUCCESS;
+
+        /* only published once the mdl is fully set up */
+        *pMdl = NewMdl;
+
+    } __except (EXCEPTION_EXECUTE_HANDLER) {
+
+        /* the mdl is of no use any more: give it back */
+        IoFreeMdl(NewMdl);
+
+        NewMdl = NULL;
+
+        cfs_enter_debugger();
+
+        rc = STATUS_INVALID_USER_BUFFER;
+    }
+
+    return rc;
+}
+
+/*
+ * KsMapMdlBuffer
+ *   Get the system address of the buffer described by the mdl
+ *
+ * Arguments:
+ *   Mdl:  the mdl to be mapped, must not be NULL
+ *
+ * Return Value:
+ *   PVOID: whatever MmGetSystemAddressForMdlSafe returns for the
+ *          mdl at NormalPagePriority (NULL in failure)
+ *
+ * NOTES:
+ *   N/A
+ */
+
+PVOID
+KsMapMdlBuffer (PMDL    Mdl)
+{
+    PVOID   SystemVa;
+
+    LASSERT(Mdl != NULL);
+
+    SystemVa = MmGetSystemAddressForMdlSafe(Mdl, NormalPagePriority);
+
+    return SystemVa;
+}
+
+
+/*
+ * KsReleaseMdl
+ *   Free every mdl of the chain, unlocking its pages first when
+ *   the chain was locked as a paged buffer
+ *
+ * Arguments:
+ *   Mdl:    head of the mdl chain to be released, must not be NULL
+ *   Paged:  non-zero: call MmUnlockPages on each mdl before it
+ *           is freed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   N/A
+ */
+
+VOID
+KsReleaseMdl (IN PMDL   Mdl,
+              IN int    Paged )
+{
+    PMDL    Cur;
+    PMDL    Following;
+
+    LASSERT(Mdl != NULL);
+
+    for (Cur = Mdl; Cur != NULL; Cur = Following) {
+
+        /* read the link before the mdl is freed */
+        Following = Cur->Next;
+
+        if (Paged) {
+            MmUnlockPages(Cur);
+        }
+
+        IoFreeMdl(Cur);
+    }
+}
+
+
+/*
+ * ks_lock_buffer
+ *   wrapper of KsLockUserBuffer: allocate an MDL for the specified
+ *   buffer and translate the NT status into a ks error code
+ *
+ * Arguments:
+ *   buffer:  the buffer to be locked
+ *   paged:   non-zero if the buffer is paged, forwarded as the
+ *            bPaged flag of KsLockUserBuffer
+ *   length:  length in bytes of the buffer
+ *   access:  read or write access
+ *   kmdl:    the result of the created mdl
+ *
+ * Return Value:
+ *   int:     the ks error code: 0: success / -x: failure
+ *
+ * Notes:
+ *   N/A
+ */
+
+int
+ks_lock_buffer (
+    void *            buffer,
+    int               paged,
+    int               length,
+    LOCK_OPERATION    access,
+    ksock_mdl_t **    kmdl
+    )
+{
+    NTSTATUS        nt_rc;
+
+    nt_rc = KsLockUserBuffer(buffer, paged != 0, length, access, kmdl);
+
+    return cfs_error_code(nt_rc);
+}
+
+
+/*
+ * ks_map_mdl
+ *   wrapper of KsMapMdlBuffer: map the mdl into kernel space
+ *
+ * Arguments:
+ *   mdl:  the mdl to be mapped, must not be NULL
+ *
+ * Return Value:
+ *   void *: the buffer mapped or NULL in failure
+ *
+ * Notes:
+ *   N/A
+ */
+
+void *
+ks_map_mdl (ksock_mdl_t * mdl)
+{
+    void *  va;
+
+    LASSERT(mdl != NULL);
+
+    va = KsMapMdlBuffer(mdl);
+
+    return va;
+}
+
+/*
+ *  ks_release_mdl
+ *   wrapper of KsReleaseMdl: release the whole mdl chain
+ *
+ * Arguments:
+ *   mdl:    memory description list to be released, not NULL
+ *   paged:  non-zero if the pages were locked and so have to
+ *           be unlocked before the mdls are freed
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   N/A
+ */
+
+void
+ks_release_mdl (ksock_mdl_t *mdl, int paged)
+{
+    LASSERT(NULL != mdl);
+
+    /* all the work is done by the Ks layer */
+    KsReleaseMdl(mdl, paged);
+}
+
+
+/*
+ * ks_create_tconn
+ *   allocate a new tconn structure from the tconn slab cache,
+ *   initialize it and link it into the global tconn list
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   ksock_tconn_t *: the address of tconn or NULL if it fails
+ *
+ * NOTES:
+ *   The returned tconn already carries one reference (taken
+ *   with ks_get_tconn here).
+ */
+
+ksock_tconn_t *
+ks_create_tconn()
+{
+    ksock_tconn_t * conn;
+
+    conn = (ksock_tconn_t *)cfs_mem_cache_alloc(
+                ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO);
+
+    if (NULL == conn) {
+        return (NULL);
+    }
+
+    /* start from an all-zero structure */
+    memset(conn, 0, sizeof(ksock_tconn_t));
+
+    conn->kstc_magic = KS_TCONN_MAGIC;
+
+    /* work item + event used by the disconnect helper */
+    ExInitializeWorkItem(
+        &(conn->kstc_disconnect.WorkItem),
+        KsDisconnectHelper,
+        &(conn->kstc_disconnect)
+        );
+
+    KeInitializeEvent(
+        &(conn->kstc_disconnect.Event),
+        SynchronizationEvent,
+        FALSE
+        );
+
+    /* work item running ks_destroy_tconn on this tconn */
+    ExInitializeWorkItem(
+        &(conn->kstc_destroy),
+        ks_destroy_tconn,
+        conn
+        );
+
+    spin_lock_init(&(conn->kstc_lock));
+
+    /* the reference handed to the caller */
+    ks_get_tconn(conn);
+
+    /* make the tconn visible in the global list of ks_data */
+    spin_lock(&(ks_data.ksnd_tconn_lock));
+    list_add(&(conn->kstc_list), &(ks_data.ksnd_tconns));
+    ks_data.ksnd_ntconns++;
+    spin_unlock(&(ks_data.ksnd_tconn_lock));
+
+    conn->kstc_snd_wnd = 0x10000;
+    conn->kstc_rcv_wnd = conn->kstc_snd_wnd;
+
+    return (conn);
+}
+
+
+/*
+ * ks_free_tconn
+ *   unlink the tconn from the global tconn list and give its
+ *   memory back to the tconn slab cache
+ *
+ * Arguments:
+ *   tconn:  the tconn to be freed, its refcount must be 0
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   ksnd_tconn_exit is woken when the last tconn goes away.
+ */
+
+void
+ks_free_tconn(ksock_tconn_t * tconn)
+{
+    LASSERT(0 == atomic_read(&(tconn->kstc_refcount)));
+
+    spin_lock(&(ks_data.ksnd_tconn_lock));
+
+    /* drop it from the global list and from the tconn count */
+    list_del(&tconn->kstc_list);
+    ks_data.ksnd_ntconns -= 1;
+
+    if (0 == ks_data.ksnd_ntconns) {
+        /* that was the last tconn: from now on it would be safe
+           for ks_tdi_fini_data to quit ... */
+        cfs_wake_event(&ks_data.ksnd_tconn_exit);
+    }
+
+    spin_unlock(&(ks_data.ksnd_tconn_lock));
+
+    /* finally return the structure to the slab cache */
+    cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn);
+}
+
+
+/*
+ * ks_init_listener
+ *   Set up the tconn as a listener (daemon): type, device name,
+ *   the listening / accepted lists and the listener events
+ *
+ * Arguments:
+ *   tconn: the listener tconn
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   The tconn is left in state ksts_inited.
+ */
+
+void
+ks_init_listener(
+    ksock_tconn_t * tconn
+    )
+{
+    tconn->kstc_type = kstt_listener;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+    /* both child lists start out empty */
+    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list));
+    CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list));
+
+    /* the accept and the destroy event get the same settings */
+    cfs_init_event(&(tconn->listener.kstc_accept_event), TRUE, FALSE);
+    cfs_init_event(&(tconn->listener.kstc_destroy_event), TRUE, FALSE);
+
+    tconn->kstc_state = ksts_inited;
+}
+
+
+/*
+ * ks_init_sender
+ *   Set up the tconn as a sender: type, device name, the
+ *   receive / send chains and the default window sizes
+ *
+ * Arguments:
+ *   tconn: the sender tconn
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   The tconn is left in state ksts_inited.
+ */
+
+void
+ks_init_sender(
+    ksock_tconn_t * tconn
+    )
+{
+    tconn->kstc_type = kstt_sender;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+    /* receive chain first, then the send chain */
+    KsInitializeKsChain(&(tconn->sender.kstc_recv));
+    KsInitializeKsChain(&(tconn->sender.kstc_send));
+
+    /* both windows start at the default size */
+    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+
+    tconn->kstc_state = ksts_inited;
+}
+
+/*
+ * ks_init_child
+ *   Set up the tconn as a child: type, device name, the
+ *   receive / send chains and the default window sizes
+ *
+ * Arguments:
+ *   tconn: the child tconn
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   The tconn is left in state ksts_inited.
+ */
+
+void
+ks_init_child(
+    ksock_tconn_t * tconn
+    )
+{
+    tconn->kstc_type = kstt_child;
+    RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME);
+
+    /* receive chain first, then the send chain */
+    KsInitializeKsChain(&(tconn->child.kstc_recv));
+    KsInitializeKsChain(&(tconn->child.kstc_send));
+
+    /* both windows start at the default size */
+    tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+    tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE;
+
+    tconn->kstc_state = ksts_inited;
+}
+
+/*
+ * ks_get_tconn
+ *   take one more reference on the tconn
+ *
+ * Arguments:
+ *   tconn: the tdi connection to be referred
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   Every call is to be balanced by a ks_put_tconn.
+ */
+
+void
+ks_get_tconn(
+    ksock_tconn_t * tconn
+    )
+{
+    /* atomic increment of the reference counter */
+    atomic_inc(&tconn->kstc_refcount);
+}
+
+/*
+ * ks_put_tconn
+ *   decrease the reference count of the tconn and, when the
+ *   reference count becomes 0, either abort the connection
+ *   (connected child / sender) or queue the work item that
+ *   destroys the tconn.
+ *
+ * Arguments:
+ *   tconn: the tdi connection to be dereferenced
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   KS_TCONN_DESTROY_BUSY marks that the destroy work item was
+ *   already queued; finding it set here is unexpected and only
+ *   breaks into the debugger.
+ *
+ *   The work item is queued after the flag is set and after the
+ *   tconn lock is dropped: once it is queued ks_destroy_tconn may
+ *   free the tconn at any moment, so the tconn must not be
+ *   touched any more after ExQueueWorkItem.
+ */
+
+void
+ks_put_tconn(
+    ksock_tconn_t *tconn
+    )
+{
+    int     queue_destroy = FALSE;
+
+    if (!atomic_dec_and_test(&(tconn->kstc_refcount))) {
+        /* still referenced by somebody else */
+        return;
+    }
+
+    spin_lock(&(tconn->kstc_lock));
+
+    if ( ( tconn->kstc_type == kstt_child ||
+           tconn->kstc_type == kstt_sender ) &&
+         ( tconn->kstc_state == ksts_connected ) ) {
+
+        /* a live connection is aborted instead of destroyed here */
+
+        spin_unlock(&(tconn->kstc_lock));
+
+        ks_abort_tconn(tconn);
+
+        return;
+    }
+
+    if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) {
+        cfs_enter_debugger();
+    } else {
+        cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY);
+        queue_destroy = TRUE;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+
+    if (queue_destroy) {
+
+        /* last access to tconn in this function */
+        ExQueueWorkItem(
+                &(tconn->kstc_destroy),
+                DelayedWorkQueue
+                );
+    }
+}
+
+/*
+ * ks_destroy_tconn
+ *   cleanup the tdi connection according to its type and free it
+ *
+ *   listener: reset the event handlers, close the address object
+ *   child:    disassociate (if still associated) and close the
+ *             connection object, drop the reference on the parent's
+ *             address object, unlink the child from the parent's
+ *             queue and put the parent tconn
+ *   sender:   reset the event handlers, close the connection
+ *             object and the address object
+ *
+ * Arguments:
+ *   tconn: the tdi connection to be cleaned, its refcount must
+ *          be 0.
+ *
+ * Return Value:
+ *   N/A
+ *
+ * NOTES:
+ *   This routine is also registered as the routine of the
+ *   kstc_destroy work item (see ks_create_tconn), which
+ *   ks_put_tconn queues when the last reference is dropped.
+ *   The tconn is always freed (ks_free_tconn) before returning.
+ */
+
+void
+ks_destroy_tconn(
+    ksock_tconn_t *     tconn
+    )
+{
+    LASSERT(tconn->kstc_refcount.counter == 0);
+
+    if (tconn->kstc_type == kstt_listener) {
+
+        ks_reset_handlers(tconn);
+
+        /* for listener, we just need to close the address object */
+        KsCloseAddress(
+                tconn->kstc_addr.Handle,
+                tconn->kstc_addr.FileObject
+                );
+
+        tconn->kstc_state = ksts_inited;
+
+    } else if (tconn->kstc_type == kstt_child) {
+
+        /* for child tdi connections */
+
+        /* disassociate the relation between its connection object
+           and the address object */
+
+        if (tconn->kstc_state == ksts_associated) {
+            KsDisassociateAddress(
+                tconn->child.kstc_info.FileObject
+                );
+        }
+
+        /* release the connection object */
+
+        KsCloseConnection(
+                tconn->child.kstc_info.Handle,
+                tconn->child.kstc_info.FileObject
+                );
+
+        /* release its reference on its parent's address object:
+           ks_bind_tconn only referenced the file object for a child,
+           so no handle is passed here */
+        KsCloseAddress(
+                NULL,
+                tconn->kstc_addr.FileObject
+                );
+
+        /* lock order: the parent's lock first, then the child's */
+        spin_lock(&tconn->child.kstc_parent->kstc_lock);
+        spin_lock(&tconn->kstc_lock);
+
+        tconn->kstc_state = ksts_inited;
+
+        /* remove it from its parent's queues */
+
+        if (tconn->child.kstc_queued) {
+
+            list_del(&(tconn->child.kstc_link));
+
+            /* kstc_queueno selects the counter to be decreased:
+               non-zero: accepted queue, zero: listening queue */
+            if (tconn->child.kstc_queueno) {
+
+                LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0);
+                tconn->child.kstc_parent->listener.kstc_accepted.num -= 1;
+
+            } else {
+
+                LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0);
+                tconn->child.kstc_parent->listener.kstc_listening.num -= 1;
+            }
+
+            tconn->child.kstc_queued = FALSE;
+        }
+
+        spin_unlock(&tconn->kstc_lock);
+        spin_unlock(&tconn->child.kstc_parent->kstc_lock);
+
+        /* drop the reference of the parent tconn (it was taken
+           in ks_bind_tconn) */
+        ks_put_tconn(tconn->child.kstc_parent);
+
+    } else if (tconn->kstc_type == kstt_sender) {
+
+        ks_reset_handlers(tconn);
+
+        /* release the connection object */
+
+        KsCloseConnection(
+                tconn->sender.kstc_info.Handle,
+                tconn->sender.kstc_info.FileObject
+                );
+
+        /* close the sender's own address object (handle and
+           file object, both opened in ks_bind_tconn) */
+        KsCloseAddress(
+                tconn->kstc_addr.Handle,
+                tconn->kstc_addr.FileObject
+                );
+
+        tconn->kstc_state = ksts_inited;
+
+    } else {
+        cfs_enter_debugger();   /* unknown tconn type */
+    }
+
+    /* free the tconn structure ... */
+
+    ks_free_tconn(tconn);
+}
+
+/*
+ * ks_query_data
+ *   Report the bytes (TotalBytes of the Tsdu manager) pending in
+ *   the receive chain of a connected sender / child tconn
+ *
+ * Arguments:
+ *   tconn:        the tdi connection to be queried
+ *   size:         receives the byte count, 0 in failure
+ *   bIsExpedited: non-zero: query the expedited Tsdu manager,
+ *                 zero: query the normal one
+ *
+ * Return Value:
+ *   int:   0 for success
+ *          -EINVAL:   the tconn is neither a sender nor a child
+ *          -ENOTCONN: the tconn is not connected
+ */
+
+int
+ks_query_data(
+    ksock_tconn_t * tconn,
+    size_t *        size,
+    int             bIsExpedited )
+{
+    int             rc = 0;
+
+    PKS_CHAIN       RecvChain;
+    PKS_TSDUMGR     TsduMgr;
+
+    *size = 0;
+
+    /* keep the tconn alive and stable while it is inspected */
+    ks_get_tconn(tconn);
+    spin_lock(&(tconn->kstc_lock));
+
+    if ( tconn->kstc_type != kstt_sender &&
+         tconn->kstc_type != kstt_child) {
+
+        rc = -EINVAL;
+
+    } else if (tconn->kstc_state != ksts_connected) {
+
+        rc = -ENOTCONN;
+
+    } else {
+
+        /* the receive chain lives in the type specific part */
+        if (tconn->kstc_type == kstt_sender) {
+            RecvChain = &(tconn->sender.kstc_recv);
+        } else {
+            LASSERT(tconn->kstc_type == kstt_child);
+            RecvChain = &(tconn->child.kstc_recv);
+        }
+
+        TsduMgr = bIsExpedited ? &(RecvChain->Expedited)
+                               : &(RecvChain->Normal);
+
+        *size = TsduMgr->TotalBytes;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+    ks_put_tconn(tconn);
+
+    return (rc);
+}
+
+/*
+ * ks_get_tcp_option
+ *   Query an option of the tcp stream connection by sending
+ *   IOCTL_TCP_QUERY_INFORMATION_EX to the device of the
+ *   connection object and waiting for its completion
+ *
+ * Arguments:
+ *   tconn:         the tdi connection, must be connected
+ *   ID:            option id
+ *   OptionValue:   buffer to store the option value
+ *   Length:        in: the size of the OptionValue buffer,
+ *                  out: the length of the value returned
+ *
+ * Return Value:
+ *   int:           ks return code (cfs_error_code of the status)
+ *
+ * NOTES:
+ *   A failure of the query itself is not reported: OptionValue is
+ *   zeroed over *Length bytes and success is returned. Only a not
+ *   connected tconn or a failed Irp allocation yields an error.
+ */
+
+int
+ks_get_tcp_option (
+    ksock_tconn_t *     tconn,
+    ULONG               ID,
+    PVOID               OptionValue,
+    PULONG              Length
+    )
+{
+    NTSTATUS            Status = STATUS_SUCCESS;
+
+    IO_STATUS_BLOCK     IoStatus;
+
+    TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx;
+
+    PFILE_OBJECT        ConnectionObject;
+    PDEVICE_OBJECT      DeviceObject = NULL;
+
+    PIRP                Irp = NULL;
+    PIO_STACK_LOCATION  IrpSp = NULL;
+
+    KEVENT              Event;
+
+    /* make sure the tdi connection is connected ? */
+
+    ks_get_tconn(tconn);    /* balanced by ks_put_tconn at errorout */
+
+    if (tconn->kstc_state != ksts_connected) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto errorout;
+    }
+
+    LASSERT(tconn->kstc_type == kstt_sender ||
+           tconn->kstc_type == kstt_child);
+
+    if (tconn->kstc_type == kstt_sender) {
+        ConnectionObject = tconn->sender.kstc_info.FileObject;
+    } else {
+        ConnectionObject = tconn->child.kstc_info.FileObject;
+    }
+
+    QueryInfoEx.ID.toi_id = ID;
+    QueryInfoEx.ID.toi_type   = INFO_TYPE_CONNECTION;
+    QueryInfoEx.ID.toi_class  = INFO_CLASS_PROTOCOL;
+    QueryInfoEx.ID.toi_entity.tei_entity   = CO_TL_ENTITY;
+    QueryInfoEx.ID.toi_entity.tei_instance = 0;
+
+    RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE);
+
+    KeInitializeEvent(&Event, NotificationEvent, FALSE);
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    Irp = IoBuildDeviceIoControlRequest(
+                IOCTL_TCP_QUERY_INFORMATION_EX,
+                DeviceObject,
+                &QueryInfoEx,
+                sizeof(TCP_REQUEST_QUERY_INFORMATION_EX),
+                OptionValue,
+                *Length,
+                FALSE,
+                &Event,
+                &IoStatus
+                );
+
+    if (Irp == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    IrpSp = IoGetNextIrpStackLocation(Irp);
+
+    if (IrpSp == NULL) {    /* NOTE(review): presumably never taken
+                             * for a freshly built Irp - confirm */
+
+        IoFreeIrp(Irp);
+        Irp = NULL;
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    IrpSp->FileObject = ConnectionObject;
+    IrpSp->DeviceObject = DeviceObject;
+
+    Status = IoCallDriver(DeviceObject, Irp);
+
+    if (Status == STATUS_PENDING) {
+
+        KeWaitForSingleObject(
+                &Event,
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
+
+        Status = IoStatus.Status;   /* the final status of the request */
+    }
+
+
+    if (NT_SUCCESS(Status)) {
+        *Length = IoStatus.Information;
+    } else {
+        cfs_enter_debugger();
+        memset(OptionValue, 0, *Length);    /* hand back a zeroed value ... */
+        Status = STATUS_SUCCESS;            /* ... and mask the failure */
+    }
+
+errorout:
+
+    ks_put_tconn(tconn);
+
+    return cfs_error_code(Status);
+}
+
+/*
+ * ks_set_tcp_option
+ *   Set an option of the tcp stream connection by sending
+ *   IOCTL_TCP_SET_INFORMATION_EX to the device of the
+ *   connection object and waiting for its completion
+ *
+ * Arguments:
+ *   tconn:     the tdi connection, must be connected
+ *   ID:        option id
+ *   OptionValue: buffer containing the new option value
+ *   Length:    the length of the value
+ *
+ * Return Value:
+ *   the ks return code (cfs_error_code of the status), carried
+ *   in the NTSTATUS return type of the function
+ *
+ * NOTES:
+ *   Failures are only logged with printk and then reported as
+ *   success to the caller.
+ *
+ *   The completion event lives on the stack (as in
+ *   ks_get_tcp_option), so the request buffer holds nothing but
+ *   the TCP_REQUEST_SET_INFORMATION_EX header and the option
+ *   value, and the event is properly aligned whatever Length is.
+ */
+
+NTSTATUS
+ks_set_tcp_option (
+    ksock_tconn_t * tconn,
+    ULONG           ID,
+    PVOID           OptionValue,
+    ULONG           Length
+    )
+{
+    NTSTATUS            Status = STATUS_SUCCESS;
+
+    IO_STATUS_BLOCK     IoStatus;
+
+    ULONG               SetInfoExLength;
+    PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL;
+
+    PFILE_OBJECT        ConnectionObject;
+    PDEVICE_OBJECT      DeviceObject = NULL;
+
+    PIRP                Irp = NULL;
+    PIO_STACK_LOCATION  IrpSp = NULL;
+
+    KEVENT              Event;
+
+    /* make sure the tdi connection is connected ? */
+
+    ks_get_tconn(tconn);
+
+    if (tconn->kstc_state != ksts_connected) {
+        Status = STATUS_INVALID_PARAMETER;
+        goto errorout;
+    }
+
+    LASSERT(tconn->kstc_type == kstt_sender ||
+           tconn->kstc_type == kstt_child);
+
+    if (tconn->kstc_type == kstt_sender) {
+        ConnectionObject = tconn->sender.kstc_info.FileObject;
+    } else {
+        ConnectionObject = tconn->child.kstc_info.FileObject;
+    }
+
+    /* the header already contains the first byte of Buffer[] */
+    SetInfoExLength =  sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length;
+
+    SetInfoEx = ExAllocatePoolWithTag(
+                    NonPagedPool,
+                    SetInfoExLength,
+                    'TSSK'
+                    );
+
+    if (SetInfoEx == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    SetInfoEx->ID.toi_id = ID;
+
+    SetInfoEx->ID.toi_type  = INFO_TYPE_CONNECTION;
+    SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL;
+    SetInfoEx->ID.toi_entity.tei_entity   = CO_TL_ENTITY;
+    SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE;
+
+    SetInfoEx->BufferSize = Length;
+    RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length);
+
+    KeInitializeEvent(&Event, NotificationEvent, FALSE);
+
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    Irp = IoBuildDeviceIoControlRequest(
+                IOCTL_TCP_SET_INFORMATION_EX,
+                DeviceObject,
+                SetInfoEx,
+                SetInfoExLength,
+                NULL,
+                0,
+                FALSE,
+                &Event,
+                &IoStatus
+                );
+
+    if (Irp == NULL) {
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    IrpSp = IoGetNextIrpStackLocation(Irp);
+
+    if (IrpSp == NULL) {
+        IoFreeIrp(Irp);
+        Irp = NULL;
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        goto errorout;
+    }
+
+    IrpSp->FileObject = ConnectionObject;
+    IrpSp->DeviceObject = DeviceObject;
+
+    Status = IoCallDriver(DeviceObject, Irp);
+
+    if (Status == STATUS_PENDING) {
+
+        /* a KernelMode wait: the event stays valid until the
+           request has been completed */
+
+        KeWaitForSingleObject(
+                &Event,
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
+
+        Status = IoStatus.Status;
+    }
+
+errorout:
+
+    if (SetInfoEx) {
+        ExFreePool(SetInfoEx);
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        /* best effort: log the failure, but do not fail the caller */
+        printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n",
+               ID, Status);
+        Status = STATUS_SUCCESS;
+    }
+
+    ks_put_tconn(tconn);
+
+    return cfs_error_code(Status);
+}
+
+/*
+ * ks_bind_tconn
+ *   bind the tdi connection object with an address
+ *
+ *   A child tconn shares the address object of its parent: the
+ *   parent's address is copied and its file object referenced.
+ *   Any other tconn opens a transport address object of its own
+ *   for addr:port.
+ *
+ * Arguments:
+ *   tconn:    tconn to be bound, must be in state ksts_inited
+ *   parent:   the parent tconn object, required for a child
+ *             tconn and not used otherwise
+ *   addr:     the ip address (converted with htonl here)
+ *   port:     the port number (converted with htons here)
+ *
+ * Return Value:
+ *   int:   0 for success or ks error codes.
+ *
+ * NOTES:
+ *   On success the tconn is in state ksts_bind. A child also
+ *   holds a reference on its parent tconn, which
+ *   ks_destroy_tconn drops again.
+ */
+
+int
+ks_bind_tconn (
+    ksock_tconn_t * tconn,
+    ksock_tconn_t * parent,
+    ulong_ptr   addr,
+    unsigned short  port
+    )
+{
+    NTSTATUS            status;
+    int                 rc = 0;
+
+    ksock_tdi_addr_t    taddr;
+
+    memset(&taddr, 0, sizeof(ksock_tdi_addr_t));
+
+    if (tconn->kstc_state != ksts_inited) {
+
+        status = STATUS_INVALID_PARAMETER;
+        rc = cfs_error_code(status);
+
+        goto errorout;
+
+    } else if (tconn->kstc_type == kstt_child) {
+
+        if (NULL == parent) {
+            status = STATUS_INVALID_PARAMETER;
+            rc = cfs_error_code(status);
+
+            goto errorout;
+        }
+
+        /* refer to its parent's address object */
+
+        taddr = parent->kstc_addr;
+        ObReferenceObject(taddr.FileObject);
+
+        ks_get_tconn(parent);   /* the child pins its parent tconn */
+
+    } else {
+
+        PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi);
+        ULONG              AddrLen = 0;
+
+        /* initialize the tdi address: a single ip address entry */
+
+        TdiAddress->TAAddressCount = 1;
+        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
+
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+
+        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+
+
+        /* open the transport address object */
+
+        AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +
+                  TDI_ADDRESS_LENGTH_IP;
+
+        status = KsOpenAddress(
+                    &(tconn->kstc_dev),
+                    &(taddr.Tdi),
+                    AddrLen,
+                    &(taddr.Handle),
+                    &(taddr.FileObject)
+                    );
+
+        if (!NT_SUCCESS(status)) {
+
+            KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n",
+                        addr, port,  status ));
+            rc = cfs_error_code(status);
+            goto errorout;
+        }
+    }
+
+    if (tconn->kstc_type == kstt_child) {
+        tconn->child.kstc_parent = parent;
+    }
+
+    tconn->kstc_state = ksts_bind;
+    tconn->kstc_addr  = taddr;   /* the tconn takes over the address */
+
+errorout:
+
+    return (rc);
+}
+
+/*
+ * ks_build_tconn
+ *  build tcp/streaming connection to remote peer
+ *
+ *  Steps: set the event handlers, open the connection object,
+ *  associate it with the address object of the tconn, then
+ *  submit a TDI connect Irp for addr:port and wait for it.
+ *
+ * Arguments:
+ *   tconn:    tconn to be connected to the peer, it must be a
+ *             sender in state ksts_bind
+ *   addr:     the peer's ip address
+ *   port:     the peer's port number
+ *
+ * Return Value:
+ *   int:   0 for success or ks error codes.
+ *
+ * Notes:
+ *   On success the tconn is in state ksts_connected and owns the
+ *   connection info allocated here (kstc_info.ConnectionInfo).
+ *
+ *   When the connect request fails, the error code of the connect
+ *   is what is returned: the result of resetting the event
+ *   handlers during cleanup must not replace it.
+ */
+
+int
+ks_build_tconn(
+    ksock_tconn_t *                 tconn,
+    ulong_ptr                       addr,
+    unsigned short                  port
+    )
+{
+    int                             rc = 0;
+    NTSTATUS                        status = STATUS_SUCCESS;
+
+    /* set only once the connect request succeeded */
+    BOOLEAN                         connected = FALSE;
+
+    PFILE_OBJECT                    ConnectionObject = NULL;
+    PDEVICE_OBJECT                  DeviceObject = NULL;
+
+    PTDI_CONNECTION_INFORMATION     ConnectionInfo = NULL;
+    ULONG                           AddrLength;
+
+    PIRP                            Irp = NULL;
+
+    LASSERT(tconn->kstc_type == kstt_sender);
+    LASSERT(tconn->kstc_state == ksts_bind);
+
+    ks_get_tconn(tconn);
+
+    {
+        /* set the event callbacks */
+        rc = ks_set_handlers(tconn);
+
+        if (rc < 0) {
+            cfs_enter_debugger();
+            goto errorout;
+        }
+    }
+
+    /* create the connection file handle / object  */
+    status = KsOpenConnection(
+                &(tconn->kstc_dev),
+                (CONNECTION_CONTEXT)tconn,
+                &(tconn->sender.kstc_info.Handle),
+                &(tconn->sender.kstc_info.FileObject)
+                );
+
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
+
+    /* associate the connection with the address object of the tconn */
+
+    status = KsAssociateAddress(
+                tconn->kstc_addr.Handle,
+                tconn->sender.kstc_info.FileObject
+                );
+
+    if (!NT_SUCCESS(status)) {
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
+
+    tconn->kstc_state = ksts_associated;
+
+    /* Allocating Connection Info Together with the Address */
+    AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address)
+                 + TDI_ADDRESS_LENGTH_IP;
+
+    ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag(
+    NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK');
+
+    if (NULL == ConnectionInfo) {
+
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
+
+    /* Initializing ConnectionInfo ... */
+    {
+        PTRANSPORT_ADDRESS TdiAddress;
+
+        /* ConnectionInfo settings */
+
+        ConnectionInfo->UserDataLength = 0;
+        ConnectionInfo->UserData = NULL;
+        ConnectionInfo->OptionsLength = 0;
+        ConnectionInfo->Options = NULL;
+        ConnectionInfo->RemoteAddressLength = AddrLength;
+
+        /* the address is stored right behind the info structure */
+        ConnectionInfo->RemoteAddress = ConnectionInfo + 1;
+
+
+        /* initialize the tdi address */
+
+        TdiAddress = ConnectionInfo->RemoteAddress;
+
+        TdiAddress->TAAddressCount = 1;
+        TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP;
+        TdiAddress->Address[0].AddressType   = TDI_ADDRESS_TYPE_IP;
+
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port);
+        ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr);
+
+        memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8);
+    }
+
+    /* Now prepare to connect the remote peer ... */
+
+    ConnectionObject = tconn->sender.kstc_info.FileObject;
+    DeviceObject = IoGetRelatedDeviceObject(ConnectionObject);
+
+    /* allocate a new Irp */
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(status);
+        cfs_enter_debugger();
+        goto errorout;
+    }
+
+    /* setup the Irp */
+
+    TdiBuildConnect(
+            Irp,
+            DeviceObject,
+            ConnectionObject,
+            NULL,
+            NULL,
+            NULL,
+            ConnectionInfo,
+            NULL
+            );
+
+
+    /* submit the Irp to the underlying transport driver */
+    status = KsSubmitTdiIrp(
+                    DeviceObject,
+                    Irp,
+                    TRUE,
+                    NULL
+                    );
+
+    spin_lock(&(tconn->kstc_lock));
+
+    if (NT_SUCCESS(status)) {
+
+        /* Connected! the connection is built successfully. */
+
+        tconn->kstc_state = ksts_connected;
+
+        /* from now on the tconn owns ConnectionInfo */
+        tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo;
+        tconn->sender.kstc_info.Remote         = ConnectionInfo->RemoteAddress;
+
+        connected = TRUE;
+
+        spin_unlock(&(tconn->kstc_lock));
+
+    } else {
+
+        /* Not connected! Abort it ... */
+
+        if (rc != 0) {
+            cfs_enter_debugger();
+        }
+
+        /* the Irp was handed to KsSubmitTdiIrp: it is not to be
+           freed at errorout */
+        Irp = NULL;
+        rc = cfs_error_code(status);
+
+        tconn->kstc_state = ksts_associated;
+        spin_unlock(&(tconn->kstc_lock));
+
+        /* disassociate the connection and the address object,
+           after cleanup,  it's safe to set the state to abort ... */
+
+        if ( NT_SUCCESS(KsDisassociateAddress(
+                        tconn->sender.kstc_info.FileObject))) {
+            tconn->kstc_state = ksts_aborted;
+        }
+
+        /* reset the event callbacks; the result is deliberately
+           not stored in rc, which carries the connect failure */
+        ks_reset_handlers(tconn);
+
+        goto errorout;
+    }
+
+errorout:
+
+    if (connected) {
+
+        /* only a connected tconn has a local address to query */
+        ks_query_local_ipaddr(tconn);
+
+    } else {
+
+        if (ConnectionInfo) {
+            ExFreePool(ConnectionInfo);
+        }
+        if (Irp) {
+            IoFreeIrp(Irp);
+        }
+    }
+
+    ks_put_tconn(tconn);
+
+    return (rc);
+}
+
+
+/*
+ * ks_disconnect_tconn
+ *   Tear down an established stream connection: submit a TDI
+ *   disconnect request and wait for it, then disassociate the
+ *   connection from its address object and drop the cached
+ *   connection info.
+ *
+ * Arguments:
+ *   tconn: a connected tdi connection object (sender or child)
+ *   flags: disconnect flags handed to TdiBuildDisconnect
+ *
+ * Return Value:
+ *   int: ks error code. The only failure that is reported is the
+ *        inability to allocate the disconnect Irp.
+ *
+ * Notes:
+ *   The outcome of the disconnect request itself is only logged.
+ */
+
+int
+ks_disconnect_tconn(
+    ksock_tconn_t *     tconn,
+    ulong_ptr       flags
+    )
+{
+    NTSTATUS             status = STATUS_SUCCESS;
+    ksock_tconn_info_t * info;
+    PFILE_OBJECT         conn_obj;
+    PDEVICE_OBJECT       dev_obj = NULL;
+    PIRP                 irp = NULL;
+    KEVENT               done;
+
+    /* hold the tconn for the whole operation */
+    ks_get_tconn(tconn);
+
+    /* only a connected sender or child may be disconnected */
+    LASSERT(tconn->kstc_state == ksts_connected);
+    LASSERT( tconn->kstc_type == kstt_sender ||
+            tconn->kstc_type == kstt_child);
+
+    /* a child has no event handlers of its own to reset */
+    if (tconn->kstc_type != kstt_child) {
+        ks_reset_handlers (tconn);
+    }
+
+    /* pick the per-type connection info */
+    info = (tconn->kstc_type == kstt_sender) ?
+                &(tconn->sender.kstc_info) : &(tconn->child.kstc_info);
+
+    conn_obj = info->FileObject;
+    dev_obj  = IoGetRelatedDeviceObject(conn_obj);
+
+    /* build the disconnect request */
+    irp = KsBuildTdiIrp(dev_obj);
+    if (NULL == irp) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        cfs_enter_debugger();
+        goto errorout;
+    }
+
+    KeInitializeEvent(&done, SynchronizationEvent, FALSE);
+
+    TdiBuildDisconnect(
+            irp,
+            dev_obj,
+            conn_obj,
+            KsDisconectCompletionRoutine,
+            &done,
+            NULL,
+            flags,
+            NULL,
+            NULL
+            );
+
+    /* hand the request to the transport driver; if it pends, block
+       on the event and pick the final status up from the Irp */
+    status = IoCallDriver(dev_obj, irp);
+    if (STATUS_PENDING == status) {
+        (void) KeWaitForSingleObject(
+                     &done,
+                     Executive,
+                     KernelMode,
+                     FALSE,
+                     NULL
+                     );
+        status = irp->IoStatus.Status;
+    }
+
+    KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n",
+                status, KsNtStatusToString(status)));
+
+    IoFreeIrp(irp);
+
+    if (info->ConnectionInfo) {
+
+        /* break the connection/address association */
+        status = KsDisassociateAddress(conn_obj);
+        if (!NT_SUCCESS(status)) {
+            cfs_enter_debugger();
+        }
+
+        spin_lock(&(tconn->kstc_lock));
+
+        /* throw away whatever is still queued in the tsdu lists */
+        KsCleanupTsdu (tconn);
+
+        /* the new state depends on whether disassociation worked */
+        tconn->kstc_state = NT_SUCCESS(status) ? ksts_disconnected
+                                               : ksts_associated;
+
+        /* hand the connection info back to the system pool */
+        ExFreePool(info->ConnectionInfo);
+        info->ConnectionInfo = NULL;
+        info->Remote = NULL;
+
+        spin_unlock(&(tconn->kstc_lock));
+    }
+
+    /* everything past the Irp allocation is reported as success */
+    status = STATUS_SUCCESS;
+
+errorout:
+
+    ks_put_tconn(tconn);
+
+    return cfs_error_code(status);
+}
+
+
+/*
+ * ks_abort_tconn
+ *   The connection is broken unexpectedly. Schedule an abortive
+ *   disconnect on the delayed work queue, unless the tconn is no
+ *   longer connected or a disconnect is already in flight.
+ *
+ * Arguments:
+ *   tconn: the tdi connection
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   A reference is taken on entry. It is kept only when a work item
+ *   is actually queued (presumably the work item routine drops it --
+ *   confirm against the routine bound to kstc_disconnect); on every
+ *   other path it is released here so that it cannot leak.
+ */
+
+void
+ks_abort_tconn(
+    ksock_tconn_t *     tconn
+    )
+{
+    PKS_DISCONNECT_WORKITEM WorkItem = NULL;
+
+    WorkItem = &(tconn->kstc_disconnect);
+
+    ks_get_tconn(tconn);
+    spin_lock(&(tconn->kstc_lock));
+
+    if (tconn->kstc_state != ksts_connected) {
+
+        /* nothing to abort */
+        ks_put_tconn(tconn);
+
+    } else if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) {
+
+        /* a disconnect is already pending: no new work item is queued,
+           so the reference taken above has no consumer and must be
+           dropped here */
+        ks_put_tconn(tconn);
+
+    } else {
+
+        WorkItem->Flags = TDI_DISCONNECT_ABORT;
+        WorkItem->tconn = tconn;
+
+        /* mark it busy under the lock so that only one work item is
+           ever queued at a time */
+        cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY);
+
+        ExQueueWorkItem(
+                &(WorkItem->WorkItem),
+                DelayedWorkQueue
+                );
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+}
+
+
+/*
+ * ks_query_local_ipaddr
+ *   Ask the transport for the local address of the connection and
+ *   store it in tconn->kstc_addr.Tdi.
+ *
+ * Arguments:
+ *   tconn:  the tconn to query; it must be a sender or a child
+ *
+ * Return Value:
+ *   int: ks error code (STATUS_INVALID_PARAMETER translated for any
+ *        other tconn type, otherwise the translated query status)
+ *
+ * Notes:
+ *   N/A
+ */
+
+int
+ks_query_local_ipaddr(
+    ksock_tconn_t *     tconn
+    )
+{
+    PFILE_OBJECT       conn_file = NULL;
+    PTRANSPORT_ADDRESS local;
+    ULONG              local_len;
+    NTSTATUS           status;
+
+    /* only senders and children own a connection file object */
+    switch (tconn->kstc_type) {
+    case kstt_sender:
+        conn_file = tconn->sender.kstc_info.FileObject;
+        break;
+    case kstt_child:
+        conn_file = tconn->child.kstc_info.FileObject;
+        break;
+    default:
+        return cfs_error_code(STATUS_INVALID_PARAMETER);
+    }
+
+    /* the answer is written straight into the tconn */
+    local     = &(tconn->kstc_addr.Tdi);
+    local_len = MAX_ADDRESS_LENGTH;
+
+    status = KsQueryIpAddress(conn_file, local, &local_len);
+
+    if (!NT_SUCCESS(status)) {
+        KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n"));
+    } else {
+        KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n",
+                ((PTDI_ADDRESS_IP)(&(local->Address[0].Address)))->in_addr,
+                ((PTDI_ADDRESS_IP)(&(local->Address[0].Address)))->sin_port ));
+    }
+
+    return cfs_error_code(status);
+}
+
+/*
+ * ks_send_mdl
+ *   Send an MDL chain to the peer over a stream connection.
+ *
+ *   Two paths exist:
+ *     - buffered: when the pending bytes plus len still fit into the
+ *       send window (kstc_snd_wnd), the payload is copied into a Tsdu
+ *       slot (inline KS_TSDU_DAT, or KS_TSDU_BUF pointing at a pool
+ *       buffer when it is too large for a Tsdu) and sent from there
+ *       with TDI_SEND_NON_BLOCKING;
+ *     - direct: otherwise, or when any buffered preparation step
+ *       fails, the caller's mdl is sent as is, either waiting for
+ *       completion or, with MSG_DONTWAIT, leaving completion to
+ *       KsTcpSendCompletionRoutine.
+ *
+ * Arguments:
+ *   tconn: tdi connection object (sender or child)
+ *   tx:    the transmit context; stored as the completion context of
+ *          a non-blocking direct send
+ *   mdl:   the mdl chain containing the data
+ *   len:   length of the data
+ *   flags: flags of the transmission (MSG_DONTWAIT is honoured here,
+ *          the rest is translated by ks_tdi_send_flags)
+ *
+ * Return Value:
+ *   ks return code: the number of bytes accepted on success,
+ *   -ENOTCONN when the tconn is not connected, otherwise the
+ *   translation of the failing NTSTATUS (including allocation
+ *   failures of the tx context or the Irp).
+ *
+ * Notes:
+ *   The caller's mdl is released here when the tx context or the Irp
+ *   cannot be allocated, and after a buffered send has been submitted.
+ */
+
+int
+ks_send_mdl(
+    ksock_tconn_t * tconn,
+    void *          tx,
+    ksock_mdl_t *   mdl,
+    int             len,
+    int             flags
+    )
+{
+    NTSTATUS            Status;
+    int                 rc = 0;
+    ulong_ptr       length;
+    ulong_ptr       tflags;
+    ksock_tdi_tx_t *    context;
+
+    PKS_CHAIN           KsChain;
+    PKS_TSDUMGR         KsTsduMgr;
+    PKS_TSDU            KsTsdu;
+    PKS_TSDU_BUF        KsTsduBuf;
+    PKS_TSDU_DAT        KsTsduDat;
+
+    BOOLEAN             bNewTsdu = FALSE;   /* newly allocated */
+    BOOLEAN             bNewBuff = FALSE;   /* newly allocated */
+
+    BOOLEAN             bBuffed;            /* buffered sending */
+
+    PUCHAR              Buffer = NULL;
+    ksock_mdl_t *       NewMdl = NULL;
+
+    PIRP                Irp = NULL;
+    PFILE_OBJECT        ConnObject;
+    PDEVICE_OBJECT      DeviceObject;
+
+    BOOLEAN             bIsNonBlock;
+
+    ks_get_tconn(tconn);
+
+    tflags = ks_tdi_send_flags(flags);
+    bIsNonBlock  = cfs_is_flag_set(flags, MSG_DONTWAIT);
+
+    spin_lock(&tconn->kstc_lock);
+
+    LASSERT( tconn->kstc_type == kstt_sender ||
+             tconn->kstc_type == kstt_child );
+
+    if (tconn->kstc_state != ksts_connected) {
+        spin_unlock(&tconn->kstc_lock);
+        ks_put_tconn(tconn);
+        return -ENOTCONN;
+    }
+
+    /* select the send chain of this tconn type, then the expedited
+       or the normal Tsdu manager depending on the send flags */
+
+    if (tconn->kstc_type == kstt_sender) {
+        KsChain = &(tconn->sender.kstc_send);
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        KsChain = &(tconn->child.kstc_send);
+    }
+
+    if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) {
+        KsTsduMgr = &(KsChain->Expedited);
+    } else {
+        KsTsduMgr = &(KsChain->Normal);
+    }
+
+    /* buffer the payload only while it still fits the send window */
+    if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) {
+        bBuffed = TRUE;
+    } else {
+        bBuffed = FALSE;
+    }
+
+    /* do the preparation work for buffered sending */
+
+    if (bBuffed) {
+
+        /* if the data is even larger than the biggest Tsdu, we have
+           to allocate a new buffer and use TSDU_TYPE_BUF to store it */
+
+        if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size
+             - KS_DWORD_ALIGN(sizeof(KS_TSDU))) {
+            bNewBuff = TRUE;
+        }
+
+        if (list_empty(&(KsTsduMgr->TsduList))) {
+
+            LASSERT(KsTsduMgr->NumOfTsdu == 0);
+            KsTsdu = NULL;
+
+        } else {
+
+            /* try to append to the last Tsdu of the list */
+            LASSERT(KsTsduMgr->NumOfTsdu > 0);
+            KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link);
+            LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+
+            /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */
+            if (bNewBuff) {
+                if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) {
+                    KsTsdu = NULL;
+                }
+            } else {
+                if ( KS_TSDU_STRU_SIZE((ULONG)len) >
+                     KsTsdu->TotalLength - KsTsdu->LastOffset ) {
+                    KsTsdu = NULL;
+                }
+            }
+        }
+
+        /* if there's no Tsdu or the free size is not enough for the
+           KS_TSDU_BUF or KS_TSDU_DAT, we need to allocate a new Tsdu.  */
+
+        if (NULL == KsTsdu) {
+
+            KsTsdu = KsAllocateKsTsdu();
+
+            if (NULL == KsTsdu) {
+                /* fall back to direct sending */
+                bBuffed = FALSE;
+                bNewBuff = FALSE;
+            } else {
+                bNewTsdu = TRUE;
+            }
+        }
+
+        /* process the case that a new buffer is to be allocated from system memory */
+        if (bNewBuff) {
+
+            /* now allocating internal buffer to contain the payload */
+            Buffer = ExAllocatePool(NonPagedPool, len);
+
+            if (NULL == Buffer) {
+                bBuffed = FALSE;
+            }
+        }
+    }
+
+    if (bBuffed) {
+
+        if (bNewBuff) {
+
+            /* queue a new KS_TSDU_BUF to the Tsdu buffer */
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+            KsTsduBuf->TsduFlags    =  0;
+            KsTsduBuf->DataLength   =  (ULONG)len;
+            KsTsduBuf->StartOffset  =  0;
+            KsTsduBuf->UserBuffer   =  Buffer;
+        } else {
+            /* queue a new KS_TSDU_DAT to the Tsdu buffer */
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset);
+
+            KsTsduDat->TsduFlags    =  0;
+            KsTsduDat->DataLength   =  (ULONG)len;
+            KsTsduDat->StartOffset  =  0;
+            KsTsduDat->TotalLength  = KS_TSDU_STRU_SIZE((ULONG)len);
+
+            Buffer = &KsTsduDat->Data[0];
+        }
+
+        /* now locking the Buffer and copy user payload into the buffer */
+        ASSERT(Buffer != NULL);
+
+        rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl);
+        if (rc != 0) {
+            printk("ks_send_mdl: bufferred: error allocating mdl.\n");
+            bBuffed = FALSE;
+        } else {
+            ULONG BytesCopied = 0;
+            TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied);
+            if (BytesCopied != (ULONG) len) {
+                bBuffed = FALSE;
+            }
+        }
+
+        /* Do the finalizing job if we succeeded in locking the buffer and
+           moving the user data. Otherwise we need to clean up ... */
+        if (bBuffed) {
+
+            /* the slot only becomes visible (typed, and covered by
+               LastOffset) once the copy is known to be complete */
+            if (bNewBuff) {
+                KsTsduBuf->TsduType     =  TSDU_TYPE_BUF;
+                KsTsdu->LastOffset += sizeof(KS_TSDU_BUF);
+
+            } else {
+                KsTsduDat->TsduType     =  TSDU_TYPE_DAT;
+                KsTsdu->LastOffset += KsTsduDat->TotalLength;
+            }
+
+            /* attach it to the TsduMgr list if the Tsdu is newly created. */
+            if (bNewTsdu) {
+
+                list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+                KsTsduMgr->NumOfTsdu++;
+            }
+
+        } else {
+
+            if (NewMdl) {
+                ks_release_mdl(NewMdl, FALSE);
+                NewMdl = NULL;
+            }
+
+            if (bNewBuff) {
+                ExFreePool(Buffer);
+                Buffer = NULL;
+                bNewBuff = FALSE;
+            }
+        }
+    }
+
+    /* update the TotalBytes being in sending (this is undone below on
+       failure, and after a completed direct send) */
+    KsTsduMgr->TotalBytes += (ULONG)len;
+
+    spin_unlock(&tconn->kstc_lock);
+
+    /* cleanup the Tsdu if not successful */
+    if (!bBuffed && bNewTsdu) {
+        KsPutKsTsdu(KsTsdu);
+        bNewTsdu = FALSE;
+        KsTsdu = NULL;
+    }
+
+    /* we need to allocate the ksock_tdi_tx_t structure from the memory
+       pool; the KEVENT it points to lives right behind it. */
+
+    context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0);
+    if (!context) {
+        /* release the chained mdl */
+        ks_release_mdl(mdl, FALSE);
+
+        /* report the failure to the caller instead of returning a
+           stale rc (which would read as "0 bytes sent, no error") */
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(Status);
+        goto errorout;
+    }
+
+    /* initialize the TcpContext */
+
+    memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT));
+
+    context->tconn = tconn;
+    context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t));
+
+    KeInitializeEvent(context->Event, SynchronizationEvent, FALSE);
+
+    if (bBuffed) {
+
+         /* for buffered transmission, we need to set
+            the internal completion routine.  */
+
+        context->CompletionRoutine  = KsTcpSendCompletionRoutine;
+        context->KsTsduMgr          = KsTsduMgr;
+        context->CompletionContext  = KsTsdu;
+        context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat);
+        context->bCounted = FALSE;
+
+    } else if (bIsNonBlock) {
+
+         /* for non-blocking transmission, we need to set
+            the internal completion routine too.  */
+
+        context->CompletionRoutine = KsTcpSendCompletionRoutine;
+        context->CompletionContext = tx;
+        context->KsTsduMgr         = KsTsduMgr;
+        context->bCounted = TRUE;
+        /* one count is dropped below by this function; the other one
+           is presumably dropped by the completion path -- confirm */
+        context->ReferCount = 2;
+    }
+
+    if (tconn->kstc_type == kstt_sender) {
+        ConnObject = tconn->sender.kstc_info.FileObject;
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        ConnObject = tconn->child.kstc_info.FileObject;
+    }
+
+    DeviceObject = IoGetRelatedDeviceObject(ConnObject);
+
+    Irp = KsBuildTdiIrp(DeviceObject);
+
+    if (NULL == Irp) {
+
+        /* release the chained mdl */
+        ks_release_mdl(mdl, FALSE);
+
+        /* as above: make sure the caller sees an error code */
+        Status = STATUS_INSUFFICIENT_RESOURCES;
+        rc = cfs_error_code(Status);
+        goto errorout;
+    }
+
+    length = KsQueryMdlsSize(mdl);
+
+    LASSERT((ULONG)len <= length);
+
+    /* extra reference for the in-flight request (presumably released
+       by the completion routine -- confirm) */
+    ks_get_tconn(tconn);
+
+    TdiBuildSend(
+        Irp,
+        DeviceObject,
+        ConnObject,
+        KsTcpCompletionRoutine,
+        context,
+        (bBuffed ? NewMdl : mdl),
+        (bBuffed ? (tflags | TDI_SEND_NON_BLOCKING) : tflags),
+        (ULONG)len
+      );
+
+    Status = IoCallDriver(DeviceObject, Irp);
+
+    if (bBuffed) {
+        /* the payload has been copied, so the caller's mdl is no
+           longer needed; NewMdl now travels with the Irp */
+        ks_release_mdl(mdl, FALSE);
+        NewMdl = NULL;
+    }
+
+    if (!NT_SUCCESS(Status)) {
+        cfs_enter_debugger();
+        rc = cfs_error_code(Status);
+        goto errorout;
+    }
+
+    if (bBuffed) {
+        /* buffered data counts as sent; the context now belongs to
+           the completion routine */
+        Status = STATUS_SUCCESS;
+        rc  = len;
+        context = NULL;
+    } else {
+        if (bIsNonBlock) {
+            if (InterlockedDecrement(&context->ReferCount) == 0) {
+                /* the request has completed already */
+                Status = Irp->IoStatus.Status;
+            } else {
+                /* still in flight: completion owns the context */
+                Status = STATUS_PENDING;
+                context = NULL;
+            }
+        } else {
+            if (STATUS_PENDING == Status) {
+                Status = KeWaitForSingleObject(
+                         context->Event,
+                         Executive,
+                         KernelMode,
+                         FALSE,
+                         NULL
+                         );
+
+                if (NT_SUCCESS(Status)) {
+                    Status = Irp->IoStatus.Status;
+                }
+            }
+        }
+
+        if (Status == STATUS_SUCCESS) {
+            rc = (int)(Irp->IoStatus.Information);
+
+            spin_lock(&tconn->kstc_lock);
+            KsTsduMgr->TotalBytes -= rc;
+            spin_unlock(&tconn->kstc_lock);
+
+        } else {
+            rc = cfs_error_code(Status);
+        }
+    }
+
+errorout:
+
+    if (bBuffed) {
+
+        if (NewMdl) {
+            ks_release_mdl(NewMdl, FALSE);
+            NewMdl = NULL;
+        }
+
+        if (bNewBuff) {
+            if (!NT_SUCCESS(Status)) {
+                ExFreePool(Buffer);
+                Buffer = NULL;
+            }
+        }
+
+    } else {
+
+        /* a pending Irp is still owned by the transport */
+        if (Status != STATUS_PENDING) {
+
+            if (Irp) {
+
+                /* Freeing the Irp ... */
+
+                IoFreeIrp(Irp);
+                Irp = NULL;
+            }
+        }
+    }
+
+    if (!NT_SUCCESS(Status)) {
+
+        spin_lock(&tconn->kstc_lock);
+
+        /* take back the bytes accounted for above */
+        KsTsduMgr->TotalBytes -= (ULONG)len;
+
+        if (bBuffed) {
+
+            /* detach and free the Tsdu if it was newly created;
+               otherwise roll back (or invalidate) the slot we added */
+            if (bNewTsdu) {
+
+                list_del(&(KsTsdu->Link));
+                KsTsduMgr->NumOfTsdu--;
+
+                KsPutKsTsdu(KsTsdu);
+            } else {
+                if (bNewBuff) {
+                    if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) ==
+                         (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
+                        /* our slot is still the last one: pop it */
+                        KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF);
+                        KsTsduBuf->TsduType = 0;
+                    } else {
+                        /* something was queued behind it: mark the
+                           slot as fully consumed instead */
+                        cfs_enter_debugger();
+                        KsTsduBuf->StartOffset = KsTsduBuf->DataLength;
+                    }
+                } else {
+                    if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength ==
+                         (ulong_ptr)KsTsdu + KsTsdu->LastOffset) {
+                        KsTsdu->LastOffset -= KsTsduDat->TotalLength;
+                        KsTsduDat->TsduType = 0;
+                    } else {
+                        cfs_enter_debugger();
+                        KsTsduDat->StartOffset = KsTsduDat->DataLength;
+                    }
+                }
+            }
+        }
+
+        spin_unlock(&tconn->kstc_lock);
+    }
+
+    /* free the context if it is not owned by a completion routine */
+    if (context) {
+        cfs_free(context);
+    }
+
+    ks_put_tconn(tconn);
+
+    return rc;
+}
+
+/*
+ * ks_recv_mdl
+ *   Receive data from the peer for a stream connection by draining
+ *   the Tsdus queued on the tconn's receive chain into the given mdl.
+ *
+ * Arguments:
+ *   tconn: tdi connection object (sender or child)
+ *   mdl:   the mdl chain to contain the incoming data
+ *   size:  number of bytes wanted
+ *   flags: flags of the receiving; MSG_DONTWAIT selects non-blocking
+ *          mode and MSG_OOB selects the expedited Tsdu manager
+ *
+ * Return Value:
+ *   ks return code: the number of bytes copied, -EAGAIN when nothing
+ *   was available in non-blocking mode, -EINVAL for a tconn of the
+ *   wrong type, -ENOTCONN when the tconn is not connected.
+ *   rc is left at 0 when a copy routine reported a failure status.
+ *
+ * Notes:
+ *   The mdl is always released before returning.
+ */
+
+int
+ks_recv_mdl(
+    ksock_tconn_t * tconn,
+    ksock_mdl_t *   mdl,
+    int             size,
+    int             flags
+    )
+{
+    NTSTATUS        Status = STATUS_SUCCESS;
+    int             rc = 0;
+
+    BOOLEAN         bIsNonBlock;
+    BOOLEAN         bIsExpedited;
+
+    PKS_CHAIN       KsChain;
+    PKS_TSDUMGR     KsTsduMgr;
+    PKS_TSDU        KsTsdu;
+    PKS_TSDU_DAT    KsTsduDat;
+    PKS_TSDU_BUF    KsTsduBuf;
+    PKS_TSDU_MDL    KsTsduMdl;
+
+    PUCHAR          Buffer;
+
+    ULONG           BytesRecved = 0;    /* total copied into mdl so far */
+    ULONG           RecvedOnce;         /* copied during the current pass */
+
+    bIsNonBlock  = cfs_is_flag_set(flags, MSG_DONTWAIT);
+    bIsExpedited = cfs_is_flag_set(flags, MSG_OOB);
+
+    ks_get_tconn(tconn);
+
+    /* Each pass starts here; a blocking receive comes back after
+       waiting on the Tsdu manager event until size bytes are in. */
+Again:
+
+    RecvedOnce = 0;
+
+    spin_lock(&(tconn->kstc_lock));
+
+    if ( tconn->kstc_type != kstt_sender &&
+         tconn->kstc_type != kstt_child) {
+
+        rc = -EINVAL;
+        spin_unlock(&(tconn->kstc_lock));
+
+        goto errorout;
+    }
+
+    if (tconn->kstc_state != ksts_connected) {
+
+        rc = -ENOTCONN;
+        spin_unlock(&(tconn->kstc_lock));
+
+        goto errorout;
+    }
+
+    /* select the receive chain of this tconn type, then the expedited
+       or the normal Tsdu manager */
+    if (tconn->kstc_type == kstt_sender) {
+        KsChain = &(tconn->sender.kstc_recv);
+    } else {
+        LASSERT(tconn->kstc_type == kstt_child);
+        KsChain = &(tconn->child.kstc_recv);
+    }
+
+    if (bIsExpedited) {
+        KsTsduMgr = &(KsChain->Expedited);
+    } else {
+        KsTsduMgr = &(KsChain->Normal);
+    }
+
+    /* entered with kstc_lock held */
+NextTsdu:
+
+    if (list_empty(&(KsTsduMgr->TsduList))) {
+
+        //
+        // It's a notification event. We need to reset it to the
+        // un-signaled state in case there are no tsdus at all.
+        //
+
+        KeResetEvent(&(KsTsduMgr->Event));
+
+    } else {
+
+        KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link);
+        LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC);
+
+        /* remove the KsTsdu from the TsduMgr list so that the lock can
+           be released while its slots are copied out */
+        list_del(&(KsTsdu->Link));
+        KsTsduMgr->NumOfTsdu--;
+
+        spin_unlock(&(tconn->kstc_lock));
+
+        /* walk the slots of this Tsdu, starting at its StartOffset */
+        while ((ULONG)size > BytesRecved) {
+
+            ULONG BytesCopied = 0;
+            ULONG BytesToCopy = 0;
+            ULONG StartOffset = 0;
+
+            /* the slot type is not known yet, so view the same address
+               through all three slot layouts */
+            KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+            KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset);
+
+            if ( TSDU_TYPE_DAT == KsTsduDat->TsduType ||
+                 TSDU_TYPE_BUF == KsTsduBuf->TsduType ) {
+
+
+                //
+                // Data Tsdu Unit ...
+                //
+
+                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+                    if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) {
+                        /* data is not ready yet */
+                        KeResetEvent(&(KsTsduMgr->Event));
+                        printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat);
+                        break;
+                    }
+
+                    /* payload is stored inline in the slot */
+                    Buffer = &KsTsduDat->Data[0];
+                    StartOffset = KsTsduDat->StartOffset;
+                    if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) {
+                        /* the recv request can be satisfied from this slot ... */
+                        BytesToCopy = size - BytesRecved;
+                    } else {
+                        BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset;
+                    }
+
+                } else {
+
+                    if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) {
+                        /* data is not ready yet */
+                        KeResetEvent(&(KsTsduMgr->Event));
+                        DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf);
+                        break;
+                    }
+
+                    /* payload lives in a separate buffer (UserBuffer) */
+                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+                    Buffer = KsTsduBuf->UserBuffer;
+                    StartOffset = KsTsduBuf->StartOffset;
+
+                    if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) {
+                        /* the recv request can be satisfied from this slot ... */
+                        BytesToCopy = size - BytesRecved;
+                    } else {
+                        BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset;
+                    }
+                }
+
+                if (BytesToCopy > 0) {
+                    Status = TdiCopyBufferToMdl(
+                                    Buffer,
+                                    StartOffset,
+                                    BytesToCopy,
+                                    mdl,
+                                    BytesRecved,
+                                    &BytesCopied
+                                    );
+
+                    if (NT_SUCCESS(Status)) {
+
+                        if (BytesToCopy != BytesCopied) {
+                            cfs_enter_debugger();
+                        }
+
+                        BytesRecved += BytesCopied;
+                        RecvedOnce  += BytesCopied;
+
+                    } else {
+
+                        /* NOTE(review): on failure the byte counters are
+                           not advanced, but the slot offset below still
+                           moves by BytesCopied -- confirm this is intended */
+                        cfs_enter_debugger();
+
+                        if (STATUS_BUFFER_OVERFLOW == Status) {
+                        }
+                    }
+                }
+
+                /* advance inside the slot; step over the slot once it
+                   has been consumed completely */
+                if (TSDU_TYPE_DAT == KsTsduDat->TsduType) {
+
+                    KsTsduDat->StartOffset += BytesCopied;
+
+                    if (KsTsduDat->StartOffset == KsTsduDat->DataLength) {
+                        KsTsdu->StartOffset += KsTsduDat->TotalLength;
+                    }
+
+                } else {
+
+                    ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType);
+                    KsTsduBuf->StartOffset += BytesCopied;
+                    if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) {
+                        KsTsdu->StartOffset += sizeof(KS_TSDU_BUF);
+                        /* now we need to release the buf to the system pool */
+                        ExFreePool(KsTsduBuf->UserBuffer);
+                    }
+                }
+
+            } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) {
+
+                //
+                // MDL Tsdu Unit ...
+                //
+
+                if (KsTsduMdl->DataLength > size - BytesRecved) {
+
+                    /* the recv request can be satisfied from this slot ... */
+
+                    BytesToCopy = size - BytesRecved;
+
+                } else {
+
+                    BytesToCopy = KsTsduMdl->DataLength;
+                }
+
+                Status = KsCopyMdlChainToMdlChain(
+                            KsTsduMdl->Mdl,
+                            KsTsduMdl->StartOffset,
+                            mdl,
+                            BytesRecved,
+                            BytesToCopy,
+                            &BytesCopied
+                            );
+
+                if (NT_SUCCESS(Status)) {
+
+                    if (BytesToCopy != BytesCopied) {
+                        cfs_enter_debugger();
+                    }
+
+                    /* here DataLength is the remaining length: it
+                       shrinks as StartOffset grows */
+                    KsTsduMdl->StartOffset += BytesCopied;
+                    KsTsduMdl->DataLength  -= BytesCopied;
+
+                    BytesRecved += BytesCopied;
+                    RecvedOnce  += BytesCopied;
+                } else {
+                    cfs_enter_debugger();
+                }
+
+                if (0 == KsTsduMdl->DataLength) {
+
+                    //
+                    // Call TdiReturnChainedReceives to release the Tsdu memory
+                    //
+
+                    TdiReturnChainedReceives(
+                        &(KsTsduMdl->Descriptor),
+                        1 );
+
+                    KsTsdu->StartOffset += sizeof(KS_TSDU_MDL);
+                }
+
+            } else {
+                /* NOTE(review): the first printk has three conversions
+                   but is passed four arguments; also nothing advances
+                   in this branch, so the loop is re-entered with the
+                   same slot -- confirm */
+                printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n",
+                        KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength);
+                printk("        Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x",
+                        KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength);
+                cfs_enter_debugger();
+            }
+
+            if (KsTsdu->StartOffset == KsTsdu->LastOffset) {
+
+                //
+                // KsTsdu is empty now, we need to free it ...
+                //
+
+                KsPutKsTsdu(KsTsdu);
+                KsTsdu = NULL;
+
+                break;
+            }
+        }
+
+        spin_lock(&(tconn->kstc_lock));
+
+        /* a Tsdu that still holds data goes back to the head of the
+           list; if it was used up and more data is wanted, carry on
+           with the next one */
+        if (KsTsdu) {
+            KsTsduMgr->NumOfTsdu++;
+            list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList));
+        } else if ((ULONG)size > BytesRecved) {
+            goto NextTsdu;
+        }
+    }
+
+    /* account for what this pass consumed, clamping at zero */
+    if (KsTsduMgr->TotalBytes < RecvedOnce) {
+        cfs_enter_debugger();
+        KsTsduMgr->TotalBytes = 0;
+    } else {
+        KsTsduMgr->TotalBytes -= RecvedOnce;
+    }
+
+    spin_unlock(&(tconn->kstc_lock));
+
+    if (NT_SUCCESS(Status)) {
+
+        /* a blocking receive waits for more data until the whole
+           request has been filled */
+        if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) {
+
+            KeWaitForSingleObject(
+                &(KsTsduMgr->Event),
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
+
+            goto Again;
+        }
+
+        if (bIsNonBlock && (BytesRecved == 0)) {
+            rc = -EAGAIN;
+        } else {
+            rc = BytesRecved;
+        }
+    }
+
+errorout:
+
+    ks_put_tconn(tconn);
+
+    if (rc > 0) {
+        KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc));
+    } else {
+        KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status));
+    }
+
+    /* release the chained mdl */
+    ks_release_mdl(mdl, FALSE);
+
+    return (rc);
+}
+
+
+/*
+ * ks_init_tdi_data
+ *   Set up the global ks_data: zero it, then initialize the tconn,
+ *   tsdu and daemon bookkeeping and register the PnP handlers.
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   int: 0 on success, -ENOMEM when one of the two slab caches
+ *        cannot be created
+ *
+ * Notes:
+ *   When the tsdu slab cannot be created, the tconn slab that was
+ *   created before it is destroyed again.
+ */
+
+int
+ks_init_tdi_data()
+{
+    /* start from a clean slate */
+    RtlZeroMemory(&ks_data, sizeof(ks_data_t));
+
+    /* tconn related globals: lock, list, exit event and slab */
+    spin_lock_init(&ks_data.ksnd_tconn_lock);
+    CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns);
+    cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE);
+
+    ks_data.ksnd_tconn_slab =
+        cfs_mem_cache_create("tcon", sizeof(ksock_tconn_t) , 0, 0);
+    if (NULL == ks_data.ksnd_tconn_slab) {
+        return -ENOMEM;
+    }
+
+    /* tsdu related globals: lock, free list, unit size and slab */
+    spin_lock_init(&ks_data.ksnd_tsdu_lock);
+    CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus);
+    ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */
+
+    ks_data.ksnd_tsdu_slab =
+        cfs_mem_cache_create("tsdu", ks_data.ksnd_tsdu_size, 0, 0);
+    if (NULL == ks_data.ksnd_tsdu_slab) {
+        /* undo the tconn slab before bailing out */
+        cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
+        ks_data.ksnd_tconn_slab = NULL;
+        return -ENOMEM;
+    }
+
+    /* daemon related globals: lock, list and exit event */
+    spin_lock_init(&ks_data.ksnd_daemon_lock);
+    CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons);
+    cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE);
+
+    KsRegisterPnpHandlers();
+
+    return 0;
+}
+
+
+/*
+ * ks_fini_tdi_data
+ *   finalize the global data in ks_data
+ *
+ * Arguments:
+ *   N/A
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   Blocks on ksnd_tconn_exit before the tconn slab is destroyed.
+ *   The event is signalled here when the tconn list is already
+ *   empty; otherwise it has to be signalled by code outside this
+ *   function.
+ */
+
+void
+ks_fini_tdi_data()
+{
+    PKS_TSDU            KsTsdu = NULL;
+    struct list_head *  list   = NULL;
+
+    /* clean up the pnp handler and address slots */
+    KsDeregisterPnpHandlers();
+
+    /* we need to wait until all the tconns are freed */
+    spin_lock(&(ks_data.ksnd_tconn_lock));
+
+    if (list_empty(&(ks_data.ksnd_tconns))) {
+        cfs_wake_event(&ks_data.ksnd_tconn_exit);
+    }
+    spin_unlock(&(ks_data.ksnd_tconn_lock));
+
+    /* now wait on the tconn exit event */
+    cfs_wait_event(&ks_data.ksnd_tconn_exit, 0);
+
+    /* it's safe to delete the tconn slab ... */
+    cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab);
+    ks_data.ksnd_tconn_slab = NULL;
+
+    /*
+     * Release every tsdu buffer on the free list. Each entry is
+     * unlinked before it is handed back to the slab, so the list is
+     * never walked through memory that has already been freed.
+     */
+    spin_lock(&(ks_data.ksnd_tsdu_lock));
+    while (!list_empty(&ks_data.ksnd_freetsdus)) {
+        list = ks_data.ksnd_freetsdus.next;
+        list_del(list);
+
+        KsTsdu = list_entry (list, KS_TSDU, Link);
+
+        cfs_mem_cache_free(
+                ks_data.ksnd_tsdu_slab,
+                KsTsdu );
+    }
+    spin_unlock(&(ks_data.ksnd_tsdu_lock));
+
+    /* it's safe to delete the tsdu slab ... */
+    cfs_mem_cache_destroy(ks_data.ksnd_tsdu_slab);
+    ks_data.ksnd_tsdu_slab = NULL;
+}
+
+/*
+ * ks_create_child_tconn
+ *   Build one backlog (child) connection for a listener: allocate
+ *   the tconn, initialize it as a child, bind it against the
+ *   parent, open its connection object and associate the address.
+ *
+ * Arguments:
+ *   parent: the listener daemon connection
+ *
+ * Return Value:
+ *   the child connection in state ksts_associated, or NULL on failure
+ *
+ * Notes:
+ *   A bind failure releases the tconn with ks_free_tconn(); failures
+ *   after the bind drop it with ks_put_tconn() and then enter the
+ *   debugger.
+ */
+
+ksock_tconn_t *
+ks_create_child_tconn(
+    ksock_tconn_t * parent
+    )
+{
+    NTSTATUS            rc;
+    ksock_tconn_t *     spare;
+
+    /* allocate the tdi connection object */
+    spare = ks_create_tconn();
+    if (spare == NULL) {
+        return NULL;
+    }
+
+    /* mark it as a child, then bind it */
+    ks_init_child(spare);
+
+    if (ks_bind_tconn(spare, parent, 0, 0) < 0) {
+        ks_free_tconn(spare);
+        return NULL;
+    }
+
+    /* open the connection object and, if that worked, associate it */
+    rc = KsOpenConnection(
+                &(spare->kstc_dev),
+                (PVOID)spare,
+                &(spare->child.kstc_info.Handle),
+                &(spare->child.kstc_info.FileObject)
+                );
+
+    if (NT_SUCCESS(rc)) {
+        rc = KsAssociateAddress(
+                    spare->kstc_addr.Handle,
+                    spare->child.kstc_info.FileObject
+                    );
+    }
+
+    if (!NT_SUCCESS(rc)) {
+        ks_put_tconn(spare);
+        cfs_enter_debugger();
+        return NULL;
+    }
+
+    spare->kstc_state = ksts_associated;
+
+    return spare;
+}
+
+/*
+ * ks_replenish_backlogs
+ *   Top up the listener with child connections until the listening
+ *   and accepted queues together hold nbacklog entries.
+ *
+ * Arguments:
+ *   parent:   the parent listening tdi connection
+ *   nbacklog: number of child connections wanted in the queues
+ *
+ * Return Value:
+ *   N/A
+ *
+ * Notes:
+ *   A child that cannot be created is not retried; the debugger is
+ *   entered instead and the loop moves on.
+ */
+
+void
+ks_replenish_backlogs(
+    ksock_tconn_t * parent,
+    int     nbacklog
+    )
+{
+    ksock_tconn_t * spare;
+    int             queued;
+    int             missing;
+
+    /* children the listener already holds, on either queue */
+    queued = parent->listener.kstc_listening.num +
+             parent->listener.kstc_accepted.num;
+
+    missing = (queued < nbacklog) ? (nbacklog - queued) : 0;
+
+    for (; missing != 0; missing--) {
+
+        /* the child is created before the parent lock is taken */
+        spare = ks_create_child_tconn(parent);
+
+        spin_lock(&(parent->kstc_lock));
+
+        if (spare == NULL) {
+            cfs_enter_debugger();
+        } else {
+            spin_lock(&spare->kstc_lock);
+
+            /* attach it to the listening list of the daemon */
+            list_add( &spare->child.kstc_link,
+                      &parent->listener.kstc_listening.list );
+            parent->listener.kstc_listening.num++;
+            spare->child.kstc_queued = TRUE;
+
+            spin_unlock(&spare->kstc_lock);
+        }
+
+        spin_unlock(&(parent->kstc_lock));
+    }
+}
+
+/*
+ * ks_start_listen
+ *   Fill the listener's backlog, install its event handlers and
+ *   move it into the listening state.
+ *
+ * Arguments:
+ *   tconn:    the parent listening tdi connection
+ *   nbacklog: number of child connections to keep queued
+ *
+ * Return Value:
+ *   the result of ks_set_handlers(): negative on error (the state is
+ *   then left untouched), otherwise success
+ *
+ * Notes:
+ *   N/A
+ */
+
+int
+ks_start_listen(ksock_tconn_t *tconn, int nbacklog)
+{
+    int rc;
+
+    /* children first, then the event callbacks */
+    ks_replenish_backlogs(tconn, nbacklog);
+
+    rc = ks_set_handlers(tconn);
+
+    if (rc >= 0) {
+        spin_lock(&(tconn->kstc_lock));
+        tconn->listener.nbacklog = nbacklog;
+        tconn->kstc_state = ksts_listening;
+        cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED);
+        spin_unlock(&(tconn->kstc_lock));
+    }
+
+    return rc;
+}
+
+/*
+ * ks_stop_listen
+ *   Shut a listener down: remove its event handlers, clear the
+ *   KS_TCONN_DAEMON_STARTED flag, drop one reference on every child
+ *   on the listening queue, signal the accept event and finally
+ *   drop one reference on the listener itself.
+ */
+void
+ks_stop_listen(ksock_tconn_t *tconn)
+{
+    struct list_head *      head;
+    struct list_head *      list;
+    ksock_tconn_t *         backlog;
+
+    /* reset all tdi event callbacks to NULL */
+    ks_reset_handlers (tconn);
+
+    spin_lock(&tconn->kstc_lock);
+
+    cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED);
+
+    /*
+     * Clean up all the listening backlog child connections. The
+     * successor is read before the reference is dropped, so the walk
+     * never depends on a tconn that ks_put_tconn() may have released.
+     */
+    head = &(tconn->listener.kstc_listening.list);
+    list = head->next;
+    while (list != head) {
+        backlog = list_entry(list, ksock_tconn_t, child.kstc_link);
+        list = list->next;
+
+        /* destroy and free it */
+        ks_put_tconn(backlog);
+    }
+
+    spin_unlock(&tconn->kstc_lock);
+
+    /* wake up any thread waiting for new incoming connections */
+    KeSetEvent(&tconn->listener.kstc_accept_event, 0, FALSE);
+
+    /* free the listening daemon tconn */
+    ks_put_tconn(tconn);
+}
+
+
+/*
+ * ks_wait_child_tconn
+ *   accept a child connection from peer
+ *
+ * Arguments:
+ *   parent:   the daemon tdi connection listening
+ *   child:    to contain the accepted connection
+ *
+ * Return Value:
+ *   -1 when the listening queue is empty after replenishing;
+ *   otherwise 0, with *child set to the accepted connection, or to
+ *   NULL when the wait ended because KS_TCONN_DAEMON_STARTED was
+ *   cleared.
+ *
+ * Notes:
+ *   The parent lock is held whenever control is at the "again"
+ *   label, and is always released before blocking on the accept
+ *   event.
+ */
+
+int
+ks_wait_child_tconn(
+    ksock_tconn_t *  parent,
+    ksock_tconn_t ** child
+    )
+{
+    struct list_head * tmp;
+    ksock_tconn_t * backlog = NULL;
+
+    ks_replenish_backlogs(parent, parent->listener.nbacklog);
+
+    spin_lock(&(parent->kstc_lock));
+
+    if (parent->listener.kstc_listening.num <= 0) {
+        spin_unlock(&(parent->kstc_lock));
+        return -1; /* nothing is listening; *child is left untouched */
+    }
+
+again:
+
+    /* scan the listening queue for a connection that has been accepted */
+
+    list_for_each(tmp, &(parent->listener.kstc_listening.list)) {
+        backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link);
+
+        spin_lock(&(backlog->kstc_lock));
+
+        if (backlog->child.kstc_accepted) {
+
+            LASSERT(backlog->kstc_state == ksts_connected);
+            LASSERT(backlog->child.kstc_busy);
+
+            list_del(&(backlog->child.kstc_link));
+            list_add(&(backlog->child.kstc_link),
+                     &(parent->listener.kstc_accepted.list));
+            parent->listener.kstc_accepted.num++;
+            parent->listener.kstc_listening.num--;
+            backlog->child.kstc_queueno = 1;
+
+            spin_unlock(&(backlog->kstc_lock));
+
+            break;
+        } else {
+            spin_unlock(&(backlog->kstc_lock));
+            backlog = NULL;
+        }
+    }
+
+    spin_unlock(&(parent->kstc_lock));
+
+    /* nothing accepted yet: wait until a new incoming connection is
+       signalled, or until the listening daemon is being shut down */
+    if (backlog == NULL) {
+
+        NTSTATUS    Status; /* wait result; not examined below */
+
+        Status = KeWaitForSingleObject(
+                &(parent->listener.kstc_accept_event),
+                Executive,
+                KernelMode,
+                FALSE,
+                NULL
+                );
+
+        spin_lock(&(parent->kstc_lock));
+
+        /* is the daemon expected to exit? if not, rescan with the lock held */
+        if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) {
+            spin_unlock(&(parent->kstc_lock));
+        } else {
+            goto again;
+        }
+    }
+
+    if (backlog) {
+        /* query the local ip address of the connection */
+        ks_query_local_ipaddr(backlog);
+    }
+
+    *child = backlog;
+
+    return 0;
+}
+
+/*
+ * libcfs_ipif_query
+ *   Look an interface up by name (case-insensitively) in the global
+ *   address slot list and copy out its up flag, ip address and
+ *   netmask.
+ *
+ * Return Value:
+ *   0 when the interface was found, 1 when it was not (the output
+ *   arguments are then left untouched)
+ */
+int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask)
+{
+    ks_addr_slot_t * cur;
+    PLIST_ENTRY      entry;
+    int              missing = 1;
+
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    for (entry = ks_data.ksnd_addrs_list.Flink;
+         entry != &ks_data.ksnd_addrs_list;
+         entry = entry->Flink) {
+
+        cur = CONTAINING_RECORD(entry, ks_addr_slot_t, link);
+        if (_stricmp(name, &cur->iface[0]) != 0) {
+            continue;
+        }
+
+        *up = cur->up;
+        *ip = cur->ip_addr;
+        *mask = cur->netmask;
+        missing = 0;
+        break;
+    }
+
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+
+    return missing;
+}
+
+/*
+ * libcfs_ipif_enumerate
+ *   Allocate an array of interface-name pointers, one per entry of
+ *   the global address slot list, and store it in *names.
+ *
+ * Return Value:
+ *   the number of names stored; 0 when the array could not be
+ *   allocated (*names is NULL in that case)
+ *
+ * Notes:
+ *   The array entries point at the iface member of the slots; the
+ *   strings are not copied. Only the array itself is freed by
+ *   libcfs_ipif_free_enumeration().
+ */
+int libcfs_ipif_enumerate(char ***names)
+{
+    ks_addr_slot_t * slot = NULL;
+    PLIST_ENTRY      list = NULL;
+    int              nips = 0;
+
+    spin_lock(&ks_data.ksnd_addrs_lock);
+
+    *names = cfs_alloc(sizeof(char *) * ks_data.ksnd_naddrs, CFS_ALLOC_ZERO);
+    if (*names == NULL) {
+        goto errorout;
+    }
+
+    list = ks_data.ksnd_addrs_list.Flink;
+    while (list != &ks_data.ksnd_addrs_list) {
+        slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
+        list = list->Flink;
+
+        /* check for room BEFORE storing, so a list longer than
+           ksnd_naddrs can never write past the end of the array */
+        cfs_assert(nips < ks_data.ksnd_naddrs);
+        if (nips >= ks_data.ksnd_naddrs) {
+            break;
+        }
+
+        (*names)[nips++] = slot->iface;
+    }
+
+    cfs_assert(nips == ks_data.ksnd_naddrs);
+
+errorout:
+
+    spin_unlock(&ks_data.ksnd_addrs_lock);
+    return nips;
+}
+
+/* Free a name array; a NULL array is ignored and n is not used. */
+void libcfs_ipif_free_enumeration(char **names, int n)
+{
+    if (names == NULL) {
+        return;
+    }
+
+    cfs_free(names);
+}
+
+/*
+ * libcfs_sock_listen
+ *   Create a listener tconn, bind it to ip/port and start listening
+ *   with the requested backlog. *sockp is written only on success.
+ *
+ * Return Value:
+ *   -ENOMEM when the tconn cannot be created, the negative result of
+ *   ks_bind_tconn() or ks_start_listen() on failure, otherwise the
+ *   (non-negative) result of ks_start_listen()
+ */
+int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog)
+{
+    ksock_tconn_t *         listener;
+    int                     rc;
+
+    listener = ks_create_tconn();
+    if (listener == NULL) {
+        return -ENOMEM;
+    }
+
+    /* set the tconn up as a listener and bind it */
+    ks_init_listener(listener);
+
+    rc = ks_bind_tconn(listener, NULL, ip, (unsigned short)port);
+    if (rc < 0) {
+        ks_free_tconn(listener);
+        return rc;
+    }
+
+    /* create the listening children and enter the listen state */
+    rc = ks_start_listen(listener, backlog);
+    if (rc < 0) {
+        ks_stop_listen(listener);
+        return rc;
+    }
+
+    *sockp = listener;
+
+    return rc;
+}
+
+/* Wait for an incoming connection on sock; thin wrapper over
+   ks_wait_child_tconn(), whose result is returned unchanged. */
+int libcfs_sock_accept(struct socket **newsockp, struct socket *sock)
+{
+    int rc = ks_wait_child_tconn(sock, newsockp);
+
+    return rc;
+}
+
+/*
+ * Abort a pending accept on a listener: clear
+ * KS_TCONN_DAEMON_STARTED and signal the accept event, both under
+ * the socket lock.
+ */
+void libcfs_sock_abort_accept(struct socket *sock)
+{
+    LASSERT(sock->kstc_type == kstt_listener);
+
+    spin_lock(&(sock->kstc_lock));
+
+    /* the waiter checks this flag once it has been woken */
+    cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED);
+    KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE);
+
+    spin_unlock(&(sock->kstc_lock));
+}
+
+/*
+ * libcfs_sock_connect
+ *   build a connection between local ip/port and the peer ip/port.
+ *
+ * Arguments:
+ *   sockp:      receives the connected tconn; set to NULL on failure
+ *   fatal:      not written by this implementation
+ *               NOTE(review): confirm callers do not read it
+ *   local_ip:   local ip address
+ *   local_port: local port number
+ *   peer_ip:    peer's ip address
+ *   peer_port:  peer's port number
+ *
+ * Return Value:
+ *   -ENOMEM when the tconn cannot be created, the negative result of
+ *   ks_bind_tconn() or ks_build_tconn() on failure, otherwise the
+ *   result of ks_build_tconn()
+ *
+ * Notes:
+ *   N/A
+ */
+
+
+int libcfs_sock_connect(struct socket **sockp, int *fatal,
+                        __u32 local_ip, int local_port,
+                        __u32 peer_ip, int peer_port)
+{
+    ksock_tconn_t * sender;
+    int             rc;
+
+    *sockp = NULL;
+
+    KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n",
+                peer_ip, peer_port, local_ip, local_port ));
+
+    /* create the tdi connection structure */
+    sender = ks_create_tconn();
+    if (sender == NULL) {
+        return -ENOMEM;
+    }
+
+    /* set it up as a sender and bind the local address */
+    ks_init_sender(sender);
+
+    rc = ks_bind_tconn(sender, NULL, local_ip, (unsigned short)local_port);
+    if (rc < 0) {
+        KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n",
+                    local_ip, local_port ));
+        ks_free_tconn(sender);
+        return rc;
+    }
+
+    /* connect to the remote peer */
+    rc = ks_build_tconn(sender, peer_ip, (unsigned short)peer_port);
+    if (rc < 0) {
+        KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n",
+                    peer_ip, peer_port ));
+
+        ks_put_tconn(sender);
+        return rc;
+    }
+
+    *sockp = sender;
+
+    return rc;
+}
+
+/* Stub: the arguments are ignored and 0 is returned. */
+int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize)
+{
+    return 0;
+}
+
+/* Stub: returns 0 without writing *txbufsize or *rxbufsize. */
+int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize)
+{
+    return 0;
+}
+
+/*
+ * libcfs_sock_getaddr
+ *   Report the local or remote ip/port of a socket in host byte
+ *   order. ip and port may each be NULL when the caller does not
+ *   need that value.
+ *
+ * Return Value:
+ *   0 on success; -ENOTCONN when no address is available (a remote
+ *   address is only taken from sender and child sockets)
+ */
+int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port)
+{
+    PTRANSPORT_ADDRESS  taddr = NULL;
+    PTDI_ADDRESS_IP     addr;
+    int                 rc = -ENOTCONN;
+
+    spin_lock(&socket->kstc_lock);
+
+    /* pick the transport address that was asked for */
+    if (!remote) {
+        taddr = &(socket->kstc_addr.Tdi);
+    } else if (socket->kstc_type == kstt_sender) {
+        taddr = socket->sender.kstc_info.Remote;
+    } else if (socket->kstc_type == kstt_child) {
+        taddr = socket->child.kstc_info.Remote;
+    }
+
+    if (taddr != NULL) {
+        addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address));
+        if (ip != NULL) {
+            *ip = ntohl (addr->in_addr);
+        }
+        if (port != NULL) {
+            *port = ntohs (addr->sin_port);
+        }
+        rc = 0;
+    }
+
+    spin_unlock(&socket->kstc_lock);
+
+    return rc;
+}
+
+/*
+ * libcfs_sock_write
+ *   Send nob bytes from buffer, locking and sending the unsent
+ *   remainder on each pass. The timeout argument is not used.
+ *
+ * Return Value:
+ *   0 once everything has been sent; otherwise the first result of
+ *   ks_lock_buffer() that is negative or of ks_send_mdl() that is
+ *   not positive
+ */
+int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
+{
+    ksock_mdl_t * mdl;
+    int           rc;
+    int           done;
+
+    for (done = 0; done < nob; done += rc) {
+
+        /* lock the part of the user buffer not sent yet */
+        rc = ks_lock_buffer( (char *)buffer + done,
+                        FALSE, nob - done, IoReadAccess, &mdl );
+        if (rc < 0) {
+            return (rc);
+        }
+
+        /* send out the whole mdl */
+        rc = ks_send_mdl( sock, NULL, mdl, nob - done, 0 );
+        if (rc <= 0) {
+            return (rc);
+        }
+    }
+
+    return (0);
+}
+
+/*
+ * libcfs_sock_read
+ *   Receive nob bytes into buffer, locking and filling the unread
+ *   remainder on each pass. The timeout argument is not used.
+ *
+ * Return Value:
+ *   0 once everything has been received; otherwise the first result
+ *   of ks_lock_buffer() that is negative or of ks_recv_mdl() that is
+ *   not positive
+ */
+int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
+{
+    ksock_mdl_t * mdl;
+    int           rc;
+    int           done;
+
+    for (done = 0; done < nob; done += rc) {
+
+        /* lock the part of the user buffer not filled yet */
+        rc = ks_lock_buffer( (char *)buffer + done,
+                               FALSE, nob - done, IoWriteAccess, &mdl );
+        if (rc < 0) {
+            return (rc);
+        }
+
+        /* receive into the locked buffer */
+        rc = ks_recv_mdl( sock, mdl, nob - done, 0 );
+        if (rc <= 0) {
+            return (rc);
+        }
+    }
+
+    return (0);
+}
+
+/*
+ * Release a socket: a listener that is in the listening state is
+ * stopped through ks_stop_listen(); every other socket just has one
+ * reference dropped.
+ */
+void libcfs_sock_release(struct socket *sock)
+{
+    if (sock->kstc_type != kstt_listener ||
+        sock->kstc_state != ksts_listening) {
+        ks_put_tconn(sock);
+        return;
+    }
+
+    ks_stop_listen(sock);
+}
diff --git a/libcfs/libcfs/winnt/winnt-tracefile.c b/libcfs/libcfs/winnt/winnt-tracefile.c
new file mode 100644 (file)
index 0000000..61ba735
--- /dev/null
@@ -0,0 +1,224 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include <libcfs/libcfs.h>
+#include <libcfs/kp30.h>
+#include "tracefile.h"
+
+#ifndef get_cpu
+#define get_cpu() smp_processor_id()
+#define put_cpu() do { } while (0)
+#endif
+
+#define TCD_TYPE_MAX        1
+
+event_t     tracefile_event;
+
+/*
+ * tracefile_init_arch
+ *   Initialize the tracefile event, allocate the per-type trace data
+ *   arrays (NR_CPUS entries each) and one console buffer per cpu.
+ *
+ * Return Value:
+ *   0 on success; -ENOMEM when an allocation fails, after
+ *   tracefile_fini_arch() has released whatever was allocated.
+ *
+ * Notes:
+ *   The function returns a status on every path, so it is declared
+ *   int rather than void. Both global tables are zeroed before any
+ *   allocation so the cleanup path only ever sees NULL or live
+ *   pointers.
+ */
+int tracefile_init_arch()
+{
+    int    i;
+    int    j;
+    struct trace_cpu_data *tcd;
+
+    cfs_init_event(&tracefile_event, TRUE, TRUE);
+
+    /* clear both tables up front */
+    memset(trace_data, 0, sizeof(trace_data));
+    memset(trace_console_buffers, 0, sizeof(trace_console_buffers));
+
+    /* initialize trace_data */
+    for (i = 0; i < TCD_TYPE_MAX; i++) {
+        trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0);
+        if (trace_data[i] == NULL)
+            goto out;
+    }
+
+    /* arch related info initialized */
+    tcd_for_each(tcd, i, j) {
+        tcd->tcd_pages_factor = 100; /* Only one type */
+        tcd->tcd_cpu = j;
+        tcd->tcd_type = i;
+    }
+
+    /* one console buffer per cpu */
+    for (i = 0; i < NR_CPUS; i++) {
+        for (j = 0; j < 1; j++) {
+            trace_console_buffers[i][j] =
+                cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE,
+                          CFS_ALLOC_ZERO);
+
+            if (trace_console_buffers[i][j] == NULL)
+                goto out;
+        }
+    }
+
+    return 0;
+
+out:
+    tracefile_fini_arch();
+    KsPrint((0, "lnet: No enough memory\n"));
+    return -ENOMEM;
+}
+
+/*
+ * tracefile_fini_arch
+ *   Free the console buffers and the trace data arrays and reset the
+ *   freed slots to NULL.
+ *
+ * Notes:
+ *   The trace_data walk stops at the first NULL slot and is bounded
+ *   by TCD_TYPE_MAX, the number of slots this file's init routine
+ *   fills, so it cannot run past the entries it owns.
+ */
+void tracefile_fini_arch()
+{
+    int    i;
+    int    j;
+
+    for (i = 0; i < NR_CPUS; i++) {
+        for (j = 0; j < 2; j++) {
+            if (trace_console_buffers[i][j] != NULL) {
+                cfs_free(trace_console_buffers[i][j]);
+                trace_console_buffers[i][j] = NULL;
+            }
+        }
+    }
+
+    for (i = 0; i < TCD_TYPE_MAX && trace_data[i] != NULL; i++) {
+        cfs_free(trace_data[i]);
+        trace_data[i] = NULL;
+    }
+}
+
+/* Take the read side by waiting on tracefile_event. */
+void tracefile_read_lock()
+{
+    cfs_wait_event(&tracefile_event, 0);
+}
+
+/* Release the read side by signalling tracefile_event. */
+void tracefile_read_unlock()
+{
+    cfs_wake_event(&tracefile_event);
+}
+
+/* Take the write side by waiting on tracefile_event. */
+void tracefile_write_lock()
+{
+    cfs_wait_event(&tracefile_event, 0);
+}
+
+/* Release the write side by signalling tracefile_event. */
+void tracefile_write_unlock()
+{
+    cfs_wake_event(&tracefile_event);
+}
+
+/* Return console buffer 0 of the processor this call runs on. */
+char *
+trace_get_console_buffer(void)
+{
+#pragma message ("is there possible problem with pre-emption ?")
+    return trace_console_buffers[(int) KeGetCurrentProcessorNumber()][0];
+}
+
+/* Counterpart of trace_get_console_buffer(); nothing to undo here. */
+void
+trace_put_console_buffer(char *buffer)
+{
+}
+
+/* Return the type-0 trace data of the processor this call runs on. */
+struct trace_cpu_data *
+trace_get_tcd(void)
+{
+#pragma message("todo: return NULL if in interrupt context")
+
+    return &(*trace_data[0])[(int) KeGetCurrentProcessorNumber()].tcd;
+}
+
+/* Counterpart of trace_get_tcd(); both arguments are unused. */
+void
+trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+{
+}
+
+/* Only asserts that the tcd type is in range; always returns 1. */
+int
+trace_lock_tcd(struct trace_cpu_data *tcd)
+{
+    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+
+    return 1;
+}
+
+/* Only asserts that the tcd type is in range; nothing is released. */
+void
+trace_unlock_tcd(struct trace_cpu_data *tcd)
+{
+    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+}
+
+/*
+ * Fill a debug message header: caller-supplied subsystem, mask,
+ * line and stack, plus the current time, cpu id and pid. The
+ * external pid is always set to 0.
+ */
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+                    const int line, unsigned long stack)
+{
+    struct timeval now;
+
+    do_gettimeofday(&now);
+
+    /* values handed in by the caller */
+    header->ph_subsys = subsys;
+    header->ph_mask = mask;
+
+    /* values sampled here */
+    header->ph_cpu_id = smp_processor_id();
+    header->ph_sec = (__u32)now.tv_sec;
+    header->ph_usec = now.tv_usec;
+
+    header->ph_stack = stack;
+    header->ph_pid = current->pid;
+    header->ph_line_num = line;
+    header->ph_extern_pid = 0;
+}
+
+/*
+ * Print one message through printk. The log level and the
+ * "Lustre"/"LustreError" prefix are chosen from the highest-priority
+ * bit set in mask; D_CONSOLE messages are printed without the
+ * pid/file/line/function decoration.
+ *
+ * Both the level and the prefix stay NULL when none of the tested
+ * mask bits is set.
+ */
+void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
+                                 int len, const char *file, const char *fn)
+{
+    char *level = NULL;
+    char *tag = NULL;
+
+    if (mask & D_EMERG) {
+        tag = "LustreError";
+        level = KERN_EMERG;
+    } else if (mask & D_ERROR) {
+        tag = "LustreError";
+        level = KERN_ERR;
+    } else if (mask & D_WARNING) {
+        tag = "Lustre";
+        level = KERN_WARNING;
+    } else if ((mask & libcfs_printk) || (mask & D_CONSOLE)) {
+        tag = "Lustre";
+        level = KERN_INFO;
+    }
+
+    if (mask & D_CONSOLE) {
+        printk("%s%s: %s", level, tag, buf);
+        return;
+    }
+
+    printk("%s%s: %d:%d:(%s:%d:%s()) %s", level, tag, hdr->ph_pid,
+           hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf);
+}
+
+/* Always reports ownership (returns 1); neither argument is used. */
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+    return 1;
+}
+
+/*
+ * Upper limit for the debug buffer, in MB: the larger of 512 and
+ * 80% of num_physpages converted from pages to megabytes.
+ */
+int trace_max_debug_mb(void)
+{
+    int  total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
+    int  share_mb = (total_mb * 80)/100;
+
+    return MAX(512, share_mb);
+}
+
+/* Not implemented: the #error below stops the build on purpose. */
+void
+trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
+{
+#error "tbd"
+}
+
diff --git a/libcfs/libcfs/winnt/winnt-usr.c b/libcfs/libcfs/winnt/winnt-usr.c
new file mode 100644 (file)
index 0000000..f79347b
--- /dev/null
@@ -0,0 +1,85 @@
+
+#ifndef __KERNEL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <io.h>
+#include <time.h>
+#include <windows.h>
+
+/* User-space stub: the message is discarded. */
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                              const int line, unsigned long stack,
+                              char *format, ...)
+{
+}
+
+/* User-space stub: nothing is created and 0 is returned. */
+int cfs_proc_mknod(const char *path, unsigned short  mode,  unsigned int dev)
+{
+    return 0;
+}
+
+
+/*
+ * print_last_error
+ *   Print Prefix followed by the system message text for the calling
+ *   thread's last error code.
+ *
+ * Notes:
+ *   The error code is captured before any other call is made. When
+ *   FormatMessage cannot produce a message, the numeric code is
+ *   printed instead, so an unset buffer pointer is never printed or
+ *   passed to LocalFree.
+ */
+void print_last_error(char* Prefix)
+{
+    LPVOID lpMsgBuf = NULL;
+    DWORD  dwError  = GetLastError();
+    DWORD  dwLength;
+
+    dwLength = FormatMessage(
+        FORMAT_MESSAGE_ALLOCATE_BUFFER |
+        FORMAT_MESSAGE_FROM_SYSTEM |
+        FORMAT_MESSAGE_IGNORE_INSERTS,
+        NULL,
+        dwError,
+        0,
+        (LPTSTR) &lpMsgBuf,
+        0,
+        NULL
+        );
+
+    if (dwLength == 0 || lpMsgBuf == NULL) {
+        /* no message text available: fall back to the raw code */
+        printf("%s error code %lu\n", Prefix, (unsigned long) dwError);
+        return;
+    }
+
+    printf("%s %s", Prefix, (LPTSTR) lpMsgBuf);
+
+    LocalFree(lpMsgBuf);
+}
+
+//
+// The following declarations are defined in io.h of VC.
+// sys/types.h conflicts with io.h, so the declarations
+// have to be placed here instead.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    void
+    __declspec (naked) __cdecl _chkesp(void)
+    {
+#if _X86_
+        __asm {  jz      exit_chkesp     }; /* zero flag set: skip the breakpoint */
+        __asm {  int     3               }; /* otherwise raise a breakpoint */
+    exit_chkesp:
+        __asm {  ret                     }; /* naked function: explicit return */
+#endif
+    }
+#ifdef __cplusplus
+}
+#endif
+
+/* Sleep for the given number of seconds via Sleep(); always returns 0. */
+unsigned int sleep (unsigned int seconds)
+{
+    unsigned int msecs = seconds * 1000;
+
+    Sleep(msecs);
+
+    return 0;
+}
+
+/* Stub: returns 0 without writing anything into name. */
+int gethostname(char * name, int namelen)
+{
+    return 0;
+}
+
+/* Stub: prints a fixed line, ignores its arguments and returns 0. */
+int ioctl (
+    int handle,
+    int cmd,
+    void *buffer
+    )
+{
+    printf("hello, world\n");
+
+    return 0;
+}
+
+#endif /* __KERNEL__ */
\ No newline at end of file
diff --git a/libcfs/libcfs/winnt/winnt-utils.c b/libcfs/libcfs/winnt/winnt-utils.c
new file mode 100644 (file)
index 0000000..cd33aa2
--- /dev/null
@@ -0,0 +1,158 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ *  Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or modify it under
+ *   the terms of version 2 of the GNU General Public License as published by
+ *   the Free Software Foundation. Lustre is distributed in the hope that it
+ *   will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details. You should have received a
+ *   copy of the GNU General Public License along with Lustre; if not, write
+ *   to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ *   USA.
+ */
+
+
+/*
+ * miscellaneous libcfs stuff
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+#include <lnet/types.h>
+
+/*
+ * Convert a server error code to the client format: the code is
+ * narrowed to an NTSTATUS and mapped through cfs_error_code().
+ */
+int convert_server_error(__u64 ecode)
+{
+    NTSTATUS status = (NTSTATUS)ecode;
+
+    return cfs_error_code(status);
+}
+
+/*
+ * Convert <fcntl.h> flags from client to server.
+ *
+ * The nt kernel describes open flags through several members
+ * (DesiredAccess/ShareAccess/CreateDisposition/CreateOptions), so
+ * the conversion is better done at the point of use. This routine
+ * therefore only clears *result and returns 0.
+ */
+
+int convert_client_oflag(int cflag, int *result)
+{
+    *result = 0;
+
+    return 0;
+}
+
+
+/*
+ * cfs_error_code
+ *   Map an NTSTATUS value to a negative errno-style code.
+ *
+ * Return Value:
+ *   the mapped negative code for every status listed below; for any
+ *   other status, 0 when NT_SUCCESS() holds and -EINVAL otherwise.
+ *
+ * Notes:
+ *   The explicit cases are matched before the NT_SUCCESS() test, so
+ *   a listed status always yields its mapped code.
+ */
+int cfs_error_code(NTSTATUS Status)
+{
+    int rc;
+
+    switch (Status) {
+
+        case STATUS_ACCESS_DENIED:
+            rc = -EACCES;
+            break;
+
+        case STATUS_ACCESS_VIOLATION:
+            rc = -EFAULT;
+            break;
+
+        case STATUS_BUFFER_TOO_SMALL:
+            rc = -ETOOSMALL;
+            break;
+
+        case STATUS_INVALID_PARAMETER:
+            rc = -EINVAL;
+            break;
+
+        case STATUS_NOT_IMPLEMENTED:
+        case STATUS_NOT_SUPPORTED:
+            rc = -EOPNOTSUPP;
+            break;
+
+        case STATUS_INVALID_ADDRESS:
+        case STATUS_INVALID_ADDRESS_COMPONENT:
+            rc = -EADDRNOTAVAIL;
+            break;
+
+        case STATUS_NO_SUCH_DEVICE:
+        case STATUS_NO_SUCH_FILE:
+        case STATUS_OBJECT_NAME_NOT_FOUND:
+        case STATUS_OBJECT_PATH_NOT_FOUND:
+        case STATUS_NETWORK_BUSY:
+        case STATUS_INVALID_NETWORK_RESPONSE:
+        case STATUS_UNEXPECTED_NETWORK_ERROR:
+            rc = -ENETDOWN;
+            break;
+
+        case STATUS_BAD_NETWORK_PATH:
+        case STATUS_NETWORK_UNREACHABLE:
+        case STATUS_PROTOCOL_UNREACHABLE:
+            rc = -ENETUNREACH;
+            break;
+
+        case STATUS_LOCAL_DISCONNECT:
+        case STATUS_TRANSACTION_ABORTED:
+        case STATUS_CONNECTION_ABORTED:
+            rc = -ECONNABORTED;
+            break;
+
+        case STATUS_REMOTE_DISCONNECT:
+        case STATUS_LINK_FAILED:
+        case STATUS_CONNECTION_DISCONNECTED:
+        case STATUS_CONNECTION_RESET:
+        case STATUS_PORT_UNREACHABLE:
+            rc = -ECONNRESET;
+            break;
+
+        case STATUS_PAGEFILE_QUOTA:
+        case STATUS_NO_MEMORY:
+        case STATUS_CONFLICTING_ADDRESSES:
+        case STATUS_QUOTA_EXCEEDED:
+        case STATUS_TOO_MANY_PAGING_FILES:
+        case STATUS_INSUFFICIENT_RESOURCES:
+        case STATUS_WORKING_SET_QUOTA:
+        case STATUS_COMMITMENT_LIMIT:
+        case STATUS_TOO_MANY_ADDRESSES:
+        case STATUS_REMOTE_RESOURCES:
+            rc = -ENOBUFS;
+            break;
+
+        case STATUS_INVALID_CONNECTION:
+            rc = -ENOTCONN;
+            break;
+
+        case STATUS_PIPE_DISCONNECTED:
+            rc = -ESHUTDOWN;
+            break;
+
+        case STATUS_TIMEOUT:
+        case STATUS_IO_TIMEOUT:
+        case STATUS_LINK_TIMEOUT:
+            rc = -ETIMEDOUT;
+            break;
+
+        case STATUS_REMOTE_NOT_LISTENING:
+        case STATUS_CONNECTION_REFUSED:
+            rc = -ECONNREFUSED;
+            break;
+
+        case STATUS_HOST_UNREACHABLE:
+            rc = -EHOSTUNREACH;
+            break;
+
+        case STATUS_PENDING:
+        case STATUS_DEVICE_NOT_READY:
+            rc = -EAGAIN;
+            break;
+
+        case STATUS_CANCELLED:
+        case STATUS_REQUEST_ABORTED:
+            rc = -EINTR;
+            break;
+
+        case STATUS_BUFFER_OVERFLOW:
+        case STATUS_INVALID_BUFFER_SIZE:
+            rc = -EMSGSIZE;
+            break;
+
+        default:
+            /* unlisted status: success maps to 0, anything else to -EINVAL */
+            rc = NT_SUCCESS(Status) ? 0 : -EINVAL;
+            break;
+    }
+
+    return rc;
+}
+
+
+/* Stub: the trace structure is left untouched. */
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+}
+
+/* Stub: no frames are recorded, so NULL is returned for any frame_no. */
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+    return NULL;
+}