From e1b3d71a27c166bebd26ab33f7299c41bd75dab5 Mon Sep 17 00:00:00 2001
From: "robert.read"
Date: Thu, 19 Jun 2008 22:04:35 +0000
Subject: [PATCH] Branch HEAD

b=15969
i=isaac
i=adilger

Move libcfs to a module.
---
 libcfs/.cvsignore | 11 +
 libcfs/Kernelenv.in | 6 +
 libcfs/Kernelenv.mk | 4 +
 libcfs/Makefile.in | 3 +
 libcfs/autoMakefile.am | 9 +
 libcfs/autoconf/.cvsignore | 2 +
 libcfs/autoconf/Makefile.am | 1 +
 libcfs/autoconf/lustre-libcfs.m4 | 517 ++
 libcfs/include/Makefile.am | 1 +
 libcfs/include/libcfs/.cvsignore | 2 +
 libcfs/include/libcfs/Makefile.am | 9 +
 libcfs/include/libcfs/bitmap.h | 80 +
 libcfs/include/libcfs/curproc.h | 64 +
 libcfs/include/libcfs/darwin/.cvsignore | 2 +
 libcfs/include/libcfs/darwin/Makefile.am | 3 +
 libcfs/include/libcfs/darwin/darwin-fs.h | 193 +
 libcfs/include/libcfs/darwin/darwin-lock.h | 284 +
 libcfs/include/libcfs/darwin/darwin-mem.h | 232 +
 libcfs/include/libcfs/darwin/darwin-prim.h | 527 ++
 libcfs/include/libcfs/darwin/darwin-sync.h | 332 ++
 libcfs/include/libcfs/darwin/darwin-tcpip.h | 90 +
 libcfs/include/libcfs/darwin/darwin-time.h | 249 +
 libcfs/include/libcfs/darwin/darwin-types.h | 95 +
 libcfs/include/libcfs/darwin/darwin-utils.h | 67 +
 libcfs/include/libcfs/darwin/kp30.h | 101 +
 libcfs/include/libcfs/darwin/libcfs.h | 193 +
 libcfs/include/libcfs/darwin/lltrace.h | 26 +
 libcfs/include/libcfs/darwin/portals_utils.h | 18 +
 libcfs/include/libcfs/kp30.h | 610 +++
 libcfs/include/libcfs/libcfs.h | 649 +++
 libcfs/include/libcfs/linux/.cvsignore | 2 +
 libcfs/include/libcfs/linux/Makefile.am | 3 +
 libcfs/include/libcfs/linux/kp30.h | 379 ++
 libcfs/include/libcfs/linux/libcfs.h | 173 +
 libcfs/include/libcfs/linux/linux-fs.h | 82 +
 libcfs/include/libcfs/linux/linux-lock.h | 105 +
 libcfs/include/libcfs/linux/linux-mem.h | 129 +
 libcfs/include/libcfs/linux/linux-prim.h | 311 ++
 libcfs/include/libcfs/linux/linux-tcpip.h | 66 +
 libcfs/include/libcfs/linux/linux-time.h | 327 ++
 libcfs/include/libcfs/linux/lltrace.h | 28 +
 libcfs/include/libcfs/linux/portals_compat25.h | 125 +
 libcfs/include/libcfs/linux/portals_utils.h | 51 +
 libcfs/include/libcfs/list.h | 463 ++
 libcfs/include/libcfs/lltrace.h | 167 +
 libcfs/include/libcfs/portals_utils.h | 21 +
 libcfs/include/libcfs/types.h | 17 +
 libcfs/include/libcfs/user-bitops.h | 102 +
 libcfs/include/libcfs/user-lock.h | 243 +
 libcfs/include/libcfs/user-prim.h | 328 ++
 libcfs/include/libcfs/user-tcpip.h | 90 +
 libcfs/include/libcfs/user-time.h | 205 +
 libcfs/include/libcfs/winnt/kp30.h | 157 +
 libcfs/include/libcfs/winnt/libcfs.h | 126 +
 libcfs/include/libcfs/winnt/lltrace.h | 33 +
 libcfs/include/libcfs/winnt/portals_compat25.h | 28 +
 libcfs/include/libcfs/winnt/portals_utils.h | 168 +
 libcfs/include/libcfs/winnt/winnt-fs.h | 254 +
 libcfs/include/libcfs/winnt/winnt-lock.h | 686 +++
 libcfs/include/libcfs/winnt/winnt-mem.h | 133 +
 libcfs/include/libcfs/winnt/winnt-prim.h | 1082 ++++
 libcfs/include/libcfs/winnt/winnt-tcpip.h | 660 +++
 libcfs/include/libcfs/winnt/winnt-time.h | 316 ++
 libcfs/include/libcfs/winnt/winnt-types.h | 647 +++
 libcfs/libcfs/.cvsignore | 11 +
 libcfs/libcfs/Info.plist | 35 +
 libcfs/libcfs/Makefile.in | 33 +
 libcfs/libcfs/autoMakefile.am | 53 +
 libcfs/libcfs/darwin/.cvsignore | 2 +
 libcfs/libcfs/darwin/Makefile.am | 12 +
 libcfs/libcfs/darwin/darwin-curproc.c | 164 +
 libcfs/libcfs/darwin/darwin-debug.c | 77 +
 libcfs/libcfs/darwin/darwin-fs.c | 451 ++
 libcfs/libcfs/darwin/darwin-internal.h | 22 +
 libcfs/libcfs/darwin/darwin-mem.c | 480 ++
 libcfs/libcfs/darwin/darwin-module.c | 191 +
 libcfs/libcfs/darwin/darwin-prim.c | 581 ++
 libcfs/libcfs/darwin/darwin-proc.c | 467 ++
 libcfs/libcfs/darwin/darwin-sync.c | 1025 ++++
 libcfs/libcfs/darwin/darwin-tcpip.c | 1339 +++++
 libcfs/libcfs/darwin/darwin-tracefile.c | 191 +
 libcfs/libcfs/darwin/darwin-utils.c | 578 ++
 libcfs/libcfs/debug.c | 839 +++
 libcfs/libcfs/libcfs.xcode/project.pbxproj | 439 ++
 libcfs/libcfs/linux/.cvsignore | 3 +
 libcfs/libcfs/linux/Makefile.am | 4 +
 libcfs/libcfs/linux/linux-curproc.c | 133 +
 libcfs/libcfs/linux/linux-debug.c | 239 +
 libcfs/libcfs/linux/linux-fs.c | 100 +
 libcfs/libcfs/linux/linux-lock.c | 4 +
 libcfs/libcfs/linux/linux-lwt.c | 2 +
 libcfs/libcfs/linux/linux-mem.c | 145 +
 libcfs/libcfs/linux/linux-module.c | 151 +
 libcfs/libcfs/linux/linux-prim.c | 154 +
 libcfs/libcfs/linux/linux-proc.c | 443 ++
 libcfs/libcfs/linux/linux-sync.c | 2 +
 libcfs/libcfs/linux/linux-tcpip.c | 683 +++
 libcfs/libcfs/linux/linux-tracefile.c | 266 +
 libcfs/libcfs/linux/linux-utils.c | 60 +
 libcfs/libcfs/lwt.c | 270 +
 libcfs/libcfs/module.c | 423 ++
 libcfs/libcfs/nidstrings.c | 540 ++
 libcfs/libcfs/tracefile.c | 1114 ++++
 libcfs/libcfs/tracefile.h | 248 +
 libcfs/libcfs/user-bitops.c | 98 +
 libcfs/libcfs/user-lock.c | 343 ++
 libcfs/libcfs/user-prim.c | 399 ++
 libcfs/libcfs/user-tcpip.c | 606 +++
 libcfs/libcfs/watchdog.c | 427 ++
 libcfs/libcfs/winnt/winnt-curproc.c | 453 ++
 libcfs/libcfs/winnt/winnt-debug.c | 1057 ++++
 libcfs/libcfs/winnt/winnt-fs.c | 541 ++
 libcfs/libcfs/winnt/winnt-lock.c | 353 ++
 libcfs/libcfs/winnt/winnt-lwt.c | 20 +
 libcfs/libcfs/winnt/winnt-mem.c | 332 ++
 libcfs/libcfs/winnt/winnt-module.c | 160 +
 libcfs/libcfs/winnt/winnt-prim.c | 650 +++
 libcfs/libcfs/winnt/winnt-proc.c | 2039 +++++++
 libcfs/libcfs/winnt/winnt-sync.c | 449 ++
 libcfs/libcfs/winnt/winnt-tcpip.c | 6706 ++++++++++++++++++++++++
 libcfs/libcfs/winnt/winnt-tracefile.c | 224 +
 libcfs/libcfs/winnt/winnt-usr.c | 85 +
 libcfs/libcfs/winnt/winnt-utils.c | 158 +
 123 files changed, 39161 insertions(+)
 create mode 100644 libcfs/.cvsignore
 create mode 100644 libcfs/Kernelenv.in
 create mode 100644 libcfs/Kernelenv.mk
 create mode 100644 libcfs/Makefile.in
 create mode 100644 libcfs/autoMakefile.am
 create mode 100644 libcfs/autoconf/.cvsignore
 create mode 100644 libcfs/autoconf/Makefile.am
 create mode 100644 libcfs/autoconf/lustre-libcfs.m4
 create mode 100644 libcfs/include/Makefile.am
 create mode 100644 libcfs/include/libcfs/.cvsignore
 create mode 100644 libcfs/include/libcfs/Makefile.am
 create mode 100644 libcfs/include/libcfs/bitmap.h
 create mode 100644 libcfs/include/libcfs/curproc.h
 create mode 100644 libcfs/include/libcfs/darwin/.cvsignore
 create mode 100644 libcfs/include/libcfs/darwin/Makefile.am
 create mode 100644 libcfs/include/libcfs/darwin/darwin-fs.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-lock.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-mem.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-prim.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-sync.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-tcpip.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-time.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-types.h
 create mode 100644 libcfs/include/libcfs/darwin/darwin-utils.h
 create mode 100644 libcfs/include/libcfs/darwin/kp30.h
 create mode 100644 libcfs/include/libcfs/darwin/libcfs.h
 create mode 100644 libcfs/include/libcfs/darwin/lltrace.h
 create mode 100644 libcfs/include/libcfs/darwin/portals_utils.h
 create mode 100644 libcfs/include/libcfs/kp30.h
 create mode 100644
libcfs/include/libcfs/libcfs.h create mode 100644 libcfs/include/libcfs/linux/.cvsignore create mode 100644 libcfs/include/libcfs/linux/Makefile.am create mode 100644 libcfs/include/libcfs/linux/kp30.h create mode 100644 libcfs/include/libcfs/linux/libcfs.h create mode 100644 libcfs/include/libcfs/linux/linux-fs.h create mode 100644 libcfs/include/libcfs/linux/linux-lock.h create mode 100644 libcfs/include/libcfs/linux/linux-mem.h create mode 100644 libcfs/include/libcfs/linux/linux-prim.h create mode 100644 libcfs/include/libcfs/linux/linux-tcpip.h create mode 100644 libcfs/include/libcfs/linux/linux-time.h create mode 100644 libcfs/include/libcfs/linux/lltrace.h create mode 100644 libcfs/include/libcfs/linux/portals_compat25.h create mode 100644 libcfs/include/libcfs/linux/portals_utils.h create mode 100644 libcfs/include/libcfs/list.h create mode 100644 libcfs/include/libcfs/lltrace.h create mode 100644 libcfs/include/libcfs/portals_utils.h create mode 100644 libcfs/include/libcfs/types.h create mode 100644 libcfs/include/libcfs/user-bitops.h create mode 100644 libcfs/include/libcfs/user-lock.h create mode 100644 libcfs/include/libcfs/user-prim.h create mode 100644 libcfs/include/libcfs/user-tcpip.h create mode 100644 libcfs/include/libcfs/user-time.h create mode 100644 libcfs/include/libcfs/winnt/kp30.h create mode 100644 libcfs/include/libcfs/winnt/libcfs.h create mode 100644 libcfs/include/libcfs/winnt/lltrace.h create mode 100644 libcfs/include/libcfs/winnt/portals_compat25.h create mode 100644 libcfs/include/libcfs/winnt/portals_utils.h create mode 100644 libcfs/include/libcfs/winnt/winnt-fs.h create mode 100644 libcfs/include/libcfs/winnt/winnt-lock.h create mode 100644 libcfs/include/libcfs/winnt/winnt-mem.h create mode 100644 libcfs/include/libcfs/winnt/winnt-prim.h create mode 100644 libcfs/include/libcfs/winnt/winnt-tcpip.h create mode 100644 libcfs/include/libcfs/winnt/winnt-time.h create mode 100644 libcfs/include/libcfs/winnt/winnt-types.h create mode 100644 libcfs/libcfs/.cvsignore create mode 100644 libcfs/libcfs/Info.plist create mode 100644 libcfs/libcfs/Makefile.in create mode 100644 libcfs/libcfs/autoMakefile.am create mode 100644 libcfs/libcfs/darwin/.cvsignore create mode 100644 libcfs/libcfs/darwin/Makefile.am create mode 100644 libcfs/libcfs/darwin/darwin-curproc.c create mode 100644 libcfs/libcfs/darwin/darwin-debug.c create mode 100644 libcfs/libcfs/darwin/darwin-fs.c create mode 100644 libcfs/libcfs/darwin/darwin-internal.h create mode 100644 libcfs/libcfs/darwin/darwin-mem.c create mode 100644 libcfs/libcfs/darwin/darwin-module.c create mode 100644 libcfs/libcfs/darwin/darwin-prim.c create mode 100644 libcfs/libcfs/darwin/darwin-proc.c create mode 100644 libcfs/libcfs/darwin/darwin-sync.c create mode 100644 libcfs/libcfs/darwin/darwin-tcpip.c create mode 100644 libcfs/libcfs/darwin/darwin-tracefile.c create mode 100644 libcfs/libcfs/darwin/darwin-utils.c create mode 100644 libcfs/libcfs/debug.c create mode 100644 libcfs/libcfs/libcfs.xcode/project.pbxproj create mode 100644 libcfs/libcfs/linux/.cvsignore create mode 100644 libcfs/libcfs/linux/Makefile.am create mode 100644 libcfs/libcfs/linux/linux-curproc.c create mode 100644 libcfs/libcfs/linux/linux-debug.c create mode 100644 libcfs/libcfs/linux/linux-fs.c create mode 100644 libcfs/libcfs/linux/linux-lock.c create mode 100644 libcfs/libcfs/linux/linux-lwt.c create mode 100644 libcfs/libcfs/linux/linux-mem.c create mode 100644 libcfs/libcfs/linux/linux-module.c create mode 100644 
libcfs/libcfs/linux/linux-prim.c create mode 100644 libcfs/libcfs/linux/linux-proc.c create mode 100644 libcfs/libcfs/linux/linux-sync.c create mode 100644 libcfs/libcfs/linux/linux-tcpip.c create mode 100644 libcfs/libcfs/linux/linux-tracefile.c create mode 100644 libcfs/libcfs/linux/linux-utils.c create mode 100644 libcfs/libcfs/lwt.c create mode 100644 libcfs/libcfs/module.c create mode 100644 libcfs/libcfs/nidstrings.c create mode 100644 libcfs/libcfs/tracefile.c create mode 100644 libcfs/libcfs/tracefile.h create mode 100644 libcfs/libcfs/user-bitops.c create mode 100644 libcfs/libcfs/user-lock.c create mode 100644 libcfs/libcfs/user-prim.c create mode 100644 libcfs/libcfs/user-tcpip.c create mode 100644 libcfs/libcfs/watchdog.c create mode 100644 libcfs/libcfs/winnt/winnt-curproc.c create mode 100644 libcfs/libcfs/winnt/winnt-debug.c create mode 100644 libcfs/libcfs/winnt/winnt-fs.c create mode 100644 libcfs/libcfs/winnt/winnt-lock.c create mode 100644 libcfs/libcfs/winnt/winnt-lwt.c create mode 100644 libcfs/libcfs/winnt/winnt-mem.c create mode 100644 libcfs/libcfs/winnt/winnt-module.c create mode 100644 libcfs/libcfs/winnt/winnt-prim.c create mode 100644 libcfs/libcfs/winnt/winnt-proc.c create mode 100644 libcfs/libcfs/winnt/winnt-sync.c create mode 100644 libcfs/libcfs/winnt/winnt-tcpip.c create mode 100644 libcfs/libcfs/winnt/winnt-tracefile.c create mode 100644 libcfs/libcfs/winnt/winnt-usr.c create mode 100644 libcfs/libcfs/winnt/winnt-utils.c diff --git a/libcfs/.cvsignore b/libcfs/.cvsignore new file mode 100644 index 0000000..f30d862 --- /dev/null +++ b/libcfs/.cvsignore @@ -0,0 +1,11 @@ +Kernelenv +Makefile +autoMakefile +autoMakefile.in +aclocal.m4 +autom4te.cache +config.log +config.status +configure +.*.cmd +.depend diff --git a/libcfs/Kernelenv.in b/libcfs/Kernelenv.in new file mode 100644 index 0000000..59eda30 --- /dev/null +++ b/libcfs/Kernelenv.in @@ -0,0 +1,6 @@ +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include +# lnet/utils/debug.c wants from userspace. sigh. +HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS) +LIBREADLINE := @LIBREADLINE@ +# 2.5's makefiles aren't nice to cross dir libraries in host programs +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/libcfs/Kernelenv.mk b/libcfs/Kernelenv.mk new file mode 100644 index 0000000..d973e5d --- /dev/null +++ b/libcfs/Kernelenv.mk @@ -0,0 +1,4 @@ +EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/lnet/include +HOSTCFLAGS := $(EXTRA_CFLAGS) +# the kernel doesn't want us to build archives for host binaries :/ +PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o diff --git a/libcfs/Makefile.in b/libcfs/Makefile.in new file mode 100644 index 0000000..8c58d42 --- /dev/null +++ b/libcfs/Makefile.in @@ -0,0 +1,3 @@ +subdir-m += libcfs + +@INCLUDE_RULES@ diff --git a/libcfs/autoMakefile.am b/libcfs/autoMakefile.am new file mode 100644 index 0000000..ef90449 --- /dev/null +++ b/libcfs/autoMakefile.am @@ -0,0 +1,9 @@ +# Copyright (C) 2001 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +SUBDIRS = libcfs include autoconf + +sources: + $(MAKE) sources -C libcfs diff --git a/libcfs/autoconf/.cvsignore b/libcfs/autoconf/.cvsignore new file mode 100644 index 0000000..282522d --- /dev/null +++ b/libcfs/autoconf/.cvsignore @@ -0,0 +1,2 @@ +Makefile +Makefile.in diff --git a/libcfs/autoconf/Makefile.am b/libcfs/autoconf/Makefile.am new file mode 100644 index 0000000..3927b04 --- /dev/null +++ b/libcfs/autoconf/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST := lustre-libcfs.m4 diff --git a/libcfs/autoconf/lustre-libcfs.m4 b/libcfs/autoconf/lustre-libcfs.m4 new file mode 100644 index 0000000..86fe0e6 --- /dev/null +++ b/libcfs/autoconf/lustre-libcfs.m4 @@ -0,0 +1,517 @@ +# +# LIBCFS_CONFIG_CDEBUG +# +# whether to enable various libcfs debugs (CDEBUG, ENTRY/EXIT, LASSERT, etc.) +# +AC_DEFUN([LIBCFS_CONFIG_CDEBUG], +[ +AC_MSG_CHECKING([whether to enable CDEBUG, CWARN]) +AC_ARG_ENABLE([libcfs_cdebug], + AC_HELP_STRING([--disable-libcfs-cdebug], + [disable libcfs CDEBUG, CWARN]), + [],[enable_libcfs_cdebug='yes']) +AC_MSG_RESULT([$enable_libcfs_cdebug]) +if test x$enable_libcfs_cdebug = xyes; then + AC_DEFINE(CDEBUG_ENABLED, 1, [enable libcfs CDEBUG, CWARN]) +else + AC_DEFINE(CDEBUG_ENABLED, 0, [disable libcfs CDEBUG, CWARN]) +fi + +AC_MSG_CHECKING([whether to enable ENTRY/EXIT]) +AC_ARG_ENABLE([libcfs_trace], + AC_HELP_STRING([--disable-libcfs-trace], + [disable libcfs ENTRY/EXIT]), + [],[enable_libcfs_trace='yes']) +AC_MSG_RESULT([$enable_libcfs_trace]) +if test x$enable_libcfs_trace = xyes; then + AC_DEFINE(CDEBUG_ENTRY_EXIT, 1, [enable libcfs ENTRY/EXIT]) +else + AC_DEFINE(CDEBUG_ENTRY_EXIT, 0, [disable libcfs ENTRY/EXIT]) +fi + +AC_MSG_CHECKING([whether to enable LASSERT, LASSERTF]) +AC_ARG_ENABLE([libcfs_assert], + AC_HELP_STRING([--disable-libcfs-assert], + [disable libcfs LASSERT, LASSERTF]), + [],[enable_libcfs_assert='yes']) +AC_MSG_RESULT([$enable_libcfs_assert]) +if test x$enable_libcfs_assert = xyes; then + AC_DEFINE(LIBCFS_DEBUG, 1, [enable libcfs LASSERT, LASSERTF]) +fi +]) + +# +# LIBCFS_CONFIG_PANIC_DUMPLOG +# +# check if tunable panic_dumplog is wanted +# +AC_DEFUN([LIBCFS_CONFIG_PANIC_DUMPLOG], +[AC_MSG_CHECKING([for tunable panic_dumplog support]) +AC_ARG_ENABLE([panic_dumplog], + AC_HELP_STRING([--enable-panic_dumplog], + [enable panic_dumplog]), + [],[enable_panic_dumplog='no']) +if test x$enable_panic_dumplog = xyes ; then + AC_DEFINE(LNET_DUMP_ON_PANIC, 1, [use dumplog on panic]) + AC_MSG_RESULT([yes (by request)]) +else + AC_MSG_RESULT([no]) +fi +]) + +# +# LIBCFS_STRUCT_PAGE_LIST +# +# 2.6.4 no longer has page->list +# +AC_DEFUN([LIBCFS_STRUCT_PAGE_LIST], +[AC_MSG_CHECKING([if struct page has a list field]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct page page; + &page.list; +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# LIBCFS_STRUCT_SIGHAND +# +# red hat 2.4 adds sighand to struct task_struct +# +AC_DEFUN([LIBCFS_STRUCT_SIGHAND], +[AC_MSG_CHECKING([if task_struct has a sighand field]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + struct task_struct p; + p.sighand = NULL; +],[ + AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches]) + AC_MSG_RESULT([yes]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# LIBCFS_FUNC_CPU_ONLINE +# +# cpu_online is different in rh 2.4, vanilla 2.4, and 2.6 +# +AC_DEFUN([LIBCFS_FUNC_CPU_ONLINE], +[AC_MSG_CHECKING([if kernel defines cpu_online()]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + 
cpu_online(0); +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# LIBCFS_TYPE_GFP_T +# +# check if gfp_t is typedef-ed +# +AC_DEFUN([LIBCFS_TYPE_GFP_T], +[AC_MSG_CHECKING([if kernel defines gfp_t]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + return sizeof(gfp_t); +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_GFP_T, 1, [gfp_t found]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# LIBCFS_TYPE_CPUMASK_T +# +# same goes for cpumask_t +# +AC_DEFUN([LIBCFS_TYPE_CPUMASK_T], +[AC_MSG_CHECKING([if kernel defines cpumask_t]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + return sizeof (cpumask_t); +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found]) +],[ + AC_MSG_RESULT([no]) +]) +]) + +# +# LIBCFS_FUNC_SHOW_TASK +# +# we export show_task(), but not all kernels have it (yet) +# +AC_DEFUN([LIBCFS_FUNC_SHOW_TASK], +[LB_CHECK_SYMBOL_EXPORT([show_task], +[kernel/ksyms.c kernel/sched.c],[ +AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported]) +],[ +]) +]) + +# check userland __u64 type +AC_DEFUN([LIBCFS_U64_LONG_LONG], +[AC_MSG_CHECKING([u64 is long long type]) +tmp_flags="$CFLAGS" +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([ + #include + int main(void) { + unsigned long long *data1; + __u64 *data2; + + data1 = data2; + return 0; + } +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_U64_LONG_LONG, 1, + [__u64 is long long type]) +],[ + AC_MSG_RESULT([no]) +]) +CFLAGS="$tmp_flags" +]) + +# check userland size_t type +AC_DEFUN([LIBCFS_SIZE_T_LONG], +[AC_MSG_CHECKING([size_t is unsigned long type]) +tmp_flags="$CFLAGS" +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([ + #include + int main(void) { + unsigned long *data1; + size_t *data2; + + data1 = data2; + return 0; + } +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_SIZE_T_LONG, 1, + [size_t is long type]) +],[ + AC_MSG_RESULT([no]) +]) +CFLAGS="$tmp_flags" +]) + +AC_DEFUN([LIBCFS_SSIZE_T_LONG], +[AC_MSG_CHECKING([ssize_t is signed long type]) +tmp_flags="$CFLAGS" +CFLAGS="$CFLAGS -Werror" +AC_COMPILE_IFELSE([ + #include + int main(void) { + long *data1; + ssize_t *data2; + + data1 = data2; + return 0; + } +],[ + AC_MSG_RESULT([yes]) + AC_DEFINE(HAVE_SSIZE_T_LONG, 1, + [ssize_t is long type]) +],[ + AC_MSG_RESULT([no]) +]) +CFLAGS="$tmp_flags" +]) + + +# LIBCFS_TASKLIST_LOCK +# 2.6.18 remove tasklist_lock export +AC_DEFUN([LIBCFS_TASKLIST_LOCK], +[LB_CHECK_SYMBOL_EXPORT([tasklist_lock], +[kernel/fork.c],[ +AC_DEFINE(HAVE_TASKLIST_LOCK, 1, + [tasklist_lock exported]) +],[ +]) +]) + +# 2.6.19 API changes +# kmem_cache_destroy(cachep) return void instead of +# int +AC_DEFUN([LIBCFS_KMEM_CACHE_DESTROY_INT], +[AC_MSG_CHECKING([kmem_cache_destroy(cachep) return int]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + int i = kmem_cache_destroy(NULL); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_DESTROY_INT, 1, + [kmem_cache_destroy(cachep) return int]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# 2.6.19 API change +#panic_notifier_list use atomic_notifier operations +# +AC_DEFUN([LIBCFS_ATOMIC_PANIC_NOTIFIER], +[AC_MSG_CHECKING([panic_notifier_list is atomic]) +LB_LINUX_TRY_COMPILE([ + #include + #include +],[ + struct atomic_notifier_head panic_notifier_list; +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_ATOMIC_PANIC_NOTIFIER, 1, + [panic_notifier_list is atomic_notifier_head]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# 2.6.20 API change INIT_WORK use 2 args and not +# store data inside +AC_DEFUN([LIBCFS_3ARGS_INIT_WORK], +[AC_MSG_CHECKING([check INIT_WORK want 3 args]) +LB_LINUX_TRY_COMPILE([ + 
#include +],[ + struct work_struct work; + + INIT_WORK(&work, NULL, NULL); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_3ARGS_INIT_WORK, 1, + [INIT_WORK use 3 args and store data inside]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# 2.6.21 api change. 'register_sysctl_table' use only one argument, +# instead of more old which need two. +AC_DEFUN([LIBCFS_2ARGS_REGISTER_SYSCTL], +[AC_MSG_CHECKING([check register_sysctl_table want 2 args]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + return register_sysctl_table(NULL,0); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_2ARGS_REGISTER_SYSCTL, 1, + [register_sysctl_table want 2 args]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# 2.6.21 marks kmem_cache_t deprecated and uses struct kmem_cache +# instead +AC_DEFUN([LIBCFS_KMEM_CACHE], +[AC_MSG_CHECKING([check kernel has struct kmem_cache]) +tmp_flags="$EXTRA_KCFLAGS" +EXTRA_KCFLAGS="-Werror" +LB_LINUX_TRY_COMPILE([ + #include + typedef struct kmem_cache cache_t; +],[ + cache_t *cachep = NULL; + + kmem_cache_alloc(cachep, 0); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE, 1, + [kernel has struct kmem_cache]) +],[ + AC_MSG_RESULT(NO) +]) +EXTRA_KCFLAGS="$tmp_flags" +]) +# 2.6.23 lost dtor argument +AC_DEFUN([LIBCFS_KMEM_CACHE_CREATE_DTOR], +[AC_MSG_CHECKING([check kmem_cache_create has dtor argument]) +LB_LINUX_TRY_COMPILE([ + #include +],[ + kmem_cache_create(NULL, 0, 0, 0, NULL, NULL); +],[ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_KMEM_CACHE_CREATE_DTOR, 1, + [kmem_cache_create has dtor argument]) +],[ + AC_MSG_RESULT(NO) +]) +]) + +# +# LIBCFS_PROG_LINUX +# +# LNet linux kernel checks +# +AC_DEFUN([LIBCFS_PROG_LINUX], +[ +LIBCFS_FUNC_CPU_ONLINE +LIBCFS_TYPE_GFP_T +LIBCFS_TYPE_CPUMASK_T +LIBCFS_CONFIG_PANIC_DUMPLOG + +LIBCFS_STRUCT_PAGE_LIST +LIBCFS_STRUCT_SIGHAND +LIBCFS_FUNC_SHOW_TASK +LIBCFS_U64_LONG_LONG +LIBCFS_SSIZE_T_LONG +LIBCFS_SIZE_T_LONG +# 2.6.18 +LIBCFS_TASKLIST_LOCK +# 2.6.19 +LIBCFS_KMEM_CACHE_DESTROY_INT +LIBCFS_ATOMIC_PANIC_NOTIFIER +# 2.6.20 +LIBCFS_3ARGS_INIT_WORK +# 2.6.21 +LIBCFS_2ARGS_REGISTER_SYSCTL +LIBCFS_KMEM_CACHE +# 2.6.23 +LIBCFS_KMEM_CACHE_CREATE_DTOR +]) + +# +# LIBCFS_PROG_DARWIN +# +# Darwin checks +# +AC_DEFUN([LIBCFS_PROG_DARWIN], +[LB_DARWIN_CHECK_FUNCS([get_preemption_level]) +]) + +# +# LIBCFS_PATH_DEFAULTS +# +# default paths for installed files +# +AC_DEFUN([LIBCFS_PATH_DEFAULTS], +[ +]) + +# +# LIBCFS_CONFIGURE +# +# other configure checks +# +AC_DEFUN([LIBCFS_CONFIGURE], +[# lnet/utils/portals.c +AC_CHECK_HEADERS([asm/types.h endian.h sys/ioctl.h]) + +# lnet/utils/debug.c +AC_CHECK_HEADERS([linux/version.h]) + +AC_CHECK_TYPE([spinlock_t], + [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])], + [], + [#include ]) + +# lnet/utils/wirecheck.c +AC_CHECK_FUNCS([strnlen]) + +# -------- Check for required packages -------------- + + +AC_MSG_CHECKING([if efence debugging support is requested]) +AC_ARG_ENABLE(efence, + AC_HELP_STRING([--enable-efence], + [use efence library]), + [],[enable_efence='no']) +AC_MSG_RESULT([$enable_efence]) +if test "$enable_efence" = "yes" ; then + LIBEFENCE="-lefence" + AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested]) +else + LIBEFENCE="" +fi +AC_SUBST(LIBEFENCE) + + +# -------- check for -lpthread support ---- +AC_MSG_CHECKING([whether to use libpthread for libcfs library]) +AC_ARG_ENABLE([libpthread], + AC_HELP_STRING([--disable-libpthread], + [disable libpthread]), + [],[enable_libpthread=yes]) +if test "$enable_libpthread" = "yes" ; then + AC_CHECK_LIB([pthread], [pthread_create], + [ENABLE_LIBPTHREAD="yes"], + 
[ENABLE_LIBPTHREAD="no"]) + if test "$ENABLE_LIBPTHREAD" = "yes" ; then + AC_MSG_RESULT([$ENABLE_LIBPTHREAD]) + PTHREAD_LIBS="-lpthread" + AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread]) + else + PTHREAD_LIBS="" + AC_MSG_RESULT([no libpthread is found]) + fi + AC_SUBST(PTHREAD_LIBS) +else + AC_MSG_RESULT([no (disabled explicitly)]) + ENABLE_LIBPTHREAD="no" +fi +AC_SUBST(ENABLE_LIBPTHREAD) + + +]) + +# +# LIBCFS_CONDITIONALS +# +# AM_CONDITOINAL defines for lnet +# +AC_DEFUN([LIBCFS_CONDITIONALS], +[ +]) + +# +# LIBCFS_CONFIG_FILES +# +# files that should be generated with AC_OUTPUT +# +AC_DEFUN([LIBCFS_CONFIG_FILES], +[AC_CONFIG_FILES([ +libcfs/Kernelenv +libcfs/Makefile +libcfs/autoMakefile +libcfs/autoconf/Makefile +libcfs/include/Makefile +libcfs/include/libcfs/Makefile +libcfs/include/libcfs/linux/Makefile +libcfs/libcfs/Makefile +libcfs/libcfs/autoMakefile +libcfs/libcfs/linux/Makefile +]) +case $lb_target_os in + darwin) + AC_CONFIG_FILES([ +libcfs/include/libcfs/darwin/Makefile +libcfs/libcfs/darwin/Makefile +]) + ;; +esac +]) diff --git a/libcfs/include/Makefile.am b/libcfs/include/Makefile.am new file mode 100644 index 0000000..8289f5f --- /dev/null +++ b/libcfs/include/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = libcfs diff --git a/libcfs/include/libcfs/.cvsignore b/libcfs/include/libcfs/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/libcfs/include/libcfs/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/libcfs/include/libcfs/Makefile.am b/libcfs/include/libcfs/Makefile.am new file mode 100644 index 0000000..472d0ae --- /dev/null +++ b/libcfs/include/libcfs/Makefile.am @@ -0,0 +1,9 @@ +SUBDIRS := linux +if DARWIN +SUBDIRS += darwin +endif +DIST_SUBDIRS := $(SUBDIRS) + +EXTRA_DIST := curproc.h kp30.h libcfs.h list.h lltrace.h \ + portals_utils.h types.h user-lock.h user-prim.h user-time.h \ + user-tcpip.h user-bitops.h bitmap.h diff --git a/libcfs/include/libcfs/bitmap.h b/libcfs/include/libcfs/bitmap.h new file mode 100644 index 0000000..7f6189a --- /dev/null +++ b/libcfs/include/libcfs/bitmap.h @@ -0,0 +1,80 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2007 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ +#ifndef _LIBCFS_BITMAP_H_ +#define _LIBCFS_BITMAP_H_ + + +typedef struct { + int size; + unsigned long data[0]; +} bitmap_t; + +#define CFS_BITMAP_SIZE(nbits) \ + (((nbits/BITS_PER_LONG)+1)*sizeof(long)+sizeof(bitmap_t)) + +static inline +bitmap_t *ALLOCATE_BITMAP(int size) +{ + bitmap_t *ptr; + + OBD_ALLOC(ptr, CFS_BITMAP_SIZE(size)); + if (ptr == NULL) + RETURN(ptr); + + ptr->size = size; + + RETURN (ptr); +} + +#define FREE_BITMAP(ptr) OBD_FREE(ptr, CFS_BITMAP_SIZE(ptr->size)) + +static inline +void cfs_bitmap_set(bitmap_t *bitmap, int nbit) +{ + set_bit(nbit, bitmap->data); +} + +static inline +void cfs_bitmap_clear(bitmap_t *bitmap, int nbit) +{ + clear_bit(nbit, bitmap->data); +} + +static inline +int cfs_bitmap_check(bitmap_t *bitmap, int nbit) +{ + return test_bit(nbit, bitmap->data); +} + +/* return 0 is bitmap has none set bits */ +static inline +int cfs_bitmap_check_empty(bitmap_t *bitmap) +{ + return find_first_bit(bitmap->data, bitmap->size) == bitmap->size; +} + +#define cfs_foreach_bit(bitmap, pos) \ + for((pos)=find_first_bit((bitmap)->data, bitmap->size); \ + (pos) < (bitmap)->size; \ + (pos) = find_next_bit((bitmap)->data, (bitmap)->size, (pos))) + +#endif diff --git a/libcfs/include/libcfs/curproc.h b/libcfs/include/libcfs/curproc.h new file mode 100644 index 0000000..6495c66 --- /dev/null +++ b/libcfs/include/libcfs/curproc.h @@ -0,0 +1,64 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API declaration + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef __LIBCFS_CURPROC_H__ +#define __LIBCFS_CURPROC_H__ + +#ifdef __KERNEL__ +/* + * Portable API to access common characteristics of "current" UNIX process. 
+ * + * Implemented in portals/include/libcfs// + */ +uid_t cfs_curproc_uid(void); +gid_t cfs_curproc_gid(void); +uid_t cfs_curproc_fsuid(void); +gid_t cfs_curproc_fsgid(void); +pid_t cfs_curproc_pid(void); +int cfs_curproc_groups_nr(void); +int cfs_curproc_is_in_groups(gid_t group); +void cfs_curproc_groups_dump(gid_t *array, int size); +mode_t cfs_curproc_umask(void); +char *cfs_curproc_comm(void); + + +/* + * Plus, platform-specific constant + * + * CFS_CURPROC_COMM_MAX, + * + * and opaque scalar type + * + * cfs_kernel_cap_t + */ +cfs_kernel_cap_t cfs_curproc_cap_get(void); +void cfs_curproc_cap_set(cfs_kernel_cap_t cap); +#endif + +/* __LIBCFS_CURPROC_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/darwin/.cvsignore b/libcfs/include/libcfs/darwin/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/libcfs/include/libcfs/darwin/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/libcfs/include/libcfs/darwin/Makefile.am b/libcfs/include/libcfs/darwin/Makefile.am new file mode 100644 index 0000000..f2f217a --- /dev/null +++ b/libcfs/include/libcfs/darwin/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \ + darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \ + darwin-lock.h darwin-sync.h darwin-tcpip.h kp30.h diff --git a/libcfs/include/libcfs/darwin/darwin-fs.h b/libcfs/include/libcfs/darwin/darwin-fs.h new file mode 100644 index 0000000..da613ba --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-fs.h @@ -0,0 +1,193 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Implementation of standard file system interfaces for XNU kernel. + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ +#ifndef __LIBCFS_DARWIN_FS_H__ +#define __LIBCFS_DARWIN_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * File operating APIs in kernel + */ +#ifdef __DARWIN8__ +/* + * Kernel file descriptor + */ +typedef struct cfs_kern_file { + int f_flags; + vnode_t f_vp; + vfs_context_t f_ctxt; +} cfs_file_t; + +#else + +typedef struct file cfs_file_t; + +#endif + +int kern_file_size(cfs_file_t *fp, off_t *size); +#define cfs_filp_size(fp) \ + ({ \ + off_t __size; \ + kern_file_size((fp), &__size); \ + __size; \ + }) +#define cfs_filp_poff(fp) (NULL) + +cfs_file_t *kern_file_open(const char *name, int flags, int mode, int *err); +int kern_file_close(cfs_file_t *fp); +int kern_file_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); +int kern_file_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); +int kern_file_sync(cfs_file_t *fp); + +#define cfs_filp_open(n, f, m, e) kern_file_open(n, f, m, e) +#define cfs_filp_close(f) kern_file_close(f) +#define cfs_filp_read(f, b, n, p) kern_file_read(f, b, n, p) +#define cfs_filp_write(f, b, n, p) kern_file_write(f, b, n, p) +#define cfs_filp_fsync(f) kern_file_sync(f) + +int ref_file(cfs_file_t *fp); +int rele_file(cfs_file_t *fp); +int file_count(cfs_file_t *fp); +#define cfs_get_file(f) ref_file(f) +#define cfs_put_file(f) rele_file(f) +#define cfs_file_count(f) file_count(f) + +#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) +#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) + +typedef struct flock cfs_flock_t; +#define cfs_flock_type(fl) ((fl)->l_type) +#define cfs_flock_set_type(fl, type) do { (fl)->l_type = (type); } while(0) +#define cfs_flock_pid(fl) ((fl)->l_pid) +#define cfs_flock_set_pid(fl, pid) do { (fl)->l_pid = (pid); } while(0) +#define cfs_flock_start(fl) ((fl)->l_start) +#define cfs_flock_set_start(fl, start) do { (fl)->l_start = (start); } while(0) + +static inline loff_t cfs_flock_end(cfs_flock_t *fl) +{ + return (fl->l_len == 0 ? 
CFS_OFFSET_MAX: (fl->l_start + fl->l_len)); +} + +static inline void cfs_flock_set_end(cfs_flock_t *fl, loff_t end) +{ + if (end == CFS_OFFSET_MAX) + fl->l_len = 0; + else + fl->l_len = end - fl->l_start; +} + +#define ATTR_MODE 0x0001 +#define ATTR_UID 0x0002 +#define ATTR_GID 0x0004 +#define ATTR_SIZE 0x0008 +#define ATTR_ATIME 0x0010 +#define ATTR_MTIME 0x0020 +#define ATTR_CTIME 0x0040 +#define ATTR_ATIME_SET 0x0080 +#define ATTR_MTIME_SET 0x0100 +#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 0x0400 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ +#define ATTR_CTIME_SET 0x2000 +#define ATTR_BLOCKS 0x4000 +#define ATTR_KILL_SUID 0 +#define ATTR_KILL_SGID 0 + +#define in_group_p(x) (0) + +struct posix_acl_entry { + short e_tag; + unsigned short e_perm; + unsigned int e_id; +}; + +struct posix_acl { + atomic_t a_refcount; + unsigned int a_count; + struct posix_acl_entry a_entries[0]; +}; + +struct posix_acl *posix_acl_alloc(int count, int flags); +static inline struct posix_acl *posix_acl_from_xattr(const void *value, + size_t size) +{ + return posix_acl_alloc(0, 0); +} +static inline void posix_acl_release(struct posix_acl *acl) {}; +static inline int posix_acl_valid(const struct posix_acl *acl) { return 0; } +static inline struct posix_acl * posix_acl_dup(struct posix_acl *acl) +{ + return acl; +} + +#else /* !__KERNEL__ */ + +typedef struct file cfs_file_t; + +#endif /* END __KERNEL__ */ + +typedef struct { + void *d; +} cfs_dentry_t; + +#ifndef O_SYNC +#define O_SYNC 0 +#endif +#ifndef O_DIRECTORY +#define O_DIRECTORY 0 +#endif +#ifndef O_LARGEFILE +#define O_LARGEFILE 0 +#endif + +#endif diff --git a/libcfs/include/libcfs/darwin/darwin-lock.h b/libcfs/include/libcfs/darwin/darwin-lock.h new file mode 100644 index 0000000..f826fef --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-lock.h @@ -0,0 +1,284 @@ +#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__ +#define __LIBCFS_DARWIN_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include + +#include +#include + +/* + * spin_lock (use Linux kernel's primitives) + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + */ +struct spin_lock { + struct kspin spin; +}; + +typedef struct spin_lock spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + kspin_init(&lock->spin); +} + +static inline void spin_lock(spinlock_t *lock) +{ + kspin_lock(&lock->spin); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + kspin_unlock(&lock->spin); +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return kspin_trylock(&lock->spin); +} + +static inline void spin_lock_done(spinlock_t *lock) +{ + kspin_done(&lock->spin); +} + +#error "does this lock out timer callbacks?" 
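/*
 * Editorial aside, not part of the original patch: a minimal usage sketch of
 * the Darwin spinlock_t emulation declared above.  The example_counter
 * structure and function names are hypothetical; only spin_lock_init(),
 * spin_lock(), spin_unlock() and spin_lock_done() come from this header.
 */
struct example_counter {
        spinlock_t      lock;
        int             value;
};

static inline void example_counter_init(struct example_counter *c)
{
        spin_lock_init(&c->lock);       /* maps to kspin_init() on XNU */
        c->value = 0;
}

static inline int example_counter_inc(struct example_counter *c)
{
        int new_value;

        spin_lock(&c->lock);            /* kspin_lock() underneath */
        new_value = ++c->value;
        spin_unlock(&c->lock);          /* kspin_unlock() underneath */
        return new_value;
}

static inline void example_counter_fini(struct example_counter *c)
{
        spin_lock_done(&c->lock);       /* release the kspin primitive */
}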
+#define spin_lock_bh(x) spin_lock(x) +#define spin_unlock_bh(x) spin_unlock(x) +#define spin_lock_bh_init(x) spin_lock_init(x) + +extern boolean_t ml_set_interrupts_enabled(boolean_t enable); +#define __disable_irq() ml_set_interrupts_enabled(FALSE) +#define __enable_irq(x) (void) ml_set_interrupts_enabled(x) + +#define spin_lock_irqsave(s, f) do{ \ + f = __disable_irq(); \ + spin_lock(s); }while(0) + +#define spin_unlock_irqrestore(s, f) do{ \ + spin_unlock(s); \ + __enable_irq(f);}while(0) + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +struct semaphore { + struct ksem sem; +}; + +static inline void sema_init(struct semaphore *s, int val) +{ + ksem_init(&s->sem, val); +} + +static inline void __down(struct semaphore *s) +{ + ksem_down(&s->sem, 1); +} + +static inline void __up(struct semaphore *s) +{ + ksem_up(&s->sem, 1); +} + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ + +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +struct completion { + /* + * Emulate completion by semaphore for now. + * + * XXX nikita: this is not safe if completion is used to synchronize + * exit from kernel daemon thread and kext unloading. In this case + * some core function (a la complete_and_exit()) is needed. + */ + struct ksem sem; +}; + +static inline void init_completion(struct completion *c) +{ + ksem_init(&c->sem, 0); +} + +static inline void complete(struct completion *c) +{ + ksem_up(&c->sem, 1); +} + +static inline void wait_for_completion(struct completion *c) +{ + ksem_down(&c->sem, 1); +} + +/* + * rw_semaphore: + * + * - DECLARE_RWSEM(x) + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore { + struct krw_sem s; +}; + +static inline void init_rwsem(struct rw_semaphore *s) +{ + krw_sem_init(&s->s); +} + +static inline void fini_rwsem(struct rw_semaphore *s) +{ + krw_sem_done(&s->s); +} + +static inline void down_read(struct rw_semaphore *s) +{ + krw_sem_down_r(&s->s); +} + +static inline int down_read_trylock(struct rw_semaphore *s) +{ + int ret = krw_sem_down_r_try(&s->s); + return ret == 0; +} + +static inline void down_write(struct rw_semaphore *s) +{ + krw_sem_down_w(&s->s); +} + +static inline int down_write_trylock(struct rw_semaphore *s) +{ + int ret = krw_sem_down_w_try(&s->s); + return ret == 0; +} + +static inline void up_read(struct rw_semaphore *s) +{ + krw_sem_up_r(&s->s); +} + +static inline void up_write(struct rw_semaphore *s) +{ + krw_sem_up_w(&s->s); +} + +/* + * read-write lock : Need to be investigated more!! 
+ * + * - DECLARE_RWLOCK(l) + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ +typedef struct krw_spin rwlock_t; + +#define rwlock_init(pl) krw_spin_init(pl) + +#define read_lock(l) krw_spin_down_r(l) +#define read_unlock(l) krw_spin_up_r(l) +#define write_lock(l) krw_spin_down_w(l) +#define write_unlock(l) krw_spin_up_w(l) + +#define write_lock_irqsave(l, f) do{ \ + f = __disable_irq(); \ + write_lock(l); }while(0) + +#define write_unlock_irqrestore(l, f) do{ \ + write_unlock(l); \ + __enable_irq(f);}while(0) + +#define read_lock_irqsave(l, f) do{ \ + f = __disable_irq(); \ + read_lock(l); }while(0) + +#define read_unlock_irqrestore(l, f) do{ \ + read_unlock(l); \ + __enable_irq(f);}while(0) +/* + * Funnel: + * + * Safe funnel in/out + */ +#ifdef __DARWIN8__ + +#define CFS_DECL_FUNNEL_DATA +#define CFS_DECL_CONE_DATA DECLARE_FUNNEL_DATA +#define CFS_DECL_NET_DATA DECLARE_FUNNEL_DATA +#define CFS_CONE_IN do {} while(0) +#define CFS_CONE_EX do {} while(0) + +#define CFS_NET_IN do {} while(0) +#define CFS_NET_EX do {} while(0) + +#else + +#define CFS_DECL_FUNNEL_DATA \ + boolean_t __funnel_state = FALSE; \ + funnel_t *__funnel +#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA +#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA + +void lustre_cone_in(boolean_t *state, funnel_t **cone); +void lustre_cone_ex(boolean_t state, funnel_t *cone); + +#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel) +#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel) + +void lustre_net_in(boolean_t *state, funnel_t **cone); +void lustre_net_ex(boolean_t state, funnel_t *cone); + +#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel) +#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel) + +#endif + +#else +#include +#endif /* __KERNEL__ */ + +/* __XNU_CFS_LOCK_H */ +#endif diff --git a/libcfs/include/libcfs/darwin/darwin-mem.h b/libcfs/include/libcfs/darwin/darwin-mem.h new file mode 100644 index 0000000..5ffcd4e --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-mem.h @@ -0,0 +1,232 @@ +#ifndef __LIBCFS_DARWIN_CFS_MEM_H__ +#define __LIBCFS_DARWIN_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Basic xnu_page struct, should be binary compatibility with + * all page types in xnu (we have only xnu_raw_page, xll_page now) + */ + +/* Variable sized pages are not supported */ + +#ifdef PAGE_SHIFT +#define CFS_PAGE_SHIFT PAGE_SHIFT +#else +#define CFS_PAGE_SHIFT 12 +#endif + +#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) + +#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE - 1)) + +enum { + XNU_PAGE_RAW, + XNU_PAGE_XLL, + XNU_PAGE_NTYPES +}; + +typedef __u32 page_off_t; + +/* + * For XNU we have our own page cache built on top of underlying BSD/MACH + * infrastructure. In particular, we have two disjoint types of pages: + * + * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM, + * based on UPLs, and + * + * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache + * file data, owned by file system objects, hashed, lrued, etc. 
+ * + * cfs_page_t has to cover both of them, because core Lustre code is based on + * the Linux assumption that page is _both_ memory buffer and file system + * caching entity. + * + * To achieve this, all types of pages supported on XNU has to start from + * common header that contains only "page type". Common cfs_page_t operations + * dispatch through operation vector based on page type. + * + */ +typedef struct xnu_page { + int type; +} cfs_page_t; + +struct xnu_page_ops { + void *(*page_map) (cfs_page_t *); + void (*page_unmap) (cfs_page_t *); + void *(*page_address) (cfs_page_t *); +}; + +void xnu_page_ops_register(int type, struct xnu_page_ops *ops); +void xnu_page_ops_unregister(int type); + +/* + * raw page, no cache object, just like buffer + */ +struct xnu_raw_page { + struct xnu_page header; + void *virtual; + atomic_t count; + struct list_head link; +}; + +/* + * Public interface to lustre + * + * - cfs_alloc_page(f) + * - cfs_free_page(p) + * - cfs_kmap(p) + * - cfs_kunmap(p) + * - cfs_page_address(p) + */ + +/* + * Of all functions above only cfs_kmap(), cfs_kunmap(), and + * cfs_page_address() can be called on file system pages. The rest is for raw + * pages only. + */ + +cfs_page_t *cfs_alloc_page(u_int32_t flags); +void cfs_free_page(cfs_page_t *page); +void cfs_get_page(cfs_page_t *page); +int cfs_put_page_testzero(cfs_page_t *page); +int cfs_page_count(cfs_page_t *page); +#define cfs_page_index(pg) (0) + +void *cfs_page_address(cfs_page_t *pg); +void *cfs_kmap(cfs_page_t *pg); +void cfs_kunmap(cfs_page_t *pg); + +/* + * Memory allocator + */ + +void *cfs_alloc(size_t nr_bytes, u_int32_t flags); +void cfs_free(void *addr); + +void *cfs_alloc_large(size_t nr_bytes); +void cfs_free_large(void *addr); + +extern int get_preemption_level(void); + +#define CFS_ALLOC_ATOMIC_TRY \ + (get_preemption_level() != 0 ? CFS_ALLOC_ATOMIC : 0) + +/* + * Slab: + * + * No slab in OSX, use zone allocator to simulate slab + */ +#define SLAB_HWCACHE_ALIGN 0 + +#ifdef __DARWIN8__ +/* + * In Darwin8, we cannot use zalloc_noblock(not exported by kernel), + * also, direct using of zone allocator is not recommended. + */ +#define CFS_INDIVIDUAL_ZONE (0) + +#if !CFS_INDIVIDUAL_ZONE +#include +typedef OSMallocTag mem_cache_t; +#else +typedef void* zone_t; +typedef zone_t mem_cache_t; +#endif + +#else /* !__DARWIN8__ */ + +#define CFS_INDIVIDUAL_ZONE (1) + +typedef zone_t mem_cache_t; + +#endif /* !__DARWIN8__ */ + +#define MC_NAME_MAX_LEN 64 + +typedef struct cfs_mem_cache { + int mc_size; + mem_cache_t mc_cache; + struct list_head mc_link; + char mc_name [MC_NAME_MAX_LEN]; +} cfs_mem_cache_t; + +#define KMEM_CACHE_MAX_COUNT 64 +#define KMEM_MAX_ZONE 8192 + +cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); +int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); +void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); +void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); + +/* + * Misc + */ +/* XXX Liang: num_physpages... 
fix me */ +#define num_physpages (64 * 1024) + +#define CFS_DECL_MMSPACE +#define CFS_MMSPACE_OPEN do {} while(0) +#define CFS_MMSPACE_CLOSE do {} while(0) + +#define copy_from_user(kaddr, uaddr, size) copyin(CAST_USER_ADDR_T(uaddr), (caddr_t)kaddr, size) +#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, CAST_USER_ADDR_T(uaddr), size) + +#if 0 +static inline int strncpy_from_user(char *kaddr, char *uaddr, int size) +{ + size_t count; + return copyinstr((const user_addr_t)uaddr, (void *)kaddr, size, &count); +} +#endif + +#if defined (__ppc__) +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define wmb() __asm__ __volatile__ ("eieio" : : : "memory") +#elif defined (__i386__) +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() +#define wmb() __asm__ __volatile__ ("": : :"memory") +#else +#error architecture not supported +#endif + +#else /* !__KERNEL__ */ + +#define CFS_CACHE_SHIFT 12 +#define PAGE_CACHE_SIZE (1 << CFS_CACHE_SHIFT) +#include + +#endif /* __KERNEL__ */ + +#endif /* __XNU_CFS_MEM_H__ */ diff --git a/libcfs/include/libcfs/darwin/darwin-prim.h b/libcfs/include/libcfs/darwin/darwin-prim.h new file mode 100644 index 0000000..0c201c2 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-prim.h @@ -0,0 +1,527 @@ +#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__ +#define __LIBCFS_DARWIN_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include +#include + +#ifndef __DARWIN8__ +# ifndef __APPLE_API_PRIVATE +# define __APPLE_API_PRIVATE +# include +# undef __APPLE_API_PRIVATE +# else +# include +# endif +# include +# include +# include +#endif /* !__DARWIN8__ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Symbol functions for libcfs + * + * OSX has no facility for use to register symbol. + * So we have to implement it. 
+ */ +#define CFS_SYMBOL_LEN 64 + +struct cfs_symbol { + char name[CFS_SYMBOL_LEN]; + void *value; + int ref; + struct list_head sym_list; +}; + +extern kern_return_t cfs_symbol_register(const char *, const void *); +extern kern_return_t cfs_symbol_unregister(const char *); +extern void * cfs_symbol_get(const char *); +extern kern_return_t cfs_symbol_put(const char *); + +/* + * sysctl typedef + * + * User can register/unregister a list of sysctl_oids + * sysctl_oid is data struct of osx's sysctl-entry + */ +#define CONFIG_SYSCTL 1 + +typedef struct sysctl_oid * cfs_sysctl_table_t; +typedef cfs_sysctl_table_t cfs_sysctl_table_header_t; +cfs_sysctl_table_header_t *cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg); +void cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table); + +/* + * Proc file system APIs, no /proc fs support in OSX + */ +typedef struct cfs_proc_dir_entry { + void *data; +} cfs_proc_dir_entry_t; + +cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod, + cfs_proc_dir_entry_t *parent); +void cfs_free_proc_entry(cfs_proc_dir_entry_t *de); +void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry); + +typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, + int count, int *eof, void *data); +typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, + unsigned long count, void *data); + +/* + * cfs pseudo device + * + * cfs_psdev_t + * cfs_psdev_register: + * cfs_psdev_deregister: + */ +typedef struct { + int index; + void *handle; + const char *name; + struct cdevsw *devsw; + void *private; +} cfs_psdev_t; + +extern kern_return_t cfs_psdev_register(cfs_psdev_t *); +extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *); + +/* + * Task struct and ... + * + * Using BSD current_proc in Darwin + */ +extern boolean_t assert_wait_possible(void); +extern void *get_bsdtask_info(task_t); + +#ifdef __DARWIN8__ + +typedef struct {} cfs_task_t; +#define cfs_current() ((cfs_task_t *)current_thread()) +#else /* !__DARWIN8__ */ + +typedef struct uthread cfs_task_t; + +#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act())) +#define cfs_current() current_uthread() + +#endif /* !__DARWIN8__ */ + +#define cfs_task_lock(t) do {;} while (0) +#define cfs_task_unlock(t) do {;} while (0) + +#define set_current_state(s) do {;} while (0) + +#define CFS_DECL_JOURNAL_DATA +#define CFS_PUSH_JOURNAL do {;} while(0) +#define CFS_POP_JOURNAL do {;} while(0) + +#define THREAD_NAME(comm, fmt, a...) 
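/*
 * Editorial aside, not part of the original patch: a sketch of how the
 * cfs_symbol_* facility declared above is meant to be used, since XNU has
 * no built-in symbol registration.  The symbol name "example_hello" and the
 * functions below are hypothetical; cfs_symbol_register(), cfs_symbol_get(),
 * cfs_symbol_put() and cfs_symbol_unregister() are the calls exported here.
 */
static int example_hello(void)
{
        return 0;
}

static void example_symbol_usage(void)
{
        int (*hello)(void);

        /* provider side: publish the symbol under a well-known name */
        cfs_symbol_register("example_hello", (const void *)example_hello);

        /* consumer side: look it up, call it, then drop the reference */
        hello = (int (*)(void))cfs_symbol_get("example_hello");
        if (hello != NULL) {
                (void)hello();
                cfs_symbol_put("example_hello");
        }

        /* provider side: remove the symbol again before unloading */
        cfs_symbol_unregister("example_hello");
}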
+/* + * Kernel thread: + * + * OSX kernel thread can not be created with args, + * so we have to implement new APIs to create thread with args + */ + +typedef int (*cfs_thread_t)(void *); + +extern task_t kernel_task; + +/* + * cloning flags, no use in OSX, just copy them from Linux + */ +#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ +#define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ +#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ +#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PID 0x00001000 /* set if pid shared */ +#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ +#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ +#define CLONE_THREAD 0x00010000 /* Same thread group? */ +#define CLONE_NEWNS 0x00020000 /* New namespace group? */ + +#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) + +extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag); + + +/* + * Wait Queue implementation + * + * Like wait_queue in Linux + */ +typedef struct cfs_waitq { + struct ksleep_chan wq_ksleep_chan; +} cfs_waitq_t; + +typedef struct cfs_waitlink { + struct cfs_waitq *wl_waitq; + struct ksleep_link wl_ksleep_link; +} cfs_waitlink_t; + +typedef int cfs_task_state_t; + +#define CFS_TASK_INTERRUPTIBLE THREAD_ABORTSAFE +#define CFS_TASK_UNINT THREAD_UNINT + +void cfs_waitq_init(struct cfs_waitq *waitq); +void cfs_waitlink_init(struct cfs_waitlink *link); + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link); +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); +int cfs_waitq_active(struct cfs_waitq *waitq); + +void cfs_waitq_signal(struct cfs_waitq *waitq); +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); +void cfs_waitq_broadcast(struct cfs_waitq *waitq); + +void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); +cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, + cfs_task_state_t state, + cfs_duration_t timeout); + +/* + * Thread schedule APIs. 
+ */ +#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12)) +extern void thread_set_timer_deadline(__u64 deadline); +extern void thread_cancel_timer(void); + +static inline int cfs_schedule_timeout(int state, int64_t timeout) +{ + int result; + +#ifdef __DARWIN8__ + result = assert_wait((event_t)current_thread(), state); +#else + result = assert_wait((event_t)current_uthread(), state); +#endif + if (timeout > 0) { + __u64 expire; + nanoseconds_to_absolutetime(timeout, &expire); + clock_absolutetime_interval_to_deadline(expire, &expire); + thread_set_timer_deadline(expire); + } + if (result == THREAD_WAITING) + result = thread_block(THREAD_CONTINUE_NULL); + if (timeout > 0) + thread_cancel_timer(); + if (result == THREAD_TIMED_OUT) + result = 0; + else + result = 1; + return result; +} + +#define cfs_schedule() cfs_schedule_timeout(CFS_TASK_UNINT, CFS_TICK) +#define cfs_pause(tick) cfs_schedule_timeout(CFS_TASK_UNINT, tick) + +#define __wait_event(wq, condition) \ +do { \ + struct cfs_waitlink __wait; \ + \ + cfs_waitlink_init(&__wait); \ + for (;;) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) \ + break; \ + cfs_waitq_wait(&__wait, CFS_TASK_UNINT); \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ +} while (0) + +#define wait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __wait_event(wq, condition); \ +} while (0) + +#define __wait_event_interruptible(wq, condition, ex, ret) \ +do { \ + struct cfs_waitlink __wait; \ + \ + cfs_waitlink_init(&__wait); \ + for (;;) { \ + if (ex == 0) \ + cfs_waitq_add(&wq, &__wait); \ + else \ + cfs_waitq_add_exclusive(&wq, &__wait); \ + if (condition) \ + break; \ + if (!cfs_signal_pending()) { \ + cfs_waitq_wait(&__wait, \ + CFS_TASK_INTERRUPTIBLE); \ + cfs_waitq_del(&wq, &__wait); \ + continue; \ + } \ + ret = -ERESTARTSYS; \ + break; \ + } \ + cfs_waitq_del(&wq, &__wait); \ +} while (0) + +#define wait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + if (!condition) \ + __wait_event_interruptible(wq, condition, \ + 0, __ret); \ + __ret; \ +}) + +#define wait_event_interruptible_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + if (!condition) \ + __wait_event_interruptible(wq, condition, \ + 1, __ret); \ + __ret; \ +}) + +#ifndef __DARWIN8__ +extern void wakeup_one __P((void * chan)); +#endif +/* only used in tests */ +#define wake_up_process(p) \ + do { \ + wakeup_one((caddr_t)p); \ + } while (0) + +/* used in couple of places */ +static inline void sleep_on(cfs_waitq_t *waitq) +{ + cfs_waitlink_t link; + + cfs_waitlink_init(&link); + cfs_waitq_add(waitq, &link); + cfs_waitq_wait(&link, CFS_TASK_UNINT); + cfs_waitq_del(waitq, &link); +} + +/* + * Signal + */ +typedef sigset_t cfs_sigset_t; + +#define SIGNAL_MASK_ASSERT() +/* + * Timer + */ +typedef struct cfs_timer { + struct ktimer t; +} cfs_timer_t; + +#define cfs_init_timer(t) do {} while(0) +void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); +void cfs_timer_done(struct cfs_timer *t); +void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); +void cfs_timer_disarm(struct cfs_timer *t); +int cfs_timer_is_armed(struct cfs_timer *t); + +cfs_time_t cfs_timer_deadline(struct cfs_timer *t); + +/* + * Ioctl + * We don't need to copy out everything in osx + */ +#define cfs_ioctl_data_out(a, d, l) \ + ({ \ + int __size; \ + int __rc = 0; \ + assert((l) >= sizeof(*d)); \ + __size = (l) - sizeof(*d); \ + if (__size > 0) \ + __rc = copy_to_user((void *)a + __size, \ + (void *)d + __size, \ + __size); \ + __rc; \ + }) 
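/*
 * Editorial aside, not part of the original patch: a minimal sketch of the
 * intended wait-queue usage, mirroring the Linux wait_event() pattern with
 * the cfs_waitq_t emulation declared above.  The example_flag variable and
 * the producer/consumer helpers are hypothetical; cfs_waitq_init(),
 * cfs_waitq_signal() and wait_event_interruptible() are from this header.
 */
static cfs_waitq_t      example_waitq;
static volatile int     example_flag;

static void example_waiter_init(void)
{
        cfs_waitq_init(&example_waitq);
        example_flag = 0;
}

/* consumer: sleep until the flag is raised, allowing signals to interrupt */
static int example_wait_for_flag(void)
{
        return wait_event_interruptible(example_waitq, example_flag != 0);
}

/* producer: raise the flag and wake a sleeping thread */
static void example_raise_flag(void)
{
        example_flag = 1;
        cfs_waitq_signal(&example_waitq);
}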
+ +/* + * CPU + */ +/* Run in PowerG5 who is PPC64 */ +#define SMP_CACHE_BYTES 128 +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define NR_CPUS 2 + +/* + * XXX Liang: patch xnu and export current_processor()? + * + * #define smp_processor_id() current_processor() + */ +#define smp_processor_id() 0 +/* XXX smp_call_function is not supported in xnu */ +#define smp_call_function(f, a, n, w) do {} while(0) +int cfs_online_cpus(void); +#define smp_num_cpus cfs_online_cpus() + +/* + * Misc + */ +extern int is_suser(void); + +#ifndef likely +#define likely(exp) (exp) +#endif +#ifndef unlikely +#define unlikely(exp) (exp) +#endif + +#define lock_kernel() do {} while(0) +#define unlock_kernel() do {} while(0) + +#define CAP_SYS_BOOT 0 +#define CAP_SYS_ADMIN 1 +#define capable(a) ((a) == CAP_SYS_BOOT ? is_suser(): is_suser1()) + +#define USERMODEHELPER(path, argv, envp) (0) + +#define cfs_module(name, version, init, fini) \ +extern kern_return_t _start(kmod_info_t *ki, void *data); \ +extern kern_return_t _stop(kmod_info_t *ki, void *data); \ +__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \ +__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \ + \ +kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ + { "com.clusterfs.lustre." #name }, { version }, \ + -1, 0, 0, 0, 0, name##_start, name##_stop }; \ + \ +__private_extern__ kmod_start_func_t *_realmain = name##_start; \ +__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \ +__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \ + \ +kern_return_t name##_start(kmod_info_t *ki, void *d) \ +{ \ + return init(); \ +} \ + \ +kern_return_t name##_stop(kmod_info_t *ki, void *d) \ +{ \ + fini(); \ + return KERN_SUCCESS; \ +} \ + \ +/* \ + * to allow semicolon after cfs_module(...) 
\ + */ \ +struct __dummy_ ## name ## _struct {} + +#define inter_module_get(n) cfs_symbol_get(n) +#define inter_module_put(n) cfs_symbol_put(n) + +static inline int request_module(char *name) +{ + return (-EINVAL); +} + +#ifndef __exit +#define __exit +#endif +#ifndef __init +#define __init +#endif + +#define EXPORT_SYMBOL(s) +#define MODULE_AUTHOR(s) +#define MODULE_DESCRIPTION(s) +#define MODULE_LICENSE(s) +#define MODULE_PARM(a, b) +#define MODULE_PARM_DESC(a, b) + +#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) +#define LINUX_VERSION_CODE KERNEL_VERSION(2,5,0) + +#define NR_IRQS 512 +#define in_interrupt() ml_at_interrupt_context() + +#define KERN_EMERG "<0>" /* system is unusable */ +#define KERN_ALERT "<1>" /* action must be taken immediately */ +#define KERN_CRIT "<2>" /* critical conditions */ +#define KERN_ERR "<3>" /* error conditions */ +#define KERN_WARNING "<4>" /* warning conditions */ +#define KERN_NOTICE "<5>" /* normal but significant condition */ +#define KERN_INFO "<6>" /* informational */ +#define KERN_DEBUG "<7>" /* debug-level messages */ + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +#define ERR_PTR(err) ((void *)err) +#define IS_ERR(p) ((unsigned long)(p) + 1000 < 1000) + +#else /* !__KERNEL__ */ + +typedef struct cfs_proc_dir_entry { + void *data; +} cfs_proc_dir_entry_t; + +#include +#define __WORDSIZE 32 + +#endif /* END __KERNEL__ */ +/* + * Error number + */ +#ifndef EPROTO +#define EPROTO EPROTOTYPE +#endif +#ifndef EBADR +#define EBADR EBADRPC +#endif +#ifndef ERESTARTSYS +#define ERESTARTSYS 512 +#endif +#ifndef EDEADLOCK +#define EDEADLOCK EDEADLK +#endif +#ifndef ECOMM +#define ECOMM EINVAL +#endif +#ifndef ENODATA +#define ENODATA EINVAL +#endif +#ifndef ENOTSUPP +#define ENOTSUPP EINVAL +#endif + +#if BYTE_ORDER == BIG_ENDIAN +# define __BIG_ENDIAN +#else +# define __LITTLE_ENDIAN +#endif + +#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */ diff --git a/libcfs/include/libcfs/darwin/darwin-sync.h b/libcfs/include/libcfs/darwin/darwin-sync.h new file mode 100644 index 0000000..7817b38 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-sync.h @@ -0,0 +1,332 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Implementation of standard libcfs synchronization primitives for XNU + * kernel. + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +/* + * xnu_sync.h + * + * Created by nikita on Sun Jul 18 2004. + * + * Prototypes of XNU synchronization primitives. + */ + +#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__ +#define __LIBCFS_DARWIN_XNU_SYNC_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#define XNU_SYNC_DEBUG (1) + +#if XNU_SYNC_DEBUG +#define ON_SYNC_DEBUG(e) e +#else +#define ON_SYNC_DEBUG(e) +#endif + +enum { + /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */ + KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */ + KSEM_MAGIC = 0x1abe11ed, + KCOND_MAGIC = 0xb01dface, + KRW_MAGIC = 0xdabb1edd, + KSPIN_MAGIC = 0xca11ab1e, + KRW_SPIN_MAGIC = 0xbabeface, + KSLEEP_CHAN_MAGIC = 0x0debac1e, + KSLEEP_LINK_MAGIC = 0xacc01ade, + KTIMER_MAGIC = 0xbefadd1e +}; + +/* ------------------------- spin lock ------------------------- */ + +/* + * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h + */ +#define SMP (1) + +#include + +#ifdef __DARWIN8__ + +#include +#include +#include +#include + +/* + * hw_lock is not available in Darwin8 (hw_lock_* are not exported at all), + * so use lck_spin_t. we can hack out lck_spin_t easily, it's the only + * hacking in Darwin8.x. We did so because it'll take a lot of time to + * add lock_done for all locks, maybe it should be done in the future. + * If lock_done for all locks were added, we can: + * + * typedef lck_spin_t *xnu_spin_t; + */ +#if defined (__ppc__) +typedef struct { + unsigned int opaque[3]; +} xnu_spin_t; +#elif defined (__i386__) +typedef struct { + unsigned int opaque[10]; +} xnu_spin_t; +#endif + +/* + * wait_queue is not available in Darwin8 (wait_queue_* are not exported), + * use assert_wait/wakeup/wake_one (wait_queue in kernel hash). + */ +typedef void * xnu_wait_queue_t; + +/* DARWIN8 */ +#else + +#include +#include +#include + +typedef hw_lock_data_t xnu_spin_t; +typedef struct wait_queue xnu_wait_queue_t; + +/* DARWIN8 */ +#endif + +struct kspin { +#if SMP + xnu_spin_t lock; +#endif +#if XNU_SYNC_DEBUG + unsigned magic; + thread_t owner; +#endif +}; + +void kspin_init(struct kspin *spin); +void kspin_done(struct kspin *spin); +void kspin_lock(struct kspin *spin); +void kspin_unlock(struct kspin *spin); +int kspin_trylock(struct kspin *spin); + +#if XNU_SYNC_DEBUG +/* + * two functions below are for use in assertions + */ +/* true, iff spin-lock is locked by the current thread */ +int kspin_islocked(struct kspin *spin); +/* true, iff spin-lock is not locked by the current thread */ +int kspin_isnotlocked(struct kspin *spin); +#else +#define kspin_islocked(s) (1) +#define kspin_isnotlocked(s) (1) +#endif + +/* ------------------------- rw spinlock ----------------------- */ +struct krw_spin { + struct kspin guard; + int count; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void krw_spin_init(struct krw_spin *sem); +void krw_spin_done(struct krw_spin *sem); +void krw_spin_down_r(struct krw_spin *sem); +void krw_spin_down_w(struct krw_spin *sem); +void krw_spin_up_r(struct krw_spin *sem); +void krw_spin_up_w(struct krw_spin *sem); + +/* ------------------------- semaphore ------------------------- */ + +struct ksem { + struct kspin guard; + xnu_wait_queue_t q; + int value; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void ksem_init(struct ksem *sem, int value); +void ksem_done(struct ksem *sem); +int ksem_up (struct ksem *sem, int value); +void ksem_down(struct ksem *sem, int value); +int ksem_trydown(struct ksem *sem, int value); + +/* ------------------------- mutex ------------------------- */ + +struct kmut { + struct ksem s; +#if XNU_SYNC_DEBUG + unsigned magic; + thread_t owner; +#endif +}; + +void kmut_init(struct kmut *mut); +void kmut_done(struct kmut *mut); + +void kmut_lock (struct kmut *mut); +void kmut_unlock (struct kmut *mut); 
+int kmut_trylock(struct kmut *mut); + +#if XNU_SYNC_DEBUG +/* + * two functions below are for use in assertions + */ +/* true, iff mutex is locked by the current thread */ +int kmut_islocked(struct kmut *mut); +/* true, iff mutex is not locked by the current thread */ +int kmut_isnotlocked(struct kmut *mut); +#else +#define kmut_islocked(m) (1) +#define kmut_isnotlocked(m) (1) +#endif + +/* ------------------------- condition variable ------------------------- */ + +struct kcond_link { + struct kcond_link *next; + struct ksem sem; +}; + +struct kcond { + struct kspin guard; + struct kcond_link *waiters; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void kcond_init(struct kcond *cond); +void kcond_done(struct kcond *cond); +void kcond_wait(struct kcond *cond, struct kspin *lock); +void kcond_signal(struct kcond *cond); +void kcond_broadcast(struct kcond *cond); + +void kcond_wait_guard(struct kcond *cond); +void kcond_signal_guard(struct kcond *cond); +void kcond_broadcast_guard(struct kcond *cond); + +/* ------------------------- read-write semaphore ------------------------- */ + +struct krw_sem { + int count; + struct kcond cond; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void krw_sem_init(struct krw_sem *sem); +void krw_sem_done(struct krw_sem *sem); +void krw_sem_down_r(struct krw_sem *sem); +int krw_sem_down_r_try(struct krw_sem *sem); +void krw_sem_down_w(struct krw_sem *sem); +int krw_sem_down_w_try(struct krw_sem *sem); +void krw_sem_up_r(struct krw_sem *sem); +void krw_sem_up_w(struct krw_sem *sem); + +/* ------------------------- sleep-channel ------------------------- */ + +struct ksleep_chan { + struct kspin guard; + struct list_head waiters; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +#define KSLEEP_CHAN_INITIALIZER {{{0}}} + +struct ksleep_link { + int flags; + event_t event; + int hits; + struct ksleep_chan *forward; + struct list_head linkage; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +enum { + KSLEEP_EXCLUSIVE = 1 +}; + +void ksleep_chan_init(struct ksleep_chan *chan); +void ksleep_chan_done(struct ksleep_chan *chan); + +void ksleep_link_init(struct ksleep_link *link); +void ksleep_link_done(struct ksleep_link *link); + +void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link); +void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link); + +void ksleep_wait(struct ksleep_chan *chan, int state); +int64_t ksleep_timedwait(struct ksleep_chan *chan, int state, __u64 timeout); + +void ksleep_wake(struct ksleep_chan *chan); +void ksleep_wake_all(struct ksleep_chan *chan); +void ksleep_wake_nr(struct ksleep_chan *chan, int nr); + +#define KSLEEP_LINK_DECLARE(name) \ +{ \ + .flags = 0, \ + .event = 0, \ + .hits = 0, \ + .linkage = CFS_LIST_HEAD(name.linkage), \ + .magic = KSLEEP_LINK_MAGIC \ +} + +/* ------------------------- timer ------------------------- */ + +struct ktimer { + struct kspin guard; + void (*func)(void *); + void *arg; + u_int64_t deadline; /* timer deadline in absolute nanoseconds */ + int armed; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg); +void ktimer_done(struct ktimer *t); +void ktimer_arm(struct ktimer *t, u_int64_t deadline); +void ktimer_disarm(struct ktimer *t); +int ktimer_is_armed(struct ktimer *t); + +u_int64_t ktimer_deadline(struct ktimer *t); + +/* __XNU_SYNC_H__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: 
+ */ diff --git a/libcfs/include/libcfs/darwin/darwin-tcpip.h b/libcfs/include/libcfs/darwin/darwin-tcpip.h new file mode 100644 index 0000000..1a73891 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-tcpip.h @@ -0,0 +1,90 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_DARWIN_TCPIP_H__ +#define __LIBCFS_DARWIN_TCPIP_H__ + +#ifdef __KERNEL__ +#include + +#ifdef __DARWIN8__ + +struct socket; + +typedef void (*so_upcall)(socket_t sock, void* arg, int waitf); + +#define CFS_SOCK_UPCALL 0x1 +#define CFS_SOCK_DOWN 0x2 + +#define CFS_SOCK_MAGIC 0xbabeface + +typedef struct cfs_socket { + socket_t s_so; + int s_magic; + int s_flags; + so_upcall s_upcall; + void *s_upcallarg; +} cfs_socket_t; + + +/* cfs_socket_t to bsd socket */ +#define C2B_SOCK(s) ((s)->s_so) + +static inline int get_sock_intopt(socket_t so, int opt) +{ + int val, len; + int rc; + + /* + * sock_getsockopt will take a lock(mutex) for socket, + * so it can be blocked. So be careful while using + * them. + */ + len = sizeof(val); + rc = sock_getsockopt(so, SOL_SOCKET, opt, &val, &len); + assert(rc == 0); + return val; +} + +#define SOCK_ERROR(s) get_sock_intopt(C2B_SOCK(s), SO_ERROR) +/* #define SOCK_WMEM_QUEUED(s) (0) */ +#define SOCK_WMEM_QUEUED(s) get_sock_intopt(C2B_SOCK(s), SO_NWRITE) +/* XXX Liang: no reliable way to get it in Darwin8.x */ +#define SOCK_TEST_NOSPACE(s) (0) + +void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg); +void libcfs_sock_reset_cb(cfs_socket_t *sock); + +#else /* !__DARWIN8__ */ + +#define SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc) +#define SOCK_ERROR(so) ((so)->so_error) + +#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat) + +#endif /* !__DARWIN8__ */ + +#endif /* __KERNEL END */ + +#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/libcfs/include/libcfs/darwin/darwin-time.h b/libcfs/include/libcfs/darwin/darwin-time.h new file mode 100644 index 0000000..35862a6 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-time.h @@ -0,0 +1,249 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for XNU kernel + * + */ + +#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__ +#define __LIBCFS_DARWIN_DARWIN_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION 1000000 + +#ifdef __KERNEL__ +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +/* + * There are three way to measure time in OS X: + * 1. nanoseconds + * 2. absolute time (abstime unit equal to the length of one bus cycle), + * schedule of thread/timer are counted by absolute time, but abstime + * in different mac can be different also, so we wouldn't use it. + * 3. clock interval (1sec = 100hz). But clock interval only taken by KPI + * like tsleep(). + * + * We use nanoseconds (uptime, not calendar time) + * + * clock_get_uptime() :get absolute time since bootup. 
+ * nanouptime() :get nanoseconds since bootup + * microuptime() :get microseonds since bootup + * nanotime() :get nanoseconds since epoch + * microtime() :get microseconds since epoch + */ +typedef u_int64_t cfs_time_t; /* nanoseconds */ +typedef int64_t cfs_duration_t; + +#define CFS_TIME_T "%llu" +#define CFS_DURATION_T "%lld" + +typedef struct timeval cfs_fs_time_t; + +static inline cfs_time_t cfs_time_current(void) +{ + struct timespec instant; + + nanouptime(&instant); + return ((u_int64_t)instant.tv_sec) * NSEC_PER_SEC + instant.tv_nsec; +} + +static inline time_t cfs_time_current_sec(void) +{ + struct timespec instant; + + nanouptime(&instant); + return instant.tv_sec; +} + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return t + d; +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return t1 - t2; +} + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return (int64_t)t1 - (int64_t)t2 < 0; +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return (int64_t)t1 - (int64_t)t2 <= 0; +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + microtime((struct timeval *)t); +} + +static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return t->tv_sec; +} + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + *v = *t; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = t->tv_sec; + s->tv_nsec = t->tv_usec * NSEC_PER_USEC; +} + +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return (NSEC_PER_SEC * (int64_t)seconds); +} + +/* + * internal helper function used by cfs_fs_time_before*() + */ +static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return ((int64_t)t->tv_sec)*NSEC_PER_SEC + t->tv_usec*NSEC_PER_USEC; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0; +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0; +} + +static inline time_t cfs_duration_sec(cfs_duration_t d) +{ + return d / NSEC_PER_SEC; +} + +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ + s->tv_sec = d / NSEC_PER_SEC; + s->tv_usec = (d - ((int64_t)s->tv_sec) * NSEC_PER_SEC) / NSEC_PER_USEC; +} + +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ + s->tv_sec = d / NSEC_PER_SEC; + s->tv_nsec = d - ((int64_t)s->tv_sec) * NSEC_PER_SEC; +} + +#define cfs_time_current_64 cfs_time_current +#define cfs_time_add_64 cfs_time_add +#define cfs_time_shift_64 cfs_time_shift +#define cfs_time_before_64 cfs_time_before +#define cfs_time_beforeq_64 cfs_time_beforeq + +/* + * One jiffy (in nanoseconds) + * + * osfmk/kern/sched_prim.c + * #define DEFAULT_PREEMPTION_RATE 100 + */ +#define CFS_TICK (NSEC_PER_SEC / (u_int64_t)100) + +#define LTIME_S(t) (t) + +/* __KERNEL__ */ +#else + +/* + * User level + */ +#include + +/* __KERNEL__ */ +#endif + +/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/darwin/darwin-types.h b/libcfs/include/libcfs/darwin/darwin-types.h new file mode 100644 index 0000000..3c247246 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-types.h @@ -0,0 +1,95 @@ +/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__ +#define __LIBCFS_DARWIN_XNU_TYPES_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#ifndef _BLKID_TYPES_H +#define _BLKID_TYPES_H +#endif + +typedef u_int8_t __u8; +typedef u_int16_t __u16; +typedef u_int32_t __u32; +typedef u_int64_t __u64; +typedef int8_t __s8; +typedef int16_t __s16; +typedef int32_t __s32; +typedef int64_t __s64; + +#ifdef __KERNEL__ + +#include + + +typedef struct { int e; } event_chan_t; +typedef dev_t kdev_t; + +/* + * Atmoic define + */ +#include + +typedef struct { volatile uint32_t counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } +#define atomic_read(a) ((a)->counter) +#define atomic_set(a, v) (((a)->counter) = (v)) +#ifdef __DARWIN8__ +/* OS*Atomic return the value before the operation */ +#define atomic_add(v, a) OSAddAtomic(v, (SInt32 *)&((a)->counter)) +#define atomic_sub(v, a) OSAddAtomic(-(v), (SInt32 *)&((a)->counter)) +#define atomic_inc(a) OSIncrementAtomic((SInt32 *)&((a)->counter)) +#define atomic_dec(a) OSDecrementAtomic((SInt32 *)&((a)->counter)) +#else /* !__DARWIN8__ */ +#define atomic_add(v, a) hw_atomic_add((__u32 *)&((a)->counter), v) +#define atomic_sub(v, a) hw_atomic_sub((__u32 *)&((a)->counter), v) +#define atomic_inc(a) atomic_add(1, a) +#define atomic_dec(a) atomic_sub(1, a) +#endif /* !__DARWIN8__ */ +#define atomic_sub_and_test(v, a) (atomic_sub(v, a) == (v)) +#define atomic_dec_and_test(a) (atomic_dec(a) == 1) +#define atomic_inc_return(a) (atomic_inc(a) + 1) +#define atomic_dec_return(a) (atomic_dec(a) - 1) + +#include +typedef off_t loff_t; + +#else /* !__KERNEL__ */ + +#include + +typedef off_t loff_t; + +#endif /* __KERNEL END */ +typedef unsigned short umode_t; + +#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/libcfs/include/libcfs/darwin/darwin-utils.h b/libcfs/include/libcfs/darwin/darwin-utils.h new file mode 100644 index 0000000..0f808a2 --- /dev/null +++ b/libcfs/include/libcfs/darwin/darwin-utils.h @@ -0,0 +1,67 @@ +#ifndef __LIBCFS_DARWIN_UTILS_H__ +#define __LIBCFS_DARWIN_UTILS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#include + +#ifdef __KERNEL__ +inline int isspace(char c); +char *strpbrk(const char *cs, const char *ct); +char * strsep(char **s, const char *ct); +size_t strnlen(const char * s, size_t count); +char * strstr(const char *in, const char *str); +char * strrchr(const char *p, int ch); +char * ul2dstr(unsigned long address, char *buf, int len); + +#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) +#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3) +#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3) +#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3) + +#define test_bit(i, a) isset(a, i) +#define set_bit(i, a) setbit(a, i) +#define clear_bit(i, a) clrbit(a, i) + +#define get_random_bytes(buf, len) read_random(buf, len) + +#endif /* __KERNEL__ */ + +#ifndef min_t +#define min_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) +#endif +#ifndef max_t +#define max_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) +#endif + +#define do_div(n,base) \ + ({ \ + __u64 __n = (n); \ + __u32 __base = (base); \ + __u32 __mod; \ + \ + __mod = __n % __base; \ + n = __n / __base; \ + __mod; \ + }) + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +#define HIPQUAD NIPQUAD + +#ifndef LIST_CIRCLE +#define LIST_CIRCLE(elm, field) \ + do { \ + (elm)->field.le_prev = &(elm)->field.le_next; \ + } while (0) +#endif + +#endif /* __XNU_UTILS_H__ */ diff --git a/libcfs/include/libcfs/darwin/kp30.h b/libcfs/include/libcfs/darwin/kp30.h new file mode 100644 index 0000000..f9e94b1 --- /dev/null +++ b/libcfs/include/libcfs/darwin/kp30.h @@ -0,0 +1,101 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_DARWIN_KP30__ +#define __LIBCFS_DARWIN_KP30__ + +#ifndef __LIBCFS_KP30_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define our_cond_resched() cfs_schedule_timeout(CFS_TASK_INTERRUPTIBLE, 1) + +#ifdef CONFIG_SMP +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ +#else +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) +#endif +#define LASSERT_SEM_LOCKED(sem) do {} while(0) /* XXX */ + +#define LIBCFS_PANIC(msg) panic(msg) +#error libcfs_register_panic_notifier() missing +#error libcfs_unregister_panic_notifier() missing + +/* --------------------------------------------------------------------- */ + +#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) +#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) + +#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x)) +#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) + +#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0) +#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0) + +#define num_online_cpus() cfs_online_cpus() + +/******************************************************************************/ +/* XXX Liang: There is no module parameter supporting in OSX */ +#define CFS_MODULE_PARM(name, t, type, perm, desc) + +#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ +/******************************************************************************/ + +#else /* !__KERNEL__ */ +# include +# include +# include +# include +# include +# include +# include +#endif + +#define BITS_PER_LONG LONG_BIT +/******************************************************************************/ +/* Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. */ +#define LWT_SUPPORT 0 + +typedef struct { + long long lwte_when; + char *lwte_where; + void *lwte_task; + long lwte_p1; + long lwte_p2; + long lwte_p3; + long lwte_p4; +} lwt_event_t; + +# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */ + +/* -------------------------------------------------------------------------- */ + +#define IOCTL_LIBCFS_TYPE struct libcfs_ioctl_data + +#define LPU64 "%llu" +#define LPD64 "%lld" +#define LPX64 "%#llx" +#define LPSZ "%lu" +#define LPSSZ "%ld" +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) + +#endif diff --git a/libcfs/include/libcfs/darwin/libcfs.h b/libcfs/include/libcfs/darwin/libcfs.h new file mode 100644 index 0000000..eb4d8f3 --- /dev/null +++ b/libcfs/include/libcfs/darwin/libcfs.h @@ -0,0 +1,193 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_DARWIN_LIBCFS_H__ +#define __LIBCFS_DARWIN_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __KERNEL__ +# include +# include +# define do_gettimeofday(tv) microuptime(tv) +#else +# include +# define do_gettimeofday(tv) gettimeofday(tv, NULL); +typedef unsigned long long cycles_t; +#endif + +#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x) +#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x) +#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x) + +#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x) +#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x) +#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x) + +#define cpu_to_le64(x) __cpu_to_le64(x) +#define cpu_to_le32(x) __cpu_to_le32(x) +#define cpu_to_le16(x) __cpu_to_le16(x) + +#define le64_to_cpu(x) __le64_to_cpu(x) +#define le32_to_cpu(x) __le32_to_cpu(x) +#define le16_to_cpu(x) __le16_to_cpu(x) + +#define __swab16(x) OSSwapInt16(x) +#define __swab32(x) OSSwapInt32(x) +#define __swab64(x) OSSwapInt64(x) +#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0) +#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0) +#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0) + +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u32 ph_cpu_id; + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + + +#ifdef __KERNEL__ +# include +# include +/* Fix me */ +# define THREAD_SIZE 8192 +#else +# define THREAD_SIZE 8192 +#endif +#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) + +#define CHECK_STACK() do { } while(0) +#define CDEBUG_STACK() (0L) + +/* Darwin has defined RETURN, so we have to undef it in lustre */ +#ifdef RETURN +#undef RETURN +#endif + +/* + * When this is enabled debugging messages are indented according to the + * current "nesting level". Nesting level in increased when ENTRY macro + * is executed, and decreased on EXIT and RETURN. + */ +#ifdef __KERNEL__ +#define ENTRY_NESTING_SUPPORT (0) +#endif + +#if ENTRY_NESTING_SUPPORT + +/* + * Currently ENTRY_NESTING_SUPPORT is only supported for XNU port. Basic + * idea is to keep per-thread pointer to small data structure (struct + * cfs_debug_data) describing current nesting level. In XNU unused + * proc->p_wmegs field in hijacked for this. On Linux + * current->journal_info can be used. In user space + * pthread_{g,s}etspecific(). + * + * ENTRY macro allocates new cfs_debug_data on stack, and installs it as + * a current nesting level, storing old data in cfs_debug_data it just + * created. + * + * EXIT pops old value back. + * + */ + +/* + * One problem with this approach is that there is a lot of code that + * does ENTRY and then escapes scope without doing EXIT/RETURN. In this + * case per-thread current nesting level pointer is dangling (it points + * to the stack area that is possible already overridden). To detect + * such cases, we add two magic fields to the cfs_debug_data and check + * them whenever current nesting level pointer is dereferenced. While + * looking flaky this works because stack is always consumed + * "continously". 
+ */ +enum { + CDD_MAGIC1 = 0x02128506, + CDD_MAGIC2 = 0x42424242 +}; + +struct cfs_debug_data { + unsigned int magic1; + struct cfs_debug_data *parent; + int nesting_level; + unsigned int magic2; +}; + +void __entry_nesting(struct cfs_debug_data *child); +void __exit_nesting(struct cfs_debug_data *child); +unsigned int __current_nesting_level(void); + +#define ENTRY_NESTING \ +struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \ + .parent = NULL, \ + .nesting_level = 0, \ + .magic2 = CDD_MAGIC2 }; \ +__entry_nesting(&__cdd); + +#define EXIT_NESTING __exit_nesting(&__cdd) + +/* ENTRY_NESTING_SUPPORT */ +#else + +#define ENTRY_NESTING do {;} while (0) +#define EXIT_NESTING do {;} while (0) +#define __current_nesting_level() (0) + +/* ENTRY_NESTING_SUPPORT */ +#endif + +#define LUSTRE_LNET_PID 12345 + +#define _XNU_LIBCFS_H + +/* + * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) + * + * Implementation is in darwin-curproc.c + */ +#define CFS_CURPROC_COMM_MAX MAXCOMLEN +/* + * XNU has no capabilities + */ +typedef int cfs_kernel_cap_t; + +#ifdef __KERNEL__ +enum { + /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ + CFS_STACK_TRACE_DEPTH = 16 +}; + +struct cfs_stack_trace { + void *frame[CFS_STACK_TRACE_DEPTH]; +}; + +#define printk(format, args...) printf(format, ## args) + +#ifdef WITH_WATCHDOG +#undef WITH_WATCHDOG +#endif + +#endif /* __KERNEL__ */ + +#endif /* _XNU_LIBCFS_H */ diff --git a/libcfs/include/libcfs/darwin/lltrace.h b/libcfs/include/libcfs/darwin/lltrace.h new file mode 100644 index 0000000..31d6e17 --- /dev/null +++ b/libcfs/include/libcfs/darwin/lltrace.h @@ -0,0 +1,26 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_DARWIN_LLTRACE_H__ +#define __LIBCFS_DARWIN_LLTRACE_H__ + +#ifndef __LIBCFS_LLTRACE_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/libcfs/include/libcfs/darwin/portals_utils.h b/libcfs/include/libcfs/darwin/portals_utils.h new file mode 100644 index 0000000..4907cb1 --- /dev/null +++ b/libcfs/include/libcfs/darwin/portals_utils.h @@ -0,0 +1,18 @@ +#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__ +#define __LIBCFS_DARWIN_PORTALS_UTILS_H__ + +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#ifdef __KERNEL__ +#include +#include +#else /* !__KERNEL__ */ +#include +#include +#include +#endif /* !__KERNEL__ */ + +#endif diff --git a/libcfs/include/libcfs/kp30.h b/libcfs/include/libcfs/kp30.h new file mode 100644 index 0000000..0869f67 --- /dev/null +++ b/libcfs/include/libcfs/kp30.h @@ -0,0 +1,610 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_KP30_H__ +#define __LIBCFS_KP30_H__ + +/* Controlled via configure key */ +/* #define LIBCFS_DEBUG */ + +#include +#include + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system +#endif + +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +#endif + +#ifdef __KERNEL__ + +#ifdef LIBCFS_DEBUG + +/* + * When this is on, LASSERT macro includes check for assignment used instead + * of equality check, but doesn't have unlikely(). 
Turn this on from time to + * time to make test-builds. This shouldn't be on for production release. + */ +#define LASSERT_CHECKED (0) + +#if LASSERT_CHECKED +/* + * Assertion. + * + * Strange construction with empty "then" clause is used to trigger compiler + * warnings on the assertions of the form LASSERT(a = b); + * + * "warning: suggest parentheses around assignment used as truth value" + * + * requires -Wall. Unfortunately this rules out use of likely/unlikely. + */ +#define LASSERT(cond) \ +({ \ + if (cond) \ + ; \ + else \ + libcfs_assertion_failed( #cond , __FILE__, \ + __FUNCTION__, __LINE__); \ +}) + +#define LASSERTF(cond, fmt, a...) \ +({ \ + if (cond) \ + ; \ + else { \ + libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ + __FILE__, __FUNCTION__,__LINE__, \ + "ASSERTION(" #cond ") failed:" fmt, \ + ## a); \ + LBUG(); \ + } \ +}) + +/* LASSERT_CHECKED */ +#else + +#define LASSERT(cond) \ +({ \ + if (unlikely(!(cond))) \ + libcfs_assertion_failed(#cond , __FILE__, \ + __FUNCTION__, __LINE__); \ +}) + +#define LASSERTF(cond, fmt, a...) \ +({ \ + if (unlikely(!(cond))) { \ + libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_EMERG, \ + __FILE__, __FUNCTION__,__LINE__, \ + "ASSERTION(" #cond ") failed:" fmt, \ + ## a); \ + LBUG(); \ + } \ +}) + +/* LASSERT_CHECKED */ +#endif + +/* LIBCFS_DEBUG */ +#else +#define LASSERT(e) ((void)(0)) +#define LASSERTF(cond, fmt...) ((void)(0)) +#endif /* LIBCFS_DEBUG */ + +#define KLASSERT(e) LASSERT(e) + +void lbug_with_loc(char *file, const char *func, const int line) + __attribute__((noreturn)); + +#define LBUG() lbug_with_loc(__FILE__, __FUNCTION__, __LINE__) + +extern atomic_t libcfs_kmemory; +/* + * Memory + */ +#ifdef LIBCFS_DEBUG + +# define libcfs_kmem_inc(ptr, size) \ +do { \ + atomic_add(size, &libcfs_kmemory); \ +} while (0) + +# define libcfs_kmem_dec(ptr, size) do { \ + atomic_sub(size, &libcfs_kmemory); \ +} while (0) + +#else +# define libcfs_kmem_inc(ptr, size) do {} while (0) +# define libcfs_kmem_dec(ptr, size) do {} while (0) +#endif /* LIBCFS_DEBUG */ + +#define LIBCFS_VMALLOC_SIZE 16384 + +#define LIBCFS_ALLOC_GFP(ptr, size, mask) \ +do { \ + LASSERT(!in_interrupt() || \ + (size <= LIBCFS_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\ + if (unlikely((size) > LIBCFS_VMALLOC_SIZE)) \ + (ptr) = cfs_alloc_large(size); \ + else \ + (ptr) = cfs_alloc((size), (mask)); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LNET: out of memory at %s:%d (tried to alloc '" \ + #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ + CERROR("LNET: %d total bytes allocated by lnet\n", \ + atomic_read(&libcfs_kmemory)); \ + } else { \ + libcfs_kmem_inc((ptr), (size)); \ + if (!((mask) & CFS_ALLOC_ZERO)) \ + memset((ptr), 0, (size)); \ + } \ + CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ + (int)(size), (ptr), atomic_read (&libcfs_kmemory)); \ +} while (0) + +#define LIBCFS_ALLOC(ptr, size) \ + LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_IO) + +#define LIBCFS_ALLOC_ATOMIC(ptr, size) \ + LIBCFS_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC) + +#define LIBCFS_FREE(ptr, size) \ +do { \ + int s = (size); \ + if (unlikely((ptr) == NULL)) { \ + CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ + "%s:%d\n", s, __FILE__, __LINE__); \ + break; \ + } \ + if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ + cfs_free_large(ptr); \ + else \ + cfs_free(ptr); \ + libcfs_kmem_dec((ptr), s); \ + CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ + s, (ptr), atomic_read(&libcfs_kmemory)); \ +} while (0) + 
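
As a quick illustration of how the allocation macros above pair up, here is a minimal sketch, not part of the patch; the structure and function names are hypothetical. The point it shows is that LIBCFS_FREE() must be given the same size as the matching LIBCFS_ALLOC() so the libcfs_kmemory accounting stays balanced and oversized allocations are routed back through cfs_free_large().

        /* Usage sketch (illustrative, not patch content). */
        struct my_obj {                 /* hypothetical structure */
                int mo_refcount;
        };

        static int my_obj_create(struct my_obj **out)
        {
                struct my_obj *obj;

                LIBCFS_ALLOC(obj, sizeof(*obj));     /* zeroed on success */
                if (obj == NULL)
                        return -ENOMEM;

                obj->mo_refcount = 1;
                *out = obj;
                return 0;
        }

        static void my_obj_destroy(struct my_obj *obj)
        {
                /* pass the same size as the allocation so the
                 * libcfs_kmemory counter stays correct */
                LIBCFS_FREE(obj, sizeof(*obj));
        }
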
+/******************************************************************************/ + +/* htonl hack - either this, or compile with -O2. Stupid byteorder/generic.h */ +#if defined(__GNUC__) && (__GNUC__ >= 2) && !defined(__OPTIMIZE__) +#define ___htonl(x) __cpu_to_be32(x) +#define ___htons(x) __cpu_to_be16(x) +#define ___ntohl(x) __be32_to_cpu(x) +#define ___ntohs(x) __be16_to_cpu(x) +#define htonl(x) ___htonl(x) +#define ntohl(x) ___ntohl(x) +#define htons(x) ___htons(x) +#define ntohs(x) ___ntohs(x) +#endif + +void libcfs_debug_dumpstack(cfs_task_t *tsk); +void libcfs_run_upcall(char **argv); +void libcfs_run_lbug_upcall(char * file, const char *fn, const int line); +void libcfs_debug_dumplog(void); +int libcfs_debug_init(unsigned long bufsize); +int libcfs_debug_cleanup(void); +int libcfs_debug_clear_buffer(void); +int libcfs_debug_mark_buffer(char *text); + +void libcfs_debug_set_level(unsigned int debug_level); + +#else /* !__KERNEL__ */ +# ifdef LIBCFS_DEBUG +# undef NDEBUG +# include +# define LASSERT(e) assert(e) +# define LASSERTF(cond, args...) \ +do { \ + if (!(cond)) \ + CERROR(args); \ + assert(cond); \ +} while (0) +# define LBUG() assert(0) +# else +# define LASSERT(e) ((void)(0)) +# define LASSERTF(cond, args...) do { } while (0) +# define LBUG() ((void)(0)) +# endif /* LIBCFS_DEBUG */ +# define KLASSERT(e) do { } while (0) +# define printk(format, args...) printf (format, ## args) +# ifdef CRAY_XT3 /* buggy calloc! */ +# define LIBCFS_ALLOC(ptr, size) \ + do { \ + (ptr) = malloc(size); \ + memset(ptr, 0, size); \ + } while (0) +# else +# define LIBCFS_ALLOC(ptr, size) do { (ptr) = calloc(1,size); } while (0) +# endif +# define LIBCFS_FREE(a, b) do { free(a); } while (0) + +void libcfs_debug_dumplog(void); +int libcfs_debug_init(unsigned long bufsize); +int libcfs_debug_cleanup(void); + +/* + * Generic compiler-dependent macros required for kernel + * build go below this comment. Actual compiler/compiler version + * specific implementations come from the above header files + */ + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +/* !__KERNEL__ */ +#endif + +/* + * compile-time assertions. @cond has to be constant expression. + * ISO C Standard: + * + * 6.8.4.2 The switch statement + * + * .... + * + * [#3] The expression of each case label shall be an integer + * constant expression and no two of the case constant + * expressions in the same switch statement shall have the same + * value after conversion... 
+ * + */ +#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) + +/* support decl needed both by kernel and liblustre */ +int libcfs_isknown_lnd(int type); +char *libcfs_lnd2modname(int type); +char *libcfs_lnd2str(int type); +int libcfs_str2lnd(const char *str); +char *libcfs_net2str(__u32 net); +char *libcfs_nid2str(lnet_nid_t nid); +__u32 libcfs_str2net(const char *str); +lnet_nid_t libcfs_str2nid(const char *str); +int libcfs_str2anynid(lnet_nid_t *nid, const char *str); +char *libcfs_id2str(lnet_process_id_t id); +void libcfs_setnet0alias(int type); + +/* how an LNET NID encodes net:address */ +#define LNET_NIDADDR(nid) ((__u32)((nid) & 0xffffffff)) +#define LNET_NIDNET(nid) ((__u32)(((nid) >> 32)) & 0xffffffff) +#define LNET_MKNID(net,addr) ((((__u64)(net))<<32)|((__u64)(addr))) +/* how net encodes type:number */ +#define LNET_NETNUM(net) ((net) & 0xffff) +#define LNET_NETTYP(net) (((net) >> 16) & 0xffff) +#define LNET_MKNET(typ,num) ((((__u32)(typ))<<16)|((__u32)(num))) + +/* implication */ +#define ergo(a, b) (!(a) || (b)) +/* logical equivalence */ +#define equi(a, b) (!!(a) == !!(b)) + +#ifndef CURRENT_TIME +# define CURRENT_TIME time(0) +#endif + +/* -------------------------------------------------------------------- + * Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. + * All stuff about lwt are put in arch/kp30.h + * -------------------------------------------------------------------- */ + +struct libcfs_device_userstate +{ + int ldu_memhog_pages; + cfs_page_t *ldu_memhog_root_page; +}; + +/* what used to be in portals_lib.h */ +#ifndef MIN +# define MIN(a,b) (((a)<(b)) ? (a): (b)) +#endif +#ifndef MAX +# define MAX(a,b) (((a)>(b)) ? (a): (b)) +#endif + +#define MKSTR(ptr) ((ptr))? 
(ptr) : "" + +static inline int size_round4 (int val) +{ + return (val + 3) & (~0x3); +} + +static inline int size_round (int val) +{ + return (val + 7) & (~0x7); +} + +static inline int size_round16(int val) +{ + return (val + 0xf) & (~0xf); +} + +static inline int size_round32(int val) +{ + return (val + 0x1f) & (~0x1f); +} + +static inline int size_round0(int val) +{ + if (!val) + return 0; + return (val + 1 + 7) & (~0x7); +} + +static inline size_t round_strlen(char *fset) +{ + return (size_t)size_round((int)strlen(fset) + 1); +} + +#define LOGL(var,len,ptr) \ +do { \ + if (var) \ + memcpy((char *)ptr, (const char *)var, len); \ + ptr += size_round(len); \ +} while (0) + +#define LOGU(var,len,ptr) \ +do { \ + if (var) \ + memcpy((char *)var, (const char *)ptr, len); \ + ptr += size_round(len); \ +} while (0) + +#define LOGL0(var,len,ptr) \ +do { \ + if (!len) \ + break; \ + memcpy((char *)ptr, (const char *)var, len); \ + *((char *)(ptr) + len) = 0; \ + ptr += size_round(len + 1); \ +} while (0) + +/* + * USER LEVEL STUFF BELOW + */ + +#define LIBCFS_IOCTL_VERSION 0x0001000a + +struct libcfs_ioctl_data { + __u32 ioc_len; + __u32 ioc_version; + + __u64 ioc_nid; + __u64 ioc_u64[1]; + + __u32 ioc_flags; + __u32 ioc_count; + __u32 ioc_net; + __u32 ioc_u32[7]; + + __u32 ioc_inllen1; + char *ioc_inlbuf1; + __u32 ioc_inllen2; + char *ioc_inlbuf2; + + __u32 ioc_plen1; /* buffers in userspace */ + char *ioc_pbuf1; + __u32 ioc_plen2; /* buffers in userspace */ + char *ioc_pbuf2; + + char ioc_bulk[0]; +}; + + +struct libcfs_ioctl_hdr { + __u32 ioc_len; + __u32 ioc_version; +}; + +struct libcfs_debug_ioctl_data +{ + struct libcfs_ioctl_hdr hdr; + unsigned int subs; + unsigned int debug; +}; + +#define LIBCFS_IOC_INIT(data) \ +do { \ + memset(&data, 0, sizeof(data)); \ + data.ioc_version = LIBCFS_IOCTL_VERSION; \ + data.ioc_len = sizeof(data); \ +} while (0) + +/* FIXME check conflict with lustre_lib.h */ +#define LIBCFS_IOC_DEBUG_MASK _IOWR('f', 250, long) + +static inline int libcfs_ioctl_packlen(struct libcfs_ioctl_data *data) +{ + int len = sizeof(*data); + len += size_round(data->ioc_inllen1); + len += size_round(data->ioc_inllen2); + return len; +} + +static inline int libcfs_ioctl_is_invalid(struct libcfs_ioctl_data *data) +{ + if (data->ioc_len > (1<<30)) { + CERROR ("LIBCFS ioctl: ioc_len larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen1 > (1<<30)) { + CERROR ("LIBCFS ioctl: ioc_inllen1 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inllen2 > (1<<30)) { + CERROR ("LIBCFS ioctl: ioc_inllen2 larger than 1<<30\n"); + return 1; + } + if (data->ioc_inlbuf1 && !data->ioc_inllen1) { + CERROR ("LIBCFS ioctl: inlbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_inlbuf2 && !data->ioc_inllen2) { + CERROR ("LIBCFS ioctl: inlbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf1 && !data->ioc_plen1) { + CERROR ("LIBCFS ioctl: pbuf1 pointer but 0 length\n"); + return 1; + } + if (data->ioc_pbuf2 && !data->ioc_plen2) { + CERROR ("LIBCFS ioctl: pbuf2 pointer but 0 length\n"); + return 1; + } + if (data->ioc_plen1 && !data->ioc_pbuf1) { + CERROR ("LIBCFS ioctl: plen1 nonzero but no pbuf1 pointer\n"); + return 1; + } + if (data->ioc_plen2 && !data->ioc_pbuf2) { + CERROR ("LIBCFS ioctl: plen2 nonzero but no pbuf2 pointer\n"); + return 1; + } + if ((__u32)libcfs_ioctl_packlen(data) != data->ioc_len ) { + CERROR ("LIBCFS ioctl: packlen != ioc_len\n"); + return 1; + } + if (data->ioc_inllen1 && + data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { + CERROR ("LIBCFS 
ioctl: inlbuf1 not 0 terminated\n"); + return 1; + } + if (data->ioc_inllen2 && + data->ioc_bulk[size_round(data->ioc_inllen1) + + data->ioc_inllen2 - 1] != '\0') { + CERROR ("LIBCFS ioctl: inlbuf2 not 0 terminated\n"); + return 1; + } + return 0; +} + +#ifndef __KERNEL__ +static inline int libcfs_ioctl_pack(struct libcfs_ioctl_data *data, char **pbuf, + int max) +{ + char *ptr; + struct libcfs_ioctl_data *overlay; + data->ioc_len = libcfs_ioctl_packlen(data); + data->ioc_version = LIBCFS_IOCTL_VERSION; + + if (*pbuf && libcfs_ioctl_packlen(data) > max) + return 1; + if (*pbuf == NULL) { + *pbuf = malloc(data->ioc_len); + } + if (!*pbuf) + return 1; + overlay = (struct libcfs_ioctl_data *)*pbuf; + memcpy(*pbuf, data, sizeof(*data)); + + ptr = overlay->ioc_bulk; + if (data->ioc_inlbuf1) + LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); + if (data->ioc_inlbuf2) + LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); + if (libcfs_ioctl_is_invalid(overlay)) + return 1; + + return 0; +} + +#else + +extern int libcfs_ioctl_getdata(char *buf, char *end, void *arg); +extern int libcfs_ioctl_popdata(void *arg, void *buf, int size); + +#endif + +/* ioctls for manipulating snapshots 30- */ +#define IOC_LIBCFS_TYPE 'e' +#define IOC_LIBCFS_MIN_NR 30 +/* libcfs ioctls */ +#define IOC_LIBCFS_PANIC _IOWR('e', 30, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLEAR_DEBUG _IOWR('e', 31, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MARK_DEBUG _IOWR('e', 32, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_CONTROL _IOWR('e', 33, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_SNAPSHOT _IOWR('e', 34, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LWT_LOOKUP_STRING _IOWR('e', 35, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_MEMHOG _IOWR('e', 36, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING_TEST _IOWR('e', 37, IOCTL_LIBCFS_TYPE) +/* lnet ioctls */ +#define IOC_LIBCFS_GET_NI _IOWR('e', 50, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_FAIL_NID _IOWR('e', 51, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_ROUTE _IOWR('e', 52, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_ROUTE _IOWR('e', 53, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_ROUTE _IOWR('e', 54, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_NOTIFY_ROUTER _IOWR('e', 55, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_UNCONFIGURE _IOWR('e', 56, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PORTALS_COMPATIBILITY _IOWR('e', 57, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNET_DIST _IOWR('e', 58, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CONFIGURE _IOWR('e', 59, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_TESTPROTOCOMPAT _IOWR('e', 60, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PING _IOWR('e', 61, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEBUG_PEER _IOWR('e', 62, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_LNETST _IOWR('e', 63, IOCTL_LIBCFS_TYPE) +/* lnd ioctls */ +#define IOC_LIBCFS_REGISTER_MYNID _IOWR('e', 70, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_CLOSE_CONNECTION _IOWR('e', 71, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_PUSH_CONNECTION _IOWR('e', 72, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_CONN _IOWR('e', 73, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_PEER _IOWR('e', 74, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_PEER _IOWR('e', 75, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_PEER _IOWR('e', 76, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_TXDESC _IOWR('e', 77, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_ADD_INTERFACE _IOWR('e', 78, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_DEL_INTERFACE _IOWR('e', 79, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_INTERFACE _IOWR('e', 80, IOCTL_LIBCFS_TYPE) +#define IOC_LIBCFS_GET_GMID _IOWR('e', 81, IOCTL_LIBCFS_TYPE) + +#define 
IOC_LIBCFS_MAX_NR 81 + + +enum { + /* Only add to these values (i.e. don't ever change or redefine them): + * network addresses depend on them... */ + QSWLND = 1, + SOCKLND = 2, + GMLND = 3, + PTLLND = 4, + O2IBLND = 5, + CIBLND = 6, + OPENIBLND = 7, + IIBLND = 8, + LOLND = 9, + RALND = 10, + VIBLND = 11, + MXLND = 12, +}; + +enum { + DEBUG_DAEMON_START = 1, + DEBUG_DAEMON_STOP = 2, + DEBUG_DAEMON_PAUSE = 3, + DEBUG_DAEMON_CONTINUE = 4, +}; + + +enum cfg_record_type { + PORTALS_CFG_TYPE = 1, + LUSTRE_CFG_TYPE = 123, +}; + +typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); + +/* lustre_id output helper macros */ +#define DLID4 "%lu/%lu/%lu/%lu" + +#define OLID4(id) \ + (unsigned long)(id)->li_fid.lf_id, \ + (unsigned long)(id)->li_fid.lf_group, \ + (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \ + (unsigned long)(id)->li_stc.u.e3s.l3s_gen + +#endif diff --git a/libcfs/include/libcfs/libcfs.h b/libcfs/include/libcfs/libcfs.h new file mode 100644 index 0000000..80518b1 --- /dev/null +++ b/libcfs/include/libcfs/libcfs.h @@ -0,0 +1,649 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LIBCFS_H__ +#define __LIBCFS_LIBCFS_H__ + +#if !__GNUC__ +#define __attribute__(x) +#endif + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported operating system. +#endif + +#include "curproc.h" + +#ifndef __KERNEL__ +#include +#endif + +/* Controlled via configure key */ +/* #define LIBCFS_DEBUG */ + +#ifndef offsetof +# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) +#endif + +/* cardinality of array */ +#define sizeof_array(a) ((sizeof (a)) / (sizeof ((a)[0]))) + +#if !defined(container_of) +/* given a pointer @ptr to the field @member embedded into type (usually + * struct) @type, return pointer to the embedding instance of @type. */ +#define container_of(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) +#endif + +#define container_of0(ptr, type, member) \ +({ \ + typeof(ptr) __ptr = (ptr); \ + type *__res; \ + \ + if (unlikely(IS_ERR(__ptr) || __ptr == NULL)) \ + __res = (type *)__ptr; \ + else \ + __res = container_of(__ptr, type, member); \ + __res; \ +}) + +/* + * true iff @i is power-of-2 + */ +#define IS_PO2(i) \ +({ \ + typeof(i) __i; \ + \ + __i = (i); \ + !(__i & (__i - 1)); \ +}) + +#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) + +/* + * Debugging + */ +extern unsigned int libcfs_subsystem_debug; +extern unsigned int libcfs_stack; +extern unsigned int libcfs_debug; +extern unsigned int libcfs_printk; +extern unsigned int libcfs_console_ratelimit; +extern cfs_duration_t libcfs_console_max_delay; +extern cfs_duration_t libcfs_console_min_delay; +extern unsigned int libcfs_console_backoff; +extern unsigned int libcfs_debug_binary; +extern char debug_file_path[1024]; + +int libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys); +int libcfs_debug_str2mask(int *mask, const char *str, int is_subsys); + +/* Has there been an LBUG? 
*/ +extern unsigned int libcfs_catastrophe; +extern unsigned int libcfs_panic_on_lbug; + +/* + * struct ptldebug_header is defined in libcfs//libcfs.h + */ + +#define PH_FLAG_FIRST_RECORD 1 + +/* Debugging subsystems (32 bits, non-overlapping) */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ +#define S_UNDEFINED 0x00000001 +#define S_MDC 0x00000002 +#define S_MDS 0x00000004 +#define S_OSC 0x00000008 +#define S_OST 0x00000010 +#define S_CLASS 0x00000020 +#define S_LOG 0x00000040 +#define S_LLITE 0x00000080 +#define S_RPC 0x00000100 +#define S_MGMT 0x00000200 +#define S_LNET 0x00000400 +#define S_LND 0x00000800 /* ALL LNDs */ +#define S_PINGER 0x00001000 +#define S_FILTER 0x00002000 +/* unused */ +#define S_ECHO 0x00008000 +#define S_LDLM 0x00010000 +#define S_LOV 0x00020000 +/* unused */ +/* unused */ +/* unused */ +/* unused */ +/* unused */ +#define S_LMV 0x00800000 /* b_new_cmd */ +/* unused */ +#define S_SEC 0x02000000 /* upcall cache */ +#define S_GSS 0x04000000 /* b_new_cmd */ +/* unused */ +#define S_MGC 0x10000000 +#define S_MGS 0x20000000 +#define S_FID 0x40000000 /* b_new_cmd */ +#define S_FLD 0x80000000 /* b_new_cmd */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ + +/* Debugging masks (32 bits, non-overlapping) */ +/* keep these in sync with lnet/utils/debug.c and lnet/libcfs/debug.c */ +#define D_TRACE 0x00000001 /* ENTRY/EXIT markers */ +#define D_INODE 0x00000002 +#define D_SUPER 0x00000004 +#define D_EXT2 0x00000008 /* anything from ext2_debug */ +#define D_MALLOC 0x00000010 /* print malloc, free information */ +#define D_CACHE 0x00000020 /* cache-related items */ +#define D_INFO 0x00000040 /* general information */ +#define D_IOCTL 0x00000080 /* ioctl related information */ +#define D_NETERROR 0x00000100 /* network errors */ +#define D_NET 0x00000200 /* network communications */ +#define D_WARNING 0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */ +#define D_BUFFS 0x00000800 +#define D_OTHER 0x00001000 +#define D_DENTRY 0x00002000 +#define D_NETTRACE 0x00004000 +#define D_PAGE 0x00008000 /* bulk page handling */ +#define D_DLMTRACE 0x00010000 +#define D_ERROR 0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */ +#define D_EMERG 0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */ +#define D_HA 0x00080000 /* recovery and failover */ +#define D_RPCTRACE 0x00100000 /* for distributed debugging */ +#define D_VFSTRACE 0x00200000 +#define D_READA 0x00400000 /* read-ahead */ +#define D_MMAP 0x00800000 +#define D_CONFIG 0x01000000 +#define D_CONSOLE 0x02000000 +#define D_QUOTA 0x04000000 +#define D_SEC 0x08000000 +/* keep these in sync with lnet/{utils,libcfs}/debug.c */ + +#define D_CANTMASK (D_ERROR | D_EMERG | D_WARNING | D_CONSOLE) + +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +#endif + +#define CDEBUG_DEFAULT_MAX_DELAY (cfs_time_seconds(600)) /* jiffies */ +#define CDEBUG_DEFAULT_MIN_DELAY ((cfs_time_seconds(1) + 1) / 2) /* jiffies */ +#define CDEBUG_DEFAULT_BACKOFF 2 +typedef struct { + cfs_time_t cdls_next; + int cdls_count; + cfs_duration_t cdls_delay; +} cfs_debug_limit_state_t; + +/* Controlled via configure key */ +/* #define CDEBUG_ENABLED */ + +#if defined(__KERNEL__) || (defined(__arch_lib__) && !defined(LUSTRE_UTILS)) + +#ifdef CDEBUG_ENABLED +#define __CDEBUG(cdls, mask, format, a...) 
\ +do { \ + CHECK_STACK(); \ + \ + if (((mask) & D_CANTMASK) != 0 || \ + ((libcfs_debug & (mask)) != 0 && \ + (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ + libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, mask, \ + __FILE__, __FUNCTION__, __LINE__, \ + format, ## a); \ +} while (0) + +#define CDEBUG(mask, format, a...) __CDEBUG(NULL, mask, format, ## a) + +#define CDEBUG_LIMIT(mask, format, a...) \ +do { \ + static cfs_debug_limit_state_t cdls; \ + \ + __CDEBUG(&cdls, mask, format, ## a); \ +} while (0) + +#else /* CDEBUG_ENABLED */ +#define CDEBUG(mask, format, a...) (void)(0) +#define CDEBUG_LIMIT(mask, format, a...) (void)(0) +#warning "CDEBUG IS DISABLED. THIS SHOULD NEVER BE DONE FOR PRODUCTION!" +#endif + +#else + +#define CDEBUG(mask, format, a...) \ +do { \ + if (((mask) & D_CANTMASK) != 0) \ + fprintf(stderr, "(%s:%d:%s()) " format, \ + __FILE__, __LINE__, __FUNCTION__, ## a); \ +} while (0) + +#define CDEBUG_LIMIT CDEBUG + +#endif /* !__KERNEL__ */ + +/* + * Lustre Error Checksum: calculates checksum + * of Hex number by XORing each bit. + */ +#define LERRCHKSUM(hexnum) (((hexnum) & 0xf) ^ ((hexnum) >> 4 & 0xf) ^ \ + ((hexnum) >> 8 & 0xf)) + +#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a) +#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a) +#define CEMERG(format, a...) CDEBUG_LIMIT(D_EMERG, format, ## a) + +#define LCONSOLE(mask, format, a...) CDEBUG(D_CONSOLE | (mask), format, ## a) +#define LCONSOLE_INFO(format, a...) CDEBUG_LIMIT(D_CONSOLE, format, ## a) +#define LCONSOLE_WARN(format, a...) CDEBUG_LIMIT(D_CONSOLE | D_WARNING, format, ## a) +#define LCONSOLE_ERROR_MSG(errnum, format, a...) CDEBUG_LIMIT(D_CONSOLE | D_ERROR, \ + "%x-%x: " format, errnum, LERRCHKSUM(errnum), ## a) +#define LCONSOLE_ERROR(format, a...) LCONSOLE_ERROR_MSG(0x00, format, ## a) + +#define LCONSOLE_EMERG(format, a...) CDEBUG(D_CONSOLE | D_EMERG, format, ## a) + +#ifdef CDEBUG_ENABLED + +#define GOTO(label, rc) \ +do { \ + long GOTO__ret = (long)(rc); \ + CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ + #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ + (signed long)GOTO__ret); \ + goto label; \ +} while (0) +#else +#define GOTO(label, rc) do { ((void)(rc)); goto label; } while (0) +#endif + +/* Controlled via configure key */ +/* #define CDEBUG_ENTRY_EXIT */ + +#ifdef CDEBUG_ENTRY_EXIT + +/* + * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise + * there will be a warning in osx. + */ +#define RETURN(rc) \ +do { \ + typeof(rc) RETURN__ret = (rc); \ + CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ + (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ + EXIT_NESTING; \ + return RETURN__ret; \ +} while (0) + +#define ENTRY \ +ENTRY_NESTING; \ +do { \ + CDEBUG(D_TRACE, "Process entered\n"); \ +} while (0) + +#define EXIT \ +do { \ + CDEBUG(D_TRACE, "Process leaving\n"); \ + EXIT_NESTING; \ +} while(0) +#else /* !CDEBUG_ENTRY_EXIT */ + +#define RETURN(rc) return (rc) +#define ENTRY do { } while (0) +#define EXIT do { } while (0) + +#endif /* !CDEBUG_ENTRY_EXIT */ + +/* + * Some (nomina odiosa sunt) platforms define NULL as naked 0. This confuses + * Lustre RETURN(NULL) macro. 
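 *
 * For orientation, the ENTRY/RETURN/GOTO/CDEBUG macros above are normally
 * combined as in the sketch below (function names invented; a real caller
 * usually defines DEBUG_SUBSYSTEM before including this header, otherwise
 * it defaults to S_UNDEFINED):
 *
 *	static int example_lookup(struct example *ex)
 *	{
 *		int rc;
 *		ENTRY;
 *
 *		if (ex == NULL)
 *			RETURN(-EINVAL);
 *
 *		CDEBUG(D_INFO, "looking up %p\n", ex);
 *		rc = example_do_lookup(ex);
 *		if (rc != 0)
 *			GOTO(out, rc);
 *	out:
 *		RETURN(rc);
 *	}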
+ */ +#if defined(NULL) +#undef NULL +#endif + +#define NULL ((void *)0) + +#define LUSTRE_SRV_LNET_PID LUSTRE_LNET_PID + +#ifdef __KERNEL__ + +#include + +struct libcfs_ioctl_data; /* forward ref */ + +struct libcfs_ioctl_handler { + struct list_head item; + int (*handle_ioctl)(unsigned int cmd, struct libcfs_ioctl_data *data); +}; + +#define DECLARE_IOCTL_HANDLER(ident, func) \ + struct libcfs_ioctl_handler ident = { \ + /* .item = */ CFS_LIST_HEAD_INIT(ident.item), \ + /* .handle_ioctl = */ func \ + } + +int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); +int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); + +/* libcfs tcpip */ +int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask); +int libcfs_ipif_enumerate(char ***names); +void libcfs_ipif_free_enumeration(char **names, int n); +int libcfs_sock_listen(cfs_socket_t **sockp, __u32 ip, int port, int backlog); +int libcfs_sock_accept(cfs_socket_t **newsockp, cfs_socket_t *sock); +void libcfs_sock_abort_accept(cfs_socket_t *sock); +int libcfs_sock_connect(cfs_socket_t **sockp, int *fatal, + __u32 local_ip, int local_port, + __u32 peer_ip, int peer_port); +int libcfs_sock_setbuf(cfs_socket_t *socket, int txbufsize, int rxbufsize); +int libcfs_sock_getbuf(cfs_socket_t *socket, int *txbufsize, int *rxbufsize); +int libcfs_sock_getaddr(cfs_socket_t *socket, int remote, __u32 *ip, int *port); +int libcfs_sock_write(cfs_socket_t *sock, void *buffer, int nob, int timeout); +int libcfs_sock_read(cfs_socket_t *sock, void *buffer, int nob, int timeout); +void libcfs_sock_release(cfs_socket_t *sock); + +/* libcfs watchdogs */ +struct lc_watchdog; + +/* Add a watchdog which fires after "time" milliseconds of delay. You have to + * touch it once to enable it. */ +struct lc_watchdog *lc_watchdog_add(int time, + void (*cb)(pid_t pid, void *), + void *data); + +/* Enables a watchdog and resets its timer. */ +void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms); +void lc_watchdog_touch(struct lc_watchdog *lcw); + +/* Disable a watchdog; touch it to restart it. */ +void lc_watchdog_disable(struct lc_watchdog *lcw); + +/* Clean up the watchdog */ +void lc_watchdog_delete(struct lc_watchdog *lcw); + +/* Dump a debug log */ +void lc_watchdog_dumplog(pid_t pid, void *data); + +/* __KERNEL__ */ +#endif + +/* need both kernel and user-land acceptor */ +#define LNET_ACCEPTOR_MIN_RESERVED_PORT 512 +#define LNET_ACCEPTOR_MAX_RESERVED_PORT 1023 + +/* + * libcfs pseudo device operations + * + * struct cfs_psdev_t and + * cfs_psdev_register() and + * cfs_psdev_deregister() are declared in + * libcfs//cfs_prim.h + * + * It's just draft now. + */ + +struct cfs_psdev_file { + unsigned long off; + void *private_data; + unsigned long reserved1; + unsigned long reserved2; +}; + +struct cfs_psdev_ops { + int (*p_open)(unsigned long, void *); + int (*p_close)(unsigned long, void *); + int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *); +}; + +/* + * generic time manipulation functions. 
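 *
 * A typical combination of the helpers defined below with the per-platform
 * cfs_time_seconds()/cfs_pause() primitives (sketch only; the done()
 * predicate is invented):
 *
 *	cfs_time_t deadline = cfs_time_shift(30);
 *
 *	while (!done()) {
 *		if (cfs_time_aftereq(cfs_time_current(), deadline)) {
 *			CERROR("example operation timed out\n");
 *			break;
 *		}
 *		cfs_pause(cfs_time_seconds(1));
 *	}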
+ */ + +static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_before(t2, t1); +} + +static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_beforeq(t2, t1); +} + +/* + * return seconds since UNIX epoch + */ +static inline time_t cfs_unix_seconds(void) +{ + cfs_fs_time_t t; + + cfs_fs_time_current(&t); + return (time_t)cfs_fs_time_sec(&t); +} + +static inline cfs_time_t cfs_time_shift(int seconds) +{ + return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); +} + +static inline long cfs_timeval_sub(struct timeval *large, struct timeval *small, + struct timeval *result) +{ + long r = (long) ( + (large->tv_sec - small->tv_sec) * ONE_MILLION + + (large->tv_usec - small->tv_usec)); + if (result != NULL) { + result->tv_usec = r % ONE_MILLION; + result->tv_sec = r / ONE_MILLION; + } + return r; +} + +#define CFS_RATELIMIT(seconds) \ +({ \ + /* \ + * XXX nikita: non-portable initializer \ + */ \ + static time_t __next_message = 0; \ + int result; \ + \ + if (cfs_time_after(cfs_time_current(), __next_message)) \ + result = 1; \ + else { \ + __next_message = cfs_time_shift(seconds); \ + result = 0; \ + } \ + result; \ +}) + +struct libcfs_debug_msg_data { + cfs_debug_limit_state_t *msg_cdls; + int msg_subsys; + const char *msg_file; + const char *msg_fn; + int msg_line; +}; + +#define DEBUG_MSG_DATA_INIT(cdls, subsystem, file, func, ln ) { \ + .msg_cdls = (cdls), \ + .msg_subsys = (subsystem), \ + .msg_file = (file), \ + .msg_fn = (func), \ + .msg_line = (ln) \ + } + + +extern int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, + int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, + const char *format2, ...) + __attribute__ ((format (printf, 9, 10))); + +#define libcfs_debug_vmsg(cdls, subsys, mask, file, fn, line, format, args) \ + libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,format,args,NULL,NULL) + +#define libcfs_debug_msg(cdls, subsys, mask, file, fn, line, format, a...) \ + libcfs_debug_vmsg2(cdls, subsys, mask, file, fn,line,NULL,NULL,format, ##a) + +#define cdebug_va(cdls, mask, file, func, line, fmt, args) do { \ + CHECK_STACK(); \ + \ + if (((mask) & D_CANTMASK) != 0 || \ + ((libcfs_debug & (mask)) != 0 && \ + (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ + libcfs_debug_vmsg(cdls, DEBUG_SUBSYSTEM, (mask), \ + (file), (func), (line), fmt, args); \ +} while(0); + +#define cdebug(cdls, mask, file, func, line, fmt, a...) do { \ + CHECK_STACK(); \ + \ + if (((mask) & D_CANTMASK) != 0 || \ + ((libcfs_debug & (mask)) != 0 && \ + (libcfs_subsystem_debug & DEBUG_SUBSYSTEM) != 0)) \ + libcfs_debug_msg(cdls, DEBUG_SUBSYSTEM, (mask), \ + (file), (func), (line), fmt, ## a); \ +} while(0); + +extern void libcfs_assertion_failed(const char *expr, const char *file, + const char *fn, const int line); + +static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) +{ + if (cfs_time_after(cfs_time_current(), + cfs_time_add(now, cfs_time_seconds(15)))) + CERROR("slow %s "CFS_TIME_T" sec\n", msg, + cfs_duration_sec(cfs_time_sub(cfs_time_current(),now))); +} + +/* + * helper function similar to do_gettimeofday() of Linux kernel + */ +static inline void cfs_fs_timeval(struct timeval *tv) +{ + cfs_fs_time_t time; + + cfs_fs_time_current(&time); + cfs_fs_time_usec(&time, tv); +} + +/* + * return valid time-out based on user supplied one. Currently we only check + * that time-out is not shorted than allowed. 
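 *
 * (An aside on CFS_RATELIMIT() defined above: it is meant to gate noisy
 * messages at a call site, e.g. -- message text invented --
 *
 *	if (CFS_RATELIMIT(60))
 *		CWARN("example: still waiting for peer\n");
 *
 * so that the warning is emitted at most about once per the given number of
 * seconds.)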
+ */ +static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout) +{ + if (timeout < CFS_TICK) + timeout = CFS_TICK; + return timeout; +} + +/* + * Universal memory allocator API + */ +enum cfs_alloc_flags { + /* allocation is not allowed to block */ + CFS_ALLOC_ATOMIC = 0x1, + /* allocation is allowed to block */ + CFS_ALLOC_WAIT = 0x2, + /* allocation should return zeroed memory */ + CFS_ALLOC_ZERO = 0x4, + /* allocation is allowed to call file-system code to free/clean + * memory */ + CFS_ALLOC_FS = 0x8, + /* allocation is allowed to do io to free/clean memory */ + CFS_ALLOC_IO = 0x10, + /* don't report allocation failure to the console */ + CFS_ALLOC_NOWARN = 0x20, + /* standard allocator flag combination */ + CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO, + CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO, +}; + +/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */ +enum cfs_alloc_page_flags { + /* allow to return page beyond KVM. It has to be mapped into KVM by + * cfs_page_map(); */ + CFS_ALLOC_HIGH = 0x40, + CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH, +}; + +/* + * Drop into debugger, if possible. Implementation is provided by platform. + */ + +void cfs_enter_debugger(void); + +/* + * Defined by platform + */ +void cfs_daemonize(char *str); +int cfs_daemonize_ctxt(char *str); +cfs_sigset_t cfs_get_blocked_sigs(void); +cfs_sigset_t cfs_block_allsigs(void); +cfs_sigset_t cfs_block_sigs(cfs_sigset_t bits); +void cfs_restore_sigs(cfs_sigset_t); +int cfs_signal_pending(void); +void cfs_clear_sigpending(void); +/* + * XXX Liang: + * these macros should be removed in the future, + * we keep them just for keeping libcfs compatible + * with other branches. + */ +#define libcfs_daemonize(s) cfs_daemonize(s) +#define cfs_sigmask_lock(f) do { f= 0; } while (0) +#define cfs_sigmask_unlock(f) do { f= 0; } while (0) + +int convert_server_error(__u64 ecode); +int convert_client_oflag(int cflag, int *result); + +/* + * Stack-tracing filling. + */ + +/* + * Platform-dependent data-type to hold stack frames. + */ +struct cfs_stack_trace; + +/* + * Fill @trace with current back-trace. + */ +void cfs_stack_trace_fill(struct cfs_stack_trace *trace); + +/* + * Return instruction pointer for frame @frame_no. NULL if @frame_no is + * invalid. + */ +void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no); + +/* + * Universal open flags. 
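 *
 * The CFS_O_* values below follow the familiar Linux/x86 octal open(2) flag
 * layout; cfs_oflags2univ()/cfs_univ2oflags() translate between the running
 * platform's native flags and this universal encoding, e.g. (sketch only):
 *
 *	int univ  = cfs_oflags2univ(O_CREAT | O_APPEND);
 *	int local = cfs_univ2oflags(univ);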
+ */ +#define CFS_O_ACCMODE 0003 +#define CFS_O_CREAT 0100 +#define CFS_O_EXCL 0200 +#define CFS_O_NOCTTY 0400 +#define CFS_O_TRUNC 01000 +#define CFS_O_APPEND 02000 +#define CFS_O_NONBLOCK 04000 +#define CFS_O_NDELAY CFS_O_NONBLOCK +#define CFS_O_SYNC 010000 +#define CFS_O_ASYNC 020000 +#define CFS_O_DIRECT 040000 +#define CFS_O_LARGEFILE 0100000 +#define CFS_O_DIRECTORY 0200000 +#define CFS_O_NOFOLLOW 0400000 +#define CFS_O_NOATIME 01000000 + +/* convert local open flags to universal open flags */ +int cfs_oflags2univ(int flags); +/* convert universal open flags to local open flags */ +int cfs_univ2oflags(int flags); + +#define _LIBCFS_H + +#endif /* _LIBCFS_H */ diff --git a/libcfs/include/libcfs/linux/.cvsignore b/libcfs/include/libcfs/linux/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/libcfs/include/libcfs/linux/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/libcfs/include/libcfs/linux/Makefile.am b/libcfs/include/libcfs/linux/Makefile.am new file mode 100644 index 0000000..072a7ad --- /dev/null +++ b/libcfs/include/libcfs/linux/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST := kp30.h libcfs.h linux-fs.h linux-lock.h linux-mem.h \ + linux-prim.h linux-time.h linux-tcpip.h lltrace.h \ + portals_compat25.h portals_utils.h diff --git a/libcfs/include/libcfs/linux/kp30.h b/libcfs/include/libcfs/linux/kp30.h new file mode 100644 index 0000000..19355ed --- /dev/null +++ b/libcfs/include/libcfs/linux/kp30.h @@ -0,0 +1,379 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_KP30_H__ +#define __LIBCFS_LINUX_KP30_H__ + +#ifndef __LIBCFS_KP30_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#ifndef AUTOCONF_INCLUDED +# include +#endif +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# ifdef HAVE_MM_INLINE +# include +# endif +# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +# include +# include +# endif + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define schedule_work schedule_task +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_TQUEUE((wq), 0, 0); \ + PREPARE_TQUEUE((wq), (cb), (cbdata)); \ +} while (0) + +#define PageUptodate Page_Uptodate +#define our_recalc_sigpending(current) recalc_sigpending(current) +#define num_online_cpus() smp_num_cpus +static inline void our_cond_resched(void) +{ + if (current->need_resched) + schedule (); +} +#define work_struct_t struct tq_struct +#define cfs_get_work_data(type,field,data) (data) +#else + +#ifdef HAVE_3ARGS_INIT_WORK + +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ +} while (0) + +#define cfs_get_work_data(type,field,data) (data) + +#else + +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_WORK((wq), (void *)(cb)); \ +} while (0) + +#define cfs_get_work_data(type,field,data) container_of(data,type,field) + +#endif + +#define wait_on_page wait_on_page_locked +#define our_recalc_sigpending(current) recalc_sigpending() +#define strtok(a,b) strpbrk(a, b) +static inline void our_cond_resched(void) +{ + cond_resched(); +} +#define work_struct_t struct work_struct + +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ + +#ifdef 
CONFIG_SMP +#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) +#else +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) +#endif +#define LASSERT_SEM_LOCKED(sem) LASSERT(down_trylock(sem) != 0) + +#define LIBCFS_PANIC(msg) panic(msg) + +/* ------------------------------------------------------------------- */ + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + +#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) +#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) + +#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) +#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) + +#define PORTAL_MODULE_USE MOD_INC_USE_COUNT +#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT +#else + +#define PORTAL_SYMBOL_REGISTER(x) +#define PORTAL_SYMBOL_UNREGISTER(x) + +#define PORTAL_SYMBOL_GET(x) symbol_get(x) +#define PORTAL_SYMBOL_PUT(x) symbol_put(x) + +#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) +#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) + +#endif + +/******************************************************************************/ +/* Module parameter support */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define CFS_MODULE_PARM(name, t, type, perm, desc) \ + MODULE_PARM(name, t);\ + MODULE_PARM_DESC(name, desc) + +#else +# define CFS_MODULE_PARM(name, t, type, perm, desc) \ + module_param(name, type, perm);\ + MODULE_PARM_DESC(name, desc) +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) +# define CFS_SYSFS_MODULE_PARM 0 /* no sysfs module parameters */ +#else +# define CFS_SYSFS_MODULE_PARM 1 /* module parameters accessible via sysfs */ +#endif +/******************************************************************************/ + +#if (__GNUC__) +/* Use the special GNU C __attribute__ hack to have the compiler check the + * printf style argument string against the actual argument count and + * types. + */ +#ifdef printf +# warning printf has been defined as a macro... +# undef printf +#endif + +#endif /* __GNUC__ */ + +# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) +# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) +# define time(a) CURRENT_TIME + +#ifndef num_possible_cpus +#define num_possible_cpus() NR_CPUS +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +#define i_size_read(a) ((a)->i_size) +#endif + +#else /* !__KERNEL__ */ +# include +# include +#if defined(__CYGWIN__) +# include +#else +# include +#endif +# include +# include +# include +# include +# include /* for _IOWR */ +#ifndef _IOWR +#include "ioctl.h" +#endif + +# define CFS_MODULE_PARM(name, t, type, perm, desc) +#define PORTAL_SYMBOL_GET(x) inter_module_get(#x) +#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) + +#endif /* End of !__KERNEL__ */ + +/******************************************************************************/ +/* Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. */ +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (16<<20) + +#ifndef KLWT_SUPPORT +# if defined(__KERNEL__) +# if !defined(BITS_PER_LONG) +# error "BITS_PER_LONG not defined" +# endif +# elif !defined(__WORDSIZE) +# error "__WORDSIZE not defined" +# else +# define BITS_PER_LONG __WORDSIZE +# endif + +/* kernel hasn't defined this? 
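 *
 * (Stepping back to CFS_MODULE_PARM() defined above: a typical declaration,
 * with an invented parameter name, is
 *
 *	static int example_timeout = 50;
 *	CFS_MODULE_PARM(example_timeout, "i", int, 0644,
 *			"example timeout in seconds");
 *
 * which expands to MODULE_PARM() on 2.4 kernels and module_param() on 2.6.)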
*/ +typedef struct { + long long lwte_when; + char *lwte_where; + void *lwte_task; + long lwte_p1; + long lwte_p2; + long lwte_p3; + long lwte_p4; +# if BITS_PER_LONG > 32 + long lwte_pad; +# endif +} lwt_event_t; +#endif /* !KLWT_SUPPORT */ + +#if LWT_SUPPORT +# ifdef __KERNEL__ +# if !KLWT_SUPPORT + +typedef struct _lwt_page { + struct list_head lwtp_list; + struct page *lwtp_page; + lwt_event_t *lwtp_events; +} lwt_page_t; + +typedef struct { + int lwtc_current_index; + lwt_page_t *lwtc_current_page; +} lwt_cpu_t; + +extern int lwt_enabled; +extern lwt_cpu_t lwt_cpus[]; + +/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. + * This stuff is meant for finding specific problems; it never stays in + * production code... */ + +#define LWTSTR(n) #n +#define LWTWHERE(f,l) f ":" LWTSTR(l) +#define LWT_EVENTS_PER_PAGE (CFS_PAGE_SIZE / sizeof (lwt_event_t)) + +#define LWT_EVENT(p1, p2, p3, p4) \ +do { \ + unsigned long flags; \ + lwt_cpu_t *cpu; \ + lwt_page_t *p; \ + lwt_event_t *e; \ + \ + if (lwt_enabled) { \ + local_irq_save (flags); \ + \ + cpu = &lwt_cpus[smp_processor_id()]; \ + p = cpu->lwtc_current_page; \ + e = &p->lwtp_events[cpu->lwtc_current_index++]; \ + \ + if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ + cpu->lwtc_current_page = \ + list_entry (p->lwtp_list.next, \ + lwt_page_t, lwtp_list); \ + cpu->lwtc_current_index = 0; \ + } \ + \ + e->lwte_when = get_cycles(); \ + e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ + e->lwte_task = current; \ + e->lwte_p1 = (long)(p1); \ + e->lwte_p2 = (long)(p2); \ + e->lwte_p3 = (long)(p3); \ + e->lwte_p4 = (long)(p4); \ + \ + local_irq_restore (flags); \ + } \ +} while (0) + +#endif /* !KLWT_SUPPORT */ + +extern int lwt_init (void); +extern void lwt_fini (void); +extern int lwt_lookup_string (int *size, char *knlptr, + char *usrptr, int usrsize); +extern int lwt_control (int enable, int clear); +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size); +# else /* __KERNEL__ */ +# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ +# endif /* __KERNEL__ */ +#endif /* LWT_SUPPORT */ + +/* ------------------------------------------------------------------ */ + +#define IOCTL_LIBCFS_TYPE long + +#ifdef __CYGWIN__ +# ifndef BITS_PER_LONG +# if (~0UL) == 0xffffffffUL +# define BITS_PER_LONG 32 +# else +# define BITS_PER_LONG 64 +# endif +# endif +#endif + +#if BITS_PER_LONG > 32 +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#else +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif + +/* this is a bit chunky */ + +#if defined(__KERNEL__) + #define _LWORDSIZE BITS_PER_LONG +#else + #define _LWORDSIZE __WORDSIZE +#endif + +#if (defined(__x86_64__) && (defined(__KERNEL__) || defined(CRAY_XT3))) || defined(HAVE_U64_LONG_LONG) +/* x86_64 defines __u64 as "long" in userspace, but "long long" in the kernel */ +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPF64 "L" +#elif (_LWORDSIZE == 32) +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPF64 "L" +#elif (_LWORDSIZE == 64) +# define LPU64 "%lu" +# define LPD64 "%ld" +# define LPX64 "%#lx" +# define LPF64 "l" +#endif + +#ifdef HAVE_SIZE_T_LONG +# define LPSZ "%lu" +#else +# define LPSZ "%u" +#endif + +#ifdef HAVE_SSIZE_T_LONG +# define LPSSZ "%ld" +#else +# define LPSSZ "%d" 
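/*
 * Illustration only (invented variable names): the LPU64/LPD64/LPX64 and
 * LPSZ/LPSSZ macros above keep printf-style formats portable across 32-bit
 * and 64-bit builds, e.g.
 *
 *	__u64 cookie = 0;
 *	size_t len = sizeof(cookie);
 *
 *	CDEBUG(D_INFO, "cookie "LPX64", len "LPSZ"\n", cookie, len);
 *
 * instead of hard-coding %llx or %lx per architecture.
 */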
+#endif + +#ifndef LPU64 +# error "No word size defined" +#endif + +#undef _LWORDSIZE + +#endif diff --git a/libcfs/include/libcfs/linux/libcfs.h b/libcfs/include/libcfs/linux/libcfs.h new file mode 100644 index 0000000..c873c2f --- /dev/null +++ b/libcfs/include/libcfs/linux/libcfs.h @@ -0,0 +1,173 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_LIBCFS_H__ +#define __LIBCFS_LINUX_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef HAVE_ASM_TYPES_H +#include +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include + + +#ifdef __KERNEL__ +# include +# include +# include +#else +# include +# include +# define do_gettimeofday(tv) gettimeofday(tv, NULL); +typedef unsigned long long cycles_t; +#endif + +#ifndef __KERNEL__ +/* Userpace byte flipping */ +# include +# include +# define __swab16(x) bswap_16(x) +# define __swab32(x) bswap_32(x) +# define __swab64(x) bswap_64(x) +# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) +# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) +# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define le16_to_cpu(x) (x) +# define cpu_to_le16(x) (x) +# define le32_to_cpu(x) (x) +# define cpu_to_le32(x) (x) +# define le64_to_cpu(x) (x) +# define cpu_to_le64(x) (x) + +# define be16_to_cpu(x) bswap_16(x) +# define cpu_to_be16(x) bswap_16(x) +# define be32_to_cpu(x) bswap_32(x) +# define cpu_to_be32(x) bswap_32(x) +# define be64_to_cpu(x) bswap_64(x) +# define cpu_to_be64(x) bswap_64(x) + +# else +# if __BYTE_ORDER == __BIG_ENDIAN +# define le16_to_cpu(x) bswap_16(x) +# define cpu_to_le16(x) bswap_16(x) +# define le32_to_cpu(x) bswap_32(x) +# define cpu_to_le32(x) bswap_32(x) +# define le64_to_cpu(x) bswap_64(x) +# define cpu_to_le64(x) bswap_64(x) + +# define be16_to_cpu(x) (x) +# define cpu_to_be16(x) (x) +# define be32_to_cpu(x) (x) +# define cpu_to_be32(x) (x) +# define be64_to_cpu(x) (x) +# define cpu_to_be64(x) (x) + +# else +# error "Unknown byte order" +# endif /* __BIG_ENDIAN */ +# endif /* __LITTLE_ENDIAN */ +#endif /* ! 
__KERNEL__ */ + +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u32 ph_cpu_id; + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + +#ifdef __KERNEL__ +# include /* THREAD_SIZE */ +#else +# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ +# define THREAD_SIZE 8192 +# endif +#endif + +#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) + +#if defined(__KERNEL__) && !defined(__x86_64__) +# ifdef __ia64__ +# define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_dwarf_cfa() & \ + (THREAD_SIZE - 1))) +# else +# define CDEBUG_STACK() (THREAD_SIZE - \ + ((unsigned long)__builtin_frame_address(0) & \ + (THREAD_SIZE - 1))) +# endif /* __ia64__ */ + +#define __CHECK_STACK(file, func, line) \ +do { \ + unsigned long _stack = CDEBUG_STACK(); \ + \ + if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ + libcfs_stack = _stack; \ + libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ + file, func, line, \ + "maximum lustre stack %lu\n", _stack); \ + /*panic("LBUG");*/ \ + } \ +} while (0) +#define CHECK_STACK() __CHECK_STACK(__FILE__, __func__, __LINE__) +#else /* !__KERNEL__ */ +#define __CHECK_STACK(X, Y, Z) do { } while(0) +#define CHECK_STACK() do { } while(0) +#define CDEBUG_STACK() (0L) +#endif /* __KERNEL__ */ + +/* initial pid */ +#define LUSTRE_LNET_PID 12345 + +#define ENTRY_NESTING_SUPPORT (1) +#define ENTRY_NESTING do {;} while (0) +#define EXIT_NESTING do {;} while (0) +#define __current_nesting_level() (0) + +/* + * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) + * + * Implementation is in linux-curproc.c + */ +#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm) + +#if defined(__KERNEL__) +#include +typedef kernel_cap_t cfs_kernel_cap_t; +#else +typedef __u32 cfs_kernel_cap_t; +#endif + +#if defined(__KERNEL__) +/* + * No stack-back-tracing in Linux for now. + */ +struct cfs_stack_trace { +}; + +#ifndef WITH_WATCHDOG +#define WITH_WATCHDOG +#endif + +#endif + +#endif /* _LINUX_LIBCFS_H */ diff --git a/libcfs/include/libcfs/linux/linux-fs.h b/libcfs/include/libcfs/linux/linux-fs.h new file mode 100644 index 0000000..7573322 --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-fs.h @@ -0,0 +1,82 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_FS_H__ +#define __LIBCFS_LINUX_CFS_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#else /* !__KERNEL__ */ +#include +#include +#include +#include +#include +#include +#include +#include +#endif /* __KERNEL__ */ + +typedef struct file cfs_file_t; +typedef struct dentry cfs_dentry_t; +typedef struct dirent64 cfs_dirent_t; + +#ifdef __KERNEL__ +#define cfs_filp_size(f) (i_size_read((f)->f_dentry->d_inode)) +#define cfs_filp_poff(f) (&(f)->f_pos) + +/* + * XXX Do we need to parse flags and mode in cfs_filp_open? + */ +cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err); +#define cfs_filp_close(f) filp_close(f, NULL) +#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos) +#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos) +#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1) + +#define cfs_get_file(f) get_file(f) +#define cfs_put_file(f) fput(f) +#define cfs_file_count(f) file_count(f) + +typedef struct file_lock cfs_flock_t; +#define cfs_flock_type(fl) ((fl)->fl_type) +#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) +#define cfs_flock_pid(fl) ((fl)->fl_pid) +#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) +#define cfs_flock_start(fl) ((fl)->fl_start) +#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) +#define cfs_flock_end(fl) ((fl)->fl_end) +#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) + +ssize_t cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset); + +#endif + +#endif diff --git a/libcfs/include/libcfs/linux/linux-lock.h b/libcfs/include/libcfs/linux/linux-lock.h new file mode 100644 index 0000000..4b51d1b --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-lock.h @@ -0,0 +1,105 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_LOCK_H__ +#define __LIBCFS_LINUX_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include + +/* + * IMPORTANT !!!!!!!! + * + * All locks' declaration are not guaranteed to be initialized, + * Althought some of they are initialized in Linux. All locks + * declared by CFS_DECL_* should be initialized explicitly. 
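 *
 * In practice the caller performs the initialization itself; a sketch with
 * an invented structure:
 *
 *	struct example_cache {
 *		spinlock_t       ec_lock;
 *		struct semaphore ec_sem;
 *	};
 *
 *	static void example_cache_init(struct example_cache *cache)
 *	{
 *		spin_lock_init(&cache->ec_lock);
 *		init_mutex(&cache->ec_sem);
 *	}
 *
 * using the mutex compatibility wrappers defined below.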
+ */ + + +/* + * spin_lock (use Linux kernel's primitives) + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + */ + +/* + * rw_semaphore (use Linux kernel's primitives) + * + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ + +/* + * rwlock_t (use Linux kernel's primitives) + * + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ + +/* + * mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ +#define init_mutex(x) init_MUTEX(x) +#define init_mutex_locked(x) init_MUTEX_LOCKED(x) +#define mutex_up(x) up(x) +#define mutex_down(x) down(x) +#define mutex_down_trylock(x) down_trylock(x) + +/* + * completion (use Linux kernel's primitives) + * + * - init_complition(c) + * - complete(c) + * - wait_for_completion(c) + */ + +/* __KERNEL__ */ +#else + +#include "../user-lock.h" + +/* __KERNEL__ */ +#endif +#endif diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h new file mode 100644 index 0000000..fa4ba3d --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-mem.h @@ -0,0 +1,129 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_MEM_H__ +#define __LIBCFS_LINUX_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +# include +# include +# include +# include +# ifdef HAVE_MM_INLINE +# include +# endif + +typedef struct page cfs_page_t; +#define CFS_PAGE_SIZE PAGE_CACHE_SIZE +#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT +#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) + +static inline void *cfs_page_address(cfs_page_t *page) +{ + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + return page_address(page); +} + +static inline void *cfs_kmap(cfs_page_t *page) +{ + return kmap(page); +} + +static inline void cfs_kunmap(cfs_page_t *page) +{ + kunmap(page); +} + +static inline void cfs_get_page(cfs_page_t *page) +{ + get_page(page); +} + +static inline int cfs_page_count(cfs_page_t *page) +{ + return page_count(page); +} + +#define cfs_page_index(p) ((p)->index) + +/* + * Memory allocator + * XXX Liang: move these declare to public file + */ +extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); +extern void cfs_free(void *addr); + +extern void *cfs_alloc_large(size_t nr_bytes); +extern void cfs_free_large(void *addr); + +extern cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order); +extern void __cfs_free_pages(cfs_page_t *page, unsigned int order); + +#define cfs_alloc_page(flags) cfs_alloc_pages(flags, 0) +#define __cfs_free_page(page) __cfs_free_pages(page, 0) +#define cfs_free_page(p) __free_pages(p, 0) + +/* + * In Linux there is no way to determine whether current execution context is + * blockable. + */ +#define CFS_ALLOC_ATOMIC_TRY CFS_ALLOC_ATOMIC + +/* + * SLAB allocator + * XXX Liang: move these declare to public file + */ +#ifdef HAVE_KMEM_CACHE +typedef struct kmem_cache cfs_mem_cache_t; +#else +typedef kmem_cache_t cfs_mem_cache_t; +#endif +extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long); +extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); +extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); +extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); + +/* + */ +#define CFS_DECL_MMSPACE mm_segment_t __oldfs +#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0) +#define CFS_MMSPACE_CLOSE set_fs(__oldfs) + +#else /* !__KERNEL__ */ +#ifdef HAVE_ASM_PAGE_H +#include /* needed for PAGE_SIZE - rread */ +#endif + +#include +/* __KERNEL__ */ +#endif + +#endif /* __LINUX_CFS_MEM_H__ */ diff --git a/libcfs/include/libcfs/linux/linux-prim.h b/libcfs/include/libcfs/linux/linux-prim.h new file mode 100644 index 0000000..705499e --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-prim.h @@ -0,0 +1,311 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_PRIM_H__ +#define __LIBCFS_LINUX_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Pseudo device register + */ +typedef struct miscdevice cfs_psdev_t; +#define cfs_psdev_register(dev) misc_register(dev) +#define cfs_psdev_deregister(dev) misc_deregister(dev) + +/* + * Sysctl register + */ +typedef struct ctl_table cfs_sysctl_table_t; +typedef struct ctl_table_header cfs_sysctl_table_header_t; + +#ifdef HAVE_2ARGS_REGISTER_SYSCTL +#define cfs_register_sysctl_table(t, a) register_sysctl_table(t, a) +#else +#define cfs_register_sysctl_table(t, a) register_sysctl_table(t) +#endif +#define cfs_unregister_sysctl_table(t) unregister_sysctl_table(t) + +/* + * Symbol register + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define cfs_symbol_register(s, p) inter_module_register(s, THIS_MODULE, p) +#define cfs_symbol_unregister(s) inter_module_unregister(s) +#define cfs_symbol_get(s) inter_module_get(s) +#define cfs_symbol_put(s) inter_module_put(s) +#define cfs_module_get() MOD_INC_USE_COUNT +#define cfs_module_put() MOD_DEC_USE_COUNT +#else +#define cfs_symbol_register(s, p) do {} while(0) +#define cfs_symbol_unregister(s) do {} while(0) +#define cfs_symbol_get(s) symbol_get(s) +#define cfs_symbol_put(s) symbol_put(s) +#define cfs_module_get() try_module_get(THIS_MODULE) +#define cfs_module_put() module_put(THIS_MODULE) +#endif + +/* + * Proc file system APIs + */ +typedef read_proc_t cfs_read_proc_t; +typedef write_proc_t cfs_write_proc_t; +typedef struct proc_dir_entry cfs_proc_dir_entry_t; +#define cfs_create_proc_entry(n, m, p) create_proc_entry(n, m, p) +#define cfs_free_proc_entry(e) free_proc_entry(e) +#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e) + +/* + * Wait Queue + */ +#define CFS_TASK_INTERRUPTIBLE TASK_INTERRUPTIBLE +#define CFS_TASK_UNINT TASK_UNINTERRUPTIBLE + +typedef wait_queue_t cfs_waitlink_t; +typedef wait_queue_head_t cfs_waitq_t; + +typedef long cfs_task_state_t; + +#define cfs_waitq_init(w) init_waitqueue_head(w) +#define cfs_waitlink_init(l) init_waitqueue_entry(l, current) +#define cfs_waitq_add(w, l) add_wait_queue(w, l) +#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l) +#define cfs_waitq_forward(l, w) do {} while(0) +#define cfs_waitq_del(w, l) remove_wait_queue(w, l) +#define cfs_waitq_active(w) waitqueue_active(w) +#define cfs_waitq_signal(w) wake_up(w) +#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n) +#define cfs_waitq_broadcast(w) wake_up_all(w) +#define cfs_waitq_wait(l, s) schedule() +#define cfs_waitq_timedwait(l, s, t) schedule_timeout(t) +#define cfs_schedule_timeout(s, t) schedule_timeout(t) +#define cfs_schedule() schedule() + +/* Kernel thread */ +typedef int (*cfs_thread_t)(void *); + +static inline int cfs_kernel_thread(int (*fn)(void *), + void *arg, unsigned long flags) +{ + void *orig_info = current->journal_info; + int rc; + + current->journal_info = NULL; + rc = kernel_thread(fn, arg, flags); + current->journal_info = orig_info; + return rc; +} + + +/* + * Task struct + */ +typedef struct task_struct cfs_task_t; +#define cfs_current() current +#define cfs_task_lock(t) task_lock(t) +#define cfs_task_unlock(t) task_unlock(t) +#define CFS_DECL_JOURNAL_DATA void *journal_info +#define CFS_PUSH_JOURNAL do { \ + journal_info = current->journal_info; \ + current->journal_info = NULL; \ + } while(0) +#define CFS_POP_JOURNAL do { \ + current->journal_info = journal_info; \ + } 
while(0) + +/* Module interfaces */ +#define cfs_module(name, version, init, fini) \ +module_init(init); \ +module_exit(fini) + +/* + * Signal + */ +typedef sigset_t cfs_sigset_t; + +/* + * Timer + */ +typedef struct timer_list cfs_timer_t; +typedef void (*timer_func_t)(unsigned long); + +#define cfs_init_timer(t) init_timer(t) + +static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg) +{ + init_timer(t); + t->function = (timer_func_t)func; + t->data = (unsigned long)arg; +} + +static inline void cfs_timer_done(cfs_timer_t *t) +{ + return; +} + +static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline) +{ + mod_timer(t, deadline); +} + +static inline void cfs_timer_disarm(cfs_timer_t *t) +{ + del_timer(t); +} + +static inline int cfs_timer_is_armed(cfs_timer_t *t) +{ + return timer_pending(t); +} + +static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t) +{ + return t->expires; +} + + +/* deschedule for a bit... */ +static inline void cfs_pause(cfs_duration_t ticks) +{ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(ticks); +} + +#ifndef wait_event_timeout /* Only for RHEL3 2.4.21 kernel */ +#define __wait_event_timeout(wq, condition, timeout, ret) \ +do { \ + int __ret = 0; \ + if (!(condition)) { \ + wait_queue_t __wait; \ + unsigned long expire; \ + \ + init_waitqueue_entry(&__wait, current); \ + expire = timeout + jiffies; \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + if (jiffies > expire) { \ + ret = jiffies - expire; \ + break; \ + } \ + schedule_timeout(timeout); \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ + } \ +} while (0) +/* + retval == 0; condition met; we're good. + retval > 0; timed out. +*/ +#define cfs_waitq_wait_event_timeout(wq, condition, timeout) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __wait_event_timeout(wq, condition, timeout, __ret); \ + __ret; \ +}) +#else +#define cfs_waitq_wait_event_timeout wait_event_timeout +#endif + +#ifndef wait_event_interruptible_timeout /* Only for RHEL3 2.4.21 kernel */ +#define __wait_event_interruptible_timeout(wq, condition, timeout, ret) \ +do { \ + int __ret = 0; \ + if (!(condition)) { \ + wait_queue_t __wait; \ + unsigned long expire; \ + \ + init_waitqueue_entry(&__wait, current); \ + expire = timeout + jiffies; \ + add_wait_queue(&wq, &__wait); \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (condition) \ + break; \ + if (jiffies > expire) { \ + ret = jiffies - expire; \ + break; \ + } \ + if (!signal_pending(current)) { \ + schedule_timeout(timeout); \ + continue; \ + } \ + ret = -ERESTARTSYS; \ + break; \ + } \ + current->state = TASK_RUNNING; \ + remove_wait_queue(&wq, &__wait); \ + } \ +} while (0) + +/* + retval == 0; condition met; we're good. + retval < 0; interrupted by signal. + retval > 0; timed out. 
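 *
 * Following that return-code convention, a typical caller (sketch, names
 * invented) looks like:
 *
 *	rc = cfs_waitq_wait_event_interruptible_timeout(ex->ex_waitq,
 *			ex->ex_ready != 0, cfs_time_seconds(30));
 *	if (rc < 0)
 *		return rc;
 *	if (rc > 0)
 *		return -ETIMEDOUT;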
+*/ +#define cfs_waitq_wait_event_interruptible_timeout(wq, condition, timeout) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __wait_event_interruptible_timeout(wq, condition, \ + timeout, __ret); \ + __ret; \ +}) +#else +#define cfs_waitq_wait_event_interruptible_timeout wait_event_interruptible_timeout +#endif + +#else /* !__KERNEL__ */ + +typedef struct proc_dir_entry cfs_proc_dir_entry_t; +#include "../user-prim.h" + +#endif /* __KERNEL__ */ + +#endif diff --git a/libcfs/include/libcfs/linux/linux-tcpip.h b/libcfs/include/libcfs/linux/linux-tcpip.h new file mode 100644 index 0000000..fb2ac93 --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-tcpip.h @@ -0,0 +1,66 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_TCP_H__ +#define __LIBCFS_LINUX_CFS_TCP_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include + +typedef struct socket cfs_socket_t; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) +# define sk_allocation allocation +# define sk_data_ready data_ready +# define sk_write_space write_space +# define sk_user_data user_data +# define sk_prot prot +# define sk_sndbuf sndbuf +# define sk_rcvbuf rcvbuf +# define sk_socket socket +# define sk_sleep sleep +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) +# define sk_wmem_queued wmem_queued +# define sk_err err +# define sk_route_caps route_caps +#endif + +#define SOCK_SNDBUF(so) ((so)->sk->sk_sndbuf) +#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued) +#define SOCK_ERROR(so) ((so)->sk->sk_err) +#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) + +#else /* !__KERNEL__ */ + +#include "../user-tcpip.h" + +#endif /* __KERNEL__ */ + +#endif diff --git a/libcfs/include/libcfs/linux/linux-time.h b/libcfs/include/libcfs/linux/linux-time.h new file mode 100644 index 0000000..3d4cdf5 --- /dev/null +++ b/libcfs/include/libcfs/linux/linux-time.h @@ -0,0 +1,327 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for Linux (kernel and user-level). + * + */ + +#ifndef __LIBCFS_LINUX_LINUX_TIME_H__ +#define __LIBCFS_LINUX_LINUX_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION 1000000 + +#ifdef __KERNEL__ +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include +#include +#include + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + +/* + * old kernels---CURRENT_TIME is struct timeval + */ +typedef struct timeval cfs_fs_time_t; + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + *v = *t; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = t->tv_sec; + s->tv_nsec = t->tv_usec * 1000; +} + +/* + * internal helper function used by cfs_fs_time_before*() + */ +static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return (unsigned long long)t->tv_sec * ONE_MILLION + t->tv_usec; +} + +#define CURRENT_KERN_TIME xtime + +#else +/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ + +/* + * post 2.5 kernels. 
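 *
 * (A short sketch of the portable time API documented above, with invented
 * variable names and a placeholder do_something():
 *
 *	cfs_time_t     start = cfs_time_current();
 *	cfs_duration_t d;
 *	struct timeval tv;
 *
 *	do_something();
 *	d = cfs_time_sub(cfs_time_current(), start);
 *	cfs_duration_usec(d, &tv);
 *	CDEBUG(D_INFO, "took "CFS_DURATION_T" ticks (%ld.%06lds)\n",
 *	       d, tv.tv_sec, tv.tv_usec);
 *
 * where do_something() stands for whatever operation is being measured.)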
+ */ + +#include + +typedef struct timespec cfs_fs_time_t; + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + v->tv_sec = t->tv_sec; + v->tv_usec = t->tv_nsec / 1000; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + *s = *t; +} + +/* + * internal helper function used by cfs_fs_time_before*() + */ +static inline unsigned long long __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return (unsigned long long)t->tv_sec * ONE_BILLION + t->tv_nsec; +} + +#define CURRENT_KERN_TIME CURRENT_TIME + +/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ +#endif + +/* + * Generic kernel stuff + */ + +typedef unsigned long cfs_time_t; /* jiffies */ +typedef long cfs_duration_t; + + +static inline cfs_time_t cfs_time_current(void) +{ + return jiffies; +} + +static inline time_t cfs_time_current_sec(void) +{ + return CURRENT_SECONDS; +} + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return t + d; +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return t1 - t2; +} + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return time_before(t1, t2); +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return time_before_eq(t1, t2); +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + *t = CURRENT_KERN_TIME; +} + +static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return t->tv_sec; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2); +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2); +} + +#if 0 +static inline cfs_duration_t cfs_duration_build(int64_t nano) +{ +#if (BITS_PER_LONG == 32) + /* We cannot use do_div(t, ONE_BILLION), do_div can only process + * 64 bits n and 32 bits base */ + int64_t t = nano * HZ; + do_div(t, 1000); + do_div(t, 1000000); + return (cfs_duration_t)t; +#else + return (nano * HZ / ONE_BILLION); +#endif +} +#endif + +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return ((cfs_duration_t)seconds) * HZ; +} + +static inline time_t cfs_duration_sec(cfs_duration_t d) +{ + return d / HZ; +} + +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ +#if (BITS_PER_LONG == 32) && (HZ > 4096) + __u64 t; + + s->tv_sec = d / HZ; + t = (d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION; + do_div(t, HZ); + s->tv_usec = t; +#else + s->tv_sec = d / HZ; + s->tv_usec = ((d - (cfs_duration_t)s->tv_sec * HZ) * ONE_MILLION) / HZ; +#endif +} + +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ +#if (BITS_PER_LONG == 32) + __u64 t; + + s->tv_sec = d / HZ; + t = (d - s->tv_sec * HZ) * ONE_BILLION; + do_div(t, HZ); + s->tv_nsec = t; +#else + s->tv_sec = d / HZ; + s->tv_nsec = ((d - s->tv_sec * HZ) * ONE_BILLION) / HZ; +#endif +} + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) + +#define cfs_time_current_64 get_jiffies_64 + +static inline __u64 cfs_time_add_64(__u64 t, __u64 d) +{ + return t + d; +} + +static inline __u64 cfs_time_shift_64(int seconds) +{ + return cfs_time_add_64(cfs_time_current_64(), + cfs_time_seconds(seconds)); +} + +static inline int cfs_time_before_64(__u64 t1, __u64 t2) +{ + return (__s64)t2 - (__s64)t1 > 0; +} + +static inline int cfs_time_beforeq_64(__u64 t1, __u64 t2) +{ + return (__s64)t2 - (__s64)t1 >= 0; +} + +#else +#define cfs_time_current_64 
cfs_time_current +#define cfs_time_add_64 cfs_time_add +#define cfs_time_shift_64 cfs_time_shift +#define cfs_time_before_64 cfs_time_before +#define cfs_time_beforeq_64 cfs_time_beforeq +#endif + +/* + * One jiffy + */ +#define CFS_TICK (1) + +#define CFS_TIME_T "%lu" +#define CFS_DURATION_T "%ld" + +#else /* !__KERNEL__ */ + +/* + * Liblustre. time(2) based implementation. + */ + +#define CFS_TIME_T "%lu" + +#include + +#endif /* __KERNEL__ */ + +/* __LIBCFS_LINUX_LINUX_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/linux/lltrace.h b/libcfs/include/libcfs/linux/lltrace.h new file mode 100644 index 0000000..1ddd03d --- /dev/null +++ b/libcfs/include/libcfs/linux/lltrace.h @@ -0,0 +1,28 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_LLTRACE_H__ +#define __LIBCFS_LINUX_LLTRACE_H__ + +#ifndef __LIBCFS_LLTRACE_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/libcfs/include/libcfs/linux/portals_compat25.h b/libcfs/include/libcfs/linux/portals_compat25.h new file mode 100644 index 0000000..2d6b782 --- /dev/null +++ b/libcfs/include/libcfs/linux/portals_compat25.h @@ -0,0 +1,125 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__ +#define __LIBCFS_LINUX_PORTALS_COMPAT_H__ + +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved +#if defined(SPINLOCK_DEBUG) && SPINLOCK_DEBUG +# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC) +# else +# define SIGNAL_MASK_ASSERT() \ + LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC) +# endif +#else +# define SIGNAL_MASK_ASSERT() +#endif +// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) + +# define SIGNAL_MASK_LOCK(task, flags) \ + spin_lock_irqsave(&task->sighand->siglock, flags) +# define SIGNAL_MASK_UNLOCK(task, flags) \ + spin_unlock_irqrestore(&task->sighand->siglock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp, 1) +# define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING clear_tsk_thread_flag(current, \ + TIF_SIGPENDING) +# define CURRENT_SECONDS get_seconds() +# define smp_num_cpus num_online_cpus() + + +#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */ + +# define SIGNAL_MASK_LOCK(task, flags) \ + spin_lock_irqsave(&task->sighand->siglock, flags) +# define SIGNAL_MASK_UNLOCK(task, flags) \ + spin_unlock_irqrestore(&task->sighand->siglock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp) +# define RECALC_SIGPENDING recalc_sigpending() +# define CLEAR_SIGPENDING (current->sigpending = 0) +# define CURRENT_SECONDS CURRENT_TIME +# define wait_event_interruptible_exclusive(wq, condition) \ + wait_event_interruptible(wq, condition) + +#else /* 2.4.x */ + +# define SIGNAL_MASK_LOCK(task, flags) \ + spin_lock_irqsave(&task->sigmask_lock, flags) +# define SIGNAL_MASK_UNLOCK(task, 
flags) \ + spin_unlock_irqrestore(&task->sigmask_lock, flags) +# define USERMODEHELPER(path, argv, envp) \ + call_usermodehelper(path, argv, envp) +# define RECALC_SIGPENDING recalc_sigpending(current) +# define CLEAR_SIGPENDING (current->sigpending = 0) +# define CURRENT_SECONDS CURRENT_TIME +# define wait_event_interruptible_exclusive(wq, condition) \ + wait_event_interruptible(wq, condition) + +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) +#define UML_PID(tsk) ((tsk)->thread.extern_pid) +#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid) +#else +#define UML_PID(tsk) ((tsk)->pid) +#endif + +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define THREAD_NAME(comm, len, fmt, a...) \ + snprintf(comm, len,fmt"|%d", ## a, UML_PID(current)) +#else +# define THREAD_NAME(comm, len, fmt, a...) \ + snprintf(comm, len, fmt, ## a) +#endif + +#ifdef HAVE_PAGE_LIST +/* 2.4 alloc_page users can use page->list */ +#define PAGE_LIST_ENTRY list +#define PAGE_LIST(page) ((page)->list) +#else +/* 2.6 alloc_page users can use page->lru */ +#define PAGE_LIST_ENTRY lru +#define PAGE_LIST(page) ((page)->lru) +#endif + +#ifndef HAVE_CPU_ONLINE +#define cpu_online(cpu) ((1<f_pos +#else +#define ll_proc_dointvec(table, write, filp, buffer, lenp, ppos) \ + proc_dointvec(table, write, filp, buffer, lenp, ppos); +#define ll_proc_dostring(table, write, filp, buffer, lenp, ppos) \ + proc_dostring(table, write, filp, buffer, lenp, ppos); +#define LL_PROC_PROTO(name) \ + name(cfs_sysctl_table_t *table, int write, struct file *filp, \ + void __user *buffer, size_t *lenp, loff_t *ppos) +#define DECLARE_LL_PROC_PPOS_DECL +#endif + +#endif /* _PORTALS_COMPAT_H */ diff --git a/libcfs/include/libcfs/linux/portals_utils.h b/libcfs/include/libcfs/linux/portals_utils.h new file mode 100644 index 0000000..4e76856 --- /dev/null +++ b/libcfs/include/libcfs/linux/portals_utils.h @@ -0,0 +1,51 @@ +#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__ +#define __LIBCFS_LINUX_PORTALS_UTILS_H__ + +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# include +#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */ +# include +#endif +#include +#include +#else /* !__KERNEL__ */ + +#include +#include + +#ifdef HAVE_LINUX_VERSION_H +# include + +# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define BUG() /* workaround for module.h includes */ +# include +# endif +#endif /* !HAVE_LINUX_VERSION_H */ + +#ifndef __CYGWIN__ +# include +#else /* __CYGWIN__ */ +# include +# include +# include +#endif /* __CYGWIN__ */ + +#endif /* !__KERNEL__ */ +#endif diff --git a/libcfs/include/libcfs/list.h b/libcfs/include/libcfs/list.h new file mode 100644 index 0000000..ed03bd5 --- /dev/null +++ b/libcfs/include/libcfs/list.h @@ -0,0 +1,463 @@ +#ifndef __LIBCFS_LIST_H__ +#define __LIBCFS_LIST_H__ + +#if defined (__linux__) && defined(__KERNEL__) + +#include + +#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n) +#define CFS_LIST_HEAD(n) LIST_HEAD(n) +#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p) + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) +#define CFS_HLIST_HEAD_INIT HLIST_HEAD_INIT +#define CFS_HLIST_HEAD(n) HLIST_HEAD(n) +#define CFS_INIT_HLIST_HEAD(p) INIT_HLIST_HEAD(p) +#define CFS_INIT_HLIST_NODE(p) INIT_HLIST_NODE(p) +#endif + +#else /* !defined (__linux__) || !defined(__KERNEL__) */ + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +#ifndef __WINNT__ +#define prefetch(a) ((void)a) +#else +#define prefetch(a) ((void *)a) +#endif + +struct list_head { + struct list_head *next, *prev; +}; + +typedef struct list_head list_t; + +#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } + +#define CFS_LIST_HEAD(name) \ + struct list_head name = CFS_LIST_HEAD_INIT(name) + +#define CFS_INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add(struct list_head * new, + struct list_head * prev, + struct list_head * next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is in an undefined state. + */ +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + CFS_INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + * + * This is not safe to use if @list is already on the same list as @head. + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + * + * This is not safe to use if @list is already on the same list as @head. + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(struct list_head *head) +{ + return head->next == head; +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + CFS_INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. + */ +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next, prefetch(pos->next); pos != (head); \ + pos = pos->next, prefetch(pos->next)) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop counter. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. 
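 *
 * (Editor's sketch, added for illustration; it is not part of the original
 * patch and "struct elem" is a hypothetical element type.)  Typical use of
 * the list helpers above, including safe removal while iterating:
 *
 *     struct elem { int val; struct list_head link; };
 *     static struct elem e1, e2;
 *     CFS_LIST_HEAD(active);
 *
 *     list_add_tail(&e1.link, &active);
 *     list_add_tail(&e2.link, &active);
 *
 *     struct list_head *pos, *n;
 *     list_for_each_safe(pos, n, &active) {
 *             struct elem *cur = list_entry(pos, struct elem, link);
 *             cur->val = 0;
 *             list_del_init(pos);
 *     }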
+ */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/* + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is + * too wasteful. + * You lose the ability to access the tail in O(1). + */ + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +/* + * "NULL" might not be defined at this point + */ +#ifdef NULL +#define NULL_P NULL +#else +#define NULL_P ((void *)0) +#endif + +#define CFS_HLIST_HEAD_INIT { .first = NULL_P } +#define CFS_HLIST_HEAD(name) struct hlist_head name = { .first = NULL_P } +#define CFS_INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL_P) +#define CFS_INIT_HLIST_NODE(ptr) ((ptr)->next = NULL_P, (ptr)->pprev = NULL_P) + +#define HLIST_HEAD_INIT CFS_HLIST_HEAD_INIT +#define HLIST_HEAD(n) CFS_HLIST_HEAD(n) +#define INIT_HLIST_HEAD(p) CFS_INIT_HLIST_HEAD(p) +#define INIT_HLIST_NODE(p) CFS_INIT_HLIST_NODE(p) + +static inline int hlist_unhashed(const struct hlist_node *h) +{ + return !h->pprev; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); +} + +static inline void hlist_del_init(struct hlist_node *n) +{ + if (n->pprev) { + __hlist_del(n); + INIT_HLIST_NODE(n); + } +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +/* next must be != NULL */ +static inline void hlist_add_before(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after(struct hlist_node *n, + struct hlist_node *next) +{ + next->next = n->next; + n->next = next; + next->pprev = &n->next; + + if(next->next) + next->next->pprev = &next->next; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \ + pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ + pos = n) + +/** + * hlist_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. 
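 *
 * (Editor's sketch, added for illustration; "struct obj" is a hypothetical
 * type.)  The hlist variants above are intended for hash-table buckets:
 *
 *     struct obj { int key; struct hlist_node hnode; };
 *     struct hlist_head bucket = CFS_HLIST_HEAD_INIT;
 *     static struct obj o;
 *
 *     hlist_add_head(&o.hnode, &bucket);
 *
 *     struct obj *tpos;
 *     struct hlist_node *hpos;
 *     hlist_for_each_entry(tpos, hpos, &bucket, hnode)
 *             tpos->key++;
 *
 *     hlist_del_init(&o.hnode);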
+ */ +#define hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from existing point + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop counter. + * @pos: the &struct hlist_node to use as a loop counter. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#endif /* __linux__ && __KERNEL__ */ + +#ifndef list_for_each_prev +/** + * list_for_each_prev - iterate over a list in reverse order + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + pos = pos->prev, prefetch(pos->prev)) + +#endif /* list_for_each_prev */ + +#ifndef list_for_each_entry +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + prefetch(pos->member.next); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member), \ + prefetch(pos->member.next)) +#endif /* list_for_each_entry */ + +#ifndef list_for_each_entry_reverse +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop counter. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member); \ + prefetch(pos->member.prev), &pos->member != (head); \ + pos = list_entry(pos->member.prev, typeof(*pos), member)) +#endif /* list_for_each_entry_reverse */ + +#ifndef list_for_each_entry_safe +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* list_for_each_entry_safe */ + +#ifndef list_for_each_entry_safe_from +/** + * list_for_each_entry_safe_from + * @pos: the type * to use as a loop cursor. 
+ * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. + */ +#define list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_entry(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* list_for_each_entry_safe_from */ + +#endif /* __LIBCFS_LUSTRE_LIST_H__ */ diff --git a/libcfs/include/libcfs/lltrace.h b/libcfs/include/libcfs/lltrace.h new file mode 100644 index 0000000..dbeae91 --- /dev/null +++ b/libcfs/include/libcfs/lltrace.h @@ -0,0 +1,167 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Compile with: + * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl + */ +#ifndef __LIBCFS_LLTRACE_H__ +#define __LIBCFS_LLTRACE_H__ + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported Operating System +#endif + +static inline int ltrace_write_file(char* fname) +{ + char* argv[3]; + + argv[0] = "debug_kernel"; + argv[1] = fname; + argv[2] = "1"; + + fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); + + return jt_dbg_debug_kernel(3, argv); +} + +static inline int ltrace_clear() +{ + char* argv[1]; + + argv[0] = "clear"; + + fprintf(stderr, "[ptlctl] %s\n", argv[0]); + + return jt_dbg_clear_debug_buf(1, argv); +} + +static inline int ltrace_mark(int indent_level, char* text) +{ + char* argv[2]; + char mark_buf[PATH_MAX]; + + snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); + + argv[0] = "mark"; + argv[1] = mark_buf; + return jt_dbg_mark_debug_buf(2, argv); +} + +static inline int ltrace_applymasks() +{ + char* argv[2]; + argv[0] = "list"; + argv[1] = "applymasks"; + + fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); + + return jt_dbg_list(2, argv); +} + + +static inline int ltrace_filter(char* subsys_or_mask) +{ + char* argv[2]; + argv[0] = "filter"; + argv[1] = subsys_or_mask; + return jt_dbg_filter(2, argv); +} + +static inline int ltrace_show(char* subsys_or_mask) +{ + char* argv[2]; + argv[0] = "show"; + argv[1] = subsys_or_mask; + return jt_dbg_show(2, argv); +} + +static inline int ltrace_start() +{ + int rc = 0; + dbg_initialize(0, NULL); +#ifdef LNET_DEV_ID + rc = register_ioc_dev(LNET_DEV_ID, LNET_DEV_PATH, + LNET_DEV_MAJOR, LNET_DEV_MINOR); +#endif + ltrace_filter("class"); + ltrace_filter("nal"); + ltrace_filter("portals"); + + ltrace_show("all_types"); + ltrace_filter("trace"); + ltrace_filter("malloc"); + ltrace_filter("net"); + ltrace_filter("page"); + ltrace_filter("other"); + ltrace_filter("info"); + ltrace_applymasks(); + + return rc; +} + + +static inline void ltrace_stop() +{ +#ifdef LNET_DEV_ID + unregister_ioc_dev(LNET_DEV_ID); +#endif +} + +static inline int not_uml() +{ + /* Return Values: + * 0 when run under UML + * 1 when run on host + * <0 when lookup failed + */ + struct stat buf; + int rc = stat("/dev/ubd", &buf); + rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; + if (rc<0) { + fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); + rc = 1; /* Assume host */ + } + return rc; +} + +#define LTRACE_MAX_NOB 256 +static inline void ltrace_add_processnames(char* fname) +{ + char cmdbuf[LTRACE_MAX_NOB]; + struct timeval tv; + struct timezone tz; + int nob; + int underuml = !not_uml(); + + gettimeofday(&tv, &tz); + + nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); + + /* Careful - these format strings need to match the CDEBUG + * formats in portals/linux/debug.c EXACTLY + */ + nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", + S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); + + if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { + nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, + "(%s:%d:%s() %d | %d+%lu): ", + "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); + } + else { + nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, + "(%s:%d:%s() %d+%lu): ", + "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); + } + + nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); + system(cmdbuf); +} + +#endif diff --git a/libcfs/include/libcfs/portals_utils.h b/libcfs/include/libcfs/portals_utils.h new file mode 100644 index 0000000..b79eb7e --- /dev/null +++ b/libcfs/include/libcfs/portals_utils.h @@ -0,0 +1,21 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#define __LIBCFS_PORTALS_UTILS_H__ + +/* + * portals_utils.h + * + */ +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#elif defined(__WINNT__) +#include +#else +#error Unsupported Operating System +#endif + +#endif diff --git a/libcfs/include/libcfs/types.h b/libcfs/include/libcfs/types.h new file mode 100644 index 0000000..71dd7fb --- /dev/null +++ b/libcfs/include/libcfs/types.h @@ -0,0 +1,17 @@ +#ifndef _LIBCFS_TYPES_H +#define _LIBCFS_TYPES_H + +/* + * This file was inttroduced to resolve XT3 (Catamount) build issues. + * The orignal idea was to move here however at + * the time of this writing + * it's unclear what external dependencies are tied + * to that file (It's not just some source file #including it) + * there is some build/packaging infrastructure that includes it. + * Hopefully that will be resolved shortly, that file will + * be removed, its contents copied here and this comment can be deleted. + */ + +#include + +#endif diff --git a/libcfs/include/libcfs/user-bitops.h b/libcfs/include/libcfs/user-bitops.h new file mode 100644 index 0000000..d2eea0e --- /dev/null +++ b/libcfs/include/libcfs/user-bitops.h @@ -0,0 +1,102 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. 
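 *
 * (Editor's note, added for illustration: a typical debug-tracing session
 * built from the lltrace.h helpers earlier in this patch; the output path
 * is a hypothetical example.)
 *
 *     ltrace_start();
 *     ltrace_mark(0, "benchmark begin");
 *         run the workload under test here
 *     ltrace_mark(0, "benchmark end");
 *     ltrace_write_file("/tmp/lustre-debug.txt");
 *     ltrace_clear();
 *     ltrace_stop();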
+ * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_BITOPS_H__ +#define __LIBCFS_USER_BITOPS_H__ + +/* test if bit nr is set in bitmap addr; returns previous value of bit nr */ +static __inline__ int set_bit(int nr, unsigned long * addr) +{ + long mask; + + addr += nr / BITS_PER_LONG; + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + nr = (mask & *addr) != 0; + *addr |= mask; + return nr; +} + +/* clear bit nr in bitmap addr; returns previous value of bit nr*/ +static __inline__ int clear_bit(int nr, unsigned long * addr) +{ + long mask; + + addr += nr / BITS_PER_LONG; + mask = 1UL << (nr & (BITS_PER_LONG - 1)); + nr = (mask & *addr) != 0; + *addr &= ~mask; + return nr; +} + +static __inline__ int test_bit(int nr, const unsigned long * addr) +{ + return ((1UL << (nr & (BITS_PER_LONG - 1))) & ((addr)[nr / BITS_PER_LONG])) != 0; +} + +/* using binary seach */ +static __inline__ unsigned long __ffs(long data) +{ + int pos = 0; + +#if BITS_PER_LONG == 64 + if ((data & 0xFFFFFFFF) == 0) { + pos += 32; + data >>= 32; + } +#endif + if ((data & 0xFFFF) == 0) { + pos += 16; + data >>= 16; + } + if ((data & 0xFF) == 0) { + pos += 8; + data >>= 8; + } + if ((data & 0xF) == 0) { + pos += 4; + data >>= 4; + } + if ((data & 0x3) == 0) { + pos += 2; + data >>= 2; + } + if ((data & 0x1) == 0) + pos += 1; + + return pos; +} + +#define __ffz(x) __ffs(~(x)) + +unsigned long find_next_bit(unsigned long *addr, + unsigned long size, unsigned long offset); + +unsigned long find_next_zero_bit(unsigned long *addr, + unsigned long size, unsigned long offset); + +#define find_first_bit(addr,size) (find_next_bit((addr),(size),0)) +#define find_first_zero_bit(addr,size) (find_next_zero_bit((addr),(size),0)) + +#endif diff --git a/libcfs/include/libcfs/user-lock.h b/libcfs/include/libcfs/user-lock.h new file mode 100644 index 0000000..6b46ce2 --- /dev/null +++ b/libcfs/include/libcfs/user-lock.h @@ -0,0 +1,243 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_LOCK_H__ +#define __LIBCFS_USER_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Implementations of portable synchronization APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + * + * XXX Liang: There are several branches share lnet with b_hd_newconfig, + * if we define lock APIs at here, there will be conflict with liblustre + * in other branches. + */ + +#ifndef __KERNEL__ +#include +#include + +#if 0 +/* + * Optional debugging (magic stamping and checking ownership) can be added. 
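 *
 * (Editor's note, added for illustration: the user-space bitops defined in
 * user-bitops.h above mirror their kernel counterparts; "map" is a
 * hypothetical two-word bitmap.)
 *
 *     unsigned long map[2] = { 0, 0 };
 *
 *     set_bit(3, map);
 *     if (test_bit(3, map))
 *             clear_bit(3, map);
 *
 *     unsigned long bit = __ffs(0x10);      yields 4, the lowest set bit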
+ */ + +/* + * spin_lock + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + * + * No-op implementation. + */ +struct spin_lock {int foo;}; + +typedef struct spin_lock spinlock_t; + +#define SPIN_LOCK_UNLOCKED (spinlock_t) { } +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) + +void spin_lock_init(spinlock_t *lock); +void spin_lock(spinlock_t *lock); +void spin_unlock(spinlock_t *lock); +int spin_trylock(spinlock_t *lock); +void spin_lock_bh_init(spinlock_t *lock); +void spin_lock_bh(spinlock_t *lock); +void spin_unlock_bh(spinlock_t *lock); +static inline int spin_is_locked(spinlock_t *l) {return 1;} + +static inline void spin_lock_irqsave(spinlock_t *l, unsigned long f){} +static inline void spin_unlock_irqrestore(spinlock_t *l, unsigned long f){} + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +typedef struct semaphore { + int foo; +} mutex_t; + +void sema_init(struct semaphore *s, int val); +void __down(struct semaphore *s); +void __up(struct semaphore *s); + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +#if 0 +struct completion {}; + +void init_completion(struct completion *c); +void complete(struct completion *c); +void wait_for_completion(struct completion *c); +#endif + +/* + * rw_semaphore: + * + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore {}; + +void init_rwsem(struct rw_semaphore *s); +void down_read(struct rw_semaphore *s); +int down_read_trylock(struct rw_semaphore *s); +void down_write(struct rw_semaphore *s); +int down_write_trylock(struct rw_semaphore *s); +void up_read(struct rw_semaphore *s); +void up_write(struct rw_semaphore *s); + +/* + * read-write lock : Need to be investigated more!! 
+ * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore + * + * - DECLARE_RWLOCK(l) + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ +typedef struct rw_semaphore rwlock_t; + +#define rwlock_init(pl) init_rwsem(pl) + +#define read_lock(l) down_read(l) +#define read_unlock(l) up_read(l) +#define write_lock(l) down_write(l) +#define write_unlock(l) up_write(l) + +static inline void +write_lock_irqsave(rwlock_t *l, unsigned long f) { write_lock(l); } +static inline void +write_unlock_irqrestore(rwlock_t *l, unsigned long f) { write_unlock(l); } + +static inline void +read_lock_irqsave(rwlock_t *l, unsigned long f) { read_lock(l); } +static inline void +read_unlock_irqrestore(rwlock_t *l, unsigned long f) { read_unlock(l); } + +/* + * Atomic for user-space + * Copied from liblustre + */ +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } +#define atomic_read(a) ((a)->counter) +#define atomic_set(a,b) do {(a)->counter = b; } while (0) +#define atomic_dec_and_test(a) ((--((a)->counter)) == 0) +#define atomic_inc(a) (((a)->counter)++) +#define atomic_dec(a) do { (a)->counter--; } while (0) +#define atomic_add(b,a) do {(a)->counter += b;} while (0) +#define atomic_add_return(n,a) ((a)->counter = n) +#define atomic_inc_return(a) atomic_add_return(1,a) +#define atomic_sub(b,a) do {(a)->counter -= b;} while (0) + +#endif + +#ifdef HAVE_LIBPTHREAD +#include + +/* + * Completion + */ + +struct cfs_completion { + int c_done; + pthread_cond_t c_cond; + pthread_mutex_t c_mut; +}; + +void cfs_init_completion(struct cfs_completion *c); +void cfs_fini_completion(struct cfs_completion *c); +void cfs_complete(struct cfs_completion *c); +void cfs_wait_for_completion(struct cfs_completion *c); + +/* + * atomic.h + */ + +typedef struct { volatile int counter; } cfs_atomic_t; + +int cfs_atomic_read(cfs_atomic_t *a); +void cfs_atomic_set(cfs_atomic_t *a, int b); +int cfs_atomic_dec_and_test(cfs_atomic_t *a); +void cfs_atomic_inc(cfs_atomic_t *a); +void cfs_atomic_dec(cfs_atomic_t *a); +void cfs_atomic_add(int b, cfs_atomic_t *a); +void cfs_atomic_sub(int b, cfs_atomic_t *a); + +#endif /* HAVE_LIBPTHREAD */ + +/* !__KERNEL__ */ +#endif + +/* __LIBCFS_USER_LOCK_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/user-prim.h b/libcfs/include/libcfs/user-prim.h new file mode 100644 index 0000000..43c1aeb --- /dev/null +++ b/libcfs/include/libcfs/user-prim.h @@ -0,0 +1,328 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. 
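 *
 * (Editor's note, added for illustration: the pthread-backed cfs_completion
 * declared in user-lock.h above follows the usual init/complete/wait
 * pattern; the worker thread is hypothetical.)
 *
 *     struct cfs_completion done;
 *
 *     cfs_init_completion(&done);
 *         hand &done to a worker, which calls cfs_complete(&done) when ready
 *     cfs_wait_for_completion(&done);
 *     cfs_fini_completion(&done);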
+ * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_PRIM_H__ +#define __LIBCFS_USER_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Implementations of portable APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + */ + +#ifndef __KERNEL__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_LIBPTHREAD +#include +#endif + + +/* + * Wait Queue. No-op implementation. + */ + +typedef struct cfs_waitlink { + struct list_head sleeping; + void *process; +} cfs_waitlink_t; + +typedef struct cfs_waitq { + struct list_head sleepers; +} cfs_waitq_t; + +void cfs_waitq_init(struct cfs_waitq *waitq); +void cfs_waitlink_init(struct cfs_waitlink *link); +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link); +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); +int cfs_waitq_active(struct cfs_waitq *waitq); +void cfs_waitq_signal(struct cfs_waitq *waitq); +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); +void cfs_waitq_broadcast(struct cfs_waitq *waitq); +void cfs_waitq_wait(struct cfs_waitlink *link, int state); +int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout); +#define cfs_schedule_timeout(s, t) \ + do { \ + cfs_waitlink_t l; \ + cfs_waitq_timedwait(&l, s, t); \ + } while (0) + +#define CFS_TASK_INTERRUPTIBLE (0) +#define CFS_TASK_UNINT (0) + +/* 2.4 defines */ + +/* XXX + * for this moment, liblusre will not rely OST for non-page-aligned write + */ +#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE + +struct page { + void *addr; + unsigned long index; + struct list_head list; + unsigned long private; + + /* internally used by liblustre file i/o */ + int _offset; + int _count; +#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE + int _managed; +#endif + struct list_head _node; +}; + +typedef struct page cfs_page_t; + +#ifndef PAGE_SIZE + +/* 4K */ +#define CFS_PAGE_SHIFT 12 +#define CFS_PAGE_SIZE (1UL << CFS_PAGE_SHIFT) +#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) + +#else + +#define CFS_PAGE_SIZE PAGE_SIZE +#define CFS_PAGE_SHIFT PAGE_SHIFT +#define CFS_PAGE_MASK (~((__u64)CFS_PAGE_SIZE-1)) + +#endif + +cfs_page_t *cfs_alloc_page(unsigned int flags); +void cfs_free_page(cfs_page_t *pg); +void *cfs_page_address(cfs_page_t *pg); +void *cfs_kmap(cfs_page_t *pg); +void cfs_kunmap(cfs_page_t *pg); + +#define cfs_get_page(p) __I_should_not_be_called__(at_all) +#define cfs_page_count(p) __I_should_not_be_called__(at_all) +#define cfs_page_index(p) ((p)->index) + +/* + * Memory allocator + * Inline function, so utils can use them without linking of libcfs + */ +#define __ALLOC_ZERO (1 << 2) +static inline void *cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + void *result; + + result = malloc(nr_bytes); + if (result != NULL && (flags & __ALLOC_ZERO)) + memset(result, 0, nr_bytes); + return result; +} + +#define cfs_free(addr) free(addr) +#define cfs_alloc_large(nr_bytes) cfs_alloc(nr_bytes, 0) +#define cfs_free_large(addr) cfs_free(addr) + +#define CFS_ALLOC_ATOMIC_TRY (0) +/* + * SLAB allocator + */ +typedef struct { + int size; +} cfs_mem_cache_t; + +#define SLAB_HWCACHE_ALIGN 0 +#define SLAB_KERNEL 0 +#define SLAB_NOFS 0 + +cfs_mem_cache_t * +cfs_mem_cache_create(const 
char *, size_t, size_t, unsigned long); +int cfs_mem_cache_destroy(cfs_mem_cache_t *c); +void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp); +void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr); + +typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, + int count, int *eof, void *data); + +struct file; /* forward ref */ +typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, + unsigned long count, void *data); + +/* + * Signal + */ +typedef sigset_t cfs_sigset_t; + +/* + * Timer + */ +#include + +typedef struct { + struct list_head tl_list; + void (*function)(unsigned long unused); + unsigned long data; + long expires; +} cfs_timer_t; + +#define cfs_init_timer(t) do {} while(0) +#define cfs_jiffies \ +({ \ + unsigned long _ret = 0; \ + struct timeval tv; \ + if (gettimeofday(&tv, NULL) == 0) \ + _ret = tv.tv_sec; \ + _ret; \ +}) + +static inline int cfs_timer_init(cfs_timer_t *l, void (* func)(unsigned long), void *arg) +{ + CFS_INIT_LIST_HEAD(&l->tl_list); + l->function = func; + l->data = (unsigned long)arg; + return 0; +} + +static inline int cfs_timer_is_armed(cfs_timer_t *l) +{ + if (cfs_time_before(cfs_jiffies, l->expires)) + return 1; + else + return 0; +} + +static inline void cfs_timer_arm(cfs_timer_t *l, int thetime) +{ + l->expires = thetime; +} + +static inline void cfs_timer_disarm(cfs_timer_t *l) +{ +} + +static inline long cfs_timer_deadline(cfs_timer_t *l) +{ + return l->expires; +} + +#if 0 +#define cfs_init_timer(t) do {} while(0) +void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); +void cfs_timer_done(struct cfs_timer *t); +void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); +void cfs_timer_disarm(struct cfs_timer *t); +int cfs_timer_is_armed(struct cfs_timer *t); + +cfs_time_t cfs_timer_deadline(struct cfs_timer *t); +#endif + +#define in_interrupt() (0) + +static inline void cfs_pause(cfs_duration_t d) +{ + struct timespec s; + + cfs_duration_nsec(d, &s); + nanosleep(&s, NULL); +} + +typedef void cfs_psdev_t; + +static inline int cfs_psdev_register(cfs_psdev_t *foo) +{ + return 0; +} + +static inline int cfs_psdev_deregister(cfs_psdev_t *foo) +{ + return 0; +} + +#define cfs_lock_kernel() do {} while (0) +#define cfs_sigfillset(l) do {} while (0) +#define cfs_recalc_sigpending(l) do {} while (0) +#define cfs_kernel_thread(l,m,n) LBUG() + +#ifdef HAVE_LIBPTHREAD +typedef int (*cfs_thread_t)(void *); +int cfs_create_thread(cfs_thread_t func, void *arg); +#else +#define cfs_create_thread(l,m) LBUG() +#endif + +int cfs_parse_int_tunable(int *value, char *name); +uid_t cfs_curproc_uid(void); + +#define LIBCFS_REALLOC(ptr, size) realloc(ptr, size) + +#define cfs_online_cpus() sysconf(_SC_NPROCESSORS_ONLN) + +// static inline void local_irq_save(unsigned long flag) {return;} +// static inline void local_irq_restore(unsigned long flag) {return;} + +enum { + CFS_STACK_TRACE_DEPTH = 16 +}; + +struct cfs_stack_trace { + void *frame[CFS_STACK_TRACE_DEPTH]; +}; + +/* + * arithmetic + */ +#define do_div(a,b) \ + ({ \ + unsigned long remainder;\ + remainder = (a) % (b); \ + (a) = (a) / (b); \ + (remainder); \ + }) + +/* !__KERNEL__ */ +#endif + +/* __LIBCFS_USER_PRIM_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/user-tcpip.h b/libcfs/include/libcfs/user-tcpip.h new file mode 100644 index 0000000..342c039 --- /dev/null +++ b/libcfs/include/libcfs/user-tcpip.h 
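/*
 * (Editor's aside, added for illustration only: the user-level memory shims
 * declared in user-prim.h above keep the kernel-style calling convention on
 * top of plain malloc/free.  The cache name and sizes below are hypothetical,
 * and error checks are omitted for brevity.)
 *
 *     cfs_mem_cache_t *cache = cfs_mem_cache_create("demo_cache", 128, 0, 0);
 *     void *obj = cfs_mem_cache_alloc(cache, 0);
 *
 *     cfs_mem_cache_free(cache, obj);
 *     cfs_mem_cache_destroy(cache);
 *
 *     obj = cfs_alloc(4096, __ALLOC_ZERO);      allocated and zero-filled
 *     cfs_free(obj);
 */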
@@ -0,0 +1,90 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __LIBCFS_USER_TCPIP_H__ +#define __LIBCFS_USER_TCPIP_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifndef __KERNEL__ + +#include + +/* + * Functions to get network interfaces info + */ + +int libcfs_sock_ioctl(int cmd, unsigned long arg); +int libcfs_ipif_query (char *name, int *up, __u32 *ip); +void libcfs_ipif_free_enumeration (char **names, int n); +int libcfs_ipif_enumerate (char ***namesp); + +/* + * Network function used by user-land lnet acceptor + */ + +int libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog); +int libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port); +int libcfs_sock_read (int sock, void *buffer, int nob, int timeout); +void libcfs_sock_abort_accept(__u16 port); + +/* + * Network functions of common use + */ + +int libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p); +int libcfs_socketpair(int *fdp); +int libcfs_fcntl_nonblock(int fd); +int libcfs_sock_set_nagle(int fd, int nagle); +int libcfs_sock_set_bufsiz(int fd, int bufsiz); +int libcfs_sock_create(int *fdp); +int libcfs_sock_bind_to_port(int fd, __u16 port); +int libcfs_sock_connect(int fd, __u32 ip, __u16 port); +int libcfs_sock_writev(int fd, const struct iovec *vector, int count); +int libcfs_sock_readv(int fd, const struct iovec *vector, int count); + +/* + * Macros for easy printing IP-adresses + */ + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +#if defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN) +#define HIPQUAD(addr) \ + ((unsigned char *)&addr)[3], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[0] +#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) +#define HIPQUAD NIPQUAD +#else +#error "Undefined byteorder??" +#endif /* __LITTLE_ENDIAN */ + +#endif /* !__KERNEL__ */ + +#endif diff --git a/libcfs/include/libcfs/user-time.h b/libcfs/include/libcfs/user-time.h new file mode 100644 index 0000000..874b7da --- /dev/null +++ b/libcfs/include/libcfs/user-time.h @@ -0,0 +1,205 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_TIME_H__ +#define __LIBCFS_USER_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#ifndef __KERNEL__ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION 1000000 + +/* + * Liblustre. time(2) based implementation. 
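 *
 * (Editor's sketch, added for illustration.)  With this backend the portable
 * time API reduces to plain time_t arithmetic, e.g.:
 *
 *     cfs_time_t deadline = cfs_time_add(cfs_time_current(),
 *                                        cfs_time_seconds(30));
 *     if (cfs_time_before(cfs_time_current(), deadline))
 *             the 30-second deadline has not yet expired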
+ */ + +#include +#include +#include + +typedef time_t cfs_fs_time_t; +typedef time_t cfs_time_t; +typedef long cfs_duration_t; + +static inline cfs_time_t cfs_time_current(void) +{ + return time(NULL); +} + +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return seconds; +} + +static inline time_t cfs_time_current_sec(void) +{ + return cfs_time_seconds(cfs_time_current()); +} + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return t1 < t2; +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return t1 <= t2; +} + +static inline cfs_duration_t cfs_duration_build(int64_t nano) +{ + return (cfs_duration_t) (nano / ONE_BILLION); +} + +static inline time_t cfs_duration_sec(cfs_duration_t d) +{ + return d; +} + +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ + s->tv_sec = d; + s->tv_usec = 0; +} + +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ + s->tv_sec = d; + s->tv_nsec = 0; +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + time(t); +} + +static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return *t; +} + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + v->tv_sec = *t; + v->tv_usec = 0; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = *t; + s->tv_nsec = 0; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return *t1 < *t2; +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return *t1 <= *t2; +} + +#define CFS_TICK (1) + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return t + d; +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return t1 - t2; +} + +#define cfs_time_current_64 cfs_time_current +#define cfs_time_add_64 cfs_time_add +#define cfs_time_shift_64 cfs_time_shift +#define cfs_time_before_64 cfs_time_before +#define cfs_time_beforeq_64 cfs_time_beforeq + +#ifndef CFS_TIME_T +#define CFS_TIME_T "%u" +#endif + +#define CFS_DURATION_T "%ld" + +/* !__KERNEL__ */ +#endif + +/* __LIBCFS_USER_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/winnt/kp30.h b/libcfs/include/libcfs/winnt/kp30.h new file mode 100644 index 0000000..779d8be --- /dev/null +++ b/libcfs/include/libcfs/winnt/kp30.h @@ -0,0 +1,157 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#ifndef __LIBCFS_WINNT_KP30_H__ +#define __LIBCFS_WINNT_KP30_H__ + +#ifndef __LIBCFS_KP30_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#ifdef __KERNEL__ + +/* Module parameter support */ +#define CFS_MODULE_PARM(name, t, type, perm, desc) + +#define CFS_SYSFS_MODULE_PARM 0 /* no sysfs access to module parameters */ + + +static inline void our_cond_resched() +{ + schedule_timeout(1i64); +} + +#ifdef CONFIG_SMP +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */ +#else +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) +#endif + +#error Need a winnt version of panic() +#define LIBCFS_PANIC(msg) KeBugCheckEx(msg, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL) +#error libcfs_register_panic_notifier() missing +#error libcfs_unregister_panic_notifier() missing + +#define cfs_work_struct_t WORK_QUEUE_ITEM +#define cfs_prepare_work(tq, routine, contex) +#define cfs_schedule_work(tq) +#define cfs_get_work_data(type,field,data) (data) + +/* ------------------------------------------------------------------- */ + +#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x) +#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x) + +#define PORTAL_SYMBOL_GET(x) (cfs_symbol_get(#x)) +#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x) + +#define PORTAL_MODULE_USE do{}while(0) +#define PORTAL_MODULE_UNUSE do{}while(0) + +#define printk DbgPrint +#define ptintf DbgPrint + +#else /* !__KERNEL__ */ + +# include +# include +#ifdef __CYGWIN__ +# include +#endif +# include + +#endif /* End of !__KERNEL__ */ + +/******************************************************************************/ +/* Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. */ +#define LWT_SUPPORT 0 + +/* kernel hasn't defined this? 
*/ +typedef struct { + __s64 lwte_when; + char *lwte_where; + void *lwte_task; + long_ptr lwte_p1; + long_ptr lwte_p2; + long_ptr lwte_p3; + long_ptr lwte_p4; +# if BITS_PER_LONG > 32 + long_ptr lwte_pad; +# endif +} lwt_event_t; + + +# define LWT_EVENT(p1,p2,p3,p4) + + +/* ------------------------------------------------------------------ */ + +#define IOCTL_LIBCFS_TYPE long_ptr + +#ifdef __CYGWIN__ +# ifndef BITS_PER_LONG +# if (~0UL) == 0xffffffffUL +# define BITS_PER_LONG 32 +# else +# define BITS_PER_LONG 64 +# endif +# endif +#endif + +#if BITS_PER_LONG > 32 +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long_ptr)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a5a5a5a5a) +#else +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long_ptr)0x5a5a5a5a) +# define LP_POISON ((char *)(long_ptr)0x5a5a5a5a) +#endif + +#if defined(__x86_64__) +# define LPU64 "%I64u" +# define LPD64 "%I64d" +# define LPX64 "%I64x" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) +# define LPU64 "%I64u" +# define LPD64 "%I64d" +# define LPX64 "%I64x" +# define LPSZ "%u" +# define LPSSZ "%d" +#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) +# define LPU64 "%I64u" +# define LPD64 "%I64d" +# define LPX64 "%I64x" +# define LPSZ "%u" +# define LPSSZ "%d" +#endif +#ifndef LPU64 +# error "No word size defined" +#endif + +#endif diff --git a/libcfs/include/libcfs/winnt/libcfs.h b/libcfs/include/libcfs/winnt/libcfs.h new file mode 100644 index 0000000..386eb5f --- /dev/null +++ b/libcfs/include/libcfs/winnt/libcfs.h @@ -0,0 +1,126 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LIBCFS_WINNT_LIBCFS_H__ +#define __LIBCFS_WINNT_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +/* workgroud for VC compiler */ +#ifndef __FUNCTION__ +#define __FUNCTION__ "generic" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u32 ph_cpu_id; + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + +#ifdef __KERNEL__ + +enum { + /* if you change this, update darwin-util.c:cfs_stack_trace_fill() */ + CFS_STACK_TRACE_DEPTH = 16 +}; + +struct cfs_stack_trace { + void *frame[CFS_STACK_TRACE_DEPTH]; +}; + +static inline __u32 query_stack_size() +{ + ULONG LowLimit, HighLimit; + + IoGetStackLimits(&LowLimit, &HighLimit); + ASSERT(HighLimit > LowLimit); + + return (__u32) (HighLimit - LowLimit); +} +#else +static inline __u32 query_stack_size() +{ + return 4096; +} +#endif + + +#ifndef THREAD_SIZE +# define THREAD_SIZE query_stack_size() +#endif + +#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) + +#ifdef __KERNEL__ +# ifdef __ia64__ +# define CDEBUG_STACK() (THREAD_SIZE - \ + ((ulong_ptr)__builtin_dwarf_cfa() & \ + (THREAD_SIZE - 1))) +# else +# define CDEBUG_STACK (IoGetRemainingStackSize()) +# error "This doesn't seem right; CDEBUG_STACK should grow with the stack" +# endif /* __ia64__ */ + +#define CHECK_STACK() \ +do { \ + unsigned long _stack = CDEBUG_STACK(); \ + \ + if (_stack > 3*THREAD_SIZE/4 && _stack > libcfs_stack) { \ + libcfs_stack = _stack; \ + libcfs_debug_msg(NULL, DEBUG_SUBSYSTEM, D_WARNING, \ + __FILE__, NULL, __LINE__, \ + "maximum lustre stack %lu\n", _stack); \ + } \ +} while (0) +#else /* !__KERNEL__ */ +#define CHECK_STACK() do { } while(0) +#define CDEBUG_STACK() (0L) +#endif /* __KERNEL__ */ + +/* initial pid */ +#define LUSTRE_LNET_PID 12345 + +#define ENTRY_NESTING_SUPPORT (0) +#define ENTRY_NESTING do {;} while (0) +#define EXIT_NESTING do {;} while (0) +#define __current_nesting_level() (0) + +#endif /* _WINNT_LIBCFS_H */ diff --git a/libcfs/include/libcfs/winnt/lltrace.h b/libcfs/include/libcfs/winnt/lltrace.h new file mode 100644 index 0000000..9615e94 --- /dev/null +++ b/libcfs/include/libcfs/winnt/lltrace.h @@ -0,0 +1,33 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_WINNT_LLTRACE_H__ +#define __LIBCFS_WINNT_LLTRACE_H__ + +#ifndef __LIBCFS_LLTRACE_H__ +#error Do not #include this file directly. 
#include instead +#endif + + +#endif diff --git a/libcfs/include/libcfs/winnt/portals_compat25.h b/libcfs/include/libcfs/winnt/portals_compat25.h new file mode 100644 index 0000000..579b795 --- /dev/null +++ b/libcfs/include/libcfs/winnt/portals_compat25.h @@ -0,0 +1,28 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef __LIBCFS_WINNT_PORTALS_COMPAT_H__ +#define __LIBCFS_WINNT_PORTALS_COMPAT_H__ + + + +#endif /* _PORTALS_COMPAT_H */ diff --git a/libcfs/include/libcfs/winnt/portals_utils.h b/libcfs/include/libcfs/winnt/portals_utils.h new file mode 100644 index 0000000..ec80692 --- /dev/null +++ b/libcfs/include/libcfs/winnt/portals_utils.h @@ -0,0 +1,168 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_WINNT_PORTALS_UTILS_H__ +#define __LIBCFS_WINNT_PORTALS_UTILS_H__ + +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifndef cfs_is_flag_set +#define cfs_is_flag_set(x,f) (((x)&(f))==(f)) +#endif + +#ifndef cfs_set_flag +#define cfs_set_flag(x,f) ((x) |= (f)) +#endif + +#ifndef cfs_clear_flag +#define cfs_clear_flag(x,f) ((x) &= ~(f)) +#endif + + +static inline __u32 __do_div(__u32 * n, __u32 b) +{ + __u32 mod; + + mod = *n % b; + *n = *n / b; + return mod; +} + +#define do_div(n,base) __do_div((__u32 *)&(n), (__u32) (base)) + +#ifdef __KERNEL__ + +#include +#include + +char * strsep(char **s, const char *ct); +static inline size_t strnlen(const char * s, size_t count) { + size_t len = 0; + while(len < count && s[len++]); + return len; +} +char * ul2dstr(ulong_ptr address, char *buf, int len); + +#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) +#define simple_strtoll(a1, a2, a3) (__s64)strtoull(a1, a2, a3) +#define simple_strtoull(a1, a2, a3) strtoull(a1, a2, a3) + +unsigned long simple_strtoul(const char *cp,char **endp, unsigned int base); + +static inline int test_bit(int nr, void * addr) +{ + return ((1UL << (nr & 31)) & (((volatile ULONG *) addr)[nr >> 5])) != 0; +} + +static inline void clear_bit(int nr, void * addr) +{ + (((volatile ULONG *) addr)[nr >> 5]) &= (~(1UL << (nr & 31))); +} + + +static inline void set_bit(int nr, void * addr) +{ + (((volatile ULONG *) addr)[nr >> 5]) |= (1UL << (nr & 31)); +} + +static inline void read_random(char *buf, int len) +{ + ULONG Seed = (ULONG) buf; + Seed = RtlRandom(&Seed); + while (len >0) { + if (len > sizeof(ULONG)) { + memcpy(buf, &Seed, sizeof(ULONG)); + len -= sizeof(ULONG); + buf += sizeof(ULONG); + } else { + memcpy(buf, &Seed, len); + len = 0; + break; + } + } +} +#define get_random_bytes(buf, len) read_random(buf, len) + +/* do NOT use function or expression as parameters ... */ + +#ifndef min_t +#define min_t(type,x,y) (type)(x) < (type)(y) ? (x): (y) +#endif + +#ifndef max_t +#define max_t(type,x,y) (type)(x) < (type)(y) ? (y): (x) +#endif + + +#define NIPQUAD(addr) \ + ((unsigned char *)&addr)[0], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[3] + +#define HIPQUAD(addr) \ + ((unsigned char *)&addr)[3], \ + ((unsigned char *)&addr)[2], \ + ((unsigned char *)&addr)[1], \ + ((unsigned char *)&addr)[0] + +static int copy_from_user(void *to, void *from, int c) +{ + memcpy(to, from, c); + return 0; +} + +static int copy_to_user(void *to, void *from, int c) +{ + memcpy(to, from, c); + return 0; +} + + +#define put_user(x, ptr) \ +( \ + *(ptr) = x, \ + 0 \ +) + + +#define get_user(x,ptr) \ +( \ + x = *(ptr), \ + 0 \ +) + +#define num_physpages (64 * 1024) + +#define snprintf _snprintf +#define vsnprintf _vsnprintf + + +#endif /* !__KERNEL__ */ + +int cfs_error_code(NTSTATUS); + +#endif diff --git a/libcfs/include/libcfs/winnt/winnt-fs.h b/libcfs/include/libcfs/winnt/winnt-fs.h new file mode 100644 index 0000000..088d0e0 --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-fs.h @@ -0,0 +1,254 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * File operations & routines. + * + */ + +#ifndef __LIBCFS_WINNT_CFS_FS_H__ +#define __LIBCFS_WINNT_CFS_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + + +#define MINORBITS 8 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define NODEV 0 +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + + +#ifdef __KERNEL__ + +struct file_operations +{ + loff_t (*lseek)(struct file * file, loff_t offset, int origin); + ssize_t (*read) (struct file * file, char * buf, size_t nbytes, loff_t *ppos); + ssize_t (*write)(struct file * file, const char * buffer, + size_t count, loff_t *ppos); + int (*ioctl) (struct file *, unsigned int, ulong_ptr); + int (*open) (struct file *); + int (*release) (struct file *); +}; + +struct file { + + cfs_handle_t f_handle; + unsigned int f_flags; + mode_t f_mode; + ulong_ptr f_count; + + //struct list_head f_list; + //struct dentry * f_dentry; + + cfs_proc_entry_t * proc_dentry; + cfs_file_operations_t * f_op; + + size_t f_size; + loff_t f_pos; + unsigned int f_uid, f_gid; + int f_error; + + ulong_ptr f_version; + + void * private_data; + + char f_name[1]; + +}; + +#define cfs_filp_size(f) ((f)->f_size) +#define cfs_filp_poff(f) (&(f)->f_pos) + +cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err); +int cfs_filp_close(cfs_file_t *fp); +int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos); +int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos); +int cfs_filp_fsync(cfs_file_t *fp); +int cfs_get_file(cfs_file_t *fp); +int cfs_put_file(cfs_file_t *fp); +int cfs_file_count(cfs_file_t *fp); + + + +/* + * CFS_FLOCK routines + */ + +typedef struct file_lock{ + int fl_type; + pid_t fl_pid; + size_t fl_len; + off_t fl_start; + off_t fl_end; +} cfs_flock_t; + +#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) +#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) + +#define cfs_flock_type(fl) ((fl)->fl_type) +#define cfs_flock_set_type(fl, type) do { (fl)->fl_type = (type); } while(0) +#define cfs_flock_pid(fl) ((fl)->fl_pid) +#define cfs_flock_set_pid(fl, pid) do { (fl)->fl_pid = (pid); } while(0) +#define cfs_flock_start(fl) ((fl)->fl_start) +#define cfs_flock_set_start(fl, start) do { (fl)->fl_start = (start); } while(0) +#define cfs_flock_end(fl) ((fl)->fl_end) +#define cfs_flock_set_end(fl, end) do { (fl)->fl_end = (end); } while(0) + +#define ATTR_MODE 0x0001 +#define ATTR_UID 0x0002 +#define ATTR_GID 0x0004 +#define ATTR_SIZE 0x0008 +#define ATTR_ATIME 0x0010 +#define ATTR_MTIME 0x0020 +#define ATTR_CTIME 0x0040 +#define ATTR_ATIME_SET 0x0080 +#define ATTR_MTIME_SET 0x0100 +#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 0x0400 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ +//#define ATTR_CTIME_SET 0x2000 +#define ATTR_BLOCKS 0x4000 +#define ATTR_KILL_SUID 0 +#define 
ATTR_KILL_SGID 0 + +#define in_group_p(x) (0) + +/* + * proc fs routines + */ + +int proc_init_fs(); +void proc_destroy_fs(); + + +/* + * misc + */ + +static inline void *ERR_PTR(long_ptr error) +{ + return (void *) error; +} + +static inline long_ptr PTR_ERR(const void *ptr) +{ + return (long_ptr) ptr; +} + +static inline long_ptr IS_ERR(const void *ptr) +{ + return (ulong_ptr)ptr > (ulong_ptr)-1000L; +} + +#else /* !__KERNEL__ */ + +#define CREATE_NEW 1 +#define CREATE_ALWAYS 2 +#define OPEN_EXISTING 3 +#define OPEN_ALWAYS 4 +#define TRUNCATE_EXISTING 5 + +#define SECTION_QUERY 0x0001 +#define SECTION_MAP_WRITE 0x0002 +#define SECTION_MAP_READ 0x0004 +#define SECTION_MAP_EXECUTE 0x0008 +#define SECTION_EXTEND_SIZE 0x0010 + +#define FILE_MAP_COPY SECTION_QUERY +#define FILE_MAP_WRITE SECTION_MAP_WRITE +#define FILE_MAP_READ SECTION_MAP_READ +#define FILE_MAP_ALL_ACCESS SECTION_ALL_ACCESS + + +NTSYSAPI +HANDLE +NTAPI +CreateFileA( + IN LPCSTR lpFileName, + IN DWORD dwDesiredAccess, + IN DWORD dwShareMode, + IN PVOID lpSecurityAttributes, + IN DWORD dwCreationDisposition, + IN DWORD dwFlagsAndAttributes, + IN HANDLE hTemplateFile + ); + +#define CreateFile CreateFileA + +NTSYSAPI +BOOL +NTAPI +CloseHandle( + IN OUT HANDLE hObject + ); + +NTSYSAPI +HANDLE +NTAPI +CreateFileMappingA( + IN HANDLE hFile, + IN PVOID lpFileMappingAttributes, + IN DWORD flProtect, + IN DWORD dwMaximumSizeHigh, + IN DWORD dwMaximumSizeLow, + IN LPCSTR lpName + ); +#define CreateFileMapping CreateFileMappingA + +NTSYSAPI +DWORD +NTAPI +GetFileSize( + IN HANDLE hFile, + OUT DWORD * lpFileSizeHigh + ); + +NTSYSAPI +PVOID +NTAPI +MapViewOfFile( + IN HANDLE hFileMappingObject, + IN DWORD dwDesiredAccess, + IN DWORD dwFileOffsetHigh, + IN DWORD dwFileOffsetLow, + IN SIZE_T dwNumberOfBytesToMap + ); + +NTSYSAPI +BOOL +NTAPI +UnmapViewOfFile( + IN PVOID lpBaseAddress + ); + +#endif /* __KERNEL__ */ + +typedef struct { + void *d; +} cfs_dentry_t; + + +#endif /* __LIBCFS_WINNT_CFS_FS_H__*/ diff --git a/libcfs/include/libcfs/winnt/winnt-lock.h b/libcfs/include/libcfs/winnt/winnt-lock.h new file mode 100644 index 0000000..e0b9393 --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-lock.h @@ -0,0 +1,686 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_WINNT_CFS_LOCK_H__ +#define __LIBCFS_WINNT_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ + + +/* + * nt specific part ... 
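+ *
+ * Illustrative sketch of using the atomic_t primitives declared just
+ * below (their bodies live elsewhere in the winnt port; final_cleanup()
+ * here is a hypothetical caller-supplied routine):
+ *
+ *      atomic_t refcount = ATOMIC_INIT(1);
+ *
+ *      atomic_inc(&refcount);
+ *      if (atomic_dec_and_test(&refcount))
+ *              final_cleanup();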
+ */ + + +/* atomic */ + +typedef struct { volatile int counter; } atomic_t; + +#define ATOMIC_INIT(i) { i } + +#define atomic_read(v) ((v)->counter) +#define atomic_set(v,i) (((v)->counter) = (i)) + +void FASTCALL atomic_add(int i, atomic_t *v); +void FASTCALL atomic_sub(int i, atomic_t *v); + +int FASTCALL atomic_sub_and_test(int i, atomic_t *v); + +void FASTCALL atomic_inc(atomic_t *v); +void FASTCALL atomic_dec(atomic_t *v); + +int FASTCALL atomic_dec_and_test(atomic_t *v); +int FASTCALL atomic_inc_and_test(atomic_t *v); + + +/* event */ + +typedef KEVENT event_t; + +/* + * cfs_init_event + * To initialize the event object + * + * Arguments: + * event: pointer to the event object + * type: Non Zero: SynchronizationEvent + * Zero: NotificationEvent + * status: the initial stats of the event + * Non Zero: signaled + * Zero: un-signaled + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ +static inline void + cfs_init_event(event_t *event, int type, int status) +{ + KeInitializeEvent( + event, + (type) ? SynchronizationEvent: NotificationEvent, + (status) ? TRUE : FALSE + ); +} + +/* + * cfs_wait_event + * To wait on an event to syncrhonize the process + * + * Arguments: + * event: pointer to the event object + * timeout: the timeout for waitting or 0 means infinite time. + * + * Return Value: + * Zero: waiting timeouts + * Non Zero: event signaled ... + * + * Notes: + * N/A + */ + +static inline int64_t +cfs_wait_event(event_t * event, int64_t timeout) +{ + NTSTATUS Status; + LARGE_INTEGER TimeOut; + + TimeOut.QuadPart = -1 * (10000000/HZ) * timeout; + + Status = KeWaitForSingleObject( + event, + Executive, + KernelMode, + FALSE, + (timeout != 0) ? (&TimeOut) : (NULL) + ); + + if (Status == STATUS_TIMEOUT) { + return 0; + } + + return TRUE; // signaled case +} + +/* + * cfs_wake_event + * To signal the event object + * + * Arguments: + * event: pointer to the event object + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline int +cfs_wake_event(event_t * event) +{ + return (KeSetEvent(event, 0, FALSE) != 0); +} + +/* + * cfs_clear_event + * To clear/reset the status of the event object + * + * Arguments: + * event: pointer to the event object + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void +cfs_clear_event(event_t * event) +{ + KeResetEvent(event); +} + + +/* + * IMPORTANT !!!!!!!! + * + * All locks' declaration are not guaranteed to be initialized, + * Althought some of they are initialized in Linux. All locks + * declared by CFS_DECL_* should be initialized explicitly. + */ + + +/* + * spin lock defintions / routines + */ + +/* + * Warning: + * + * for spinlock operations, try to grab nesting acquisition of + * spinlock will cause dead-lock in MP system and current irql + * overwritten for UP system. (UP system could allow nesting spin + * acqisition, because it's not spin at all just raising the irql.) 
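+ *
+ * A correct (non-nested) usage sketch of the wrappers defined below:
+ *
+ *      spinlock_t lock;
+ *
+ *      spin_lock_init(&lock);
+ *      spin_lock(&lock);
+ *      ... critical section: never take the same spinlock again here ...
+ *      spin_unlock(&lock);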
+ * + */ + +typedef struct spin_lock { + + KSPIN_LOCK lock; + KIRQL irql; + +} spinlock_t; + + +#define CFS_DECL_SPIN(name) spinlock_t name; +#define CFS_DECL_SPIN_EXTERN(name) extern spinlock_t name; + + +static inline void spin_lock_init(spinlock_t *lock) +{ + KeInitializeSpinLock(&(lock->lock)); +} + + +static inline void spin_lock(spinlock_t *lock) +{ + KeAcquireSpinLock(&(lock->lock), &(lock->irql)); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + KIRQL irql = lock->irql; + KeReleaseSpinLock(&(lock->lock), irql); +} + + +#define spin_lock_irqsave(lock, flags) do {(flags) = 0; spin_lock(lock);} while(0) +#define spin_unlock_irqrestore(lock, flags) do {spin_unlock(lock);} while(0) + + +/* There's no corresponding routine in windows kernel. + We must realize a light one of our own. But there's + no way to identify the system is MP build or UP build + on the runtime. We just uses a workaround for it. */ + +extern int MPSystem; + +static int spin_trylock(spinlock_t *lock) +{ + KIRQL Irql; + int rc = 0; + + ASSERT(lock != NULL); + + KeRaiseIrql(DISPATCH_LEVEL, &Irql); + + if (MPSystem) { + if (0 == (ulong_ptr)lock->lock) { +#if _X86_ + __asm { + mov edx, dword ptr [ebp + 8] + lock bts dword ptr[edx], 0 + jb lock_failed + mov rc, TRUE + lock_failed: + } +#else + KdBreakPoint(); +#endif + + } + } else { + rc = TRUE; + } + + if (rc) { + lock->irql = Irql; + } else { + KeLowerIrql(Irql); + } + + return rc; +} + +/* synchronization between cpus: it will disable all DPCs + kernel task scheduler on the CPU */ +#define spin_lock_bh(x) spin_lock(x) +#define spin_unlock_bh(x) spin_unlock(x) +#define spin_lock_bh_init(x) spin_lock_init(x) + +/* + * rw_semaphore (using ERESOURCE) + */ + + +typedef struct rw_semaphore { + ERESOURCE rwsem; +} rw_semaphore_t; + + +#define CFS_DECL_RWSEM(name) rw_semaphore_t name +#define CFS_DECL_RWSEM_EXTERN(name) extern rw_semaphore_t name + + +/* + * init_rwsem + * To initialize the the rw_semaphore_t structure + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void init_rwsem(rw_semaphore_t *s) +{ + ExInitializeResourceLite(&s->rwsem); +} + + +/* + * fini_rwsem + * To finilize/destroy the the rw_semaphore_t structure + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * For winnt system, we need this routine to delete the ERESOURCE. + * Just define it NULL for other systems. + */ + +static inline void fini_rwsem(rw_semaphore_t *s) +{ + ExDeleteResourceLite(&s->rwsem); +} + +/* + * down_read + * To acquire read-lock of the rw_semahore + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void down_read(struct rw_semaphore *s) +{ + ExAcquireResourceSharedLite(&s->rwsem, TRUE); +} + + +/* + * down_read_trylock + * To acquire read-lock of the rw_semahore without blocking + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * Zero: failed to acquire the read lock + * Non-Zero: succeeded to acquire the read lock + * + * Notes: + * This routine will return immediately without waiting. 
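+ *
+ *   Illustrative use:
+ *
+ *      if (down_read_trylock(&sem)) {
+ *              ... read-side critical section ...
+ *              up_read(&sem);
+ *      }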
+ */ + +static inline int down_read_trylock(struct rw_semaphore *s) +{ + return ExAcquireResourceSharedLite(&s->rwsem, FALSE); +} + + +/* + * down_write + * To acquire write-lock of the rw_semahore + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void down_write(struct rw_semaphore *s) +{ + ExAcquireResourceExclusiveLite(&(s->rwsem), TRUE); +} + + +/* + * down_write_trylock + * To acquire write-lock of the rw_semahore without blocking + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * Zero: failed to acquire the write lock + * Non-Zero: succeeded to acquire the read lock + * + * Notes: + * This routine will return immediately without waiting. + */ + +static inline int down_write_trylock(struct rw_semaphore *s) +{ + return ExAcquireResourceExclusiveLite(&(s->rwsem), FALSE); +} + + +/* + * up_read + * To release read-lock of the rw_semahore + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void up_read(struct rw_semaphore *s) +{ + ExReleaseResourceForThreadLite( + &(s->rwsem), + ExGetCurrentResourceThread()); +} + + +/* + * up_write + * To release write-lock of the rw_semahore + * + * Arguments: + * rwsem: pointer to the rw_semaphore_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void up_write(struct rw_semaphore *s) +{ + ExReleaseResourceForThreadLite( + &(s->rwsem), + ExGetCurrentResourceThread()); +} + +/* + * rwlock_t (using sempahore) + * + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ + +typedef struct { + spinlock_t guard; + int count; +} rwlock_t; + +void rwlock_init(rwlock_t * rwlock); +void rwlock_fini(rwlock_t * rwlock); + +void read_lock(rwlock_t * rwlock); +void read_unlock(rwlock_t * rwlock); +void write_lock(rwlock_t * rwlock); +void write_unlock(rwlock_t * rwlock); + +#define write_lock_irqsave(l, f) do {f = 0; write_lock(l);} while(0) +#define write_unlock_irqrestore(l, f) do {write_unlock(l);} while(0) +#define read_lock_irqsave(l, f) do {f=0; read_lock(l);} while(0) +#define read_unlock_irqrestore(l, f) do {read_unlock(l);} while(0) + + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ + +typedef struct semaphore { + KSEMAPHORE sem; +} mutex_t; + +static inline void sema_init(struct semaphore *s, int val) +{ + KeInitializeSemaphore(&s->sem, val, val); +} + +static inline void __down(struct semaphore *s) +{ + KeWaitForSingleObject( &(s->sem), Executive, + KernelMode, FALSE, NULL ); + +} + +static inline void __up(struct semaphore *s) +{ + KeReleaseSemaphore(&s->sem, 0, 1, FALSE); +} + +/* + * mutex_t: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ + + +/* + * init_mutex + * To initialize a mutex_t structure + * + * Arguments: + * mutex: pointer to the mutex_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void init_mutex(mutex_t *mutex) +{ + sema_init(mutex, 1); +} + + +/* + * mutex_down + * To acquire the mutex lock + * + * Arguments: + * mutex: pointer to the mutex_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void mutex_down(mutex_t *mutex) +{ + __down(mutex); +} + + +/* + * mutex_up + * To release the mutex lock (acquired already) + * + * Arguments: + * mutex: pointer to the mutex_t 
structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void mutex_up(mutex_t *mutex) +{ + __up(mutex); +} + + +/* + * init_mutex_locked + * To initialize the mutex as acquired state + * + * Arguments: + * mutex: pointer to the mutex_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline init_mutex_locked(mutex_t *mutex) +{ + init_mutex(mutex); + mutex_down(mutex); +} + +/* + * completion + * + * - init_complition(c) + * - complete(c) + * - wait_for_completion(c) + */ + +struct completion { + event_t event; +}; + + +/* + * init_completion + * To initialize the completion object + * + * Arguments: + * c: pointer to the completion structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void init_completion(struct completion *c) +{ + cfs_init_event(&(c->event), 1, FALSE); +} + + +/* + * complete + * To complete/signal the completion object + * + * Arguments: + * c: pointer to the completion structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void complete(struct completion *c) +{ + cfs_wake_event(&(c->event)); +} + +/* + * wait_for_completion + * To wait on the completion object. If the event is signaled, + * this function will return to the call with the event un-singled. + * + * Arguments: + * c: pointer to the completion structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +static inline void wait_for_completion(struct completion *c) +{ + cfs_wait_event(&(c->event), 0); +} + +/* __KERNEL__ */ +#else + +#include "../user-lock.h" + +/* __KERNEL__ */ +#endif +#endif diff --git a/libcfs/include/libcfs/winnt/winnt-mem.h b/libcfs/include/libcfs/winnt/winnt-mem.h new file mode 100644 index 0000000..b7f00a4 --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-mem.h @@ -0,0 +1,133 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines of memory manipulation routines . + * + */ + +#ifndef __LIBCFS_WINNT_CFS_MEM_H__ +#define __LIBCFS_WINNT_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ + +#define CFS_PAGE_SIZE PAGE_SIZE +#define CFS_PAGE_SHIFT PAGE_SHIFT +#define CFS_PAGE_MASK (~(PAGE_SIZE - 1)) + +typedef struct cfs_page { + void * addr; + atomic_t count; +} cfs_page_t; + + +cfs_page_t *cfs_alloc_page(int flags); +void cfs_free_page(cfs_page_t *pg); + +static inline void *cfs_page_address(cfs_page_t *page) +{ + return page->addr; +} + +static inline void *cfs_kmap(cfs_page_t *page) +{ + return page->addr; +} + +static inline void cfs_kunmap(cfs_page_t *page) +{ + return; +} + +static inline void cfs_get_page(cfs_page_t *page) +{ + atomic_inc(&page->count); +} + +static inline void cfs_put_page(cfs_page_t *page) +{ + atomic_dec(&page->count); +} + +static inline int cfs_page_count(cfs_page_t *page) +{ + return atomic_read(&page->count); +} + +/* + * Memory allocator + */ + +#define CFS_ALLOC_ATOMIC_TRY (0) + +extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); +extern void cfs_free(void *addr); + +extern void *cfs_alloc_large(size_t nr_bytes); +extern void cfs_free_large(void *addr); + +/* + * SLAB allocator + */ + +#define SLAB_HWCACHE_ALIGN 0 + +/* The cache name is limited to 20 chars */ + +typedef struct cfs_mem_cache { + + char name[20]; + ulong_ptr flags; + NPAGED_LOOKASIDE_LIST npll; + +} cfs_mem_cache_t; + + +extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, ulong_ptr); +extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); +extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); +extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); + + +/* + * Page allocator slabs + */ + +extern cfs_mem_cache_t *cfs_page_t_slab; +extern cfs_mem_cache_t *cfs_page_p_slab; + + +#define CFS_DECL_MMSPACE +#define CFS_MMSPACE_OPEN do {} while(0) +#define CFS_MMSPACE_CLOSE do {} while(0) + + +#define mb() do {} while(0) +#define rmb() mb() +#define wmb() mb() + + +/* __KERNEL__ */ +#endif + +#endif /* __WINNT_CFS_MEM_H__ */ diff --git a/libcfs/include/libcfs/winnt/winnt-prim.h b/libcfs/include/libcfs/winnt/winnt-prim.h new file mode 100644 index 0000000..3c8560b --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-prim.h @@ -0,0 +1,1082 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_WINNT_CFS_PRIM_H__ +#define __LIBCFS_WINNT_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + + +/* + * libcfs proc device object + */ + + +#define LUSTRE_PROC_DEVICE L"\\Device\\lproc" /* proc fs emulator device object */ +#define LUSTRE_PROC_SYMLNK L"\\DosDevices\\lproc" /* proc fs user-visible device */ + + +/* + * Device IO Control Code Definitions + */ + +#define FILE_DEVICE_LIBCFS ('LC') + +#define FILE_DEVICE_LIBCFS ('LC') + +#define FUNC_LIBCFS_VERSION 0x101 // get version of current libcfs +#define FUNC_LIBCFS_IOCTL 0x102 // Device i/o control to proc fs + + +#define IOCTL_LIBCFS_VERSION \ + CTL_CODE (FILE_DEVICE_LIBCFS, FUNC_LIBCFS_VERSION, METHOD_BUFFERED, FILE_ANY_ACCESS) +#define IOCTL_LIBCFS_ENTRY \ + CTL_CODE(FILE_DEVICE_LIBCFS, FUNC_LIBCFS_IOCTL, METHOD_BUFFERED, FILE_ANY_ACCESS) + +#pragma pack(4) + +typedef struct _CFS_PROC_IOCTL { + + ULONG cmd; // ioctl command identifier + ULONG len; // length of data + + // UCHAR data[]; // content of the real ioctl + +} CFS_PROC_IOCTL, *PCFS_PROC_IOCTL; + +#pragma pack() + +#ifdef __KERNEL__ + +#include + +/* + * Symbol functions for libcfs + * + * OSX has no facility for use to register symbol. + * So we have to implement it. + */ +#define CFS_SYMBOL_LEN 64 + +struct cfs_symbol { + char name[CFS_SYMBOL_LEN]; + void *value; + int ref; + struct list_head sym_list; +}; + +extern int cfs_symbol_register(const char *, const void *); +extern void cfs_symbol_unregister(const char *); +extern void * cfs_symbol_get(const char *); +extern void cfs_symbol_put(const char *); +extern void cfs_symbol_clean(); + + + +typedef struct file_operations cfs_file_operations_t; +typedef struct file cfs_file_t; + +/* + * Pseudo device register + */ + +typedef struct +{ + int minor; + const char * name; + cfs_file_operations_t * fops; +} cfs_psdev_t; + +int cfs_psdev_register(cfs_psdev_t * psdev); +int cfs_psdev_deregister(cfs_psdev_t * psdev); + + +/* + * Proc emulator file system APIs + */ + +typedef int cfs_read_proc_t(char *page, char **start, off_t off, + int count, int *eof, void *data); +typedef int cfs_write_proc_t(struct file *file, const char *buffer, + ulong_ptr count, void *data); + +#define CFS_PROC_ENTRY_MAGIC 'CPEM' + +#define CFS_PROC_FLAG_DIRECTORY 0x00000001 // directory node +#define CFS_PROC_FLAG_ATTACHED 0x00000002 // node is attached to proc +#define CFS_PROC_FLAG_MISCDEV 0x00000004 // miscellaneous device + +typedef struct cfs_proc_entry +{ + ULONG magic; // Magic + ULONG flags; // Flags + + struct _dir_entry { // proc directory entry + PRTL_SPLAY_LINKS root; + }; + + struct _file_entry { // proc file / leaf entry + cfs_read_proc_t * read_proc; + cfs_write_proc_t * write_proc; + }; + + mode_t mode; + unsigned short nlink; + + + struct file_operations * proc_fops; + void * data; + + // proc_dir_entry ended. 
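+
+    // The members below are winnt-specific bookkeeping: entries hang off
+    // a splay tree via s_link and carry a fixed-size (0x20 byte) name.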
+ + RTL_SPLAY_LINKS s_link; // splay link + + // + // Maximum length of proc entry name is 0x20 + // + + char name[0x20]; + +} cfs_proc_entry_t, cfs_proc_dir_entry_t; + +typedef cfs_proc_entry_t cfs_proc_dir_entry_t; + +#define PROC_BLOCK_SIZE PAGE_SIZE + +/* + * Sysctl register + */ + +typedef struct ctl_table cfs_sysctl_table_t; +typedef struct ctl_table_header cfs_sysctl_table_header_t; + + +typedef int ctl_handler ( + cfs_sysctl_table_t *table, + int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, + void **context ); + +typedef int proc_handler ( + cfs_sysctl_table_t *ctl, + int write, struct file * filp, + void *buffer, size_t *lenp ); + + +int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, + void *buffer, size_t *lenp); + +int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, + void *buffer, size_t *lenp); + +int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context); + + +/* + * System io control definitions + */ + +#define CTL_MAXNAME 10 + +#define CTL_ANY -1 /* Matches any name */ +#define CTL_NONE 0 + +enum +{ + CTL_KERN=1, /* General kernel info and control */ + CTL_VM=2, /* VM management */ + CTL_NET=3, /* Networking */ + CTL_PROC=4, /* Process info */ + CTL_FS=5, /* Filesystems */ + CTL_DEBUG=6, /* Debugging */ + CTL_DEV=7, /* Devices */ + CTL_BUS=8, /* Busses */ + CTL_ABI=9, /* Binary emulation */ + CTL_CPU=10 /* CPU stuff (speed scaling, etc) */ +}; + +/* sysctl table definitons */ +struct ctl_table +{ + int ctl_name; + char *procname; + void *data; + int maxlen; + mode_t mode; + cfs_sysctl_table_t *child; + proc_handler *proc_handler; /* text formatting callback */ + ctl_handler *strategy; /* read / write callback functions */ + cfs_proc_entry_t *de; /* proc entry block */ + void *extra1; + void *extra2; +}; + + +/* the mantaner of the cfs_sysctl_table trees */ +struct ctl_table_header +{ + cfs_sysctl_table_t * ctl_table; + struct list_head ctl_entry; +}; + + +cfs_proc_entry_t * create_proc_entry(char *name, mode_t mod, + cfs_proc_entry_t *parent); +void proc_free_entry(cfs_proc_entry_t *de); +void remove_proc_entry(char *name, cfs_proc_entry_t *entry); +cfs_proc_entry_t * search_proc_entry(char * name, + cfs_proc_entry_t * root ); + +#define cfs_create_proc_entry create_proc_entry +#define cfs_free_proc_entry proc_free_entry +#define cfs_remove_proc_entry remove_proc_entry + +#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a) +#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t, a) + + +/* + * declaration of proc kernel process routines + */ + +cfs_file_t * +lustre_open_file(char * filename); + +int +lustre_close_file(cfs_file_t * fh); + +int +lustre_do_ioctl( cfs_file_t * fh, + unsigned long cmd, + ulong_ptr arg ); + +int +lustre_ioctl_file( cfs_file_t * fh, + PCFS_PROC_IOCTL devctl); + +size_t +lustre_read_file( cfs_file_t * fh, + loff_t off, + size_t size, + char * buf + ); + +size_t +lustre_write_file( cfs_file_t * fh, + loff_t off, + size_t size, + char * buf + ); + +/* + * Wait Queue + */ + + +typedef int cfs_task_state_t; + +#define CFS_TASK_INTERRUPTIBLE 0x00000001 +#define CFS_TASK_UNINT 0x00000002 + + + +#define CFS_WAITQ_MAGIC 'CWQM' +#define CFS_WAITLINK_MAGIC 'CWLM' + +typedef struct cfs_waitq { + + unsigned int magic; + unsigned int flags; + + spinlock_t guard; + struct list_head waiters; + +} cfs_waitq_t; + + +typedef struct cfs_waitlink cfs_waitlink_t; + +#define 
CFS_WAITQ_CHANNELS (2) + +#define CFS_WAITQ_CHAN_NORMAL (0) +#define CFS_WAITQ_CHAN_FORWARD (1) + + + +typedef struct cfs_waitlink_channel { + struct list_head link; + cfs_waitq_t * waitq; + cfs_waitlink_t * waitl; +} cfs_waitlink_channel_t; + +struct cfs_waitlink { + + unsigned int magic; + int flags; + event_t * event; + atomic_t * hits; + + cfs_waitlink_channel_t waitq[CFS_WAITQ_CHANNELS]; +}; + +enum { + CFS_WAITQ_EXCLUSIVE = 1 +}; + +#define CFS_DECL_WAITQ(name) cfs_waitq_t name + + +void cfs_waitq_init(struct cfs_waitq *waitq); +void cfs_waitlink_init(struct cfs_waitlink *link); + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link); +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); +int cfs_waitq_active(struct cfs_waitq *waitq); + +void cfs_waitq_signal(struct cfs_waitq *waitq); +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); +void cfs_waitq_broadcast(struct cfs_waitq *waitq); + +void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state); +cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, + cfs_task_state_t state, cfs_duration_t timeout); + + + +/* Kernel thread */ + +typedef int (*cfs_thread_t) (void *arg); + +typedef struct _cfs_thread_context { + cfs_thread_t func; + void * arg; +} cfs_thread_context_t; + +int cfs_kernel_thread(int (*func)(void *), void *arg, int flag); + +/* + * thread creation flags from Linux, not used in winnt + */ +#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ +#define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ +#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ +#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PID 0x00001000 /* set if pid shared */ +#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ +#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ +#define CLONE_THREAD 0x00010000 /* Same thread group? */ +#define CLONE_NEWNS 0x00020000 /* New namespace group? */ + +#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) + + +/* + * sigset ... + */ + +typedef sigset_t cfs_sigset_t; + +/* + * Task struct + */ + +#define MAX_SCHEDULE_TIMEOUT ((long_ptr)(~0UL>>12)) + + +#define NGROUPS 1 +#define CFS_CURPROC_COMM_MAX (16) +typedef struct task_sruct{ + mode_t umask; + + pid_t pid; + pid_t pgrp; + + uid_t uid,euid,suid,fsuid; + gid_t gid,egid,sgid,fsgid; + + int ngroups; + gid_t groups[NGROUPS]; + cfs_kernel_cap_t cap_effective, + cap_inheritable, + cap_permitted; + + char comm[CFS_CURPROC_COMM_MAX]; + void * journal_info; +} cfs_task_t; + + +/* + * linux task struct emulator ... 
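+ *
+ * (A TASK_SLOT is expected to be kept per kernel thread: it embeds the
+ *  cfs_task_t handed out by cfs_current(), links into the global
+ *  TASK_MAN list, and is allocated from the TASK_MAN slab.)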
+ */ + +#define TASKMAN_MAGIC 'TMAN' /* Task Manager */ +#define TASKSLT_MAGIC 'TSLT' /* Task Slot */ + +typedef struct _TASK_MAN { + + ULONG Magic; /* Magic and Flags */ + ULONG Flags; + + spinlock_t Lock; /* Protection lock */ + + cfs_mem_cache_t * slab; /* Memory slab for task slot */ + + ULONG NumOfTasks; /* Total tasks (threads) */ + LIST_ENTRY TaskList; /* List of task slots */ + +} TASK_MAN, *PTASK_MAN; + +typedef struct _TASK_SLOT { + + ULONG Magic; /* Magic and Flags */ + ULONG Flags; + + LIST_ENTRY Link; /* To be linked to TaskMan */ + + event_t Event; /* Schedule event */ + + HANDLE Pid; /* Process id */ + HANDLE Tid; /* Thread id */ + PETHREAD Tet; /* Pointer to ethread */ + + atomic_t count; /* refer count */ + atomic_t hits; /* times of waken event singaled */ + + KIRQL irql; /* irql for rwlock ... */ + + cfs_task_t task; /* linux task part */ + +} TASK_SLOT, *PTASK_SLOT; + + +#define current cfs_current() +#define set_current_state(s) do {;} while (0) + +#define wait_event(wq, condition) \ +do { \ + cfs_waitlink_t __wait; \ + \ + cfs_waitlink_init(&__wait); \ + while (TRUE) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) { \ + break; \ + } \ + cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ +} while(0) + +#define wait_event_interruptible(wq, condition, __ret) \ +do { \ + cfs_waitlink_t __wait; \ + \ + __ret = 0; \ + cfs_waitlink_init(&__wait); \ + while (TRUE) { \ + cfs_waitq_add(&wq, &__wait); \ + if (condition) { \ + break; \ + } \ + cfs_waitq_wait(&__wait, CFS_TASK_INTERRUPTIBLE); \ + cfs_waitq_del(&wq, &__wait); \ + } \ + cfs_waitq_del(&wq, &__wait); \ +} while(0) + + +int init_task_manager(); +void cleanup_task_manager(); +cfs_task_t * cfs_current(); +int schedule_timeout(int64_t time); +int schedule(); +int wake_up_process(cfs_task_t * task); +#define cfs_schedule_timeout(state, time) schedule_timeout(time) +void sleep_on(cfs_waitq_t *waitq); + +#define CFS_DECL_JOURNAL_DATA +#define CFS_PUSH_JOURNAL do {;} while(0) +#define CFS_POP_JOURNAL do {;} while(0) + + +/* module related definitions */ + +#ifndef __exit +#define __exit +#endif +#ifndef __init +#define __init +#endif + +#define request_module(x) (0) + +#define EXPORT_SYMBOL(s) +#define MODULE_AUTHOR(s) +#define MODULE_DESCRIPTION(s) +#define MODULE_LICENSE(s) +#define MODULE_PARM(a, b) +#define MODULE_PARM_DESC(a, b) + +#define module_init(X) int __init module_##X() {return X();} +#define module_exit(X) void __exit module_##X() {X();} + +#define DECLARE_INIT(X) extern int __init module_##X(void) +#define DECLARE_EXIT(X) extern void __exit module_##X(void) + +#define MODULE_INIT(X) do { int rc = module_##X(); \ + if (rc) goto errorout; \ + } while(0) + +#define MODULE_EXIT(X) do { module_##X(); } while(0) + + +/* Module interfaces */ +#define cfs_module(name, version, init, fini) \ +module_init(init); \ +module_exit(fini) + + +/* + * Linux kernel version definition + */ + +#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) +#define LINUX_VERSION_CODE (2*100+6*10+7) + + +/* + * Signal + */ +#define SIGNAL_MASK_ASSERT() + +/* + * Timer + */ + +#define CFS_TIMER_FLAG_INITED 0x00000001 // Initialized already +#define CFS_TIMER_FLAG_TIMERED 0x00000002 // KeSetTimer is called + +typedef struct cfs_timer { + + KSPIN_LOCK Lock; + + ULONG Flags; + + KDPC Dpc; + KTIMER Timer; + + cfs_time_t deadline; + + void (*proc)(ulong_ptr); + void * arg; + +} cfs_timer_t; + + +typedef void (*timer_func_t)(ulong_ptr); + +#define cfs_init_timer(t) + +void 
cfs_timer_init(cfs_timer_t *timer, void (*func)(ulong_ptr), void *arg); +void cfs_timer_done(cfs_timer_t *t); +void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline); +void cfs_timer_disarm(cfs_timer_t *t); +int cfs_timer_is_armed(cfs_timer_t *t); +cfs_time_t cfs_timer_deadline(cfs_timer_t *t); + + +/* deschedule for a bit... */ +static inline void cfs_pause(cfs_duration_t ticks) +{ + cfs_schedule_timeout(TASK_UNINTERRUPTIBLE, ticks); +} + + +static inline void cfs_enter_debugger(void) +{ +#if _X86_ + __asm int 3; +#else + KdBreakPoint(); +#endif +} + +/* + * libcfs globals initialization/cleanup + */ + +int +libcfs_arch_init(void); + +void +libcfs_arch_cleanup(void); + +/* + * SMP ... + */ + +#define SMP_CACHE_BYTES 128 +#define __cacheline_aligned +#define NR_CPUS (2) +#define smp_processor_id() KeGetCurrentProcessorNumber() +#define smp_num_cpus NR_CPUS +#define num_online_cpus() smp_num_cpus +#define smp_call_function(f, a, n, w) do {} while(0) + +/* + * Irp related + */ + +#define NR_IRQS 512 +#define in_interrupt() (0) + +/* + * printk flags + */ + +#define KERN_EMERG "<0>" /* system is unusable */ +#define KERN_ALERT "<1>" /* action must be taken immediately */ +#define KERN_CRIT "<2>" /* critical conditions */ +#define KERN_ERR "<3>" /* error conditions */ +#define KERN_WARNING "<4>" /* warning conditions */ +#define KERN_NOTICE "<5>" /* normal but significant condition */ +#define KERN_INFO "<6>" /* informational */ +#define KERN_DEBUG "<7>" /* debug-level messages */ + +/* + * Misc + */ + + +#define inter_module_get(n) cfs_symbol_get(n) +#define inter_module_put(n) cfs_symbol_put(n) + +#ifndef likely +#define likely(exp) (exp) +#endif +#ifndef unlikely +#define unlikely(exp) (exp) +#endif + +#define lock_kernel() do {} while(0) +#define unlock_kernel() do {} while(0) + +#define CAP_SYS_ADMIN 0 +#define CAP_SYS_ROOT 1 + +#define capable(a) (TRUE) + +#define USERMODEHELPER(path, argv, envp) (0) + + +#define local_irq_save(x) +#define local_irq_restore(x) + +#define cfs_assert ASSERT + +#define THREAD_NAME + +#else /* !__KERNEL__ */ + +#define PAGE_CACHE_SIZE PAGE_SIZE +#define PAGE_CACHE_MASK PAGE_MASK + +#define getpagesize() (PAGE_SIZE) + + +typedef struct { + int foo; +} pthread_mutex_t; + +typedef struct { + int foo; +} pthread_cond_t; + +#define pthread_mutex_init(x, y) do {} while(0) +#define pthread_cond_init(x, y) do {} while(0) + +#define pthread_mutex_lock(x) do {} while(0) +#define pthread_mutex_unlock(x) do {} while(0) + +#define pthread_cond_wait(x,y) do {} while(0) +#define pthread_cond_broadcast(x) do {} while(0) + +typedef struct file { + int foo; +} cfs_file_t; + +typedef struct cfs_proc_dir_entry{ + void *data; +}cfs_proc_dir_entry_t; + + + +#include "../user-prim.h" + +#include +#include + +#define strcasecmp strcmp +#define strncasecmp strncmp +#define snprintf _snprintf +#define getpid() (0) + + +#define getpwuid(x) (NULL) +#define getgrgid(x) (NULL) + +int cfs_proc_mknod(const char *path, mode_t mode, dev_t dev); + +int gethostname(char * name, int namelen); + +#define setlinebuf(x) do {} while(0) + + +NTSYSAPI VOID NTAPI DebugBreak(); + + +static inline void cfs_enter_debugger(void) +{ +#if _X86_ + __asm int 3; +#else + DebugBreak(); +#endif +} + +/* Maximum EA Information Length */ +#define EA_MAX_LENGTH (sizeof(FILE_FULL_EA_INFORMATION) + 15) + + +/* + * proc user mode routines + */ + +HANDLE cfs_proc_open (char * filename, int oflag); +int cfs_proc_close(HANDLE handle); +int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count); +int 
cfs_proc_write(HANDLE handle, void *buffer, unsigned int count); +int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer); + + +/* + * Native API definitions + */ + +// +// Disk I/O Routines +// + +NTSYSAPI +NTSTATUS +NTAPI +NtReadFile(HANDLE FileHandle, + HANDLE Event OPTIONAL, + PIO_APC_ROUTINE ApcRoutine OPTIONAL, + PVOID ApcContext OPTIONAL, + PIO_STATUS_BLOCK IoStatusBlock, + PVOID Buffer, + ULONG Length, + PLARGE_INTEGER ByteOffset OPTIONAL, + PULONG Key OPTIONAL); + +NTSYSAPI +NTSTATUS +NTAPI +NtWriteFile(HANDLE FileHandle, + HANDLE Event OPTIONAL, + PIO_APC_ROUTINE ApcRoutine OPTIONAL, + PVOID ApcContext OPTIONAL, + PIO_STATUS_BLOCK IoStatusBlock, + PVOID Buffer, + ULONG Length, + PLARGE_INTEGER ByteOffset OPTIONAL, + PULONG Key OPTIONAL); + +NTSYSAPI +NTSTATUS +NTAPI +NtClose(HANDLE Handle); + +NTSYSAPI +NTSTATUS +NTAPI +NtCreateFile(PHANDLE FileHandle, + ACCESS_MASK DesiredAccess, + POBJECT_ATTRIBUTES ObjectAttributes, + PIO_STATUS_BLOCK IoStatusBlock, + PLARGE_INTEGER AllocationSize OPTIONAL, + ULONG FileAttributes, + ULONG ShareAccess, + ULONG CreateDisposition, + ULONG CreateOptions, + PVOID EaBuffer OPTIONAL, + ULONG EaLength); + + +NTSYSAPI +NTSTATUS +NTAPI +NtDeviceIoControlFile( + IN HANDLE FileHandle, + IN HANDLE Event, + IN PIO_APC_ROUTINE ApcRoutine, + IN PVOID ApcContext, + OUT PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG IoControlCode, + IN PVOID InputBuffer, + IN ULONG InputBufferLength, + OUT PVOID OutputBuffer, + OUT ULONG OutputBufferLength + ); + +NTSYSAPI +NTSTATUS +NTAPI +NtFsControlFile( + IN HANDLE FileHandle, + IN HANDLE Event OPTIONAL, + IN PIO_APC_ROUTINE ApcRoutine OPTIONAL, + IN PVOID ApcContext OPTIONAL, + OUT PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG FsControlCode, + IN PVOID InputBuffer OPTIONAL, + IN ULONG InputBufferLength, + OUT PVOID OutputBuffer OPTIONAL, + IN ULONG OutputBufferLength +); + + +NTSYSAPI +NTSTATUS +NTAPI +NtQueryInformationFile( + IN HANDLE FileHandle, + OUT PIO_STATUS_BLOCK IoStatusBlock, + OUT PVOID FileInformation, + IN ULONG Length, + IN FILE_INFORMATION_CLASS FileInformationClass + ); + +// +// Random routines ... +// + +NTSYSAPI +ULONG +NTAPI +RtlRandom( + IN OUT PULONG Seed + ); + +#endif /* __KERNEL__ */ + + +// +// Inode flags (Linux uses octad number, but why ? strange!!!) 
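+// (The hex values below are bit-for-bit identical to the familiar Linux
+//  octal constants, e.g. S_IFMT 0x0F000 == 0170000 and S_IFREG 0x08000
+//  == 0100000; only the notation differs.)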
+// + +#undef S_IFMT +#undef S_IFDIR +#undef S_IFCHR +#undef S_IFREG +#undef S_IREAD +#undef S_IWRITE +#undef S_IEXEC + +#define S_IFMT 0x0F000 /* 017 0000 */ +#define S_IFSOCK 0x0C000 /* 014 0000 */ +#define S_IFLNK 0x0A000 /* 012 0000 */ +#define S_IFREG 0x08000 /* 010 0000 */ +#define S_IFBLK 0x06000 /* 006 0000 */ +#define S_IFDIR 0x04000 /* 004 0000 */ +#define S_IFCHR 0x02000 /* 002 0000 */ +#define S_IFIFO 0x01000 /* 001 0000 */ +#define S_ISUID 0x00800 /* 000 4000 */ +#define S_ISGID 0x00400 /* 000 2000 */ +#define S_ISVTX 0x00200 /* 000 1000 */ + +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#define S_ISFIL(m) (((m) & S_IFMT) == S_IFFIL) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) + +#define S_IPERMISSION_MASK 0x1FF /* */ + +#define S_IRWXU 0x1C0 /* 0 0700 */ +#define S_IRUSR 0x100 /* 0 0400 */ +#define S_IWUSR 0x080 /* 0 0200 */ +#define S_IXUSR 0x040 /* 0 0100 */ + +#define S_IRWXG 0x038 /* 0 0070 */ +#define S_IRGRP 0x020 /* 0 0040 */ +#define S_IWGRP 0x010 /* 0 0020 */ +#define S_IXGRP 0x008 /* 0 0010 */ + +#define S_IRWXO 0x007 /* 0 0007 */ +#define S_IROTH 0x004 /* 0 0004 */ +#define S_IWOTH 0x002 /* 0 0002 */ +#define S_IXOTH 0x001 /* 0 0001 */ + +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) +#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) +#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) + +/* + * linux ioctl coding definitions + */ + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 14 +#define _IOC_DIRBITS 2 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits. + */ +#define _IOC_NONE 0U +#define _IOC_WRITE 1U +#define _IOC_READ 2U + +#define _IOC(dir,type,nr,size) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT)) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode ioctl numbers.. */ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* + * Io vector ... + */ + +struct iovec +{ + void *iov_base; + size_t iov_len; +}; + + +#define ULONG_LONG_MAX ((__u64)(0xFFFFFFFFFFFFFFFF)) +/* + * Convert a string to an unsigned long long integer. + * + * Ignores `locale' stuff. Assumes that the upper and lower case + * alphabets and digits are each contiguous. 
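+ *
+ * Worked examples (illustrative): with base 0 a "0x" prefix selects hex,
+ * so strtoull("0x1f", NULL, 0) == 31, while a bare leading '0' selects
+ * octal, so strtoull("017", NULL, 0) == 15.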
+ */ +static inline __u64 +strtoull( + char *nptr, + char **endptr, + int base) +{ + char *s = nptr; + __u64 acc, cutoff; + int c, neg = 0, any, cutlim; + + /* + * See strtol for comments as to the logic used. + */ + do { + c = *s++; + } while (isspace(c)); + if (c == '-') { + neg = 1; + c = *s++; + } else if (c == '+') + c = *s++; + if ((base == 0 || base == 16) && + c == '0' && (*s == 'x' || *s == 'X')) { + c = s[1]; + s += 2; + base = 16; + } + if (base == 0) + base = c == '0' ? 8 : 10; + cutoff = (__u64)ULONG_LONG_MAX / (__u64)base; + cutlim = (int)((__u64)ULONG_LONG_MAX % (__u64)base); + for (acc = 0, any = 0;; c = *s++) { + if (isdigit(c)) + c -= '0'; + else if (isalpha(c)) + c -= isupper(c) ? 'A' - 10 : 'a' - 10; + else + break; + if (c >= base) + break; + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) + any = -1; + else { + any = 1; + acc *= base; + acc += c; + } + } + if (any < 0) { + acc = ULONG_LONG_MAX; + } else if (neg) + acc = 0 - acc; + if (endptr != 0) + *endptr = (char *) (any ? s - 1 : nptr); + return (acc); +} + +#endif diff --git a/libcfs/include/libcfs/winnt/winnt-tcpip.h b/libcfs/include/libcfs/winnt/winnt-tcpip.h new file mode 100644 index 0000000..a988247 --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-tcpip.h @@ -0,0 +1,660 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for Winnt (kernel and user-level). + * + */ + +#ifndef __LIBCFS_WINNT_TCPIP_H__ +#define __LIBCFS_WINNT_TCPIP_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + + +#ifdef __KERNEL__ + +// +// ks definitions +// + +// iovec is defined in libcfs: winnt_prim.h +// lnetkiov_t is defined in lnet/types.h + +typedef struct socket ksock_tconn_t; +typedef struct socket cfs_socket_t; + +// completion notification callback routine + +typedef VOID (*ksock_schedule_cb)(struct socket*, int, void *, ulong_ptr); + +/* completion routine to update tx structure for async sending */ +typedef PVOID (*ksock_update_tx)(struct socket*, PVOID tx, ulong_ptr); + +// +// tdinal definitions +// + + +#if TDI_LIBCFS_DBG +#define KsPrint(X) KsPrintf X +#else +#define KsPrint(X) +#endif + + +// +// Socket Addresses Related ... 
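+// (The INADDR_* values defined below match the standard BSD constants,
+//  expressed in host byte order.)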
+// + +#define INADDR_ANY (ULONG)0x00000000 +#define INADDR_LOOPBACK (ULONG)0x7f000001 +#define INADDR_BROADCAST (ULONG)0xffffffff +#define INADDR_NONE (ULONG)0xffffffff + +/* + * TCP / IP options + */ + +#define SOL_TCP 6 +#define SOL_UDP 17 + + +#define TL_INSTANCE 0 + +#define TCP_SOCKET_NODELAY 1 // disabling "Nagle" +#define TCP_SOCKET_KEEPALIVE 2 +#define TCP_SOCKET_OOBINLINE 3 +#define TCP_SOCKET_BSDURGENT 4 +#define TCP_SOCKET_ATMARK 5 +#define TCP_SOCKET_WINDOW 6 + + +/* Flags we can use with send/ and recv. + Added those for 1003.1g not all are supported yet + */ + +#define MSG_OOB 1 +#define MSG_PEEK 2 +#define MSG_DONTROUTE 4 +#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */ +#define MSG_CTRUNC 8 +#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */ +#define MSG_TRUNC 0x20 +#define MSG_DONTWAIT 0x40 /* Nonblocking io */ +#define MSG_EOR 0x80 /* End of record */ +#define MSG_WAITALL 0x100 /* Wait for a full request */ +#define MSG_FIN 0x200 +#define MSG_SYN 0x400 +#define MSG_CONFIRM 0x800 /* Confirm path validity */ +#define MSG_RST 0x1000 +#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */ +#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */ +#define MSG_MORE 0x8000 /* Sender will send more */ + +#define MSG_EOF MSG_FIN + + +// +// Maximum TRANSPORT_ADDRESS Length +// +// it must >= FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) +// + TDI_ADDRESS_LENGTH_IP +// +// I define it a little large and 16 bytes aligned to avoid possible overflow. +// + +#define MAX_ADDRESS_LENGTH (0x30) + + +// +// Maximum Listers Children Sockets +// + +#define MAX_CHILD_LISTENERS (4) + +// +// Maximum EA Information Length +// + +#define EA_MAX_LENGTH ( sizeof(FILE_FULL_EA_INFORMATION) - 1 + \ + TDI_TRANSPORT_ADDRESS_LENGTH + 1 + \ + MAX_ADDRESS_LENGTH ) + + +#define UDP_DEVICE_NAME L"\\Device\\Udp" +#define TCP_DEVICE_NAME L"\\Device\\Tcp" + + +/* + * TSDU definitions + */ + +#define TDINAL_TSDU_DEFAULT_SIZE (0x10000) + +#define KS_TSDU_MAGIC 'KSTD' + +#define KS_TSDU_ATTACHED 0x00000001 // Attached to the socket receive tsdu list + +typedef struct _KS_TSDU { + + ULONG Magic; + ULONG Flags; + + struct list_head Link; + + ULONG TotalLength; // Total size of KS_TSDU + + ULONG StartOffset; // Start offset of the first Tsdu unit + ULONG LastOffset; // End offset of the last Tsdu unit + +/* + union { + KS_TSDU_DAT[]; + KS_TSDU_BUF[]; + KS_TSDU_MDL[]; + } +*/ + +} KS_TSDU, *PKS_TSDU; + +#define TSDU_TYPE_BUF ((USHORT)0x5401) +#define TSDU_TYPE_DAT ((USHORT)0x5402) +#define TSDU_TYPE_MDL ((USHORT)0x5403) + +#define KS_TSDU_BUF_RECEIVING 0x0001 +typedef struct _KS_TSDU_BUF { + + USHORT TsduType; + USHORT TsduFlags; + + ULONG DataLength; + ULONG StartOffset; + + PVOID UserBuffer; + +} KS_TSDU_BUF, *PKS_TSDU_BUF; + +#define KS_TSDU_DAT_RECEIVING 0x0001 + +typedef struct _KS_TSDU_DAT { + + USHORT TsduType; + USHORT TsduFlags; + + ULONG DataLength; + ULONG StartOffset; + + ULONG TotalLength; + + UCHAR Data[1]; + +} KS_TSDU_DAT, *PKS_TSDU_DAT; + +#define KS_DWORD_ALIGN(x) (((x) + 0x03) & (~(0x03))) +#define KS_TSDU_STRU_SIZE(Len) (KS_DWORD_ALIGN((Len) + FIELD_OFFSET(KS_TSDU_DAT, Data))) + +typedef struct _KS_TSDU_MDL { + + USHORT TsduType; + USHORT TsduFlags; + + ULONG DataLength; + ULONG StartOffset; + + PMDL Mdl; + PVOID Descriptor; + +} KS_TSDU_MDL, *PKS_TSDU_MDL; + + +typedef struct _KS_TSDUMGR { + + struct list_head TsduList; + ULONG NumOfTsdu; + ULONG TotalBytes; + KEVENT Event; + +} KS_TSDUMGR, *PKS_TSDUMGR; + + +typedef struct _KS_CHAIN { + + 
KS_TSDUMGR Normal; + KS_TSDUMGR Expedited; + +} KS_CHAIN, *PKS_CHAIN; + + +#define TDINAL_SCHED_FACTOR (1) +#define CAN_BE_SCHED(Len, Limit) (Len >= ((Limit) >> TDINAL_SCHED_FACTOR)) + +// +// Handler Settings Indictor +// + +#define TDI_EVENT_MAXIMUM_HANDLER (TDI_EVENT_ERROR_EX + 1) + + +typedef struct _KS_EVENT_HANDLERS { + BOOLEAN IsActive[TDI_EVENT_MAXIMUM_HANDLER]; + PVOID Handler [TDI_EVENT_MAXIMUM_HANDLER]; +} KS_EVENT_HANDLERS, *PKS_EVENT_HANDLERS; + +#define SetEventHandler(ha, ht, hr) do { \ + ha.IsActive[ht] = TRUE; \ + ha.Handler[ht] = (PVOID) (hr); \ + } while(0) + +// +// KSock Internal Structures +// + +typedef struct _KS_ADDRESS { + + union { + TRANSPORT_ADDRESS Tdi; + UCHAR Pading[MAX_ADDRESS_LENGTH]; + }; + + HANDLE Handle; + PFILE_OBJECT FileObject; + +} KS_ADDRESS, *PKS_ADDRESS; + +// +// Structures for Disconnect Workitem +// + +typedef struct _KS_DISCONNECT_WORKITEM { + + WORK_QUEUE_ITEM WorkItem; // Workitem to perform disconnection + ksock_tconn_t * tconn; // tdi connecton + ULONG Flags; // connection broken/discnnection flags + KEVENT Event; // sync event + +} KS_DISCONNECT_WORKITEM, *PKS_DISCONNECT_WORKITEM; + + +typedef struct _KS_CONNECTION { + + HANDLE Handle; // Handle of the tdi connection + PFILE_OBJECT FileObject; // FileObject if the conn object + + PTRANSPORT_ADDRESS Remote; // the ConnectionInfo of this connection + PTDI_CONNECTION_INFORMATION ConnectionInfo; + + ULONG nagle; // Tcp options + +} KS_CONNECTION, *PKS_CONNECTION; + + +// +// type definitions +// + +typedef MDL ksock_mdl_t; +typedef UNICODE_STRING ksock_unicode_name_t; +typedef WORK_QUEUE_ITEM ksock_workitem_t; + + +typedef KS_CHAIN ksock_chain_t; +typedef KS_ADDRESS ksock_tdi_addr_t; +typedef KS_CONNECTION ksock_tconn_info_t; +typedef KS_DISCONNECT_WORKITEM ksock_disconnect_workitem_t; + + +// +// Structures for transmission done Workitem +// + +typedef struct _KS_TCPX_FINILIZE { + ksock_workitem_t item; + void * tx; +} ksock_tcpx_fini_t; + + +typedef struct ksock_backlogs { + + struct list_head list; /* list to link the backlog connections */ + int num; /* number of backlogs in the list */ + +} ksock_backlogs_t; + + +typedef struct ksock_daemon { + + ksock_tconn_t * tconn; /* the listener connection object */ + unsigned short nbacklogs; /* number of listening backlog conns */ + unsigned short port; /* listening port number */ + int shutdown; /* daemon threads is to exit */ + struct list_head list; /* to be attached into ksock_nal_data_t*/ + +} ksock_daemon_t ; + + +typedef enum { + + kstt_sender = 0, // normal sending connection type, it's active connection, while + // child tconn is for passive connection. + + kstt_listener, // listener daemon type, it just acts as a daemon, and it does + // not have real connection. It manages children tcons to accept + // or refuse the connecting request from remote peers. + + kstt_child, // accepted child connection type, it's parent must be Listener + kstt_lasttype +} ksock_tconn_type; + +typedef enum { + + ksts_uninited = 0, // tconn is just allocated (zero values), not initialized yet + + ksts_inited, // tconn structure initialized: so it now can be identified as + // a sender, listener or a child + + ksts_bind, // tconn is bound: the local address object (ip/port) is created. + // after being bound, we must call ksocknal_put_tconn to release + // the tconn objects, it's not safe just to free the memory of tconn. + + ksts_associated, // the connection object is created and associated with the address + // object. so it's ready for connection. 
only for child and sender. + + ksts_connecting, // only used by child tconn: in the ConnectEvent handler routine, + // it indicts the child tconn is busy to be connected to the peer. + + ksts_connected, // the connection is built already: for sender and child + + ksts_listening, // listener daemon is working, only for listener tconn + + ksts_disconnected, // disconnected by user + ksts_aborted, // un-exptected broken status + + ksts_last // total number of tconn statuses +} ksock_tconn_state; + +#define KS_TCONN_MAGIC 'KSTM' + +#define KS_TCONN_HANDLERS_SET 0x00000001 // Conection handlers are set. +#define KS_TCONN_DISCONNECT_BUSY 0x00010000 // Disconnect Workitem is queued ... +#define KS_TCONN_DESTROY_BUSY 0x00020000 // Destory Workitem is queued ... + +#define KS_TCONN_DAEMON_STARTED 0x00100000 // indict the daemon is started, + // only valid for listener + +struct socket { + + ulong_ptr kstc_magic; /* Magic & Flags */ + ulong_ptr kstc_flags; + + spinlock_t kstc_lock; /* serialise lock*/ + void * kstc_conn; /* ksock_conn_t */ + + ksock_tconn_type kstc_type; /* tdi connection Type */ + ksock_tconn_state kstc_state; /* tdi connection state flag */ + + ksock_unicode_name_t kstc_dev; /* tcp transport device name */ + + ksock_tdi_addr_t kstc_addr; /* local address handlers / Objects */ + + atomic_t kstc_refcount; /* reference count of ksock_tconn */ + + struct list_head kstc_list; /* linked to global ksocknal_data */ + + union { + + struct { + int nbacklog; /* total number of backlog tdi connections */ + ksock_backlogs_t kstc_listening; /* listeing backlog child connections */ + ksock_backlogs_t kstc_accepted; /* connected backlog child connections */ + event_t kstc_accept_event; /* Signaled by AcceptedHander, + ksocknal_wait_accpeted_conns waits on */ + event_t kstc_destroy_event; /* Signaled when accepted child is released */ + } listener; + + struct { + ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ + ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ + ksock_chain_t kstc_send; /* tsdu engine for data sending */ + + int kstc_queued; /* Attached to Parent->ChildList ... */ + int kstc_queueno; /* 0: Attached to Listening list + 1: Attached to Accepted list */ + + int kstc_busy; /* referred by ConnectEventCallback ? */ + int kstc_accepted; /* the connection is built ready ? 
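For debugging the connection life cycle described by ksock_tconn_state above, a small name table is often handy. The helper below is an illustrative addition (not part of the patch) and simply assumes the enum ordering shown above.

static const char *ks_tconn_state_name(ksock_tconn_state state)
{
        static const char *names[] = {
                "uninited", "inited", "bind", "associated", "connecting",
                "connected", "listening", "disconnected", "aborted",
        };

        if ((int)state < 0 || (int)state >= ksts_last)
                return "unknown";
        return names[state];
}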
*/ + + struct list_head kstc_link; /* linked to parent tdi connection */ + ksock_tconn_t * kstc_parent; /* pointers to it's listener parent */ + } child; + + struct { + ksock_tconn_info_t kstc_info; /* Connection Info if Connected */ + ksock_chain_t kstc_recv; /* tsdu engine for data receiving */ + ksock_chain_t kstc_send; /* tsdu engine for data sending */ + } sender; + }; + + ulong_ptr kstc_snd_wnd; /* Sending window size */ + ulong_ptr kstc_rcv_wnd; /* Recving window size */ + + ksock_workitem_t kstc_destroy; /* tconn destruction workitem */ + ksock_disconnect_workitem_t kstc_disconnect; /* connection disconnect workitem */ + + ksock_schedule_cb kstc_sched_cb; /* notification callback routine of completion */ + ksock_update_tx kstc_update_tx; /* aync sending callback to update tx */ +}; + +#define SOCK_WMEM_QUEUED(sock) (0) + +#define TDINAL_WINDOW_DEFAULT_SIZE (0x100000) + + +struct _KS_UDP_COMPLETION_CONTEXT; +struct _KS_TCP_COMPLETION_CONTEXT; + + +typedef +NTSTATUS +(*PKS_UDP_COMPLETION_ROUTINE) ( + IN PIRP Irp, + IN struct _KS_UDP_COMPLETION_CONTEXT + *UdpContext + ); + + +typedef +NTSTATUS +(*PKS_TCP_COMPLETION_ROUTINE) ( + IN PIRP Irp, + IN struct _KS_TCP_COMPLETION_CONTEXT + *TcpContext + ); + +// +// Udp Irp Completion Context +// + +typedef struct _KS_UDP_COMPLETION_CONTEXT { + + PKEVENT Event; + union { + PFILE_OBJECT AddressObject; + ksock_tconn_t * tconn; + }; + + PKS_UDP_COMPLETION_ROUTINE CompletionRoutine; + PVOID CompletionContext; + +} KS_UDP_COMPLETION_CONTEXT, *PKS_UDP_COMPLETION_CONTEXT; + + +// +// Tcp Irp Completion Context (used by tcp data recv/send) +// + +typedef struct _KS_TCP_COMPLETION_CONTEXT { + + PKEVENT Event; // Event to be waited on by Irp caller ... + + ksock_tconn_t * tconn; // the tdi connection + + PKS_TCP_COMPLETION_ROUTINE CompletionRoutine; + PVOID CompletionContext; + PVOID CompletionContext2; + + PKS_TSDUMGR KsTsduMgr; // Tsdu buffer manager + + // + // These tow new members are for NON_BLOCKING transmission + // + + BOOLEAN bCounted; // To indict needing refcount to + // execute CompetionRoutine + ULONG ReferCount; // Refer count of this structure + +} KS_TCP_COMPLETION_CONTEXT, *PKS_TCP_COMPLETION_CONTEXT; + +typedef KS_TCP_COMPLETION_CONTEXT ksock_tdi_tx_t, ksock_tdi_rx_t; + + +/* + * tdi extensions + */ + +#define IOCTL_TCP_QUERY_INFORMATION_EX \ + CTL_CODE(FILE_DEVICE_NETWORK, 0, METHOD_NEITHER, FILE_ANY_ACCESS) +#define IOCTL_TCP_SET_INFORMATION_EX \ + CTL_CODE(FILE_DEVICE_NETWORK, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS) + + +#define TcpBuildSetInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, Buffer, BufferLen)\ + { \ + PIO_STACK_LOCATION _IRPSP; \ + if ( CompRoutine != NULL) { \ + IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ + } else { \ + IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ + } \ + _IRPSP = IoGetNextIrpStackLocation (Irp); \ + _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \ + _IRPSP->DeviceObject = DevObj; \ + _IRPSP->FileObject = FileObj; \ + _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = 0; \ + _IRPSP->Parameters.DeviceIoControl.InputBufferLength = BufferLen; \ + _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_SET_INFORMATION_EX; \ + Irp->AssociatedIrp.SystemBuffer = Buffer; \ + } + + +#define TcpBuildQueryInformationEx(Irp, DevObj, FileObj, CompRoutine, Contxt, InBuffer, InLength, OutBuffer, OutLength)\ + { \ + PIO_STACK_LOCATION _IRPSP; \ + if ( CompRoutine != NULL) { \ + IoSetCompletionRoutine( Irp, CompRoutine, Contxt, TRUE, TRUE, TRUE);\ + } 
else { \ + IoSetCompletionRoutine( Irp, NULL, NULL, FALSE, FALSE, FALSE); \ + } \ + _IRPSP = IoGetNextIrpStackLocation (Irp); \ + _IRPSP->MajorFunction = IRP_MJ_DEVICE_CONTROL; \ + _IRPSP->DeviceObject = DevObj; \ + _IRPSP->FileObject = FileObj; \ + _IRPSP->Parameters.DeviceIoControl.OutputBufferLength = OutLength; \ + _IRPSP->Parameters.DeviceIoControl.InputBufferLength = InLength; \ + _IRPSP->Parameters.DeviceIoControl.IoControlCode = IOCTL_TCP_QUERY_INFORMATION_EX; \ + _IRPSP->Parameters.DeviceIoControl.Type3InputBuffer = InBuffer; \ + Irp->UserBuffer = OutBuffer; \ + } + + +typedef struct ks_addr_slot { + LIST_ENTRY link; + int up; + char iface[40]; + __u32 ip_addr; + __u32 netmask; + UNICODE_STRING devname; + WCHAR buffer[1]; +} ks_addr_slot_t; + +typedef struct { + + /* + * Tdi client information + */ + + UNICODE_STRING ksnd_client_name; /* tdi client module name */ + HANDLE ksnd_pnp_handle; /* the handle for pnp changes */ + + spinlock_t ksnd_addrs_lock; /* serialize ip address list access */ + LIST_ENTRY ksnd_addrs_list; /* list of the ip addresses */ + int ksnd_naddrs; /* number of the ip addresses */ + + /* + * Tdilnd internal defintions + */ + + int ksnd_init; /* initialisation state */ + + TDI_PROVIDER_INFO ksnd_provider; /* tdi tcp/ip provider's information */ + + spinlock_t ksnd_tconn_lock; /* tdi connections access serialise */ + + int ksnd_ntconns; /* number of tconns attached in list */ + struct list_head ksnd_tconns; /* tdi connections list */ + cfs_mem_cache_t * ksnd_tconn_slab; /* slabs for ksock_tconn_t allocations */ + event_t ksnd_tconn_exit; /* exit event to be signaled by the last tconn */ + + spinlock_t ksnd_tsdu_lock; /* tsdu access serialise */ + + int ksnd_ntsdus; /* number of tsdu buffers allocated */ + ulong_ptr ksnd_tsdu_size; /* the size of a signel tsdu buffer */ + cfs_mem_cache_t * ksnd_tsdu_slab; /* slab cache for tsdu buffer allocation */ + + int ksnd_nfreetsdus; /* number of tsdu buffers in the freed list */ + struct list_head ksnd_freetsdus; /* List of the freed Tsdu buffer. */ + + spinlock_t ksnd_daemon_lock; /* stabilize daemon ops */ + int ksnd_ndaemons; /* number of listening daemons */ + struct list_head ksnd_daemons; /* listening daemon list */ + event_t ksnd_daemon_exit; /* the last daemon quiting should singal it */ + +} ks_data_t; + +int +ks_init_tdi_data(); + +void +ks_fini_tdi_data(); + + +#endif /* __KERNEL__ */ +#endif /* __LIBCFS_WINNT_TCPIP_H__ */ + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/winnt/winnt-time.h b/libcfs/include/libcfs/winnt/winnt-time.h new file mode 100644 index 0000000..a7a570c --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-time.h @@ -0,0 +1,316 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for Winnt (kernel and user-level). + * + */ + +#ifndef __LIBCFS_WINNT_LINUX_TIME_H__ +#define __LIBCFS_WINNT_LINUX_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION ((u_int64_t) 1000000) + +#define HZ (100) + +struct timeval { + time_t tv_sec; /* seconds */ + suseconds_t tv_usec; /* microseconds */ +}; + +struct timespec { + ulong_ptr tv_sec; + ulong_ptr tv_nsec; +}; + +#ifdef __KERNEL__ + +#include + +/* + * Generic kernel stuff + */ + +typedef struct timeval cfs_fs_time_t; + +typedef u_int64_t cfs_time_t; +typedef int64_t cfs_duration_t; + +static inline void do_gettimeofday(struct timeval *tv) +{ + LARGE_INTEGER Time; + + KeQuerySystemTime(&Time); + + tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); + tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; +} + +static inline cfs_time_t JIFFIES() +{ + LARGE_INTEGER Tick; + LARGE_INTEGER Elapse; + + KeQueryTickCount(&Tick); + + Elapse.QuadPart = Tick.QuadPart * KeQueryTimeIncrement(); + Elapse.QuadPart /= (10000000 / HZ); + + return Elapse.QuadPart; +} + +static inline cfs_time_t cfs_time_current(void) +{ + return JIFFIES(); +} + +static inline cfs_time_t cfs_time_current_sec(void) +{ + return (JIFFIES() / HZ); +} + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return (t + d); +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return (t1 - t2); +} + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return ((int64_t)t1 - (int64_t)t2) < 0; +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return ((int64_t)t1 - (int64_t)t2) <= 0; +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + ULONG Linux; + LARGE_INTEGER Sys; + + KeQuerySystemTime(&Sys); + + RtlTimeToSecondsSince1970(&Sys, &Linux); + + t->tv_sec = Linux; + t->tv_usec = (Sys.LowPart % 10000000) / 
10; +} + +static inline cfs_time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return t->tv_sec; +} + +static inline u_int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return ((u_int64_t)t->tv_sec) * ONE_MILLION + t->tv_usec; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return (__cfs_fs_time_flat(t1) < __cfs_fs_time_flat(t2)); +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return (__cfs_fs_time_flat(t1) <= __cfs_fs_time_flat(t2)); +} + +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return (cfs_duration_t)seconds * HZ; +} + +static inline cfs_time_t cfs_duration_sec(cfs_duration_t d) +{ + return d / HZ; +} + +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ + s->tv_sec = (suseconds_t) (d / HZ); + s->tv_usec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * + ONE_MILLION / HZ); +} + +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ + s->tv_sec = (suseconds_t) (d / HZ); + s->tv_nsec = (time_t)((d - (cfs_duration_t)s->tv_sec * HZ) * + ONE_BILLION / HZ); +} + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + *v = *t; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = t->tv_sec; + s->tv_nsec = t->tv_usec * 1000; +} + +#define cfs_time_current_64 cfs_time_current +#define cfs_time_add_64 cfs_time_add +#define cfs_time_shift_64 cfs_time_shift +#define cfs_time_before_64 cfs_time_before +#define cfs_time_beforeq_64 cfs_time_beforeq + +/* + * One jiffy + */ +#define CFS_TICK (1) + +#define LTIME_S(t) (t) + +#define CFS_TIME_T "%I64u" +#define CFS_DURATION_T "%I64d" + +#else /* !__KERNEL__ */ + +/* + * Liblustre. time(2) based implementation. + */ +#include + + +// +// Time routines ... +// + +NTSYSAPI +CCHAR +NTAPI +NtQuerySystemTime( + OUT PLARGE_INTEGER CurrentTime + ); + + +NTSYSAPI +BOOLEAN +NTAPI +RtlTimeToSecondsSince1970( + IN PLARGE_INTEGER Time, + OUT PULONG ElapsedSeconds + ); + + +NTSYSAPI +VOID +NTAPI +RtlSecondsSince1970ToTime( + IN ULONG ElapsedSeconds, + OUT PLARGE_INTEGER Time + ); + +NTSYSAPI +VOID +NTAPI +Sleep( + DWORD dwMilliseconds // sleep time in milliseconds +); + + +static inline void sleep(int time) +{ + DWORD Time = 1000 * time; + Sleep(Time); +} + + +static inline void do_gettimeofday(struct timeval *tv) +{ + LARGE_INTEGER Time; + + NtQuerySystemTime(&Time); + + tv->tv_sec = (long_ptr) (Time.QuadPart / 10000000); + tv->tv_usec = (long_ptr) (Time.QuadPart % 10000000) / 10; +} + +static inline int gettimeofday(struct timeval *tv, void * tz) +{ + do_gettimeofday(tv); + return 0; +} + +#endif /* __KERNEL__ */ + +/* __LIBCFS_LINUX_LINUX_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/include/libcfs/winnt/winnt-types.h b/libcfs/include/libcfs/winnt/winnt-types.h new file mode 100644 index 0000000..b50b6bb --- /dev/null +++ b/libcfs/include/libcfs/winnt/winnt-types.h @@ -0,0 +1,647 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
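As a usage sketch of the portable time API above (hypothetical caller, kernel side): arm a five-second deadline with the jiffies-based clock and test it later, using only the cfs_time_* helpers defined in that header.

static int ks_deadline_sketch(void)
{
        cfs_time_t     deadline;
        struct timeval tv;

        deadline = cfs_time_add(cfs_time_current(), cfs_time_seconds(5));

        /* ... some work happens here ... */

        if (cfs_time_before(cfs_time_current(), deadline)) {
                /* still inside the window; show the 5s window in usec */
                cfs_duration_usec(cfs_time_seconds(5), &tv);
                return 0;
        }
        return -1;      /* deadline already passed */
}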
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic types definitions + * + */ + +#ifndef _WINNT_TYPE_H +#define _WINNT_TYPE_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#else + +#include +#include +#include +#include +#include +#include +#include +#include + +#endif + + +#define __LITTLE_ENDIAN + +#define inline __inline +#define __inline__ __inline + +typedef unsigned __int8 __u8; +typedef signed __int8 __s8; + +typedef signed __int64 __s64; +typedef unsigned __int64 __u64; + +typedef signed __int16 __s16; +typedef unsigned __int16 __u16; + +typedef signed __int32 __s32; +typedef unsigned __int32 __u32; + +typedef signed __int64 __s64; +typedef unsigned __int64 __u64; + +typedef unsigned long ULONG; + + +#if defined(_WIN64) + #define long_ptr __int64 + #define ulong_ptr unsigned __int64 + #define BITS_PER_LONG (64) +#else + #define long_ptr long + #define ulong_ptr unsigned long + #define BITS_PER_LONG (32) + +#endif + +/* bsd */ +typedef unsigned char u_char; +typedef unsigned short u_short; +typedef unsigned int u_int; +typedef unsigned long u_long; + +/* sysv */ +typedef unsigned char unchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +#ifndef __BIT_TYPES_DEFINED__ +#define __BIT_TYPES_DEFINED__ + +typedef __u8 u_int8_t; +typedef __s8 int8_t; +typedef __u16 u_int16_t; +typedef __s16 int16_t; +typedef __u32 u_int32_t; +typedef __s32 int32_t; + +#endif /* !(__BIT_TYPES_DEFINED__) */ + +typedef __u8 uint8_t; +typedef __u16 uint16_t; +typedef __u32 uint32_t; + +typedef __u64 uint64_t; +typedef __u64 u_int64_t; +typedef __s64 int64_t; + +typedef long ssize_t; + +typedef __u32 suseconds_t; + +typedef __u32 pid_t, tid_t; + +typedef __u16 uid_t, gid_t; + +typedef __u16 mode_t; +typedef __u16 umode_t; + +typedef ulong_ptr sigset_t; + +typedef uint64_t loff_t; +typedef HANDLE cfs_handle_t; +typedef uint64_t cycles_t; + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE ((HANDLE)-1) +#endif + + +#ifdef __KERNEL__ /* kernel */ + +typedef __u32 off_t; +typedef __u32 time_t; + +typedef unsigned short kdev_t; + +#else /* !__KERNEL__ */ + +typedef int BOOL; +typedef __u8 BYTE; +typedef __u16 WORD; +typedef __u32 DWORD; + +#endif /* __KERNEL__ */ + +/* + * Conastants suffix + */ + +#define ULL i64 +#define ull i64 + +/* + * Winnt kernel has no capabilities. 
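The fixed-width typedefs above are easy to sanity-check at compile time; the negative-array-size trick below is an illustrative addition (not in the original header) that fails the build if any width is wrong.

typedef char __cfs_check_u8  [sizeof(__u8)      == 1 ? 1 : -1];
typedef char __cfs_check_u16 [sizeof(__u16)     == 2 ? 1 : -1];
typedef char __cfs_check_u32 [sizeof(__u32)     == 4 ? 1 : -1];
typedef char __cfs_check_u64 [sizeof(__u64)     == 8 ? 1 : -1];
typedef char __cfs_check_ptr [sizeof(ulong_ptr) == sizeof(void *) ? 1 : -1];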
+ */ + +typedef __u32 cfs_kernel_cap_t; + +#define INT_MAX ((int)(~0U>>1)) +#define INT_MIN (-INT_MAX - 1) +#define UINT_MAX (~0U) + +#endif /* _WINNT_TYPES_H */ + + +/* + * Bytes order + */ + +// +// Byte order swapping routines +// + + +#define ___swab16(x) RtlUshortByteSwap(x) +#define ___swab32(x) RtlUlongByteSwap(x) +#define ___swab64(x) RtlUlonglongByteSwap(x) + +#define ___constant_swab16(x) \ + ((__u16)( \ + (((__u16)(x) & (__u16)0x00ffU) << 8) | \ + (((__u16)(x) & (__u16)0xff00U) >> 8) )) + +#define ___constant_swab32(x) \ + ((__u32)( \ + (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \ + (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \ + (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \ + (((__u32)(x) & (__u32)0xff000000UL) >> 24) )) + +#define ___constant_swab64(x) \ + ((__u64)( \ + (__u64)(((__u64)(x) & (__u64)0x00000000000000ffUL) << 56) | \ + (__u64)(((__u64)(x) & (__u64)0x000000000000ff00UL) << 40) | \ + (__u64)(((__u64)(x) & (__u64)0x0000000000ff0000UL) << 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00000000ff000000UL) << 8) | \ + (__u64)(((__u64)(x) & (__u64)0x000000ff00000000UL) >> 8) | \ + (__u64)(((__u64)(x) & (__u64)0x0000ff0000000000UL) >> 24) | \ + (__u64)(((__u64)(x) & (__u64)0x00ff000000000000UL) >> 40) | \ + (__u64)(((__u64)(x) & (__u64)0xff00000000000000UL) >> 56) )) + + +#define __swab16(x) ___constant_swab16(x) +#define __swab32(x) ___constant_swab32(x) +#define __swab64(x) ___constant_swab64(x) + +#define __swab16s(x) do { *(x) = __swab16((USHORT)(*(x)));} while(0) +#define __swab32s(x) do { *(x) = __swab32((ULONG)(*(x)));} while(0) +#define __swab64s(x) do { *(x) = __swab64((ULONGLONG)(*(x)));} while(0) + +#define __constant_htonl(x) ___constant_swab32((x)) +#define __constant_ntohl(x) ___constant_swab32((x)) +#define __constant_htons(x) ___constant_swab16((x)) +#define __constant_ntohs(x) ___constant_swab16((x)) +#define __constant_cpu_to_le64(x) ((__u64)(x)) +#define __constant_le64_to_cpu(x) ((__u64)(x)) +#define __constant_cpu_to_le32(x) ((__u32)(x)) +#define __constant_le32_to_cpu(x) ((__u32)(x)) +#define __constant_cpu_to_le16(x) ((__u16)(x)) +#define __constant_le16_to_cpu(x) ((__u16)(x)) +#define __constant_cpu_to_be64(x) ___constant_swab64((x)) +#define __constant_be64_to_cpu(x) ___constant_swab64((x)) +#define __constant_cpu_to_be32(x) ___constant_swab32((x)) +#define __constant_be32_to_cpu(x) ___constant_swab32((x)) +#define __constant_cpu_to_be16(x) ___constant_swab16((x)) +#define __constant_be16_to_cpu(x) ___constant_swab16((x)) +#define __cpu_to_le64(x) ((__u64)(x)) +#define __le64_to_cpu(x) ((__u64)(x)) +#define __cpu_to_le32(x) ((__u32)(x)) +#define __le32_to_cpu(x) ((__u32)(x)) +#define __cpu_to_le16(x) ((__u16)(x)) +#define __le16_to_cpu(x) ((__u16)(x)) +#define __cpu_to_be64(x) __swab64((x)) +#define __be64_to_cpu(x) __swab64((x)) +#define __cpu_to_be32(x) __swab32((x)) +#define __be32_to_cpu(x) __swab32((x)) +#define __cpu_to_be16(x) __swab16((x)) +#define __be16_to_cpu(x) __swab16((x)) +#define __cpu_to_le64p(x) (*(__u64*)(x)) +#define __le64_to_cpup(x) (*(__u64*)(x)) +#define __cpu_to_le32p(x) (*(__u32*)(x)) +#define __le32_to_cpup(x) (*(__u32*)(x)) +#define __cpu_to_le16p(x) (*(__u16*)(x)) +#define __le16_to_cpup(x) (*(__u16*)(x)) +#define __cpu_to_be64p(x) __swab64p((x)) +#define __be64_to_cpup(x) __swab64p((x)) +#define __cpu_to_be32p(x) __swab32p((x)) +#define __be32_to_cpup(x) __swab32p((x)) +#define __cpu_to_be16p(x) __swab16p((x)) +#define __be16_to_cpup(x) __swab16p((x)) +#define __cpu_to_le64s(x) do {} while (0) +#define __le64_to_cpus(x) 
do {} while (0) +#define __cpu_to_le32s(x) do {} while (0) +#define __le32_to_cpus(x) do {} while (0) +#define __cpu_to_le16s(x) do {} while (0) +#define __le16_to_cpus(x) do {} while (0) +#define __cpu_to_be64s(x) __swab64s((x)) +#define __be64_to_cpus(x) __swab64s((x)) +#define __cpu_to_be32s(x) __swab32s((x)) +#define __be32_to_cpus(x) __swab32s((x)) +#define __cpu_to_be16s(x) __swab16s((x)) +#define __be16_to_cpus(x) __swab16s((x)) + +#ifndef cpu_to_le64 +#define cpu_to_le64 __cpu_to_le64 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le16_to_cpu __le16_to_cpu +#endif + +#define cpu_to_be64 __cpu_to_be64 +#define be64_to_cpu __be64_to_cpu +#define cpu_to_be32 __cpu_to_be32 +#define be32_to_cpu __be32_to_cpu +#define cpu_to_be16 __cpu_to_be16 +#define be16_to_cpu __be16_to_cpu +#define cpu_to_le64p __cpu_to_le64p +#define le64_to_cpup __le64_to_cpup +#define cpu_to_le32p __cpu_to_le32p +#define le32_to_cpup __le32_to_cpup +#define cpu_to_le16p __cpu_to_le16p +#define le16_to_cpup __le16_to_cpup +#define cpu_to_be64p __cpu_to_be64p +#define be64_to_cpup __be64_to_cpup +#define cpu_to_be32p __cpu_to_be32p +#define be32_to_cpup __be32_to_cpup +#define cpu_to_be16p __cpu_to_be16p +#define be16_to_cpup __be16_to_cpup +#define cpu_to_le64s __cpu_to_le64s +#define le64_to_cpus __le64_to_cpus +#define cpu_to_le32s __cpu_to_le32s +#define le32_to_cpus __le32_to_cpus +#define cpu_to_le16s __cpu_to_le16s +#define le16_to_cpus __le16_to_cpus +#define cpu_to_be64s __cpu_to_be64s +#define be64_to_cpus __be64_to_cpus +#define cpu_to_be32s __cpu_to_be32s +#define be32_to_cpus __be32_to_cpus +#define cpu_to_be16s __cpu_to_be16s +#define be16_to_cpus __be16_to_cpus + + +// +// Network to host byte swap functions +// + +#define ntohl(x) ( ( ( ( x ) & 0x000000ff ) << 24 ) | \ + ( ( ( x ) & 0x0000ff00 ) << 8 ) | \ + ( ( ( x ) & 0x00ff0000 ) >> 8 ) | \ + ( ( ( x ) & 0xff000000 ) >> 24 ) ) + +#define ntohs(x) ( ( ( ( x ) & 0xff00 ) >> 8 ) | \ + ( ( ( x ) & 0x00ff ) << 8 ) ) + + +#define htonl(x) ntohl(x) +#define htons(x) ntohs(x) + + + +#ifndef _I386_ERRNO_H +#define _I386_ERRNO_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Arg list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ 
+#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ +#undef EDEADLK +#define EDEADLK 35 /* Resource deadlock would occur */ +#undef ENAMETOOLONG +#define ENAMETOOLONG 36 /* File name too long */ +#undef ENOLCK +#define ENOLCK 37 /* No record locks available */ +#undef ENOSYS +#define ENOSYS 38 /* Function not implemented */ +#undef ENOTEMPTY +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#undef EILSEQ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 
/* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale NFS file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ + +/* Should never be seen by user programs */ +#define ERESTARTSYS 512 +#define ERESTARTNOINTR 513 +#define ERESTARTNOHAND 514 /* restart if no handler.. */ +#define ENOIOCTLCMD 515 /* No ioctl command */ + +/* Defined for the NFSv3 protocol */ +#define EBADHANDLE 521 /* Illegal NFS file handle */ +#define ENOTSYNC 522 /* Update synchronization mismatch */ +#define EBADCOOKIE 523 /* Cookie is stale */ +#define ENOTSUPP 524 /* Operation is not supported */ +#define ETOOSMALL 525 /* Buffer or request is too small */ +#define ESERVERFAULT 526 /* An untranslatable error occurred */ +#define EBADTYPE 527 /* Type not supported by server */ +#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ + + + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_ACCMODE 0003 +#define O_RDONLY 00 +#define O_WRONLY 01 +#define O_RDWR 02 +#define O_CREAT 0100 /* not fcntl */ +#define O_EXCL 0200 /* not fcntl */ +#define O_NOCTTY 0400 /* not fcntl */ +#define O_TRUNC 01000 /* not fcntl */ +#define O_APPEND 02000 +#define O_NONBLOCK 04000 +#define O_NDELAY O_NONBLOCK +#define O_SYNC 010000 +#define FASYNC 020000 /* fcntl, for BSD compatibility */ +#define O_DIRECT 040000 /* direct disk access hint */ +#define O_LARGEFILE 0100000 +#define O_DIRECTORY 0200000 /* must be a directory */ +#define O_NOFOLLOW 0400000 /* don't follow links */ + +#define F_DUPFD 0 /* dup */ +#define F_GETFD 1 /* get close_on_exec */ +#define F_SETFD 2 /* set/clear close_on_exec */ +#define F_GETFL 3 /* get file->f_flags */ +#define F_SETFL 4 /* set file->f_flags */ +#define F_GETLK 5 +#define F_SETLK 6 +#define F_SETLKW 7 + +#define F_SETOWN 8 /* for sockets. */ +#define F_GETOWN 9 /* for sockets. */ +#define F_SETSIG 10 /* for sockets. */ +#define F_GETSIG 11 /* for sockets. 
*/ + +#define F_GETLK64 12 /* using 'struct flock64' */ +#define F_SETLK64 13 +#define F_SETLKW64 14 + +/* for F_[GET|SET]FL */ +#define FD_CLOEXEC 1 /* actually anything with low bit set goes */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 0 +#define F_WRLCK 1 +#define F_UNLCK 2 + +/* for old implementation of bsd flock () */ +#define F_EXLCK 4 /* or 3 */ +#define F_SHLCK 8 /* or 4 */ + +/* for leases */ +#define F_INPROGRESS 16 + +/* operations for bsd flock(), also used by the kernel implementation */ +#define LOCK_SH 1 /* shared lock */ +#define LOCK_EX 2 /* exclusive lock */ +#define LOCK_NB 4 /* or'd with one of the above to prevent + blocking */ +#define LOCK_UN 8 /* remove lock */ + +#define LOCK_MAND 32 /* This is a mandatory flock */ +#define LOCK_READ 64 /* ... Which allows concurrent read operations */ +#define LOCK_WRITE 128 /* ... Which allows concurrent write operations */ +#define LOCK_RW 192 /* ... Which allows concurrent read & write ops */ + +#endif + + +#ifndef LIBCFS_SIGNAL_H +#define LIBCFS_SIGNAL_H + +/* + * signal values ... + */ + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX (_NSIG-1) + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. 
+ */ +#define SA_NOCLDSTOP 0x00000001 +#define SA_NOCLDWAIT 0x00000002 /* not supported yet */ +#define SA_SIGINFO 0x00000004 +#define SA_ONSTACK 0x08000000 +#define SA_RESTART 0x10000000 +#define SA_NODEFER 0x40000000 +#define SA_RESETHAND 0x80000000 + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND +#define SA_INTERRUPT 0x20000000 /* dummy -- ignored */ + +#define SA_RESTORER 0x04000000 + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + + +#define sigmask(sig) ((__u32)1 << ((sig) - 1)) + +#endif // LIBCFS_SIGNAL_H diff --git a/libcfs/libcfs/.cvsignore b/libcfs/libcfs/.cvsignore new file mode 100644 index 0000000..c6f0aa4 --- /dev/null +++ b/libcfs/libcfs/.cvsignore @@ -0,0 +1,11 @@ +.deps +Makefile +link-stamp +.*.cmd +autoMakefile.in +autoMakefile +*.ko +*.mod.c +.*.flags +.tmp_versions +.depend diff --git a/libcfs/libcfs/Info.plist b/libcfs/libcfs/Info.plist new file mode 100644 index 0000000..aaf9b2f --- /dev/null +++ b/libcfs/libcfs/Info.plist @@ -0,0 +1,35 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + libcfs + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.libcfs + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kpi.bsd + 8.0.0b1 + com.apple.kpi.libkern + 8.0.0b1 + com.apple.kpi.mach + 8.0.0b1 + com.apple.kpi.unsupported + 8.0.0b1 + + + diff --git a/libcfs/libcfs/Makefile.in b/libcfs/libcfs/Makefile.in new file mode 100644 index 0000000..823782a --- /dev/null +++ b/libcfs/libcfs/Makefile.in @@ -0,0 +1,33 @@ +MODULES = libcfs + +libcfs-linux-objs := linux-tracefile.o linux-debug.o +libcfs-linux-objs += linux-prim.o linux-mem.o +libcfs-linux-objs += linux-fs.o linux-sync.o linux-tcpip.o +libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o +libcfs-linux-objs += linux-utils.o linux-module.o + +ifeq ($(PATCHLEVEL),6) +libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) +endif + +default: all + +ifeq (@linux25@,no) +sources: + @for i in $(libcfs-linux-objs:%.o=%.c) ; do \ + echo "ln -s @srcdir@/linux/$$i ." ; \ + ln -sf @srcdir@/linux/$$i . || exit 1 ; \ + done + +else +sources: + +endif + +libcfs-all-objs := debug.o nidstrings.o lwt.o module.o tracefile.o watchdog.o + +libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) + +EXTRA_PRE_CFLAGS := -I@LUSTRE@/../libcfs/libcfs + +@INCLUDE_RULES@ diff --git a/libcfs/libcfs/autoMakefile.am b/libcfs/libcfs/autoMakefile.am new file mode 100644 index 0000000..e70e5ce --- /dev/null +++ b/libcfs/libcfs/autoMakefile.am @@ -0,0 +1,53 @@ +# Copyright (C) 2001, 2002 Cluster File Systems, Inc. +# +# This code is issued under the GNU General Public License. 
+# See the file COPYING in this distribution + +SUBDIRS := linux +if DARWIN +SUBDIRS += darwin +endif +DIST_SUBDIRS := $(SUBDIRS) + +if LIBLUSTRE +noinst_LIBRARIES= libcfs.a +libcfs_a_SOURCES= debug.c user-prim.c user-lock.c user-tcpip.c user-bitops.c +libcfs_a_CPPFLAGS = $(LLCPPFLAGS) +libcfs_a_CFLAGS = $(LLCFLAGS) +endif + +if MODULES + +if LINUX +modulenet_DATA := libcfs$(KMODEXT) +endif + +if DARWIN +macos_PROGRAMS := libcfs + +nodist_libcfs_SOURCES := darwin/darwin-sync.c darwin/darwin-mem.c \ + darwin/darwin-prim.c darwin/darwin-fs.c darwin/darwin-curproc.c \ + darwin/darwin-tcpip.c darwin/darwin-utils.c \ + darwin/darwin-debug.c darwin/darwin-proc.c \ + darwin/darwin-tracefile.c darwin/darwin-module.c \ + debug.c module.c tracefile.c nidstrings.c watchdog.c + +libcfs_CFLAGS := $(EXTRA_KCFLAGS) +libcfs_LDFLAGS := $(EXTRA_KLDFLAGS) +libcfs_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install_data_hook := fix-kext-ownership + +endif + +endif + +install-data-hook: $(install_data_hook) + +EXTRA_DIST := Info.plist + +MOSTLYCLEANFILES := @MOSTLYCLEANFILES@ linux-*.c linux/*.o darwin/*.o libcfs +DIST_SOURCES := $(libcfs-all-objs:%.o=%.c) tracefile.h user-prim.c \ + user-lock.c user-tcpip.c user-bitops.c diff --git a/libcfs/libcfs/darwin/.cvsignore b/libcfs/libcfs/darwin/.cvsignore new file mode 100644 index 0000000..282522d --- /dev/null +++ b/libcfs/libcfs/darwin/.cvsignore @@ -0,0 +1,2 @@ +Makefile +Makefile.in diff --git a/libcfs/libcfs/darwin/Makefile.am b/libcfs/libcfs/darwin/Makefile.am new file mode 100644 index 0000000..3f2077b --- /dev/null +++ b/libcfs/libcfs/darwin/Makefile.am @@ -0,0 +1,12 @@ +EXTRA_DIST := \ + darwin-mem.c \ + darwin-proc.c \ + darwin-utils.c \ + darwin-debug.c \ + darwin-module.c \ + darwin-sync.c \ + darwin-fs.c \ + darwin-prim.c \ + darwin-tracefile.c \ + darwin-curproc.c \ + darwin-tcpip.c diff --git a/libcfs/libcfs/darwin/darwin-curproc.c b/libcfs/libcfs/darwin/darwin-curproc.c new file mode 100644 index 0000000..e12394e --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-curproc.c @@ -0,0 +1,164 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API implementation for XNU kernel + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +/* + * Implementation of cfs_curproc API (see lnet/include/libcfs/curproc.h) + * for XNU kernel. 
+ */ + +static inline struct ucred *curproc_ucred(void) +{ +#ifdef __DARWIN8__ + return proc_ucred(current_proc()); +#else + return current_proc()->p_cred->pc_ucred; +#endif +} + +uid_t cfs_curproc_uid(void) +{ + return curproc_ucred()->cr_uid; +} + +gid_t cfs_curproc_gid(void) +{ + LASSERT(curproc_ucred()->cr_ngroups > 0); + return curproc_ucred()->cr_groups[0]; +} + +uid_t cfs_curproc_fsuid(void) +{ +#ifdef __DARWIN8__ + return curproc_ucred()->cr_ruid; +#else + return current_proc()->p_cred->p_ruid; +#endif +} + +gid_t cfs_curproc_fsgid(void) +{ +#ifdef __DARWIN8__ + return curproc_ucred()->cr_rgid; +#else + return current_proc()->p_cred->p_rgid; +#endif +} + +pid_t cfs_curproc_pid(void) +{ +#ifdef __DARWIN8__ + /* no pid for each thread, return address of thread struct */ + return (pid_t)current_thread(); +#else + return current_proc()->p_pid; +#endif +} + +int cfs_curproc_groups_nr(void) +{ + LASSERT(curproc_ucred()->cr_ngroups > 0); + return curproc_ucred()->cr_ngroups - 1; +} + +int cfs_curproc_is_in_groups(gid_t gid) +{ + int i; + struct ucred *cr; + + cr = curproc_ucred(); + LASSERT(cr != NULL); + + for (i = 0; i < cr->cr_ngroups; ++ i) { + if (cr->cr_groups[i] == gid) + return 1; + } + return 0; +} + +void cfs_curproc_groups_dump(gid_t *array, int size) +{ + struct ucred *cr; + + cr = curproc_ucred(); + LASSERT(cr != NULL); + CLASSERT(sizeof array[0] == sizeof (__u32)); + + size = min_t(int, size, cr->cr_ngroups); + memcpy(array, &cr->cr_groups[1], size * sizeof(gid_t)); +} + +mode_t cfs_curproc_umask(void) +{ +#ifdef __DARWIN8__ + /* + * XXX Liang: + * + * fd_cmask is not available in kexts, so we just assume + * verything is permited. + */ + return -1; +#else + return current_proc()->p_fd->fd_cmask; +#endif +} + +char *cfs_curproc_comm(void) +{ +#ifdef __DARWIN8__ + /* + * Writing to proc->p_comm is not permited in Darwin8, + * because proc_selfname() only return a copy of proc->p_comm, + * so this function is not really working while user try to + * change comm of current process. 
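A brief usage sketch of the curproc API above (hypothetical caller): cfs_curproc_groups_nr() deliberately excludes cr_groups[0], which cfs_curproc_gid() already reports as the primary gid, so the two calls below are complementary. NGROUPS is the BSD limit on supplementary groups and D_OTHER is a libcfs debug mask.

static void ks_dump_caller_groups(void)
{
        gid_t groups[NGROUPS];
        int   n = cfs_curproc_groups_nr();

        cfs_curproc_groups_dump(groups, n);
        CDEBUG(D_OTHER, "pid %d: gid %u, %d supplementary group(s)\n",
               (int)cfs_curproc_pid(), (unsigned)cfs_curproc_gid(), n);
}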
+ */ + static char pcomm[MAXCOMLEN+1]; + + proc_selfname(pcomm, MAXCOMLEN+1); + return pcomm; +#else + return current_proc()->p_comm; +#endif +} + +cfs_kernel_cap_t cfs_curproc_cap_get(void) +{ + return -1; +} + +void cfs_curproc_cap_set(cfs_kernel_cap_t cap) +{ + return; +} + + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/libcfs/darwin/darwin-debug.c b/libcfs/libcfs/darwin/darwin-debug.c new file mode 100644 index 0000000..2152d40 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-debug.c @@ -0,0 +1,77 @@ +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "tracefile.h" + +void libcfs_debug_dumpstack(cfs_task_t *tsk) +{ + return; +} + +void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) +{ +} + +void lbug_with_loc(char *file, const char *func, const int line) +{ + libcfs_catastrophe = 1; + CEMERG("LBUG: pid: %u thread: %#x\n", + (unsigned)cfs_curproc_pid(), (unsigned)current_thread()); + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(file, func, line); + while (1) + cfs_schedule(); + + /* panic("lbug_with_loc(%s, %s, %d)", file, func, line) */ +} + +#if ENTRY_NESTING_SUPPORT + +static inline struct cfs_debug_data *__current_cdd(void) +{ + struct cfs_debug_data *cdd; + + cdd = (struct cfs_debug_data *)current_uthread()->uu_nlminfo; + if (cdd != NULL && + cdd->magic1 == CDD_MAGIC1 && cdd->magic2 == CDD_MAGIC2 && + cdd->nesting_level < 1000) + return cdd; + else + return NULL; +} + +static inline void __current_cdd_set(struct cfs_debug_data *cdd) +{ + current_uthread()->uu_nlminfo = (void *)cdd; +} + +void __entry_nesting(struct cfs_debug_data *child) +{ + struct cfs_debug_data *parent; + + parent = __current_cdd(); + if (parent != NULL) { + child->parent = parent; + child->nesting_level = parent->nesting_level + 1; + } + __current_cdd_set(child); +} + +void __exit_nesting(struct cfs_debug_data *child) +{ + __current_cdd_set(child->parent); +} + +unsigned int __current_nesting_level(void) +{ + struct cfs_debug_data *cdd; + + cdd = __current_cdd(); + if (cdd != NULL) + return cdd->nesting_level; + else + return 0; +} +/* ENTRY_NESTING_SUPPORT */ +#endif diff --git a/libcfs/libcfs/darwin/darwin-fs.c b/libcfs/libcfs/darwin/darwin-fs.c new file mode 100644 index 0000000..6fce8d5 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-fs.c @@ -0,0 +1,451 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Darwin porting library + * Make things easy to port + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +/* + * Kernel APIs for file system in xnu + * + * Public functions + */ + +#ifdef __DARWIN8__ +#include + +extern int vn_rdwr(enum uio_rw, vnode_t, caddr_t, int, off_t, enum uio_seg, int, kauth_cred_t, int *, proc_t); + +/* vnode_size() is not exported */ +static errno_t +vnode_size(vnode_t vp, off_t *sizep, vfs_context_t ctx) +{ + struct vnode_attr va; + int error; + + VATTR_INIT(&va); + VATTR_WANTED(&va, va_data_size); + error = vnode_getattr(vp, &va, ctx); + if (!error) + *sizep = va.va_data_size; + return(error); +} + +/* + * XXX Liang: + * + * kern_file_*() are not safe for multi-threads now, + * however, we need them only for tracefiled, so it's + * not so important to implement for MT. + */ +int +kern_file_size(struct cfs_kern_file *fp, off_t *psize) +{ + int error; + off_t size; + + error = vnode_size(fp->f_vp, &size, fp->f_ctxt); + if (error) + return error; + + if (psize) + *psize = size; + return 0; +} + +struct cfs_kern_file * +kern_file_open(const char * filename, int uflags, int mode, int *err) +{ + struct cfs_kern_file *fp; + vnode_t vp; + int error; + + fp = (struct cfs_kern_file *)_MALLOC(sizeof(struct cfs_kern_file), M_TEMP, M_WAITOK); + if (fp == NULL) { + if (err != NULL) + *err = -ENOMEM; + return NULL; + } + fp->f_flags = FFLAGS(uflags); + fp->f_ctxt = vfs_context_create(NULL); + + if ((error = vnode_open(filename, fp->f_flags, + mode, 0, &vp, fp->f_ctxt))){ + if (err != NULL) + *err = -error; + _FREE(fp, M_TEMP); + } else { + if (err != NULL) + *err = 0; + fp->f_vp = vp; + } + + return fp; +} + +int +kern_file_close(struct cfs_kern_file *fp) +{ + vnode_close(fp->f_vp, fp->f_flags, fp->f_ctxt); + vfs_context_rele(fp->f_ctxt); + _FREE(fp, M_TEMP); + + return 0; +} + +int +kern_file_read(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) +{ + struct proc *p = current_proc(); + int resid; + int error; + + assert(buf != NULL); + assert(fp != NULL && fp->f_vp != NULL); + + error = vn_rdwr(UIO_READ, fp->f_vp, buf, nbytes, *pos, + UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); + if ((error) || (nbytes == resid)) { + if (!error) + error = -EINVAL; + return error; + } + *pos += nbytes - resid; + + return (int)(nbytes - resid); +} + +int +kern_file_write(struct cfs_kern_file *fp, void *buf, size_t nbytes, loff_t *pos) +{ + struct proc *p = current_proc(); + int resid; + int error; + + assert(buf != NULL); + assert(fp != NULL && fp->f_vp != NULL); + + error = vn_rdwr(UIO_WRITE, fp->f_vp, buf, nbytes, *pos, + UIO_SYSSPACE32, 0, vfs_context_ucred(fp->f_ctxt), &resid, p); + if ((error) || (nbytes == resid)) { + if (!error) + error = -EINVAL; + return error; + } + *pos += nbytes - resid; + + return (int)(nbytes - resid); + +} + +int +kern_file_sync (struct cfs_kern_file *fp) +{ + return VNOP_FSYNC(fp->f_vp, MNT_WAIT, fp->f_ctxt); +} + +#else /* !__DARWIN8__ */ + +int +kern_file_size(struct file *fp, off_t *size) +{ + struct vnode *vp = (struct vnode *)fp->f_data; + struct stat sb; + int rc; + + rc = vn_stat(vp, &sb, current_proc()); + if (rc) { + *size = 0; + return rc; + } + *size = sb.st_size; + return 0; +} + +cfs_file_t * +kern_file_open(const char * filename, int flags, int mode, int *err) +{ + struct nameidata nd; + cfs_file_t *fp; + register struct vnode *vp; + int rc; + extern struct fileops vnops; + extern int nfiles; + 
CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + nfiles++; + MALLOC_ZONE(fp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO); + bzero(fp, sizeof(cfs_file_t)); + fp->f_count = 1; + LIST_CIRCLE(fp, f_list); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc()); + if ((rc = vn_open(&nd, flags, mode)) != 0){ + printf("filp_open failed at (%d)\n", rc); + if (err != NULL) + *err = rc; + FREE_ZONE(fp, sizeof *fp, M_FILE); + CFS_CONE_EX; + return NULL; + } + vp = nd.ni_vp; + fp->f_flag = flags & FMASK; + fp->f_type = DTYPE_VNODE; + fp->f_ops = &vnops; + fp->f_data = (caddr_t)vp; + fp->f_cred = current_proc()->p_ucred; + /* + * Hold cred to increase reference + */ + crhold(fp->f_cred); + /* + * vnode is locked inside vn_open for lookup, + * we should release the lock before return + */ + VOP_UNLOCK(vp, 0, current_proc()); + CFS_CONE_EX; + + return fp; +} + +static int +frele_internal(cfs_file_t *fp) +{ + if (fp->f_count == (short)0xffff) + panic("frele of lustre: stale"); + if (--fp->f_count < 0) + panic("frele of lustre: count < 0"); + return ((int)fp->f_count); +} + +int +kern_file_close (cfs_file_t *fp) +{ + struct vnode *vp; + CFS_DECL_CONE_DATA; + + if (fp == NULL) + return 0; + + CFS_CONE_IN; + if (frele_internal(fp) > 0) + goto out; + vp = (struct vnode *)fp->f_data; + (void )vn_close(vp, fp->f_flag, fp->f_cred, current_proc()); + /* + * ffree(fp); + * Dont use ffree to release fp!!!! + * ffree will call LIST_REMOVE(fp), + * but fp is not in any list, this will + * cause kernel panic + */ + struct ucred *cred; + cred = fp->f_cred; + if (cred != NOCRED) { + fp->f_cred = NOCRED; + crfree(cred); + } + extern int nfiles; + nfiles--; + memset(fp, 0xff, sizeof *fp); + fp->f_count = (short)0xffff; + FREE_ZONE(fp, sizeof *fp, M_FILE); +out: + CFS_CONE_EX; + return 0; +} + +extern void bwillwrite(void); + +/* + * Write buffer to filp inside kernel + */ +int +kern_file_write (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) +{ + struct uio auio; + struct iovec aiov; + struct proc *p = current_proc(); + long cnt, error = 0; + int flags = 0; + CFS_DECL_CONE_DATA; + + aiov.iov_base = (void *)(uintptr_t)buf; + aiov.iov_len = nbyte; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + if (pos != NULL) { + auio.uio_offset = *pos; + /* + * Liang: If don't set FOF_OFFSET, vn_write() + * will use fp->f_offset as the the real offset. 
+ * Same in vn_read() + */ + flags |= FOF_OFFSET; + } else + auio.uio_offset = (off_t)-1; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + cnt = nbyte; + CFS_CONE_IN; + if (fp->f_type == DTYPE_VNODE) + bwillwrite(); /* empty stuff now */ + if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { + if (auio.uio_resid != cnt && (error == ERESTART ||\ + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* The socket layer handles SIGPIPE */ + if (error == EPIPE && fp->f_type != DTYPE_SOCKET) + psignal(p, SIGPIPE); + } + CFS_CONE_EX; + if (error != 0) + cnt = -error; + else + cnt -= auio.uio_resid; + if (pos != NULL) + *pos += cnt; + return cnt; +} + +/* + * Read from filp inside kernel + */ +int +kern_file_read (cfs_file_t *fp, void *buf, size_t nbyte, loff_t *pos) +{ + struct uio auio; + struct iovec aiov; + struct proc *p = current_proc(); + long cnt, error = 0; + int flags = 0; + CFS_DECL_CONE_DATA; + + aiov.iov_base = (caddr_t)buf; + aiov.iov_len = nbyte; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + if (pos != NULL) { + auio.uio_offset = *pos; + flags |= FOF_OFFSET; + } else + auio.uio_offset = (off_t)-1; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + cnt = nbyte; + CFS_CONE_IN; + if ((error = fo_read(fp, &auio, fp->f_cred, flags, p)) != 0) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + CFS_CONE_EX; + if (error != 0) + cnt = -error; + else + cnt -= auio.uio_resid; + if (pos != NULL) + *pos += cnt; + + return cnt; +} + +int +kern_file_sync (cfs_file_t *fp) +{ + struct vnode *vp = (struct vnode *)fp->f_data; + struct proc *p = current_proc(); + int error = 0; + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + if (fref(fp) == -1) { + CFS_CONE_EX; + return (-EBADF); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); + VOP_UNLOCK(vp, 0, p); + frele(fp); + CFS_CONE_EX; + + return error; +} + +#endif /* !__DARWIN8__ */ + +struct posix_acl *posix_acl_alloc(int count, int flags) +{ + static struct posix_acl acl; + return &acl; +} + +/* + * XXX Liang: I've not converted all of them, + * more is needed? + */ +int cfs_oflags2univ(int flags) +{ + int f; + + f = flags & O_ACCMODE; + f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; + f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; + f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; + f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; + f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; + f |= (flags & O_NOFOLLOW) ? CFS_O_NOFOLLOW: 0; + f |= (flags & O_SYNC)? CFS_O_SYNC: 0; + return f; +} + +/* + * XXX Liang: we don't need it in OSX. + * But it should be implemented anyway. 
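As a usage sketch of the kern_file_* wrappers above (the Darwin 8 variants; the caller and its path handling are hypothetical): append a message to a file from kernel context, sizing the file first so the write lands at the current end.

static int ks_log_append_sketch(const char *path, const char *msg)
{
        struct cfs_kern_file *fp;
        off_t  size = 0;
        loff_t pos;
        int    rc = 0;

        fp = kern_file_open(path, O_CREAT | O_WRONLY, 0644, &rc);
        if (fp == NULL)
                return rc;

        kern_file_size(fp, &size);              /* append at current EOF */
        pos = (loff_t)size;

        rc = kern_file_write(fp, (void *)msg, strlen(msg), &pos);
        kern_file_sync(fp);
        kern_file_close(fp);

        return rc;                              /* bytes written or error */
}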
+ */ +int cfs_univ2oflags(int flags) +{ + return flags; +} diff --git a/libcfs/libcfs/darwin/darwin-internal.h b/libcfs/libcfs/darwin/darwin-internal.h new file mode 100644 index 0000000..6c83577 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-internal.h @@ -0,0 +1,22 @@ +#ifndef __LIBCFS_DARWIN_INTERNAL_H__ +#define __LIBCFS_DARWIN_INTERNAL_H__ + +#include +#include +#include +#include +#include + +int cfs_sysctl_isvalid(void); +struct sysctl_oid *cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, int (*handler) SYSCTL_HANDLER_ARGS); +struct sysctl_oid *cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int n, + const char *name, int *ptr, int val); +struct sysctl_oid * cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, int *ptr, int val); +struct sysctl_oid * cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, char *ptr, int len); +struct sysctl_oid * cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, void *ptr, int size); + +#endif diff --git a/libcfs/libcfs/darwin/darwin-mem.c b/libcfs/libcfs/darwin/darwin-mem.c new file mode 100644 index 0000000..3079a56 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-mem.c @@ -0,0 +1,480 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Liang Zhen + * Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Darwin porting library + * Make things easy to port + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include + +#include +#include +#include "darwin-internal.h" + +#if CFS_INDIVIDUAL_ZONE +extern zone_t zinit( vm_size_t, vm_size_t, vm_size_t, const char *); +extern void * zalloc(zone_t zone); +extern void *zalloc_noblock(zone_t zone); +extern void zfree(zone_t zone, void *addr); + +struct cfs_zone_nob { + struct list_head *z_nob; /* Pointer to z_link */ + struct list_head z_link; /* Do NOT access it directly */ +}; + +static struct cfs_zone_nob cfs_zone_nob; +static spinlock_t cfs_zone_guard; + +cfs_mem_cache_t *mem_cache_find(const char *name, size_t objsize) +{ + cfs_mem_cache_t *walker = NULL; + + LASSERT(cfs_zone_nob.z_nob != NULL); + + spin_lock(&cfs_zone_guard); + list_for_each_entry(walker, cfs_zone_nob.z_nob, mc_link) { + if (!strcmp(walker->mc_name, name) && \ + walker->mc_size == objsize) + break; + } + spin_unlock(&cfs_zone_guard); + + return walker; +} + +/* + * our wrapper around kern/zalloc.c:zinit() + * + * Creates copy of name and calls zinit() to do real work. Needed because zone + * survives kext unloading, so that @name cannot be just static string + * embedded into kext image. 
+ */ +cfs_mem_cache_t *mem_cache_create(vm_size_t objsize, const char *name) +{ + cfs_mem_cache_t *mc = NULL; + char *cname; + + MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); + if (mc == NULL){ + CERROR("cfs_mem_cache created fail!\n"); + return NULL; + } + + cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK); + LASSERT(cname != NULL); + mc->mc_cache = zinit(objsize, (KMEM_MAX_ZONE * objsize), 0, strcpy(cname, name)); + mc->mc_size = objsize; + CFS_INIT_LIST_HEAD(&mc->mc_link); + strncpy(mc->mc_name, name, 1 + strlen(name)); + return mc; +} + +void mem_cache_destroy(cfs_mem_cache_t *mc) +{ + /* + * zone can NOT be destroyed after creating, + * so just keep it in list. + * + * We will not lost a zone after we unload + * libcfs, it can be found by from libcfs.zone + */ + return; +} + +#define mem_cache_alloc(mc) zalloc((mc)->mc_cache) +#ifdef __DARWIN8__ +# define mem_cache_alloc_nb(mc) zalloc((mc)->mc_cache) +#else +/* XXX Liang: Tiger doesn't export zalloc_noblock() */ +# define mem_cache_alloc_nb(mc) zalloc_noblock((mc)->mc_cache) +#endif +#define mem_cache_free(mc, p) zfree((mc)->mc_cache, p) + +#else /* !CFS_INDIVIDUAL_ZONE */ + +cfs_mem_cache_t * +mem_cache_find(const char *name, size_t objsize) +{ + return NULL; +} + +cfs_mem_cache_t *mem_cache_create(vm_size_t size, const char *name) +{ + cfs_mem_cache_t *mc = NULL; + + MALLOC(mc, cfs_mem_cache_t *, sizeof(cfs_mem_cache_t), M_TEMP, M_WAITOK|M_ZERO); + if (mc == NULL){ + CERROR("cfs_mem_cache created fail!\n"); + return NULL; + } + mc->mc_cache = OSMalloc_Tagalloc(name, OSMT_DEFAULT); + mc->mc_size = size; + return mc; +} + +void mem_cache_destroy(cfs_mem_cache_t *mc) +{ + OSMalloc_Tagfree(mc->mc_cache); + FREE(mc, M_TEMP); +} + +#define mem_cache_alloc(mc) OSMalloc((mc)->mc_size, (mc)->mc_cache) +#define mem_cache_alloc_nb(mc) OSMalloc_noblock((mc)->mc_size, (mc)->mc_cache) +#define mem_cache_free(mc, p) OSFree(p, (mc)->mc_size, (mc)->mc_cache) + +#endif /* !CFS_INDIVIDUAL_ZONE */ + +cfs_mem_cache_t * +cfs_mem_cache_create (const char *name, + size_t objsize, size_t off, unsigned long arg1) +{ + cfs_mem_cache_t *mc; + + mc = mem_cache_find(name, objsize); + if (mc) + return mc; + mc = mem_cache_create(objsize, name); + return mc; +} + +int cfs_mem_cache_destroy (cfs_mem_cache_t *cachep) +{ + mem_cache_destroy(cachep); + return 0; +} + +void *cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags) +{ + void *result; + + /* zalloc_canblock() is not exported... Emulate it. 
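 *
 * CFS_ALLOC_ATOMIC callers get the non-blocking allocation path; all other
 * callers must be able to block (preemption level 0) and use the blocking one.
 *
 * Caller-side sketch of this cache API, illustrative only and not part of
 * the original patch ("foo" and struct foo are hypothetical):
 *
 *     cfs_mem_cache_t *cache;
 *     struct foo      *obj;
 *
 *     cache = cfs_mem_cache_create("foo", sizeof(struct foo), 0, 0);
 *     obj   = cfs_mem_cache_alloc(cache, CFS_ALLOC_ZERO);
 *     ... use obj ...
 *     cfs_mem_cache_free(cache, obj);
 *     cfs_mem_cache_destroy(cache);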
*/ + if (flags & CFS_ALLOC_ATOMIC) { + result = (void *)mem_cache_alloc_nb(cachep); + } else { + LASSERT(get_preemption_level() == 0); + result = (void *)mem_cache_alloc(cachep); + } + if (result != NULL && (flags & CFS_ALLOC_ZERO)) + memset(result, 0, cachep->mc_size); + + return result; +} + +void cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp) +{ + mem_cache_free(cachep, objp); +} + +/* --------------------------------------------------------------------------- + * Page operations + * + * --------------------------------------------------------------------------- */ + +/* + * "Raw" pages + */ + +static unsigned int raw_pages = 0; +static cfs_mem_cache_t *raw_page_cache = NULL; + +static struct xnu_page_ops raw_page_ops; +static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = { + [XNU_PAGE_RAW] = &raw_page_ops +}; + +#if defined(LIBCFS_DEBUG) +static int page_type_is_valid(cfs_page_t *page) +{ + LASSERT(page != NULL); + return 0 <= page->type && page->type < XNU_PAGE_NTYPES; +} + +static int page_is_raw(cfs_page_t *page) +{ + return page->type == XNU_PAGE_RAW; +} +#endif + +static struct xnu_raw_page *as_raw(cfs_page_t *page) +{ + LASSERT(page_is_raw(page)); + return list_entry(page, struct xnu_raw_page, header); +} + +static void *raw_page_address(cfs_page_t *pg) +{ + return (void *)as_raw(pg)->virtual; +} + +static void *raw_page_map(cfs_page_t *pg) +{ + return (void *)as_raw(pg)->virtual; +} + +static void raw_page_unmap(cfs_page_t *pg) +{ +} + +static struct xnu_page_ops raw_page_ops = { + .page_map = raw_page_map, + .page_unmap = raw_page_unmap, + .page_address = raw_page_address +}; + +extern int get_preemption_level(void); + +struct list_head page_death_row; +spinlock_t page_death_row_phylax; + +static void raw_page_finish(struct xnu_raw_page *pg) +{ + -- raw_pages; + if (pg->virtual != NULL) + cfs_mem_cache_free(raw_page_cache, pg->virtual); + cfs_free(pg); +} + +void raw_page_death_row_clean(void) +{ + struct xnu_raw_page *pg; + + spin_lock(&page_death_row_phylax); + while (!list_empty(&page_death_row)) { + pg = container_of(page_death_row.next, + struct xnu_raw_page, link); + list_del(&pg->link); + spin_unlock(&page_death_row_phylax); + raw_page_finish(pg); + spin_lock(&page_death_row_phylax); + } + spin_unlock(&page_death_row_phylax); +} + +/* Free a "page" */ +void free_raw_page(struct xnu_raw_page *pg) +{ + if (!atomic_dec_and_test(&pg->count)) + return; + /* + * kmem_free()->vm_map_remove()->vm_map_delete()->lock_write() may + * block. (raw_page_done()->upl_abort() can block too) On the other + * hand, cfs_free_page() may be called in non-blockable context. To + * work around this, park pages on global list when cannot block. + */ + if (get_preemption_level() > 0) { + spin_lock(&page_death_row_phylax); + list_add(&pg->link, &page_death_row); + spin_unlock(&page_death_row_phylax); + } else { + raw_page_finish(pg); + raw_page_death_row_clean(); + } +} + +cfs_page_t *cfs_alloc_page(u_int32_t flags) +{ + struct xnu_raw_page *page; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + page = cfs_alloc(sizeof *page, flags); + if (page != NULL) { + page->virtual = cfs_mem_cache_alloc(raw_page_cache, flags); + if (page->virtual != NULL) { + ++ raw_pages; + page->header.type = XNU_PAGE_RAW; + atomic_set(&page->count, 1); + } else { + cfs_free(page); + page = NULL; + } + } + return page != NULL ? 
&page->header : NULL; +} + +void cfs_free_page(cfs_page_t *pages) +{ + free_raw_page(as_raw(pages)); +} + +void cfs_get_page(cfs_page_t *p) +{ + atomic_inc(&as_raw(p)->count); +} + +int cfs_put_page_testzero(cfs_page_t *p) +{ + return atomic_dec_and_test(&as_raw(p)->count); +} + +int cfs_page_count(cfs_page_t *p) +{ + return atomic_read(&as_raw(p)->count); +} + +/* + * Generic page operations + */ + +void *cfs_page_address(cfs_page_t *pg) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_address(pg); +} + +void *cfs_kmap(cfs_page_t *pg) +{ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_map(pg); +} + +void cfs_kunmap(cfs_page_t *pg) +{ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_unmap(pg); +} + +void xnu_page_ops_register(int type, struct xnu_page_ops *ops) +{ + LASSERT(0 <= type && type < XNU_PAGE_NTYPES); + LASSERT(ops != NULL); + LASSERT(page_ops[type] == NULL); + + page_ops[type] = ops; +} + +void xnu_page_ops_unregister(int type) +{ + LASSERT(0 <= type && type < XNU_PAGE_NTYPES); + LASSERT(page_ops[type] != NULL); + + page_ops[type] = NULL; +} + +/* + * Portable memory allocator API + */ +#ifdef HAVE_GET_PREEMPTION_LEVEL +extern int get_preemption_level(void); +#else +#define get_preemption_level() (0) +#endif + +void *cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + int mflags; + + mflags = 0; + if (flags & CFS_ALLOC_ATOMIC) { + mflags |= M_NOWAIT; + } else { + LASSERT(get_preemption_level() == 0); + mflags |= M_WAITOK; + } + + if (flags & CFS_ALLOC_ZERO) + mflags |= M_ZERO; + + return _MALLOC(nr_bytes, M_TEMP, mflags); +} + +void cfs_free(void *addr) +{ + return _FREE(addr, M_TEMP); +} + +void *cfs_alloc_large(size_t nr_bytes) +{ + LASSERT(get_preemption_level() == 0); + return _MALLOC(nr_bytes, M_TEMP, M_WAITOK); +} + +void cfs_free_large(void *addr) +{ + LASSERT(get_preemption_level() == 0); + return _FREE(addr, M_TEMP); +} + +/* + * Lookup cfs_zone_nob by sysctl.zone, if it cannot be + * found (first load of * libcfs since boot), allocate + * sysctl libcfs.zone. 
+ */ +int cfs_mem_init(void) +{ +#if CFS_INDIVIDUAL_ZONE + int rc; + size_t len; + + len = sizeof(struct cfs_zone_nob); + rc = sysctlbyname("libcfs.zone", + (void *)&cfs_zone_nob, &len, NULL, 0); + if (rc == ENOENT) { + /* zone_nob is not register in libcfs_sysctl */ + struct cfs_zone_nob *nob; + struct sysctl_oid *oid; + + assert(cfs_sysctl_isvalid()); + + nob = _MALLOC(sizeof(struct cfs_zone_nob), + M_TEMP, M_WAITOK | M_ZERO); + CFS_INIT_LIST_HEAD(&nob->z_link); + nob->z_nob = &nob->z_link; + oid = cfs_alloc_sysctl_struct(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, + "zone", nob, sizeof(struct cfs_zone_nob)); + if (oid == NULL) { + _FREE(nob, M_TEMP); + return -ENOMEM; + } + sysctl_register_oid(oid); + + cfs_zone_nob.z_nob = nob->z_nob; + } + spin_lock_init(&cfs_zone_guard); +#endif + CFS_INIT_LIST_HEAD(&page_death_row); + spin_lock_init(&page_death_row_phylax); + raw_page_cache = cfs_mem_cache_create("raw-page", CFS_PAGE_SIZE, 0, 0); + return 0; +} + +void cfs_mem_fini(void) +{ + raw_page_death_row_clean(); + spin_lock_done(&page_death_row_phylax); + cfs_mem_cache_destroy(raw_page_cache); + +#if CFS_INDIVIDUAL_ZONE + cfs_zone_nob.z_nob = NULL; + spin_lock_done(&cfs_zone_guard); +#endif +} diff --git a/libcfs/libcfs/darwin/darwin-module.c b/libcfs/libcfs/darwin/darwin-module.c new file mode 100644 index 0000000..10cb7d8 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-module.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET +#include +#include + +int libcfs_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct libcfs_ioctl_hdr *hdr; + struct libcfs_ioctl_data *data; + int err = 0; + ENTRY; + + hdr = (struct libcfs_ioctl_hdr *)buf; + data = (struct libcfs_ioctl_data *)buf; + /* libcfs_ioctl_data has been copied in by ioctl of osx */ + memcpy(buf, arg, sizeof(struct libcfs_ioctl_data)); + + if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { + CERROR("LIBCFS: version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len + buf >= end) { + CERROR("LIBCFS: user buffer exceeds kernel buffer\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { + CERROR("LIBCFS: user buffer too small for ioctl\n"); + RETURN(-EINVAL); + } + buf += size_round(sizeof(*data)); + + if (data->ioc_inllen1) { + err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1)); + if (err) + RETURN(err); + data->ioc_inlbuf1 = buf; + buf += size_round(data->ioc_inllen1); + } + + if (data->ioc_inllen2) { + copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2)); + if (err) + RETURN(err); + data->ioc_inlbuf2 = buf; + } + + RETURN(err); +} + +int libcfs_ioctl_popdata(void *arg, void *data, int size) +{ + /* + * system call will copy out ioctl arg to user space + */ + memcpy(arg, data, size); + return 0; +} + +extern struct cfs_psdev_ops libcfs_psdev_ops; +struct libcfs_device_userstate *mdev_state[16]; + +static int +libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p) +{ + struct libcfs_device_userstate *mstat = NULL; + int rc = 0; + int devid; + devid = minor(dev); + + if (devid > 16) return (ENXIO); + + if (libcfs_psdev_ops.p_open != NULL) + rc = -libcfs_psdev_ops.p_open(0, &mstat); + else + rc = EPERM; + if (rc == 0) + mdev_state[devid] = mstat; + return rc; +} + +static int +libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p) +{ + int devid; + devid = minor(dev); + int rc = 0; + + if (devid > 16) return (ENXIO); + + if (libcfs_psdev_ops.p_close != NULL) + 
rc = -libcfs_psdev_ops.p_close(0, mdev_state[devid]); + else + rc = EPERM; + if (rc == 0) + mdev_state[devid] = NULL; + return rc; +} + +static int +libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p) +{ + int rc = 0; + struct cfs_psdev_file pfile; + int devid; + devid = minor(dev); + + if (devid > 16) return (ENXIO); + + if (!is_suser()) + return (EPERM); + + pfile.off = 0; + pfile.private_data = mdev_state[devid]; + + if (libcfs_psdev_ops.p_ioctl != NULL) + rc = -libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); + else + rc = EPERM; + return rc; +} + +static struct cdevsw libcfs_devsw = +{ + .d_open = libcfs_psdev_open, + .d_close = libcfs_psdev_close, + .d_read = eno_rdwrt, + .d_write = eno_rdwrt, + .d_ioctl = libcfs_ioctl, + .d_stop = eno_stop, + .d_reset = eno_reset, + .d_ttys = NULL, + .d_select = eno_select, + .d_mmap = eno_mmap, + .d_strategy = eno_strat, + .d_getc = eno_getc, + .d_putc = eno_putc, + .d_type = 0 +}; + +cfs_psdev_t libcfs_dev = { + -1, + NULL, + "lnet", + &libcfs_devsw, + NULL +}; + +extern spinlock_t trace_cpu_serializer; +extern void cfs_sync_init(void); +extern void cfs_sync_fini(void); +extern int cfs_sysctl_init(void); +extern void cfs_sysctl_fini(void); +extern int cfs_mem_init(void); +extern int cfs_mem_fini(void); +extern void raw_page_death_row_clean(void); +extern void cfs_thread_agent_init(void); +extern void cfs_thread_agent_fini(void); +extern void cfs_symbol_init(void); +extern void cfs_symbol_fini(void); + +int libcfs_arch_init(void) +{ + cfs_sync_init(); + cfs_sysctl_init(); + cfs_mem_init(); + cfs_thread_agent_init(); + cfs_symbol_init(); + + spin_lock_init(&trace_cpu_serializer); + + return 0; +} + +void libcfs_arch_cleanup(void) +{ + spin_lock_done(&trace_cpu_serializer); + + cfs_symbol_fini(); + cfs_thread_agent_fini(); + cfs_mem_fini(); + cfs_sysctl_fini(); + cfs_sync_fini(); +} + diff --git a/libcfs/libcfs/darwin/darwin-prim.c b/libcfs/libcfs/darwin/darwin-prim.c new file mode 100644 index 0000000..cdcabd9 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-prim.c @@ -0,0 +1,581 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Darwin porting library + * Make things easy to port + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * cfs pseudo device, actually pseudo char device in darwin + */ +#define KLNET_MAJOR -1 + +kern_return_t cfs_psdev_register(cfs_psdev_t *dev) { + dev->index = cdevsw_add(KLNET_MAJOR, dev->devsw); + if (dev->index < 0) { + printf("libcfs_init: failed to allocate a major number!\n"); + return KERN_FAILURE; + } + dev->handle = devfs_make_node(makedev (dev->index, 0), + DEVFS_CHAR, UID_ROOT, + GID_WHEEL, 0666, (char *)dev->name, 0); + return KERN_SUCCESS; +} + +kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev) { + devfs_remove(dev->handle); + cdevsw_remove(dev->index, dev->devsw); + return KERN_SUCCESS; +} + +/* + * KPortal symbol register / unregister support + */ +struct rw_semaphore cfs_symbol_lock; +struct list_head cfs_symbol_list; + +void * +cfs_symbol_get(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + sym->ref ++; + break; + } + } + up_read(&cfs_symbol_lock); + if (sym != NULL) + return sym->value; + return NULL; +} + +kern_return_t +cfs_symbol_put(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + sym->ref --; + LASSERT(sym->ref >= 0); + break; + } + } + up_read(&cfs_symbol_lock); + LASSERT(sym != NULL); + + return 0; +} + +kern_return_t +cfs_symbol_register(const char *name, const void *value) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + struct cfs_symbol *new = NULL; + + MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO); + strncpy(new->name, name, CFS_SYMBOL_LEN); + new->value = (void *)value; + new->ref = 0; + CFS_INIT_LIST_HEAD(&new->sym_list); + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + up_write(&cfs_symbol_lock); + FREE(new, M_TEMP); + return KERN_NAME_EXISTS; + } + + } + list_add_tail(&new->sym_list, &cfs_symbol_list); + up_write(&cfs_symbol_lock); + + return KERN_SUCCESS; +} + +kern_return_t +cfs_symbol_unregister(const char *name) +{ + struct list_head *walker; + struct list_head *nxt; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each_safe(walker, nxt, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + FREE(sym, M_TEMP); + break; + } + } + up_write(&cfs_symbol_lock); + + return KERN_SUCCESS; +} + +void +cfs_symbol_init() +{ + CFS_INIT_LIST_HEAD(&cfs_symbol_list); + init_rwsem(&cfs_symbol_lock); +} + +void +cfs_symbol_fini() +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + FREE(sym, M_TEMP); + } + up_write(&cfs_symbol_lock); + + fini_rwsem(&cfs_symbol_lock); + return; +} + +struct kernel_thread_arg +{ + spinlock_t lock; + 
atomic_t inuse; + cfs_thread_t func; + void *arg; +}; + +struct kernel_thread_arg cfs_thread_arg; + +#define THREAD_ARG_FREE 0 +#define THREAD_ARG_HOLD 1 +#define THREAD_ARG_RECV 2 + +#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v) +#define get_targ_stat(a) atomic_read(&(a)->inuse) + +/* + * Hold the thread argument and set the status of thread_status + * to THREAD_ARG_HOLD, if the thread argument is held by other + * threads (It's THREAD_ARG_HOLD already), current-thread has to wait. + */ +#define thread_arg_hold(pta, _func, _arg) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_FREE) { \ + set_targ_stat((pta), THREAD_ARG_HOLD); \ + (pta)->arg = (void *)_arg; \ + (pta)->func = _func; \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + cfs_schedule(); \ + } while(1); \ + +/* + * Release the thread argument if the thread argument has been + * received by the child-thread (Status of thread_args is + * THREAD_ARG_RECV), otherwise current-thread has to wait. + * After release, the thread_args' status will be set to + * THREAD_ARG_FREE, and others can re-use the thread_args to + * create new kernel_thread. + */ +#define thread_arg_release(pta) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_RECV) { \ + (pta)->arg = NULL; \ + (pta)->func = NULL; \ + set_targ_stat(pta, THREAD_ARG_FREE); \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + cfs_schedule(); \ + } while(1) + +/* + * Receive thread argument (Used in child thread), set the status + * of thread_args to THREAD_ARG_RECV. + */ +#define __thread_arg_recv_fin(pta, _func, _arg, fin) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \ + if (fin) \ + set_targ_stat(pta, THREAD_ARG_RECV);\ + _arg = (pta)->arg; \ + _func = (pta)->func; \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + cfs_schedule(); \ + } while (1); \ + +/* + * Just set the thread_args' status to THREAD_ARG_RECV + */ +#define thread_arg_fin(pta) \ + do { \ + spin_lock(&(pta)->lock); \ + assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \ + set_targ_stat(pta, THREAD_ARG_RECV); \ + spin_unlock(&(pta)->lock); \ + } while(0) + +#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1) +#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0) + +void +cfs_thread_agent_init(void) +{ + set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE); + spin_lock_init(&cfs_thread_arg.lock); + cfs_thread_arg.arg = NULL; + cfs_thread_arg.func = NULL; +} + +void +cfs_thread_agent_fini(void) +{ + assert(get_targ_stat(&cfs_thread_arg) == THREAD_ARG_FREE); + + spin_lock_done(&cfs_thread_arg.lock); +} + +/* + * + * All requests to create kernel thread will create a new + * thread instance of cfs_thread_agent, one by one. + * cfs_thread_agent will call the caller's thread function + * with argument supplied by caller. + */ +void +cfs_thread_agent (void) +{ + cfs_thread_t func = NULL; + void *arg = NULL; + + thread_arg_recv(&cfs_thread_arg, func, arg); + /* printf("entry of thread agent (func: %08lx).\n", (void *)func); */ + assert(func != NULL); + func(arg); + /* printf("thread agent exit. 
(func: %08lx)\n", (void *)func); */ + (void) thread_terminate(current_thread()); +} + +extern thread_t kernel_thread(task_t task, void (*start)(void)); + +int +cfs_kernel_thread(cfs_thread_t func, void *arg, int flag) +{ + int ret = 0; + thread_t th = NULL; + + thread_arg_hold(&cfs_thread_arg, func, arg); + th = kernel_thread(kernel_task, cfs_thread_agent); + thread_arg_release(&cfs_thread_arg); + if (th == THREAD_NULL) + ret = -1; + return ret; +} + +void cfs_daemonize(char *str) +{ + snprintf(cfs_curproc_comm(), CFS_CURPROC_COMM_MAX, "%s", str); + return; +} + +/* + * XXX Liang: kexts cannot access sigmask in Darwin8. + * it's almost impossible for us to get/set signal mask + * without patching kernel. + * Should we provide these functions in xnu? + * + * These signal functions almost do nothing now, we + * need to investigate more about signal in Darwin. + */ +cfs_sigset_t cfs_get_blockedsigs() +{ + return (cfs_sigset_t)0; +} + +extern int block_procsigmask(struct proc *p, int bit); + +cfs_sigset_t cfs_block_allsigs() +{ + cfs_sigset_t old = 0; +#ifdef __DARWIN8__ +#else + block_procsigmask(current_proc(), -1); +#endif + return old; +} + +cfs_sigset_t cfs_block_sigs(sigset_t bit) +{ + cfs_sigset_t old = 0; +#ifdef __DARWIN8__ +#else + block_procsigmask(current_proc(), bit); +#endif + return old; +} + +void cfs_restore_sigs(cfs_sigset_t old) +{ +} + +int cfs_signal_pending(void) + +{ +#ifdef __DARWIN8__ + extern int thread_issignal(proc_t, thread_t, sigset_t); + return thread_issignal(current_proc(), current_thread(), (sigset_t)-1); +#else + return SHOULDissignal(current_proc(), current_uthread()) +#endif +} + +void cfs_clear_sigpending(void) +{ +#ifdef __DARWIN8__ +#else + clear_procsiglist(current_proc(), -1); +#endif +} + +#ifdef __DARWIN8__ + +#else /* !__DARWIN8__ */ + +void lustre_cone_in(boolean_t *state, funnel_t **cone) +{ + *cone = thread_funnel_get(); + if (*cone == network_flock) + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + else if (*cone == NULL) + *state = thread_funnel_set(kernel_flock, TRUE); +} + +void lustre_cone_ex(boolean_t state, funnel_t *cone) +{ + if (cone == network_flock) + thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + else if (cone == NULL) + (void) thread_funnel_set(kernel_flock, state); +} + +void lustre_net_in(boolean_t *state, funnel_t **cone) +{ + *cone = thread_funnel_get(); + if (*cone == kernel_flock) + thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + else if (*cone == NULL) + *state = thread_funnel_set(network_flock, TRUE); +} + +void lustre_net_ex(boolean_t state, funnel_t *cone) +{ + if (cone == kernel_flock) + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + else if (cone == NULL) + (void) thread_funnel_set(network_flock, state); +} +#endif /* !__DARWIN8__ */ + +void cfs_waitq_init(struct cfs_waitq *waitq) +{ + ksleep_chan_init(&waitq->wq_ksleep_chan); +} + +void cfs_waitlink_init(struct cfs_waitlink *link) +{ + ksleep_link_init(&link->wl_ksleep_link); +} + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + link->wl_waitq = waitq; + ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link) +{ + link->wl_waitq = waitq; + link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE; + ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +void cfs_waitq_forward(struct cfs_waitlink *link, + struct cfs_waitq *waitq) +{ + link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan; +} + +void cfs_waitq_del(struct 
cfs_waitq *waitq, + struct cfs_waitlink *link) +{ + ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +int cfs_waitq_active(struct cfs_waitq *waitq) +{ + return (1); +} + +void cfs_waitq_signal(struct cfs_waitq *waitq) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + ksleep_wake(&waitq->wq_ksleep_chan); +} + +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) +{ + ksleep_wake_nr(&waitq->wq_ksleep_chan, nr); +} + +void cfs_waitq_broadcast(struct cfs_waitq *waitq) +{ + ksleep_wake_all(&waitq->wq_ksleep_chan); +} + +void cfs_waitq_wait(struct cfs_waitlink *link, cfs_task_state_t state) +{ + ksleep_wait(&link->wl_waitq->wq_ksleep_chan, state); +} + +cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, + cfs_task_state_t state, + cfs_duration_t timeout) +{ + return ksleep_timedwait(&link->wl_waitq->wq_ksleep_chan, + state, timeout); +} + +typedef void (*ktimer_func_t)(void *); +void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg) +{ + ktimer_init(&t->t, (ktimer_func_t)func, arg); +} + +void cfs_timer_done(struct cfs_timer *t) +{ + ktimer_done(&t->t); +} + +void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline) +{ + ktimer_arm(&t->t, deadline); +} + +void cfs_timer_disarm(struct cfs_timer *t) +{ + ktimer_disarm(&t->t); +} + +int cfs_timer_is_armed(struct cfs_timer *t) +{ + return ktimer_is_armed(&t->t); +} + +cfs_time_t cfs_timer_deadline(struct cfs_timer *t) +{ + return ktimer_deadline(&t->t); +} + +void cfs_enter_debugger(void) +{ +#ifdef __DARWIN8__ + extern void Debugger(const char * reason); + Debugger("CFS"); +#else + extern void PE_enter_debugger(char *cause); + PE_enter_debugger("CFS"); +#endif +} + +int cfs_online_cpus(void) +{ + int activecpu; + size_t size; + +#ifdef __DARWIN8__ + size = sizeof(int); + sysctlbyname("hw.activecpu", &activecpu, &size, NULL, 0); + return activecpu; +#else + host_basic_info_data_t hinfo; + kern_return_t kret; + int count = HOST_BASIC_INFO_COUNT; +#define BSD_HOST 1 + kret = host_info(BSD_HOST, HOST_BASIC_INFO, &hinfo, &count); + if (kret == KERN_SUCCESS) + return (hinfo.avail_cpus); + return(-EINVAL); +#endif +} + +int cfs_ncpus(void) +{ + int ncpu; + size_t size; + + size = sizeof(int); + + sysctlbyname("hw.ncpu", &ncpu, &size, NULL, 0); + return ncpu; +} diff --git a/libcfs/libcfs/darwin/darwin-proc.c b/libcfs/libcfs/darwin/darwin-proc.c new file mode 100644 index 0000000..a001f5b --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-proc.c @@ -0,0 +1,467 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include + +#define LIBCFS_SYSCTL "libcfs" +#define LIBCFS_SYSCTL_SPRITE "sprite" +#define LIBCFS_SYSCTL_MAGIC 0xbabeface + +static struct libcfs_sysctl_sprite { + int ss_magic; + struct sysctl_oid_list *ss_link; +} libcfs_sysctl_sprite = { 0, NULL }; + +static cfs_sysctl_table_header_t *libcfs_table_header = NULL; +extern unsigned int libcfs_debug; +extern unsigned int libcfs_subsystem_debug; +extern unsigned int libcfs_printk; +extern unsigned int libcfs_console_ratelimit; +extern unsigned int libcfs_catastrophe; +extern atomic_t libcfs_kmemory; + +static int sysctl_debug_kernel SYSCTL_HANDLER_ARGS +{ +#error "Check me" + const int maxstr = 1024; + char *str; + int error; + + if (req->newptr == USER_ADDR_NULL) { + /* read request */ + return -EINVAL; + } + + /* write request */ + error = trace_allocate_string_buffer(&str, maxstr + 1); + if (error != 0) + return error; + + error = SYSCTL_IN(req, str, maxstr); + + /* NB str guaranteed terminted */ + if (error == 0) + error = tracefile_dump_all_pages(str); + + trace_free_string_buffer(str, maxstr + 1); + return error; +} + +static int sysctl_daemon_file SYSCTL_HANDLER_ARGS +{ +#error "Check me" + int error; + char *str; + + if (req->newptr == USER_ADDR_NULL) { + /* a read */ + tracefile_read_lock(); + + /* include terminating '\0' */ + error = SYSCTL_OUT(req, tracefile, strlen(tracefile) + 1); + + tracefile_read_unlock(); + return error; + } + + /* write request */ + error = trace_allocate_string_buffer(&str, TRACEFILE_NAME_SIZE); + if (error != 0) + return error; + + error = SYSCTL_IN(req, str, TRACEFILE_NAME_SIZE - 1); + + /* NB str guaranteed terminted */ + if (error == 0) + error = trace_daemon_command(str); + + trace_free_string_buffer(str, TRACEFILE_NAME_SIZE); + return error; +} + + +static int sysctl_debug_mb SYSCTL_HANDLER_ARGS +{ +#error "Check me" + long mb; + int error; + + if (req->newptr == USER_ADDR_NULL) { + /* read */ + mb = trace_get_debug_mb(); + error = SYSCTL_OUT(req, &mb, sizeof(mb)); + } else { + /* write */ + error = SYSCTL_IN(req, &mb, sizeof(mb)); + if (error == 0) + error = trace_set_debug_mb(mb); + } + + return error; +} + +/* + * sysctl table for lnet + */ + +SYSCTL_NODE (, OID_AUTO, lnet, CTLFLAG_RW, + 0, "lnet sysctl top"); + +SYSCTL_INT(_lnet, OID_AUTO, debug, + CTLTYPE_INT | CTLFLAG_RW , &libcfs_debug, + 0, "debug"); +SYSCTL_INT(_lnet, OID_AUTO, subsystem_debug, + CTLTYPE_INT | CTLFLAG_RW, &libcfs_subsystem_debug, + 0, "subsystem debug"); +SYSCTL_INT(_lnet, OID_AUTO, printk, + CTLTYPE_INT | CTLFLAG_RW, &libcfs_printk, + 0, "printk"); +SYSCTL_INT(_lnet, OID_AUTO, console_ratelimit, + CTLTYPE_INT | CTLFLAG_RW, &libcfs_console_ratelimit, + 0, "console_ratelimit"); +SYSCTL_STRING(_lnet, OID_AUTO, debug_path, + CTLTYPE_STRING | CTLFLAG_RW, debug_file_path, + 1024, "debug path"); +SYSCTL_INT(_lnet, OID_AUTO, memused, + CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_kmemory.counter, + 0, "memused"); +SYSCTL_INT(_lnet, OID_AUTO, catastrophe, + CTLTYPE_INT | CTLFLAG_RW, (int *)&libcfs_catastrophe, + 0, "catastrophe"); + +#error "check me" +SYSCTL_PROC(_lnet, OID_AUTO, debug_kernel, + CTLTYPE_STRING | CTLFLAG_W, 0, + 0, &sysctl_debug_kernel, "A", "debug_kernel"); +SYSCTL_PROC(_lnet, OID_AUTO, daemon_file, + CTLTYPE_STRING | CTLFLAG_RW, 0, + 0, &sysctl_daemon_file, "A", "daemon_file"); +SYSCTL_PROC(_lnet, OID_AUTO, debug_mb, + CTLTYPE_INT | CTLFLAG_RW, 0, + 0, &sysctl_debug_mb, "L", "debug_mb"); + + 
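/*
 * Illustrative sketch, not part of the original patch: the handlers above
 * follow the standard Darwin/BSD sysctl handler shape.  A minimal read/write
 * integer handler would look like this ("example_val" and "sysctl_example"
 * are hypothetical names):
 */
static int example_val;

static int sysctl_example SYSCTL_HANDLER_ARGS
{
        int error;

        if (req->newptr == USER_ADDR_NULL)
                /* read request: copy the current value out to user space */
                return SYSCTL_OUT(req, &example_val, sizeof(example_val));

        /* write request: copy the new value in from user space */
        error = SYSCTL_IN(req, &example_val, sizeof(example_val));
        return error;
}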
+static cfs_sysctl_table_t top_table[] = { + &sysctl__lnet, + &sysctl__lnet_debug, + &sysctl__lnet_subsystem_debug, + &sysctl__lnet_printk, + &sysctl__lnet_console_ratelimit, + &sysctl__lnet_debug_path, + &sysctl__lnet_memused, + &sysctl__lnet_catastrophe, + &sysctl__lnet_debug_kernel, + &sysctl__lnet_daemon_file, + &sysctl__lnet_debug_mb, + NULL +}; + +/* + * Register sysctl table + */ +cfs_sysctl_table_header_t * +cfs_register_sysctl_table (cfs_sysctl_table_t *table, int arg) +{ + cfs_sysctl_table_t item; + int i = 0; + + while ((item = table[i++]) != NULL) + sysctl_register_oid(item); + return table; +} + +/* + * Unregister sysctl table + */ +void +cfs_unregister_sysctl_table (cfs_sysctl_table_header_t *table) { + int i = 0; + cfs_sysctl_table_t item; + + while ((item = table[i++]) != NULL) + sysctl_unregister_oid(item); + return; +} + +/* + * Allocate a sysctl oid. + */ +static struct sysctl_oid * +cfs_alloc_sysctl(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, void *arg1, int arg2, const char *fmt, + int (*handler) SYSCTL_HANDLER_ARGS) +{ + struct sysctl_oid *oid; + char *sname = NULL; + char *sfmt = NULL; + + if (strlen(name) + 1 > CTL_MAXNAME) { + printf("libcfs: sysctl name: %s is too long.\n", name); + return NULL; + } + oid = (struct sysctl_oid*)_MALLOC(sizeof(struct sysctl_oid), + M_TEMP, M_WAITOK | M_ZERO); + if (oid == NULL) + return NULL; + + sname = (char *)_MALLOC(sizeof(CTL_MAXNAME), + M_TEMP, M_WAITOK | M_ZERO); + if (sname == NULL) + goto error; + strcpy(sname, name); + + sfmt = (char *)_MALLOC(4, M_TEMP, M_WAITOK | M_ZERO); + if (sfmt == NULL) + goto error; + strcpy(sfmt, fmt); + + if (parent == NULL) + oid->oid_parent = &sysctl__children; + else + oid->oid_parent = parent; + oid->oid_number = nbr; + oid->oid_kind = access; + oid->oid_name = sname; + oid->oid_handler = handler; + oid->oid_fmt = sfmt; + + if ((access & CTLTYPE) == CTLTYPE_NODE){ + /* It's a sysctl node */ + struct sysctl_oid_list *link; + + link = (struct sysctl_oid_list *)_MALLOC(sizeof(struct sysctl_oid_list), + M_TEMP, M_WAITOK | M_ZERO); + if (link == NULL) + goto error; + oid->oid_arg1 = link; + oid->oid_arg2 = 0; + } else { + oid->oid_arg1 = arg1; + oid->oid_arg2 = arg2; + } + + return oid; +error: + if (sfmt != NULL) + _FREE(sfmt, M_TEMP); + if (sname != NULL) + _FREE(sname, M_TEMP); + if (oid != NULL) + _FREE(oid, M_TEMP); + return NULL; +} + +void cfs_free_sysctl(struct sysctl_oid *oid) +{ + if (oid->oid_name != NULL) + _FREE((void *)oid->oid_name, M_TEMP); + if (oid->oid_fmt != NULL) + _FREE((void *)oid->oid_fmt, M_TEMP); + if ((oid->oid_kind & CTLTYPE_NODE != 0) && oid->oid_arg1) + /* XXX Liang: need to assert the list is empty */ + _FREE(oid->oid_arg1, M_TEMP); + _FREE(oid, M_TEMP); +} + +#define CFS_SYSCTL_ISVALID ((libcfs_sysctl_sprite.ss_magic == LIBCFS_SYSCTL_MAGIC) && \ + (libcfs_sysctl_sprite.ss_link != NULL)) + +int +cfs_sysctl_isvalid(void) +{ + return CFS_SYSCTL_ISVALID; +} + +struct sysctl_oid * +cfs_alloc_sysctl_node(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, int (*handler) SYSCTL_HANDLER_ARGS) +{ + if (parent == NULL && CFS_SYSCTL_ISVALID) + parent = libcfs_sysctl_sprite.ss_link; + return cfs_alloc_sysctl(parent, nbr, CTLTYPE_NODE | access, name, + NULL, 0, "N", handler); +} + +struct sysctl_oid * +cfs_alloc_sysctl_int(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, int *ptr, int val) +{ + if (parent == NULL && CFS_SYSCTL_ISVALID) + parent = libcfs_sysctl_sprite.ss_link; + return 
cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, + ptr, val, "I", sysctl_handle_int); +} + +struct sysctl_oid * +cfs_alloc_sysctl_long(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, int *ptr, int val) +{ + if (parent == NULL && CFS_SYSCTL_ISVALID) + parent = libcfs_sysctl_sprite.ss_link; + return cfs_alloc_sysctl(parent, nbr, CTLTYPE_INT | access, name, + ptr, val, "L", sysctl_handle_long); +} + +struct sysctl_oid * +cfs_alloc_sysctl_string(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, char *ptr, int len) +{ + if (parent == NULL && CFS_SYSCTL_ISVALID) + parent = libcfs_sysctl_sprite.ss_link; + return cfs_alloc_sysctl(parent, nbr, CTLTYPE_STRING | access, name, + ptr, len, "A", sysctl_handle_string); +} + +struct sysctl_oid * +cfs_alloc_sysctl_struct(struct sysctl_oid_list *parent, int nbr, int access, + const char *name, void *ptr, int size) +{ + if (parent == NULL && CFS_SYSCTL_ISVALID) + parent = libcfs_sysctl_sprite.ss_link; + return cfs_alloc_sysctl(parent, nbr, CTLTYPE_OPAQUE | access, name, + ptr, size, "S", sysctl_handle_opaque); +} + +/* no proc in osx */ +cfs_proc_dir_entry_t * +cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent) +{ + cfs_proc_dir_entry_t *entry; + MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO); + + return entry; +} + +void +cfs_free_proc_entry(cfs_proc_dir_entry_t *de){ + FREE(de, M_TEMP); + return; +}; + +void +cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry) +{ + cfs_free_proc_entry(entry); + return; +} + +int +insert_proc(void) +{ +#if 1 + if (!libcfs_table_header) + libcfs_table_header = cfs_register_sysctl_table(top_table, 0); +#endif + return 0; +} + +void +remove_proc(void) +{ +#if 1 + if (libcfs_table_header != NULL) + cfs_unregister_sysctl_table(libcfs_table_header); + libcfs_table_header = NULL; +#endif + return; +} + +int +cfs_sysctl_init(void) +{ + struct sysctl_oid *oid_root; + struct sysctl_oid *oid_sprite; + struct libcfs_sysctl_sprite *sprite; + size_t len; + int rc; + + len = sizeof(struct libcfs_sysctl_sprite); + rc = sysctlbyname("libcfs.sprite", + (void *)&libcfs_sysctl_sprite, &len, NULL, 0); + if (rc == 0) { + /* + * XXX Liang: assert (rc == 0 || rc == ENOENT) + * + * libcfs.sprite has been registered by previous + * loading of libcfs + */ + if (libcfs_sysctl_sprite.ss_magic != LIBCFS_SYSCTL_MAGIC) { + printf("libcfs: magic number of libcfs.sprite " + "is not right (%lx, %lx)\n", + libcfs_sysctl_sprite.ss_magic, + LIBCFS_SYSCTL_MAGIC); + return -1; + } + assert(libcfs_sysctl_sprite.ss_link != NULL); + printf("libcfs: registered libcfs.sprite found.\n"); + return 0; + } + oid_root = cfs_alloc_sysctl_node(NULL, OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, + LIBCFS_SYSCTL, 0); + if (oid_root == NULL) + return -1; + sysctl_register_oid(oid_root); + + sprite = (struct libcfs_sysctl_sprite *)_MALLOC(sizeof(struct libcfs_sysctl_sprite), + M_TEMP, M_WAITOK | M_ZERO); + if (sprite == NULL) { + sysctl_unregister_oid(oid_root); + cfs_free_sysctl(oid_root); + return -1; + } + sprite->ss_magic = LIBCFS_SYSCTL_MAGIC; + sprite->ss_link = (struct sysctl_oid_list *)oid_root->oid_arg1; + oid_sprite = cfs_alloc_sysctl_struct((struct sysctl_oid_list *)oid_root->oid_arg1, + OID_AUTO, CTLFLAG_RD | CTLFLAG_KERN, + LIBCFS_SYSCTL_SPRITE, sprite, + sizeof(struct libcfs_sysctl_sprite)); + if (oid_sprite == NULL) { + cfs_free_sysctl(oid_sprite); + sysctl_unregister_oid(oid_root); + cfs_free_sysctl(oid_root); + return -1; + } + 
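        /*
         * Publish the sprite as libcfs.sprite so that a later load of libcfs
         * finds this tree through the sysctlbyname() lookup above instead of
         * registering a second root node.
         */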
sysctl_register_oid(oid_sprite); + + libcfs_sysctl_sprite.ss_magic = sprite->ss_magic; + libcfs_sysctl_sprite.ss_link = sprite->ss_link; + + return 0; +} + +void +cfs_sysctl_fini(void) +{ + libcfs_sysctl_sprite.ss_magic = 0; + libcfs_sysctl_sprite.ss_link = NULL; +} + diff --git a/libcfs/libcfs/darwin/darwin-sync.c b/libcfs/libcfs/darwin/darwin-sync.c new file mode 100644 index 0000000..8b752e3 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-sync.c @@ -0,0 +1,1025 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +/* + * xnu_sync.c + * + * Created by nikita on Sun Jul 18 2004. + * + * XNU synchronization primitives. + */ + +/* + * This file contains very simplistic implementations of (saner) API for + * basic synchronization primitives: + * + * - spin-lock (kspin) + * + * - semaphore (ksem) + * + * - mutex (kmut) + * + * - condition variable (kcond) + * + * - wait-queue (ksleep_chan and ksleep_link) + * + * - timer (ktimer) + * + * A lot can be optimized here. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#ifdef __DARWIN8__ +# include +#else +# include +# include +# include +#endif + +#include +#include + +#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e)) + +#ifdef HAVE_GET_PREEMPTION_LEVEL +extern int get_preemption_level(void); +#else +#define get_preemption_level() (0) +#endif + +#if SMP +#ifdef __DARWIN8__ + +static lck_grp_t *cfs_lock_grp = NULL; +#warning "Verify definition of lck_spin_t hasn't been changed while building!" 
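/*
 * Illustrative usage sketch, not part of the original patch: the primitives
 * below are used in the conventional way, e.g. a predicate protected by a
 * kspin and waited for through a kcond ("state_lock", "state_cond" and
 * ready() are hypothetical):
 *
 *     kspin_lock(&state_lock);
 *     while (!ready())
 *             kcond_wait(&state_cond, &state_lock);
 *     ... ready() now holds and state_lock is still held ...
 *     kspin_unlock(&state_lock);
 *
 * The updater changes the state under state_lock and then calls
 * kcond_signal(&state_cond) (or kcond_broadcast()) to wake the waiters.
 */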
+ +/* hw_lock_* are not exported by Darwin8 */ +static inline void xnu_spin_init(xnu_spin_t *s) +{ + SLASSERT(cfs_lock_grp != NULL); + //*s = lck_spin_alloc_init(cfs_lock_grp, LCK_ATTR_NULL); + lck_spin_init((lck_spin_t *)s, cfs_lock_grp, LCK_ATTR_NULL); +} + +static inline void xnu_spin_done(xnu_spin_t *s) +{ + SLASSERT(cfs_lock_grp != NULL); + //lck_spin_free(*s, cfs_lock_grp); + //*s = NULL; + lck_spin_destroy((lck_spin_t *)s, cfs_lock_grp); +} + +#define xnu_spin_lock(s) lck_spin_lock((lck_spin_t *)(s)) +#define xnu_spin_unlock(s) lck_spin_unlock((lck_spin_t *)(s)) + +#warning "Darwin8 does not export lck_spin_try_lock" +#define xnu_spin_try(s) (1) + +#else /* DARWIN8 */ +extern void hw_lock_init(hw_lock_t); +extern void hw_lock_lock(hw_lock_t); +extern void hw_lock_unlock(hw_lock_t); +extern unsigned int hw_lock_to(hw_lock_t, unsigned int); +extern unsigned int hw_lock_try(hw_lock_t); +extern unsigned int hw_lock_held(hw_lock_t); + +#define xnu_spin_init(s) hw_lock_init(s) +#define xnu_spin_done(s) do {} while (0) +#define xnu_spin_lock(s) hw_lock_lock(s) +#define xnu_spin_unlock(s) hw_lock_unlock(s) +#define xnu_spin_try(s) hw_lock_try(s) +#endif /* DARWIN8 */ + +#else /* SMP */ +#define xnu_spin_init(s) do {} while (0) +#define xnu_spin_done(s) do {} while (0) +#define xnu_spin_lock(s) do {} while (0) +#define xnu_spin_unlock(s) do {} while (0) +#define xnu_spin_try(s) (1) +#endif /* SMP */ + +/* + * Warning: low level libcfs debugging code (libcfs_debug_msg(), for + * example), uses spin-locks, so debugging output here may lead to nasty + * surprises. + * + * In uniprocessor version of spin-lock. Only checks. + */ + +void kspin_init(struct kspin *spin) +{ + SLASSERT(spin != NULL); + xnu_spin_init(&spin->lock); + ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC); + ON_SYNC_DEBUG(spin->owner = NULL); +} + +void kspin_done(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == NULL); + xnu_spin_done(&spin->lock); +} + +void kspin_lock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner != current_thread()); + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + xnu_spin_lock(&spin->lock); + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread()); +} + +void kspin_unlock(struct kspin *spin) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == current_thread()); + ON_SYNC_DEBUG(spin->owner = NULL); + xnu_spin_unlock(&spin->lock); +} + +int kspin_trylock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + + if (xnu_spin_try(&spin->lock)) { + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread()); + return 1; + } else + return 0; +} + +#if XNU_SYNC_DEBUG +int kspin_islocked(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + return spin->owner == current_thread(); +} + +int kspin_isnotlocked(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + return spin->owner != current_thread(); +} +#endif + +/* + * read/write spin-lock + */ +void krw_spin_init(struct krw_spin *rwspin) +{ + SLASSERT(rwspin != NULL); + + kspin_init(&rwspin->guard); + rwspin->count = 0; + ON_SYNC_DEBUG(rwspin->magic = KRW_SPIN_MAGIC); +} + +void krw_spin_done(struct krw_spin *rwspin) +{ + SLASSERT(rwspin != NULL); + SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); + SLASSERT(rwspin->count == 0); + kspin_done(&rwspin->guard); +} + +void krw_spin_down_r(struct krw_spin *rwspin) +{ + int i; + SLASSERT(rwspin != NULL); + SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); + + kspin_lock(&rwspin->guard); + while(rwspin->count < 0) { + i = -1; + kspin_unlock(&rwspin->guard); + while (--i != 0 && rwspin->count < 0) + continue; + kspin_lock(&rwspin->guard); + } + ++ rwspin->count; + kspin_unlock(&rwspin->guard); +} + +void krw_spin_down_w(struct krw_spin *rwspin) +{ + int i; + SLASSERT(rwspin != NULL); + SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); + + kspin_lock(&rwspin->guard); + while (rwspin->count != 0) { + i = -1; + kspin_unlock(&rwspin->guard); + while (--i != 0 && rwspin->count != 0) + continue; + kspin_lock(&rwspin->guard); + } + rwspin->count = -1; + kspin_unlock(&rwspin->guard); +} + +void krw_spin_up_r(struct krw_spin *rwspin) +{ + SLASSERT(rwspin != NULL); + SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); + SLASSERT(rwspin->count > 0); + + kspin_lock(&rwspin->guard); + -- rwspin->count; + kspin_unlock(&rwspin->guard); +} + +void krw_spin_up_w(struct krw_spin *rwspin) +{ + SLASSERT(rwspin != NULL); + SLASSERT(rwspin->magic == KRW_SPIN_MAGIC); + SLASSERT(rwspin->count == -1); + + kspin_lock(&rwspin->guard); + rwspin->count = 0; + kspin_unlock(&rwspin->guard); +} + +/* + * semaphore + */ +#ifdef __DARWIN8__ + +#define xnu_waitq_init(q, a) do {} while (0) +#define xnu_waitq_done(q) do {} while (0) +#define xnu_waitq_wakeup_one(q, e, s) ({wakeup_one((void *)(e)); KERN_SUCCESS;}) +#define xnu_waitq_wakeup_all(q, e, s) ({wakeup((void *)(e)); KERN_SUCCESS;}) +#define xnu_waitq_assert_wait(q, e, s) assert_wait((e), s) + +#else /* DARWIN8 */ + +#define xnu_waitq_init(q, a) wait_queue_init((q), a) +#define xnu_waitq_done(q) do {} while (0) +#define xnu_waitq_wakeup_one(q, e, s) wait_queue_wakeup_one((q), (event_t)(e), s) +#define xnu_waitq_wakeup_all(q, e, s) wait_queue_wakeup_all((q), (event_t)(e), s) +#define xnu_waitq_assert_wait(q, e, s) wait_queue_assert_wait((q), (event_t)(e), s) + +#endif /* DARWIN8 */ +void ksem_init(struct ksem *sem, int value) +{ + SLASSERT(sem != NULL); + kspin_init(&sem->guard); + xnu_waitq_init(&sem->q, SYNC_POLICY_FIFO); + sem->value = value; + ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC); +} + +void ksem_done(struct ksem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + /* + * XXX nikita: cannot check that 
&sem->q is empty because + * wait_queue_empty() is Apple private API. + */ + kspin_done(&sem->guard); +} + +int ksem_up(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + + kspin_lock(&sem->guard); + sem->value += value; + if (sem->value == 0) + result = xnu_waitq_wakeup_one(&sem->q, sem, + THREAD_AWAKENED); + else + result = xnu_waitq_wakeup_all(&sem->q, sem, + THREAD_AWAKENED); + kspin_unlock(&sem->guard); + SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); + return (result == KERN_SUCCESS) ? 0 : 1; +} + +void ksem_down(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->guard); + while (sem->value < value) { + result = xnu_waitq_assert_wait(&sem->q, sem, + THREAD_UNINT); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + kspin_unlock(&sem->guard); + if (result == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + kspin_lock(&sem->guard); + } + sem->value -= value; + kspin_unlock(&sem->guard); +} + +int ksem_trydown(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + + kspin_lock(&sem->guard); + if (sem->value >= value) { + sem->value -= value; + result = 0; + } else + result = -EBUSY; + kspin_unlock(&sem->guard); + return result; +} + +void kmut_init(struct kmut *mut) +{ + SLASSERT(mut != NULL); + ksem_init(&mut->s, 1); + ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC); + ON_SYNC_DEBUG(mut->owner = NULL); +} + +void kmut_done(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner == NULL); + ksem_done(&mut->s); +} + +void kmut_lock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner != current_thread()); + SLASSERT(get_preemption_level() == 0); + + ksem_down(&mut->s, 1); + ON_SYNC_DEBUG(mut->owner = current_thread()); +} + +void kmut_unlock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner == current_thread()); + + ON_SYNC_DEBUG(mut->owner = NULL); + ksem_up(&mut->s, 1); +} + +int kmut_trylock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return ksem_trydown(&mut->s, 1); +} + +#if XNU_SYNC_DEBUG +int kmut_islocked(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return mut->owner == current_thread(); +} + +int kmut_isnotlocked(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return mut->owner != current_thread(); +} +#endif + + +void kcond_init(struct kcond *cond) +{ + SLASSERT(cond != NULL); + + kspin_init(&cond->guard); + cond->waiters = NULL; + ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC); +} + +void kcond_done(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(cond->waiters == NULL); + kspin_done(&cond->guard); +} + +void kcond_wait(struct kcond *cond, struct kspin *lock) +{ + struct kcond_link link; + + SLASSERT(cond != NULL); + SLASSERT(lock != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(lock)); + + ksem_init(&link.sem, 0); + kspin_lock(&cond->guard); + link.next = cond->waiters; + cond->waiters = &link; + kspin_unlock(&cond->guard); + kspin_unlock(lock); + + ksem_down(&link.sem, 1); + + 
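        /*
         * Pass through cond->guard once more: a signalling or broadcasting
         * thread may still hold the guard and be walking the waiter list,
         * so this keeps the on-stack link alive until it is finished.
         */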
kspin_lock(&cond->guard); + kspin_unlock(&cond->guard); + kspin_lock(lock); +} + +void kcond_wait_guard(struct kcond *cond) +{ + struct kcond_link link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + ksem_init(&link.sem, 0); + link.next = cond->waiters; + cond->waiters = &link; + kspin_unlock(&cond->guard); + + ksem_down(&link.sem, 1); + + kspin_lock(&cond->guard); +} + +void kcond_signal_guard(struct kcond *cond) +{ + struct kcond_link *link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + link = cond->waiters; + if (link != NULL) { + cond->waiters = link->next; + ksem_up(&link->sem, 1); + } +} + +void kcond_signal(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + + kspin_lock(&cond->guard); + kcond_signal_guard(cond); + kspin_unlock(&cond->guard); +} + +void kcond_broadcast_guard(struct kcond *cond) +{ + struct kcond_link *link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + for (link = cond->waiters; link != NULL; link = link->next) + ksem_up(&link->sem, 1); + cond->waiters = NULL; +} + +void kcond_broadcast(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + + kspin_lock(&cond->guard); + kcond_broadcast_guard(cond); + kspin_unlock(&cond->guard); +} + +void krw_sem_init(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + + kcond_init(&sem->cond); + sem->count = 0; + ON_SYNC_DEBUG(sem->magic = KRW_MAGIC); +} + +void krw_sem_done(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count == 0); + kcond_done(&sem->cond); +} + +void krw_sem_down_r(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->cond.guard); + while (sem->count < 0) + kcond_wait_guard(&sem->cond); + ++ sem->count; + kspin_unlock(&sem->cond.guard); +} + +int krw_sem_down_r_try(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + + kspin_lock(&sem->cond.guard); + if (sem->count < 0) { + kspin_unlock(&sem->cond.guard); + return -EBUSY; + } + ++ sem->count; + kspin_unlock(&sem->cond.guard); + return 0; +} + +void krw_sem_down_w(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->cond.guard); + while (sem->count != 0) + kcond_wait_guard(&sem->cond); + sem->count = -1; + kspin_unlock(&sem->cond.guard); +} + +int krw_sem_down_w_try(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + + kspin_lock(&sem->cond.guard); + if (sem->count != 0) { + kspin_unlock(&sem->cond.guard); + return -EBUSY; + } + sem->count = -1; + kspin_unlock(&sem->cond.guard); + return 0; +} + +void krw_sem_up_r(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count > 0); + + kspin_lock(&sem->cond.guard); + -- sem->count; + if (sem->count == 0) + kcond_broadcast_guard(&sem->cond); + kspin_unlock(&sem->cond.guard); +} + +void krw_sem_up_w(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count == -1); + + kspin_lock(&sem->cond.guard); + sem->count = 0; + kspin_unlock(&sem->cond.guard); + kcond_broadcast(&sem->cond); +} + +void ksleep_chan_init(struct ksleep_chan *chan) +{ + 
SLASSERT(chan != NULL); + + kspin_init(&chan->guard); + CFS_INIT_LIST_HEAD(&chan->waiters); + ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC); +} + +void ksleep_chan_done(struct ksleep_chan *chan) +{ + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(list_empty(&chan->waiters)); + kspin_done(&chan->guard); +} + +void ksleep_link_init(struct ksleep_link *link) +{ + SLASSERT(link != NULL); + + CFS_INIT_LIST_HEAD(&link->linkage); + link->flags = 0; + link->event = current_thread(); + link->hits = 0; + link->forward = NULL; + ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC); +} + +void ksleep_link_done(struct ksleep_link *link) +{ + SLASSERT(link != NULL); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + SLASSERT(list_empty(&link->linkage)); +} + +void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link) +{ + SLASSERT(chan != NULL); + SLASSERT(link != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + SLASSERT(list_empty(&link->linkage)); + + kspin_lock(&chan->guard); + if (link->flags & KSLEEP_EXCLUSIVE) + list_add_tail(&link->linkage, &chan->waiters); + else + list_add(&link->linkage, &chan->waiters); + kspin_unlock(&chan->guard); +} + +void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link) +{ + SLASSERT(chan != NULL); + SLASSERT(link != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + + kspin_lock(&chan->guard); + list_del_init(&link->linkage); + kspin_unlock(&chan->guard); +} + +static int has_hits(struct ksleep_chan *chan, event_t event) +{ + struct ksleep_link *scan; + + SLASSERT(kspin_islocked(&chan->guard)); + list_for_each_entry(scan, &chan->waiters, linkage) { + if (scan->event == event && scan->hits > 0) { + /* consume hit */ + -- scan->hits; + return 1; + } + } + return 0; +} + +static void add_hit(struct ksleep_chan *chan, event_t event) +{ + struct ksleep_link *scan; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + SLASSERT(kspin_islocked(&chan->guard)); + list_for_each_entry(scan, &chan->waiters, linkage) { + if (scan->event == event) { + ++ scan->hits; + break; + } + } +} + +void ksleep_wait(struct ksleep_chan *chan, cfs_task_state_t state) +{ + event_t event; + int result; + + ENTRY; + + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(get_preemption_level() == 0); + + event = current_thread(); + kspin_lock(&chan->guard); + if (!has_hits(chan, event)) { + result = assert_wait(event, state); + kspin_unlock(&chan->guard); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + if (result == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + } else + kspin_unlock(&chan->guard); + EXIT; +} + +/* + * Sleep on @chan for no longer than @timeout nano-seconds. Return remaining + * sleep time (non-zero only if thread was waken by a signal (not currently + * implemented), or waitq was already in the "signalled" state). + */ +int64_t ksleep_timedwait(struct ksleep_chan *chan, + cfs_task_state_t state, + __u64 timeout) +{ + event_t event; + + ENTRY; + + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(get_preemption_level() == 0); + + event = current_thread(); + kspin_lock(&chan->guard); + if (!has_hits(chan, event)) { + int result; + __u64 expire; + result = assert_wait(event, state); + if (timeout > 0) { + /* + * arm a timer. 
thread_set_timer()'s first argument is + * uint32_t, so we have to cook deadline ourselves. + */ + nanoseconds_to_absolutetime(timeout, &expire); + clock_absolutetime_interval_to_deadline(expire, &expire); + thread_set_timer_deadline(expire); + } + kspin_unlock(&chan->guard); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + if (result == THREAD_WAITING) + result = thread_block(THREAD_CONTINUE_NULL); + thread_cancel_timer(); + + if (result == THREAD_TIMED_OUT) + timeout = 0; + else { + __u64 now; + clock_get_uptime(&now); + if (expire > now) + absolutetime_to_nanoseconds(expire - now, &timeout); + else + timeout = 0; + } + } else { + /* just return timeout, because I've got event and don't need to wait */ + kspin_unlock(&chan->guard); + } + + RETURN(timeout); +} + +/* + * wake up single exclusive waiter (plus some arbitrary number of * + * non-exclusive) + */ +void ksleep_wake(struct ksleep_chan *chan) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + ksleep_wake_nr(chan, 1); +} + +/* + * wake up all waiters on @chan + */ +void ksleep_wake_all(struct ksleep_chan *chan) +{ + ENTRY; + ksleep_wake_nr(chan, 0); + EXIT; +} + +/* + * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary + * number of non-exclusive. If @nr is 0, wake up all waiters. + */ +void ksleep_wake_nr(struct ksleep_chan *chan, int nr) +{ + struct ksleep_link *scan; + int result; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + + kspin_lock(&chan->guard); + list_for_each_entry(scan, &chan->waiters, linkage) { + struct ksleep_chan *forward; + + forward = scan->forward; + if (forward != NULL) + kspin_lock(&forward->guard); + result = thread_wakeup(scan->event); + SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); + if (result == KERN_NOT_WAITING) { + ++ scan->hits; + if (forward != NULL) + add_hit(forward, scan->event); + } + if (forward != NULL) + kspin_unlock(&forward->guard); + if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0) + break; + } + kspin_unlock(&chan->guard); +} + +void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg) +{ + SLASSERT(t != NULL); + SLASSERT(func != NULL); + + kspin_init(&t->guard); + t->func = func; + t->arg = arg; + ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC); +} + +void ktimer_done(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + kspin_done(&t->guard); + ON_SYNC_DEBUG(t->magic = 0); +} + +static void ktimer_actor(void *arg0, void *arg1) +{ + struct ktimer *t; + int armed; + + t = arg0; + /* + * this assumes that ktimer's are never freed. 
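/*
 * ksleep_timedwait() above turns the relative nanosecond timeout into an
 * absolute deadline, blocks, and on an early wakeup returns the time still
 * remaining until that deadline.  A hedged user-space sketch of the same
 * bookkeeping with POSIX calls (the caller supplies the condvar, mutex and
 * predicate; nothing here is libcfs API):
 */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

static int64_t timedwait_remaining_ns(pthread_cond_t *cond,
                                      pthread_mutex_t *mtx,
                                      const int *signalled,
                                      int64_t timeout_ns)
{
        struct timespec now, deadline;
        int64_t remain;

        clock_gettime(CLOCK_REALTIME, &now);
        deadline.tv_sec  = now.tv_sec + (now.tv_nsec + timeout_ns) / 1000000000LL;
        deadline.tv_nsec = (now.tv_nsec + timeout_ns) % 1000000000LL;

        pthread_mutex_lock(mtx);
        while (!*signalled) {
                if (pthread_cond_timedwait(cond, mtx, &deadline) != 0)
                        break;                  /* timed out (ETIMEDOUT) */
        }
        pthread_mutex_unlock(mtx);

        clock_gettime(CLOCK_REALTIME, &now);    /* remaining time, if woken early */
        remain = (int64_t)(deadline.tv_sec - now.tv_sec) * 1000000000LL +
                 (deadline.tv_nsec - now.tv_nsec);
        return remain > 0 ? remain : 0;
}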
+ */ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + /* + * call actual timer function + */ + kspin_lock(&t->guard); + armed = t->armed; + t->armed = 0; + kspin_unlock(&t->guard); + + if (armed) + t->func(t->arg); +} + +extern boolean_t thread_call_func_cancel(thread_call_func_t, thread_call_param_t, boolean_t); +extern void thread_call_func_delayed(thread_call_func_t, thread_call_param_t, __u64); + +static void ktimer_disarm_locked(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + thread_call_func_cancel(ktimer_actor, t, FALSE); +} + +/* + * Received deadline is nanoseconds, but time checked by + * thread_call is absolute time (The abstime unit is equal to + * the length of one bus cycle, so the duration is dependent + * on the bus speed of the computer), so we need to convert + * nanotime to abstime by nanoseconds_to_absolutetime(). + * + * Refer to _delayed_call_timer(...) + * + * if thread_call_func_delayed is not exported in the future, + * we can use timeout() or bsd_timeout() to replace it. + */ +void ktimer_arm(struct ktimer *t, u_int64_t deadline) +{ + cfs_time_t abstime; + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + kspin_lock(&t->guard); + ktimer_disarm_locked(t); + t->armed = 1; + nanoseconds_to_absolutetime(deadline, &abstime); + thread_call_func_delayed(ktimer_actor, t, deadline); + kspin_unlock(&t->guard); +} + +void ktimer_disarm(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + kspin_lock(&t->guard); + t->armed = 0; + ktimer_disarm_locked(t); + kspin_unlock(&t->guard); +} + +int ktimer_is_armed(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + /* + * no locking---result is only a hint anyway. + */ + return t->armed; +} + +u_int64_t ktimer_deadline(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + return t->deadline; +} + +void cfs_sync_init(void) +{ +#ifdef __DARWIN8__ + /* Initialize lock group */ + cfs_lock_grp = lck_grp_alloc_init("libcfs sync", LCK_GRP_ATTR_NULL); +#endif +} + +void cfs_sync_fini(void) +{ +#ifdef __DARWIN8__ + /* + * XXX Liang: destroy lock group. As we haven't called lock_done + * for all locks, cfs_lock_grp may not be freed by kernel(reference + * count > 1). + */ + lck_grp_free(cfs_lock_grp); + cfs_lock_grp = NULL; +#endif +} +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/libcfs/darwin/darwin-tcpip.c b/libcfs/libcfs/darwin/darwin-tcpip.c new file mode 100644 index 0000000..c6609a7 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-tcpip.c @@ -0,0 +1,1339 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
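/*
 * ktimer_actor() above snapshots and clears the "armed" flag under the
 * guard before calling the user function, so a timer callback that was
 * already in flight when ktimer_disarm() ran becomes a no-op.  A minimal
 * user-space sketch of that disarm-race pattern (the utimer_* names are
 * invented, not libcfs API):
 */
#include <pthread.h>

struct utimer {
        pthread_mutex_t guard;
        int             armed;
        void          (*func)(void *);
        void           *arg;
};

static void utimer_actor(struct utimer *t)      /* invoked by a timer facility */
{
        int armed;

        pthread_mutex_lock(&t->guard);
        armed = t->armed;                       /* snapshot under the lock */
        t->armed = 0;
        pthread_mutex_unlock(&t->guard);

        if (armed)                              /* disarmed concurrently: do nothing */
                t->func(t->arg);
}

static void utimer_disarm(struct utimer *t)
{
        pthread_mutex_lock(&t->guard);
        t->armed = 0;                           /* a late callback is now a no-op */
        pthread_mutex_unlock(&t->guard);
}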
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Darwin porting library + * Make things easy to port + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +static __inline__ struct sockaddr_in +blank_sin() +{ + struct sockaddr_in blank = { sizeof(struct sockaddr_in), AF_INET }; + return (blank); +} + +void +libcfs_ipif_free_enumeration (char **names, int n) +{ + int i; + + LASSERT (n > 0); + + for (i = 0; i < n && names[i] != NULL; i++) + LIBCFS_FREE(names[i], IFNAMSIZ); + + LIBCFS_FREE(names, n * sizeof(*names)); +} + +#ifdef __DARWIN8__ +/* + * Darwin 8.x + * + * No hack kernel structre, all using KPI. + */ + +int +libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) +{ + struct ifreq ifr; + socket_t so; + __u32 val; + int nob; + int rc; + + rc = -sock_socket(PF_INET, SOCK_STREAM, 0, + NULL, NULL, &so); + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return rc; + } + + nob = strnlen(name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + CERROR("Interface name %s too long\n", name); + rc = -EINVAL; + goto out; + } + + CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); + bzero(&ifr, sizeof(ifr)); + strcpy(ifr.ifr_name, name); + rc = -sock_ioctl (so, SIOCGIFFLAGS, &ifr); + + if (rc != 0) { + CERROR("Can't get flags for interface %s\n", name); + goto out; + } + + if ((ifr.ifr_flags & IFF_UP) == 0) { + CDEBUG(D_NET, "Interface %s down\n", name); + *up = 0; + *ip = *mask = 0; + goto out; + } + + *up = 1; + + bzero(&ifr, sizeof(ifr)); + strcpy(ifr.ifr_name, name); + *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); + rc = -sock_ioctl(so, SIOCGIFADDR, &ifr); + + if (rc != 0) { + CERROR("Can't get IP address for interface %s\n", name); + goto out; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *ip = ntohl(val); + + bzero(&ifr, sizeof(ifr)); + strcpy(ifr.ifr_name, name); + *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); + rc = -sock_ioctl(so, SIOCGIFNETMASK, &ifr); + + if (rc != 0) { + CERROR("Can't get netmask for interface %s\n", name); + goto out; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *mask = ntohl(val); +out: + sock_close(so); + return rc; +} + +int +libcfs_ipif_enumerate (char ***namesp) +{ + /* Allocate and fill in 'names', returning # interfaces/error */ + char **names; + int toobig; + int nalloc; + int nfound; + socket_t so; + struct ifreq *ifr; + struct ifconf ifc; + int rc; + int nob; + int i; + + rc = -sock_socket(PF_INET, SOCK_STREAM, 0, + NULL, NULL, &so); + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (rc); + } + + nalloc = 16; /* first guess at max interfaces */ + toobig = 0; + for (;;) { + if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { + toobig = 1; + nalloc = CFS_PAGE_SIZE/sizeof(*ifr); + CWARN("Too many interfaces: only enumerating first %d\n", + nalloc); + } + + LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); + if (ifr == NULL) { + CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); + rc = -ENOMEM; + goto out0; + } + + ifc.ifc_buf = (char *)ifr; + ifc.ifc_len = nalloc * sizeof(*ifr); + +#if 1 + /* + * XXX Liang: + * sock_ioctl(..., SIOCGIFCONF, ...) is not supposed to be used in + * kernel space because it always try to copy result to userspace. 
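/*
 * libcfs_ipif_query() above is the in-kernel form of the classic
 * SIOCGIFFLAGS / SIOCGIFADDR / SIOCGIFNETMASK sequence.  For comparison, a
 * hedged user-space sketch of the same queries with standard BSD/POSIX
 * headers (ipif_query here is an invented name, not the libcfs function):
 */
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

/* fills *up, *ip and *mask (host byte order); returns 0 or -1 */
static int ipif_query(const char *name, int *up, unsigned *ip, unsigned *mask)
{
        struct ifreq ifr;
        int rc = -1;
        int fd = socket(PF_INET, SOCK_DGRAM, 0);

        if (fd < 0)
                return -1;
        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

        if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)
                goto out;
        *up = (ifr.ifr_flags & IFF_UP) != 0;

        if (ioctl(fd, SIOCGIFADDR, &ifr) < 0)
                goto out;
        *ip = ntohl(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr);

        if (ioctl(fd, SIOCGIFNETMASK, &ifr) < 0)
                goto out;
        *mask = ntohl(((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr);
        rc = 0;
out:
        close(fd);
        return rc;
}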
+ * So we can't get interface names via sock_ioctl(...,SIOCGIFCONF,...). + * I've filed a bug with Apple; let's wait... + */ + nfound = 0; + for (i = 0; i < 16; i++) { + struct ifreq en; + bzero(&en, sizeof(en)); + snprintf(en.ifr_name, IFNAMSIZ, "en%d", i); + rc = -sock_ioctl (so, SIOCGIFFLAGS, &en); + if (rc != 0) + continue; + strcpy(ifr[nfound++].ifr_name, en.ifr_name); + } + +#else /* NOT in use now */ + rc = -sock_ioctl(so, SIOCGIFCONF, (caddr_t)&ifc); + + if (rc < 0) { + CERROR ("Error %d enumerating interfaces\n", rc); + goto out1; + } + + nfound = ifc.ifc_len/sizeof(*ifr); + LASSERT (nfound <= nalloc); +#endif + + if (nfound < nalloc || toobig) + break; + + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + nalloc *= 2; + } + if (nfound == 0) + goto out1; + + LIBCFS_ALLOC(names, nfound * sizeof(*names)); + if (names == NULL) { + rc = -ENOMEM; + goto out1; + } + /* NULL out all names[i] */ + memset (names, 0, nfound * sizeof(*names)); + + for (i = 0; i < nfound; i++) { + + nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + /* no space for terminating NULL */ + CERROR("interface name %.*s too long (%d max)\n", + nob, ifr[i].ifr_name, IFNAMSIZ); + rc = -ENAMETOOLONG; + goto out2; + } + + LIBCFS_ALLOC(names[i], IFNAMSIZ); + if (names[i] == NULL) { + rc = -ENOMEM; + goto out2; + } + + memcpy(names[i], ifr[i].ifr_name, nob); + names[i][nob] = 0; + } + + *namesp = names; + rc = nfound; + +out2: + if (rc < 0) + libcfs_ipif_free_enumeration(names, nfound); +out1: + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); +out0: + sock_close(so); + return rc; + +} + +/* + * Public entry point of the socket upcall. + * + * An so_upcall can only be installed when a socket is created or accepted on + * Darwin 8.0, so we install libcfs_sock_upcall() as the upcall for all + * sockets at create/accept time; it calls the upcall provided by the user, + * which can be set up after the socket has been created/accepted. 
+ */ +static void libcfs_sock_upcall(socket_t so, void* arg, int waitf) +{ + cfs_socket_t *sock; + + sock = (cfs_socket_t *)arg; + LASSERT(sock->s_magic == CFS_SOCK_MAGIC); + + if ((sock->s_flags & CFS_SOCK_UPCALL) != 0 && sock->s_upcall != NULL) + sock->s_upcall(so, sock->s_upcallarg, waitf); + return; +} + +void libcfs_sock_set_cb(cfs_socket_t *sock, so_upcall callback, void *arg) +{ + sock->s_upcall = callback; + sock->s_upcallarg = arg; + sock->s_flags |= CFS_SOCK_UPCALL; + return; +} + +void libcfs_sock_reset_cb(cfs_socket_t *sock) +{ + sock->s_flags &= ~CFS_SOCK_UPCALL; + sock->s_upcall = NULL; + sock->s_upcallarg = NULL; + return; +} + +static int +libcfs_sock_create (cfs_socket_t **sockp, int *fatal, + __u32 local_ip, int local_port) +{ + struct sockaddr_in locaddr; + cfs_socket_t *sock; + int option; + int optlen; + int rc; + + /* All errors are fatal except bind failure if the port is in use */ + *fatal = 1; + + sock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); + if (!sock) { + CERROR("Can't allocate cfs_socket.\n"); + return -ENOMEM; + } + *sockp = sock; + sock->s_magic = CFS_SOCK_MAGIC; + + rc = -sock_socket(PF_INET, SOCK_STREAM, 0, + libcfs_sock_upcall, sock, &C2B_SOCK(sock)); + if (rc != 0) + goto out; + option = 1; + optlen = sizeof(option); + rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, + SO_REUSEADDR, &option, optlen); + if (rc != 0) + goto out; + + /* can't specify a local port without a local IP */ + LASSERT (local_ip == 0 || local_port != 0); + + if (local_ip != 0 || local_port != 0) { + bzero (&locaddr, sizeof (locaddr)); + locaddr.sin_len = sizeof(struct sockaddr_in); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons (local_port); + locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : INADDR_ANY; + rc = -sock_bind(C2B_SOCK(sock), (struct sockaddr *)&locaddr); + if (rc == -EADDRINUSE) { + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *fatal = 0; + goto out; + } + if (rc != 0) { + CERROR("Error trying to bind to port %d: %d\n", + local_port, rc); + goto out; + } + } + return 0; +out: + if (C2B_SOCK(sock) != NULL) + sock_close(C2B_SOCK(sock)); + FREE(sock, M_TEMP); + return rc; +} + +int +libcfs_sock_listen (cfs_socket_t **sockp, + __u32 local_ip, int local_port, int backlog) +{ + cfs_socket_t *sock; + int fatal; + int rc; + + rc = libcfs_sock_create(&sock, &fatal, local_ip, local_port); + if (rc != 0) { + if (!fatal) + CERROR("Can't create socket: port %d already in use\n", + local_port); + return rc; + + } + rc = -sock_listen(C2B_SOCK(sock), backlog); + if (rc == 0) { + *sockp = sock; + return 0; + } + + if (C2B_SOCK(sock) != NULL) + sock_close(C2B_SOCK(sock)); + FREE(sock, M_TEMP); + return rc; +} + +int +libcfs_sock_accept (cfs_socket_t **newsockp, cfs_socket_t *sock) +{ + cfs_socket_t *newsock; + int rc; + + newsock = _MALLOC(sizeof(cfs_socket_t), M_TEMP, M_WAITOK|M_ZERO); + if (!newsock) { + CERROR("Can't allocate cfs_socket.\n"); + return -ENOMEM; + } + newsock->s_magic = CFS_SOCK_MAGIC; + /* + * thread will sleep in sock_accept by calling of msleep(), + * it can be interrupted because msleep() use PCATCH as argument. 
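/*
 * libcfs_sock_create() above treats EADDRINUSE from bind() as the only
 * non-fatal error, so callers such as the connect/listen paths can retry
 * with a different port.  A user-space sketch of the same
 * create/SO_REUSEADDR/bind sequence (tcp_create_bound is an invented name):
 */
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* returns a bound fd, or -errno; *fatal is 0 only for "port already in use" */
static int tcp_create_bound(unsigned ip, int port, int *fatal)
{
        struct sockaddr_in addr;
        int option = 1;
        int fd;

        *fatal = 1;
        fd = socket(PF_INET, SOCK_STREAM, 0);
        if (fd < 0)
                return -errno;
        (void)setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &option, sizeof(option));

        memset(&addr, 0, sizeof(addr));
        addr.sin_family      = AF_INET;
        addr.sin_port        = htons(port);
        addr.sin_addr.s_addr = ip ? htonl(ip) : INADDR_ANY;

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                int rc = -errno;

                *fatal = (rc != -EADDRINUSE);   /* port in use: caller may retry */
                close(fd);
                return rc;
        }
        return fd;
}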
+ */ + rc = -sock_accept(C2B_SOCK(sock), NULL, 0, 0, + libcfs_sock_upcall, newsock, &C2B_SOCK(newsock)); + if (rc) { + if (C2B_SOCK(newsock) != NULL) + sock_close(C2B_SOCK(newsock)); + FREE(newsock, M_TEMP); + if ((sock->s_flags & CFS_SOCK_DOWN) != 0) + /* shutdown by libcfs_sock_abort_accept(), fake + * error number for lnet_acceptor() */ + rc = -EAGAIN; + return rc; + } + *newsockp = newsock; + return 0; +} + +void +libcfs_sock_abort_accept (cfs_socket_t *sock) +{ + /* + * XXX Liang: + * + * we want to wake up a thread blocked in sock_accept, but we don't + * know the address it is sleeping on, so we cannot + * wake it up directly. + * A thread sleeping in sock_accept will be woken up when: + * 1. it is interrupted by a signal + * 2. a new connection comes in (sonewconn) + * 3. the socket is disconnected (soisconnected) + * + * Because we can't send a signal to a thread directly (no KPI), the + * only thing that can be done here is to disconnect the socket (by + * sock_shutdown() or something else?). + * + * A shutdown request with SHUT_WR or SHUT_RDWR will + * be issued to the protocol. + * sock_shutdown()->tcp_usr_shutdown()->tcp_usrclosed()-> + * tcp_close()->soisdisconnected(), which wakes the thread up via + * wakeup((caddr_t)&so->so_timeo); + */ + sock->s_flags |= CFS_SOCK_DOWN; + sock_shutdown(C2B_SOCK(sock), SHUT_RDWR); +} + +int +libcfs_sock_read (cfs_socket_t *sock, void *buffer, int nob, int timeout) +{ + size_t rcvlen; + int rc; + cfs_duration_t to = cfs_time_seconds(timeout); + cfs_time_t then; + struct timeval tv; + + LASSERT(nob > 0); + + for (;;) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0, + }; + cfs_duration_usec(to, &tv); + rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_RCVTIMEO, + &tv, sizeof(tv)); + if (rc != 0) { + CERROR("Can't set socket recv timeout " + "%ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + + then = cfs_time_current(); + rc = -sock_receive(C2B_SOCK(sock), &msg, 0, &rcvlen); + to -= cfs_time_current() - then; + + if (rc != 0 && rc != -EWOULDBLOCK) + return rc; + if (rcvlen == nob) + return 0; + + if (to <= 0) + return -EAGAIN; + + buffer = ((char *)buffer) + rcvlen; + nob -= rcvlen; + } + return 0; +} + +int +libcfs_sock_write (cfs_socket_t *sock, void *buffer, int nob, int timeout) +{ + size_t sndlen; + int rc; + cfs_duration_t to = cfs_time_seconds(timeout); + cfs_time_t then; + struct timeval tv; + + LASSERT(nob > 0); + + for (;;) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0, + }; + + if (timeout != 0) { + cfs_duration_usec(to, &tv); + rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDTIMEO, + &tv, sizeof(tv)); + if (rc != 0) { + CERROR("Can't set socket send timeout " + "%ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + } + + then = cfs_time_current(); + rc = -sock_send(C2B_SOCK(sock), &msg, + ((timeout == 0) ? 
MSG_DONTWAIT : 0), &sndlen); + to -= cfs_time_current() - then; + + if (rc != 0 && rc != -EWOULDBLOCK) + return rc; + if (sndlen == nob) + return 0; + + if (to <= 0) + return -EAGAIN; + buffer = ((char *)buffer) + sndlen; + nob -= sndlen; + } + return 0; + +} + +int +libcfs_sock_getaddr (cfs_socket_t *sock, int remote, __u32 *ip, int *port) +{ + struct sockaddr_in sin; + int rc; + + if (remote != 0) + /* Get remote address */ + rc = -sock_getpeername(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); + else + /* Get local address */ + rc = -sock_getsockname(C2B_SOCK(sock), (struct sockaddr *)&sin, sizeof(sin)); + if (rc != 0) { + CERROR ("Error %d getting sock %s IP/port\n", + rc, remote ? "peer" : "local"); + return rc; + } + + if (ip != NULL) + *ip = ntohl (sin.sin_addr.s_addr); + + if (port != NULL) + *port = ntohs (sin.sin_port); + return 0; +} + +int +libcfs_sock_setbuf (cfs_socket_t *sock, int txbufsize, int rxbufsize) +{ + int option; + int rc; + + if (txbufsize != 0) { + option = txbufsize; + rc = -sock_setsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, + (char *)&option, sizeof (option)); + if (rc != 0) { + CERROR ("Can't set send buffer %d: %d\n", + option, rc); + return (rc); + } + } + + if (rxbufsize != 0) { + option = rxbufsize; + rc = -sock_setsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, + (char *)&option, sizeof (option)); + if (rc != 0) { + CERROR ("Can't set receive buffer %d: %d\n", + option, rc); + return (rc); + } + } + return 0; +} + +int +libcfs_sock_getbuf (cfs_socket_t *sock, int *txbufsize, int *rxbufsize) +{ + int option; + int optlen; + int rc; + + if (txbufsize != NULL) { + optlen = sizeof(option); + rc = -sock_getsockopt(C2B_SOCK(sock), SOL_SOCKET, SO_SNDBUF, + (char *)&option, &optlen); + if (rc != 0) { + CERROR ("Can't get send buffer size: %d\n", rc); + return (rc); + } + *txbufsize = option; + } + + if (rxbufsize != NULL) { + optlen = sizeof(option); + rc = -sock_getsockopt (C2B_SOCK(sock), SOL_SOCKET, SO_RCVBUF, + (char *)&option, &optlen); + if (rc != 0) { + CERROR ("Can't get receive buffer size: %d\n", rc); + return (rc); + } + *rxbufsize = option; + } + return 0; +} + +void +libcfs_sock_release (cfs_socket_t *sock) +{ + if (C2B_SOCK(sock) != NULL) { + sock_shutdown(C2B_SOCK(sock), 2); + sock_close(C2B_SOCK(sock)); + } + FREE(sock, M_TEMP); +} + +int +libcfs_sock_connect (cfs_socket_t **sockp, int *fatal, + __u32 local_ip, int local_port, + __u32 peer_ip, int peer_port) +{ + cfs_socket_t *sock; + struct sockaddr_in srvaddr; + int rc; + + rc = libcfs_sock_create(&sock, fatal, local_ip, local_port); + if (rc != 0) + return rc; + + bzero(&srvaddr, sizeof(srvaddr)); + srvaddr.sin_len = sizeof(struct sockaddr_in); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons(peer_port); + srvaddr.sin_addr.s_addr = htonl(peer_ip); + + rc = -sock_connect(C2B_SOCK(sock), (struct sockaddr *)&srvaddr, 0); + if (rc == 0) { + *sockp = sock; + return 0; + } + + *fatal = !(rc == -EADDRNOTAVAIL || rc == -EADDRINUSE); + CDEBUG(*fatal ? D_NETERROR : D_NET, + "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, + HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); + + libcfs_sock_release(sock); + return rc; +} + +#else /* !__DARWIN8__ */ + +/* + * To use bigger buffer for socket: + * 1. Increase nmbclusters (Cannot increased by sysctl because it's ready only, so + * we must patch kernel). + * 2. Increase net.inet.tcp.reass.maxsegments + * 3. Increase net.inet.tcp.sendspace + * 4. Increase net.inet.tcp.recvspace + * 5. 
Increase kern.ipc.maxsockbuf + */ +#define KSOCK_MAX_BUF (1152*1024) + +int +libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) +{ + struct socket *so; + struct ifreq ifr; + int nob; + int rc; + __u32 val; + CFS_DECL_FUNNEL_DATA; + + CFS_NET_IN; + rc = socreate(PF_INET, &so, SOCK_STREAM, 0); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (-rc); + } + nob = strnlen(name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + CERROR("Interface name %s too long\n", name); + rc = -EINVAL; + goto out; + } + + CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); + strcpy(ifr.ifr_name, name); + CFS_NET_IN; + rc = ifioctl(so, SIOCGIFFLAGS, (caddr_t)&ifr, current_proc()); + CFS_NET_EX; + + if (rc != 0) { + CERROR("Can't get flags for interface %s\n", name); + goto out; + } + if ((ifr.ifr_flags & IFF_UP) == 0) { + CDEBUG(D_NET, "Interface %s down\n", name); + *up = 0; + *ip = *mask = 0; + goto out; + } + + *up = 1; + strcpy(ifr.ifr_name, name); + *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); + CFS_NET_IN; + rc = ifioctl(so, SIOCGIFADDR, (caddr_t)&ifr, current_proc()); + CFS_NET_EX; + + if (rc != 0) { + CERROR("Can't get IP address for interface %s\n", name); + goto out; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *ip = ntohl(val); + + strcpy(ifr.ifr_name, name); + *((struct sockaddr_in *)&ifr.ifr_addr) = blank_sin(); + CFS_NET_IN; + rc = ifioctl(so, SIOCGIFNETMASK, (caddr_t)&ifr, current_proc()); + CFS_NET_EX; + + if (rc != 0) { + CERROR("Can't get netmask for interface %s\n", name); + goto out; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *mask = ntohl(val); +out: + CFS_NET_IN; + soclose(so); + CFS_NET_EX; + return -rc; +} + +int +libcfs_ipif_enumerate (char ***namesp) +{ + /* Allocate and fill in 'names', returning # interfaces/error */ + char **names; + int toobig; + int nalloc; + int nfound; + struct socket *so; + struct ifreq *ifr; + struct ifconf ifc; + int rc; + int nob; + int i; + CFS_DECL_FUNNEL_DATA; + + CFS_NET_IN; + rc = socreate(PF_INET, &so, SOCK_STREAM, 0); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (-rc); + } + + nalloc = 16; /* first guess at max interfaces */ + toobig = 0; + for (;;) { + if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { + toobig = 1; + nalloc = CFS_PAGE_SIZE/sizeof(*ifr); + CWARN("Too many interfaces: only enumerating first %d\n", + nalloc); + } + + LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); + if (ifr == NULL) { + CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); + rc = -ENOMEM; + goto out0; + } + + ifc.ifc_buf = (char *)ifr; + ifc.ifc_len = nalloc * sizeof(*ifr); + + CFS_NET_IN; + rc = -ifioctl(so, SIOCGIFCONF, (caddr_t)&ifc, current_proc()); + CFS_NET_EX; + + if (rc < 0) { + CERROR ("Error %d enumerating interfaces\n", rc); + goto out1; + } + + nfound = ifc.ifc_len/sizeof(*ifr); + LASSERT (nfound <= nalloc); + + if (nfound < nalloc || toobig) + break; + + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + nalloc *= 2; + } + if (nfound == 0) + goto out1; + + LIBCFS_ALLOC(names, nfound * sizeof(*names)); + if (names == NULL) { + rc = -ENOMEM; + goto out1; + } + /* NULL out all names[i] */ + memset (names, 0, nfound * sizeof(*names)); + + for (i = 0; i < nfound; i++) { + + nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + /* no space for terminating NULL */ + CERROR("interface name %.*s too long (%d max)\n", + nob, ifr[i].ifr_name, IFNAMSIZ); + rc = -ENAMETOOLONG; + goto out2; + } + + LIBCFS_ALLOC(names[i], IFNAMSIZ); + if 
(names[i] == NULL) { + rc = -ENOMEM; + goto out2; + } + + memcpy(names[i], ifr[i].ifr_name, nob); + names[i][nob] = 0; + } + + *namesp = names; + rc = nfound; + +out2: + if (rc < 0) + libcfs_ipif_free_enumeration(names, nfound); +out1: + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); +out0: + CFS_NET_IN; + soclose(so); + CFS_NET_EX; + return rc; +} + +static int +libcfs_sock_create (struct socket **sockp, int *fatal, + __u32 local_ip, int local_port) +{ + struct sockaddr_in locaddr; + struct socket *so; + struct sockopt sopt; + int option; + int rc; + CFS_DECL_FUNNEL_DATA; + + *fatal = 1; + CFS_NET_IN; + rc = socreate(PF_INET, &so, SOCK_STREAM, 0); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (-rc); + } + + bzero(&sopt, sizeof sopt); + option = 1; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_REUSEADDR; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + CFS_NET_IN; + rc = sosetopt(so, &sopt); + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't set sock reuse address: %d\n", rc); + goto out; + } + /* can't specify a local port without a local IP */ + LASSERT (local_ip == 0 || local_port != 0); + + if (local_ip != 0 || local_port != 0) { + bzero (&locaddr, sizeof (locaddr)); + locaddr.sin_len = sizeof(struct sockaddr_in); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons (local_port); + locaddr.sin_addr.s_addr = (local_ip != 0) ? htonl(local_ip) : + INADDR_ANY; + + rc = sobind(so, (struct sockaddr *)&locaddr); + if (rc == EADDRINUSE) { + CFS_NET_EX; + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *fatal = 0; + goto out; + } + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n", + HIPQUAD(local_ip), rc); + goto out; + } + } + *sockp = so; + return 0; +out: + CFS_NET_IN; + soclose(so); + CFS_NET_EX; + return -rc; +} + +int +libcfs_sock_listen (struct socket **sockp, + __u32 local_ip, int local_port, int backlog) +{ + int fatal; + int rc; + CFS_DECL_FUNNEL_DATA; + + rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); + if (rc != 0) { + if (!fatal) + CERROR("Can't create socket: port %d already in use\n", + local_port); + return rc; + } + CFS_NET_IN; + rc = solisten(*sockp, backlog); + CFS_NET_EX; + if (rc == 0) + return 0; + CERROR("Can't set listen backlog %d: %d\n", backlog, rc); + CFS_NET_IN; + soclose(*sockp); + CFS_NET_EX; + return -rc; +} + +int +libcfs_sock_accept (struct socket **newsockp, struct socket *sock) +{ + struct socket *so; + struct sockaddr *sa; + int error, s; + CFS_DECL_FUNNEL_DATA; + + CFS_NET_IN; + s = splnet(); + if ((sock->so_options & SO_ACCEPTCONN) == 0) { + splx(s); + CFS_NET_EX; + return (-EINVAL); + } + + if ((sock->so_state & SS_NBIO) && sock->so_comp.tqh_first == NULL) { + splx(s); + CFS_NET_EX; + return (-EWOULDBLOCK); + } + + error = 0; + while (TAILQ_EMPTY(&sock->so_comp) && sock->so_error == 0) { + if (sock->so_state & SS_CANTRCVMORE) { + sock->so_error = ECONNABORTED; + break; + } + error = tsleep((caddr_t)&sock->so_timeo, PSOCK | PCATCH, + "accept", 0); + if (error) { + splx(s); + CFS_NET_EX; + return (-error); + } + } + if (sock->so_error) { + error = sock->so_error; + sock->so_error = 0; + splx(s); + CFS_NET_EX; + return (-error); + } + + /* + * At this point we know that there is at least one connection + * ready to be accepted. Remove it from the queue prior to + * allocating the file descriptor for it since falloc() may + * block allowing another process to accept the connection + * instead. 
+ */ + so = TAILQ_FIRST(&sock->so_comp); + TAILQ_REMOVE(&sock->so_comp, so, so_list); + sock->so_qlen--; + + so->so_state &= ~SS_COMP; + so->so_head = NULL; + sa = 0; + (void) soaccept(so, &sa); + + *newsockp = so; + FREE(sa, M_SONAME); + splx(s); + CFS_NET_EX; + return (-error); +} + +void +libcfs_sock_abort_accept (struct socket *sock) +{ + wakeup(&sock->so_timeo); +} + +/* + * XXX Liang: timeout for write is not supported yet. + */ +int +libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + CFS_DECL_NET_DATA; + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct uio suio = { + .uio_iov = &iov, + .uio_iovcnt = 1, + .uio_offset = 0, + .uio_resid = nob, + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_WRITE, + .uio_procp = NULL + }; + + CFS_NET_IN; + rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0); + CFS_NET_EX; + + if (rc != 0) { + if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ + rc == EWOULDBLOCK)) + rc = 0; + if ( rc != 0 ) + return -rc; + rc = nob - suio.uio_resid; + buffer = ((char *)buffer) + rc; + nob = suio.uio_resid; + continue; + } + break; + } + return (0); +} + +/* + * XXX Liang: timeout for read is not supported yet. + */ +int +libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + CFS_DECL_NET_DATA; + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct uio ruio = { + .uio_iov = &iov, + .uio_iovcnt = 1, + .uio_offset = 0, + .uio_resid = nob, + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_READ, + .uio_procp = NULL + }; + + CFS_NET_IN; + rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); + CFS_NET_EX; + + if (rc != 0) { + if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ + rc == EWOULDBLOCK)) + rc = 0; + if (rc != 0) + return -rc; + rc = nob - ruio.uio_resid; + buffer = ((char *)buffer) + rc; + nob = ruio.uio_resid; + continue; + } + break; + } + return (0); +} + +int +libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) +{ + struct sockopt sopt; + int rc = 0; + int option; + CFS_DECL_NET_DATA; + + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + + if (txbufsize != 0) { + option = txbufsize; + if (option > KSOCK_MAX_BUF) + option = KSOCK_MAX_BUF; + + sopt.sopt_name = SO_SNDBUF; + CFS_NET_IN; + rc = sosetopt(sock, &sopt); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't set send buffer %d: %d\n", + option, rc); + + return -rc; + } + } + + if (rxbufsize != 0) { + option = rxbufsize; + sopt.sopt_name = SO_RCVBUF; + CFS_NET_IN; + rc = sosetopt(sock, &sopt); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't set receive buffer %d: %d\n", + option, rc); + return -rc; + } + } + return 0; +} + +int +libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) +{ + struct sockaddr_in *sin; + struct sockaddr *sa = NULL; + int rc; + CFS_DECL_NET_DATA; + + if (remote != 0) { + CFS_NET_IN; + rc = sock->so_proto->pr_usrreqs->pru_peeraddr(sock, &sa); + CFS_NET_EX; + + if (rc != 0) { + if (sa) FREE(sa, M_SONAME); + CERROR ("Error %d getting sock peer IP\n", rc); + return -rc; + } + } else { + CFS_NET_IN; + rc = sock->so_proto->pr_usrreqs->pru_sockaddr(sock, &sa); + CFS_NET_EX; + if (rc != 0) { + if (sa) FREE(sa, M_SONAME); + CERROR ("Error %d getting sock local IP\n", rc); + return -rc; + } + } + if (sa != 
NULL) { + sin = (struct sockaddr_in *)sa; + if (ip != NULL) + *ip = ntohl (sin->sin_addr.s_addr); + if (port != NULL) + *port = ntohs (sin->sin_port); + if (sa) + FREE(sa, M_SONAME); + } + return 0; +} + +int +libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) +{ + struct sockopt sopt; + int rc; + CFS_DECL_NET_DATA; + + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = SOL_SOCKET; + + if (txbufsize != NULL) { + sopt.sopt_val = txbufsize; + sopt.sopt_valsize = sizeof(*txbufsize); + sopt.sopt_name = SO_SNDBUF; + CFS_NET_IN; + rc = sogetopt(sock, &sopt); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't get send buffer size: %d\n", rc); + return -rc; + } + } + + if (rxbufsize != NULL) { + sopt.sopt_val = rxbufsize; + sopt.sopt_valsize = sizeof(*rxbufsize); + sopt.sopt_name = SO_RCVBUF; + CFS_NET_IN; + rc = sogetopt(sock, &sopt); + CFS_NET_EX; + if (rc != 0) { + CERROR ("Can't get receive buffer size: %d\n", rc); + return -rc; + } + } + return 0; +} + +int +libcfs_sock_connect (struct socket **sockp, int *fatal, + __u32 local_ip, int local_port, + __u32 peer_ip, int peer_port) +{ + struct sockaddr_in srvaddr; + struct socket *so; + int s; + int rc; + CFS_DECL_FUNNEL_DATA; + + rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); + if (rc != 0) + return rc; + so = *sockp; + bzero(&srvaddr, sizeof(srvaddr)); + srvaddr.sin_len = sizeof(struct sockaddr_in); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons (peer_port); + srvaddr.sin_addr.s_addr = htonl (peer_ip); + + CFS_NET_IN; + rc = soconnect(so, (struct sockaddr *)&srvaddr); + if (rc != 0) { + CFS_NET_EX; + if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) + CDEBUG(D_NETERROR, + "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, + HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); + goto out; + } + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n"); + (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz); + } + if ((rc = so->so_error) != 0) { + so->so_error = 0; + splx(s); + CFS_NET_EX; + CDEBUG(D_NETERROR, + "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, + HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); + goto out; + } + LASSERT(so->so_state & SS_ISCONNECTED); + splx(s); + CFS_NET_EX; + if (sockp) + *sockp = so; + return (0); +out: + CFS_NET_IN; + soshutdown(so, 2); + soclose(so); + CFS_NET_EX; + return (-rc); +} + +void +libcfs_sock_release (struct socket *sock) +{ + CFS_DECL_FUNNEL_DATA; + CFS_NET_IN; + soshutdown(sock, 0); + CFS_NET_EX; +} + +#endif diff --git a/libcfs/libcfs/darwin/darwin-tracefile.c b/libcfs/libcfs/darwin/darwin-tracefile.c new file mode 100644 index 0000000..e672ad5 --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-tracefile.c @@ -0,0 +1,191 @@ + +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE +#include +#include +#include "tracefile.h" + +/* + * We can't support smp tracefile currently. + * Everything is put on one cpu. + */ + +#define M_TCD_MAX_PAGES (128 * 1280) + +static long max_permit_mb = (64 * 1024); + +spinlock_t trace_cpu_serializer; + +/* + * thread currently executing tracefile code or NULL if none does. Used to + * detect recursive calls to libcfs_debug_msg(). 
+ */ +static thread_t trace_owner = NULL; + +extern int get_preemption_level(void); +extern atomic_t tage_allocated; + +struct rw_semaphore tracefile_sem; + +int tracefile_init_arch() { + init_rwsem(&tracefile_sem); +#error "Todo: initialise per-cpu console buffers" + return 0; +} + +void tracefile_fini_arch() { +} + +void tracefile_read_lock() { + down_read(&tracefile_sem); +} + +void tracefile_read_unlock() { + up_read(&tracefile_sem); +} + +void tracefile_write_lock() { + down_write(&tracefile_sem); +} + +void tracefile_write_unlock() { + up_write(&tracefile_sem); +} + +char *trace_get_console_buffer(void) +{ +#error "todo: return a per-cpu/interrupt console buffer and disable pre-emption" +} + +void trace_put_console_buffer(char *buffer) +{ +#error "todo: re-enable pre-emption" +} + +struct trace_cpu_data *trace_get_tcd(void) +{ + struct trace_cpu_data *tcd; + int nr_pages; + struct list_head pages; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + /* + * debugging check for recursive call to libcfs_debug_msg() + */ + if (trace_owner == current_thread()) { + /* + * Cannot assert here. + */ + printk(KERN_EMERG "recursive call to %s", __FUNCTION__); + /* + * "The death of God left the angels in a strange position." + */ + cfs_enter_debugger(); + } + tcd = &trace_data[0].tcd; + CFS_INIT_LIST_HEAD(&pages); + if (get_preemption_level() == 0) + nr_pages = trace_refill_stock(tcd, CFS_ALLOC_STD, &pages); + else + nr_pages = 0; + spin_lock(&trace_cpu_serializer); + trace_owner = current_thread(); + tcd->tcd_cur_stock_pages += nr_pages; + list_splice(&pages, &tcd->tcd_stock_pages); + return tcd; +} + +extern void raw_page_death_row_clean(void); + +void __trace_put_tcd(struct trace_cpu_data *tcd) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + LASSERT(trace_owner == current_thread()); + trace_owner = NULL; + spin_unlock(&trace_cpu_serializer); + if (get_preemption_level() == 0) + /* purge all pending pages */ + raw_page_death_row_clean(); +} + +int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) +{ + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + /* XNU has global tcd, and all pages are owned by it */ + return 1; +} + +void +set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, + const int line, unsigned long stack) +{ + struct timeval tv; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + do_gettimeofday(&tv); + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = cfs_curproc_pid(); + header->ph_line_num = line; + header->ph_extern_pid = (__u32)current_thread(); +} + +void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn) +{ + char *prefix = "Lustre", *ptype = KERN_INFO; + + /* + * XXX nikita: do NOT call libcfs_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
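/*
 * trace_get_tcd() above detects recursive entry into the tracing code by
 * remembering which thread currently owns the trace data (trace_owner) and
 * comparing it with current_thread() before taking the lock.  The same
 * debugging guard in user space with POSIX threads (a hedged sketch; all
 * names are invented):
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t owner_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t       owner_tid;
static int             owner_set;       /* pthread_t has no reserved "none" value */

static void trace_enter(void)
{
        /* racy by design: only a debugging hint, checked before blocking */
        if (owner_set && pthread_equal(owner_tid, pthread_self()))
                fprintf(stderr, "recursive call to %s\n", __func__);

        pthread_mutex_lock(&owner_lock);
        owner_tid = pthread_self();
        owner_set = 1;
}

static void trace_leave(void)
{
        owner_set = 0;
        pthread_mutex_unlock(&owner_lock);
}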
+ */ + if ((mask & D_EMERG) != 0) { + prefix = "LustreError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "LustreError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "Lustre"; + ptype = KERN_WARNING; + } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) { + prefix = "Lustre"; + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %*s", + ptype, prefix, hdr->ph_pid, hdr->ph_extern_pid, + file, hdr->ph_line_num, fn, len, buf); + } +} + +int trace_max_debug_mb(void) +{ + return max_permit_mb; +} + +void +trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) +{ +#error "tbd" +} diff --git a/libcfs/libcfs/darwin/darwin-utils.c b/libcfs/libcfs/darwin/darwin-utils.c new file mode 100644 index 0000000..cfd7a2d --- /dev/null +++ b/libcfs/libcfs/darwin/darwin-utils.c @@ -0,0 +1,578 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Darwin porting library + * Make things easy to port + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include +#include +#include + +#include + +#ifndef isspace +inline int +isspace(char c) +{ + return (c == ' ' || c == '\t' || c == '\n' || c == '\12'); +} +#endif + +char * strpbrk(const char * cs,const char * ct) +{ + const char *sc1,*sc2; + + for( sc1 = cs; *sc1 != '\0'; ++sc1) { + for( sc2 = ct; *sc2 != '\0'; ++sc2) { + if (*sc1 == *sc2) + return (char *) sc1; + } + } + return NULL; +} + +char * strsep(char **s, const char *ct) +{ + char *sbegin = *s, *end; + + if (sbegin == NULL) + return NULL; + end = strpbrk(sbegin, ct); + if (end != NULL) + *end++ = '\0'; + *s = end; + + return sbegin; +} + +size_t strnlen(const char * s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +char * +strstr(const char *in, const char *str) +{ + char c; + size_t len; + + c = *str++; + if (!c) + return (char *) in; // Trivial empty string case + len = strlen(str); + do { + char sc; + do { + sc = *in++; + if (!sc) + return (char *) 0; + } while (sc != c); + } while (strncmp(in, str, len) != 0); + return (char *) (in - 1); +} + +char * +strrchr(const char *p, int ch) +{ + const char *end = p + strlen(p); + do { + if (*end == (char)ch) + return (char *)end; + } while (--end >= p); + return NULL; +} + +char * +ul2dstr(unsigned long address, char *buf, int len) +{ + char *pos = buf + len - 1; + + if (len <= 0 || !buf) + return NULL; + *pos = 0; + while (address) { + if (!--len) break; + *--pos = address % 10 + '0'; + address /= 10; + } + return pos; +} + +/* + * miscellaneous libcfs stuff + */ + +/* + * Convert server error code to client format. + * Linux errno.h. + */ + +/* obtained by + * + * cc /usr/include/asm/errno.h -E -dM | grep '#define E' | sort -n -k3,3 + * + */ +enum linux_errnos { + LINUX_EPERM = 1, + LINUX_ENOENT = 2, + LINUX_ESRCH = 3, + LINUX_EINTR = 4, + LINUX_EIO = 5, + LINUX_ENXIO = 6, + LINUX_E2BIG = 7, + LINUX_ENOEXEC = 8, + LINUX_EBADF = 9, + LINUX_ECHILD = 10, + LINUX_EAGAIN = 11, + LINUX_ENOMEM = 12, + LINUX_EACCES = 13, + LINUX_EFAULT = 14, + LINUX_ENOTBLK = 15, + LINUX_EBUSY = 16, + LINUX_EEXIST = 17, + LINUX_EXDEV = 18, + LINUX_ENODEV = 19, + LINUX_ENOTDIR = 20, + LINUX_EISDIR = 21, + LINUX_EINVAL = 22, + LINUX_ENFILE = 23, + LINUX_EMFILE = 24, + LINUX_ENOTTY = 25, + LINUX_ETXTBSY = 26, + LINUX_EFBIG = 27, + LINUX_ENOSPC = 28, + LINUX_ESPIPE = 29, + LINUX_EROFS = 30, + LINUX_EMLINK = 31, + LINUX_EPIPE = 32, + LINUX_EDOM = 33, + LINUX_ERANGE = 34, + LINUX_EDEADLK = 35, + LINUX_ENAMETOOLONG = 36, + LINUX_ENOLCK = 37, + LINUX_ENOSYS = 38, + LINUX_ENOTEMPTY = 39, + LINUX_ELOOP = 40, + LINUX_ENOMSG = 42, + LINUX_EIDRM = 43, + LINUX_ECHRNG = 44, + LINUX_EL2NSYNC = 45, + LINUX_EL3HLT = 46, + LINUX_EL3RST = 47, + LINUX_ELNRNG = 48, + LINUX_EUNATCH = 49, + LINUX_ENOCSI = 50, + LINUX_EL2HLT = 51, + LINUX_EBADE = 52, + LINUX_EBADR = 53, + LINUX_EXFULL = 54, + LINUX_ENOANO = 55, + LINUX_EBADRQC = 56, + LINUX_EBADSLT = 57, + LINUX_EBFONT = 59, + LINUX_ENOSTR = 60, + LINUX_ENODATA = 61, + LINUX_ETIME = 62, + LINUX_ENOSR = 63, + LINUX_ENONET = 64, + LINUX_ENOPKG = 65, + LINUX_EREMOTE = 66, + LINUX_ENOLINK = 67, + LINUX_EADV = 68, + LINUX_ESRMNT = 69, + LINUX_ECOMM = 70, + LINUX_EPROTO = 71, + LINUX_EMULTIHOP = 72, + LINUX_EDOTDOT = 73, + LINUX_EBADMSG = 74, + LINUX_EOVERFLOW = 75, + LINUX_ENOTUNIQ = 76, + LINUX_EBADFD = 77, + LINUX_EREMCHG = 78, + LINUX_ELIBACC = 79, + LINUX_ELIBBAD = 80, + 
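/*
 * The strpbrk()/strsep() pair above is the standard BSD tokenizer: strsep()
 * cuts the string at the first delimiter, NUL-terminates the token and
 * advances the caller's pointer.  A small user-space usage sketch (this one
 * uses the strsep() from <string.h> on BSD/glibc, which behaves like the
 * copy above; the token values are made up):
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        char  buf[]  = "tcp0,tcp1 elan0";       /* must be writable: strsep edits it */
        char *cursor = buf;
        char *tok;

        while ((tok = strsep(&cursor, ", ")) != NULL) {
                if (*tok == '\0')
                        continue;               /* empty token between delimiters */
                printf("token: %s\n", tok);
        }
        return 0;
}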
LINUX_ELIBSCN = 81, + LINUX_ELIBMAX = 82, + LINUX_ELIBEXEC = 83, + LINUX_EILSEQ = 84, + LINUX_ERESTART = 85, + LINUX_ESTRPIPE = 86, + LINUX_EUSERS = 87, + LINUX_ENOTSOCK = 88, + LINUX_EDESTADDRREQ = 89, + LINUX_EMSGSIZE = 90, + LINUX_EPROTOTYPE = 91, + LINUX_ENOPROTOOPT = 92, + LINUX_EPROTONOSUPPORT = 93, + LINUX_ESOCKTNOSUPPORT = 94, + LINUX_EOPNOTSUPP = 95, + LINUX_EPFNOSUPPORT = 96, + LINUX_EAFNOSUPPORT = 97, + LINUX_EADDRINUSE = 98, + LINUX_EADDRNOTAVAIL = 99, + LINUX_ENETDOWN = 100, + LINUX_ENETUNREACH = 101, + LINUX_ENETRESET = 102, + LINUX_ECONNABORTED = 103, + LINUX_ECONNRESET = 104, + LINUX_ENOBUFS = 105, + LINUX_EISCONN = 106, + LINUX_ENOTCONN = 107, + LINUX_ESHUTDOWN = 108, + LINUX_ETOOMANYREFS = 109, + LINUX_ETIMEDOUT = 110, + LINUX_ECONNREFUSED = 111, + LINUX_EHOSTDOWN = 112, + LINUX_EHOSTUNREACH = 113, + LINUX_EALREADY = 114, + LINUX_EINPROGRESS = 115, + LINUX_ESTALE = 116, + LINUX_EUCLEAN = 117, + LINUX_ENOTNAM = 118, + LINUX_ENAVAIL = 119, + LINUX_EISNAM = 120, + LINUX_EREMOTEIO = 121, + LINUX_EDQUOT = 122, + LINUX_ENOMEDIUM = 123, + LINUX_EMEDIUMTYPE = 124, + + /* + * we don't need these, but for completeness.. + */ + LINUX_EDEADLOCK = LINUX_EDEADLK, + LINUX_EWOULDBLOCK = LINUX_EAGAIN +}; + +int convert_server_error(__u64 ecode) +{ + int sign; + int code; + + static int errno_xlate[] = { + /* success is always success */ + [0] = 0, + [LINUX_EPERM] = EPERM, + [LINUX_ENOENT] = ENOENT, + [LINUX_ESRCH] = ESRCH, + [LINUX_EINTR] = EINTR, + [LINUX_EIO] = EIO, + [LINUX_ENXIO] = ENXIO, + [LINUX_E2BIG] = E2BIG, + [LINUX_ENOEXEC] = ENOEXEC, + [LINUX_EBADF] = EBADF, + [LINUX_ECHILD] = ECHILD, + [LINUX_EAGAIN] = EAGAIN, + [LINUX_ENOMEM] = ENOMEM, + [LINUX_EACCES] = EACCES, + [LINUX_EFAULT] = EFAULT, + [LINUX_ENOTBLK] = ENOTBLK, + [LINUX_EBUSY] = EBUSY, + [LINUX_EEXIST] = EEXIST, + [LINUX_EXDEV] = EXDEV, + [LINUX_ENODEV] = ENODEV, + [LINUX_ENOTDIR] = ENOTDIR, + [LINUX_EISDIR] = EISDIR, + [LINUX_EINVAL] = EINVAL, + [LINUX_ENFILE] = ENFILE, + [LINUX_EMFILE] = EMFILE, + [LINUX_ENOTTY] = ENOTTY, + [LINUX_ETXTBSY] = ETXTBSY, + [LINUX_EFBIG] = EFBIG, + [LINUX_ENOSPC] = ENOSPC, + [LINUX_ESPIPE] = ESPIPE, + [LINUX_EROFS] = EROFS, + [LINUX_EMLINK] = EMLINK, + [LINUX_EPIPE] = EPIPE, + [LINUX_EDOM] = EDOM, + [LINUX_ERANGE] = ERANGE, + [LINUX_EDEADLK] = EDEADLK, + [LINUX_ENAMETOOLONG] = ENAMETOOLONG, + [LINUX_ENOLCK] = ENOLCK, + [LINUX_ENOSYS] = ENOSYS, + [LINUX_ENOTEMPTY] = ENOTEMPTY, + [LINUX_ELOOP] = ELOOP, + [LINUX_ENOMSG] = ENOMSG, + [LINUX_EIDRM] = EIDRM, + [LINUX_ECHRNG] = EINVAL /* ECHRNG */, + [LINUX_EL2NSYNC] = EINVAL /* EL2NSYNC */, + [LINUX_EL3HLT] = EINVAL /* EL3HLT */, + [LINUX_EL3RST] = EINVAL /* EL3RST */, + [LINUX_ELNRNG] = EINVAL /* ELNRNG */, + [LINUX_EUNATCH] = EINVAL /* EUNATCH */, + [LINUX_ENOCSI] = EINVAL /* ENOCSI */, + [LINUX_EL2HLT] = EINVAL /* EL2HLT */, + [LINUX_EBADE] = EINVAL /* EBADE */, + [LINUX_EBADR] = EBADRPC, + [LINUX_EXFULL] = EINVAL /* EXFULL */, + [LINUX_ENOANO] = EINVAL /* ENOANO */, + [LINUX_EBADRQC] = EINVAL /* EBADRQC */, + [LINUX_EBADSLT] = EINVAL /* EBADSLT */, + [LINUX_EBFONT] = EINVAL /* EBFONT */, + [LINUX_ENOSTR] = EINVAL /* ENOSTR */, + [LINUX_ENODATA] = EINVAL /* ENODATA */, + [LINUX_ETIME] = EINVAL /* ETIME */, + [LINUX_ENOSR] = EINVAL /* ENOSR */, + [LINUX_ENONET] = EINVAL /* ENONET */, + [LINUX_ENOPKG] = EINVAL /* ENOPKG */, + [LINUX_EREMOTE] = EREMOTE, + [LINUX_ENOLINK] = EINVAL /* ENOLINK */, + [LINUX_EADV] = EINVAL /* EADV */, + [LINUX_ESRMNT] = EINVAL /* ESRMNT */, + [LINUX_ECOMM] = EINVAL /* ECOMM */, + [LINUX_EPROTO] = EPROTOTYPE, + 
[LINUX_EMULTIHOP] = EINVAL /* EMULTIHOP */, + [LINUX_EDOTDOT] = EINVAL /* EDOTDOT */, + [LINUX_EBADMSG] = EINVAL /* EBADMSG */, + [LINUX_EOVERFLOW] = EOVERFLOW, + [LINUX_ENOTUNIQ] = EINVAL /* ENOTUNIQ */, + [LINUX_EBADFD] = EINVAL /* EBADFD */, + [LINUX_EREMCHG] = EINVAL /* EREMCHG */, + [LINUX_ELIBACC] = EINVAL /* ELIBACC */, + [LINUX_ELIBBAD] = EINVAL /* ELIBBAD */, + [LINUX_ELIBSCN] = EINVAL /* ELIBSCN */, + [LINUX_ELIBMAX] = EINVAL /* ELIBMAX */, + [LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */, + [LINUX_EILSEQ] = EILSEQ, + [LINUX_ERESTART] = EINVAL /* because ERESTART is + * negative in XNU */, + [LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */, + [LINUX_EUSERS] = EUSERS, + [LINUX_ENOTSOCK] = ENOTSOCK, + [LINUX_EDESTADDRREQ] = EDESTADDRREQ, + [LINUX_EMSGSIZE] = EMSGSIZE, + [LINUX_EPROTOTYPE] = EPROTOTYPE, + [LINUX_ENOPROTOOPT] = ENOPROTOOPT, + [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT, + [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, + [LINUX_EOPNOTSUPP] = EOPNOTSUPP, + [LINUX_EPFNOSUPPORT] = EPFNOSUPPORT, + [LINUX_EAFNOSUPPORT] = EAFNOSUPPORT, + [LINUX_EADDRINUSE] = EADDRINUSE, + [LINUX_EADDRNOTAVAIL] = EADDRNOTAVAIL, + [LINUX_ENETDOWN] = ENETDOWN, + [LINUX_ENETUNREACH] = ENETUNREACH, + [LINUX_ENETRESET] = ENETRESET, + [LINUX_ECONNABORTED] = ECONNABORTED, + [LINUX_ECONNRESET] = ECONNRESET, + [LINUX_ENOBUFS] = ENOBUFS, + [LINUX_EISCONN] = EISCONN, + [LINUX_ENOTCONN] = ENOTCONN, + [LINUX_ESHUTDOWN] = ESHUTDOWN, + [LINUX_ETOOMANYREFS] = ETOOMANYREFS, + [LINUX_ETIMEDOUT] = ETIMEDOUT, + [LINUX_ECONNREFUSED] = ECONNREFUSED, + [LINUX_EHOSTDOWN] = EHOSTDOWN, + [LINUX_EHOSTUNREACH] = EHOSTUNREACH, + [LINUX_EALREADY] = EALREADY, + [LINUX_EINPROGRESS] = EINPROGRESS, + [LINUX_ESTALE] = ESTALE, + [LINUX_EUCLEAN] = EINVAL /* EUCLEAN */, + [LINUX_ENOTNAM] = EINVAL /* ENOTNAM */, + [LINUX_ENAVAIL] = EINVAL /* ENAVAIL */, + [LINUX_EISNAM] = EINVAL /* EISNAM */, + [LINUX_EREMOTEIO] = EINVAL /* EREMOTEIO */, + [LINUX_EDQUOT] = EDQUOT, + [LINUX_ENOMEDIUM] = EINVAL /* ENOMEDIUM */, + [LINUX_EMEDIUMTYPE] = EINVAL /* EMEDIUMTYPE */, + }; + code = (int)ecode; + if (code >= 0) { + sign = +1; + } else { + sign = -1; + code = -code; + } + if (code < (sizeof errno_xlate) / (sizeof errno_xlate[0])) { + code = errno_xlate[code]; + LASSERT(code >= 0); + } + return sign * code; +} + +enum { + LINUX_O_RDONLY = 00, + LINUX_O_WRONLY = 01, + LINUX_O_RDWR = 02, + LINUX_O_CREAT = 0100, + LINUX_O_EXCL = 0200, + LINUX_O_NOCTTY = 0400, + LINUX_O_TRUNC = 01000, + LINUX_O_APPEND = 02000, + LINUX_O_NONBLOCK = 04000, + LINUX_O_NDELAY = LINUX_O_NONBLOCK, + LINUX_O_SYNC = 010000, + LINUX_O_FSYNC = LINUX_O_SYNC, + LINUX_O_ASYNC = 020000, + LINUX_O_DIRECT = 040000, + LINUX_O_NOFOLLOW = 0400000 +}; + +static inline void obit_convert(int *cflag, int *sflag, + unsigned cmask, unsigned smask) +{ + if (*cflag & cmask != 0) { + *sflag |= smask; + *cflag &= ~cmask; + } +} + +/* + * convert flag from XNU client to Linux _i386_ server. 
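/*
 * obit_convert() above translates open(2) flags one bit at a time: test a
 * client bit, set the matching server bit, clear the client bit, and let
 * any bits still set at the end mean "unsupported flag".  A self-contained
 * sketch of that technique (the flag values are only examples); note that
 * the bit test must be parenthesised, since `!=' binds more tightly than
 * `&' in C:
 */
static void bit_convert(int *cflag, int *sflag, unsigned cmask, unsigned smask)
{
        if ((*cflag & cmask) != 0) {            /* parenthesised bit test */
                *sflag |= smask;
                *cflag &= ~cmask;
        }
}

/* translate; returns 0 and sets *result, or -1 if unknown bits remain */
static int flags_convert(int cflag, int *result)
{
        int sflag = 0;

        bit_convert(&cflag, &sflag, 0x1 /* e.g. O_WRONLY */, 01 /* LINUX_O_WRONLY */);
        bit_convert(&cflag, &sflag, 0x2 /* e.g. O_RDWR   */, 02 /* LINUX_O_RDWR   */);
        /* ... one call per supported flag ... */

        if (cflag != 0)
                return -1;                      /* a client bit had no translation */
        *result = sflag;
        return 0;
}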
+ */ +int convert_client_oflag(int cflag, int *result) +{ + int sflag = 0; + + cflag = 0; + obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY); + obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY); + obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR); + obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK); + obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND); + obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC); + obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC); + obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW); + obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); + obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC); + obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL); + obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); + obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY); + obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY); + /* + * Some more obscure BSD flags have no Linux counterparts: + * + * O_SHLOCK 0x0010 + * O_EXLOCK 0x0020 + * O_EVTONLY 0x8000 + * O_POPUP 0x80000000 + * O_ALERT 0x20000000 + */ + if (cflag == 0) { + *result = sflag; + return 0; + } else + return -EINVAL; +} + +#ifdef __DARWIN8__ +#else /* !__DARWIN8__ */ +extern int unix_syscall(); +extern int unix_syscall_return(); + +extern int ktrsysret(); +extern int ktrace(); + +extern int ast_taken(); +extern int ast_check(); + +extern int trap(); +extern int syscall_trace(); + +static int is_addr_in_range(void *addr, void *start, void *end) +{ + return start <= addr && addr <= end; +} + +extern void cfs_thread_agent (void); + +static int is_last_frame(void *addr) +{ + if (addr == NULL) + return 1; + else if (is_addr_in_range(addr, unix_syscall, unix_syscall_return)) + return 1; + else if (is_addr_in_range(addr, ktrsysret, ktrace)) + return 1; + else if (is_addr_in_range(addr, ast_taken, ast_check)) + return 1; + else if (is_addr_in_range(addr, trap, syscall_trace)) + return 1; + else if (is_addr_in_range(addr, cfs_thread_agent, cfs_kernel_thread)) + return 1; + else + return 0; +} + +static void *get_frame(int i) +{ + void *result; + +#define CASE(i) case (i): result = __builtin_return_address(i); break + switch (i + 1) { + CASE(1); + CASE(2); + CASE(3); + CASE(4); + CASE(5); + CASE(6); + CASE(7); + CASE(8); + CASE(9); + CASE(10); + CASE(11); + CASE(12); + CASE(13); + CASE(14); + CASE(15); + CASE(16); + CASE(17); + CASE(18); + CASE(19); + CASE(20); + default: + panic("impossible frame number: %d\n", i); + result = NULL; + } + return result; +} + +void cfs_stack_trace_fill(struct cfs_stack_trace *trace) +{ + int i; + + memset(trace, 0, sizeof *trace); + for (i = 0; i < sizeof_array(trace->frame); ++ i) { + void *addr; + + addr = get_frame(i); + trace->frame[i] = addr; + if (is_last_frame(addr)) + break; + } +} + +void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) +{ + if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) + return trace->frame[frame_no]; + else + return NULL; +} +#endif /* !__DARWIN8__ */ diff --git a/libcfs/libcfs/debug.c b/libcfs/libcfs/debug.c new file mode 100644 index 0000000..9810bdb --- /dev/null +++ b/libcfs/libcfs/debug.c @@ -0,0 +1,839 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include "tracefile.h" + +static char debug_file_name[1024]; + +#ifdef __KERNEL__ +unsigned int libcfs_subsystem_debug = ~0; +CFS_MODULE_PARM(libcfs_subsystem_debug, "i", int, 0644, + "Lustre kernel debug subsystem mask"); +EXPORT_SYMBOL(libcfs_subsystem_debug); + +unsigned int libcfs_debug = (D_EMERG | D_ERROR | D_WARNING | D_CONSOLE | + D_NETERROR | D_HA | D_CONFIG | D_IOCTL); +CFS_MODULE_PARM(libcfs_debug, "i", int, 0644, + "Lustre kernel debug mask"); +EXPORT_SYMBOL(libcfs_debug); + +int libcfs_debug_mb = -1; +CFS_MODULE_PARM(libcfs_debug_mb, "i", int, 0644, + "Total debug buffer size."); +EXPORT_SYMBOL(libcfs_debug_mb); + +unsigned int libcfs_printk = D_CANTMASK; +CFS_MODULE_PARM(libcfs_printk, "i", uint, 0644, + "Lustre kernel debug console mask"); +EXPORT_SYMBOL(libcfs_printk); + +unsigned int libcfs_console_ratelimit = 1; +CFS_MODULE_PARM(libcfs_console_ratelimit, "i", uint, 0644, + "Lustre kernel debug console ratelimit (0 to disable)"); +EXPORT_SYMBOL(libcfs_console_ratelimit); + +cfs_duration_t libcfs_console_max_delay; +CFS_MODULE_PARM(libcfs_console_max_delay, "l", ulong, 0644, + "Lustre kernel debug console max delay (jiffies)"); +EXPORT_SYMBOL(libcfs_console_max_delay); + +cfs_duration_t libcfs_console_min_delay; +CFS_MODULE_PARM(libcfs_console_min_delay, "l", ulong, 0644, + "Lustre kernel debug console min delay (jiffies)"); +EXPORT_SYMBOL(libcfs_console_min_delay); + +unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; +CFS_MODULE_PARM(libcfs_console_backoff, "i", uint, 0644, + "Lustre kernel debug console backoff factor"); +EXPORT_SYMBOL(libcfs_console_backoff); + +unsigned int libcfs_debug_binary = 1; +EXPORT_SYMBOL(libcfs_debug_binary); + +unsigned int libcfs_stack; +EXPORT_SYMBOL(libcfs_stack); + +unsigned int portal_enter_debugger; +EXPORT_SYMBOL(portal_enter_debugger); + +unsigned int libcfs_catastrophe; +EXPORT_SYMBOL(libcfs_catastrophe); + +unsigned int libcfs_panic_on_lbug = 0; +CFS_MODULE_PARM(libcfs_panic_on_lbug, "i", uint, 0644, + "Lustre kernel panic on LBUG"); +EXPORT_SYMBOL(libcfs_panic_on_lbug); + +atomic_t libcfs_kmemory = ATOMIC_INIT(0); +EXPORT_SYMBOL(libcfs_kmemory); + +static cfs_waitq_t debug_ctlwq; + +#ifdef __arch_um__ +char debug_file_path[1024] = "/r/tmp/lustre-log"; +#else +char debug_file_path[1024] = "/tmp/lustre-log"; +#endif +CFS_MODULE_PARM(debug_file_path, "s", charp, 0644, + "Path for dumping debug logs, " + "set 'NONE' to prevent log dumping"); + +int libcfs_panic_in_progress; + +/* libcfs_debug_token2mask() expects the returned + * string in lower-case */ +const char * +libcfs_debug_subsys2str(int subsys) +{ + switch (subsys) { + default: + return NULL; + case S_UNDEFINED: + return "undefined"; + case S_MDC: + return "mdc"; + case S_MDS: + return "mds"; + case S_OSC: + return 
"osc"; + case S_OST: + return "ost"; + case S_CLASS: + return "class"; + case S_LOG: + return "log"; + case S_LLITE: + return "llite"; + case S_RPC: + return "rpc"; + case S_LNET: + return "lnet"; + case S_LND: + return "lnd"; + case S_PINGER: + return "pinger"; + case S_FILTER: + return "filter"; + case S_ECHO: + return "echo"; + case S_LDLM: + return "ldlm"; + case S_LOV: + return "lov"; + case S_LMV: + return "lmv"; + case S_SEC: + return "sec"; + case S_GSS: + return "gss"; + case S_MGC: + return "mgc"; + case S_MGS: + return "mgs"; + case S_FID: + return "fid"; + case S_FLD: + return "fld"; + } +} + +/* libcfs_debug_token2mask() expects the returned + * string in lower-case */ +const char * +libcfs_debug_dbg2str(int debug) +{ + switch (debug) { + default: + return NULL; + case D_TRACE: + return "trace"; + case D_INODE: + return "inode"; + case D_SUPER: + return "super"; + case D_EXT2: + return "ext2"; + case D_MALLOC: + return "malloc"; + case D_CACHE: + return "cache"; + case D_INFO: + return "info"; + case D_IOCTL: + return "ioctl"; + case D_NETERROR: + return "neterror"; + case D_NET: + return "net"; + case D_WARNING: + return "warning"; + case D_BUFFS: + return "buffs"; + case D_OTHER: + return "other"; + case D_DENTRY: + return "dentry"; + case D_NETTRACE: + return "nettrace"; + case D_PAGE: + return "page"; + case D_DLMTRACE: + return "dlmtrace"; + case D_ERROR: + return "error"; + case D_EMERG: + return "emerg"; + case D_HA: + return "ha"; + case D_RPCTRACE: + return "rpctrace"; + case D_VFSTRACE: + return "vfstrace"; + case D_READA: + return "reada"; + case D_MMAP: + return "mmap"; + case D_CONFIG: + return "config"; + case D_CONSOLE: + return "console"; + case D_QUOTA: + return "quota"; + case D_SEC: + return "sec"; + } +} + +int +libcfs_debug_mask2str(char *str, int size, int mask, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int len = 0; + const char *token; + int bit; + int i; + + if (mask == 0) { /* "0" */ + if (size > 0) + str[0] = '0'; + len = 1; + } else { /* space-separated tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + if ((mask & bit) == 0) + continue; + + token = fn(bit); + if (token == NULL) /* unused bit */ + continue; + + if (len > 0) { /* separator? */ + if (len < size) + str[len] = ' '; + len++; + } + + while (*token != 0) { + if (len < size) + str[len] = *token; + token++; + len++; + } + } + } + + /* terminate 'str' */ + if (len < size) + str[len] = 0; + else + str[size - 1] = 0; + + return len; +} + +int +libcfs_debug_token2mask(int *mask, const char *str, int len, int is_subsys) +{ + const char *(*fn)(int bit) = is_subsys ? libcfs_debug_subsys2str : + libcfs_debug_dbg2str; + int i; + int j; + int bit; + const char *token; + + /* match against known tokens */ + for (i = 0; i < 32; i++) { + bit = 1 << i; + + token = fn(bit); + if (token == NULL) /* unused? 
*/ + continue; + + /* strcasecmp */ + for (j = 0; ; j++) { + if (j == len) { /* end of token */ + if (token[j] == 0) { + *mask = bit; + return 0; + } + break; + } + + if (token[j] == 0) + break; + + if (str[j] == token[j]) + continue; + + if (str[j] < 'A' || 'Z' < str[j]) + break; + + if (str[j] - 'A' + 'a' != token[j]) + break; + } + } + + return -EINVAL; /* no match */ +} + +int +libcfs_debug_str2mask(int *mask, const char *str, int is_subsys) +{ + int m = 0; + char op = 0; + int matched; + int n; + int t; + + /* Allow a number for backwards compatibility */ + + for (n = strlen(str); n > 0; n--) + if (!isspace(str[n-1])) + break; + matched = n; + + if ((t = sscanf(str, "%i%n", &m, &matched)) >= 1 && + matched == n) { + *mask = m; + return 0; + } + + /* must be a list of debug tokens or numbers separated by + * whitespace and optionally an operator ('+' or '-'). If an operator + * appears first in , '*mask' is used as the starting point + * (relative), otherwise 0 is used (absolute). An operator applies to + * all following tokens up to the next operator. */ + + matched = 0; + while (*str != 0) { + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) + break; + + if (*str == '+' || *str == '-') { + op = *str++; + + /* op on first token == relative */ + if (!matched) + m = *mask; + + while (isspace(*str)) /* skip whitespace */ + str++; + + if (*str == 0) /* trailing op */ + return -EINVAL; + } + + /* find token length */ + for (n = 0; str[n] != 0 && !isspace(str[n]); n++); + + /* match token */ + if (libcfs_debug_token2mask(&t, str, n, is_subsys) != 0) + return -EINVAL; + + matched = 1; + if (op == '-') + m &= ~t; + else + m |= t; + + str += n; + } + + if (!matched) + return -EINVAL; + + *mask = m; + return 0; +} + +void libcfs_debug_dumplog_internal(void *arg) +{ + CFS_DECL_JOURNAL_DATA; + + CFS_PUSH_JOURNAL; + + if (strncmp(debug_file_path, "NONE", 4) != 0) { + snprintf(debug_file_name, sizeof(debug_file_name) - 1, + "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), + (long)arg); + printk(KERN_ALERT "LustreError: dumping log to %s\n", + debug_file_name); + tracefile_dump_all_pages(debug_file_name); + } + CFS_POP_JOURNAL; +} + +int libcfs_debug_dumplog_thread(void *arg) +{ + cfs_daemonize(""); + libcfs_debug_dumplog_internal(arg); + cfs_waitq_signal(&debug_ctlwq); + return 0; +} + +void libcfs_debug_dumplog(void) +{ + int rc; + cfs_waitlink_t wait; + ENTRY; + + /* we're being careful to ensure that the kernel thread is + * able to set our state to running as it exits before we + * get to schedule() */ + cfs_waitlink_init(&wait); + set_current_state(TASK_INTERRUPTIBLE); + cfs_waitq_add(&debug_ctlwq, &wait); + + rc = cfs_kernel_thread(libcfs_debug_dumplog_thread, + (void *)(long)cfs_curproc_pid(), + CLONE_VM | CLONE_FS | CLONE_FILES); + if (rc < 0) + printk(KERN_ERR "LustreError: cannot start log dump thread: " + "%d\n", rc); + else + cfs_waitq_wait(&wait, CFS_TASK_INTERRUPTIBLE); + + /* be sure to teardown if kernel_thread() failed */ + cfs_waitq_del(&debug_ctlwq, &wait); + set_current_state(TASK_RUNNING); +} + +int libcfs_debug_init(unsigned long bufsize) +{ + int rc = 0; + int max = libcfs_debug_mb; + + cfs_waitq_init(&debug_ctlwq); + libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; + libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; + /* If libcfs_debug_mb is set to an invalid value or uninitialized + * then just make the total buffers smp_num_cpus * TCD_MAX_PAGES */ + if (max > trace_max_debug_mb() || max < num_possible_cpus()) { + max = TCD_MAX_PAGES; + 
} else { + max = (max / num_possible_cpus()); + max = (max << (20 - CFS_PAGE_SHIFT)); + } + rc = tracefile_init(max); + + if (rc == 0) + libcfs_register_panic_notifier(); + + return rc; +} + +int libcfs_debug_cleanup(void) +{ + libcfs_unregister_panic_notifier(); + tracefile_exit(); + return 0; +} + +int libcfs_debug_clear_buffer(void) +{ + trace_flush_pages(); + return 0; +} + +/* Debug markers, although printed by S_LNET + * should not be be marked as such. */ +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_UNDEFINED +int libcfs_debug_mark_buffer(char *text) +{ + CDEBUG(D_TRACE,"***************************************************\n"); + CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text); + CDEBUG(D_TRACE,"***************************************************\n"); + + return 0; +} +#undef DEBUG_SUBSYSTEM +#define DEBUG_SUBSYSTEM S_LNET + +void libcfs_debug_set_level(unsigned int debug_level) +{ + printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n", + debug_level); + libcfs_debug = debug_level; +} + +EXPORT_SYMBOL(libcfs_debug_dumplog); +EXPORT_SYMBOL(libcfs_debug_set_level); + + +#else /* !__KERNEL__ */ + +#include + +#ifdef HAVE_CATAMOUNT_DATA_H +#include +#include + +static char source_nid[16]; +/* 0 indicates no messages to console, 1 is errors, > 1 is all debug messages */ +static int toconsole = 1; +unsigned int libcfs_console_ratelimit = 1; +cfs_duration_t libcfs_console_max_delay; +cfs_duration_t libcfs_console_min_delay; +unsigned int libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; +#else /* !HAVE_CATAMOUNT_DATA_H */ +#ifdef HAVE_NETDB_H +#include +#endif /* HAVE_NETDB_H */ +struct utsname *tmp_utsname; +static char source_nid[sizeof(tmp_utsname->nodename)]; +#endif /* HAVE_CATAMOUNT_DATA_H */ + +static int source_pid; +int smp_processor_id = 1; +char debug_file_path[1024]; +FILE *debug_file_fd; + +int portals_do_debug_dumplog(void *arg) +{ + printf("Look in %s\n", debug_file_name); + return 0; +} + + +void portals_debug_print(void) +{ + return; +} + + +void libcfs_debug_dumplog(void) +{ + printf("Look in %s\n", debug_file_name); + return; +} + +int libcfs_debug_init(unsigned long bufsize) +{ + char *debug_mask = NULL; + char *debug_subsys = NULL; + char *debug_filename; + +#ifdef HAVE_CATAMOUNT_DATA_H + char *debug_console = NULL; + char *debug_ratelimit = NULL; + char *debug_max_delay = NULL; + char *debug_min_delay = NULL; + char *debug_backoff = NULL; + + libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; + libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; + + snprintf(source_nid, sizeof(source_nid) - 1, "%u", _my_pnid); + source_pid = _my_pid; + + debug_console = getenv("LIBLUSTRE_DEBUG_CONSOLE"); + if (debug_console != NULL) { + toconsole = strtoul(debug_console, NULL, 0); + CDEBUG(D_INFO, "set liblustre toconsole to %u\n", toconsole); + } + debug_ratelimit = getenv("LIBLUSTRE_DEBUG_CONSOLE_RATELIMIT"); + if (debug_ratelimit != NULL) { + libcfs_console_ratelimit = strtoul(debug_ratelimit, NULL, 0); + CDEBUG(D_INFO, "set liblustre console ratelimit to %u\n", + libcfs_console_ratelimit); + } + debug_max_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY"); + if (debug_max_delay != NULL) + libcfs_console_max_delay = + cfs_time_seconds(strtoul(debug_max_delay, NULL, 0)); + debug_min_delay = getenv("LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY"); + if (debug_min_delay != NULL) + libcfs_console_min_delay = + cfs_time_seconds(strtoul(debug_min_delay, NULL, 0)); + if (debug_min_delay || debug_max_delay) { + if (!libcfs_console_max_delay || !libcfs_console_min_delay || + 
libcfs_console_max_delay < libcfs_console_min_delay) { + libcfs_console_max_delay = CDEBUG_DEFAULT_MAX_DELAY; + libcfs_console_min_delay = CDEBUG_DEFAULT_MIN_DELAY; + CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_MAX_DELAY " + "should be greater than " + "LIBLUSTRE_DEBUG_CONSOLE_MIN_DELAY " + "and both parameters should be non-null" + ": restore default values\n"); + } else { + CDEBUG(D_INFO, "set liblustre console max delay to %lus" + " and min delay to %lus\n", + (cfs_duration_t) + cfs_duration_sec(libcfs_console_max_delay), + (cfs_duration_t) + cfs_duration_sec(libcfs_console_min_delay)); + } + } + debug_backoff = getenv("LIBLUSTRE_DEBUG_CONSOLE_BACKOFF"); + if (debug_backoff != NULL) { + libcfs_console_backoff = strtoul(debug_backoff, NULL, 0); + if (libcfs_console_backoff <= 0) { + libcfs_console_backoff = CDEBUG_DEFAULT_BACKOFF; + CDEBUG(D_INFO, "LIBLUSTRE_DEBUG_CONSOLE_BACKOFF <= 0: " + "restore default value\n"); + } else { + CDEBUG(D_INFO, "set liblustre console backoff to %u\n", + libcfs_console_backoff); + } + } +#else + struct utsname myname; + + if (uname(&myname) == 0) + strcpy(source_nid, myname.nodename); + source_pid = getpid(); +#endif + /* debug masks */ + debug_mask = getenv("LIBLUSTRE_DEBUG_MASK"); + if (debug_mask) + libcfs_debug = (unsigned int) strtol(debug_mask, NULL, 0); + + debug_subsys = getenv("LIBLUSTRE_DEBUG_SUBSYS"); + if (debug_subsys) + libcfs_subsystem_debug = + (unsigned int) strtol(debug_subsys, NULL, 0); + + debug_filename = getenv("LIBLUSTRE_DEBUG_BASE"); + if (debug_filename) + strncpy(debug_file_path,debug_filename,sizeof(debug_file_path)); + + debug_filename = getenv("LIBLUSTRE_DEBUG_FILE"); + if (debug_filename) + strncpy(debug_file_name,debug_filename,sizeof(debug_file_name)); + + if (debug_file_name[0] == '\0' && debug_file_path[0] != '\0') + snprintf(debug_file_name, sizeof(debug_file_name) - 1, + "%s-%s-"CFS_TIME_T".log", debug_file_path, source_nid, time(0)); + + if (strcmp(debug_file_name, "stdout") == 0 || + strcmp(debug_file_name, "-") == 0) { + debug_file_fd = stdout; + } else if (strcmp(debug_file_name, "stderr") == 0) { + debug_file_fd = stderr; + } else if (debug_file_name[0] != '\0') { + debug_file_fd = fopen(debug_file_name, "w"); + if (debug_file_fd == NULL) + fprintf(stderr, "%s: unable to open '%s': %s\n", + source_nid, debug_file_name, strerror(errno)); + } + + if (debug_file_fd == NULL) + debug_file_fd = stdout; + + return 0; +} + +int libcfs_debug_cleanup(void) +{ + if (debug_file_fd != stdout && debug_file_fd != stderr) + fclose(debug_file_fd); + return 0; +} + +int libcfs_debug_clear_buffer(void) +{ + return 0; +} + +int libcfs_debug_mark_buffer(char *text) +{ + + fprintf(debug_file_fd, "*******************************************************************************\n"); + fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); + fprintf(debug_file_fd, "*******************************************************************************\n"); + + return 0; +} + +#ifdef HAVE_CATAMOUNT_DATA_H +#define CATAMOUNT_MAXLINE (256-4) +void catamount_printline(char *buf, size_t size) +{ + char *pos = buf; + int prsize = size; + + while (prsize > 0){ + lputs(pos); + pos += CATAMOUNT_MAXLINE; + prsize -= CATAMOUNT_MAXLINE; + } +} +#endif + +int +libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, + int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, + const char *format2, ...) 
+{ + struct timeval tv; + int nob; + int remain; + va_list ap; + char buf[CFS_PAGE_SIZE]; /* size 4096 used for compatimble + * with linux, where message can`t + * be exceed PAGE_SIZE */ + int console = 0; + char *prefix = "Lustre"; + +#ifdef HAVE_CATAMOUNT_DATA_H + /* toconsole == 0 - all messages to debug_file_fd + * toconsole == 1 - warnings to console, all to debug_file_fd + * toconsole > 1 - all debug to console */ + if (((mask & libcfs_printk) && toconsole == 1) || toconsole > 1) + console = 1; +#endif + + if ((!console) && (!debug_file_fd)) { + return 0; + } + + if (mask & (D_EMERG | D_ERROR)) + prefix = "LustreError"; + + nob = snprintf(buf, sizeof(buf), "%s: %u-%s:(%s:%d:%s()): ", prefix, + source_pid, source_nid, file, line, fn); + + remain = sizeof(buf) - nob; + if (format1) { + nob += vsnprintf(&buf[nob], remain, format1, args); + } + + remain = sizeof(buf) - nob; + if ((format2) && (remain > 0)) { + va_start(ap, format2); + nob += vsnprintf(&buf[nob], remain, format2, ap); + va_end(ap); + } + +#ifdef HAVE_CATAMOUNT_DATA_H + if (console) { + /* check rate limit for console */ + if (cdls != NULL) { + if (libcfs_console_ratelimit && + cdls->cdls_next != 0 && /* not first time ever */ + !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { + + /* skipping a console message */ + cdls->cdls_count++; + goto out_file; + } + + if (cfs_time_after(cfs_time_current(), cdls->cdls_next + + libcfs_console_max_delay + + cfs_time_seconds(10))) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= libcfs_console_backoff * 4; + } else { + cdls->cdls_delay *= libcfs_console_backoff; + + if (cdls->cdls_delay < + libcfs_console_min_delay) + cdls->cdls_delay = + libcfs_console_min_delay; + else if (cdls->cdls_delay > + libcfs_console_max_delay) + cdls->cdls_delay = + libcfs_console_max_delay; + } + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; + } + + if (cdls != NULL && cdls->cdls_count != 0) { + char buf2[100]; + + nob = snprintf(buf2, sizeof(buf2), + "Skipped %d previous similar message%s\n", + cdls->cdls_count, (cdls->cdls_count > 1) ? "s" : ""); + + catamount_printline(buf2, nob); + cdls->cdls_count = 0; + goto out_file; + } + catamount_printline(buf, nob); + } +out_file: + /* return on toconsole > 1, as we don't want the user getting + * spammed by the debug data */ + if (toconsole > 1) + return 0; +#endif + if (debug_file_fd == NULL) + return 0; + + gettimeofday(&tv, NULL); + + fprintf(debug_file_fd, CFS_TIME_T".%06lu:%u:%s:(%s:%d:%s()): %s", + tv.tv_sec, tv.tv_usec, source_pid, source_nid, + file, line, fn, buf); + + return 0; +} + +void +libcfs_assertion_failed(const char *expr, const char *file, const char *func, + const int line) +{ + libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, + "ASSERTION(%s) failed\n", expr); + abort(); +} + +#endif /* __KERNEL__ */ diff --git a/libcfs/libcfs/libcfs.xcode/project.pbxproj b/libcfs/libcfs/libcfs.xcode/project.pbxproj new file mode 100644 index 0000000..479c21b --- /dev/null +++ b/libcfs/libcfs/libcfs.xcode/project.pbxproj @@ -0,0 +1,439 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = libcfs; + refType = 4; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 19444794072D07AD00DAF9BC = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = tracefile.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19444795072D07AD00DAF9BC = { + fileRef = 19444794072D07AD00DAF9BC; + isa = PBXBuildFile; + settings = { + }; + }; + 19444796072D08AA00DAF9BC = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = debug.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19444797072D08AA00DAF9BC = { + fileRef = 19444796072D08AA00DAF9BC; + isa = PBXBuildFile; + settings = { + }; + }; + 19509C03072CD5FF00A958C3 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = module.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19509C04072CD5FF00A958C3 = { + fileRef = 19509C03072CD5FF00A958C3; + isa = PBXBuildFile; + settings = { + }; + }; + 19713B76072E8274004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_prim.c; + path = arch/xnu/cfs_prim.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713B77072E8274004E8469 = { + fileRef = 19713B76072E8274004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713BB7072E8281004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_mem.c; + path = arch/xnu/cfs_mem.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713BB8072E8281004E8469 = { + fileRef = 19713BB7072E8281004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713BF7072E828E004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_proc.c; + path = arch/xnu/cfs_proc.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713BF8072E828E004E8469 = { + fileRef = 19713BF7072E828E004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713C7A072E82B2004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_utils.c; + path = arch/xnu/cfs_utils.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 
19713C7B072E82B2004E8469 = { + fileRef = 19713C7A072E82B2004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713CD6072E8A56004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_module.c; + path = arch/xnu/cfs_module.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713CD7072E8A56004E8469 = { + fileRef = 19713CD6072E8A56004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713D1B072E8E39004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_fs.c; + path = arch/xnu/cfs_fs.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713D1C072E8E39004E8469 = { + fileRef = 19713D1B072E8E39004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713D60072E9109004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = xnu_sync.c; + path = arch/xnu/xnu_sync.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713D61072E9109004E8469 = { + fileRef = 19713D60072E9109004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713DC2072F994D004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_tracefile.c; + path = arch/xnu/cfs_tracefile.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713DC3072F994D004E8469 = { + fileRef = 19713DC2072F994D004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713E1C072FAFB5004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_debug.c; + path = arch/xnu/cfs_debug.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713E1D072FAFB5004E8469 = { + fileRef = 19713E1C072FAFB5004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 247142CAFF3F8F9811CA285C = { + children = ( + 19713E1C072FAFB5004E8469, + 19713DC2072F994D004E8469, + 19713D60072E9109004E8469, + 19713D1B072E8E39004E8469, + 19713CD6072E8A56004E8469, + 19713C7A072E82B2004E8469, + 19713BF7072E828E004E8469, + 19713BB7072E8281004E8469, + 19713B76072E8274004E8469, + 19444796072D08AA00DAF9BC, + 19444794072D07AD00DAF9BC, + 19509C03072CD5FF00A958C3, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType = 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = ../include; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.libcfs; + MODULE_START = libcfs_start; + MODULE_STOP = libcfs_stop; + MODULE_VERSION = 1.0.1; + OTHER_CFLAGS = "-D__KERNEL__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = libcfs; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = libcfs; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + 
productName = libcfs; + productReference = 32A4FEC40562C75800D090E7; + productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 19509C04072CD5FF00A958C3, + 19444795072D07AD00DAF9BC, + 19444797072D08AA00DAF9BC, + 19713B77072E8274004E8469, + 19713BB8072E8281004E8469, + 19713BF8072E828E004E8469, + 19713C7B072E82B2004E8469, + 19713CD7072E8A56004E8469, + 19713D1C072E8E39004E8469, + 19713D61072E9109004E8469, + 19713DC3072F994D004E8469, + 19713E1D072FAFB5004E8469, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = libcfs.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/libcfs/libcfs/linux/.cvsignore b/libcfs/libcfs/linux/.cvsignore new file mode 100644 index 0000000..2bc4137 --- /dev/null +++ b/libcfs/libcfs/linux/.cvsignore @@ -0,0 +1,3 @@ +Makefile +Makefile.in +*.o.cmd diff --git a/libcfs/libcfs/linux/Makefile.am b/libcfs/libcfs/linux/Makefile.am new file mode 100644 index 0000000..8bf35cc --- /dev/null +++ b/libcfs/libcfs/linux/Makefile.am @@ -0,0 +1,4 @@ +EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ + linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ + linux-module.c linux-sync.c linux-curproc.c linux-tcpip.c + diff --git a/libcfs/libcfs/linux/linux-curproc.c b/libcfs/libcfs/linux/linux-curproc.c new file mode 100644 index 0000000..e446169 --- /dev/null +++ b/libcfs/libcfs/linux/linux-curproc.c @@ -0,0 +1,133 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API implementation for Linux kernel + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for Linux kernel. + */ + +uid_t cfs_curproc_uid(void) +{ + return current->uid; +} + +gid_t cfs_curproc_gid(void) +{ + return current->gid; +} + +uid_t cfs_curproc_fsuid(void) +{ + return current->fsuid; +} + +gid_t cfs_curproc_fsgid(void) +{ + return current->fsgid; +} + +pid_t cfs_curproc_pid(void) +{ + return current->pid; +} + +int cfs_curproc_groups_nr(void) +{ + int nr; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + nr = current->group_info->ngroups; + task_unlock(current); +#else + nr = current->ngroups; +#endif + return nr; +} + +void cfs_curproc_groups_dump(gid_t *array, int size) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + size = min_t(int, size, current->group_info->ngroups); + memcpy(array, current->group_info->blocks[0], size * sizeof(__u32)); + task_unlock(current); +#else + LASSERT(size <= NGROUPS); + size = min_t(int, size, current->ngroups); + memcpy(array, current->groups, size * sizeof(__u32)); +#endif +} + + +int cfs_curproc_is_in_groups(gid_t gid) +{ + return in_group_p(gid); +} + +mode_t cfs_curproc_umask(void) +{ + return current->fs->umask; +} + +char *cfs_curproc_comm(void) +{ + return current->comm; +} + +cfs_kernel_cap_t cfs_curproc_cap_get(void) +{ + return current->cap_effective; +} + +void cfs_curproc_cap_set(cfs_kernel_cap_t cap) +{ + current->cap_effective = cap; +} + +EXPORT_SYMBOL(cfs_curproc_uid); +EXPORT_SYMBOL(cfs_curproc_pid); +EXPORT_SYMBOL(cfs_curproc_gid); +EXPORT_SYMBOL(cfs_curproc_fsuid); +EXPORT_SYMBOL(cfs_curproc_fsgid); +EXPORT_SYMBOL(cfs_curproc_umask); +EXPORT_SYMBOL(cfs_curproc_comm); +EXPORT_SYMBOL(cfs_curproc_groups_nr); +EXPORT_SYMBOL(cfs_curproc_groups_dump); +EXPORT_SYMBOL(cfs_curproc_is_in_groups); +EXPORT_SYMBOL(cfs_curproc_cap_get); +EXPORT_SYMBOL(cfs_curproc_cap_set); + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/libcfs/linux/linux-debug.c b/libcfs/libcfs/linux/linux-debug.c new file mode 100644 index 0000000..9b2a9dc --- /dev/null +++ b/libcfs/libcfs/linux/linux-debug.c @@ -0,0 +1,239 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include + +#include "tracefile.h" + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +#include +#endif + +char lnet_upcall[1024] = "/usr/lib/lustre/lnet_upcall"; + +void libcfs_run_upcall(char **argv) +{ + int rc; + int argc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + ENTRY; + + argv[0] = lnet_upcall; + argc = 1; + while (argv[argc] != NULL) + argc++; + + LASSERT(argc >= 2); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0 && rc != -ENOENT) { + CERROR("Error %d invoking LNET upcall %s %s%s%s%s%s%s%s%s; " + "check /proc/sys/lnet/upcall\n", + rc, argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? "" : ",..."); + } else { + CDEBUG(D_HA, "Invoked LNET upcall %s %s%s%s%s%s%s%s%s\n", + argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? 
"" : ",..."); + } +} + +void libcfs_run_lbug_upcall(char *file, const char *fn, const int line) +{ + char *argv[6]; + char buf[32]; + + ENTRY; + snprintf (buf, sizeof buf, "%d", line); + + argv[1] = "LBUG"; + argv[2] = file; + argv[3] = (char *)fn; + argv[4] = buf; + argv[5] = NULL; + + libcfs_run_upcall (argv); +} + +#ifdef __arch_um__ +void lbug_with_loc(char *file, const char *func, const int line) +{ + libcfs_catastrophe = 1; + libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, + "LBUG - trying to dump log to %s\n", debug_file_path); + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(file, func, line); + asm("int $3"); + panic("LBUG"); +} +#else +/* coverity[+kill] */ +void lbug_with_loc(char *file, const char *func, const int line) +{ + libcfs_catastrophe = 1; + libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); + + if (in_interrupt()) { + panic("LBUG in interrupt.\n"); + /* not reached */ + } + + libcfs_debug_dumpstack(NULL); + libcfs_debug_dumplog(); + libcfs_run_lbug_upcall(file, func, line); + if (libcfs_panic_on_lbug) + panic("LBUG"); + set_task_state(current, TASK_UNINTERRUPTIBLE); + while (1) + schedule(); +} +#endif /* __arch_um__ */ + +#ifdef __KERNEL__ + +void libcfs_debug_dumpstack(struct task_struct *tsk) +{ +#if defined(__arch_um__) + if (tsk != NULL) + CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", + tsk->pid, UML_PID(tsk)); + //asm("int $3"); +#elif defined(HAVE_SHOW_TASK) + /* this is exported by lustre kernel version 42 */ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + CWARN("showing stack for process %d\n", tsk->pid); + show_task(tsk); +#else + if ((tsk == NULL) || (tsk == current)) + dump_stack(); + else + CWARN("can't show stack: kernel doesn't export show_task\n"); +#endif +} + +cfs_task_t *libcfs_current(void) +{ + CWARN("current task struct is %p\n", current); + return current; +} + +static int panic_notifier(struct notifier_block *self, unsigned long unused1, + void *unused2) +{ + if (libcfs_panic_in_progress) + return 0; + + libcfs_panic_in_progress = 1; + mb(); + +#ifdef LNET_DUMP_ON_PANIC + /* This is currently disabled because it spews far too much to the + * console on the rare cases it is ever triggered. 
*/ + + if (in_interrupt()) { + trace_debug_print(); + } else { + while (current->lock_depth >= 0) + unlock_kernel(); + + libcfs_debug_dumplog_internal((void *)(long)cfs_curproc_pid()); + } +#endif + return 0; +} + +static struct notifier_block libcfs_panic_notifier = { + notifier_call : panic_notifier, + next : NULL, + priority : 10000 +}; + +void libcfs_register_panic_notifier(void) +{ +#ifdef HAVE_ATOMIC_PANIC_NOTIFIER + atomic_notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); +#else + notifier_chain_register(&panic_notifier_list, &libcfs_panic_notifier); +#endif +} + +void libcfs_unregister_panic_notifier(void) +{ +#ifdef HAVE_ATOMIC_PANIC_NOTIFIER + atomic_notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); +#else + notifier_chain_unregister(&panic_notifier_list, &libcfs_panic_notifier); +#endif +} + +EXPORT_SYMBOL(libcfs_debug_dumpstack); +EXPORT_SYMBOL(libcfs_current); + +#endif /* __KERNEL__ */ + +EXPORT_SYMBOL(libcfs_run_upcall); +EXPORT_SYMBOL(libcfs_run_lbug_upcall); +EXPORT_SYMBOL(lbug_with_loc); diff --git a/libcfs/libcfs/linux/linux-fs.c b/libcfs/libcfs/linux/linux-fs.c new file mode 100644 index 0000000..3b15576 --- /dev/null +++ b/libcfs/libcfs/linux/linux-fs.c @@ -0,0 +1,100 @@ +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include + +#include + +cfs_file_t * +cfs_filp_open (const char *name, int flags, int mode, int *err) +{ + /* XXX + * Maybe we need to handle flags and mode in the future + */ + cfs_file_t *filp = NULL; + + filp = filp_open(name, flags, mode); + if (IS_ERR(filp)) { + int rc; + + rc = PTR_ERR(filp); + printk(KERN_ERR "LustreError: can't open %s file: err %d\n", + name, rc); + if (err) + *err = rc; + filp = NULL; + } + return filp; +} + +/* write a userspace buffer to disk. + * NOTE: this returns 0 on success, not the number of bytes written. */ +ssize_t +cfs_user_write (cfs_file_t *filp, const char *buf, size_t count, loff_t *offset) +{ + mm_segment_t fs; + ssize_t size = 0; + + fs = get_fs(); + set_fs(KERNEL_DS); + while (count > 0) { + size = filp->f_op->write(filp, (char *)buf, count, offset); + if (size < 0) + break; + count -= size; + size = 0; + } + set_fs(fs); + + return size; +} + +#if !(CFS_O_CREAT == O_CREAT && CFS_O_EXCL == O_EXCL && \ + CFS_O_TRUNC == O_TRUNC && CFS_O_APPEND == O_APPEND &&\ + CFS_O_NONBLOCK == O_NONBLOCK && CFS_O_NDELAY == O_NDELAY &&\ + CFS_O_SYNC == O_SYNC && CFS_O_ASYNC == FASYNC &&\ + CFS_O_DIRECT == O_DIRECT && CFS_O_LARGEFILE == O_LARGEFILE &&\ + CFS_O_DIRECTORY == O_DIRECTORY && CFS_O_NOFOLLOW == O_NOFOLLOW) + +int cfs_oflags2univ(int flags) +{ + int f; + + f = flags & O_ACCMODE; + f |= (flags & O_CREAT) ? CFS_O_CREAT: 0; + f |= (flags & O_EXCL) ? CFS_O_EXCL: 0; + f |= (flags & O_NOCTTY) ? CFS_O_NOCTTY: 0; + f |= (flags & O_TRUNC) ? CFS_O_TRUNC: 0; + f |= (flags & O_APPEND) ? CFS_O_APPEND: 0; + f |= (flags & O_NONBLOCK) ? CFS_O_NONBLOCK: 0; + f |= (flags & O_SYNC)? CFS_O_SYNC: 0; + f |= (flags & FASYNC)? CFS_O_ASYNC: 0; + f |= (flags & O_DIRECTORY)? CFS_O_DIRECTORY: 0; + f |= (flags & O_DIRECT)? CFS_O_DIRECT: 0; + f |= (flags & O_LARGEFILE)? CFS_O_LARGEFILE: 0; + f |= (flags & O_NOFOLLOW)? CFS_O_NOFOLLOW: 0; + f |= (flags & O_NOATIME)? CFS_O_NOATIME: 0; + return f; +} +#else + +int cfs_oflags2univ(int flags) +{ + return (flags); +} +#endif + +/* + * XXX Liang: we don't need cfs_univ2oflags() now. 
+ */ +int cfs_univ2oflags(int flags) +{ + return (flags); +} + +EXPORT_SYMBOL(cfs_filp_open); +EXPORT_SYMBOL(cfs_user_write); +EXPORT_SYMBOL(cfs_oflags2univ); +EXPORT_SYMBOL(cfs_univ2oflags); diff --git a/libcfs/libcfs/linux/linux-lock.c b/libcfs/libcfs/linux/linux-lock.c new file mode 100644 index 0000000..01511d6 --- /dev/null +++ b/libcfs/libcfs/linux/linux-lock.c @@ -0,0 +1,4 @@ +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include diff --git a/libcfs/libcfs/linux/linux-lwt.c b/libcfs/libcfs/linux/linux-lwt.c new file mode 100644 index 0000000..520c54c --- /dev/null +++ b/libcfs/libcfs/linux/linux-lwt.c @@ -0,0 +1,2 @@ +# define DEBUG_SUBSYSTEM S_LNET + diff --git a/libcfs/libcfs/linux/linux-mem.c b/libcfs/libcfs/linux/linux-mem.c new file mode 100644 index 0000000..30ecf6a --- /dev/null +++ b/libcfs/libcfs/linux/linux-mem.c @@ -0,0 +1,145 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include +#include + +static unsigned int cfs_alloc_flags_to_gfp(u_int32_t flags) +{ + unsigned int mflags = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else if (flags & CFS_ALLOC_WAIT) + mflags |= __GFP_WAIT; + else + mflags |= (__GFP_HIGH | __GFP_WAIT); + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO | __GFP_HIGHIO; +#else + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else + mflags |= __GFP_WAIT; + if (flags & CFS_ALLOC_NOWARN) + mflags |= __GFP_NOWARN; + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO; +#endif + if (flags & CFS_ALLOC_FS) + mflags |= __GFP_FS; + if (flags & CFS_ALLOC_HIGH) + mflags |= __GFP_HIGH; + return mflags; +} + +void * +cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + void *ptr = NULL; + + ptr = kmalloc(nr_bytes, cfs_alloc_flags_to_gfp(flags)); + if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) + memset(ptr, 0, nr_bytes); + return ptr; +} + +void +cfs_free(void *addr) +{ + kfree(addr); +} + +void * +cfs_alloc_large(size_t nr_bytes) +{ + return vmalloc(nr_bytes); +} + +void +cfs_free_large(void *addr) +{ + vfree(addr); +} + +cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order) +{ + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + return alloc_pages(cfs_alloc_flags_to_gfp(flags), order); +} + +void __cfs_free_pages(cfs_page_t *page, unsigned int order) +{ + __free_pages(page, order); +} + +cfs_mem_cache_t * +cfs_mem_cache_create (const char *name, size_t size, size_t offset, + unsigned long flags) +{ +#ifdef HAVE_KMEM_CACHE_CREATE_DTOR + return kmem_cache_create(name, size, offset, flags, NULL, NULL); +#else + return kmem_cache_create(name, size, offset, flags, NULL); +#endif +} + +int +cfs_mem_cache_destroy (cfs_mem_cache_t * cachep) +{ +#ifdef HAVE_KMEM_CACHE_DESTROY_INT + return kmem_cache_destroy(cachep); +#else + kmem_cache_destroy(cachep); + return 0; +#endif +} + +void * +cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags) +{ + return kmem_cache_alloc(cachep, cfs_alloc_flags_to_gfp(flags)); +} + +void +cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp) +{ + return kmem_cache_free(cachep, objp); +} + +EXPORT_SYMBOL(cfs_alloc); +EXPORT_SYMBOL(cfs_free); +EXPORT_SYMBOL(cfs_alloc_large); +EXPORT_SYMBOL(cfs_free_large); +EXPORT_SYMBOL(cfs_alloc_pages); +EXPORT_SYMBOL(__cfs_free_pages); +EXPORT_SYMBOL(cfs_mem_cache_create); +EXPORT_SYMBOL(cfs_mem_cache_destroy); +EXPORT_SYMBOL(cfs_mem_cache_alloc); +EXPORT_SYMBOL(cfs_mem_cache_free); diff --git a/libcfs/libcfs/linux/linux-module.c b/libcfs/libcfs/linux/linux-module.c new file mode 100644 index 0000000..6f21853 --- /dev/null +++ b/libcfs/libcfs/linux/linux-module.c @@ -0,0 +1,151 @@ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +#define LNET_MINOR 240 + +int libcfs_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct libcfs_ioctl_hdr *hdr; + struct libcfs_ioctl_data *data; + int err; + ENTRY; + + hdr = (struct libcfs_ioctl_hdr *)buf; + data = (struct libcfs_ioctl_data *)buf; + + err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); + if (err) + RETURN(err); + + if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { + CERROR("PORTALS: version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len + buf >= end) { + CERROR("PORTALS: user buffer exceeds kernel buffer\n"); + RETURN(-EINVAL); + } + + + if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { + CERROR("PORTALS: user buffer too small for ioctl\n"); + RETURN(-EINVAL); + } + + err = copy_from_user(buf, (void *)arg, hdr->ioc_len); + if (err) + RETURN(err); + + if (libcfs_ioctl_is_invalid(data)) { + CERROR("PORTALS: ioctl not correctly formatted\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1) + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + + if (data->ioc_inllen2) + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + size_round(data->ioc_inllen1); + + RETURN(0); +} + +int libcfs_ioctl_popdata(void *arg, void *data, int size) +{ + if (copy_to_user((char *)arg, data, size)) + return -EFAULT; + return 0; +} + +extern struct cfs_psdev_ops libcfs_psdev_ops; + +static int +libcfs_psdev_open(struct inode * inode, struct file * file) +{ + struct libcfs_device_userstate **pdu = NULL; + int rc = 0; + + if (!inode) + return (-EINVAL); + pdu = (struct libcfs_device_userstate **)&file->private_data; + if (libcfs_psdev_ops.p_open != NULL) + rc = libcfs_psdev_ops.p_open(0, (void *)pdu); + else + return (-EPERM); + return rc; +} + +/* called when closing /dev/device */ +static int +libcfs_psdev_release(struct inode * inode, struct file * file) +{ + struct libcfs_device_userstate *pdu; + int rc = 0; + + if (!inode) + return (-EINVAL); + pdu = file->private_data; + if (libcfs_psdev_ops.p_close != NULL) + rc = libcfs_psdev_ops.p_close(0, (void *)pdu); + else + rc = -EPERM; + return rc; +} 
+ +static int +libcfs_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct cfs_psdev_file pfile; + int rc = 0; + + if (current->fsuid != 0) + return -EACCES; + + if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || + _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || + _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + return (-EINVAL); + } + + /* Handle platform-dependent IOC requests */ + switch (cmd) { + case IOC_LIBCFS_PANIC: + if (!capable (CAP_SYS_BOOT)) + return (-EPERM); + panic("debugctl-invoked panic"); + return (0); + case IOC_LIBCFS_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + return -EPERM; + /* go thought */ + } + + pfile.off = 0; + pfile.private_data = file->private_data; + if (libcfs_psdev_ops.p_ioctl != NULL) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); + else + rc = -EPERM; + return (rc); +} + +static struct file_operations libcfs_fops = { + ioctl: libcfs_ioctl, + open: libcfs_psdev_open, + release: libcfs_psdev_release +}; + +cfs_psdev_t libcfs_dev = { + LNET_MINOR, + "lnet", + &libcfs_fops +}; + + diff --git a/libcfs/libcfs/linux/linux-prim.c b/libcfs/libcfs/linux/linux-prim.c new file mode 100644 index 0000000..cc02829 --- /dev/null +++ b/libcfs/libcfs/linux/linux-prim.c @@ -0,0 +1,154 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + */ + +#define DEBUG_SUBSYSTEM S_LNET +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include + +#if defined(CONFIG_KGDB) +#include +#endif + +void cfs_enter_debugger(void) +{ +#if defined(CONFIG_KGDB) + BREAKPOINT(); +#elif defined(__arch_um__) + asm("int $3"); +#else + /* nothing */ +#endif +} + +void cfs_daemonize(char *str) { + unsigned long flags; + + lock_kernel(); +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) + daemonize(str); +#else + daemonize(); + exit_files(current); + reparent_to_init(); + snprintf (current->comm, sizeof (current->comm), "%s", str); +#endif + SIGNAL_MASK_LOCK(current, flags); + sigfillset(¤t->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + unlock_kernel(); +} + +int cfs_daemonize_ctxt(char *str) { + struct task_struct *tsk = current; + struct fs_struct *fs = NULL; + + cfs_daemonize(str); + fs = copy_fs_struct(tsk->fs); + if (fs == NULL) + return -ENOMEM; + exit_fs(tsk); + tsk->fs = fs; + return 0; +} + + +sigset_t +cfs_get_blockedsigs(void) +{ + unsigned long flags; + sigset_t old; + + SIGNAL_MASK_LOCK(current, flags); + old = current->blocked; + SIGNAL_MASK_UNLOCK(current, flags); + return old; +} + +sigset_t +cfs_block_allsigs(void) +{ + unsigned long flags; + sigset_t old; + + SIGNAL_MASK_LOCK(current, flags); + old = current->blocked; + sigfillset(¤t->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + return old; +} + +sigset_t +cfs_block_sigs(sigset_t bits) +{ + unsigned long flags; + sigset_t old; + + SIGNAL_MASK_LOCK(current, flags); + old = current->blocked; + current->blocked = bits; + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + return old; +} + +void +cfs_restore_sigs (cfs_sigset_t old) +{ + unsigned long flags; + + SIGNAL_MASK_LOCK(current, flags); + current->blocked = old; + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); +} + +int +cfs_signal_pending(void) +{ + return signal_pending(current); +} + 
+void +cfs_clear_sigpending(void) +{ + unsigned long flags; + + SIGNAL_MASK_LOCK(current, flags); + CLEAR_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); +} + +int +libcfs_arch_init(void) +{ + return 0; +} + +void +libcfs_arch_cleanup(void) +{ + return; +} + +EXPORT_SYMBOL(libcfs_arch_init); +EXPORT_SYMBOL(libcfs_arch_cleanup); +EXPORT_SYMBOL(cfs_daemonize); +EXPORT_SYMBOL(cfs_daemonize_ctxt); +EXPORT_SYMBOL(cfs_block_allsigs); +EXPORT_SYMBOL(cfs_block_sigs); +EXPORT_SYMBOL(cfs_get_blockedsigs); +EXPORT_SYMBOL(cfs_restore_sigs); +EXPORT_SYMBOL(cfs_signal_pending); +EXPORT_SYMBOL(cfs_clear_sigpending); diff --git a/libcfs/libcfs/linux/linux-proc.c b/libcfs/libcfs/linux/linux-proc.c new file mode 100644 index 0000000..ae3312a --- /dev/null +++ b/libcfs/libcfs/linux/linux-proc.c @@ -0,0 +1,443 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * Author: Zach Brown + * Author: Peter J. Braam + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "tracefile.h" + +static cfs_sysctl_table_header_t *lnet_table_header = NULL; +extern char lnet_upcall[1024]; + +#define PSDEV_LNET (0x100) +enum { + PSDEV_DEBUG = 1, /* control debugging */ + PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ + PSDEV_PRINTK, /* force all messages to console */ + PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */ + PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */ + PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */ + PSDEV_CONSOLE_BACKOFF, /* delay increase factor */ + PSDEV_DEBUG_PATH, /* crashdump log location */ + PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ + PSDEV_LNET_UPCALL, /* User mode upcall script */ + PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */ + PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */ + PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */ + PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */ + PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */ + PSDEV_LNET_DEBUG_MB, /* size of debug buffer */ +}; + +static int +proc_call_handler(void *data, int write, + loff_t *ppos, void *buffer, size_t *lenp, + int (*handler)(void *data, int write, + loff_t pos, void *buffer, int len)) +{ + int rc = handler(data, write, *ppos, buffer, *lenp); + + if (rc < 0) + return rc; + + if (write) { + *ppos += *lenp; + } else { + *lenp = rc; + *ppos += rc; + } + return 0; 
+} + +#define DECLARE_PROC_HANDLER(name) \ +static int \ +LL_PROC_PROTO(name) \ +{ \ + DECLARE_LL_PROC_PPOS_DECL; \ + \ + return proc_call_handler(table->data, write, \ + ppos, buffer, lenp, \ + __##name); \ +} + +static int __proc_dobitmasks(void *data, int write, + loff_t pos, void *buffer, int nob) +{ + const int tmpstrlen = 512; + char *tmpstr; + int rc; + unsigned int *mask = data; + int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0; + int is_printk = (mask == &libcfs_printk) ? 1 : 0; + + rc = trace_allocate_string_buffer(&tmpstr, tmpstrlen); + if (rc < 0) + return rc; + + if (!write) { + libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys); + rc = strlen(tmpstr); + + if (pos >= rc) { + rc = 0; + } else { + rc = trace_copyout_string(buffer, nob, + tmpstr + pos, "\n"); + } + } else { + rc = trace_copyin_string(tmpstr, tmpstrlen, buffer, nob); + if (rc < 0) + return rc; + + rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys); + /* Always print LBUG/LASSERT to console, so keep this mask */ + if (is_printk) + *mask |= D_EMERG; + } + + trace_free_string_buffer(tmpstr, tmpstrlen); + return rc; +} + +DECLARE_PROC_HANDLER(proc_dobitmasks) + +static int __proc_dump_kernel(void *data, int write, + loff_t pos, void *buffer, int nob) +{ + if (!write) + return 0; + + return trace_dump_debug_buffer_usrstr(buffer, nob); +} + +DECLARE_PROC_HANDLER(proc_dump_kernel) + +static int __proc_daemon_file(void *data, int write, + loff_t pos, void *buffer, int nob) +{ + if (!write) { + int len = strlen(tracefile); + + if (pos >= len) + return 0; + + return trace_copyout_string(buffer, nob, + tracefile + pos, "\n"); + } + + return trace_daemon_command_usrstr(buffer, nob); +} + +DECLARE_PROC_HANDLER(proc_daemon_file) + +static int __proc_debug_mb(void *data, int write, + loff_t pos, void *buffer, int nob) +{ + if (!write) { + char tmpstr[32]; + int len = snprintf(tmpstr, sizeof(tmpstr), "%d", + trace_get_debug_mb()); + + if (pos >= len) + return 0; + + return trace_copyout_string(buffer, nob, tmpstr + pos, "\n"); + } + + return trace_set_debug_mb_usrstr(buffer, nob); +} + +DECLARE_PROC_HANDLER(proc_debug_mb) + +int LL_PROC_PROTO(proc_console_max_delay_cs) +{ + int rc, max_delay_cs; + cfs_sysctl_table_t dummy = *table; + cfs_duration_t d; + + dummy.data = &max_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (!write) { /* read */ + max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100); + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + return rc; + } + + /* write */ + max_delay_cs = 0; + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + return rc; + if (max_delay_cs <= 0) + return -EINVAL; + + d = cfs_time_seconds(max_delay_cs) / 100; + if (d == 0 || d < libcfs_console_min_delay) + return -EINVAL; + libcfs_console_max_delay = d; + + return rc; +} + +int LL_PROC_PROTO(proc_console_min_delay_cs) +{ + int rc, min_delay_cs; + cfs_sysctl_table_t dummy = *table; + cfs_duration_t d; + + dummy.data = &min_delay_cs; + dummy.proc_handler = &proc_dointvec; + + if (!write) { /* read */ + min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100); + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + return rc; + } + + /* write */ + min_delay_cs = 0; + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + return rc; + if (min_delay_cs <= 0) + return -EINVAL; + + d = cfs_time_seconds(min_delay_cs) / 100; + if (d == 0 || d > libcfs_console_max_delay) + return -EINVAL; + libcfs_console_min_delay = d; + + 
return rc; +} + +int LL_PROC_PROTO(proc_console_backoff) +{ + int rc, backoff; + cfs_sysctl_table_t dummy = *table; + + dummy.data = &backoff; + dummy.proc_handler = &proc_dointvec; + + if (!write) { /* read */ + backoff= libcfs_console_backoff; + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + return rc; + } + + /* write */ + backoff = 0; + rc = ll_proc_dointvec(&dummy, write, filp, buffer, lenp, ppos); + if (rc < 0) + return rc; + if (backoff <= 0) + return -EINVAL; + + libcfs_console_backoff = backoff; + + return rc; +} + +static cfs_sysctl_table_t lnet_table[] = { + /* + * NB No .strategy entries have been provided since sysctl(8) prefers + * to go via /proc for portability. + */ + { + .ctl_name = PSDEV_DEBUG, + .procname = "debug", + .data = &libcfs_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = PSDEV_SUBSYSTEM_DEBUG, + .procname = "subsystem_debug", + .data = &libcfs_subsystem_debug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = PSDEV_PRINTK, + .procname = "printk", + .data = &libcfs_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dobitmasks + }, + { + .ctl_name = PSDEV_CONSOLE_RATELIMIT, + .procname = "console_ratelimit", + .data = &libcfs_console_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = PSDEV_CONSOLE_MAX_DELAY_CS, + .procname = "console_max_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_max_delay_cs + }, + { + .ctl_name = PSDEV_CONSOLE_MIN_DELAY_CS, + .procname = "console_min_delay_centisecs", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_min_delay_cs + }, + { + .ctl_name = PSDEV_CONSOLE_BACKOFF, + .procname = "console_backoff", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_console_backoff + }, + + { + .ctl_name = PSDEV_DEBUG_PATH, + .procname = "debug_path", + .data = debug_file_path, + .maxlen = sizeof(debug_file_path), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + + { + .ctl_name = PSDEV_LNET_UPCALL, + .procname = "upcall", + .data = lnet_upcall, + .maxlen = sizeof(lnet_upcall), + .mode = 0644, + .proc_handler = &proc_dostring, + }, + { + .ctl_name = PSDEV_LNET_MEMUSED, + .procname = "memused", + .data = (int *)&libcfs_kmemory.counter, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = PSDEV_LNET_CATASTROPHE, + .procname = "catastrophe", + .data = &libcfs_catastrophe, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = PSDEV_LNET_PANIC_ON_LBUG, + .procname = "panic_on_lbug", + .data = &libcfs_panic_on_lbug, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = PSDEV_LNET_DUMP_KERNEL, + .procname = "dump_kernel", + .mode = 0200, + .proc_handler = &proc_dump_kernel, + }, + { + .ctl_name = PSDEV_LNET_DAEMON_FILE, + .procname = "daemon_file", + .mode = 0644, + .proc_handler = &proc_daemon_file, + }, + { + .ctl_name = PSDEV_LNET_DEBUG_MB, + .procname = "debug_mb", + .mode = 0644, + .proc_handler = &proc_debug_mb, + }, + {0} +}; + +static cfs_sysctl_table_t top_table[2] = { + { + .ctl_name = PSDEV_LNET, + .procname = "lnet", + .data = NULL, + .maxlen = 0, + .mode = 0555, + .child = lnet_table + }, + {0} +}; + +int insert_proc(void) +{ +#ifdef CONFIG_SYSCTL + if (lnet_table_header == NULL) + lnet_table_header = 
cfs_register_sysctl_table(top_table, 0); +#endif + return 0; +} + +void remove_proc(void) +{ +#ifdef CONFIG_SYSCTL + if (lnet_table_header != NULL) + cfs_unregister_sysctl_table(lnet_table_header); + + lnet_table_header = NULL; +#endif +} diff --git a/libcfs/libcfs/linux/linux-sync.c b/libcfs/libcfs/linux/linux-sync.c new file mode 100644 index 0000000..520c54c --- /dev/null +++ b/libcfs/libcfs/linux/linux-sync.c @@ -0,0 +1,2 @@ +# define DEBUG_SUBSYSTEM S_LNET + diff --git a/libcfs/libcfs/linux/linux-tcpip.c b/libcfs/libcfs/linux/linux-tcpip.c new file mode 100644 index 0000000..e8ceafd --- /dev/null +++ b/libcfs/libcfs/linux/linux-tcpip.c @@ -0,0 +1,683 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + +#include +#include +#include +/* For sys_open & sys_close */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) +#include +#else +#include +#endif + +int +libcfs_sock_ioctl(int cmd, unsigned long arg) +{ + mm_segment_t oldmm = get_fs(); + struct socket *sock; + int fd; + int rc; + struct file *sock_filp; + + rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return rc; + } + + fd = sock_map_fd(sock); + if (fd < 0) { + rc = fd; + sock_release(sock); + goto out; + } + + sock_filp = fget(fd); + if (!sock_filp) { + rc = -ENOMEM; + goto out_fd; + } + + set_fs(KERNEL_DS); +#ifdef HAVE_UNLOCKED_IOCTL + if (sock_filp->f_op->unlocked_ioctl) + rc = sock_filp->f_op->unlocked_ioctl(sock_filp, cmd, arg); + else +#endif + { + lock_kernel(); + rc =sock_filp->f_op->ioctl(sock_filp->f_dentry->d_inode, + sock_filp, cmd, arg); + unlock_kernel(); + } + set_fs(oldmm); + + fput(sock_filp); + + out_fd: + sys_close(fd); + out: + return rc; +} + +int +libcfs_ipif_query (char *name, int *up, __u32 *ip, __u32 *mask) +{ + struct ifreq ifr; + int nob; + int rc; + __u32 val; + + nob = strnlen(name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + CERROR("Interface name %s too long\n", name); + return -EINVAL; + } + + CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); + + strcpy(ifr.ifr_name, name); + rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); + + if (rc != 0) { + CERROR("Can't get flags for interface %s\n", name); + return rc; + } + + if ((ifr.ifr_flags & IFF_UP) == 0) { + CDEBUG(D_NET, "Interface %s down\n", name); + *up = 0; + *ip = *mask = 0; + return 0; + } + + *up = 1; + + strcpy(ifr.ifr_name, name); + ifr.ifr_addr.sa_family = AF_INET; + rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); + + if (rc != 0) { + CERROR("Can't get IP address for interface %s\n", name); + return rc; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *ip = ntohl(val); + + strcpy(ifr.ifr_name, name); + 
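libcfs_sock_ioctl() above lets kernel code issue the ordinary SIOCGIF* ioctls by creating a throwaway TCP socket, mapping it to a file descriptor and invoking the socket's ioctl method under KERNEL_DS; libcfs_ipif_query() uses it for the interface flags, address and netmask. The same helper can fetch other interface attributes the same way. A hedged sketch for the MTU, with example_ipif_mtu purely illustrative:

static int example_ipif_mtu(char *name, int *mtu)
{
        struct ifreq ifr;
        int          rc;

        if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
                return -EINVAL;                 /* name too long */

        strcpy(ifr.ifr_name, name);
        rc = libcfs_sock_ioctl(SIOCGIFMTU, (unsigned long)&ifr);
        if (rc != 0) {
                CERROR("Can't get MTU for interface %s\n", name);
                return rc;
        }

        *mtu = ifr.ifr_mtu;
        return 0;
}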
ifr.ifr_addr.sa_family = AF_INET; + rc = libcfs_sock_ioctl(SIOCGIFNETMASK, (unsigned long)&ifr); + + if (rc != 0) { + CERROR("Can't get netmask for interface %s\n", name); + return rc; + } + + val = ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr; + *mask = ntohl(val); + + return 0; +} + +EXPORT_SYMBOL(libcfs_ipif_query); + +int +libcfs_ipif_enumerate (char ***namesp) +{ + /* Allocate and fill in 'names', returning # interfaces/error */ + char **names; + int toobig; + int nalloc; + int nfound; + struct ifreq *ifr; + struct ifconf ifc; + int rc; + int nob; + int i; + + + nalloc = 16; /* first guess at max interfaces */ + toobig = 0; + for (;;) { + if (nalloc * sizeof(*ifr) > CFS_PAGE_SIZE) { + toobig = 1; + nalloc = CFS_PAGE_SIZE/sizeof(*ifr); + CWARN("Too many interfaces: only enumerating first %d\n", + nalloc); + } + + LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); + if (ifr == NULL) { + CERROR ("ENOMEM enumerating up to %d interfaces\n", nalloc); + rc = -ENOMEM; + goto out0; + } + + ifc.ifc_buf = (char *)ifr; + ifc.ifc_len = nalloc * sizeof(*ifr); + + rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); + + if (rc < 0) { + CERROR ("Error %d enumerating interfaces\n", rc); + goto out1; + } + + LASSERT (rc == 0); + + nfound = ifc.ifc_len/sizeof(*ifr); + LASSERT (nfound <= nalloc); + + if (nfound < nalloc || toobig) + break; + + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + nalloc *= 2; + } + + if (nfound == 0) + goto out1; + + LIBCFS_ALLOC(names, nfound * sizeof(*names)); + if (names == NULL) { + rc = -ENOMEM; + goto out1; + } + /* NULL out all names[i] */ + memset (names, 0, nfound * sizeof(*names)); + + for (i = 0; i < nfound; i++) { + + nob = strnlen (ifr[i].ifr_name, IFNAMSIZ); + if (nob == IFNAMSIZ) { + /* no space for terminating NULL */ + CERROR("interface name %.*s too long (%d max)\n", + nob, ifr[i].ifr_name, IFNAMSIZ); + rc = -ENAMETOOLONG; + goto out2; + } + + LIBCFS_ALLOC(names[i], IFNAMSIZ); + if (names[i] == NULL) { + rc = -ENOMEM; + goto out2; + } + + memcpy(names[i], ifr[i].ifr_name, nob); + names[i][nob] = 0; + } + + *namesp = names; + rc = nfound; + + out2: + if (rc < 0) + libcfs_ipif_free_enumeration(names, nfound); + out1: + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + out0: + return rc; +} + +EXPORT_SYMBOL(libcfs_ipif_enumerate); + +void +libcfs_ipif_free_enumeration (char **names, int n) +{ + int i; + + LASSERT (n > 0); + + for (i = 0; i < n && names[i] != NULL; i++) + LIBCFS_FREE(names[i], IFNAMSIZ); + + LIBCFS_FREE(names, n * sizeof(*names)); +} + +EXPORT_SYMBOL(libcfs_ipif_free_enumeration); + +int +libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + mm_segment_t oldmm = get_fs(); + long ticks = timeout * HZ; + unsigned long then; + struct timeval tv; + + LASSERT (nob > 0); + /* Caller may pass a zero timeout if she thinks the socket buffer is + * empty enough to take the whole message immediately */ + + for (;;) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = (timeout == 0) ? 
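Taken together, libcfs_ipif_enumerate(), libcfs_ipif_query() and libcfs_ipif_free_enumeration() above give a caller everything needed to walk the node's IP interfaces. A small sketch of that walk; example_list_interfaces is an illustrative name, and the error handling mirrors the conventions used in this file:

static void example_list_interfaces(void)
{
        char **names;
        int    n, i, rc;

        n = libcfs_ipif_enumerate(&names);
        if (n < 0) {
                CERROR("Error %d enumerating interfaces\n", n);
                return;
        }
        if (n == 0)                     /* nothing configured */
                return;

        for (i = 0; i < n; i++) {
                __u32 ip, mask;
                int   up;

                rc = libcfs_ipif_query(names[i], &up, &ip, &mask);
                if (rc != 0 || !up)
                        continue;

                CDEBUG(D_NET, "%s: %u.%u.%u.%u\n", names[i],
                       (ip >> 24) & 0xff, (ip >> 16) & 0xff,
                       (ip >> 8) & 0xff, ip & 0xff);
        }

        libcfs_ipif_free_enumeration(names, n);
}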
MSG_DONTWAIT : 0 + }; + + if (timeout != 0) { + /* Set send timeout to remaining time */ + tv = (struct timeval) { + .tv_sec = ticks / HZ, + .tv_usec = ((ticks % HZ) * 1000000) / HZ + }; + set_fs(KERNEL_DS); + rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, + (char *)&tv, sizeof(tv)); + set_fs(oldmm); + if (rc != 0) { + CERROR("Can't set socket send timeout " + "%ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + } + + set_fs (KERNEL_DS); + then = jiffies; + rc = sock_sendmsg (sock, &msg, iov.iov_len); + ticks -= jiffies - then; + set_fs (oldmm); + + if (rc == nob) + return 0; + + if (rc < 0) + return rc; + + if (rc == 0) { + CERROR ("Unexpected zero rc\n"); + return (-ECONNABORTED); + } + + if (ticks <= 0) + return -EAGAIN; + + buffer = ((char *)buffer) + rc; + nob -= rc; + } + + return (0); +} +EXPORT_SYMBOL(libcfs_sock_write); + +int +libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout) +{ + int rc; + mm_segment_t oldmm = get_fs(); + long ticks = timeout * HZ; + unsigned long then; + struct timeval tv; + + LASSERT (nob > 0); + LASSERT (ticks > 0); + + for (;;) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + + /* Set receive timeout to remaining time */ + tv = (struct timeval) { + .tv_sec = ticks / HZ, + .tv_usec = ((ticks % HZ) * 1000000) / HZ + }; + set_fs(KERNEL_DS); + rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, + (char *)&tv, sizeof(tv)); + set_fs(oldmm); + if (rc != 0) { + CERROR("Can't set socket recv timeout %ld.%06d: %d\n", + (long)tv.tv_sec, (int)tv.tv_usec, rc); + return rc; + } + + set_fs(KERNEL_DS); + then = jiffies; + rc = sock_recvmsg(sock, &msg, iov.iov_len, 0); + ticks -= jiffies - then; + set_fs(oldmm); + + if (rc < 0) + return rc; + + if (rc == 0) + return -ECONNRESET; + + buffer = ((char *)buffer) + rc; + nob -= rc; + + if (nob == 0) + return 0; + + if (ticks <= 0) + return -ETIMEDOUT; + } +} + +EXPORT_SYMBOL(libcfs_sock_read); + +static int +libcfs_sock_create (struct socket **sockp, int *fatal, + __u32 local_ip, int local_port) +{ + struct sockaddr_in locaddr; + struct socket *sock; + int rc; + int option; + mm_segment_t oldmm = get_fs(); + + /* All errors are fatal except bind failure if the port is in use */ + *fatal = 1; + + rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); + *sockp = sock; + if (rc != 0) { + CERROR ("Can't create socket: %d\n", rc); + return (rc); + } + + set_fs (KERNEL_DS); + option = 1; + rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); + goto failed; + } + + if (local_ip != 0 || local_port != 0) { + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(local_port); + locaddr.sin_addr.s_addr = (local_ip == 0) ? 
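libcfs_sock_write() and libcfs_sock_read() above move a whole buffer or fail: they keep charging the elapsed jiffies against the caller's budget via SO_SNDTIMEO/SO_RCVTIMEO, and return -EAGAIN (write) or -ETIMEDOUT (read) when the timeout is exhausted, or -ECONNABORTED/-ECONNRESET when the peer goes away. A sketch of a caller exchanging a fixed-size handshake over an already-connected socket; the magic value and example_say_hello are illustrative only:

static int example_say_hello(struct socket *sock)
{
        __u32 magic = 0x1234abcd;       /* illustrative payload */
        __u32 ack   = 0;
        int   rc;

        /* allow the peer up to 10 seconds in each direction */
        rc = libcfs_sock_write(sock, &magic, sizeof(magic), 10);
        if (rc != 0)                    /* -EAGAIN on timeout */
                return rc;

        rc = libcfs_sock_read(sock, &ack, sizeof(ack), 10);
        if (rc != 0)                    /* -ETIMEDOUT / -ECONNRESET */
                return rc;

        return (ack == magic) ? 0 : -EPROTO;
}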
+ INADDR_ANY : htonl(local_ip); + + rc = sock->ops->bind(sock, (struct sockaddr *)&locaddr, + sizeof(locaddr)); + if (rc == -EADDRINUSE) { + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *fatal = 0; + goto failed; + } + if (rc != 0) { + CERROR("Error trying to bind to port %d: %d\n", + local_port, rc); + goto failed; + } + } + + return 0; + + failed: + sock_release(sock); + return rc; +} + +int +libcfs_sock_setbuf (struct socket *sock, int txbufsize, int rxbufsize) +{ + mm_segment_t oldmm = get_fs(); + int option; + int rc; + + if (txbufsize != 0) { + option = txbufsize; + set_fs (KERNEL_DS); + rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set send buffer %d: %d\n", + option, rc); + return (rc); + } + } + + if (rxbufsize != 0) { + option = rxbufsize; + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set receive buffer %d: %d\n", + option, rc); + return (rc); + } + } + + return 0; +} + +EXPORT_SYMBOL(libcfs_sock_setbuf); + +int +libcfs_sock_getaddr (struct socket *sock, int remote, __u32 *ip, int *port) +{ + struct sockaddr_in sin; + int len = sizeof (sin); + int rc; + + rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len, + remote ? 2 : 0); + if (rc != 0) { + CERROR ("Error %d getting sock %s IP/port\n", + rc, remote ? "peer" : "local"); + return rc; + } + + if (ip != NULL) + *ip = ntohl (sin.sin_addr.s_addr); + + if (port != NULL) + *port = ntohs (sin.sin_port); + + return 0; +} + +EXPORT_SYMBOL(libcfs_sock_getaddr); + +int +libcfs_sock_getbuf (struct socket *sock, int *txbufsize, int *rxbufsize) +{ + + if (txbufsize != NULL) { + *txbufsize = sock->sk->sk_sndbuf; + } + + if (rxbufsize != NULL) { + *rxbufsize = sock->sk->sk_rcvbuf; + } + + return 0; +} + +EXPORT_SYMBOL(libcfs_sock_getbuf); + +int +libcfs_sock_listen (struct socket **sockp, + __u32 local_ip, int local_port, int backlog) +{ + int fatal; + int rc; + + rc = libcfs_sock_create(sockp, &fatal, local_ip, local_port); + if (rc != 0) { + if (!fatal) + CERROR("Can't create socket: port %d already in use\n", + local_port); + return rc; + } + + rc = (*sockp)->ops->listen(*sockp, backlog); + if (rc == 0) + return 0; + + CERROR("Can't set listen backlog %d: %d\n", backlog, rc); + sock_release(*sockp); + return rc; +} + +EXPORT_SYMBOL(libcfs_sock_listen); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) +int sock_create_lite(int family, int type, int protocol, struct socket **res) +{ + struct socket *sock; + + sock = sock_alloc(); + if (sock == NULL) + return -ENOMEM; + + sock->type = type; + *res = sock; + + return 0; +} +#endif + +int +libcfs_sock_accept (struct socket **newsockp, struct socket *sock) +{ + wait_queue_t wait; + struct socket *newsock; + int rc; + + init_waitqueue_entry(&wait, current); + + /* XXX this should add a ref to sock->ops->owner, if + * TCP could be a module */ + rc = sock_create_lite(PF_PACKET, sock->type, IPPROTO_TCP, &newsock); + if (rc) { + CERROR("Can't allocate socket\n"); + return rc; + } + + newsock->ops = sock->ops; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(sock->sk->sk_sleep, &wait); + + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + if (rc == -EAGAIN) { + /* Nothing ready, so wait for activity */ + schedule(); + rc = sock->ops->accept(sock, newsock, O_NONBLOCK); + } + + remove_wait_queue(sock->sk->sk_sleep, &wait); + set_current_state(TASK_RUNNING); + + 
if (rc != 0) + goto failed; + + *newsockp = newsock; + return 0; + + failed: + sock_release(newsock); + return rc; +} + +EXPORT_SYMBOL(libcfs_sock_accept); + +void +libcfs_sock_abort_accept (struct socket *sock) +{ + wake_up_all(sock->sk->sk_sleep); +} + +EXPORT_SYMBOL(libcfs_sock_abort_accept); + +int +libcfs_sock_connect (struct socket **sockp, int *fatal, + __u32 local_ip, int local_port, + __u32 peer_ip, int peer_port) +{ + struct sockaddr_in srvaddr; + int rc; + + rc = libcfs_sock_create(sockp, fatal, local_ip, local_port); + if (rc != 0) + return rc; + + memset (&srvaddr, 0, sizeof (srvaddr)); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons(peer_port); + srvaddr.sin_addr.s_addr = htonl(peer_ip); + + rc = (*sockp)->ops->connect(*sockp, + (struct sockaddr *)&srvaddr, sizeof(srvaddr), + 0); + if (rc == 0) + return 0; + + /* EADDRNOTAVAIL probably means we're already connected to the same + * peer/port on the same local port on a differently typed + * connection. Let our caller retry with a different local + * port... */ + *fatal = !(rc == -EADDRNOTAVAIL); + + CDEBUG(*fatal ? D_NETERROR : D_NET, + "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, + HIPQUAD(local_ip), local_port, HIPQUAD(peer_ip), peer_port); + + sock_release(*sockp); + return rc; +} + +EXPORT_SYMBOL(libcfs_sock_connect); + +void +libcfs_sock_release (struct socket *sock) +{ + sock_release(sock); +} + +EXPORT_SYMBOL(libcfs_sock_release); diff --git a/libcfs/libcfs/linux/linux-tracefile.c b/libcfs/libcfs/linux/linux-tracefile.c new file mode 100644 index 0000000..5956027 --- /dev/null +++ b/libcfs/libcfs/linux/linux-tracefile.c @@ -0,0 +1,266 @@ +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE + +#include +#include +#include "tracefile.h" + +#ifndef get_cpu +#define get_cpu() smp_processor_id() +#define put_cpu() do { } while (0) +#endif + +/* three types of trace_data in linux */ +enum { + TCD_TYPE_PROC = 0, + TCD_TYPE_SOFTIRQ, + TCD_TYPE_IRQ, + TCD_TYPE_MAX +}; + +/* percents to share the total debug memory for each type */ +static unsigned int pages_factor[TCD_TYPE_MAX] = { + 80, /* 80% pages for TCD_TYPE_PROC */ + 10, /* 10% pages for TCD_TYPE_SOFTIRQ */ + 10 /* 10% pages for TCD_TYPE_IRQ */ +}; + +char *trace_console_buffers[NR_CPUS][3]; + +struct rw_semaphore tracefile_sem; + +int tracefile_init_arch() +{ + int i; + int j; + struct trace_cpu_data *tcd; + + init_rwsem(&tracefile_sem); + + /* initialize trace_data */ + memset(trace_data, 0, sizeof(trace_data)); + for (i = 0; i < TCD_TYPE_MAX; i++) { + trace_data[i]=kmalloc(sizeof(union trace_data_union)*NR_CPUS, + GFP_KERNEL); + if (trace_data[i] == NULL) + goto out; + + } + + /* arch related info initialized */ + tcd_for_each(tcd, i, j) { + tcd->tcd_pages_factor = pages_factor[i]; + tcd->tcd_type = i; + tcd->tcd_cpu = j; + } + + for (i = 0; i < num_possible_cpus(); i++) + for (j = 0; j < 3; j++) { + trace_console_buffers[i][j] = + kmalloc(TRACE_CONSOLE_BUFFER_SIZE, + GFP_KERNEL); + + if (trace_console_buffers[i][j] == NULL) + goto out; + } + + return 0; + +out: + tracefile_fini_arch(); + printk(KERN_ERR "lnet: No enough memory\n"); + return -ENOMEM; + +} + +void tracefile_fini_arch() +{ + int i; + int j; + + for (i = 0; i < num_possible_cpus(); i++) + for (j = 0; j < 3; j++) + if (trace_console_buffers[i][j] != NULL) { + kfree(trace_console_buffers[i][j]); + trace_console_buffers[i][j] = NULL; + } + + for (i = 0; trace_data[i] != NULL; i++) { + kfree(trace_data[i]); + trace_data[i] = NULL; + } +} + +void tracefile_read_lock() +{ 
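libcfs_sock_connect() above distinguishes fatal from non-fatal failures: a bind collision (-EADDRINUSE) or an -EADDRNOTAVAIL from connect only means this particular local port cannot be used, so the caller is expected to retry with another one. A sketch of that retry loop over the privileged port range; example_connect_any_port and the chosen range are illustrative, not mandated by the patch:

static int example_connect_any_port(struct socket **sockp,
                                    __u32 peer_ip, int peer_port)
{
        int port;
        int fatal;
        int rc;

        for (port = 1023; port > 512; port--) {
                rc = libcfs_sock_connect(sockp, &fatal,
                                         0, port,       /* any local IP */
                                         peer_ip, peer_port);
                if (rc == 0)
                        return 0;
                if (fatal)              /* retrying won't help */
                        return rc;
        }

        return -EADDRINUSE;             /* every local port was busy */
}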
+ down_read(&tracefile_sem); +} + +void tracefile_read_unlock() +{ + up_read(&tracefile_sem); +} + +void tracefile_write_lock() +{ + down_write(&tracefile_sem); +} + +void tracefile_write_unlock() +{ + up_write(&tracefile_sem); +} + +char * +trace_get_console_buffer(void) +{ + int cpu = get_cpu(); + int idx; + + if (in_irq()) { + idx = 0; + } else if (in_softirq()) { + idx = 1; + } else { + idx = 2; + } + + return trace_console_buffers[cpu][idx]; +} + +void +trace_put_console_buffer(char *buffer) +{ + put_cpu(); +} + +struct trace_cpu_data * +trace_get_tcd(void) +{ + int cpu; + + cpu = get_cpu(); + if (in_irq()) + return &(*trace_data[TCD_TYPE_IRQ])[cpu].tcd; + else if (in_softirq()) + return &(*trace_data[TCD_TYPE_SOFTIRQ])[cpu].tcd; + return &(*trace_data[TCD_TYPE_PROC])[cpu].tcd; +} + +void +trace_put_tcd (struct trace_cpu_data *tcd) +{ + put_cpu(); +} + +int trace_lock_tcd(struct trace_cpu_data *tcd) +{ + __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); + if (tcd->tcd_type == TCD_TYPE_IRQ) + local_irq_disable(); + else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) + local_bh_disable(); + return 1; +} + +void trace_unlock_tcd(struct trace_cpu_data *tcd) +{ + __LASSERT(tcd->tcd_type < TCD_TYPE_MAX); + if (tcd->tcd_type == TCD_TYPE_IRQ) + local_irq_enable(); + else if (tcd->tcd_type == TCD_TYPE_SOFTIRQ) + local_bh_enable(); +} + +int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage) +{ + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + return tcd->tcd_cpu == tage->cpu; +} + +void +set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, + const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = current->pid; + header->ph_line_num = line; +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) + header->ph_extern_pid = current->thread.extern_pid; +#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + header->ph_extern_pid = current->thread.mode.tt.extern_pid; +#else + header->ph_extern_pid = 0; +#endif + return; +} + +void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn) +{ + char *prefix = "Lustre", *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = "LustreError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "LustreError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "Lustre"; + ptype = KERN_WARNING; + } else if ((mask & (D_CONSOLE | libcfs_printk)) != 0) { + prefix = "Lustre"; + ptype = KERN_INFO; + } + + if ((mask & D_CONSOLE) != 0) { + printk("%s%s: %.*s", ptype, prefix, len, buf); + } else { + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, + hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); + } + return; +} + +int trace_max_debug_mb(void) +{ + int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT)); + + return MAX(512, (total_mb * 80)/100); +} + +void +trace_call_on_all_cpus(void (*fn)(void *arg), void *arg) +{ + cpumask_t cpus_allowed = current->cpus_allowed; + /* use cpus_allowed to quiet 2.4 UP kernel warning only */ + cpumask_t m = cpus_allowed; + int cpu; + + /* Run the given routine on every CPU in thread context */ + for (cpu = 0; cpu < 
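trace_get_tcd()/trace_put_tcd() and trace_lock_tcd()/trace_unlock_tcd() above are meant to be used as bracket pairs: the get pins the caller to a CPU (get_cpu()) and picks the per-context trace data, the lock masks interrupts or bottom halves according to the tcd type, and the put drops the CPU reference again. A minimal sketch of the calling sequence, assuming only the struct fields visible in this patch:

static void example_touch_tcd(void)
{
        struct trace_cpu_data *tcd;

        tcd = trace_get_tcd();          /* get_cpu(): pins us to this CPU */
        if (tcd == NULL)                /* an arch may refuse in this context */
                return;

        if (trace_lock_tcd(tcd)) {      /* IRQs/BHs masked as required */
                /* ... safely touch tcd->tcd_pages / tcd->tcd_cur_pages ... */
                trace_unlock_tcd(tcd);
        }

        trace_put_tcd(tcd);             /* put_cpu() */
}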
num_possible_cpus(); cpu++) { + if (!cpu_online(cpu)) + continue; + + cpus_clear(m); + cpu_set(cpu, m); + set_cpus_allowed(current, m); + + fn(arg); + + set_cpus_allowed(current, cpus_allowed); + } +} diff --git a/libcfs/libcfs/linux/linux-utils.c b/libcfs/libcfs/linux/linux-utils.c new file mode 100644 index 0000000..60f7cb8 --- /dev/null +++ b/libcfs/libcfs/linux/linux-utils.c @@ -0,0 +1,60 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * miscellaneous libcfs stuff + */ +#define DEBUG_SUBSYSTEM S_LNET +#include + +/* + * Convert server error code to client format. Error codes are from + * Linux errno.h, so for Linux client---identity. + */ +int convert_server_error(__u64 ecode) +{ + return ecode; +} +EXPORT_SYMBOL(convert_server_error); + +/* + * convert flag from client to server. + */ +int convert_client_oflag(int cflag, int *result) +{ + *result = cflag; + return 0; +} +EXPORT_SYMBOL(convert_client_oflag); + +void cfs_stack_trace_fill(struct cfs_stack_trace *trace) +{} + +EXPORT_SYMBOL(cfs_stack_trace_fill); + +void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) +{ + return NULL; +} +EXPORT_SYMBOL(cfs_stack_trace_frame); + diff --git a/libcfs/libcfs/lwt.c b/libcfs/libcfs/lwt.c new file mode 100644 index 0000000..6455ece --- /dev/null +++ b/libcfs/libcfs/lwt.c @@ -0,0 +1,270 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2003 Cluster File Systems, Inc. + * Author: Eric Barton + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifndef AUTOCONF_INCLUDED +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_LNET + +#include + +#if LWT_SUPPORT + +#if !KLWT_SUPPORT +int lwt_enabled; +lwt_cpu_t lwt_cpus[NR_CPUS]; +#endif + +int lwt_pages_per_cpu; + +/* NB only root is allowed to retrieve LWT info; it's an open door into the + * kernel... 
*/ + +int +lwt_lookup_string (int *size, char *knl_ptr, + char *user_ptr, int user_size) +{ + int maxsize = 128; + + /* knl_ptr was retrieved from an LWT snapshot and the caller wants to + * turn it into a string. NB we can crash with an access violation + * trying to determine the string length, so we're trusting our + * caller... */ + + if (!capable(CAP_SYS_ADMIN)) + return (-EPERM); + + if (user_size > 0 && + maxsize > user_size) + maxsize = user_size; + + *size = strnlen (knl_ptr, maxsize - 1) + 1; + + if (user_ptr != NULL) { + if (user_size < 4) + return (-EINVAL); + + if (copy_to_user (user_ptr, knl_ptr, *size)) + return (-EFAULT); + + /* Did I truncate the string? */ + if (knl_ptr[*size - 1] != 0) + copy_to_user (user_ptr + *size - 4, "...", 4); + } + + return (0); +} + +int +lwt_control (int enable, int clear) +{ + lwt_page_t *p; + int i; + int j; + + if (!capable(CAP_SYS_ADMIN)) + return (-EPERM); + + if (!enable) { + LWT_EVENT(0,0,0,0); + lwt_enabled = 0; + mb(); + /* give people some time to stop adding traces */ + schedule_timeout(10); + } + + for (i = 0; i < num_online_cpus(); i++) { + p = lwt_cpus[i].lwtc_current_page; + + if (p == NULL) + return (-ENODATA); + + if (!clear) + continue; + + for (j = 0; j < lwt_pages_per_cpu; j++) { + memset (p->lwtp_events, 0, CFS_PAGE_SIZE); + + p = list_entry (p->lwtp_list.next, + lwt_page_t, lwtp_list); + } + } + + if (enable) { + lwt_enabled = 1; + mb(); + LWT_EVENT(0,0,0,0); + } + + return (0); +} + +int +lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size) +{ + const int events_per_page = CFS_PAGE_SIZE / sizeof(lwt_event_t); + const int bytes_per_page = events_per_page * sizeof(lwt_event_t); + lwt_page_t *p; + int i; + int j; + + if (!capable(CAP_SYS_ADMIN)) + return (-EPERM); + + *ncpu = num_online_cpus(); + *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page; + *now = get_cycles(); + + if (user_ptr == NULL) + return (0); + + for (i = 0; i < num_online_cpus(); i++) { + p = lwt_cpus[i].lwtc_current_page; + + if (p == NULL) + return (-ENODATA); + + for (j = 0; j < lwt_pages_per_cpu; j++) { + if (copy_to_user(user_ptr, p->lwtp_events, + bytes_per_page)) + return (-EFAULT); + + user_ptr = ((char *)user_ptr) + bytes_per_page; + p = list_entry(p->lwtp_list.next, + lwt_page_t, lwtp_list); + + } + } + + return (0); +} + +int +lwt_init () +{ + int i; + int j; + + for (i = 0; i < num_online_cpus(); i++) + if (lwt_cpus[i].lwtc_current_page != NULL) + return (-EALREADY); + + LASSERT (!lwt_enabled); + + /* NULL pointers, zero scalars */ + memset (lwt_cpus, 0, sizeof (lwt_cpus)); + lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * CFS_PAGE_SIZE); + + for (i = 0; i < num_online_cpus(); i++) + for (j = 0; j < lwt_pages_per_cpu; j++) { + struct page *page = alloc_page (GFP_KERNEL); + lwt_page_t *lwtp; + + if (page == NULL) { + CERROR ("Can't allocate page\n"); + lwt_fini (); + return (-ENOMEM); + } + + LIBCFS_ALLOC(lwtp, sizeof (*lwtp)); + if (lwtp == NULL) { + CERROR ("Can't allocate lwtp\n"); + __free_page(page); + lwt_fini (); + return (-ENOMEM); + } + + lwtp->lwtp_page = page; + lwtp->lwtp_events = page_address(page); + memset (lwtp->lwtp_events, 0, CFS_PAGE_SIZE); + + if (j == 0) { + INIT_LIST_HEAD (&lwtp->lwtp_list); + lwt_cpus[i].lwtc_current_page = lwtp; + } else { + list_add (&lwtp->lwtp_list, + &lwt_cpus[i].lwtc_current_page->lwtp_list); + } + } + + lwt_enabled = 1; + mb(); + + LWT_EVENT(0,0,0,0); + + return (0); +} + +void +lwt_fini () +{ + int i; + + lwt_control(0, 0); + + for (i = 0; 
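lwt_snapshot() above reports the event-buffer geometry even when no user buffer is supplied, so a caller can size its buffer first and copy the events out in a second call. A hedged sketch of that two-step use; example_dump_lwt and the -ENOSPC convention are illustrative:

static int example_dump_lwt(void *user_buf, int user_len)
{
        cycles_t now;
        int      ncpu;
        int      total;
        int      rc;

        rc = lwt_snapshot(&now, &ncpu, &total, NULL, 0);
        if (rc != 0)
                return rc;

        /* total == ncpu * lwt_pages_per_cpu * bytes_per_page */
        if (user_len < total)
                return -ENOSPC;

        return lwt_snapshot(&now, &ncpu, &total, user_buf, user_len);
}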
i < num_online_cpus(); i++) + while (lwt_cpus[i].lwtc_current_page != NULL) { + lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page; + + if (list_empty (&lwtp->lwtp_list)) { + lwt_cpus[i].lwtc_current_page = NULL; + } else { + lwt_cpus[i].lwtc_current_page = + list_entry (lwtp->lwtp_list.next, + lwt_page_t, lwtp_list); + + list_del (&lwtp->lwtp_list); + } + + __free_page (lwtp->lwtp_page); + LIBCFS_FREE (lwtp, sizeof (*lwtp)); + } +} + +EXPORT_SYMBOL(lwt_enabled); +EXPORT_SYMBOL(lwt_cpus); + +EXPORT_SYMBOL(lwt_init); +EXPORT_SYMBOL(lwt_fini); +EXPORT_SYMBOL(lwt_lookup_string); +EXPORT_SYMBOL(lwt_control); +EXPORT_SYMBOL(lwt_snapshot); +#endif diff --git a/libcfs/libcfs/module.c b/libcfs/libcfs/module.c new file mode 100644 index 0000000..5e273cb --- /dev/null +++ b/libcfs/libcfs/module.c @@ -0,0 +1,423 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include +#include "tracefile.h" + +void +kportal_memhog_free (struct libcfs_device_userstate *ldu) +{ + cfs_page_t **level0p = &ldu->ldu_memhog_root_page; + cfs_page_t **level1p; + cfs_page_t **level2p; + int count1; + int count2; + + if (*level0p != NULL) { + + level1p = (cfs_page_t **)cfs_page_address(*level0p); + count1 = 0; + + while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && + *level1p != NULL) { + + level2p = (cfs_page_t **)cfs_page_address(*level1p); + count2 = 0; + + while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && + *level2p != NULL) { + + cfs_free_page(*level2p); + ldu->ldu_memhog_pages--; + level2p++; + count2++; + } + + cfs_free_page(*level1p); + ldu->ldu_memhog_pages--; + level1p++; + count1++; + } + + cfs_free_page(*level0p); + ldu->ldu_memhog_pages--; + + *level0p = NULL; + } + + LASSERT (ldu->ldu_memhog_pages == 0); +} + +int +kportal_memhog_alloc (struct libcfs_device_userstate *ldu, int npages, int flags) +{ + cfs_page_t **level0p; + cfs_page_t **level1p; + cfs_page_t **level2p; + int count1; + int count2; + + LASSERT (ldu->ldu_memhog_pages == 0); + LASSERT (ldu->ldu_memhog_root_page == NULL); + + if (npages < 0) + return -EINVAL; + + if (npages == 0) + return 0; + + level0p = &ldu->ldu_memhog_root_page; + *level0p = cfs_alloc_page(flags); + if (*level0p == NULL) + return -ENOMEM; + ldu->ldu_memhog_pages++; + + level1p = (cfs_page_t **)cfs_page_address(*level0p); + count1 = 0; + memset(level1p, 0, CFS_PAGE_SIZE); + + while (ldu->ldu_memhog_pages < npages && + count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { + + if (cfs_signal_pending()) + return (-EINTR); + + *level1p = cfs_alloc_page(flags); + if (*level1p == NULL) + return -ENOMEM; + ldu->ldu_memhog_pages++; + + level2p = (cfs_page_t 
**)cfs_page_address(*level1p); + count2 = 0; + memset(level2p, 0, CFS_PAGE_SIZE); + + while (ldu->ldu_memhog_pages < npages && + count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { + + if (cfs_signal_pending()) + return (-EINTR); + + *level2p = cfs_alloc_page(flags); + if (*level2p == NULL) + return (-ENOMEM); + ldu->ldu_memhog_pages++; + + level2p++; + count2++; + } + + level1p++; + count1++; + } + + return 0; +} + +/* called when opening /dev/device */ +static int libcfs_psdev_open(unsigned long flags, void *args) +{ + struct libcfs_device_userstate *ldu; + ENTRY; + + PORTAL_MODULE_USE; + + LIBCFS_ALLOC(ldu, sizeof(*ldu)); + if (ldu != NULL) { + ldu->ldu_memhog_pages = 0; + ldu->ldu_memhog_root_page = NULL; + } + *(struct libcfs_device_userstate **)args = ldu; + + RETURN(0); +} + +/* called when closing /dev/device */ +static int libcfs_psdev_release(unsigned long flags, void *args) +{ + struct libcfs_device_userstate *ldu; + ENTRY; + + ldu = (struct libcfs_device_userstate *)args; + if (ldu != NULL) { + kportal_memhog_free(ldu); + LIBCFS_FREE(ldu, sizeof(*ldu)); + } + + PORTAL_MODULE_UNUSE; + RETURN(0); +} + +static struct rw_semaphore ioctl_list_sem; +static struct list_head ioctl_list; + +int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (!list_empty(&hand->item)) + rc = -EBUSY; + else + list_add_tail(&hand->item, &ioctl_list); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_register_ioctl); + +int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) +{ + int rc = 0; + + down_write(&ioctl_list_sem); + if (list_empty(&hand->item)) + rc = -ENOENT; + else + list_del_init(&hand->item); + up_write(&ioctl_list_sem); + + return rc; +} +EXPORT_SYMBOL(libcfs_deregister_ioctl); + +static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg) +{ + char buf[1024]; + int err = -EINVAL; + struct libcfs_ioctl_data *data; + ENTRY; + + /* 'cmd' and permissions get checked in our arch-specific caller */ + + if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) { + CERROR("PORTALS ioctl: data error\n"); + RETURN(-EINVAL); + } + data = (struct libcfs_ioctl_data *)buf; + + switch (cmd) { + case IOC_LIBCFS_CLEAR_DEBUG: + libcfs_debug_clear_buffer(); + RETURN(0); + /* + * case IOC_LIBCFS_PANIC: + * Handled in arch/cfs_module.c + */ + case IOC_LIBCFS_MARK_DEBUG: + if (data->ioc_inlbuf1 == NULL || + data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') + RETURN(-EINVAL); + libcfs_debug_mark_buffer(data->ioc_inlbuf1); + RETURN(0); +#if LWT_SUPPORT + case IOC_LIBCFS_LWT_CONTROL: + err = lwt_control ((data->ioc_flags & 1) != 0, + (data->ioc_flags & 2) != 0); + break; + + case IOC_LIBCFS_LWT_SNAPSHOT: { + cycles_t now; + int ncpu; + int total_size; + + err = lwt_snapshot (&now, &ncpu, &total_size, + data->ioc_pbuf1, data->ioc_plen1); + data->ioc_u64[0] = now; + data->ioc_u32[0] = ncpu; + data->ioc_u32[1] = total_size; + + /* Hedge against broken user/kernel typedefs (e.g. 
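libcfs_register_ioctl()/libcfs_deregister_ioctl() above let other modules hook into the catch-all branch of libcfs_ioctl(): each registered handler is offered the command and returns -EINVAL to mean "not mine", anything else to claim it. A sketch of a hypothetical client, assuming the handler structure exposes just the item list linkage and the handle_ioctl(cmd, data) callback seen in use here; IOC_LIBCFS_EXAMPLE and the example_* names are illustrative:

static int example_handle_ioctl(unsigned int cmd,
                                struct libcfs_ioctl_data *data)
{
        if (cmd != IOC_LIBCFS_EXAMPLE)  /* hypothetical command number */
                return -EINVAL;         /* not mine: try the next handler */

        /* ... act on data->ioc_* fields ... */
        return 0;
}

static struct libcfs_ioctl_handler example_handler = {
        .handle_ioctl = example_handle_ioctl,
};

static int example_client_init(void)
{
        /* item must be an empty list, or registration returns -EBUSY */
        CFS_INIT_LIST_HEAD(&example_handler.item);
        return libcfs_register_ioctl(&example_handler);
}

static void example_client_exit(void)
{
        libcfs_deregister_ioctl(&example_handler);
}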
cycles_t) */ + data->ioc_u32[2] = sizeof(lwt_event_t); + data->ioc_u32[3] = offsetof(lwt_event_t, lwte_where); + + if (err == 0 && + libcfs_ioctl_popdata(arg, data, sizeof (*data))) + err = -EFAULT; + break; + } + + case IOC_LIBCFS_LWT_LOOKUP_STRING: + err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1, + data->ioc_pbuf2, data->ioc_plen2); + if (err == 0 && + libcfs_ioctl_popdata(arg, data, sizeof (*data))) + err = -EFAULT; + break; +#endif + case IOC_LIBCFS_MEMHOG: + if (pfile->private_data == NULL) { + err = -EINVAL; + } else { + kportal_memhog_free(pfile->private_data); + /* XXX The ioc_flags is not GFP flags now, need to be fixed */ + err = kportal_memhog_alloc(pfile->private_data, + data->ioc_count, + data->ioc_flags); + if (err != 0) + kportal_memhog_free(pfile->private_data); + } + break; + + case IOC_LIBCFS_PING_TEST: { + extern void (kping_client)(struct libcfs_ioctl_data *); + void (*ping)(struct libcfs_ioctl_data *); + + CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n", + data->ioc_count, libcfs_nid2str(data->ioc_nid), + libcfs_nid2str(data->ioc_nid)); + ping = PORTAL_SYMBOL_GET(kping_client); + if (!ping) + CERROR("PORTAL_SYMBOL_GET failed\n"); + else { + ping(data); + PORTAL_SYMBOL_PUT(kping_client); + } + RETURN(0); + } + + default: { + struct libcfs_ioctl_handler *hand; + err = -EINVAL; + down_read(&ioctl_list_sem); + list_for_each_entry(hand, &ioctl_list, item) { + err = hand->handle_ioctl(cmd, data); + if (err != -EINVAL) { + if (err == 0) + err = libcfs_ioctl_popdata(arg, + data, sizeof (*data)); + break; + } + } + up_read(&ioctl_list_sem); + break; + } + } + + RETURN(err); +} + +struct cfs_psdev_ops libcfs_psdev_ops = { + libcfs_psdev_open, + libcfs_psdev_release, + NULL, + NULL, + libcfs_ioctl +}; + +extern int insert_proc(void); +extern void remove_proc(void); +MODULE_AUTHOR("Peter J. 
Braam "); +MODULE_DESCRIPTION("Portals v3.1"); +MODULE_LICENSE("GPL"); + +extern cfs_psdev_t libcfs_dev; +extern struct rw_semaphore tracefile_sem; +extern struct semaphore trace_thread_sem; + +extern void libcfs_init_nidstrings(void); +extern int libcfs_arch_init(void); +extern void libcfs_arch_cleanup(void); + +static int init_libcfs_module(void) +{ + int rc; + + libcfs_arch_init(); + libcfs_init_nidstrings(); + init_rwsem(&tracefile_sem); + init_mutex(&trace_thread_sem); + init_rwsem(&ioctl_list_sem); + CFS_INIT_LIST_HEAD(&ioctl_list); + + rc = libcfs_debug_init(5 * 1024 * 1024); + if (rc < 0) { + printk(KERN_ERR "LustreError: libcfs_debug_init: %d\n", rc); + return (rc); + } + +#if LWT_SUPPORT + rc = lwt_init(); + if (rc != 0) { + CERROR("lwt_init: error %d\n", rc); + goto cleanup_debug; + } +#endif + rc = cfs_psdev_register(&libcfs_dev); + if (rc) { + CERROR("misc_register: error %d\n", rc); + goto cleanup_lwt; + } + + rc = insert_proc(); + if (rc) { + CERROR("insert_proc: error %d\n", rc); + goto cleanup_deregister; + } + + CDEBUG (D_OTHER, "portals setup OK\n"); + return (0); + + cleanup_deregister: + cfs_psdev_deregister(&libcfs_dev); + cleanup_lwt: +#if LWT_SUPPORT + lwt_fini(); + cleanup_debug: +#endif + libcfs_debug_cleanup(); + return rc; +} + +static void exit_libcfs_module(void) +{ + int rc; + + remove_proc(); + + CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", + atomic_read(&libcfs_kmemory)); + + rc = cfs_psdev_deregister(&libcfs_dev); + if (rc) + CERROR("misc_deregister error %d\n", rc); + +#if LWT_SUPPORT + lwt_fini(); +#endif + + if (atomic_read(&libcfs_kmemory) != 0) + CERROR("Portals memory leaked: %d bytes\n", + atomic_read(&libcfs_kmemory)); + + rc = libcfs_debug_cleanup(); + if (rc) + printk(KERN_ERR "LustreError: libcfs_debug_cleanup: %d\n", rc); + libcfs_arch_cleanup(); +} + +cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module); diff --git a/libcfs/libcfs/nidstrings.c b/libcfs/libcfs/nidstrings.c new file mode 100644 index 0000000..5f17f5a --- /dev/null +++ b/libcfs/libcfs/nidstrings.c @@ -0,0 +1,540 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#ifndef __KERNEL__ +#ifdef HAVE_GETHOSTBYNAME +# include +#endif +#endif + +/* CAVEAT VENDITOR! Keep the canonical string representation of nets/nids + * consistent in all conversion functions. Some code fragments are copied + * around for the sake of clarity... + */ + +/* CAVEAT EMPTOR! Racey temporary buffer allocation! + * Choose the number of nidstrings to support the MAXIMUM expected number of + * concurrent users. 
If there are more, the returned string will be volatile. + * NB this number must allow for a process to be descheduled for a timeslice + * between getting its string and using it. + */ + +#define LNET_NIDSTR_COUNT 128 /* # of nidstrings */ +#define LNET_NIDSTR_SIZE 32 /* size of each one (see below for usage) */ + +static char libcfs_nidstrings[LNET_NIDSTR_COUNT][LNET_NIDSTR_SIZE]; +static int libcfs_nidstring_idx = 0; + +#ifdef __KERNEL__ +static spinlock_t libcfs_nidstring_lock; + +void libcfs_init_nidstrings (void) +{ + spin_lock_init(&libcfs_nidstring_lock); +} + +# define NIDSTR_LOCK(f) spin_lock_irqsave(&libcfs_nidstring_lock, f) +# define NIDSTR_UNLOCK(f) spin_unlock_irqrestore(&libcfs_nidstring_lock, f) +#else +# define NIDSTR_LOCK(f) (f=0) /* avoid unused var warnings */ +# define NIDSTR_UNLOCK(f) (f=0) +#endif + +static char * +libcfs_next_nidstring (void) +{ + char *str; + unsigned long flags; + + NIDSTR_LOCK(flags); + + str = libcfs_nidstrings[libcfs_nidstring_idx++]; + if (libcfs_nidstring_idx == + sizeof(libcfs_nidstrings)/sizeof(libcfs_nidstrings[0])) + libcfs_nidstring_idx = 0; + + NIDSTR_UNLOCK(flags); + return str; +} + +static int libcfs_lo_str2addr(const char *str, int nob, __u32 *addr); +static void libcfs_ip_addr2str(__u32 addr, char *str); +static int libcfs_ip_str2addr(const char *str, int nob, __u32 *addr); +static void libcfs_decnum_addr2str(__u32 addr, char *str); +static void libcfs_hexnum_addr2str(__u32 addr, char *str); +static int libcfs_num_str2addr(const char *str, int nob, __u32 *addr); + +struct netstrfns { + int nf_type; + char *nf_name; + char *nf_modname; + void (*nf_addr2str)(__u32 addr, char *str); + int (*nf_str2addr)(const char *str, int nob, __u32 *addr); +}; + +static struct netstrfns libcfs_netstrfns[] = { + {/* .nf_type */ LOLND, + /* .nf_name */ "lo", + /* .nf_modname */ "klolnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_lo_str2addr}, + {/* .nf_type */ SOCKLND, + /* .nf_name */ "tcp", + /* .nf_modname */ "ksocklnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ O2IBLND, + /* .nf_name */ "o2ib", + /* .nf_modname */ "ko2iblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ CIBLND, + /* .nf_name */ "cib", + /* .nf_modname */ "kciblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ OPENIBLND, + /* .nf_name */ "openib", + /* .nf_modname */ "kopeniblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ IIBLND, + /* .nf_name */ "iib", + /* .nf_modname */ "kiiblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ VIBLND, + /* .nf_name */ "vib", + /* .nf_modname */ "kviblnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ RALND, + /* .nf_name */ "ra", + /* .nf_modname */ "kralnd", + /* .nf_addr2str */ libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ QSWLND, + /* .nf_name */ "elan", + /* .nf_modname */ "kqswlnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + {/* .nf_type */ GMLND, + /* .nf_name */ "gm", + /* .nf_modname */ "kgmlnd", + /* .nf_addr2str */ libcfs_hexnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + {/* .nf_type */ MXLND, + /* .nf_name */ "mx", + /* .nf_modname */ "kmxlnd", + /* .nf_addr2str */ 
libcfs_ip_addr2str, + /* .nf_str2addr */ libcfs_ip_str2addr}, + {/* .nf_type */ PTLLND, + /* .nf_name */ "ptl", + /* .nf_modname */ "kptllnd", + /* .nf_addr2str */ libcfs_decnum_addr2str, + /* .nf_str2addr */ libcfs_num_str2addr}, + /* placeholder for net0 alias. It MUST BE THE LAST ENTRY */ + {/* .nf_type */ -1}, +}; + +const int libcfs_nnetstrfns = sizeof(libcfs_netstrfns)/sizeof(libcfs_netstrfns[0]); + +int +libcfs_lo_str2addr(const char *str, int nob, __u32 *addr) +{ + *addr = 0; + return 1; +} + +void +libcfs_ip_addr2str(__u32 addr, char *str) +{ +#if 0 /* never lookup */ +#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME + __u32 netip = htonl(addr); + struct hostent *he = gethostbyaddr(&netip, sizeof(netip), AF_INET); + + if (he != NULL) { + snprintf(str, LNET_NIDSTR_SIZE, "%s", he->h_name); + return; + } +#endif +#endif + snprintf(str, LNET_NIDSTR_SIZE, "%u.%u.%u.%u", + (addr >> 24) & 0xff, (addr >> 16) & 0xff, + (addr >> 8) & 0xff, addr & 0xff); +} + +/* CAVEAT EMPTOR XscanfX + * I use "%n" at the end of a sscanf format to detect trailing junk. However + * sscanf may return immediately if it sees the terminating '0' in a string, so + * I initialise the %n variable to the expected length. If sscanf sets it; + * fine, if it doesn't, then the scan ended at the end of the string, which is + * fine too :) */ + +int +libcfs_ip_str2addr(const char *str, int nob, __u32 *addr) +{ + int a; + int b; + int c; + int d; + int n = nob; /* XscanfX */ + + /* numeric IP? */ + if (sscanf(str, "%u.%u.%u.%u%n", &a, &b, &c, &d, &n) >= 4 && + n == nob && + (a & ~0xff) == 0 && (b & ~0xff) == 0 && + (c & ~0xff) == 0 && (d & ~0xff) == 0) { + *addr = ((a<<24)|(b<<16)|(c<<8)|d); + return 1; + } + +#if !defined(__KERNEL__) && defined HAVE_GETHOSTBYNAME + /* known hostname? */ + if (('a' <= str[0] && str[0] <= 'z') || + ('A' <= str[0] && str[0] <= 'Z')) { + char *tmp; + + LIBCFS_ALLOC(tmp, nob + 1); + if (tmp != NULL) { + struct hostent *he; + + memcpy(tmp, str, nob); + tmp[nob] = 0; + + he = gethostbyname(tmp); + + LIBCFS_FREE(tmp, nob); + + if (he != NULL) { + __u32 ip = *(__u32 *)he->h_addr; + + *addr = ntohl(ip); + return 1; + } + } + } +#endif + return 0; +} + +void +libcfs_decnum_addr2str(__u32 addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "%u", addr); +} + +void +libcfs_hexnum_addr2str(__u32 addr, char *str) +{ + snprintf(str, LNET_NIDSTR_SIZE, "0x%x", addr); +} + +int +libcfs_num_str2addr(const char *str, int nob, __u32 *addr) +{ + int n; + + n = nob; + if (sscanf(str, "0x%x%n", addr, &n) >= 1 && n == nob) + return 1; + + n = nob; + if (sscanf(str, "0X%x%n", addr, &n) >= 1 && n == nob) + return 1; + + n = nob; + if (sscanf(str, "%u%n", addr, &n) >= 1 && n == nob) + return 1; + + return 0; +} + +struct netstrfns * +libcfs_lnd2netstrfns(int lnd) +{ + int i; + + if (lnd >= 0) + for (i = 0; i < libcfs_nnetstrfns; i++) + if (lnd == libcfs_netstrfns[i].nf_type) + return &libcfs_netstrfns[i]; + + return NULL; +} + +struct netstrfns * +libcfs_name2netstrfns(const char *name) +{ + int i; + + for (i = 0; i < libcfs_nnetstrfns; i++) + if (libcfs_netstrfns[i].nf_type >= 0 && + !strcmp(libcfs_netstrfns[i].nf_name, name)) + return &libcfs_netstrfns[i]; + + return NULL; +} + +int +libcfs_isknown_lnd(int type) +{ + return libcfs_lnd2netstrfns(type) != NULL; +} + +char * +libcfs_lnd2modname(int lnd) +{ + struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); + + return (nf == NULL) ? 
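The "XscanfX" trick above is shared by all the numeric parsers: the %n slot is preset to the expected length, so a clean parse either leaves it untouched (the scan ended exactly at the end of the buffer) or sets it to the full length, while trailing junk leaves it short and the candidate is rejected. The same idiom in isolation, with example_parse_port as an illustrative wrapper:

static int example_parse_port(const char *str, int nob, int *port)
{
        int val;
        int n = nob;    /* preset to the expected length */

        if (sscanf(str, "%d%n", &val, &n) >= 1 && n == nob) {
                *port = val;            /* a number with no trailing junk */
                return 1;
        }

        return 0;
}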
NULL : nf->nf_modname; +} + +char * +libcfs_lnd2str(int lnd) +{ + char *str; + struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); + + if (nf != NULL) + return nf->nf_name; + + str = libcfs_next_nidstring(); + snprintf(str, LNET_NIDSTR_SIZE, "?%u?", lnd); + return str; +} + +int +libcfs_str2lnd(const char *str) +{ + struct netstrfns *nf = libcfs_name2netstrfns(str); + + if (nf != NULL) + return nf->nf_type; + + return -1; +} + +char * +libcfs_net2str(__u32 net) +{ + int lnd = LNET_NETTYP(net); + int num = LNET_NETNUM(net); + struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); + char *str = libcfs_next_nidstring(); + + if (nf == NULL) + snprintf(str, LNET_NIDSTR_SIZE, "<%u:%u>", lnd, num); + else if (num == 0) + snprintf(str, LNET_NIDSTR_SIZE, "%s", nf->nf_name); + else + snprintf(str, LNET_NIDSTR_SIZE, "%s%u", nf->nf_name, num); + + return str; +} + +char * +libcfs_nid2str(lnet_nid_t nid) +{ + __u32 addr = LNET_NIDADDR(nid); + __u32 net = LNET_NIDNET(nid); + int lnd = LNET_NETTYP(net); + int nnum = LNET_NETNUM(net); + struct netstrfns *nf; + char *str; + int nob; + + if (nid == LNET_NID_ANY) + return "LNET_NID_ANY"; + + nf = libcfs_lnd2netstrfns(lnd); + str = libcfs_next_nidstring(); + + if (nf == NULL) + snprintf(str, LNET_NIDSTR_SIZE, "%x@<%u:%u>", addr, lnd, nnum); + else { + nf->nf_addr2str(addr, str); + nob = strlen(str); + if (nnum == 0) + snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s", + nf->nf_name); + else + snprintf(str + nob, LNET_NIDSTR_SIZE - nob, "@%s%u", + nf->nf_name, nnum); + } + + return str; +} + +static struct netstrfns * +libcfs_str2net_internal(const char *str, __u32 *net) +{ + struct netstrfns *nf; + int nob; + int netnum; + int i; + + for (i = 0; i < libcfs_nnetstrfns; i++) { + nf = &libcfs_netstrfns[i]; + if (nf->nf_type >= 0 && + !strncmp(str, nf->nf_name, strlen(nf->nf_name))) + break; + } + + if (i == libcfs_nnetstrfns) + return NULL; + + nob = strlen(nf->nf_name); + + if (strlen(str) == (unsigned int)nob) { + netnum = 0; + } else { + if (nf->nf_type == LOLND) /* net number not allowed */ + return NULL; + + str += nob; + i = strlen(str); + if (sscanf(str, "%u%n", &netnum, &i) < 1 || + i != (int)strlen(str)) + return NULL; + } + + *net = LNET_MKNET(nf->nf_type, netnum); + return nf; +} + +__u32 +libcfs_str2net(const char *str) +{ + __u32 net; + + if (libcfs_str2net_internal(str, &net) != NULL) + return net; + + return LNET_NIDNET(LNET_NID_ANY); +} + +lnet_nid_t +libcfs_str2nid(const char *str) +{ + const char *sep = strchr(str, '@'); + struct netstrfns *nf; + __u32 net; + __u32 addr; + + if (sep != NULL) { + nf = libcfs_str2net_internal(sep + 1, &net); + if (nf == NULL) + return LNET_NID_ANY; + } else { + sep = str + strlen(str); + net = LNET_MKNET(SOCKLND, 0); + nf = libcfs_lnd2netstrfns(SOCKLND); + LASSERT (nf != NULL); + } + + if (!nf->nf_str2addr(str, sep - str, &addr)) + return LNET_NID_ANY; + + return LNET_MKNID(net, addr); +} + +char * +libcfs_id2str(lnet_process_id_t id) +{ + char *str = libcfs_next_nidstring(); + + if (id.pid == LNET_PID_ANY) { + snprintf(str, LNET_NIDSTR_SIZE, + "LNET_PID_ANY-%s", libcfs_nid2str(id.nid)); + return str; + } + + snprintf(str, LNET_NIDSTR_SIZE, "%s%u-%s", + ((id.pid & LNET_PID_USERFLAG) != 0) ? 
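libcfs_str2nid() and libcfs_nid2str() above are inverses for well-formed NIDs, but the strings come from the small rotating pool of static buffers described earlier, so callers must use or copy them promptly. A sketch of the round trip under that caveat; example_nid_round_trip is an illustrative name:

static void example_nid_round_trip(void)
{
        lnet_nid_t nid;
        char       buf[LNET_NIDSTR_SIZE];

        nid = libcfs_str2nid("192.168.0.1@tcp1");
        if (nid == LNET_NID_ANY) {
                CERROR("unparsable NID\n");
                return;
        }

        /* copy out of the volatile nidstring pool before reusing it */
        strncpy(buf, libcfs_nid2str(nid), sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = 0;

        /* prints: net tcp1, nid 192.168.0.1@tcp1 */
        CDEBUG(D_NET, "net %s, nid %s\n",
               libcfs_net2str(LNET_NIDNET(nid)), buf);
}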
"U" : "", + (id.pid & ~LNET_PID_USERFLAG), libcfs_nid2str(id.nid)); + return str; +} + +int +libcfs_str2anynid(lnet_nid_t *nidp, const char *str) +{ + if (!strcmp(str, "*")) { + *nidp = LNET_NID_ANY; + return 1; + } + + *nidp = libcfs_str2nid(str); + return *nidp != LNET_NID_ANY; +} + +#ifdef __KERNEL__ +void +libcfs_setnet0alias(int lnd) +{ + struct netstrfns *nf = libcfs_lnd2netstrfns(lnd); + struct netstrfns *nf0 = &libcfs_netstrfns[libcfs_nnetstrfns - 1]; + + /* Ghastly hack to allow LNET to inter-operate with portals. + * NET type 0 becomes an alias for whatever local network we have, and + * this assignment here means we can parse and print its NIDs */ + + LASSERT (nf != NULL); + LASSERT (nf0->nf_type < 0); + + nf0->nf_name = "zero";//nf->nf_name; + nf0->nf_modname = nf->nf_modname; + nf0->nf_addr2str = nf->nf_addr2str; + nf0->nf_str2addr = nf->nf_str2addr; + mb(); + nf0->nf_type = 0; +} + +EXPORT_SYMBOL(libcfs_isknown_lnd); +EXPORT_SYMBOL(libcfs_lnd2modname); +EXPORT_SYMBOL(libcfs_lnd2str); +EXPORT_SYMBOL(libcfs_str2lnd); +EXPORT_SYMBOL(libcfs_net2str); +EXPORT_SYMBOL(libcfs_nid2str); +EXPORT_SYMBOL(libcfs_str2net); +EXPORT_SYMBOL(libcfs_str2nid); +EXPORT_SYMBOL(libcfs_id2str); +EXPORT_SYMBOL(libcfs_str2anynid); +EXPORT_SYMBOL(libcfs_setnet0alias); +#else /* __KERNEL__ */ +void +libcfs_setnet0alias(int lnd) +{ + LCONSOLE_ERROR_MSG(0x125, "Liblustre cannot interoperate with old " + "Portals.\nportals_compatibility must be set to " + "'none'.\n"); +} +#endif diff --git a/libcfs/libcfs/tracefile.c b/libcfs/libcfs/tracefile.c new file mode 100644 index 0000000..4a5cf52 --- /dev/null +++ b/libcfs/libcfs/tracefile.c @@ -0,0 +1,1114 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Zach Brown + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + + +#define DEBUG_SUBSYSTEM S_LNET +#define LUSTRE_TRACEFILE_PRIVATE +#include "tracefile.h" + +#include +#include + +/* XXX move things up to the top, comment */ +union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS] __cacheline_aligned; + +char tracefile[TRACEFILE_NAME_SIZE]; +long long tracefile_size = TRACEFILE_SIZE; +static struct tracefiled_ctl trace_tctl; +struct semaphore trace_thread_sem; +static int thread_running = 0; + +atomic_t tage_allocated = ATOMIC_INIT(0); + +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct trace_cpu_data *tcd); + +static inline struct trace_page *tage_from_list(struct list_head *list) +{ + return list_entry(list, struct trace_page, linkage); +} + +static struct trace_page *tage_alloc(int gfp) +{ + cfs_page_t *page; + struct trace_page *tage; + + /* + * Don't spam console with allocation failures: they will be reported + * by upper layer anyway. 
+ */ + gfp |= CFS_ALLOC_NOWARN; + page = cfs_alloc_page(gfp); + if (page == NULL) + return NULL; + + tage = cfs_alloc(sizeof(*tage), gfp); + if (tage == NULL) { + cfs_free_page(page); + return NULL; + } + + tage->page = page; + atomic_inc(&tage_allocated); + return tage; +} + +static void tage_free(struct trace_page *tage) +{ + __LASSERT(tage != NULL); + __LASSERT(tage->page != NULL); + + cfs_free_page(tage->page); + cfs_free(tage); + atomic_dec(&tage_allocated); +} + +static void tage_to_tail(struct trace_page *tage, struct list_head *queue) +{ + __LASSERT(tage != NULL); + __LASSERT(queue != NULL); + + list_move_tail(&tage->linkage, queue); +} + +int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, + struct list_head *stock) +{ + int i; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + for (i = 0; i + tcd->tcd_cur_stock_pages < TCD_STOCK_PAGES ; ++ i) { + struct trace_page *tage; + + tage = tage_alloc(gfp); + if (tage == NULL) + break; + list_add_tail(&tage->linkage, stock); + } + return i; +} + +/* return a page that has 'len' bytes left at the end */ +static struct trace_page *trace_get_tage_try(struct trace_cpu_data *tcd, + unsigned long len) +{ + struct trace_page *tage; + + if (tcd->tcd_cur_pages > 0) { + __LASSERT(!list_empty(&tcd->tcd_pages)); + tage = tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= CFS_PAGE_SIZE) + return tage; + } + + if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { + if (tcd->tcd_cur_stock_pages > 0) { + tage = tage_from_list(tcd->tcd_stock_pages.prev); + -- tcd->tcd_cur_stock_pages; + list_del_init(&tage->linkage); + } else { + tage = tage_alloc(CFS_ALLOC_ATOMIC); + if (tage == NULL) { + printk(KERN_WARNING + "failure to allocate a tage (%ld)\n", + tcd->tcd_cur_pages); + return NULL; + } + } + + tage->used = 0; + tage->cpu = smp_processor_id(); + tage->type = tcd->tcd_type; + list_add_tail(&tage->linkage, &tcd->tcd_pages); + tcd->tcd_cur_pages++; + + if (tcd->tcd_cur_pages > 8 && thread_running) { + struct tracefiled_ctl *tctl = &trace_tctl; + /* + * wake up tracefiled to process some pages. + */ + cfs_waitq_signal(&tctl->tctl_waitq); + } + return tage; + } + return NULL; +} + +static void tcd_shrink(struct trace_cpu_data *tcd) +{ + int pgcount = tcd->tcd_cur_pages / 10; + struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. + */ + + printk(KERN_WARNING "debug daemon buffer overflowed; discarding" + " 10%% of pages (%d of %ld)\n", pgcount + 1, tcd->tcd_cur_pages); + + CFS_INIT_LIST_HEAD(&pc.pc_pages); + spin_lock_init(&pc.pc_lock); + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + if (pgcount-- == 0) + break; + + list_move_tail(&tage->linkage, &pc.pc_pages); + tcd->tcd_cur_pages--; + } + put_pages_on_tcd_daemon_list(&pc, tcd); +} + +/* return a page that has 'len' bytes left at the end */ +static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd, + unsigned long len) +{ + struct trace_page *tage; + + /* + * XXX nikita: do NOT call portals_debug_msg() (CDEBUG/ENTRY/EXIT) + * from here: this will lead to infinite recursion. 
+ */ + + if (len > CFS_PAGE_SIZE) { + printk(KERN_ERR + "cowardly refusing to write %lu bytes in a page\n", len); + return NULL; + } + + tage = trace_get_tage_try(tcd, len); + if (tage != NULL) + return tage; + if (thread_running) + tcd_shrink(tcd); + if (tcd->tcd_cur_pages > 0) { + tage = tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + tage_to_tail(tage, &tcd->tcd_pages); + } + return tage; +} + +int libcfs_debug_vmsg2(cfs_debug_limit_state_t *cdls, int subsys, int mask, + const char *file, const char *fn, const int line, + const char *format1, va_list args, + const char *format2, ...) +{ + struct trace_cpu_data *tcd = NULL; + struct ptldebug_header header; + struct trace_page *tage; + /* string_buf is used only if tcd != NULL, and is always set then */ + char *string_buf = NULL; + char *debug_buf; + int known_size; + int needed = 85; /* average message length */ + int max_nob; + va_list ap; + int depth; + int i; + int remain; + + if (strchr(file, '/')) + file = strrchr(file, '/') + 1; + + + set_ptldebug_header(&header, subsys, mask, line, CDEBUG_STACK()); + + tcd = trace_get_tcd(); + if (tcd == NULL) /* arch may not log in IRQ context */ + goto console; + + if (tcd->tcd_shutting_down) { + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + depth = __current_nesting_level(); + known_size = strlen(file) + 1 + depth; + if (fn) + known_size += strlen(fn) + 1; + + if (libcfs_debug_binary) + known_size += sizeof(header); + + /*/ + * '2' used because vsnprintf return real size required for output + * _without_ terminating NULL. + * if needed is to small for this format. + */ + for (i=0;i<2;i++) { + tage = trace_get_tage(tcd, needed + known_size + 1); + if (tage == NULL) { + if (needed + known_size > CFS_PAGE_SIZE) + mask |= D_ERROR; + + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + string_buf = (char *)cfs_page_address(tage->page)+tage->used+known_size; + + max_nob = CFS_PAGE_SIZE - tage->used - known_size; + if (max_nob <= 0) { + printk(KERN_EMERG "negative max_nob: %i\n", max_nob); + mask |= D_ERROR; + trace_put_tcd(tcd); + tcd = NULL; + goto console; + } + + needed = 0; + if (format1) { + va_copy(ap, args); + needed = vsnprintf(string_buf, max_nob, format1, ap); + va_end(ap); + } + + + if (format2) { + remain = max_nob - needed; + if (remain < 0) + remain = 0; + + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + + if (needed < max_nob) /* well. printing ok.. 
*/ + break; + } + + if (*(string_buf+needed-1) != '\n') + printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n", + file, line, fn); + + header.ph_len = known_size + needed; + debug_buf = (char *)cfs_page_address(tage->page) + tage->used; + + if (libcfs_debug_binary) { + memcpy(debug_buf, &header, sizeof(header)); + tage->used += sizeof(header); + debug_buf += sizeof(header); + } + + /* indent message according to the nesting level */ + while (depth-- > 0) { + *(debug_buf++) = '.'; + ++ tage->used; + } + + strcpy(debug_buf, file); + tage->used += strlen(file) + 1; + debug_buf += strlen(file) + 1; + + if (fn) { + strcpy(debug_buf, fn); + tage->used += strlen(fn) + 1; + debug_buf += strlen(fn) + 1; + } + + __LASSERT(debug_buf == string_buf); + + tage->used += needed; + __LASSERT (tage->used <= CFS_PAGE_SIZE); + +console: + if ((mask & libcfs_printk) == 0) { + /* no console output requested */ + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (cdls != NULL) { + if (libcfs_console_ratelimit && + cdls->cdls_next != 0 && /* not first time ever */ + !cfs_time_after(cfs_time_current(), cdls->cdls_next)) { + /* skipping a console message */ + cdls->cdls_count++; + if (tcd != NULL) + trace_put_tcd(tcd); + return 1; + } + + if (cfs_time_after(cfs_time_current(), cdls->cdls_next + + libcfs_console_max_delay + + cfs_time_seconds(10))) { + /* last timeout was a long time ago */ + cdls->cdls_delay /= libcfs_console_backoff * 4; + } else { + cdls->cdls_delay *= libcfs_console_backoff; + + if (cdls->cdls_delay < libcfs_console_min_delay) + cdls->cdls_delay = libcfs_console_min_delay; + else if (cdls->cdls_delay > libcfs_console_max_delay) + cdls->cdls_delay = libcfs_console_max_delay; + } + + /* ensure cdls_next is never zero after it's been seen */ + cdls->cdls_next = (cfs_time_current() + cdls->cdls_delay) | 1; + } + + if (tcd != NULL) { + print_to_console(&header, mask, string_buf, needed, file, fn); + trace_put_tcd(tcd); + } else { + string_buf = trace_get_console_buffer(); + + needed = 0; + if (format1 != NULL) { + va_copy(ap, args); + needed = vsnprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, format1, ap); + va_end(ap); + } + if (format2 != NULL) { + remain = TRACE_CONSOLE_BUFFER_SIZE - needed; + if (remain > 0) { + va_start(ap, format2); + needed += vsnprintf(string_buf+needed, remain, format2, ap); + va_end(ap); + } + } + print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + } + + if (cdls != NULL && cdls->cdls_count != 0) { + string_buf = trace_get_console_buffer(); + + needed = snprintf(string_buf, TRACE_CONSOLE_BUFFER_SIZE, + "Skipped %d previous similar message%s\n", + cdls->cdls_count, (cdls->cdls_count > 1) ? 
"s" : ""); + + print_to_console(&header, mask, + string_buf, needed, file, fn); + + trace_put_console_buffer(string_buf); + cdls->cdls_count = 0; + } + + return 0; +} +EXPORT_SYMBOL(libcfs_debug_vmsg2); + +void +libcfs_assertion_failed(const char *expr, const char *file, + const char *func, const int line) +{ + libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, + "ASSERTION(%s) failed\n", expr); + LBUG(); +} +EXPORT_SYMBOL(libcfs_assertion_failed); + +void +trace_assertion_failed(const char *str, + const char *fn, const char *file, int line) +{ + struct ptldebug_header hdr; + + libcfs_panic_in_progress = 1; + libcfs_catastrophe = 1; + mb(); + + set_ptldebug_header(&hdr, DEBUG_SUBSYSTEM, D_EMERG, line, + CDEBUG_STACK()); + + print_to_console(&hdr, D_EMERG, str, strlen(str), file, fn); + + LIBCFS_PANIC("Lustre debug assertion failure\n"); + + /* not reached */ +} + +static void +panic_collect_pages(struct page_collection *pc) +{ + /* Do the collect_pages job on a single CPU: assumes that all other + * CPUs have been stopped during a panic. If this isn't true for some + * arch, this will have to be implemented separately in each arch. */ + int i; + int j; + struct trace_cpu_data *tcd; + + CFS_INIT_LIST_HEAD(&pc->pc_pages); + + tcd_for_each(tcd, i, j) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } +} + +static void collect_pages_on_cpu(void *info) +{ + struct trace_cpu_data *tcd; + struct page_collection *pc = info; + int i; + + spin_lock(&pc->pc_lock); + tcd_for_each_type_lock(tcd, i) { + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); + tcd->tcd_cur_pages = 0; + if (pc->pc_want_daemon_pages) { + list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); + tcd->tcd_cur_daemon_pages = 0; + } + } + spin_unlock(&pc->pc_lock); +} + +static void collect_pages(struct page_collection *pc) +{ + CFS_INIT_LIST_HEAD(&pc->pc_pages); + + if (libcfs_panic_in_progress) + panic_collect_pages(pc); + else + trace_call_on_all_cpus(collect_pages_on_cpu, pc); +} + +static void put_pages_back_on_cpu(void *info) +{ + struct page_collection *pc = info; + struct trace_cpu_data *tcd; + struct list_head *cur_head; + struct trace_page *tage; + struct trace_page *tmp; + int i; + + spin_lock(&pc->pc_lock); + tcd_for_each_type_lock(tcd, i) { + cur_head = tcd->tcd_pages.next; + + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != smp_processor_id() || tage->type != i) + continue; + + tage_to_tail(tage, cur_head); + tcd->tcd_cur_pages++; + } + } + spin_unlock(&pc->pc_lock); +} + +static void put_pages_back(struct page_collection *pc) +{ + if (!libcfs_panic_in_progress) + trace_call_on_all_cpus(put_pages_back_on_cpu, pc); +} + +/* Add pages to a per-cpu debug daemon ringbuffer. This buffer makes sure that + * we have a good amount of data at all times for dumping during an LBUG, even + * if we have been steadily writing (and otherwise discarding) pages via the + * debug daemon. 
*/ +static void put_pages_on_tcd_daemon_list(struct page_collection *pc, + struct trace_cpu_data *tcd) +{ + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock(&pc->pc_lock); + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + + __LASSERT_TAGE_INVARIANT(tage); + + if (tage->cpu != smp_processor_id() || + tage->type != tcd->tcd_type) + continue; + + tage_to_tail(tage, &tcd->tcd_daemon_pages); + tcd->tcd_cur_daemon_pages++; + + if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { + struct trace_page *victim; + + __LASSERT(!list_empty(&tcd->tcd_daemon_pages)); + victim = tage_from_list(tcd->tcd_daemon_pages.next); + + __LASSERT_TAGE_INVARIANT(victim); + + list_del(&victim->linkage); + tage_free(victim); + tcd->tcd_cur_daemon_pages--; + } + } + spin_unlock(&pc->pc_lock); +} + +static void put_pages_on_daemon_list_on_cpu(void *info) +{ + struct trace_cpu_data *tcd; + int i; + + tcd_for_each_type_lock(tcd, i) + put_pages_on_tcd_daemon_list(info, tcd); +} + +static void put_pages_on_daemon_list(struct page_collection *pc) +{ + trace_call_on_all_cpus(put_pages_on_daemon_list_on_cpu, pc); +} + +void trace_debug_print(void) +{ + struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock_init(&pc.pc_lock); + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + char *p, *file, *fn; + cfs_page_t *page; + + __LASSERT_TAGE_INVARIANT(tage); + + page = tage->page; + p = cfs_page_address(page); + while (p < ((char *)cfs_page_address(page) + tage->used)) { + struct ptldebug_header *hdr; + int len; + hdr = (void *)p; + p += sizeof(*hdr); + file = p; + p += strlen(file) + 1; + fn = p; + p += strlen(fn) + 1; + len = hdr->ph_len - (p - (char *)hdr); + + print_to_console(hdr, D_EMERG, p, len, file, fn); + + p += len; + } + + list_del(&tage->linkage); + tage_free(tage); + } +} + +int tracefile_dump_all_pages(char *filename) +{ + struct page_collection pc; + cfs_file_t *filp; + struct trace_page *tage; + struct trace_page *tmp; + int rc; + + CFS_DECL_MMSPACE; + + tracefile_write_lock(); + + filp = cfs_filp_open(filename, + O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); + if (!filp) { + if (rc != -EEXIST) + printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", + filename, rc); + goto out; + } + + spin_lock_init(&pc.pc_lock); + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) { + rc = 0; + goto close; + } + + /* ok, for now, just write the pages. 
in the future we'll be building + * iobufs with the pages and calling generic_direct_IO */ + CFS_MMSPACE_OPEN; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + + __LASSERT_TAGE_INVARIANT(tage); + + rc = cfs_filp_write(filp, cfs_page_address(tage->page), + tage->used, cfs_filp_poff(filp)); + if (rc != (int)tage->used) { + printk(KERN_WARNING "wanted to write %u but wrote " + "%d\n", tage->used, rc); + put_pages_back(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + break; + } + list_del(&tage->linkage); + tage_free(tage); + } + CFS_MMSPACE_CLOSE; + rc = cfs_filp_fsync(filp); + if (rc) + printk(KERN_ERR "sync returns %d\n", rc); + close: + cfs_filp_close(filp); + out: + tracefile_write_unlock(); + return rc; +} + +void trace_flush_pages(void) +{ + struct page_collection pc; + struct trace_page *tage; + struct trace_page *tmp; + + spin_lock_init(&pc.pc_lock); + + pc.pc_want_daemon_pages = 1; + collect_pages(&pc); + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + + __LASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + tage_free(tage); + } +} + +int trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char *usr_buffer, int usr_buffer_nob) +{ + int nob; + + if (usr_buffer_nob > knl_buffer_nob) + return -EOVERFLOW; + + if (copy_from_user((void *)knl_buffer, + (void *)usr_buffer, usr_buffer_nob)) + return -EFAULT; + + nob = strnlen(knl_buffer, usr_buffer_nob); + while (nob-- >= 0) /* strip trailing whitespace */ + if (!isspace(knl_buffer[nob])) + break; + + if (nob < 0) /* empty string */ + return -EINVAL; + + if (nob == knl_buffer_nob) /* no space to terminate */ + return -EOVERFLOW; + + knl_buffer[nob + 1] = 0; /* terminate */ + return 0; +} + +int trace_copyout_string(char *usr_buffer, int usr_buffer_nob, + const char *knl_buffer, char *append) +{ + /* NB if 'append' != NULL, it's a single character to append to the + * copied out string - usually "\n", for /proc entries and "" (i.e. 
a + * terminating zero byte) for sysctl entries */ + int nob = strlen(knl_buffer); + + if (nob > usr_buffer_nob) + nob = usr_buffer_nob; + + if (copy_to_user(usr_buffer, knl_buffer, nob)) + return -EFAULT; + + if (append != NULL && nob < usr_buffer_nob) { + if (copy_to_user(usr_buffer + nob, append, 1)) + return -EFAULT; + + nob++; + } + + return nob; +} + +int trace_allocate_string_buffer(char **str, int nob) +{ + if (nob > 2 * CFS_PAGE_SIZE) /* string must be "sensible" */ + return -EINVAL; + + *str = cfs_alloc(nob, CFS_ALLOC_STD | CFS_ALLOC_ZERO); + if (*str == NULL) + return -ENOMEM; + + return 0; +} + +void trace_free_string_buffer(char *str, int nob) +{ + cfs_free(str); +} + +int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc != 0) + goto out; + +#if !defined(__WINNT__) + if (str[0] != '/') { + rc = -EINVAL; + goto out; + } +#endif + rc = tracefile_dump_all_pages(str); +out: + trace_free_string_buffer(str, usr_str_nob + 1); + return rc; +} + +int trace_daemon_command(char *str) +{ + int rc = 0; + + tracefile_write_lock(); + + if (strcmp(str, "stop") == 0) { + trace_stop_thread(); + memset(tracefile, 0, sizeof(tracefile)); + + } else if (strncmp(str, "size=", 5) == 0) { + tracefile_size = simple_strtoul(str + 5, NULL, 0); + if (tracefile_size < 10 || tracefile_size > 20480) + tracefile_size = TRACEFILE_SIZE; + else + tracefile_size <<= 20; + + } else if (strlen(str) >= sizeof(tracefile)) { + rc = -ENAMETOOLONG; +#ifndef __WINNT__ + } else if (str[0] != '/') { + rc = -EINVAL; +#endif + } else { + strcpy(tracefile, str); + + printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " + "to %s (%lukB max)\n", tracefile, + (long)(tracefile_size >> 10)); + + trace_start_thread(); + } + + tracefile_write_unlock(); + return rc; +} + +int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob) +{ + char *str; + int rc; + + rc = trace_allocate_string_buffer(&str, usr_str_nob + 1); + if (rc != 0) + return rc; + + rc = trace_copyin_string(str, usr_str_nob + 1, + usr_str, usr_str_nob); + if (rc == 0) + rc = trace_daemon_command(str); + + trace_free_string_buffer(str, usr_str_nob + 1); + return rc; +} + +int trace_set_debug_mb(int mb) +{ + int i; + int j; + int pages; + int limit = trace_max_debug_mb(); + struct trace_cpu_data *tcd; + + if (mb < num_possible_cpus()) + return -EINVAL; + + if (mb > limit) { + printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " + "%dMB - limit is %d\n", mb, limit); + return -EINVAL; + } + + mb /= num_possible_cpus(); + pages = mb << (20 - CFS_PAGE_SHIFT); + + tracefile_write_lock(); + + tcd_for_each(tcd, i, j) + tcd->tcd_max_pages = (pages * tcd->tcd_pages_factor) / 100; + + tracefile_write_unlock(); + + return 0; +} + +int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob) +{ + char str[32]; + int rc; + + rc = trace_copyin_string(str, sizeof(str), usr_str, usr_str_nob); + if (rc < 0) + return rc; + + return trace_set_debug_mb(simple_strtoul(str, NULL, 0)); +} + +int trace_get_debug_mb(void) +{ + int i; + int j; + struct trace_cpu_data *tcd; + int total_pages = 0; + + tracefile_read_lock(); + + tcd_for_each(tcd, i, j) + total_pages += tcd->tcd_max_pages; + + tracefile_read_unlock(); + + return (total_pages >> (20 - CFS_PAGE_SHIFT)) + 1; +} + +static int tracefiled(void *arg) +{ + struct page_collection pc; + struct 
tracefiled_ctl *tctl = arg; + struct trace_page *tage; + struct trace_page *tmp; + struct ptldebug_header *hdr; + cfs_file_t *filp; + int rc; + + CFS_DECL_MMSPACE; + + /* we're started late enough that we pick up init's fs context */ + /* this is so broken in uml? what on earth is going on? */ + cfs_daemonize("ktracefiled"); + + spin_lock_init(&pc.pc_lock); + complete(&tctl->tctl_start); + + while (1) { + cfs_waitlink_t __wait; + + cfs_waitlink_init(&__wait); + cfs_waitq_add(&tctl->tctl_waitq, &__wait); + set_current_state(TASK_INTERRUPTIBLE); + cfs_waitq_timedwait(&__wait, CFS_TASK_INTERRUPTIBLE, + cfs_time_seconds(1)); + cfs_waitq_del(&tctl->tctl_waitq, &__wait); + + if (atomic_read(&tctl->tctl_shutdown)) + break; + + pc.pc_want_daemon_pages = 0; + collect_pages(&pc); + if (list_empty(&pc.pc_pages)) + continue; + + filp = NULL; + tracefile_read_lock(); + if (tracefile[0] != 0) { + filp = cfs_filp_open(tracefile, + O_CREAT | O_RDWR | O_LARGEFILE, + 0600, &rc); + if (!(filp)) + printk(KERN_WARNING "couldn't open %s: %d\n", + tracefile, rc); + } + tracefile_read_unlock(); + if (filp == NULL) { + put_pages_on_daemon_list(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + continue; + } + + CFS_MMSPACE_OPEN; + + /* mark the first header, so we can sort in chunks */ + tage = tage_from_list(pc.pc_pages.next); + __LASSERT_TAGE_INVARIANT(tage); + + hdr = cfs_page_address(tage->page); + hdr->ph_flags |= PH_FLAG_FIRST_RECORD; + + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + static loff_t f_pos; + + __LASSERT_TAGE_INVARIANT(tage); + + if (f_pos >= (off_t)tracefile_size) + f_pos = 0; + else if (f_pos > cfs_filp_size(filp)) + f_pos = cfs_filp_size(filp); + + rc = cfs_filp_write(filp, cfs_page_address(tage->page), + tage->used, &f_pos); + if (rc != (int)tage->used) { + printk(KERN_WARNING "wanted to write %u but " + "wrote %d\n", tage->used, rc); + put_pages_back(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + } + } + CFS_MMSPACE_CLOSE; + + cfs_filp_close(filp); + put_pages_on_daemon_list(&pc); + __LASSERT(list_empty(&pc.pc_pages)); + } + complete(&tctl->tctl_stop); + return 0; +} + +int trace_start_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + int rc = 0; + + mutex_down(&trace_thread_sem); + if (thread_running) + goto out; + + init_completion(&tctl->tctl_start); + init_completion(&tctl->tctl_stop); + cfs_waitq_init(&tctl->tctl_waitq); + atomic_set(&tctl->tctl_shutdown, 0); + + if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) { + rc = -ECHILD; + goto out; + } + + wait_for_completion(&tctl->tctl_start); + thread_running = 1; +out: + mutex_up(&trace_thread_sem); + return rc; +} + +void trace_stop_thread(void) +{ + struct tracefiled_ctl *tctl = &trace_tctl; + + mutex_down(&trace_thread_sem); + if (thread_running) { + printk(KERN_INFO "Lustre: shutting down debug daemon thread...\n"); + atomic_set(&tctl->tctl_shutdown, 1); + wait_for_completion(&tctl->tctl_stop); + thread_running = 0; + } + mutex_up(&trace_thread_sem); +} + +int tracefile_init(int max_pages) +{ + struct trace_cpu_data *tcd; + int i; + int j; + int rc; + int factor; + + rc = tracefile_init_arch(); + if (rc != 0) + return rc; + + tcd_for_each(tcd, i, j) { + /* tcd_pages_factor is initialized int tracefile_init_arch. 
*/ + factor = tcd->tcd_pages_factor; + CFS_INIT_LIST_HEAD(&tcd->tcd_pages); + CFS_INIT_LIST_HEAD(&tcd->tcd_stock_pages); + CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); + tcd->tcd_cur_pages = 0; + tcd->tcd_cur_stock_pages = 0; + tcd->tcd_cur_daemon_pages = 0; + tcd->tcd_max_pages = (max_pages * factor) / 100; + LASSERT(tcd->tcd_max_pages > 0); + tcd->tcd_shutting_down = 0; + } + + return 0; +} + +static void trace_cleanup_on_cpu(void *info) +{ + struct trace_cpu_data *tcd; + struct trace_page *tage; + struct trace_page *tmp; + int i; + + tcd_for_each_type_lock(tcd, i) { + tcd->tcd_shutting_down = 1; + + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + __LASSERT_TAGE_INVARIANT(tage); + + list_del(&tage->linkage); + tage_free(tage); + } + tcd->tcd_cur_pages = 0; + } +} + +static void trace_cleanup(void) +{ + struct page_collection pc; + + CFS_INIT_LIST_HEAD(&pc.pc_pages); + spin_lock_init(&pc.pc_lock); + + trace_call_on_all_cpus(trace_cleanup_on_cpu, &pc); + + tracefile_fini_arch(); +} + +void tracefile_exit(void) +{ + trace_stop_thread(); + trace_cleanup(); +} diff --git a/libcfs/libcfs/tracefile.h b/libcfs/libcfs/tracefile.h new file mode 100644 index 0000000..7d43392 --- /dev/null +++ b/libcfs/libcfs/tracefile.h @@ -0,0 +1,248 @@ +#ifndef __LIBCFS_TRACEFILE_H__ +#define __LIBCFS_TRACEFILE_H__ + +#include + +/* trace file lock routines */ + +#define TRACEFILE_NAME_SIZE 1024 +extern char tracefile[TRACEFILE_NAME_SIZE]; +extern long long tracefile_size; + +int tracefile_init_arch(void); +void tracefile_fini_arch(void); + +void tracefile_read_lock(void); +void tracefile_read_unlock(void); +void tracefile_write_lock(void); +void tracefile_write_unlock(void); + +int tracefile_dump_all_pages(char *filename); +void trace_debug_print(void); +void trace_flush_pages(void); +int trace_start_thread(void); +void trace_stop_thread(void); +int tracefile_init(int max_pages); +void tracefile_exit(void); + + + +int trace_copyin_string(char *knl_buffer, int knl_buffer_nob, + const char *usr_buffer, int usr_buffer_nob); +int trace_copyout_string(char *usr_buffer, int usr_buffer_nob, + const char *knl_str, char *append); +int trace_allocate_string_buffer(char **str, int nob); +void trace_free_string_buffer(char *str, int nob); +int trace_dump_debug_buffer_usrstr(void *usr_str, int usr_str_nob); +int trace_daemon_command(char *str); +int trace_daemon_command_usrstr(void *usr_str, int usr_str_nob); +int trace_set_debug_mb(int mb); +int trace_set_debug_mb_usrstr(void *usr_str, int usr_str_nob); +int trace_get_debug_mb(void); + +extern void libcfs_debug_dumplog_internal(void *arg); +extern void libcfs_register_panic_notifier(void); +extern void libcfs_unregister_panic_notifier(void); +extern int libcfs_panic_in_progress; +extern int trace_max_debug_mb(void); + +#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) +#define TRACEFILE_SIZE (500 << 20) + +#ifdef LUSTRE_TRACEFILE_PRIVATE + +/* + * Private declare for tracefile + */ +#define TCD_MAX_PAGES (5 << (20 - CFS_PAGE_SHIFT)) +#define TCD_STOCK_PAGES (TCD_MAX_PAGES) + +#define TRACEFILE_SIZE (500 << 20) + +/* Size of a buffer for sprinting console messages if we can't get a page + * from system */ +#define TRACE_CONSOLE_BUFFER_SIZE 1024 + +union trace_data_union { + struct trace_cpu_data { + /* + * pages with trace records not yet processed by tracefiled. 
+ */ + struct list_head tcd_pages; + /* number of pages on ->tcd_pages */ + unsigned long tcd_cur_pages; + + /* + * pages with trace records already processed by + * tracefiled. These pages are kept in memory, so that some + * portion of log can be written in the event of LBUG. This + * list is maintained in LRU order. + * + * Pages are moved to ->tcd_daemon_pages by tracefiled() + * (put_pages_on_daemon_list()). LRU pages from this list are + * discarded when list grows too large. + */ + struct list_head tcd_daemon_pages; + /* number of pages on ->tcd_daemon_pages */ + unsigned long tcd_cur_daemon_pages; + + /* + * Maximal number of pages allowed on ->tcd_pages and + * ->tcd_daemon_pages each. + * Always TCD_MAX_PAGES * tcd_pages_factor / 100 in current + * implementation. + */ + unsigned long tcd_max_pages; + + /* + * preallocated pages to write trace records into. Pages from + * ->tcd_stock_pages are moved to ->tcd_pages by + * portals_debug_msg(). + * + * This list is necessary, because on some platforms it's + * impossible to perform efficient atomic page allocation in a + * non-blockable context. + * + * Such platforms fill ->tcd_stock_pages "on occasion", when + * tracing code is entered in blockable context. + * + * trace_get_tage_try() tries to get a page from + * ->tcd_stock_pages first and resorts to atomic page + * allocation only if this queue is empty. ->tcd_stock_pages + * is replenished when tracing code is entered in blocking + * context (darwin-tracefile.c:trace_get_tcd()). We try to + * maintain TCD_STOCK_PAGES (40 by default) pages in this + * queue. Atomic allocation is only required if more than + * TCD_STOCK_PAGES pagesful are consumed by trace records all + * emitted in non-blocking contexts. Which is quite unlikely. + */ + struct list_head tcd_stock_pages; + /* number of pages on ->tcd_stock_pages */ + unsigned long tcd_cur_stock_pages; + + unsigned short tcd_shutting_down; + unsigned short tcd_cpu; + unsigned short tcd_type; + /* The factors to share debug memory. */ + unsigned short tcd_pages_factor; + } tcd; + char __pad[L1_CACHE_ALIGN(sizeof(struct trace_cpu_data))]; +}; + +#define TCD_MAX_TYPES 8 +extern union trace_data_union (*trace_data[TCD_MAX_TYPES])[NR_CPUS]; + +#define tcd_for_each(tcd, i, j) \ + for (i = 0; trace_data[i] != NULL; i++) \ + for (j = 0, ((tcd) = &(*trace_data[i])[j].tcd); \ + j < num_possible_cpus(); j++, (tcd) = &(*trace_data[i])[j].tcd) + +#define tcd_for_each_type_lock(tcd, i) \ + for (i = 0; trace_data[i] && \ + (tcd = &(*trace_data[i])[smp_processor_id()].tcd) && \ + trace_lock_tcd(tcd); trace_unlock_tcd(tcd), i++) + +/* XXX nikita: this declaration is internal to tracefile.c and should probably + * be moved there */ +struct page_collection { + struct list_head pc_pages; + /* + * spin-lock protecting ->pc_pages. It is taken by smp_call_function() + * call-back functions. XXX nikita: Which is horrible: all processors + * receive NMI at the same time only to be serialized by this + * lock. Probably ->pc_pages should be replaced with an array of + * NR_CPUS elements accessed locklessly. + */ + spinlock_t pc_lock; + /* + * if this flag is set, collect_pages() will spill both + * ->tcd_daemon_pages and ->tcd_pages to the ->pc_pages. Otherwise, + * only ->tcd_pages are spilled. 
+ */ + int pc_want_daemon_pages; +}; + +/* XXX nikita: this declaration is internal to tracefile.c and should probably + * be moved there */ +struct tracefiled_ctl { + struct completion tctl_start; + struct completion tctl_stop; + cfs_waitq_t tctl_waitq; + pid_t tctl_pid; + atomic_t tctl_shutdown; +}; + +/* + * small data-structure for each page owned by tracefiled. + */ +/* XXX nikita: this declaration is internal to tracefile.c and should probably + * be moved there */ +struct trace_page { + /* + * page itself + */ + cfs_page_t *page; + /* + * linkage into one of the lists in trace_data_union or + * page_collection + */ + struct list_head linkage; + /* + * number of bytes used within this page + */ + unsigned int used; + /* + * cpu that owns this page + */ + unsigned short cpu; + /* + * type(context) of this page + */ + unsigned short type; +}; + +extern void set_ptldebug_header(struct ptldebug_header *header, + int subsys, int mask, const int line, + unsigned long stack); +extern void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf, + int len, const char *file, const char *fn); + +extern struct trace_cpu_data *trace_get_tcd(void); +extern void trace_put_tcd(struct trace_cpu_data *tcd); +extern int trace_lock_tcd(struct trace_cpu_data *tcd); +extern void trace_unlock_tcd(struct trace_cpu_data *tcd); +extern char *trace_get_console_buffer(void); +extern void trace_put_console_buffer(char *buffer); + +extern void trace_call_on_all_cpus(void (*fn)(void *arg), void *arg); + +int trace_refill_stock(struct trace_cpu_data *tcd, int gfp, + struct list_head *stock); + + +int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage); + +extern void trace_assertion_failed(const char *str, const char *fn, + const char *file, int line); + +/* ASSERTION that is safe to use within the debug system */ +#define __LASSERT(cond) \ +({ \ + if (unlikely(!(cond))) { \ + trace_assertion_failed("ASSERTION("#cond") failed", \ + __FUNCTION__, __FILE__, __LINE__); \ + } \ +}) + +#define __LASSERT_TAGE_INVARIANT(tage) \ +({ \ + __LASSERT(tage != NULL); \ + __LASSERT(tage->page != NULL); \ + __LASSERT(tage->used <= CFS_PAGE_SIZE); \ + __LASSERT(cfs_page_count(tage->page) > 0); \ +}) + +#endif /* LUSTRE_TRACEFILE_PRIVATE */ + +#endif /* __LIBCFS_TRACEFILE_H__ */ diff --git a/libcfs/libcfs/user-bitops.c b/libcfs/libcfs/user-bitops.c new file mode 100644 index 0000000..8f94593 --- /dev/null +++ b/libcfs/libcfs/user-bitops.c @@ -0,0 +1,98 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2007 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. 
+ * + */ +#ifndef __KERNEL__ + +#include +#include +#include + +#define OFF_BY_START(start) ((start)/BITS_PER_LONG) + +unsigned long find_next_bit(unsigned long *addr, + unsigned long size, unsigned long offset) +{ + unsigned long *word, *last; + unsigned long first_bit, bit, base; + + word = addr + OFF_BY_START(offset); + last = addr + OFF_BY_START(size-1); + first_bit = offset % BITS_PER_LONG; + base = offset - first_bit; + + if (offset >= size) + return size; + if (first_bit != 0) { + int tmp = (*word++) & (~0UL << first_bit); + bit = __ffs(tmp); + if (bit < BITS_PER_LONG) + goto found; + word++; + base += BITS_PER_LONG; + } + while (word <= last) { + if (*word != 0UL) { + bit = __ffs(*word); + goto found; + } + word++; + base += BITS_PER_LONG; + } + return size; +found: + return base + bit; +} + +unsigned long find_next_zero_bit(unsigned long *addr, + unsigned long size, unsigned long offset) +{ + unsigned long *word, *last; + unsigned long first_bit, bit, base; + + word = addr + OFF_BY_START(offset); + last = addr + OFF_BY_START(size-1); + first_bit = offset % BITS_PER_LONG; + base = offset - first_bit; + + if (offset >= size) + return size; + if (first_bit != 0) { + int tmp = (*word++) & (~0UL << first_bit); + bit = __ffz(tmp); + if (bit < BITS_PER_LONG) + goto found; + word++; + base += BITS_PER_LONG; + } + while (word <= last) { + if (*word != ~0UL) { + bit = __ffz(*word); + goto found; + } + word++; + base += BITS_PER_LONG; + } + return size; +found: + return base + bit; +} + +#endif diff --git a/libcfs/libcfs/user-lock.c b/libcfs/libcfs/user-lock.c new file mode 100644 index 0000000..c521dc7 --- /dev/null +++ b/libcfs/libcfs/user-lock.c @@ -0,0 +1,343 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +/* Implementations of portable synchronization APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + * + * XXX Liang: There are several branches share lnet with b_hd_newconfig, + * if we define lock APIs at here, there will be conflict with liblustre + * in other branches. + */ + +#ifndef __KERNEL__ + +#include +#include +#include + +/* + * Optional debugging (magic stamping and checking ownership) can be added. + */ + +#if 0 +/* + * spin_lock + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + * + * No-op implementation. 
+ */ + +void spin_lock_init(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_lock(spinlock_t *lock) +{ + (void)lock; +} + +void spin_unlock(spinlock_t *lock) +{ + (void)lock; +} + +int spin_trylock(spinlock_t *lock) +{ + (void)lock; + return 1; +} + +void spin_lock_bh_init(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_lock_bh(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_unlock_bh(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +struct semaphore {}; + +void sema_init(struct semaphore *s, int val) +{ + LASSERT(s != NULL); + (void)s; + (void)val; +} + +void __down(struct semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +void __up(struct semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ + +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +struct completion {}; + +void init_completion(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +void complete(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +void wait_for_completion(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +/* + * rw_semaphore: + * + * - DECLARE_RWSEM(x) + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore {}; + +void init_rwsem(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +void down_read(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +int down_read_trylock(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; + return 1; +} + +void down_write(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +int down_write_trylock(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; + return 1; +} + +void up_read(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +void up_write(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} +#endif + +#ifdef HAVE_LIBPTHREAD + +/* + * Completion + */ + +void cfs_init_completion(struct cfs_completion *c) +{ + LASSERT(c != NULL); + c->c_done = 0; + pthread_mutex_init(&c->c_mut, NULL); + pthread_cond_init(&c->c_cond, NULL); +} + +void cfs_fini_completion(struct cfs_completion *c) +{ + LASSERT(c != NULL); + pthread_mutex_destroy(&c->c_mut); + pthread_cond_destroy(&c->c_cond); +} + +void cfs_complete(struct cfs_completion *c) +{ + LASSERT(c != NULL); + pthread_mutex_lock(&c->c_mut); + c->c_done++; + pthread_cond_signal(&c->c_cond); + pthread_mutex_unlock(&c->c_mut); +} + +void cfs_wait_for_completion(struct cfs_completion *c) +{ + LASSERT(c != NULL); + pthread_mutex_lock(&c->c_mut); + while (c->c_done == 0) + pthread_cond_wait(&c->c_cond, &c->c_mut); + c->c_done--; + pthread_mutex_unlock(&c->c_mut); +} + +/* + * atomic primitives + */ + +static pthread_mutex_t atomic_guard_lock = PTHREAD_MUTEX_INITIALIZER; + +int cfs_atomic_read(cfs_atomic_t *a) +{ + int r; + + pthread_mutex_lock(&atomic_guard_lock); + r = a->counter; + pthread_mutex_unlock(&atomic_guard_lock); + return r; +} + +void cfs_atomic_set(cfs_atomic_t *a, int b) +{ + pthread_mutex_lock(&atomic_guard_lock); + a->counter = b; + 
pthread_mutex_unlock(&atomic_guard_lock); +} + +int cfs_atomic_dec_and_test(cfs_atomic_t *a) +{ + int r; + + pthread_mutex_lock(&atomic_guard_lock); + r = --a->counter; + pthread_mutex_unlock(&atomic_guard_lock); + return (r == 0); +} + +void cfs_atomic_inc(cfs_atomic_t *a) +{ + pthread_mutex_lock(&atomic_guard_lock); + ++a->counter; + pthread_mutex_unlock(&atomic_guard_lock); +} + +void cfs_atomic_dec(cfs_atomic_t *a) +{ + pthread_mutex_lock(&atomic_guard_lock); + --a->counter; + pthread_mutex_unlock(&atomic_guard_lock); +} +void cfs_atomic_add(int b, cfs_atomic_t *a) + +{ + pthread_mutex_lock(&atomic_guard_lock); + a->counter += b; + pthread_mutex_unlock(&atomic_guard_lock); +} + +void cfs_atomic_sub(int b, cfs_atomic_t *a) +{ + pthread_mutex_lock(&atomic_guard_lock); + a->counter -= b; + pthread_mutex_unlock(&atomic_guard_lock); +} + +#endif /* HAVE_LIBPTHREAD */ + + +/* !__KERNEL__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/libcfs/user-prim.c b/libcfs/libcfs/user-prim.c new file mode 100644 index 0000000..ffa32c1 --- /dev/null +++ b/libcfs/libcfs/user-prim.c @@ -0,0 +1,399 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable APIs for user-level. + * + */ + +/* Implementations of portable APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + */ + +#ifndef __KERNEL__ + +#include +#include + +#include +#ifndef __CYGWIN__ +#include +#ifdef HAVE_ASM_PAGE_H +#include +#endif +#ifdef HAVE_SYS_USER_H +#include +#endif +#else +#include +#endif +#include +#include +#include +#include +#include +#ifdef HAVE_SYS_VFS_H +#include +#endif + +/* + * Sleep channel. No-op implementation. 
+ */ + +void cfs_waitq_init(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitlink_init(struct cfs_waitlink *link) +{ + LASSERT(link != NULL); + (void)link; +} + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +int cfs_waitq_active(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; + return 0; +} + +void cfs_waitq_signal(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_broadcast(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_wait(struct cfs_waitlink *link, int state) +{ + LASSERT(link != NULL); + (void)link; +} + +int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int state, int64_t timeout) +{ + LASSERT(link != NULL); + (void)link; + return 0; +} + +#ifdef HAVE_LIBPTHREAD + +/* + * Threads + */ + +struct lustre_thread_arg { + cfs_thread_t f; + void *arg; +}; +static void *cfs_thread_helper(void *data) +{ + struct lustre_thread_arg *targ = data; + cfs_thread_t f = targ->f; + void *arg = targ->arg; + + free(targ); + + (void)f(arg); + return NULL; +} +int cfs_create_thread(cfs_thread_t func, void *arg) +{ + pthread_t tid; + pthread_attr_t tattr; + int rc; + struct lustre_thread_arg *targ_p = malloc(sizeof(struct lustre_thread_arg)); + + if ( targ_p == NULL ) + return -ENOMEM; + + targ_p->f = func; + targ_p->arg = arg; + + pthread_attr_init(&tattr); + pthread_attr_setdetachstate(&tattr, PTHREAD_CREATE_DETACHED); + rc = pthread_create(&tid, &tattr, cfs_thread_helper, targ_p); + pthread_attr_destroy(&tattr); + return -rc; +} +#endif + +uid_t cfs_curproc_uid(void) +{ + return getuid(); +} + +int cfs_parse_int_tunable(int *value, char *name) +{ + char *env = getenv(name); + char *end; + + if (env == NULL) + return 0; + + *value = strtoull(env, &end, 0); + if (*end == 0) + return 0; + + CERROR("Can't parse tunable %s=%s\n", name, env); + return -EINVAL; +} + +/* + * Allocator + */ + +cfs_page_t *cfs_alloc_page(unsigned int flags) +{ + cfs_page_t *pg = malloc(sizeof(*pg)); + + if (!pg) + return NULL; + pg->addr = malloc(CFS_PAGE_SIZE); + + if (!pg->addr) { + free(pg); + return NULL; + } + return pg; +} + +void cfs_free_page(cfs_page_t *pg) +{ + free(pg->addr); + free(pg); +} + +void *cfs_page_address(cfs_page_t *pg) +{ + return pg->addr; +} + +void *cfs_kmap(cfs_page_t *pg) +{ + return pg->addr; +} + +void cfs_kunmap(cfs_page_t *pg) +{ +} + +/* + * SLAB allocator + */ + +cfs_mem_cache_t * +cfs_mem_cache_create(const char *name, size_t objsize, size_t off, unsigned long flags) +{ + cfs_mem_cache_t *c; + + c = malloc(sizeof(*c)); + if (!c) + return NULL; + c->size = objsize; + CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n", + name, c, (int)objsize); + return c; +} + +int cfs_mem_cache_destroy(cfs_mem_cache_t *c) +{ + CDEBUG(D_MALLOC, "destroy slab 
cache %p, objsize %u\n", c, c->size); + free(c); + return 0; +} + +void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp) +{ + return cfs_alloc(c->size, gfp); +} + +void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr) +{ + cfs_free(addr); +} + +void cfs_enter_debugger(void) +{ + /* + * nothing for now. + */ +} + +void cfs_daemonize(char *str) +{ + return; +} + +int cfs_daemonize_ctxt(char *str) +{ + return 0; +} + +cfs_sigset_t cfs_block_allsigs(void) +{ + cfs_sigset_t all; + cfs_sigset_t old; + int rc; + + sigfillset(&all); + rc = sigprocmask(SIG_SETMASK, &all, &old); + LASSERT(rc == 0); + + return old; +} + +cfs_sigset_t cfs_block_sigs(cfs_sigset_t blocks) +{ + cfs_sigset_t old; + int rc; + + rc = sigprocmask(SIG_SETMASK, &blocks, &old); + LASSERT (rc == 0); + + return old; +} + +void cfs_restore_sigs(cfs_sigset_t old) +{ + int rc = sigprocmask(SIG_SETMASK, &old, NULL); + + LASSERT (rc == 0); +} + +int cfs_signal_pending(void) +{ + cfs_sigset_t empty; + cfs_sigset_t set; + int rc; + + rc = sigpending(&set); + LASSERT (rc == 0); + + sigemptyset(&empty); + + return !memcmp(&empty, &set, sizeof(set)); +} + +void cfs_clear_sigpending(void) +{ + return; +} + +#ifdef __linux__ + +/* + * In glibc (NOT in Linux, so check above is not right), implement + * stack-back-tracing through backtrace() function. + */ +#include + +void cfs_stack_trace_fill(struct cfs_stack_trace *trace) +{ + backtrace(trace->frame, sizeof_array(trace->frame)); +} + +void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) +{ + if (0 <= frame_no && frame_no < sizeof_array(trace->frame)) + return trace->frame[frame_no]; + else + return NULL; +} + +#else + +void cfs_stack_trace_fill(struct cfs_stack_trace *trace) +{} +void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no) +{ + return NULL; +} + +/* __linux__ */ +#endif + +void lbug_with_loc(char *file, const char *func, const int line) +{ + /* No libcfs_catastrophe in userspace! */ + libcfs_debug_msg(NULL, 0, D_EMERG, file, func, line, "LBUG\n"); + abort(); +} + +/* !__KERNEL__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/libcfs/libcfs/user-tcpip.c b/libcfs/libcfs/user-tcpip.c new file mode 100644 index 0000000..e0cedb9 --- /dev/null +++ b/libcfs/libcfs/user-tcpip.c @@ -0,0 +1,606 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2005 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#if !defined(__KERNEL__) || !defined(REDSTORM) + +#include +#include + +#include +#ifdef HAVE_NETINET_IN_H +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__sun__) || defined(__sun) +#include +#endif +#ifndef __CYGWIN__ +#include +#endif + +/* + * Functions to get network interfaces info + */ + +int +libcfs_sock_ioctl(int cmd, unsigned long arg) +{ + int fd, rc; + + fd = socket(AF_INET, SOCK_STREAM, 0); + + if (fd < 0) { + rc = -errno; + CERROR("socket() failed: errno==%d\n", errno); + return rc; + } + + rc = ioctl(fd, cmd, arg); + + close(fd); + return rc; +} + +int +libcfs_ipif_query (char *name, int *up, __u32 *ip) +{ + struct ifreq ifr; + int nob; + int rc; + __u32 val; + + nob = strlen(name); + if (nob >= IFNAMSIZ) { + CERROR("Interface name %s too long\n", name); + return -EINVAL; + } + + CLASSERT (sizeof(ifr.ifr_name) >= IFNAMSIZ); + + strcpy(ifr.ifr_name, name); + rc = libcfs_sock_ioctl(SIOCGIFFLAGS, (unsigned long)&ifr); + + if (rc != 0) { + CERROR("Can't get flags for interface %s\n", name); + return rc; + } + + if ((ifr.ifr_flags & IFF_UP) == 0) { + CDEBUG(D_NET, "Interface %s down\n", name); + *up = 0; + *ip = 0; + return 0; + } + + *up = 1; + + strcpy(ifr.ifr_name, name); + ifr.ifr_addr.sa_family = AF_INET; + rc = libcfs_sock_ioctl(SIOCGIFADDR, (unsigned long)&ifr); + + if (rc != 0) { + CERROR("Can't get IP address for interface %s\n", name); + return rc; + } + + val = ((struct sockaddr_in *)&ifr.ifr_addr)->sin_addr.s_addr; + *ip = ntohl(val); + + return 0; +} + +void +libcfs_ipif_free_enumeration (char **names, int n) +{ + int i; + + LASSERT (n > 0); + + for (i = 0; i < n && names[i] != NULL; i++) + LIBCFS_FREE(names[i], IFNAMSIZ); + + LIBCFS_FREE(names, n * sizeof(*names)); +} + +int +libcfs_ipif_enumerate (char ***namesp) +{ + /* Allocate and fill in 'names', returning # interfaces/error */ + char **names; + int nalloc; + int nfound; + struct ifreq *ifr; + struct ifconf ifc; + int rc; + int nob; + int i; + + + nalloc = 16; /* first guess at max interfaces */ + for (;;) { + LIBCFS_ALLOC(ifr, nalloc * sizeof(*ifr)); + if (ifr == NULL) { + CERROR ("ENOMEM enumerating up to %d interfaces\n", + nalloc); + rc = -ENOMEM; + goto out0; + } + + ifc.ifc_buf = (char *)ifr; + ifc.ifc_len = nalloc * sizeof(*ifr); + + rc = libcfs_sock_ioctl(SIOCGIFCONF, (unsigned long)&ifc); + + if (rc < 0) { + CERROR ("Error %d enumerating interfaces\n", rc); + goto out1; + } + + LASSERT (rc == 0); + + nfound = ifc.ifc_len/sizeof(*ifr); + LASSERT (nfound <= nalloc); + + if (nfound < nalloc) + break; + + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + nalloc *= 2; + } + + if (nfound == 0) + goto out1; + + LIBCFS_ALLOC(names, nfound * sizeof(*names)); + if (names == NULL) { + rc = -ENOMEM; + goto out1; + } + /* NULL out all names[i] */ + memset (names, 0, nfound * sizeof(*names)); + + for (i = 0; i < nfound; i++) { + + nob = strlen (ifr[i].ifr_name); + if (nob >= IFNAMSIZ) { + /* no space for terminating NULL */ + CERROR("interface name %.*s too long (%d max)\n", + nob, ifr[i].ifr_name, IFNAMSIZ); + rc = -ENAMETOOLONG; + goto out2; + } + + LIBCFS_ALLOC(names[i], IFNAMSIZ); + if (names[i] == NULL) { + rc = -ENOMEM; + goto out2; + } + + memcpy(names[i], ifr[i].ifr_name, nob); + names[i][nob] = 0; + } + + *namesp = names; + rc = nfound; + + out2: + if (rc < 0) + libcfs_ipif_free_enumeration(names, nfound); + out1: + LIBCFS_FREE(ifr, nalloc * sizeof(*ifr)); + out0: + return rc; +} + +/* + * Network functions used by user-land lnet 
acceptor + */ + +int +libcfs_sock_listen (int *sockp, __u32 local_ip, int local_port, int backlog) +{ + int rc; + int option; + struct sockaddr_in locaddr; + + *sockp = socket(AF_INET, SOCK_STREAM, 0); + if (*sockp < 0) { + rc = -errno; + CERROR("socket() failed: errno==%d\n", errno); + return rc; + } + + option = 1; + if ( setsockopt(*sockp, SOL_SOCKET, SO_REUSEADDR, + (char *)&option, sizeof (option)) ) { + rc = -errno; + CERROR("setsockopt(SO_REUSEADDR) failed: errno==%d\n", errno); + goto failed; + } + + if (local_ip != 0 || local_port != 0) { + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(local_port); + locaddr.sin_addr.s_addr = (local_ip == 0) ? + INADDR_ANY : htonl(local_ip); + + if ( bind(*sockp, (struct sockaddr *)&locaddr, sizeof(locaddr)) ) { + rc = -errno; + if ( errno == -EADDRINUSE ) + CDEBUG(D_NET, "Port %d already in use\n", + local_port); + else + CERROR("bind() to port %d failed: errno==%d\n", + local_port, errno); + goto failed; + } + } + + if ( listen(*sockp, backlog) ) { + rc = -errno; + CERROR("listen() with backlog==%d failed: errno==%d\n", + backlog, errno); + goto failed; + } + + return 0; + + failed: + close(*sockp); + return rc; +} + +int +libcfs_sock_accept (int *newsockp, int sock, __u32 *peer_ip, int *peer_port) +{ + struct sockaddr_in accaddr; + socklen_t accaddr_len = sizeof(struct sockaddr_in); + + *newsockp = accept(sock, (struct sockaddr *)&accaddr, &accaddr_len); + + if ( *newsockp < 0 ) { + CERROR("accept() failed: errno==%d\n", errno); + return -errno; + } + + *peer_ip = ntohl(accaddr.sin_addr.s_addr); + *peer_port = ntohs(accaddr.sin_port); + + return 0; +} + +int +libcfs_sock_read (int sock, void *buffer, int nob, int timeout) +{ + int rc; + struct pollfd pfd; + cfs_time_t start_time = cfs_time_current(); + + pfd.fd = sock; + pfd.events = POLLIN; + pfd.revents = 0; + + /* poll(2) measures timeout in msec */ + timeout *= 1000; + + while (nob != 0 && timeout > 0) { + cfs_time_t current_time; + + rc = poll(&pfd, 1, timeout); + if (rc < 0) + return -errno; + if (rc == 0) + return -ETIMEDOUT; + if ((pfd.revents & POLLIN) == 0) + return -EIO; + + rc = read(sock, buffer, nob); + if (rc < 0) + return -errno; + if (rc == 0) + return -EIO; + + buffer = ((char *)buffer) + rc; + nob -= rc; + + current_time = cfs_time_current(); + timeout -= cfs_duration_sec(cfs_time_sub(cfs_time_current(), + start_time)); + } + + if (nob == 0) + return 0; + else + return -ETIMEDOUT; +} + +/* Just try to connect to localhost to wake up entity that are + * sleeping in accept() */ +void +libcfs_sock_abort_accept(__u16 port) +{ + int fd, rc; + struct sockaddr_in locaddr; + + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(port); + locaddr.sin_addr.s_addr = inet_addr("127.0.0.1"); + + fd = socket(AF_INET, SOCK_STREAM, 0); + if ( fd < 0 ) { + CERROR("socket() failed: errno==%d\n", errno); + return; + } + + rc = connect(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); + if ( rc != 0 ) { + if ( errno != ECONNREFUSED ) + CERROR("connect() failed: errno==%d\n", errno); + else + CDEBUG(D_NET, "Nobody to wake up at %d\n", port); + } + + close(fd); +} + +/* + * Network functions of common use + */ + +int +libcfs_getpeername(int sock_fd, __u32 *ipaddr_p, __u16 *port_p) +{ + int rc; + struct sockaddr_in peer_addr; + socklen_t peer_addr_len = sizeof(peer_addr); + + rc = getpeername(sock_fd, (struct sockaddr *)&peer_addr, &peer_addr_len); + if (rc != 0) + return -errno; + + if (ipaddr_p != 
NULL) + *ipaddr_p = ntohl(peer_addr.sin_addr.s_addr); + if (port_p != NULL) + *port_p = ntohs(peer_addr.sin_port); + + return 0; +} + +int +libcfs_socketpair(int *fdp) +{ + int rc, i; + + rc = socketpair(AF_UNIX, SOCK_STREAM, 0, fdp); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot create socket pair\n"); + return rc; + } + + for (i = 0; i < 2; i++) { + rc = libcfs_fcntl_nonblock(fdp[i]); + if (rc) { + close(fdp[0]); + close(fdp[1]); + return rc; + } + } + + return 0; +} + +int +libcfs_fcntl_nonblock(int fd) +{ + int rc, flags; + + flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + rc = -errno; + CERROR ("Cannot get socket flags\n"); + return rc; + } + + rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot set socket flags\n"); + return rc; + } + + return 0; +} + +int +libcfs_sock_set_nagle(int fd, int nagle) +{ + int rc; + int option = nagle ? 0 : 1; + +#if defined(__sun__) || defined(__sun) + rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &option, sizeof(option)); +#else + rc = setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option)); +#endif + + if (rc != 0) { + rc = -errno; + CERROR ("Cannot set NODELAY socket option\n"); + return rc; + } + + return 0; +} + +int +libcfs_sock_set_bufsiz(int fd, int bufsiz) +{ + int rc, option; + + LASSERT (bufsiz != 0); + + option = bufsiz; + rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option)); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot set SNDBUF socket option\n"); + return rc; + } + + option = bufsiz; + rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option)); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot set RCVBUF socket option\n"); + return rc; + } + + return 0; +} + +int +libcfs_sock_create(int *fdp) +{ + int rc, fd, option; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + rc = -errno; + CERROR ("Cannot create socket\n"); + return rc; + } + + option = 1; + rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + &option, sizeof(option)); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot set SO_REUSEADDR for socket\n"); + close(fd); + return rc; + } + + *fdp = fd; + return 0; +} + +int +libcfs_sock_bind_to_port(int fd, __u16 port) +{ + int rc; + struct sockaddr_in locaddr; + + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_addr.s_addr = INADDR_ANY; + locaddr.sin_port = htons(port); + + rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr)); + if (rc != 0) { + rc = -errno; + CERROR ("Cannot bind to port %d\n", port); + return rc; + } + + return 0; +} + +int +libcfs_sock_connect(int fd, __u32 ip, __u16 port) +{ + int rc; + struct sockaddr_in addr; + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(ip); + addr.sin_port = htons(port); + + rc = connect(fd, (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)); + + if(rc != 0 && errno != EINPROGRESS) { + rc = -errno; + if (rc != -EADDRINUSE && rc != -EADDRNOTAVAIL) + CERROR ("Cannot connect to %u.%u.%u.%u:%d (err=%d)\n", + HIPQUAD(ip), port, errno); + return rc; + } + + return 0; +} + +/* NB: EPIPE and ECONNRESET are considered as non-fatal + * because: + * 1) it still makes sense to continue reading && + * 2) anyway, poll() will set up POLLHUP|POLLERR flags */ +int libcfs_sock_writev(int fd, const struct iovec *vector, int count) +{ + int rc; + + rc = syscall(SYS_writev, fd, vector, count); + + if (rc == 0) /* write nothing */ + return 0; + + if (rc < 0) { + if (errno == EAGAIN || /* write nothing */ + errno == EPIPE || /* 
non-fatal error */ + errno == ECONNRESET) /* non-fatal error */ + return 0; + else + return -errno; + } + + return rc; +} + +int libcfs_sock_readv(int fd, const struct iovec *vector, int count) +{ + int rc; + + rc = syscall(SYS_readv, fd, vector, count); + + if (rc == 0) /* EOF */ + return -EIO; + + if (rc < 0) { + if (errno == EAGAIN) /* read nothing */ + return 0; + else + return -errno; + } + + return rc; +} + +#endif /* !__KERNEL__ || !defined(REDSTORM) */ diff --git a/libcfs/libcfs/watchdog.c b/libcfs/libcfs/watchdog.c new file mode 100644 index 0000000..89d757c --- /dev/null +++ b/libcfs/libcfs/watchdog.c @@ -0,0 +1,427 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Jacob Berkman + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "tracefile.h" + +struct lc_watchdog { + cfs_timer_t lcw_timer; /* kernel timer */ + struct list_head lcw_list; + struct timeval lcw_last_touched; + cfs_task_t *lcw_task; + + void (*lcw_callback)(pid_t, void *); + void *lcw_data; + + pid_t lcw_pid; + cfs_duration_t lcw_time; /* time until watchdog fires, jiffies */ + + enum { + LC_WATCHDOG_DISABLED, + LC_WATCHDOG_ENABLED, + LC_WATCHDOG_EXPIRED + } lcw_state; +}; + +#ifdef WITH_WATCHDOG +/* + * The dispatcher will complete lcw_start_completion when it starts, + * and lcw_stop_completion when it exits. + * Wake lcw_event_waitq to signal timer callback dispatches. + */ +static struct completion lcw_start_completion; +static struct completion lcw_stop_completion; +static wait_queue_head_t lcw_event_waitq; + +/* + * Set this and wake lcw_event_waitq to stop the dispatcher. + */ +enum { + LCW_FLAG_STOP = 0 +}; +static unsigned long lcw_flags = 0; + +/* + * Number of outstanding watchdogs. + * When it hits 1, we start the dispatcher. + * When it hits 0, we stop the distpatcher. + */ +static __u32 lcw_refcount = 0; +static DECLARE_MUTEX(lcw_refcount_sem); + +/* + * List of timers that have fired that need their callbacks run by the + * dispatcher. + */ +static spinlock_t lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED; /* BH lock! 
*/ +static struct list_head lcw_pending_timers = \ + LIST_HEAD_INIT(lcw_pending_timers); + +#ifdef HAVE_TASKLIST_LOCK +static void +lcw_dump(struct lc_watchdog *lcw) +{ + cfs_task_t *tsk; + ENTRY; + + read_lock(&tasklist_lock); + tsk = find_task_by_pid(lcw->lcw_pid); + + if (tsk == NULL) { + CWARN("Process %d was not found in the task list; " + "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); + } else if (tsk != lcw->lcw_task) { + CWARN("The current process %d did not set the watchdog; " + "watchdog callback may be incomplete\n", (int)lcw->lcw_pid); + } else { + libcfs_debug_dumpstack(tsk); + } + + read_unlock(&tasklist_lock); + EXIT; +} +#else +static void +lcw_dump(struct lc_watchdog *lcw) +{ + CERROR("unable to dump stack because of missing export\n"); +} +#endif + +static void lcw_cb(unsigned long data) +{ + struct lc_watchdog *lcw = (struct lc_watchdog *)data; + + ENTRY; + + if (lcw->lcw_state != LC_WATCHDOG_ENABLED) { + EXIT; + return; + } + + lcw->lcw_state = LC_WATCHDOG_EXPIRED; + + /* NB this warning should appear on the console, but may not get into + * the logs since we're running in a softirq handler */ + + CWARN("Watchdog triggered for pid %d: it was inactive for %lds\n", + (int)lcw->lcw_pid, cfs_duration_sec(lcw->lcw_time)); + lcw_dump(lcw); + + spin_lock_bh(&lcw_pending_timers_lock); + + if (list_empty(&lcw->lcw_list)) { + list_add(&lcw->lcw_list, &lcw_pending_timers); + wake_up(&lcw_event_waitq); + } + + spin_unlock_bh(&lcw_pending_timers_lock); + + EXIT; +} + +static int is_watchdog_fired(void) +{ + int rc; + + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) + return 1; + + spin_lock_bh(&lcw_pending_timers_lock); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_bh(&lcw_pending_timers_lock); + return rc; +} + +static int lcw_dispatch_main(void *data) +{ + int rc = 0; + unsigned long flags; + struct lc_watchdog *lcw; + + ENTRY; + + cfs_daemonize("lc_watchdogd"); + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(¤t->blocked); + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); + + complete(&lcw_start_completion); + + while (1) { + wait_event_interruptible(lcw_event_waitq, is_watchdog_fired()); + CDEBUG(D_INFO, "Watchdog got woken up...\n"); + if (test_bit(LCW_FLAG_STOP, &lcw_flags)) { + CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n"); + + spin_lock_bh(&lcw_pending_timers_lock); + rc = !list_empty(&lcw_pending_timers); + spin_unlock_bh(&lcw_pending_timers_lock); + if (rc) { + CERROR("pending timers list was not empty at " + "time of watchdog dispatch shutdown\n"); + } + break; + } + + spin_lock_bh(&lcw_pending_timers_lock); + while (!list_empty(&lcw_pending_timers)) { + + lcw = list_entry(lcw_pending_timers.next, + struct lc_watchdog, + lcw_list); + list_del_init(&lcw->lcw_list); + spin_unlock_bh(&lcw_pending_timers_lock); + + CDEBUG(D_INFO, "found lcw for pid %d: inactive for " + "%lds\n", (int)lcw->lcw_pid, + cfs_duration_sec(lcw->lcw_time)); + + if (lcw->lcw_state != LC_WATCHDOG_DISABLED) + lcw->lcw_callback(lcw->lcw_pid, lcw->lcw_data); + + spin_lock_bh(&lcw_pending_timers_lock); + } + spin_unlock_bh(&lcw_pending_timers_lock); + } + + complete(&lcw_stop_completion); + + RETURN(rc); +} + +static void lcw_dispatch_start(void) +{ + int rc; + + ENTRY; + LASSERT(lcw_refcount == 1); + + init_completion(&lcw_stop_completion); + init_completion(&lcw_start_completion); + init_waitqueue_head(&lcw_event_waitq); + + CDEBUG(D_INFO, "starting dispatch thread\n"); + rc = kernel_thread(lcw_dispatch_main, NULL, 0); + if (rc < 0) { + CERROR("error spawning 
watchdog dispatch thread: %d\n", rc); + EXIT; + return; + } + wait_for_completion(&lcw_start_completion); + CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n"); + + EXIT; +} + +static void lcw_dispatch_stop(void) +{ + ENTRY; + LASSERT(lcw_refcount == 0); + + CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n"); + + set_bit(LCW_FLAG_STOP, &lcw_flags); + wake_up(&lcw_event_waitq); + + wait_for_completion(&lcw_stop_completion); + + CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n"); + + EXIT; +} + +struct lc_watchdog *lc_watchdog_add(int timeout_ms, + void (*callback)(pid_t, void *), + void *data) +{ + struct lc_watchdog *lcw = NULL; + ENTRY; + + LIBCFS_ALLOC(lcw, sizeof(*lcw)); + if (lcw == NULL) { + CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n"); + RETURN(ERR_PTR(-ENOMEM)); + } + + lcw->lcw_task = cfs_current(); + lcw->lcw_pid = cfs_curproc_pid(); + lcw->lcw_time = cfs_time_seconds(timeout_ms) / 1000; + lcw->lcw_callback = (callback != NULL) ? callback : lc_watchdog_dumplog; + lcw->lcw_data = data; + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + INIT_LIST_HEAD(&lcw->lcw_list); + + lcw->lcw_timer.function = lcw_cb; + lcw->lcw_timer.data = (unsigned long)lcw; + lcw->lcw_timer.expires = jiffies + lcw->lcw_time; + init_timer(&lcw->lcw_timer); + + down(&lcw_refcount_sem); + if (++lcw_refcount == 1) + lcw_dispatch_start(); + up(&lcw_refcount_sem); + + /* Keep this working in case we enable them by default */ + if (lcw->lcw_state == LC_WATCHDOG_ENABLED) { + do_gettimeofday(&lcw->lcw_last_touched); + add_timer(&lcw->lcw_timer); + } + + RETURN(lcw); +} +EXPORT_SYMBOL(lc_watchdog_add); + +static void lcw_update_time(struct lc_watchdog *lcw, const char *message) +{ + struct timeval newtime; + struct timeval timediff; + + do_gettimeofday(&newtime); + if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) { + cfs_timeval_sub(&newtime, &lcw->lcw_last_touched, &timediff); + CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n", + lcw->lcw_pid, + message, + timediff.tv_sec, + timediff.tv_usec / 100); + } + lcw->lcw_last_touched = newtime; +} + +void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms) +{ + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_bh(&lcw_pending_timers_lock); + list_del_init(&lcw->lcw_list); + spin_unlock_bh(&lcw_pending_timers_lock); + + lcw_update_time(lcw, "touched"); + lcw->lcw_state = LC_WATCHDOG_ENABLED; + + mod_timer(&lcw->lcw_timer, jiffies + + cfs_time_seconds(timeout_ms) / 1000); + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_touch_ms); + +/* deprecated - use above instead */ +void lc_watchdog_touch(struct lc_watchdog *lcw) +{ + lc_watchdog_touch_ms(lcw, cfs_duration_sec(lcw->lcw_time) * 1000); +} +EXPORT_SYMBOL(lc_watchdog_touch); + +void lc_watchdog_disable(struct lc_watchdog *lcw) +{ + ENTRY; + LASSERT(lcw != NULL); + + spin_lock_bh(&lcw_pending_timers_lock); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_bh(&lcw_pending_timers_lock); + + lcw_update_time(lcw, "disabled"); + lcw->lcw_state = LC_WATCHDOG_DISABLED; + + EXIT; +} +EXPORT_SYMBOL(lc_watchdog_disable); + +void lc_watchdog_delete(struct lc_watchdog *lcw) +{ + ENTRY; + LASSERT(lcw != NULL); + + del_timer(&lcw->lcw_timer); + + lcw_update_time(lcw, "deleted"); + + spin_lock_bh(&lcw_pending_timers_lock); + if (!list_empty(&lcw->lcw_list)) + list_del_init(&lcw->lcw_list); + spin_unlock_bh(&lcw_pending_timers_lock); + + down(&lcw_refcount_sem); + if (--lcw_refcount == 0) + lcw_dispatch_stop(); + up(&lcw_refcount_sem); + + LIBCFS_FREE(lcw, sizeof(*lcw)); + + EXIT; +} 
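/*
 * Usage sketch: a minimal, hypothetical consumer of the lc_watchdog API
 * above.  Only the lc_watchdog_* and IS_ERR/PTR_ERR calls are real; the
 * service_* names stand in for whatever request loop would actually own
 * the watchdog.
 *
 *      struct lc_watchdog *wd;
 *
 *      wd = lc_watchdog_add(30000, NULL, NULL);  // NULL callback => lc_watchdog_dumplog
 *      if (IS_ERR(wd))
 *              return PTR_ERR(wd);
 *
 *      while (service_has_work()) {              // hypothetical work loop
 *              lc_watchdog_touch_ms(wd, 30000);  // (re)arm: watchdogs start disabled
 *              service_handle_one_request();     // hypothetical request handler
 *              lc_watchdog_disable(wd);          // stay quiet while idle
 *      }
 *      lc_watchdog_delete(wd);
 */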
+EXPORT_SYMBOL(lc_watchdog_delete); + +/* + * Provided watchdog handlers + */ + +void lc_watchdog_dumplog(pid_t pid, void *data) +{ + libcfs_debug_dumplog_internal((void *)((unsigned long)pid)); +} +EXPORT_SYMBOL(lc_watchdog_dumplog); + +#else /* !defined(WITH_WATCHDOG) */ + +struct lc_watchdog *lc_watchdog_add(int timeout_ms, + void (*callback)(pid_t pid, void *), + void *data) +{ + static struct lc_watchdog watchdog; + return &watchdog; +} +EXPORT_SYMBOL(lc_watchdog_add); + +void lc_watchdog_touch_ms(struct lc_watchdog *lcw, int timeout_ms) +{ +} +EXPORT_SYMBOL(lc_watchdog_touch_ms); + +void lc_watchdog_touch(struct lc_watchdog *lcw) +{ +} +EXPORT_SYMBOL(lc_watchdog_touch); + +void lc_watchdog_disable(struct lc_watchdog *lcw) +{ +} +EXPORT_SYMBOL(lc_watchdog_disable); + +void lc_watchdog_delete(struct lc_watchdog *lcw) +{ +} +EXPORT_SYMBOL(lc_watchdog_delete); + +#endif + diff --git a/libcfs/libcfs/winnt/winnt-curproc.c b/libcfs/libcfs/winnt/winnt-curproc.c new file mode 100644 index 0000000..e21c5c9 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-curproc.c @@ -0,0 +1,453 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + * + * Implementation of the winnt curproc routines. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for the Windows NT kernel.
+ */ + +cfs_task_t this_task = + { 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, + "sysetm\0" }; + + +uid_t cfs_curproc_uid(void) +{ + return this_task.uid; +} + +gid_t cfs_curproc_gid(void) +{ + return this_task.gid; +} + +uid_t cfs_curproc_fsuid(void) +{ + return this_task.fsuid; +} + +gid_t cfs_curproc_fsgid(void) +{ + return this_task.fsgid; +} + +pid_t cfs_curproc_pid(void) +{ + return cfs_current()->pid; +} + +int cfs_curproc_groups_nr(void) +{ + return this_task.ngroups; +} + +void cfs_curproc_groups_dump(gid_t *array, int size) +{ + LASSERT(size <= NGROUPS); + size = min_t(int, size, this_task.ngroups); + memcpy(array, this_task.groups, size * sizeof(__u32)); +} + +int cfs_curproc_is_in_groups(gid_t gid) +{ + return in_group_p(gid); +} + +mode_t cfs_curproc_umask(void) +{ + return this_task.umask; +} + +char *cfs_curproc_comm(void) +{ + return this_task.comm; +} + +cfs_kernel_cap_t cfs_curproc_cap_get(void) +{ + return this_task.cap_effective; +} + +void cfs_curproc_cap_set(cfs_kernel_cap_t cap) +{ + this_task.cap_effective = cap; +} + + +/* + * Implementation of linux task management routines + */ + + +/* global of the task manager structure */ + +TASK_MAN TaskMan; + + +/* + * task slot routiens + */ + +PTASK_SLOT +alloc_task_slot() +{ + PTASK_SLOT task = NULL; + + if (TaskMan.slab) { + task = cfs_mem_cache_alloc(TaskMan.slab, 0); + } else { + task = cfs_alloc(sizeof(TASK_SLOT), 0); + } + + return task; +} + +void +init_task_slot(PTASK_SLOT task) +{ + memset(task, 0, sizeof(TASK_SLOT)); + task->Magic = TASKSLT_MAGIC; + task->task = this_task; + task->task.pid = (pid_t)PsGetCurrentThreadId(); + cfs_init_event(&task->Event, TRUE, FALSE); +} + + +void +cleanup_task_slot(PTASK_SLOT task) +{ + if (TaskMan.slab) { + cfs_mem_cache_free(TaskMan.slab, task); + } else { + cfs_free(task); + } +} + +/* + * task manager related routines + */ + +VOID +task_manager_notify( + IN HANDLE ProcessId, + IN HANDLE ThreadId, + IN BOOLEAN Create + ) +{ + PLIST_ENTRY ListEntry = NULL; + PTASK_SLOT TaskSlot = NULL; + + spin_lock(&(TaskMan.Lock)); + + ListEntry = TaskMan.TaskList.Flink; + + while (ListEntry != (&(TaskMan.TaskList))) { + + TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); + + if (TaskSlot->Pid == ProcessId && TaskSlot->Tid == ThreadId) { + + if (Create) { +/* + DbgPrint("task_manager_notify: Pid=%xh Tid %xh resued (TaskSlot->Tet = %xh)...\n", + ProcessId, ThreadId, TaskSlot->Tet); +*/ + } else { + /* remove the taskslot */ + RemoveEntryList(&(TaskSlot->Link)); + TaskMan.NumOfTasks--; + + /* now free the task slot */ + cleanup_task_slot(TaskSlot); + } + } + + ListEntry = ListEntry->Flink; + } + + spin_unlock(&(TaskMan.Lock)); +} + +int +init_task_manager() +{ + NTSTATUS status; + + /* initialize the content and magic */ + memset(&TaskMan, 0, sizeof(TASK_MAN)); + TaskMan.Magic = TASKMAN_MAGIC; + + /* initialize the spinlock protection */ + spin_lock_init(&TaskMan.Lock); + + /* create slab memory cache */ + TaskMan.slab = cfs_mem_cache_create( + "TSLT", sizeof(TASK_SLOT), 0, 0); + + /* intialize the list header */ + InitializeListHead(&(TaskMan.TaskList)); + + /* set the thread creation/destruction notify routine */ + status = PsSetCreateThreadNotifyRoutine(task_manager_notify); + + if (!NT_SUCCESS(status)) { + cfs_enter_debugger(); + } + + return 0; +} + +void +cleanup_task_manager() +{ + PLIST_ENTRY ListEntry = NULL; + PTASK_SLOT TaskSlot = NULL; + + /* we must stay in system since we succeed to register the + CreateThreadNotifyRoutine: task_manager_notify */ + 
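    /*
     * The thread-creation notify routine registered in init_task_manager()
     * is still installed at this point, so (as the note above says) the
     * module is expected to stay resident; the debugger break below flags
     * an unload attempt made while that callback is still registered.
     */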
cfs_enter_debugger(); + + + /* cleanup all the taskslots attached to the list */ + spin_lock(&(TaskMan.Lock)); + + while (!IsListEmpty(&(TaskMan.TaskList))) { + + ListEntry = TaskMan.TaskList.Flink; + TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); + + RemoveEntryList(ListEntry); + cleanup_task_slot(TaskSlot); + } + + spin_unlock(&TaskMan.Lock); + + /* destroy the taskslot cache slab */ + cfs_mem_cache_destroy(TaskMan.slab); + memset(&TaskMan, 0, sizeof(TASK_MAN)); +} + + +/* + * schedule routines (task slot list) + */ + + +cfs_task_t * +cfs_current() +{ + HANDLE Pid = PsGetCurrentProcessId(); + HANDLE Tid = PsGetCurrentThreadId(); + PETHREAD Tet = PsGetCurrentThread(); + + PLIST_ENTRY ListEntry = NULL; + PTASK_SLOT TaskSlot = NULL; + + spin_lock(&(TaskMan.Lock)); + + ListEntry = TaskMan.TaskList.Flink; + + while (ListEntry != (&(TaskMan.TaskList))) { + + TaskSlot = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); + + if (TaskSlot->Pid == Pid && TaskSlot->Tid == Tid) { + if (TaskSlot->Tet != Tet) { + +/* + DbgPrint("cfs_current: Pid=%xh Tid %xh Tet = %xh resued (TaskSlot->Tet = %xh)...\n", + Pid, Tid, Tet, TaskSlot->Tet); +*/ + // + // The old thread was already exit. This must be a + // new thread which get the same Tid to the previous. + // + + TaskSlot->Tet = Tet; + } + break; + + } else { + + if ((ULONG)TaskSlot->Pid > (ULONG)Pid) { + TaskSlot = NULL; + break; + } else if ((ULONG)TaskSlot->Pid == (ULONG)Pid) { + if ((ULONG)TaskSlot->Tid > (ULONG)Tid) { + TaskSlot = NULL; + break; + } + } + + TaskSlot = NULL; + } + + ListEntry = ListEntry->Flink; + } + + if (!TaskSlot) { + + TaskSlot = alloc_task_slot(); + + if (!TaskSlot) { + cfs_enter_debugger(); + goto errorout; + } + + init_task_slot(TaskSlot); + + TaskSlot->Pid = Pid; + TaskSlot->Tid = Tid; + TaskSlot->Tet = Tet; + + if (ListEntry == (&(TaskMan.TaskList))) { + // + // Empty case or the biggest case, put it to the tail. + // + InsertTailList(&(TaskMan.TaskList), &(TaskSlot->Link)); + } else { + // + // Get a slot and smaller than it's tid, put it just before. + // + InsertHeadList(ListEntry->Blink, &(TaskSlot->Link)); + } + + TaskMan.NumOfTasks++; + } + + // + // To Check whether he task structures are arranged in the expected order ? 
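    // (The list is kept sorted by ascending Pid and, within one Pid, by
    //  ascending Tid; that ordering is what lets the lookup loop above
    //  stop early.  The block below re-verifies that invariant.)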
+ // + + { + PTASK_SLOT Prev = NULL, Curr = NULL; + + ListEntry = TaskMan.TaskList.Flink; + + while (ListEntry != (&(TaskMan.TaskList))) { + + Curr = CONTAINING_RECORD(ListEntry, TASK_SLOT, Link); + ListEntry = ListEntry->Flink; + + if (Prev) { + if ((ULONG)Prev->Pid > (ULONG)Curr->Pid) { + cfs_enter_debugger(); + } else if ((ULONG)Prev->Pid == (ULONG)Curr->Pid) { + if ((ULONG)Prev->Tid > (ULONG)Curr->Tid) { + cfs_enter_debugger(); + } + } + } + + Prev = Curr; + } + } + +errorout: + + spin_unlock(&(TaskMan.Lock)); + + if (!TaskSlot) { + cfs_enter_debugger(); + return NULL; + } + + return (&(TaskSlot->task)); +} + +int +schedule_timeout(int64_t time) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + cfs_enter_debugger(); + return 0; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + cfs_assert(slot->Magic == TASKSLT_MAGIC); + + if (time == MAX_SCHEDULE_TIMEOUT) { + time = 0; + } + + return (cfs_wait_event(&(slot->Event), time) != 0); +} + +int +schedule() +{ + return schedule_timeout(0); +} + +int +wake_up_process( + cfs_task_t * task + ) +{ + PTASK_SLOT slot = NULL; + + if (!task) { + cfs_enter_debugger(); + return 0; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + cfs_assert(slot->Magic == TASKSLT_MAGIC); + + cfs_wake_event(&(slot->Event)); + + return TRUE; +} + +void +sleep_on( + cfs_waitq_t *waitq + ) +{ + cfs_waitlink_t link; + + cfs_waitlink_init(&link); + cfs_waitq_add(waitq, &link); + cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE); + cfs_waitq_del(waitq, &link); +} + +EXPORT_SYMBOL(cfs_curproc_uid); +EXPORT_SYMBOL(cfs_curproc_pid); +EXPORT_SYMBOL(cfs_curproc_gid); +EXPORT_SYMBOL(cfs_curproc_fsuid); +EXPORT_SYMBOL(cfs_curproc_fsgid); +EXPORT_SYMBOL(cfs_curproc_umask); +EXPORT_SYMBOL(cfs_curproc_comm); +EXPORT_SYMBOL(cfs_curproc_groups_nr); +EXPORT_SYMBOL(cfs_curproc_groups_dump); +EXPORT_SYMBOL(cfs_curproc_is_in_groups); +EXPORT_SYMBOL(cfs_curproc_cap_get); +EXPORT_SYMBOL(cfs_curproc_cap_set); diff --git a/libcfs/libcfs/winnt/winnt-debug.c b/libcfs/libcfs/winnt/winnt-debug.c new file mode 100644 index 0000000..9e94f84 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-debug.c @@ -0,0 +1,1057 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. 
+ */ + +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "tracefile.h" + +void lnet_debug_dumpstack(cfs_task_t *tsk) +{ + return; +} + +cfs_task_t *lnet_current(void) +{ + return cfs_current(); +} + +int lnet_arch_debug_init(unsigned long bufsize) +{ + return 0; +} + +int lnet_arch_debug_cleanup(void) +{ + return 0; +} + +void lnet_run_lbug_upcall(char *file, const char *fn, const int line) +{ +} + +void lbug_with_loc(char *file, const char *func, const int line) +{ + libcfs_catastrophe = 1; + CEMERG("LBUG: pid: %u thread: %#x\n", + (unsigned)cfs_curproc_pid(), (unsigned)PsGetCurrentThread()); + // portals_debug_dumplog(); + // portals_run_lbug_upcall(file, func, line); +} + +#if TDI_LIBCFS_DBG + +/* + * Definitions + */ + +LONG KsDebugLevel = 0x5; + + +/* + * Routines + */ + + +/* + * KsNtStatusToString + * Get the error message for a specified nt status + * + * Arguments: + * Status - nt status code + * + * Return Value: + * PUCHAR - message string for the status code + * + * NOTES: + * N/A + */ + +PUCHAR +KsNtStatusToString (IN NTSTATUS Status) +{ + switch (Status) { + + case 0x00000000: return "STATUS_SUCCESS"; + case 0x00000001: return "STATUS_WAIT_1"; + case 0x00000002: return "STATUS_WAIT_2"; + case 0x00000003: return "STATUS_WAIT_3"; + case 0x0000003F: return "STATUS_WAIT_63"; + case 0x00000080: return "STATUS_ABANDONED_WAIT_0"; + case 0x000000BF: return "STATUS_ABANDONED_WAIT_63"; + case 0x000000C0: return "STATUS_USER_APC"; + case 0x00000100: return "STATUS_KERNEL_APC"; + case 0x00000101: return "STATUS_ALERTED"; + case 0x00000102: return "STATUS_TIMEOUT"; + case 0x00000103: return "STATUS_PENDING"; + case 0x00000104: return "STATUS_REPARSE"; + case 0x00000105: return "STATUS_MORE_ENTRIES"; + case 0x00000106: return "STATUS_NOT_ALL_ASSIGNED"; + case 0x00000107: return "STATUS_SOME_NOT_MAPPED"; + case 0x00000108: return "STATUS_OPLOCK_BREAK_IN_PROGRESS"; + case 0x00000109: return "STATUS_VOLUME_MOUNTED"; + case 0x0000010A: return "STATUS_RXACT_COMMITTED"; + case 0x0000010B: return "STATUS_NOTIFY_CLEANUP"; + case 0x0000010C: return "STATUS_NOTIFY_ENUM_DIR"; + case 0x0000010D: return "STATUS_NO_QUOTAS_FOR_ACCOUNT"; + case 0x0000010E: return "STATUS_PRIMARY_TRANSPORT_CONNECT_FAILED"; + case 0x00000110: return "STATUS_PAGE_FAULT_TRANSITION"; + case 0x00000111: return "STATUS_PAGE_FAULT_DEMAND_ZERO"; + case 0x00000112: return "STATUS_PAGE_FAULT_COPY_ON_WRITE"; + case 0x00000113: return "STATUS_PAGE_FAULT_GUARD_PAGE"; + case 0x00000114: return "STATUS_PAGE_FAULT_PAGING_FILE"; + case 0x00000115: return "STATUS_CACHE_PAGE_LOCKED"; + case 0x00000116: return "STATUS_CRASH_DUMP"; + case 0x00000117: return "STATUS_BUFFER_ALL_ZEROS"; + case 0x00000118: return "STATUS_REPARSE_OBJECT"; + case 0x00000119: return "STATUS_RESOURCE_REQUIREMENTS_CHANGED"; + case 0x00000120: return "STATUS_TRANSLATION_COMPLETE"; + case 0x00000121: return "STATUS_DS_MEMBERSHIP_EVALUATED_LOCALLY"; + case 0x00010001: return "DBG_EXCEPTION_HANDLED"; + case 0x00010002: return "DBG_CONTINUE"; + case 0x40000000: return "STATUS_OBJECT_NAME_EXISTS"; + case 0x40000001: return "STATUS_THREAD_WAS_SUSPENDED"; + case 0x40000002: return "STATUS_WORKING_SET_LIMIT_RANGE"; + case 0x40000003: return "STATUS_IMAGE_NOT_AT_BASE"; + case 0x40000004: return "STATUS_RXACT_STATE_CREATED"; + case 0x40000005: return "STATUS_SEGMENT_NOTIFICATION"; + case 0x40000006: return "STATUS_LOCAL_USER_SESSION_KEY"; + case 0x40000007: return "STATUS_BAD_CURRENT_DIRECTORY"; + case 0x40000008: return "STATUS_SERIAL_MORE_WRITES"; + case 
0x40000009: return "STATUS_REGISTRY_RECOVERED"; + case 0x4000000A: return "STATUS_FT_READ_RECOVERY_FROM_BACKUP"; + case 0x4000000B: return "STATUS_FT_WRITE_RECOVERY"; + case 0x4000000C: return "STATUS_SERIAL_COUNTER_TIMEOUT"; + case 0x4000000D: return "STATUS_NULL_LM_PASSWORD"; + case 0x4000000E: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH"; + case 0x4000000F: return "STATUS_RECEIVE_PARTIAL"; + case 0x40000010: return "STATUS_RECEIVE_EXPEDITED"; + case 0x40000011: return "STATUS_RECEIVE_PARTIAL_EXPEDITED"; + case 0x40000012: return "STATUS_EVENT_DONE"; + case 0x40000013: return "STATUS_EVENT_PENDING"; + case 0x40000014: return "STATUS_CHECKING_FILE_SYSTEM"; + case 0x40000015: return "STATUS_FATAL_APP_EXIT"; + case 0x40000016: return "STATUS_PREDEFINED_HANDLE"; + case 0x40000017: return "STATUS_WAS_UNLOCKED"; + case 0x40000018: return "STATUS_SERVICE_NOTIFICATION"; + case 0x40000019: return "STATUS_WAS_LOCKED"; + case 0x4000001A: return "STATUS_LOG_HARD_ERROR"; + case 0x4000001B: return "STATUS_ALREADY_WIN32"; + case 0x4000001C: return "STATUS_WX86_UNSIMULATE"; + case 0x4000001D: return "STATUS_WX86_CONTINUE"; + case 0x4000001E: return "STATUS_WX86_SINGLE_STEP"; + case 0x4000001F: return "STATUS_WX86_BREAKPOINT"; + case 0x40000020: return "STATUS_WX86_EXCEPTION_CONTINUE"; + case 0x40000021: return "STATUS_WX86_EXCEPTION_LASTCHANCE"; + case 0x40000022: return "STATUS_WX86_EXCEPTION_CHAIN"; + case 0x40000023: return "STATUS_IMAGE_MACHINE_TYPE_MISMATCH_EXE"; + case 0x40000024: return "STATUS_NO_YIELD_PERFORMED"; + case 0x40000025: return "STATUS_TIMER_RESUME_IGNORED"; + case 0x40000026: return "STATUS_ARBITRATION_UNHANDLED"; + case 0x40000027: return "STATUS_CARDBUS_NOT_SUPPORTED"; + case 0x40000028: return "STATUS_WX86_CREATEWX86TIB"; + case 0x40000029: return "STATUS_MP_PROCESSOR_MISMATCH"; + case 0x40010001: return "DBG_REPLY_LATER"; + case 0x40010002: return "DBG_UNABLE_TO_PROVIDE_HANDLE"; + case 0x40010003: return "DBG_TERMINATE_THREAD"; + case 0x40010004: return "DBG_TERMINATE_PROCESS"; + case 0x40010005: return "DBG_CONTROL_C"; + case 0x40010006: return "DBG_PRINTEXCEPTION_C"; + case 0x40010007: return "DBG_RIPEXCEPTION"; + case 0x40010008: return "DBG_CONTROL_BREAK"; + case 0x80000001: return "STATUS_GUARD_PAGE_VIOLATION"; + case 0x80000002: return "STATUS_DATATYPE_MISALIGNMENT"; + case 0x80000003: return "STATUS_BREAKPOINT"; + case 0x80000004: return "STATUS_SINGLE_STEP"; + case 0x80000005: return "STATUS_BUFFER_OVERFLOW"; + case 0x80000006: return "STATUS_NO_MORE_FILES"; + case 0x80000007: return "STATUS_WAKE_SYSTEM_DEBUGGER"; + case 0x8000000A: return "STATUS_HANDLES_CLOSED"; + case 0x8000000B: return "STATUS_NO_INHERITANCE"; + case 0x8000000C: return "STATUS_GUID_SUBSTITUTION_MADE"; + case 0x8000000D: return "STATUS_PARTIAL_COPY"; + case 0x8000000E: return "STATUS_DEVICE_PAPER_EMPTY"; + case 0x8000000F: return "STATUS_DEVICE_POWERED_OFF"; + case 0x80000010: return "STATUS_DEVICE_OFF_LINE"; + case 0x80000011: return "STATUS_DEVICE_BUSY"; + case 0x80000012: return "STATUS_NO_MORE_EAS"; + case 0x80000013: return "STATUS_INVALID_EA_NAME"; + case 0x80000014: return "STATUS_EA_LIST_INCONSISTENT"; + case 0x80000015: return "STATUS_INVALID_EA_FLAG"; + case 0x80000016: return "STATUS_VERIFY_REQUIRED"; + case 0x80000017: return "STATUS_EXTRANEOUS_INFORMATION"; + case 0x80000018: return "STATUS_RXACT_COMMIT_NECESSARY"; + case 0x8000001A: return "STATUS_NO_MORE_ENTRIES"; + case 0x8000001B: return "STATUS_FILEMARK_DETECTED"; + case 0x8000001C: return "STATUS_MEDIA_CHANGED"; + case 0x8000001D: 
return "STATUS_BUS_RESET"; + case 0x8000001E: return "STATUS_END_OF_MEDIA"; + case 0x8000001F: return "STATUS_BEGINNING_OF_MEDIA"; + case 0x80000020: return "STATUS_MEDIA_CHECK"; + case 0x80000021: return "STATUS_SETMARK_DETECTED"; + case 0x80000022: return "STATUS_NO_DATA_DETECTED"; + case 0x80000023: return "STATUS_REDIRECTOR_HAS_OPEN_HANDLES"; + case 0x80000024: return "STATUS_SERVER_HAS_OPEN_HANDLES"; + case 0x80000025: return "STATUS_ALREADY_DISCONNECTED"; + case 0x80000026: return "STATUS_LONGJUMP"; + case 0x80010001: return "DBG_EXCEPTION_NOT_HANDLED"; + case 0xC0000001: return "STATUS_UNSUCCESSFUL"; + case 0xC0000002: return "STATUS_NOT_IMPLEMENTED"; + case 0xC0000003: return "STATUS_INVALID_INFO_CLASS"; + case 0xC0000004: return "STATUS_INFO_LENGTH_MISMATCH"; + case 0xC0000005: return "STATUS_ACCESS_VIOLATION"; + case 0xC0000006: return "STATUS_IN_PAGE_ERROR"; + case 0xC0000007: return "STATUS_PAGEFILE_QUOTA"; + case 0xC0000008: return "STATUS_INVALID_HANDLE"; + case 0xC0000009: return "STATUS_BAD_INITIAL_STACK"; + case 0xC000000A: return "STATUS_BAD_INITIAL_PC"; + case 0xC000000B: return "STATUS_INVALID_CID"; + case 0xC000000C: return "STATUS_TIMER_NOT_CANCELED"; + case 0xC000000D: return "STATUS_INVALID_PARAMETER"; + case 0xC000000E: return "STATUS_NO_SUCH_DEVICE"; + case 0xC000000F: return "STATUS_NO_SUCH_FILE"; + case 0xC0000010: return "STATUS_INVALID_DEVICE_REQUEST"; + case 0xC0000011: return "STATUS_END_OF_FILE"; + case 0xC0000012: return "STATUS_WRONG_VOLUME"; + case 0xC0000013: return "STATUS_NO_MEDIA_IN_DEVICE"; + case 0xC0000014: return "STATUS_UNRECOGNIZED_MEDIA"; + case 0xC0000015: return "STATUS_NONEXISTENT_SECTOR"; + case 0xC0000016: return "STATUS_MORE_PROCESSING_REQUIRED"; + case 0xC0000017: return "STATUS_NO_MEMORY"; + case 0xC0000018: return "STATUS_CONFLICTING_ADDRESSES"; + case 0xC0000019: return "STATUS_NOT_MAPPED_VIEW"; + case 0xC000001A: return "STATUS_UNABLE_TO_FREE_VM"; + case 0xC000001B: return "STATUS_UNABLE_TO_DELETE_SECTION"; + case 0xC000001C: return "STATUS_INVALID_SYSTEM_SERVICE"; + case 0xC000001D: return "STATUS_ILLEGAL_INSTRUCTION"; + case 0xC000001E: return "STATUS_INVALID_LOCK_SEQUENCE"; + case 0xC000001F: return "STATUS_INVALID_VIEW_SIZE"; + case 0xC0000020: return "STATUS_INVALID_FILE_FOR_SECTION"; + case 0xC0000021: return "STATUS_ALREADY_COMMITTED"; + case 0xC0000022: return "STATUS_ACCESS_DENIED"; + case 0xC0000023: return "STATUS_BUFFER_TOO_SMALL"; + case 0xC0000024: return "STATUS_OBJECT_TYPE_MISMATCH"; + case 0xC0000025: return "STATUS_NONCONTINUABLE_EXCEPTION"; + case 0xC0000026: return "STATUS_INVALID_DISPOSITION"; + case 0xC0000027: return "STATUS_UNWIND"; + case 0xC0000028: return "STATUS_BAD_STACK"; + case 0xC0000029: return "STATUS_INVALID_UNWIND_TARGET"; + case 0xC000002A: return "STATUS_NOT_LOCKED"; + case 0xC000002B: return "STATUS_PARITY_ERROR"; + case 0xC000002C: return "STATUS_UNABLE_TO_DECOMMIT_VM"; + case 0xC000002D: return "STATUS_NOT_COMMITTED"; + case 0xC000002E: return "STATUS_INVALID_PORT_ATTRIBUTES"; + case 0xC000002F: return "STATUS_PORT_MESSAGE_TOO_LONG"; + case 0xC0000030: return "STATUS_INVALID_PARAMETER_MIX"; + case 0xC0000031: return "STATUS_INVALID_QUOTA_LOWER"; + case 0xC0000032: return "STATUS_DISK_CORRUPT_ERROR"; + case 0xC0000033: return "STATUS_OBJECT_NAME_INVALID"; + case 0xC0000034: return "STATUS_OBJECT_NAME_NOT_FOUND"; + case 0xC0000035: return "STATUS_OBJECT_NAME_COLLISION"; + case 0xC0000037: return "STATUS_PORT_DISCONNECTED"; + case 0xC0000038: return "STATUS_DEVICE_ALREADY_ATTACHED"; + case 
0xC0000039: return "STATUS_OBJECT_PATH_INVALID"; + case 0xC000003A: return "STATUS_OBJECT_PATH_NOT_FOUND"; + case 0xC000003B: return "STATUS_OBJECT_PATH_SYNTAX_BAD"; + case 0xC000003C: return "STATUS_DATA_OVERRUN"; + case 0xC000003D: return "STATUS_DATA_LATE_ERROR"; + case 0xC000003E: return "STATUS_DATA_ERROR"; + case 0xC000003F: return "STATUS_CRC_ERROR"; + case 0xC0000040: return "STATUS_SECTION_TOO_BIG"; + case 0xC0000041: return "STATUS_PORT_CONNECTION_REFUSED"; + case 0xC0000042: return "STATUS_INVALID_PORT_HANDLE"; + case 0xC0000043: return "STATUS_SHARING_VIOLATION"; + case 0xC0000044: return "STATUS_QUOTA_EXCEEDED"; + case 0xC0000045: return "STATUS_INVALID_PAGE_PROTECTION"; + case 0xC0000046: return "STATUS_MUTANT_NOT_OWNED"; + case 0xC0000047: return "STATUS_SEMAPHORE_LIMIT_EXCEEDED"; + case 0xC0000048: return "STATUS_PORT_ALREADY_SET"; + case 0xC0000049: return "STATUS_SECTION_NOT_IMAGE"; + case 0xC000004A: return "STATUS_SUSPEND_COUNT_EXCEEDED"; + case 0xC000004B: return "STATUS_THREAD_IS_TERMINATING"; + case 0xC000004C: return "STATUS_BAD_WORKING_SET_LIMIT"; + case 0xC000004D: return "STATUS_INCOMPATIBLE_FILE_MAP"; + case 0xC000004E: return "STATUS_SECTION_PROTECTION"; + case 0xC000004F: return "STATUS_EAS_NOT_SUPPORTED"; + case 0xC0000050: return "STATUS_EA_TOO_LARGE"; + case 0xC0000051: return "STATUS_NONEXISTENT_EA_ENTRY"; + case 0xC0000052: return "STATUS_NO_EAS_ON_FILE"; + case 0xC0000053: return "STATUS_EA_CORRUPT_ERROR"; + case 0xC0000054: return "STATUS_FILE_LOCK_CONFLICT"; + case 0xC0000055: return "STATUS_LOCK_NOT_GRANTED"; + case 0xC0000056: return "STATUS_DELETE_PENDING"; + case 0xC0000057: return "STATUS_CTL_FILE_NOT_SUPPORTED"; + case 0xC0000058: return "STATUS_UNKNOWN_REVISION"; + case 0xC0000059: return "STATUS_REVISION_MISMATCH"; + case 0xC000005A: return "STATUS_INVALID_OWNER"; + case 0xC000005B: return "STATUS_INVALID_PRIMARY_GROUP"; + case 0xC000005C: return "STATUS_NO_IMPERSONATION_TOKEN"; + case 0xC000005D: return "STATUS_CANT_DISABLE_MANDATORY"; + case 0xC000005E: return "STATUS_NO_LOGON_SERVERS"; + case 0xC000005F: return "STATUS_NO_SUCH_LOGON_SESSION"; + case 0xC0000060: return "STATUS_NO_SUCH_PRIVILEGE"; + case 0xC0000061: return "STATUS_PRIVILEGE_NOT_HELD"; + case 0xC0000062: return "STATUS_INVALID_ACCOUNT_NAME"; + case 0xC0000063: return "STATUS_USER_EXISTS"; + case 0xC0000064: return "STATUS_NO_SUCH_USER"; + case 0xC0000065: return "STATUS_GROUP_EXISTS"; + case 0xC0000066: return "STATUS_NO_SUCH_GROUP"; + case 0xC0000067: return "STATUS_MEMBER_IN_GROUP"; + case 0xC0000068: return "STATUS_MEMBER_NOT_IN_GROUP"; + case 0xC0000069: return "STATUS_LAST_ADMIN"; + case 0xC000006A: return "STATUS_WRONG_PASSWORD"; + case 0xC000006B: return "STATUS_ILL_FORMED_PASSWORD"; + case 0xC000006C: return "STATUS_PASSWORD_RESTRICTION"; + case 0xC000006D: return "STATUS_LOGON_FAILURE"; + case 0xC000006E: return "STATUS_ACCOUNT_RESTRICTION"; + case 0xC000006F: return "STATUS_INVALID_LOGON_HOURS"; + case 0xC0000070: return "STATUS_INVALID_WORKSTATION"; + case 0xC0000071: return "STATUS_PASSWORD_EXPIRED"; + case 0xC0000072: return "STATUS_ACCOUNT_DISABLED"; + case 0xC0000073: return "STATUS_NONE_MAPPED"; + case 0xC0000074: return "STATUS_TOO_MANY_LUIDS_REQUESTED"; + case 0xC0000075: return "STATUS_LUIDS_EXHAUSTED"; + case 0xC0000076: return "STATUS_INVALID_SUB_AUTHORITY"; + case 0xC0000077: return "STATUS_INVALID_ACL"; + case 0xC0000078: return "STATUS_INVALID_SID"; + case 0xC0000079: return "STATUS_INVALID_SECURITY_DESCR"; + case 0xC000007A: return 
"STATUS_PROCEDURE_NOT_FOUND"; + case 0xC000007B: return "STATUS_INVALID_IMAGE_FORMAT"; + case 0xC000007C: return "STATUS_NO_TOKEN"; + case 0xC000007D: return "STATUS_BAD_INHERITANCE_ACL"; + case 0xC000007E: return "STATUS_RANGE_NOT_LOCKED"; + case 0xC000007F: return "STATUS_DISK_FULL"; + case 0xC0000080: return "STATUS_SERVER_DISABLED"; + case 0xC0000081: return "STATUS_SERVER_NOT_DISABLED"; + case 0xC0000082: return "STATUS_TOO_MANY_GUIDS_REQUESTED"; + case 0xC0000083: return "STATUS_GUIDS_EXHAUSTED"; + case 0xC0000084: return "STATUS_INVALID_ID_AUTHORITY"; + case 0xC0000085: return "STATUS_AGENTS_EXHAUSTED"; + case 0xC0000086: return "STATUS_INVALID_VOLUME_LABEL"; + case 0xC0000087: return "STATUS_SECTION_NOT_EXTENDED"; + case 0xC0000088: return "STATUS_NOT_MAPPED_DATA"; + case 0xC0000089: return "STATUS_RESOURCE_DATA_NOT_FOUND"; + case 0xC000008A: return "STATUS_RESOURCE_TYPE_NOT_FOUND"; + case 0xC000008B: return "STATUS_RESOURCE_NAME_NOT_FOUND"; + case 0xC000008C: return "STATUS_ARRAY_BOUNDS_EXCEEDED"; + case 0xC000008D: return "STATUS_FLOAT_DENORMAL_OPERAND"; + case 0xC000008E: return "STATUS_FLOAT_DIVIDE_BY_ZERO"; + case 0xC000008F: return "STATUS_FLOAT_INEXACT_RESULT"; + case 0xC0000090: return "STATUS_FLOAT_INVALID_OPERATION"; + case 0xC0000091: return "STATUS_FLOAT_OVERFLOW"; + case 0xC0000092: return "STATUS_FLOAT_STACK_CHECK"; + case 0xC0000093: return "STATUS_FLOAT_UNDERFLOW"; + case 0xC0000094: return "STATUS_INTEGER_DIVIDE_BY_ZERO"; + case 0xC0000095: return "STATUS_INTEGER_OVERFLOW"; + case 0xC0000096: return "STATUS_PRIVILEGED_INSTRUCTION"; + case 0xC0000097: return "STATUS_TOO_MANY_PAGING_FILES"; + case 0xC0000098: return "STATUS_FILE_INVALID"; + case 0xC0000099: return "STATUS_ALLOTTED_SPACE_EXCEEDED"; + case 0xC000009A: return "STATUS_INSUFFICIENT_RESOURCES"; + case 0xC000009B: return "STATUS_DFS_EXIT_PATH_FOUND"; + case 0xC000009C: return "STATUS_DEVICE_DATA_ERROR"; + case 0xC000009D: return "STATUS_DEVICE_NOT_CONNECTED"; + case 0xC000009E: return "STATUS_DEVICE_POWER_FAILURE"; + case 0xC000009F: return "STATUS_FREE_VM_NOT_AT_BASE"; + case 0xC00000A0: return "STATUS_MEMORY_NOT_ALLOCATED"; + case 0xC00000A1: return "STATUS_WORKING_SET_QUOTA"; + case 0xC00000A2: return "STATUS_MEDIA_WRITE_PROTECTED"; + case 0xC00000A3: return "STATUS_DEVICE_NOT_READY"; + case 0xC00000A4: return "STATUS_INVALID_GROUP_ATTRIBUTES"; + case 0xC00000A5: return "STATUS_BAD_IMPERSONATION_LEVEL"; + case 0xC00000A6: return "STATUS_CANT_OPEN_ANONYMOUS"; + case 0xC00000A7: return "STATUS_BAD_VALIDATION_CLASS"; + case 0xC00000A8: return "STATUS_BAD_TOKEN_TYPE"; + case 0xC00000A9: return "STATUS_BAD_MASTER_BOOT_RECORD"; + case 0xC00000AA: return "STATUS_INSTRUCTION_MISALIGNMENT"; + case 0xC00000AB: return "STATUS_INSTANCE_NOT_AVAILABLE"; + case 0xC00000AC: return "STATUS_PIPE_NOT_AVAILABLE"; + case 0xC00000AD: return "STATUS_INVALID_PIPE_STATE"; + case 0xC00000AE: return "STATUS_PIPE_BUSY"; + case 0xC00000AF: return "STATUS_ILLEGAL_FUNCTION"; + case 0xC00000B0: return "STATUS_PIPE_DISCONNECTED"; + case 0xC00000B1: return "STATUS_PIPE_CLOSING"; + case 0xC00000B2: return "STATUS_PIPE_CONNECTED"; + case 0xC00000B3: return "STATUS_PIPE_LISTENING"; + case 0xC00000B4: return "STATUS_INVALID_READ_MODE"; + case 0xC00000B5: return "STATUS_IO_TIMEOUT"; + case 0xC00000B6: return "STATUS_FILE_FORCED_CLOSED"; + case 0xC00000B7: return "STATUS_PROFILING_NOT_STARTED"; + case 0xC00000B8: return "STATUS_PROFILING_NOT_STOPPED"; + case 0xC00000B9: return "STATUS_COULD_NOT_INTERPRET"; + case 0xC00000BA: return 
"STATUS_FILE_IS_A_DIRECTORY"; + case 0xC00000BB: return "STATUS_NOT_SUPPORTED"; + case 0xC00000BC: return "STATUS_REMOTE_NOT_LISTENING"; + case 0xC00000BD: return "STATUS_DUPLICATE_NAME"; + case 0xC00000BE: return "STATUS_BAD_NETWORK_PATH"; + case 0xC00000BF: return "STATUS_NETWORK_BUSY"; + case 0xC00000C0: return "STATUS_DEVICE_DOES_NOT_EXIST"; + case 0xC00000C1: return "STATUS_TOO_MANY_COMMANDS"; + case 0xC00000C2: return "STATUS_ADAPTER_HARDWARE_ERROR"; + case 0xC00000C3: return "STATUS_INVALID_NETWORK_RESPONSE"; + case 0xC00000C4: return "STATUS_UNEXPECTED_NETWORK_ERROR"; + case 0xC00000C5: return "STATUS_BAD_REMOTE_ADAPTER"; + case 0xC00000C6: return "STATUS_PRINT_QUEUE_FULL"; + case 0xC00000C7: return "STATUS_NO_SPOOL_SPACE"; + case 0xC00000C8: return "STATUS_PRINT_CANCELLED"; + case 0xC00000C9: return "STATUS_NETWORK_NAME_DELETED"; + case 0xC00000CA: return "STATUS_NETWORK_ACCESS_DENIED"; + case 0xC00000CB: return "STATUS_BAD_DEVICE_TYPE"; + case 0xC00000CC: return "STATUS_BAD_NETWORK_NAME"; + case 0xC00000CD: return "STATUS_TOO_MANY_NAMES"; + case 0xC00000CE: return "STATUS_TOO_MANY_SESSIONS"; + case 0xC00000CF: return "STATUS_SHARING_PAUSED"; + case 0xC00000D0: return "STATUS_REQUEST_NOT_ACCEPTED"; + case 0xC00000D1: return "STATUS_REDIRECTOR_PAUSED"; + case 0xC00000D2: return "STATUS_NET_WRITE_FAULT"; + case 0xC00000D3: return "STATUS_PROFILING_AT_LIMIT"; + case 0xC00000D4: return "STATUS_NOT_SAME_DEVICE"; + case 0xC00000D5: return "STATUS_FILE_RENAMED"; + case 0xC00000D6: return "STATUS_VIRTUAL_CIRCUIT_CLOSED"; + case 0xC00000D7: return "STATUS_NO_SECURITY_ON_OBJECT"; + case 0xC00000D8: return "STATUS_CANT_WAIT"; + case 0xC00000D9: return "STATUS_PIPE_EMPTY"; + case 0xC00000DA: return "STATUS_CANT_ACCESS_DOMAIN_INFO"; + case 0xC00000DB: return "STATUS_CANT_TERMINATE_SELF"; + case 0xC00000DC: return "STATUS_INVALID_SERVER_STATE"; + case 0xC00000DD: return "STATUS_INVALID_DOMAIN_STATE"; + case 0xC00000DE: return "STATUS_INVALID_DOMAIN_ROLE"; + case 0xC00000DF: return "STATUS_NO_SUCH_DOMAIN"; + case 0xC00000E0: return "STATUS_DOMAIN_EXISTS"; + case 0xC00000E1: return "STATUS_DOMAIN_LIMIT_EXCEEDED"; + case 0xC00000E2: return "STATUS_OPLOCK_NOT_GRANTED"; + case 0xC00000E3: return "STATUS_INVALID_OPLOCK_PROTOCOL"; + case 0xC00000E4: return "STATUS_INTERNAL_DB_CORRUPTION"; + case 0xC00000E5: return "STATUS_INTERNAL_ERROR"; + case 0xC00000E6: return "STATUS_GENERIC_NOT_MAPPED"; + case 0xC00000E7: return "STATUS_BAD_DESCRIPTOR_FORMAT"; + case 0xC00000E8: return "STATUS_INVALID_USER_BUFFER"; + case 0xC00000E9: return "STATUS_UNEXPECTED_IO_ERROR"; + case 0xC00000EA: return "STATUS_UNEXPECTED_MM_CREATE_ERR"; + case 0xC00000EB: return "STATUS_UNEXPECTED_MM_MAP_ERROR"; + case 0xC00000EC: return "STATUS_UNEXPECTED_MM_EXTEND_ERR"; + case 0xC00000ED: return "STATUS_NOT_LOGON_PROCESS"; + case 0xC00000EE: return "STATUS_LOGON_SESSION_EXISTS"; + case 0xC00000EF: return "STATUS_INVALID_PARAMETER_1"; + case 0xC00000F0: return "STATUS_INVALID_PARAMETER_2"; + case 0xC00000F1: return "STATUS_INVALID_PARAMETER_3"; + case 0xC00000F2: return "STATUS_INVALID_PARAMETER_4"; + case 0xC00000F3: return "STATUS_INVALID_PARAMETER_5"; + case 0xC00000F4: return "STATUS_INVALID_PARAMETER_6"; + case 0xC00000F5: return "STATUS_INVALID_PARAMETER_7"; + case 0xC00000F6: return "STATUS_INVALID_PARAMETER_8"; + case 0xC00000F7: return "STATUS_INVALID_PARAMETER_9"; + case 0xC00000F8: return "STATUS_INVALID_PARAMETER_10"; + case 0xC00000F9: return "STATUS_INVALID_PARAMETER_11"; + case 0xC00000FA: return 
"STATUS_INVALID_PARAMETER_12"; + case 0xC00000FB: return "STATUS_REDIRECTOR_NOT_STARTED"; + case 0xC00000FC: return "STATUS_REDIRECTOR_STARTED"; + case 0xC00000FD: return "STATUS_STACK_OVERFLOW"; + case 0xC00000FE: return "STATUS_NO_SUCH_PACKAGE"; + case 0xC00000FF: return "STATUS_BAD_FUNCTION_TABLE"; + case 0xC0000100: return "STATUS_VARIABLE_NOT_FOUND"; + case 0xC0000101: return "STATUS_DIRECTORY_NOT_EMPTY"; + case 0xC0000102: return "STATUS_FILE_CORRUPT_ERROR"; + case 0xC0000103: return "STATUS_NOT_A_DIRECTORY"; + case 0xC0000104: return "STATUS_BAD_LOGON_SESSION_STATE"; + case 0xC0000105: return "STATUS_LOGON_SESSION_COLLISION"; + case 0xC0000106: return "STATUS_NAME_TOO_LONG"; + case 0xC0000107: return "STATUS_FILES_OPEN"; + case 0xC0000108: return "STATUS_CONNECTION_IN_USE"; + case 0xC0000109: return "STATUS_MESSAGE_NOT_FOUND"; + case 0xC000010A: return "STATUS_PROCESS_IS_TERMINATING"; + case 0xC000010B: return "STATUS_INVALID_LOGON_TYPE"; + case 0xC000010C: return "STATUS_NO_GUID_TRANSLATION"; + case 0xC000010D: return "STATUS_CANNOT_IMPERSONATE"; + case 0xC000010E: return "STATUS_IMAGE_ALREADY_LOADED"; + case 0xC000010F: return "STATUS_ABIOS_NOT_PRESENT"; + case 0xC0000110: return "STATUS_ABIOS_LID_NOT_EXIST"; + case 0xC0000111: return "STATUS_ABIOS_LID_ALREADY_OWNED"; + case 0xC0000112: return "STATUS_ABIOS_NOT_LID_OWNER"; + case 0xC0000113: return "STATUS_ABIOS_INVALID_COMMAND"; + case 0xC0000114: return "STATUS_ABIOS_INVALID_LID"; + case 0xC0000115: return "STATUS_ABIOS_SELECTOR_NOT_AVAILABLE"; + case 0xC0000116: return "STATUS_ABIOS_INVALID_SELECTOR"; + case 0xC0000117: return "STATUS_NO_LDT"; + case 0xC0000118: return "STATUS_INVALID_LDT_SIZE"; + case 0xC0000119: return "STATUS_INVALID_LDT_OFFSET"; + case 0xC000011A: return "STATUS_INVALID_LDT_DESCRIPTOR"; + case 0xC000011B: return "STATUS_INVALID_IMAGE_NE_FORMAT"; + case 0xC000011C: return "STATUS_RXACT_INVALID_STATE"; + case 0xC000011D: return "STATUS_RXACT_COMMIT_FAILURE"; + case 0xC000011E: return "STATUS_MAPPED_FILE_SIZE_ZERO"; + case 0xC000011F: return "STATUS_TOO_MANY_OPENED_FILES"; + case 0xC0000120: return "STATUS_CANCELLED"; + case 0xC0000121: return "STATUS_CANNOT_DELETE"; + case 0xC0000122: return "STATUS_INVALID_COMPUTER_NAME"; + case 0xC0000123: return "STATUS_FILE_DELETED"; + case 0xC0000124: return "STATUS_SPECIAL_ACCOUNT"; + case 0xC0000125: return "STATUS_SPECIAL_GROUP"; + case 0xC0000126: return "STATUS_SPECIAL_USER"; + case 0xC0000127: return "STATUS_MEMBERS_PRIMARY_GROUP"; + case 0xC0000128: return "STATUS_FILE_CLOSED"; + case 0xC0000129: return "STATUS_TOO_MANY_THREADS"; + case 0xC000012A: return "STATUS_THREAD_NOT_IN_PROCESS"; + case 0xC000012B: return "STATUS_TOKEN_ALREADY_IN_USE"; + case 0xC000012C: return "STATUS_PAGEFILE_QUOTA_EXCEEDED"; + case 0xC000012D: return "STATUS_COMMITMENT_LIMIT"; + case 0xC000012E: return "STATUS_INVALID_IMAGE_LE_FORMAT"; + case 0xC000012F: return "STATUS_INVALID_IMAGE_NOT_MZ"; + case 0xC0000130: return "STATUS_INVALID_IMAGE_PROTECT"; + case 0xC0000131: return "STATUS_INVALID_IMAGE_WIN_16"; + case 0xC0000132: return "STATUS_LOGON_SERVER_CONFLICT"; + case 0xC0000133: return "STATUS_TIME_DIFFERENCE_AT_DC"; + case 0xC0000134: return "STATUS_SYNCHRONIZATION_REQUIRED"; + case 0xC0000135: return "STATUS_DLL_NOT_FOUND"; + case 0xC0000136: return "STATUS_OPEN_FAILED"; + case 0xC0000137: return "STATUS_IO_PRIVILEGE_FAILED"; + case 0xC0000138: return "STATUS_ORDINAL_NOT_FOUND"; + case 0xC0000139: return "STATUS_ENTRYPOINT_NOT_FOUND"; + case 0xC000013A: return "STATUS_CONTROL_C_EXIT"; 
+ case 0xC000013B: return "STATUS_LOCAL_DISCONNECT"; + case 0xC000013C: return "STATUS_REMOTE_DISCONNECT"; + case 0xC000013D: return "STATUS_REMOTE_RESOURCES"; + case 0xC000013E: return "STATUS_LINK_FAILED"; + case 0xC000013F: return "STATUS_LINK_TIMEOUT"; + case 0xC0000140: return "STATUS_INVALID_CONNECTION"; + case 0xC0000141: return "STATUS_INVALID_ADDRESS"; + case 0xC0000142: return "STATUS_DLL_INIT_FAILED"; + case 0xC0000143: return "STATUS_MISSING_SYSTEMFILE"; + case 0xC0000144: return "STATUS_UNHANDLED_EXCEPTION"; + case 0xC0000145: return "STATUS_APP_INIT_FAILURE"; + case 0xC0000146: return "STATUS_PAGEFILE_CREATE_FAILED"; + case 0xC0000147: return "STATUS_NO_PAGEFILE"; + case 0xC0000148: return "STATUS_INVALID_LEVEL"; + case 0xC0000149: return "STATUS_WRONG_PASSWORD_CORE"; + case 0xC000014A: return "STATUS_ILLEGAL_FLOAT_CONTEXT"; + case 0xC000014B: return "STATUS_PIPE_BROKEN"; + case 0xC000014C: return "STATUS_REGISTRY_CORRUPT"; + case 0xC000014D: return "STATUS_REGISTRY_IO_FAILED"; + case 0xC000014E: return "STATUS_NO_EVENT_PAIR"; + case 0xC000014F: return "STATUS_UNRECOGNIZED_VOLUME"; + case 0xC0000150: return "STATUS_SERIAL_NO_DEVICE_INITED"; + case 0xC0000151: return "STATUS_NO_SUCH_ALIAS"; + case 0xC0000152: return "STATUS_MEMBER_NOT_IN_ALIAS"; + case 0xC0000153: return "STATUS_MEMBER_IN_ALIAS"; + case 0xC0000154: return "STATUS_ALIAS_EXISTS"; + case 0xC0000155: return "STATUS_LOGON_NOT_GRANTED"; + case 0xC0000156: return "STATUS_TOO_MANY_SECRETS"; + case 0xC0000157: return "STATUS_SECRET_TOO_LONG"; + case 0xC0000158: return "STATUS_INTERNAL_DB_ERROR"; + case 0xC0000159: return "STATUS_FULLSCREEN_MODE"; + case 0xC000015A: return "STATUS_TOO_MANY_CONTEXT_IDS"; + case 0xC000015B: return "STATUS_LOGON_TYPE_NOT_GRANTED"; + case 0xC000015C: return "STATUS_NOT_REGISTRY_FILE"; + case 0xC000015D: return "STATUS_NT_CROSS_ENCRYPTION_REQUIRED"; + case 0xC000015E: return "STATUS_DOMAIN_CTRLR_CONFIG_ERROR"; + case 0xC000015F: return "STATUS_FT_MISSING_MEMBER"; + case 0xC0000160: return "STATUS_ILL_FORMED_SERVICE_ENTRY"; + case 0xC0000161: return "STATUS_ILLEGAL_CHARACTER"; + case 0xC0000162: return "STATUS_UNMAPPABLE_CHARACTER"; + case 0xC0000163: return "STATUS_UNDEFINED_CHARACTER"; + case 0xC0000164: return "STATUS_FLOPPY_VOLUME"; + case 0xC0000165: return "STATUS_FLOPPY_ID_MARK_NOT_FOUND"; + case 0xC0000166: return "STATUS_FLOPPY_WRONG_CYLINDER"; + case 0xC0000167: return "STATUS_FLOPPY_UNKNOWN_ERROR"; + case 0xC0000168: return "STATUS_FLOPPY_BAD_REGISTERS"; + case 0xC0000169: return "STATUS_DISK_RECALIBRATE_FAILED"; + case 0xC000016A: return "STATUS_DISK_OPERATION_FAILED"; + case 0xC000016B: return "STATUS_DISK_RESET_FAILED"; + case 0xC000016C: return "STATUS_SHARED_IRQ_BUSY"; + case 0xC000016D: return "STATUS_FT_ORPHANING"; + case 0xC000016E: return "STATUS_BIOS_FAILED_TO_CONNECT_INTERRUPT"; + case 0xC0000172: return "STATUS_PARTITION_FAILURE"; + case 0xC0000173: return "STATUS_INVALID_BLOCK_LENGTH"; + case 0xC0000174: return "STATUS_DEVICE_NOT_PARTITIONED"; + case 0xC0000175: return "STATUS_UNABLE_TO_LOCK_MEDIA"; + case 0xC0000176: return "STATUS_UNABLE_TO_UNLOAD_MEDIA"; + case 0xC0000177: return "STATUS_EOM_OVERFLOW"; + case 0xC0000178: return "STATUS_NO_MEDIA"; + case 0xC000017A: return "STATUS_NO_SUCH_MEMBER"; + case 0xC000017B: return "STATUS_INVALID_MEMBER"; + case 0xC000017C: return "STATUS_KEY_DELETED"; + case 0xC000017D: return "STATUS_NO_LOG_SPACE"; + case 0xC000017E: return "STATUS_TOO_MANY_SIDS"; + case 0xC000017F: return "STATUS_LM_CROSS_ENCRYPTION_REQUIRED"; + case 
0xC0000180: return "STATUS_KEY_HAS_CHILDREN"; + case 0xC0000181: return "STATUS_CHILD_MUST_BE_VOLATILE"; + case 0xC0000182: return "STATUS_DEVICE_CONFIGURATION_ERROR"; + case 0xC0000183: return "STATUS_DRIVER_INTERNAL_ERROR"; + case 0xC0000184: return "STATUS_INVALID_DEVICE_STATE"; + case 0xC0000185: return "STATUS_IO_DEVICE_ERROR"; + case 0xC0000186: return "STATUS_DEVICE_PROTOCOL_ERROR"; + case 0xC0000187: return "STATUS_BACKUP_CONTROLLER"; + case 0xC0000188: return "STATUS_LOG_FILE_FULL"; + case 0xC0000189: return "STATUS_TOO_LATE"; + case 0xC000018A: return "STATUS_NO_TRUST_LSA_SECRET"; + case 0xC000018B: return "STATUS_NO_TRUST_SAM_ACCOUNT"; + case 0xC000018C: return "STATUS_TRUSTED_DOMAIN_FAILURE"; + case 0xC000018D: return "STATUS_TRUSTED_RELATIONSHIP_FAILURE"; + case 0xC000018E: return "STATUS_EVENTLOG_FILE_CORRUPT"; + case 0xC000018F: return "STATUS_EVENTLOG_CANT_START"; + case 0xC0000190: return "STATUS_TRUST_FAILURE"; + case 0xC0000191: return "STATUS_MUTANT_LIMIT_EXCEEDED"; + case 0xC0000192: return "STATUS_NETLOGON_NOT_STARTED"; + case 0xC0000193: return "STATUS_ACCOUNT_EXPIRED"; + case 0xC0000194: return "STATUS_POSSIBLE_DEADLOCK"; + case 0xC0000195: return "STATUS_NETWORK_CREDENTIAL_CONFLICT"; + case 0xC0000196: return "STATUS_REMOTE_SESSION_LIMIT"; + case 0xC0000197: return "STATUS_EVENTLOG_FILE_CHANGED"; + case 0xC0000198: return "STATUS_NOLOGON_INTERDOMAIN_TRUST_ACCOUNT"; + case 0xC0000199: return "STATUS_NOLOGON_WORKSTATION_TRUST_ACCOUNT"; + case 0xC000019A: return "STATUS_NOLOGON_SERVER_TRUST_ACCOUNT"; + case 0xC000019B: return "STATUS_DOMAIN_TRUST_INCONSISTENT"; + case 0xC000019C: return "STATUS_FS_DRIVER_REQUIRED"; + case 0xC0000202: return "STATUS_NO_USER_SESSION_KEY"; + case 0xC0000203: return "STATUS_USER_SESSION_DELETED"; + case 0xC0000204: return "STATUS_RESOURCE_LANG_NOT_FOUND"; + case 0xC0000205: return "STATUS_INSUFF_SERVER_RESOURCES"; + case 0xC0000206: return "STATUS_INVALID_BUFFER_SIZE"; + case 0xC0000207: return "STATUS_INVALID_ADDRESS_COMPONENT"; + case 0xC0000208: return "STATUS_INVALID_ADDRESS_WILDCARD"; + case 0xC0000209: return "STATUS_TOO_MANY_ADDRESSES"; + case 0xC000020A: return "STATUS_ADDRESS_ALREADY_EXISTS"; + case 0xC000020B: return "STATUS_ADDRESS_CLOSED"; + case 0xC000020C: return "STATUS_CONNECTION_DISCONNECTED"; + case 0xC000020D: return "STATUS_CONNECTION_RESET"; + case 0xC000020E: return "STATUS_TOO_MANY_NODES"; + case 0xC000020F: return "STATUS_TRANSACTION_ABORTED"; + case 0xC0000210: return "STATUS_TRANSACTION_TIMED_OUT"; + case 0xC0000211: return "STATUS_TRANSACTION_NO_RELEASE"; + case 0xC0000212: return "STATUS_TRANSACTION_NO_MATCH"; + case 0xC0000213: return "STATUS_TRANSACTION_RESPONDED"; + case 0xC0000214: return "STATUS_TRANSACTION_INVALID_ID"; + case 0xC0000215: return "STATUS_TRANSACTION_INVALID_TYPE"; + case 0xC0000216: return "STATUS_NOT_SERVER_SESSION"; + case 0xC0000217: return "STATUS_NOT_CLIENT_SESSION"; + case 0xC0000218: return "STATUS_CANNOT_LOAD_REGISTRY_FILE"; + case 0xC0000219: return "STATUS_DEBUG_ATTACH_FAILED"; + case 0xC000021A: return "STATUS_SYSTEM_PROCESS_TERMINATED"; + case 0xC000021B: return "STATUS_DATA_NOT_ACCEPTED"; + case 0xC000021C: return "STATUS_NO_BROWSER_SERVERS_FOUND"; + case 0xC000021D: return "STATUS_VDM_HARD_ERROR"; + case 0xC000021E: return "STATUS_DRIVER_CANCEL_TIMEOUT"; + case 0xC000021F: return "STATUS_REPLY_MESSAGE_MISMATCH"; + case 0xC0000220: return "STATUS_MAPPED_ALIGNMENT"; + case 0xC0000221: return "STATUS_IMAGE_CHECKSUM_MISMATCH"; + case 0xC0000222: return 
"STATUS_LOST_WRITEBEHIND_DATA"; + case 0xC0000223: return "STATUS_CLIENT_SERVER_PARAMETERS_INVALID"; + case 0xC0000224: return "STATUS_PASSWORD_MUST_CHANGE"; + case 0xC0000225: return "STATUS_NOT_FOUND"; + case 0xC0000226: return "STATUS_NOT_TINY_STREAM"; + case 0xC0000227: return "STATUS_RECOVERY_FAILURE"; + case 0xC0000228: return "STATUS_STACK_OVERFLOW_READ"; + case 0xC0000229: return "STATUS_FAIL_CHECK"; + case 0xC000022A: return "STATUS_DUPLICATE_OBJECTID"; + case 0xC000022B: return "STATUS_OBJECTID_EXISTS"; + case 0xC000022C: return "STATUS_CONVERT_TO_LARGE"; + case 0xC000022D: return "STATUS_RETRY"; + case 0xC000022E: return "STATUS_FOUND_OUT_OF_SCOPE"; + case 0xC000022F: return "STATUS_ALLOCATE_BUCKET"; + case 0xC0000230: return "STATUS_PROPSET_NOT_FOUND"; + case 0xC0000231: return "STATUS_MARSHALL_OVERFLOW"; + case 0xC0000232: return "STATUS_INVALID_VARIANT"; + case 0xC0000233: return "STATUS_DOMAIN_CONTROLLER_NOT_FOUND"; + case 0xC0000234: return "STATUS_ACCOUNT_LOCKED_OUT"; + case 0xC0000235: return "STATUS_HANDLE_NOT_CLOSABLE"; + case 0xC0000236: return "STATUS_CONNECTION_REFUSED"; + case 0xC0000237: return "STATUS_GRACEFUL_DISCONNECT"; + case 0xC0000238: return "STATUS_ADDRESS_ALREADY_ASSOCIATED"; + case 0xC0000239: return "STATUS_ADDRESS_NOT_ASSOCIATED"; + case 0xC000023A: return "STATUS_CONNECTION_INVALID"; + case 0xC000023B: return "STATUS_CONNECTION_ACTIVE"; + case 0xC000023C: return "STATUS_NETWORK_UNREACHABLE"; + case 0xC000023D: return "STATUS_HOST_UNREACHABLE"; + case 0xC000023E: return "STATUS_PROTOCOL_UNREACHABLE"; + case 0xC000023F: return "STATUS_PORT_UNREACHABLE"; + case 0xC0000240: return "STATUS_REQUEST_ABORTED"; + case 0xC0000241: return "STATUS_CONNECTION_ABORTED"; + case 0xC0000242: return "STATUS_BAD_COMPRESSION_BUFFER"; + case 0xC0000243: return "STATUS_USER_MAPPED_FILE"; + case 0xC0000244: return "STATUS_AUDIT_FAILED"; + case 0xC0000245: return "STATUS_TIMER_RESOLUTION_NOT_SET"; + case 0xC0000246: return "STATUS_CONNECTION_COUNT_LIMIT"; + case 0xC0000247: return "STATUS_LOGIN_TIME_RESTRICTION"; + case 0xC0000248: return "STATUS_LOGIN_WKSTA_RESTRICTION"; + case 0xC0000249: return "STATUS_IMAGE_MP_UP_MISMATCH"; + case 0xC0000250: return "STATUS_INSUFFICIENT_LOGON_INFO"; + case 0xC0000251: return "STATUS_BAD_DLL_ENTRYPOINT"; + case 0xC0000252: return "STATUS_BAD_SERVICE_ENTRYPOINT"; + case 0xC0000253: return "STATUS_LPC_REPLY_LOST"; + case 0xC0000254: return "STATUS_IP_ADDRESS_CONFLICT1"; + case 0xC0000255: return "STATUS_IP_ADDRESS_CONFLICT2"; + case 0xC0000256: return "STATUS_REGISTRY_QUOTA_LIMIT"; + case 0xC0000257: return "STATUS_PATH_NOT_COVERED"; + case 0xC0000258: return "STATUS_NO_CALLBACK_ACTIVE"; + case 0xC0000259: return "STATUS_LICENSE_QUOTA_EXCEEDED"; + case 0xC000025A: return "STATUS_PWD_TOO_SHORT"; + case 0xC000025B: return "STATUS_PWD_TOO_RECENT"; + case 0xC000025C: return "STATUS_PWD_HISTORY_CONFLICT"; + case 0xC000025E: return "STATUS_PLUGPLAY_NO_DEVICE"; + case 0xC000025F: return "STATUS_UNSUPPORTED_COMPRESSION"; + case 0xC0000260: return "STATUS_INVALID_HW_PROFILE"; + case 0xC0000261: return "STATUS_INVALID_PLUGPLAY_DEVICE_PATH"; + case 0xC0000262: return "STATUS_DRIVER_ORDINAL_NOT_FOUND"; + case 0xC0000263: return "STATUS_DRIVER_ENTRYPOINT_NOT_FOUND"; + case 0xC0000264: return "STATUS_RESOURCE_NOT_OWNED"; + case 0xC0000265: return "STATUS_TOO_MANY_LINKS"; + case 0xC0000266: return "STATUS_QUOTA_LIST_INCONSISTENT"; + case 0xC0000267: return "STATUS_FILE_IS_OFFLINE"; + case 0xC0000268: return "STATUS_EVALUATION_EXPIRATION"; + case 
0xC0000269: return "STATUS_ILLEGAL_DLL_RELOCATION"; + case 0xC000026A: return "STATUS_LICENSE_VIOLATION"; + case 0xC000026B: return "STATUS_DLL_INIT_FAILED_LOGOFF"; + case 0xC000026C: return "STATUS_DRIVER_UNABLE_TO_LOAD"; + case 0xC000026D: return "STATUS_DFS_UNAVAILABLE"; + case 0xC000026E: return "STATUS_VOLUME_DISMOUNTED"; + case 0xC000026F: return "STATUS_WX86_INTERNAL_ERROR"; + case 0xC0000270: return "STATUS_WX86_FLOAT_STACK_CHECK"; + case 0xC0000271: return "STATUS_VALIDATE_CONTINUE"; + case 0xC0000272: return "STATUS_NO_MATCH"; + case 0xC0000273: return "STATUS_NO_MORE_MATCHES"; + case 0xC0000275: return "STATUS_NOT_A_REPARSE_POINT"; + case 0xC0000276: return "STATUS_IO_REPARSE_TAG_INVALID"; + case 0xC0000277: return "STATUS_IO_REPARSE_TAG_MISMATCH"; + case 0xC0000278: return "STATUS_IO_REPARSE_DATA_INVALID"; + case 0xC0000279: return "STATUS_IO_REPARSE_TAG_NOT_HANDLED"; + case 0xC0000280: return "STATUS_REPARSE_POINT_NOT_RESOLVED"; + case 0xC0000281: return "STATUS_DIRECTORY_IS_A_REPARSE_POINT"; + case 0xC0000282: return "STATUS_RANGE_LIST_CONFLICT"; + case 0xC0000283: return "STATUS_SOURCE_ELEMENT_EMPTY"; + case 0xC0000284: return "STATUS_DESTINATION_ELEMENT_FULL"; + case 0xC0000285: return "STATUS_ILLEGAL_ELEMENT_ADDRESS"; + case 0xC0000286: return "STATUS_MAGAZINE_NOT_PRESENT"; + case 0xC0000287: return "STATUS_REINITIALIZATION_NEEDED"; + case 0x80000288: return "STATUS_DEVICE_REQUIRES_CLEANING"; + case 0x80000289: return "STATUS_DEVICE_DOOR_OPEN"; + case 0xC000028A: return "STATUS_ENCRYPTION_FAILED"; + case 0xC000028B: return "STATUS_DECRYPTION_FAILED"; + case 0xC000028C: return "STATUS_RANGE_NOT_FOUND"; + case 0xC000028D: return "STATUS_NO_RECOVERY_POLICY"; + case 0xC000028E: return "STATUS_NO_EFS"; + case 0xC000028F: return "STATUS_WRONG_EFS"; + case 0xC0000290: return "STATUS_NO_USER_KEYS"; + case 0xC0000291: return "STATUS_FILE_NOT_ENCRYPTED"; + case 0xC0000292: return "STATUS_NOT_EXPORT_FORMAT"; + case 0xC0000293: return "STATUS_FILE_ENCRYPTED"; + case 0x40000294: return "STATUS_WAKE_SYSTEM"; + case 0xC0000295: return "STATUS_WMI_GUID_NOT_FOUND"; + case 0xC0000296: return "STATUS_WMI_INSTANCE_NOT_FOUND"; + case 0xC0000297: return "STATUS_WMI_ITEMID_NOT_FOUND"; + case 0xC0000298: return "STATUS_WMI_TRY_AGAIN"; + case 0xC0000299: return "STATUS_SHARED_POLICY"; + case 0xC000029A: return "STATUS_POLICY_OBJECT_NOT_FOUND"; + case 0xC000029B: return "STATUS_POLICY_ONLY_IN_DS"; + case 0xC000029C: return "STATUS_VOLUME_NOT_UPGRADED"; + case 0xC000029D: return "STATUS_REMOTE_STORAGE_NOT_ACTIVE"; + case 0xC000029E: return "STATUS_REMOTE_STORAGE_MEDIA_ERROR"; + case 0xC000029F: return "STATUS_NO_TRACKING_SERVICE"; + case 0xC00002A0: return "STATUS_SERVER_SID_MISMATCH"; + case 0xC00002A1: return "STATUS_DS_NO_ATTRIBUTE_OR_VALUE"; + case 0xC00002A2: return "STATUS_DS_INVALID_ATTRIBUTE_SYNTAX"; + case 0xC00002A3: return "STATUS_DS_ATTRIBUTE_TYPE_UNDEFINED"; + case 0xC00002A4: return "STATUS_DS_ATTRIBUTE_OR_VALUE_EXISTS"; + case 0xC00002A5: return "STATUS_DS_BUSY"; + case 0xC00002A6: return "STATUS_DS_UNAVAILABLE"; + case 0xC00002A7: return "STATUS_DS_NO_RIDS_ALLOCATED"; + case 0xC00002A8: return "STATUS_DS_NO_MORE_RIDS"; + case 0xC00002A9: return "STATUS_DS_INCORRECT_ROLE_OWNER"; + case 0xC00002AA: return "STATUS_DS_RIDMGR_INIT_ERROR"; + case 0xC00002AB: return "STATUS_DS_OBJ_CLASS_VIOLATION"; + case 0xC00002AC: return "STATUS_DS_CANT_ON_NON_LEAF"; + case 0xC00002AD: return "STATUS_DS_CANT_ON_RDN"; + case 0xC00002AE: return "STATUS_DS_CANT_MOD_OBJ_CLASS"; + case 0xC00002AF: return 
"STATUS_DS_CROSS_DOM_MOVE_FAILED"; + case 0xC00002B0: return "STATUS_DS_GC_NOT_AVAILABLE"; + case 0xC00002B1: return "STATUS_DIRECTORY_SERVICE_REQUIRED"; + case 0xC00002B2: return "STATUS_REPARSE_ATTRIBUTE_CONFLICT"; + case 0xC00002B3: return "STATUS_CANT_ENABLE_DENY_ONLY"; + case 0xC00002B4: return "STATUS_FLOAT_MULTIPLE_FAULTS"; + case 0xC00002B5: return "STATUS_FLOAT_MULTIPLE_TRAPS"; + case 0xC00002B6: return "STATUS_DEVICE_REMOVED"; + case 0xC00002B7: return "STATUS_JOURNAL_DELETE_IN_PROGRESS"; + case 0xC00002B8: return "STATUS_JOURNAL_NOT_ACTIVE"; + case 0xC00002B9: return "STATUS_NOINTERFACE"; + case 0xC00002C1: return "STATUS_DS_ADMIN_LIMIT_EXCEEDED"; + case 0xC00002C2: return "STATUS_DRIVER_FAILED_SLEEP"; + case 0xC00002C3: return "STATUS_MUTUAL_AUTHENTICATION_FAILED"; + case 0xC00002C4: return "STATUS_CORRUPT_SYSTEM_FILE"; + case 0xC00002C5: return "STATUS_DATATYPE_MISALIGNMENT_ERROR"; + case 0xC00002C6: return "STATUS_WMI_READ_ONLY"; + case 0xC00002C7: return "STATUS_WMI_SET_FAILURE"; + case 0xC00002C8: return "STATUS_COMMITMENT_MINIMUM"; + case 0xC00002C9: return "STATUS_REG_NAT_CONSUMPTION"; + case 0xC00002CA: return "STATUS_TRANSPORT_FULL"; + case 0xC00002CB: return "STATUS_DS_SAM_INIT_FAILURE"; + case 0xC00002CC: return "STATUS_ONLY_IF_CONNECTED"; + case 0xC00002CD: return "STATUS_DS_SENSITIVE_GROUP_VIOLATION"; + case 0xC00002CE: return "STATUS_PNP_RESTART_ENUMERATION"; + case 0xC00002CF: return "STATUS_JOURNAL_ENTRY_DELETED"; + case 0xC00002D0: return "STATUS_DS_CANT_MOD_PRIMARYGROUPID"; + case 0xC00002D1: return "STATUS_SYSTEM_IMAGE_BAD_SIGNATURE"; + case 0xC00002D2: return "STATUS_PNP_REBOOT_REQUIRED"; + case 0xC00002D3: return "STATUS_POWER_STATE_INVALID"; + case 0xC00002D4: return "STATUS_DS_INVALID_GROUP_TYPE"; + case 0xC00002D5: return "STATUS_DS_NO_NEST_GLOBALGROUP_IN_MIXEDDOMAIN"; + case 0xC00002D6: return "STATUS_DS_NO_NEST_LOCALGROUP_IN_MIXEDDOMAIN"; + case 0xC00002D7: return "STATUS_DS_GLOBAL_CANT_HAVE_LOCAL_MEMBER"; + case 0xC00002D8: return "STATUS_DS_GLOBAL_CANT_HAVE_UNIVERSAL_MEMBER"; + case 0xC00002D9: return "STATUS_DS_UNIVERSAL_CANT_HAVE_LOCAL_MEMBER"; + case 0xC00002DA: return "STATUS_DS_GLOBAL_CANT_HAVE_CROSSDOMAIN_MEMBER"; + case 0xC00002DB: return "STATUS_DS_LOCAL_CANT_HAVE_CROSSDOMAIN_LOCAL_MEMBER"; + case 0xC00002DC: return "STATUS_DS_HAVE_PRIMARY_MEMBERS"; + case 0xC00002DD: return "STATUS_WMI_NOT_SUPPORTED"; + case 0xC00002DE: return "STATUS_INSUFFICIENT_POWER"; + case 0xC00002DF: return "STATUS_SAM_NEED_BOOTKEY_PASSWORD"; + case 0xC00002E0: return "STATUS_SAM_NEED_BOOTKEY_FLOPPY"; + case 0xC00002E1: return "STATUS_DS_CANT_START"; + case 0xC00002E2: return "STATUS_DS_INIT_FAILURE"; + case 0xC00002E3: return "STATUS_SAM_INIT_FAILURE"; + case 0xC00002E4: return "STATUS_DS_GC_REQUIRED"; + case 0xC00002E5: return "STATUS_DS_LOCAL_MEMBER_OF_LOCAL_ONLY"; + case 0xC00002E6: return "STATUS_DS_NO_FPO_IN_UNIVERSAL_GROUPS"; + case 0xC00002E7: return "STATUS_DS_MACHINE_ACCOUNT_QUOTA_EXCEEDED"; + case 0xC00002E8: return "STATUS_MULTIPLE_FAULT_VIOLATION"; + case 0xC0000300: return "STATUS_NOT_SUPPORTED_ON_SBS"; + case 0xC0009898: return "STATUS_WOW_ASSERTION"; + case 0xC0010001: return "DBG_NO_STATE_CHANGE"; + case 0xC0010002: return "DBG_APP_NOT_IDLE"; + case 0xC0020001: return "RPC_NT_INVALID_STRING_BINDING"; + case 0xC0020002: return "RPC_NT_WRONG_KIND_OF_BINDING"; + case 0xC0020003: return "RPC_NT_INVALID_BINDING"; + case 0xC0020004: return "RPC_NT_PROTSEQ_NOT_SUPPORTED"; + case 0xC0020005: return "RPC_NT_INVALID_RPC_PROTSEQ"; + case 0xC0020006: return 
"RPC_NT_INVALID_STRING_UUID"; + case 0xC0020007: return "RPC_NT_INVALID_ENDPOINT_FORMAT"; + case 0xC0020008: return "RPC_NT_INVALID_NET_ADDR"; + case 0xC0020009: return "RPC_NT_NO_ENDPOINT_FOUND"; + case 0xC002000A: return "RPC_NT_INVALID_TIMEOUT"; + case 0xC002000B: return "RPC_NT_OBJECT_NOT_FOUND"; + case 0xC002000C: return "RPC_NT_ALREADY_REGISTERED"; + case 0xC002000D: return "RPC_NT_TYPE_ALREADY_REGISTERED"; + case 0xC002000E: return "RPC_NT_ALREADY_LISTENING"; + case 0xC002000F: return "RPC_NT_NO_PROTSEQS_REGISTERED"; + case 0xC0020010: return "RPC_NT_NOT_LISTENING"; + case 0xC0020011: return "RPC_NT_UNKNOWN_MGR_TYPE"; + case 0xC0020012: return "RPC_NT_UNKNOWN_IF"; + case 0xC0020013: return "RPC_NT_NO_BINDINGS"; + case 0xC0020014: return "RPC_NT_NO_PROTSEQS"; + case 0xC0020015: return "RPC_NT_CANT_CREATE_ENDPOINT"; + case 0xC0020016: return "RPC_NT_OUT_OF_RESOURCES"; + case 0xC0020017: return "RPC_NT_SERVER_UNAVAILABLE"; + case 0xC0020018: return "RPC_NT_SERVER_TOO_BUSY"; + case 0xC0020019: return "RPC_NT_INVALID_NETWORK_OPTIONS"; + case 0xC002001A: return "RPC_NT_NO_CALL_ACTIVE"; + case 0xC002001B: return "RPC_NT_CALL_FAILED"; + case 0xC002001C: return "RPC_NT_CALL_FAILED_DNE"; + case 0xC002001D: return "RPC_NT_PROTOCOL_ERROR"; + case 0xC002001F: return "RPC_NT_UNSUPPORTED_TRANS_SYN"; + case 0xC0020021: return "RPC_NT_UNSUPPORTED_TYPE"; + case 0xC0020022: return "RPC_NT_INVALID_TAG"; + case 0xC0020023: return "RPC_NT_INVALID_BOUND"; + case 0xC0020024: return "RPC_NT_NO_ENTRY_NAME"; + case 0xC0020025: return "RPC_NT_INVALID_NAME_SYNTAX"; + case 0xC0020026: return "RPC_NT_UNSUPPORTED_NAME_SYNTAX"; + case 0xC0020028: return "RPC_NT_UUID_NO_ADDRESS"; + case 0xC0020029: return "RPC_NT_DUPLICATE_ENDPOINT"; + case 0xC002002A: return "RPC_NT_UNKNOWN_AUTHN_TYPE"; + case 0xC002002B: return "RPC_NT_MAX_CALLS_TOO_SMALL"; + case 0xC002002C: return "RPC_NT_STRING_TOO_LONG"; + case 0xC002002D: return "RPC_NT_PROTSEQ_NOT_FOUND"; + case 0xC002002E: return "RPC_NT_PROCNUM_OUT_OF_RANGE"; + case 0xC002002F: return "RPC_NT_BINDING_HAS_NO_AUTH"; + case 0xC0020030: return "RPC_NT_UNKNOWN_AUTHN_SERVICE"; + case 0xC0020031: return "RPC_NT_UNKNOWN_AUTHN_LEVEL"; + case 0xC0020032: return "RPC_NT_INVALID_AUTH_IDENTITY"; + case 0xC0020033: return "RPC_NT_UNKNOWN_AUTHZ_SERVICE"; + case 0xC0020034: return "EPT_NT_INVALID_ENTRY"; + case 0xC0020035: return "EPT_NT_CANT_PERFORM_OP"; + case 0xC0020036: return "EPT_NT_NOT_REGISTERED"; + case 0xC0020037: return "RPC_NT_NOTHING_TO_EXPORT"; + case 0xC0020038: return "RPC_NT_INCOMPLETE_NAME"; + case 0xC0020039: return "RPC_NT_INVALID_VERS_OPTION"; + case 0xC002003A: return "RPC_NT_NO_MORE_MEMBERS"; + case 0xC002003B: return "RPC_NT_NOT_ALL_OBJS_UNEXPORTED"; + case 0xC002003C: return "RPC_NT_INTERFACE_NOT_FOUND"; + case 0xC002003D: return "RPC_NT_ENTRY_ALREADY_EXISTS"; + case 0xC002003E: return "RPC_NT_ENTRY_NOT_FOUND"; + case 0xC002003F: return "RPC_NT_NAME_SERVICE_UNAVAILABLE"; + case 0xC0020040: return "RPC_NT_INVALID_NAF_ID"; + case 0xC0020041: return "RPC_NT_CANNOT_SUPPORT"; + case 0xC0020042: return "RPC_NT_NO_CONTEXT_AVAILABLE"; + case 0xC0020043: return "RPC_NT_INTERNAL_ERROR"; + case 0xC0020044: return "RPC_NT_ZERO_DIVIDE"; + case 0xC0020045: return "RPC_NT_ADDRESS_ERROR"; + case 0xC0020046: return "RPC_NT_FP_DIV_ZERO"; + case 0xC0020047: return "RPC_NT_FP_UNDERFLOW"; + case 0xC0020048: return "RPC_NT_FP_OVERFLOW"; + case 0xC0030001: return "RPC_NT_NO_MORE_ENTRIES"; + case 0xC0030002: return "RPC_NT_SS_CHAR_TRANS_OPEN_FAIL"; + case 0xC0030003: return 
"RPC_NT_SS_CHAR_TRANS_SHORT_FILE"; + case 0xC0030004: return "RPC_NT_SS_IN_NULL_CONTEXT"; + case 0xC0030005: return "RPC_NT_SS_CONTEXT_MISMATCH"; + case 0xC0030006: return "RPC_NT_SS_CONTEXT_DAMAGED"; + case 0xC0030007: return "RPC_NT_SS_HANDLES_MISMATCH"; + case 0xC0030008: return "RPC_NT_SS_CANNOT_GET_CALL_HANDLE"; + case 0xC0030009: return "RPC_NT_NULL_REF_POINTER"; + case 0xC003000A: return "RPC_NT_ENUM_VALUE_OUT_OF_RANGE"; + case 0xC003000B: return "RPC_NT_BYTE_COUNT_TOO_SMALL"; + case 0xC003000C: return "RPC_NT_BAD_STUB_DATA"; + case 0xC0020049: return "RPC_NT_CALL_IN_PROGRESS"; + case 0xC002004A: return "RPC_NT_NO_MORE_BINDINGS"; + case 0xC002004B: return "RPC_NT_GROUP_MEMBER_NOT_FOUND"; + case 0xC002004C: return "EPT_NT_CANT_CREATE"; + case 0xC002004D: return "RPC_NT_INVALID_OBJECT"; + case 0xC002004F: return "RPC_NT_NO_INTERFACES"; + case 0xC0020050: return "RPC_NT_CALL_CANCELLED"; + case 0xC0020051: return "RPC_NT_BINDING_INCOMPLETE"; + case 0xC0020052: return "RPC_NT_COMM_FAILURE"; + case 0xC0020053: return "RPC_NT_UNSUPPORTED_AUTHN_LEVEL"; + case 0xC0020054: return "RPC_NT_NO_PRINC_NAME"; + case 0xC0020055: return "RPC_NT_NOT_RPC_ERROR"; + case 0x40020056: return "RPC_NT_UUID_LOCAL_ONLY"; + case 0xC0020057: return "RPC_NT_SEC_PKG_ERROR"; + case 0xC0020058: return "RPC_NT_NOT_CANCELLED"; + case 0xC0030059: return "RPC_NT_INVALID_ES_ACTION"; + case 0xC003005A: return "RPC_NT_WRONG_ES_VERSION"; + case 0xC003005B: return "RPC_NT_WRONG_STUB_VERSION"; + case 0xC003005C: return "RPC_NT_INVALID_PIPE_OBJECT"; + case 0xC003005D: return "RPC_NT_INVALID_PIPE_OPERATION"; + case 0xC003005E: return "RPC_NT_WRONG_PIPE_VERSION"; + case 0xC003005F: return "RPC_NT_PIPE_CLOSED"; + case 0xC0030060: return "RPC_NT_PIPE_DISCIPLINE_ERROR"; + case 0xC0030061: return "RPC_NT_PIPE_EMPTY"; + case 0xC0020062: return "RPC_NT_INVALID_ASYNC_HANDLE"; + case 0xC0020063: return "RPC_NT_INVALID_ASYNC_CALL"; + case 0x400200AF: return "RPC_NT_SEND_INCOMPLETE"; + case 0xC0140001: return "STATUS_ACPI_INVALID_OPCODE"; + case 0xC0140002: return "STATUS_ACPI_STACK_OVERFLOW"; + case 0xC0140003: return "STATUS_ACPI_ASSERT_FAILED"; + case 0xC0140004: return "STATUS_ACPI_INVALID_INDEX"; + case 0xC0140005: return "STATUS_ACPI_INVALID_ARGUMENT"; + case 0xC0140006: return "STATUS_ACPI_FATAL"; + case 0xC0140007: return "STATUS_ACPI_INVALID_SUPERNAME"; + case 0xC0140008: return "STATUS_ACPI_INVALID_ARGTYPE"; + case 0xC0140009: return "STATUS_ACPI_INVALID_OBJTYPE"; + case 0xC014000A: return "STATUS_ACPI_INVALID_TARGETTYPE"; + case 0xC014000B: return "STATUS_ACPI_INCORRECT_ARGUMENT_COUNT"; + case 0xC014000C: return "STATUS_ACPI_ADDRESS_NOT_MAPPED"; + case 0xC014000D: return "STATUS_ACPI_INVALID_EVENTTYPE"; + case 0xC014000E: return "STATUS_ACPI_HANDLER_COLLISION"; + case 0xC014000F: return "STATUS_ACPI_INVALID_DATA"; + case 0xC0140010: return "STATUS_ACPI_INVALID_REGION"; + case 0xC0140011: return "STATUS_ACPI_INVALID_ACCESS_SIZE"; + case 0xC0140012: return "STATUS_ACPI_ACQUIRE_GLOBAL_LOCK"; + case 0xC0140013: return "STATUS_ACPI_ALREADY_INITIALIZED"; + case 0xC0140014: return "STATUS_ACPI_NOT_INITIALIZED"; + case 0xC0140015: return "STATUS_ACPI_INVALID_MUTEX_LEVEL"; + case 0xC0140016: return "STATUS_ACPI_MUTEX_NOT_OWNED"; + case 0xC0140017: return "STATUS_ACPI_MUTEX_NOT_OWNER"; + case 0xC0140018: return "STATUS_ACPI_RS_ACCESS"; + case 0xC0140019: return "STATUS_ACPI_INVALID_TABLE"; + case 0xC0140020: return "STATUS_ACPI_REG_HANDLER_FAILED"; + case 0xC0140021: return "STATUS_ACPI_POWER_REQUEST_FAILED"; + case 0xC00A0001: return 
"STATUS_CTX_WINSTATION_NAME_INVALID"; + case 0xC00A0002: return "STATUS_CTX_INVALID_PD"; + case 0xC00A0003: return "STATUS_CTX_PD_NOT_FOUND"; + case 0x400A0004: return "STATUS_CTX_CDM_CONNECT"; + case 0x400A0005: return "STATUS_CTX_CDM_DISCONNECT"; + case 0xC00A0006: return "STATUS_CTX_CLOSE_PENDING"; + case 0xC00A0007: return "STATUS_CTX_NO_OUTBUF"; + case 0xC00A0008: return "STATUS_CTX_MODEM_INF_NOT_FOUND"; + case 0xC00A0009: return "STATUS_CTX_INVALID_MODEMNAME"; + case 0xC00A000A: return "STATUS_CTX_RESPONSE_ERROR"; + case 0xC00A000B: return "STATUS_CTX_MODEM_RESPONSE_TIMEOUT"; + case 0xC00A000C: return "STATUS_CTX_MODEM_RESPONSE_NO_CARRIER"; + case 0xC00A000D: return "STATUS_CTX_MODEM_RESPONSE_NO_DIALTONE"; + case 0xC00A000E: return "STATUS_CTX_MODEM_RESPONSE_BUSY"; + case 0xC00A000F: return "STATUS_CTX_MODEM_RESPONSE_VOICE"; + case 0xC00A0010: return "STATUS_CTX_TD_ERROR"; + case 0xC00A0012: return "STATUS_CTX_LICENSE_CLIENT_INVALID"; + case 0xC00A0013: return "STATUS_CTX_LICENSE_NOT_AVAILABLE"; + case 0xC00A0014: return "STATUS_CTX_LICENSE_EXPIRED"; + case 0xC00A0015: return "STATUS_CTX_WINSTATION_NOT_FOUND"; + case 0xC00A0016: return "STATUS_CTX_WINSTATION_NAME_COLLISION"; + case 0xC00A0017: return "STATUS_CTX_WINSTATION_BUSY"; + case 0xC00A0018: return "STATUS_CTX_BAD_VIDEO_MODE"; + case 0xC00A0022: return "STATUS_CTX_GRAPHICS_INVALID"; + case 0xC00A0024: return "STATUS_CTX_NOT_CONSOLE"; + case 0xC00A0026: return "STATUS_CTX_CLIENT_QUERY_TIMEOUT"; + case 0xC00A0027: return "STATUS_CTX_CONSOLE_DISCONNECT"; + case 0xC00A0028: return "STATUS_CTX_CONSOLE_CONNECT"; + case 0xC00A002A: return "STATUS_CTX_SHADOW_DENIED"; + case 0xC00A002B: return "STATUS_CTX_WINSTATION_ACCESS_DENIED"; + case 0xC00A002E: return "STATUS_CTX_INVALID_WD"; + case 0xC00A002F: return "STATUS_CTX_WD_NOT_FOUND"; + case 0xC00A0030: return "STATUS_CTX_SHADOW_INVALID"; + case 0xC00A0031: return "STATUS_CTX_SHADOW_DISABLED"; + case 0xC00A0032: return "STATUS_RDP_PROTOCOL_ERROR"; + case 0xC00A0033: return "STATUS_CTX_CLIENT_LICENSE_NOT_SET"; + case 0xC00A0034: return "STATUS_CTX_CLIENT_LICENSE_IN_USE"; + case 0xC0040035: return "STATUS_PNP_BAD_MPS_TABLE"; + case 0xC0040036: return "STATUS_PNP_TRANSLATION_FAILED"; + case 0xC0040037: return "STATUS_PNP_IRQ_TRANSLATION_FAILED"; + default: return "STATUS_UNKNOWN"; + } +} + + +/* + * KsPrintf + * This function is variable-argument, level-sensitive debug print routine. + * If the specified debug level for the print statement is lower or equal + * to the current debug level, the message will be printed. + * + * Arguments: + * DebugPrintLevel - Specifies at which debugging level the string should + * be printed + * DebugMessage - Variable argument ascii c string + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +VOID +KsPrintf( + LONG DebugPrintLevel, + PCHAR DebugMessage, + ... + ) +{ + va_list ap; + + va_start(ap, DebugMessage); + + if (DebugPrintLevel <= KsDebugLevel) + { + CHAR buffer[0x200]; + + vsprintf(buffer, DebugMessage, ap); + + KdPrint(("TID:%8.8x: %s", PsGetCurrentThread(), buffer)); + } + + va_end(ap); + +} // KsPrint() + +#endif diff --git a/libcfs/libcfs/winnt/winnt-fs.c b/libcfs/libcfs/winnt/winnt-fs.c new file mode 100644 index 0000000..128781b --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-fs.c @@ -0,0 +1,541 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. 
+ * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +# define DEBUG_SUBSYSTEM S_LNET + +#include + +const CHAR *dos_file_prefix = "\\??\\"; + +/* + * cfs_filp_open + * To open or create a file in kernel mode + * + * Arguments: + * name: name of the file to be opened or created, no dos path prefix + * flags: open/creation attribute options + * mode: access mode/permission to open or create + * err: error code + * + * Return Value: + * the pointer to the cfs_file_t or NULL if it fails + * + * Notes: + * N/A + */ + +cfs_file_t *cfs_filp_open(const char *name, int flags, int mode, int *err) +{ + cfs_file_t * fp = NULL; + + NTSTATUS Status; + + OBJECT_ATTRIBUTES ObjectAttributes; + HANDLE FileHandle; + IO_STATUS_BLOCK IoStatus; + ACCESS_MASK DesiredAccess; + ULONG CreateDisposition; + ULONG ShareAccess; + ULONG CreateOptions; + + USHORT NameLength = 0; + USHORT PrefixLength = 0; + + UNICODE_STRING UnicodeName; + PWCHAR UnicodeString = NULL; + + ANSI_STRING AnsiName; + PUCHAR AnsiString = NULL; + + /* Analyze the flags settings */ + + if (cfs_is_flag_set(flags, O_WRONLY)) { + DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = 0; + } else if (cfs_is_flag_set(flags, O_RDWR)) { + DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; + } else { + DesiredAccess = (GENERIC_READ | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ; + } + + if (cfs_is_flag_set(flags, O_CREAT)) { + if (cfs_is_flag_set(flags, O_EXCL)) { + CreateDisposition = FILE_CREATE; + } else { + CreateDisposition = FILE_OPEN_IF; + } + } else { + CreateDisposition = FILE_OPEN; + } + + if (cfs_is_flag_set(flags, O_TRUNC)) { + if (cfs_is_flag_set(flags, O_EXCL)) { + CreateDisposition = FILE_OVERWRITE; + } else { + CreateDisposition = FILE_OVERWRITE_IF; + } + } + + CreateOptions = 0; + + if (cfs_is_flag_set(flags, O_DIRECTORY)) { + cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); + } + + if (cfs_is_flag_set(flags, O_SYNC)) { + cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); + } + + if (cfs_is_flag_set(flags, O_DIRECT)) { + cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); + } + + /* Initialize the unicode path name for the specified file */ + + NameLength = (USHORT)strlen(name); + + if (name[0] != '\\') { + PrefixLength = (USHORT)strlen(dos_file_prefix); + } + + AnsiString = cfs_alloc( sizeof(CHAR) * (NameLength + PrefixLength + 1), + CFS_ALLOC_ZERO); + if (NULL == AnsiString) { + if (err) *err = -ENOMEM; + return NULL; + } + + UnicodeString = cfs_alloc( sizeof(WCHAR) * (NameLength + PrefixLength + 1), + CFS_ALLOC_ZERO); + + if (NULL == UnicodeString) { + if (err) *err = -ENOMEM; + cfs_free(AnsiString); + return NULL; + } + + if (PrefixLength) { + RtlCopyMemory(&AnsiString[0], dos_file_prefix , PrefixLength); + } + + RtlCopyMemory(&AnsiString[PrefixLength], name, NameLength); + NameLength += PrefixLength; + + AnsiName.MaximumLength = NameLength + 1; + AnsiName.Length = 
NameLength; + AnsiName.Buffer = AnsiString; + + UnicodeName.MaximumLength = (NameLength + 1) * sizeof(WCHAR); + UnicodeName.Length = 0; + UnicodeName.Buffer = (PWSTR)UnicodeString; + + RtlAnsiStringToUnicodeString(&UnicodeName, &AnsiName, FALSE); + + /* Setup the object attributes structure for the file. */ + + InitializeObjectAttributes( + &ObjectAttributes, + &UnicodeName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL ); + + /* Now to open or create the file now */ + + Status = ZwCreateFile( + &FileHandle, + DesiredAccess, + &ObjectAttributes, + &IoStatus, + 0, + FILE_ATTRIBUTE_NORMAL, + ShareAccess, + CreateDisposition, + CreateOptions, + NULL, + 0 ); + + /* Check the returned status of IoStatus... */ + + if (!NT_SUCCESS(IoStatus.Status)) { + *err = cfs_error_code(IoStatus.Status); + cfs_free(UnicodeString); + cfs_free(AnsiString); + return NULL; + } + + /* Allocate the cfs_file_t: libcfs file object */ + + fp = cfs_alloc(sizeof(cfs_file_t) + NameLength, CFS_ALLOC_ZERO); + + if (NULL == fp) { + Status = ZwClose(FileHandle); + ASSERT(NT_SUCCESS(Status)); + *err = -ENOMEM; + cfs_free(UnicodeString); + cfs_free(AnsiString); + return NULL; + } + + fp->f_handle = FileHandle; + strcpy(fp->f_name, name); + fp->f_flags = flags; + fp->f_mode = (mode_t)mode; + fp->f_count = 1; + *err = 0; + + /* free the memory of temporary name strings */ + cfs_free(UnicodeString); + cfs_free(AnsiString); + + return fp; +} + + +/* + * cfs_filp_close + * To close the opened file and release the filp structure + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * + * Return Value: + * ZERO: on success + * Non-Zero: on failure + * + * Notes: + * N/A + */ + +int cfs_filp_close(cfs_file_t *fp) +{ + NTSTATUS Status; + + ASSERT(fp != NULL); + ASSERT(fp->f_handle != NULL); + + /* release the file handle */ + Status = ZwClose(fp->f_handle); + ASSERT(NT_SUCCESS(Status)); + + /* free the file flip structure */ + cfs_free(fp); + return 0; +} + + +/* + * cfs_filp_read + * To read data from the opened file + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * buf: pointer to the buffer to contain the data + * nbytes: size in bytes to be read from the file + * pos: offset in file where reading starts, if pos + * NULL, then read from current file offset + * + * Return Value: + * Actual size read into the buffer in success case + * Error code in failure case + * + * Notes: + * N/A + */ + +int cfs_filp_read(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) +{ + LARGE_INTEGER address; + NTSTATUS Status; + IO_STATUS_BLOCK IoStatus; + + int rc = 0; + + /* Read data from the file into the specified buffer */ + + if (pos != NULL) { + address.QuadPart = *pos; + } else { + address.QuadPart = fp->f_pos; + } + + Status = ZwReadFile( fp->f_handle, + 0, + NULL, + NULL, + &IoStatus, + buf, + nbytes, + &address, + NULL ); + + if (!NT_SUCCESS(IoStatus.Status)) { + rc = cfs_error_code(IoStatus.Status); + } else { + rc = (int)IoStatus.Information; + fp->f_pos = address.QuadPart + rc; + + if (pos != NULL) { + *pos = fp->f_pos; + } + } + + return rc; +} + + +/* + * cfs_filp_wrtie + * To write specified data to the opened file + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * buf: pointer to the buffer containing the data + * nbytes: size in bytes to be written to the file + * pos: offset in file where writing starts, if pos + * NULL, then write to current file offset + * + * Return Value: + * Actual size written into the buffer in success case + * Error code in failure case + 
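 *
 *    Illustrative note (editor's sketch, not part of the original patch):
 *    when pos is non-NULL the write starts at *pos and, on success, both
 *    fp->f_pos and *pos are advanced past the bytes written, e.g.
 *
 *        loff_t off = 0;
 *        int rc = cfs_filp_write(fp, buf, len, &off);
 *        // on success: rc == bytes written, off == fp->f_pos == old off + rc
 *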
* + * Notes: + * N/A + */ + +int cfs_filp_write(cfs_file_t *fp, void *buf, size_t nbytes, loff_t *pos) +{ + LARGE_INTEGER address; + NTSTATUS Status; + IO_STATUS_BLOCK IoStatus; + int rc = 0; + + /* Write user specified data into the file */ + + if (pos != NULL) { + address.QuadPart = *pos; + } else { + address.QuadPart = fp->f_pos; + } + + Status = ZwWriteFile( fp->f_handle, + 0, + NULL, + NULL, + &IoStatus, + buf, + nbytes, + &address, + NULL ); + + if (!NT_SUCCESS(Status)) { + rc = cfs_error_code(Status); + } else { + rc = (int)IoStatus.Information; + fp->f_pos = address.QuadPart + rc; + + if (pos != NULL) { + *pos = fp->f_pos; + } + } + + return rc; +} + + +NTSTATUS +CompletionRoutine( + PDEVICE_OBJECT DeviceObject, + PIRP Irp, + PVOID Context) +{ + /* copy the IoStatus result */ + *Irp->UserIosb = Irp->IoStatus; + + /* singal the event we set */ + KeSetEvent(Irp->UserEvent, 0, FALSE); + + /* free the Irp we allocated */ + IoFreeIrp(Irp); + + return STATUS_MORE_PROCESSING_REQUIRED; +} + + +/* + * cfs_filp_fsync + * To sync the dirty data of the file to disk + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * + * Return Value: + * Zero: in success case + * Error code: in failure case + * + * Notes: + * Nt kernel doesn't export such a routine to flush a file, + * we must allocate our own Irp and issue it to the file + * system driver. + */ + +int cfs_filp_fsync(cfs_file_t *fp) +{ + + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + + NTSTATUS Status; + PIRP Irp; + KEVENT Event; + IO_STATUS_BLOCK IoSb; + PIO_STACK_LOCATION IrpSp; + + /* get the FileObject and the DeviceObject */ + + Status = ObReferenceObjectByHandle( + fp->f_handle, + FILE_WRITE_DATA, + NULL, + KernelMode, + (PVOID*)&FileObject, + NULL ); + + if (!NT_SUCCESS(Status)) { + return cfs_error_code(Status); + } + + DeviceObject = IoGetRelatedDeviceObject(FileObject); + + /* allocate a new Irp */ + + Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); + + if (!Irp) { + + ObDereferenceObject(FileObject); + return -ENOMEM; + } + + /* intialize the event */ + KeInitializeEvent(&Event, SynchronizationEvent, FALSE); + + /* setup the Irp */ + Irp->UserEvent = &Event; + Irp->UserIosb = &IoSb; + Irp->RequestorMode = KernelMode; + + Irp->Tail.Overlay.Thread = PsGetCurrentThread(); + Irp->Tail.Overlay.OriginalFileObject = FileObject; + + /* setup the Irp stack location */ + IrpSp = IoGetNextIrpStackLocation(Irp); + + IrpSp->MajorFunction = IRP_MJ_FLUSH_BUFFERS; + IrpSp->DeviceObject = DeviceObject; + IrpSp->FileObject = FileObject; + + IoSetCompletionRoutine(Irp, CompletionRoutine, 0, TRUE, TRUE, TRUE); + + + /* issue the Irp to the underlying file system driver */ + IoCallDriver(DeviceObject, Irp); + + /* wait until it is finished */ + KeWaitForSingleObject(&Event, Executive, KernelMode, TRUE, 0); + + /* cleanup our reference on it */ + ObDereferenceObject(FileObject); + + Status = IoSb.Status; + + return cfs_error_code(Status); +} + +/* + * cfs_get_file + * To increase the reference of the file object + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * + * Return Value: + * Zero: in success case + * Non-Zero: in failure case + * + * Notes: + * N/A + */ + +int cfs_get_file(cfs_file_t *fp) +{ + InterlockedIncrement(&(fp->f_count)); + return 0; +} + + +/* + * cfs_put_file + * To decrease the reference of the file object + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * + * Return Value: + * Zero: in success case + * Non-Zero: in failure case + * + * Notes: + * N/A + */ + 
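
/*
 * Illustrative note (editor's addition, not part of the original patch):
 * cfs_filp_open() initializes f_count to 1, so each extra cfs_get_file()
 * must be balanced by a cfs_put_file(); the final put (count reaching 0)
 * closes the handle via cfs_filp_close(). A minimal sketch, with name and
 * rc as hypothetical caller variables:
 *
 *     cfs_file_t *fp = cfs_filp_open(name, O_RDWR | O_CREAT, 0, &rc);
 *     cfs_get_file(fp);      // take a second reference
 *     cfs_put_file(fp);      // back to 1, file stays open
 *     cfs_put_file(fp);      // reaches 0, handle is closed
 */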
+int cfs_put_file(cfs_file_t *fp) +{ + if (InterlockedDecrement(&(fp->f_count)) == 0) { + cfs_filp_close(fp); + } + + return 0; +} + + +/* + * cfs_file_count + * To query the reference count of the file object + * + * Arguments: + * fp: the pointer of the cfs_file_t strcture + * + * Return Value: + * the reference count of the file object + * + * Notes: + * N/A + */ + +int cfs_file_count(cfs_file_t *fp) +{ + return (int)(fp->f_count); +} diff --git a/libcfs/libcfs/winnt/winnt-lock.c b/libcfs/libcfs/winnt/winnt-lock.c new file mode 100644 index 0000000..12dbc67 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-lock.c @@ -0,0 +1,353 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + + +# define DEBUG_SUBSYSTEM S_LNET + +#include + + +#if _X86_ + +void __declspec (naked) FASTCALL +atomic_add( + int i, + atomic_t *v + ) +{ + // ECX = i + // EDX = v ; [EDX][0] = v->counter + + __asm { + lock add dword ptr [edx][0], ecx + ret + } +} + +void __declspec (naked) FASTCALL +atomic_sub( + int i, + atomic_t *v + ) +{ + // ECX = i + // EDX = v ; [EDX][0] = v->counter + + __asm { + lock sub dword ptr [edx][0], ecx + ret + } +} + +void __declspec (naked) FASTCALL +atomic_inc( + atomic_t *v + ) +{ + //InterlockedIncrement((PULONG)(&((v)->counter))); + + //` ECX = v ; [ECX][0] = v->counter + + __asm { + lock inc dword ptr [ecx][0] + ret + } +} + +void __declspec (naked) FASTCALL +atomic_dec( + atomic_t *v + ) +{ + // ECX = v ; [ECX][0] = v->counter + + __asm { + lock dec dword ptr [ecx][0] + ret + } +} + +int __declspec (naked) FASTCALL +atomic_sub_and_test( + int i, + atomic_t *v + ) +{ + + // ECX = i + // EDX = v ; [EDX][0] = v->counter + + __asm { + xor eax, eax + lock sub dword ptr [edx][0], ecx + sete al + ret + } +} + +int __declspec (naked) FASTCALL +atomic_inc_and_test( + atomic_t *v + ) +{ + // ECX = v ; [ECX][0] = v->counter + + __asm { + xor eax, eax + lock inc dword ptr [ecx][0] + sete al + ret + } +} + +int __declspec (naked) FASTCALL +atomic_dec_and_test( + atomic_t *v + ) +{ + // ECX = v ; [ECX][0] = v->counter + + __asm { + xor eax, eax + lock dec dword ptr [ecx][0] + sete al + ret + } +} + +#else + +void FASTCALL +atomic_add( + int i, + atomic_t *v + ) +{ + InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (i)); +} + +void FASTCALL +atomic_sub( + int i, + atomic_t *v + ) +{ + InterlockedExchangeAdd( (PULONG)(&((v)->counter)) , (LONG) (-1*i)); +} + +void FASTCALL +atomic_inc( + atomic_t *v + ) +{ + InterlockedIncrement((PULONG)(&((v)->counter))); +} + +void FASTCALL +atomic_dec( + atomic_t *v + ) +{ + InterlockedDecrement((PULONG)(&((v)->counter))); +} + +int FASTCALL +atomic_sub_and_test( + int i, + atomic_t *v + ) +{ + int counter, result; + + do { + + counter = v->counter; + result = counter - i; + + 
} while ( InterlockedCompareExchange( + &(v->counter), + result, + counter) != counter); + + return (result == 0); +} + +int FASTCALL +atomic_inc_and_test( + atomic_t *v + ) +{ + int counter, result; + + do { + + counter = v->counter; + result = counter + 1; + + } while ( InterlockedCompareExchange( + &(v->counter), + result, + counter) != counter); + + return (result == 0); +} + +int FASTCALL +atomic_dec_and_test( + atomic_t *v + ) +{ + int counter, result; + + do { + + counter = v->counter; + result = counter + 1; + + } while ( InterlockedCompareExchange( + &(v->counter), + result, + counter) != counter); + + return (result == 0); +} + +#endif + + +/* + * rw spinlock + */ + + +void +rwlock_init(rwlock_t * rwlock) +{ + spin_lock_init(&rwlock->guard); + rwlock->count = 0; +} + +void +rwlock_fini(rwlock_t * rwlock) +{ +} + +void +read_lock(rwlock_t * rwlock) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + ASSERT(slot->Magic == TASKSLT_MAGIC); + + slot->irql = KeRaiseIrqlToDpcLevel(); + + while (TRUE) { + spin_lock(&rwlock->guard); + if (rwlock->count >= 0) + break; + spin_unlock(&rwlock->guard); + } + + rwlock->count++; + spin_unlock(&rwlock->guard); +} + +void +read_unlock(rwlock_t * rwlock) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + ASSERT(slot->Magic == TASKSLT_MAGIC); + + spin_lock(&rwlock->guard); + ASSERT(rwlock->count > 0); + rwlock->count--; + if (rwlock < 0) { + cfs_enter_debugger(); + } + spin_unlock(&rwlock->guard); + + KeLowerIrql(slot->irql); +} + +void +write_lock(rwlock_t * rwlock) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + ASSERT(slot->Magic == TASKSLT_MAGIC); + + slot->irql = KeRaiseIrqlToDpcLevel(); + + while (TRUE) { + spin_lock(&rwlock->guard); + if (rwlock->count == 0) + break; + spin_unlock(&rwlock->guard); + } + + rwlock->count = -1; + spin_unlock(&rwlock->guard); +} + +void +write_unlock(rwlock_t * rwlock) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + ASSERT(slot->Magic == TASKSLT_MAGIC); + + spin_lock(&rwlock->guard); + ASSERT(rwlock->count == -1); + rwlock->count = 0; + spin_unlock(&rwlock->guard); + + KeLowerIrql(slot->irql); +} diff --git a/libcfs/libcfs/winnt/winnt-lwt.c b/libcfs/libcfs/winnt/winnt-lwt.c new file mode 100644 index 0000000..272cbcf --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-lwt.c @@ -0,0 +1,20 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +# define DEBUG_SUBSYSTEM S_LNET + diff --git a/libcfs/libcfs/winnt/winnt-mem.c b/libcfs/libcfs/winnt/winnt-mem.c new file mode 100644 index 0000000..6b66a95 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-mem.c @@ -0,0 +1,332 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include + + +cfs_mem_cache_t *cfs_page_t_slab = NULL; +cfs_mem_cache_t *cfs_page_p_slab = NULL; + +/* + * cfs_alloc_page + * To allocate the cfs_page_t and also 1 page of memory + * + * Arguments: + * flags: the allocation options + * + * Return Value: + * pointer to the cfs_page_t strcture in success or + * NULL in failure case + * + * Notes: + * N/A + */ + +cfs_page_t * cfs_alloc_page(int flags) +{ + cfs_page_t *pg; + pg = cfs_mem_cache_alloc(cfs_page_t_slab, 0); + + if (NULL == pg) { + cfs_enter_debugger(); + return NULL; + } + + memset(pg, 0, sizeof(cfs_page_t)); + pg->addr = cfs_mem_cache_alloc(cfs_page_p_slab, 0); + atomic_set(&pg->count, 1); + + if (pg->addr) { + if (cfs_is_flag_set(flags, CFS_ALLOC_ZERO)) { + memset(pg->addr, 0, CFS_PAGE_SIZE); + } + } else { + cfs_enter_debugger(); + cfs_mem_cache_free(cfs_page_t_slab, pg); + pg = NULL; + } + + return pg; +} + +/* + * cfs_free_page + * To free the cfs_page_t including the page + * + * Arguments: + * pg: pointer to the cfs_page_t strcture + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ +void cfs_free_page(cfs_page_t *pg) +{ + ASSERT(pg != NULL); + ASSERT(pg->addr != NULL); + ASSERT(atomic_read(&pg->count) <= 1); + + cfs_mem_cache_free(cfs_page_p_slab, pg->addr); + cfs_mem_cache_free(cfs_page_t_slab, pg); +} + + +/* + * cfs_alloc + * To allocate memory from system pool + * + * Arguments: + * nr_bytes: length in bytes of the requested buffer + * flags: flags indiction + * + * Return Value: + * NULL: if there's no enough memory space in system + * the address of the allocated memory in success. + * + * Notes: + * This operation can be treated as atomic. 
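 *
 *    Editor's note (not part of the original patch): in this port the
 *    flags do not select a pool, every request is served from NonPagedPool
 *    (tag 'Lufs'); only CFS_ALLOC_ZERO is honoured, by zeroing the buffer
 *    after allocation, e.g.
 *
 *        void *p = cfs_alloc(size, CFS_ALLOC_ZERO);   // zero-filled buffer
 *        ...
 *        cfs_free(p);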
+ */ + +void * +cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + void *ptr; + + /* Ignore the flags: always allcoate from NonPagedPool */ + + ptr = ExAllocatePoolWithTag(NonPagedPool, nr_bytes, 'Lufs'); + + if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) { + memset(ptr, 0, nr_bytes); + } + + if (!ptr) { + cfs_enter_debugger(); + } + + return ptr; +} + +/* + * cfs_free + * To free the sepcified memory to system pool + * + * Arguments: + * addr: pointer to the buffer to be freed + * + * Return Value: + * N/A + * + * Notes: + * This operation can be treated as atomic. + */ + +void +cfs_free(void *addr) +{ + ExFreePool(addr); +} + +/* + * cfs_alloc_large + * To allocate large block of memory from system pool + * + * Arguments: + * nr_bytes: length in bytes of the requested buffer + * + * Return Value: + * NULL: if there's no enough memory space in system + * the address of the allocated memory in success. + * + * Notes: + * N/A + */ + +void * +cfs_alloc_large(size_t nr_bytes) +{ + return cfs_alloc(nr_bytes, 0); +} + +/* + * cfs_free_large + * To free the sepcified memory to system pool + * + * Arguments: + * addr: pointer to the buffer to be freed + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +cfs_free_large(void *addr) +{ + cfs_free(addr); +} + + +/* + * cfs_mem_cache_create + * To create a SLAB cache + * + * Arguments: + * name: name string of the SLAB cache to be created + * size: size in bytes of SLAB entry buffer + * offset: offset in the page + * flags: SLAB creation flags +* + * Return Value: + * The poitner of cfs_memory_cache structure in success. + * NULL pointer in failure case. + * + * Notes: + * 1, offset won't be used here. + * 2, it could be better to induce a lock to protect the access of the + * SLAB structure on SMP if there's not outside lock protection. + * 3, parameters C/D are removed. + */ + +cfs_mem_cache_t * +cfs_mem_cache_create( + const char * name, + size_t size, + size_t offset, + unsigned long flags + ) +{ + cfs_mem_cache_t * kmc = NULL; + + /* The name of the SLAB could not exceed 20 chars */ + + if (name && strlen(name) >= 20) { + goto errorout; + } + + /* Allocate and initialize the SLAB strcture */ + + kmc = cfs_alloc (sizeof(cfs_mem_cache_t), 0); + + if (NULL == kmc) { + goto errorout; + } + + memset(kmc, 0, sizeof(cfs_mem_cache_t)); + + kmc->flags = flags; + + if (name) { + strcpy(&kmc->name[0], name); + } + + /* Initialize the corresponding LookAside list */ + + ExInitializeNPagedLookasideList( + &(kmc->npll), + NULL, + NULL, + 0, + size, + 'pnmk', + 0); + +errorout: + + return kmc; +} + +/* + * cfs_mem_cache_destroy + * To destroy the unused SLAB cache + * + * Arguments: + * kmc: the SLAB cache to be destroied. + * + * Return Value: + * 0: in success case. + * 1: in failure case. + * + * Notes: + * N/A + */ + +int cfs_mem_cache_destroy (cfs_mem_cache_t * kmc) +{ + ASSERT(kmc != NULL); + + ExDeleteNPagedLookasideList(&(kmc->npll)); + + cfs_free(kmc); + + return 0; +} + +/* + * cfs_mem_cache_alloc + * To allocate an object (LookAside entry) from the SLAB + * + * Arguments: + * kmc: the SLAB cache to be allocated from. + * flags: flags for allocation options + * + * Return Value: + * object buffer address: in success case. + * NULL: in failure case. 
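 *
 *    Editor's sketch (not part of the original patch): the "SLAB" here is
 *    backed by an NT non-paged lookaside list, so a typical cycle is
 *
 *        cfs_mem_cache_t *cache = cfs_mem_cache_create("obj", sizeof(obj_t), 0, 0);
 *        obj_t *o = cfs_mem_cache_alloc(cache, 0);    // ExAllocateFromNPagedLookasideList
 *        cfs_mem_cache_free(cache, o);                // ExFreeToNPagedLookasideList
 *        cfs_mem_cache_destroy(cache);
 *
 *    where "obj" and obj_t are hypothetical; note the cache name must stay
 *    under 20 characters or creation returns NULL.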
+ * + * Notes: + * N/A + */ + +void *cfs_mem_cache_alloc(cfs_mem_cache_t * kmc, int flags) +{ + void *buf = NULL; + + buf = ExAllocateFromNPagedLookasideList(&(kmc->npll)); + + return buf; +} + +/* + * cfs_mem_cache_free + * To free an object (LookAside entry) to the SLAB cache + * + * Arguments: + * kmc: the SLAB cache to be freed to. + * buf: the pointer to the object to be freed. + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_mem_cache_free(cfs_mem_cache_t * kmc, void * buf) +{ + ExFreeToNPagedLookasideList(&(kmc->npll), buf); +} diff --git a/libcfs/libcfs/winnt/winnt-module.c b/libcfs/libcfs/winnt/winnt-module.c new file mode 100644 index 0000000..2b6b008 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-module.c @@ -0,0 +1,160 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + + +#define DEBUG_SUBSYSTEM S_LIBCFS + +#include +#include + +#define LIBCFS_MINOR 240 + +int libcfs_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct libcfs_ioctl_hdr *hdr; + struct libcfs_ioctl_data *data; + int err; + ENTRY; + + hdr = (struct libcfs_ioctl_hdr *)buf; + data = (struct libcfs_ioctl_data *)buf; + + err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); + if (err) + RETURN(err); + + if (hdr->ioc_version != LIBCFS_IOCTL_VERSION) { + CERROR(("LIBCFS: version mismatch kernel vs application\n")); + RETURN(-EINVAL); + } + + if (hdr->ioc_len + buf >= end) { + CERROR(("LIBCFS: user buffer exceeds kernel buffer\n")); + RETURN(-EINVAL); + } + + if (hdr->ioc_len < sizeof(struct libcfs_ioctl_data)) { + CERROR(("LIBCFS: user buffer too small for ioctl\n")); + RETURN(-EINVAL); + } + + err = copy_from_user(buf, (void *)arg, hdr->ioc_len); + if (err) + RETURN(err); + + if (libcfs_ioctl_is_invalid(data)) { + CERROR(("LIBCFS: ioctl not correctly formatted\n")); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1) + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + + if (data->ioc_inllen2) + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + size_round(data->ioc_inllen1); + + RETURN(0); +} + +extern struct cfs_psdev_ops libcfs_psdev_ops; + +static int +libcfs_psdev_open(cfs_file_t * file) +{ + struct libcfs_device_userstate **pdu = NULL; + int rc = 0; + + pdu = (struct libcfs_device_userstate **)&file->private_data; + if (libcfs_psdev_ops.p_open != NULL) + rc = libcfs_psdev_ops.p_open(0, (void *)pdu); + else + return (-EPERM); + return rc; +} + +/* called when closing /dev/device */ +static int +libcfs_psdev_release(cfs_file_t * file) +{ + struct libcfss_device_userstate *pdu; + int rc = 0; + + pdu = file->private_data; + if (libcfs_psdev_ops.p_close != NULL) + rc = libcfs_psdev_ops.p_close(0, (void *)pdu); + else + rc = -EPERM; + return rc; +} + +static int +libcfs_ioctl(cfs_file_t * file, unsigned int cmd, ulong_ptr 
arg) +{ + struct cfs_psdev_file pfile; + int rc = 0; + + if ( _IOC_TYPE(cmd) != IOC_LIBCFS_TYPE || + _IOC_NR(cmd) < IOC_LIBCFS_MIN_NR || + _IOC_NR(cmd) > IOC_LIBCFS_MAX_NR ) { + CDEBUG(D_IOCTL, ("invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd))); + return (-EINVAL); + } + + /* Handle platform-dependent IOC requests */ + switch (cmd) { + case IOC_LIBCFS_PANIC: + if (!capable (CAP_SYS_BOOT)) + return (-EPERM); + CERROR(("debugctl-invoked panic")); + KeBugCheckEx('LUFS', (ULONG_PTR)libcfs_ioctl, (ULONG_PTR)NULL, (ULONG_PTR)NULL, (ULONG_PTR)NULL); + + return (0); + case IOC_LIBCFS_MEMHOG: + + if (!capable (CAP_SYS_ADMIN)) + return -EPERM; + break; + } + + pfile.off = 0; + pfile.private_data = file->private_data; + if (libcfs_psdev_ops.p_ioctl != NULL) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); + else + rc = -EPERM; + return (rc); +} + +static struct file_operations libcfs_fops = { + /* lseek: */ NULL, + /* read: */ NULL, + /* write: */ NULL, + /* ioctl: */ libcfs_ioctl, + /* open: */ libcfs_psdev_open, + /* release:*/ libcfs_psdev_release +}; + +cfs_psdev_t libcfs_dev = { + LIBCFS_MINOR, + "lnet", + &libcfs_fops +}; + diff --git a/libcfs/libcfs/winnt/winnt-prim.c b/libcfs/libcfs/winnt/winnt-prim.c new file mode 100644 index 0000000..064b071 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-prim.c @@ -0,0 +1,650 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +#define DEBUG_SUBSYSTEM S_LNET + +#include +#include + + +/* + * Thread routines + */ + +/* + * cfs_thread_proc + * Lustre thread procedure wrapper routine (It's an internal routine) + * + * Arguments: + * context: a structure of cfs_thread_context_t, containing + * all the necessary parameters + * + * Return Value: + * void: N/A + * + * Notes: + * N/A + */ + +void +cfs_thread_proc( + void * context + ) +{ + cfs_thread_context_t * thread_context = + (cfs_thread_context_t *) context; + + /* Execute the specified function ... */ + + if (thread_context->func) { + (thread_context->func)(thread_context->arg); + } + + /* Free the context memory */ + + cfs_free(context); + + /* Terminate this system thread */ + + PsTerminateSystemThread(STATUS_SUCCESS); +} + +/* + * cfs_kernel_thread + * Create a system thread to execute the routine specified + * + * Arguments: + * func: function to be executed in the thread + * arg: argument transferred to func function + * flag: thread creation flags. 
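 *               (the flag argument is not consulted by the implementation
 *               shown here)
 *
 *    Editor's sketch (not part of the original patch), with worker and
 *    data as hypothetical caller names:
 *
 *        static int worker(void *data) { return 0; }
 *        ...
 *        int rc = cfs_kernel_thread(worker, data, 0);
 *        // 0 on success, -ENOMEM or a translated NT status code on failure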
+ * + * Return Value: + * int: 0 on success or error codes + * + * Notes: + * N/A + */ + +int cfs_kernel_thread(int (*func)(void *), void *arg, int flag) +{ + cfs_handle_t thread = NULL; + NTSTATUS status; + cfs_thread_context_t * context = NULL; + + /* Allocate the context to be transferred to system thread */ + + context = cfs_alloc(sizeof(cfs_thread_context_t), CFS_ALLOC_ZERO); + + if (!context) { + return -ENOMEM; + } + + context->func = func; + context->arg = arg; + + /* Create system thread with the cfs_thread_proc wrapper */ + + status = PsCreateSystemThread( + &thread, + (ACCESS_MASK)0L, + 0, 0, 0, + cfs_thread_proc, + context); + + if (!NT_SUCCESS(status)) { + + + cfs_free(context); + + /* We need translate the nt status to linux error code */ + + return cfs_error_code(status); + } + + // + // Query the thread id of the newly created thread + // + + ZwClose(thread); + + return 0; +} + + +/* + * Symbols routines + */ + + +static CFS_DECL_RWSEM(cfs_symbol_lock); +CFS_LIST_HEAD(cfs_symbol_list); + +int MPSystem = FALSE; + +/* + * cfs_symbol_get + * To query the specified symbol form the symbol table + * + * Arguments: + * name: the symbol name to be queried + * + * Return Value: + * If the symbol is in the table, return the address of it. + * If not, return NULL. + * + * Notes: + * N/A + */ + +void * +cfs_symbol_get(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + sym->ref ++; + break; + } + } + up_read(&cfs_symbol_lock); + + if (sym != NULL) + return sym->value; + + return NULL; +} + +/* + * cfs_symbol_put + * To decrease the reference of the specified symbol + * + * Arguments: + * name: the symbol name to be dereferred + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +cfs_symbol_put(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + LASSERT(sym->ref > 0); + sym->ref--; + break; + } + } + up_read(&cfs_symbol_lock); + + LASSERT(sym != NULL); +} + + +/* + * cfs_symbol_register + * To register the specified symbol infromation + * + * Arguments: + * name: the symbol name to be dereferred + * value: the value that the symbol stands for + * + * Return Value: + * N/A + * + * Notes: + * Zero: Succeed to register + * Non-Zero: Fail to register the symbol + */ + +int +cfs_symbol_register(const char *name, const void *value) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + struct cfs_symbol *new = NULL; + + new = cfs_alloc(sizeof(struct cfs_symbol), CFS_ALLOC_ZERO); + if (!new) { + return (-ENOMEM); + } + strncpy(new->name, name, CFS_SYMBOL_LEN); + new->value = (void *)value; + new->ref = 0; + CFS_INIT_LIST_HEAD(&new->sym_list); + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + up_write(&cfs_symbol_lock); + cfs_free(new); + return 0; // alreay registerred + } + } + list_add_tail(&new->sym_list, &cfs_symbol_list); + up_write(&cfs_symbol_lock); + + return 0; +} + +/* + * cfs_symbol_unregister + * To unregister/remove the specified symbol + * + * Arguments: + * name: the symbol name to be dereferred + * + * Return Value: + * N/A + * + * Notes: + 
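 *    Editor's note (not part of the original patch): the table keeps a
 *    per-symbol reference count, so a well-behaved user pairs the calls:
 *
 *        cfs_symbol_register("my_sym", &my_func);   // ref starts at 0
 *        p = cfs_symbol_get("my_sym");              // value returned, ref -> 1
 *        cfs_symbol_put("my_sym");                  // ref -> 0
 *        cfs_symbol_unregister("my_sym");           // expects ref == 0
 *
 *    where "my_sym" and my_func are hypothetical names.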
* N/A + */ + +void +cfs_symbol_unregister(const char *name) +{ + struct list_head *walker; + struct list_head *nxt; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each_safe(walker, nxt, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + cfs_free(sym); + break; + } + } + up_write(&cfs_symbol_lock); +} + +/* + * cfs_symbol_clean + * To clean all the symbols + * + * Arguments: + * N/A + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +cfs_symbol_clean() +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + cfs_free(sym); + } + up_write(&cfs_symbol_lock); + return; +} + + + +/* + * Timer routines + */ + + +/* Timer dpc procedure */ + +static void +cfs_timer_dpc_proc ( + IN PKDPC Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2) +{ + cfs_timer_t * timer; + KIRQL Irql; + + timer = (cfs_timer_t *) DeferredContext; + + /* clear the flag */ + KeAcquireSpinLock(&(timer->Lock), &Irql); + cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); + KeReleaseSpinLock(&(timer->Lock), Irql); + + /* call the user specified timer procedure */ + timer->proc((unsigned long)(timer->arg)); +} + +/* + * cfs_timer_init + * To initialize the cfs_timer_t + * + * Arguments: + * timer: the cfs_timer to be initialized + * func: the timer callback procedure + * arg: argument for the callback proc + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_timer_init(cfs_timer_t *timer, void (*func)(unsigned long), void *arg) +{ + memset(timer, 0, sizeof(cfs_timer_t)); + + timer->proc = func; + timer->arg = arg; + + KeInitializeSpinLock(&(timer->Lock)); + KeInitializeTimer(&timer->Timer); + KeInitializeDpc (&timer->Dpc, cfs_timer_dpc_proc, timer); + + cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_INITED); +} + +/* + * cfs_timer_done + * To finialize the cfs_timer_t (unused) + * + * Arguments: + * timer: the cfs_timer to be cleaned up + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_timer_done(cfs_timer_t *timer) +{ + return; +} + +/* + * cfs_timer_arm + * To schedule the timer while touching @deadline + * + * Arguments: + * timer: the cfs_timer to be freed + * dealine: timeout value to wake up the timer + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_timer_arm(cfs_timer_t *timer, cfs_time_t deadline) +{ + LARGE_INTEGER timeout; + KIRQL Irql; + + KeAcquireSpinLock(&(timer->Lock), &Irql); + if (!cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)){ + + timeout.QuadPart = (LONGLONG)-1*1000*1000*10/HZ*deadline; + + if (KeSetTimer(&timer->Timer, timeout, &timer->Dpc )) { + cfs_set_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); + } + + timer->deadline = deadline; + } + + KeReleaseSpinLock(&(timer->Lock), Irql); +} + +/* + * cfs_timer_disarm + * To discard the timer to be scheduled + * + * Arguments: + * timer: the cfs_timer to be discarded + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_timer_disarm(cfs_timer_t *timer) +{ + KIRQL Irql; + + KeAcquireSpinLock(&(timer->Lock), &Irql); + KeCancelTimer(&(timer->Timer)); + cfs_clear_flag(timer->Flags, CFS_TIMER_FLAG_TIMERED); + KeReleaseSpinLock(&(timer->Lock), Irql); +} + + +/* + * cfs_timer_is_armed + * To check the 
timer is scheduled or not + * + * Arguments: + * timer: the cfs_timer to be checked + * + * Return Value: + * 1: if it's armed. + * 0: if it's not. + * + * Notes: + * N/A + */ + +int cfs_timer_is_armed(cfs_timer_t *timer) +{ + int rc = 0; + KIRQL Irql; + + KeAcquireSpinLock(&(timer->Lock), &Irql); + if (cfs_is_flag_set(timer->Flags, CFS_TIMER_FLAG_TIMERED)) { + rc = 1; + } + KeReleaseSpinLock(&(timer->Lock), Irql); + + return rc; +} + +/* + * cfs_timer_deadline + * To query the deadline of the timer + * + * Arguments: + * timer: the cfs_timer to be queried + * + * Return Value: + * the deadline value + * + * Notes: + * N/A + */ + +cfs_time_t cfs_timer_deadline(cfs_timer_t * timer) +{ + return timer->deadline; +} + +/* + * daemonize routine stub + */ + +void cfs_daemonize(char *str) +{ + return; +} + +/* + * routine related with sigals + */ + +cfs_sigset_t cfs_get_blockedsigs() +{ + return 0; +} + +cfs_sigset_t cfs_block_allsigs() +{ + return 0; +} + +cfs_sigset_t cfs_block_sigs(sigset_t bit) +{ + return 0; +} + +void cfs_restore_sigs(cfs_sigset_t old) +{ +} + +int cfs_signal_pending(void) +{ + return 0; +} + +void cfs_clear_sigpending(void) +{ + return; +} + +/** + ** Initialize routines + **/ + +int +libcfs_arch_init(void) +{ + int rc; + + spinlock_t lock; + /* Workground to check the system is MP build or UP build */ + spin_lock_init(&lock); + spin_lock(&lock); + MPSystem = (int)lock.lock; + /* MP build system: it's a real spin, for UP build system, it + only raises the IRQL to DISPATCH_LEVEL */ + spin_unlock(&lock); + + /* create slab memory caches for page alloctors */ + cfs_page_t_slab = cfs_mem_cache_create( + "CPGT", sizeof(cfs_page_t), 0, 0 ); + + cfs_page_p_slab = cfs_mem_cache_create( + "CPGP", CFS_PAGE_SIZE, 0, 0 ); + + if ( cfs_page_t_slab == NULL || + cfs_page_p_slab == NULL ){ + rc = -ENOMEM; + goto errorout; + } + + rc = init_task_manager(); + + if (rc != 0) { + cfs_enter_debugger(); + KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing task manager ...\n")); + goto errorout; + } + + /* initialize the proc file system */ + rc = proc_init_fs(); + + if (rc != 0) { + cfs_enter_debugger(); + KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing proc fs ...\n")); + cleanup_task_manager(); + goto errorout; + } + + /* initialize the tdi data */ + rc = ks_init_tdi_data(); + + if (rc != 0) { + cfs_enter_debugger(); + KdPrint(("winnt-prim.c:libcfs_arch_init: error initializing tdi ...\n")); + proc_destroy_fs(); + cleanup_task_manager(); + goto errorout; + } + +errorout: + + if (rc != 0) { + /* destroy the taskslot cache slab */ + if (cfs_page_t_slab) { + cfs_mem_cache_destroy(cfs_page_t_slab); + } + if (cfs_page_p_slab) { + cfs_mem_cache_destroy(cfs_page_p_slab); + } + } + + return rc; +} + +void +libcfs_arch_cleanup(void) +{ + /* finialize the tdi data */ + ks_fini_tdi_data(); + + /* detroy the whole proc fs tree and nodes */ + proc_destroy_fs(); + + /* destroy the taskslot cache slab */ + if (cfs_page_t_slab) { + cfs_mem_cache_destroy(cfs_page_t_slab); + } + + if (cfs_page_p_slab) { + cfs_mem_cache_destroy(cfs_page_p_slab); + } + + return; +} + +EXPORT_SYMBOL(libcfs_arch_init); +EXPORT_SYMBOL(libcfs_arch_cleanup); diff --git a/libcfs/libcfs/winnt/winnt-proc.c b/libcfs/libcfs/winnt/winnt-proc.c new file mode 100644 index 0000000..cfb8d38 --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-proc.c @@ -0,0 +1,2039 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * + * Copyright (c) 2004 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +# define DEBUG_SUBSYSTEM S_LNET + +#include +#include +#include "tracefile.h" + +#ifdef __KERNEL__ + + +/* + * /proc emulator routines ... + */ + +/* The root node of the proc fs emulation: /proc */ +cfs_proc_entry_t * proc_fs_root = NULL; + + +/* The sys root: /proc/sys */ +cfs_proc_entry_t * proc_sys_root = NULL; + + +/* The sys root: /proc/dev | to implement misc device */ + +cfs_proc_entry_t * proc_dev_root = NULL; + + +/* SLAB object for cfs_proc_entry_t allocation */ + +cfs_mem_cache_t * proc_entry_cache = NULL; + +/* root node for sysctl table */ + +cfs_sysctl_table_header_t root_table_header; + +/* The global lock to protect all the access */ + +#if LIBCFS_PROCFS_SPINLOCK +spinlock_t proc_fs_lock; + +#define INIT_PROCFS_LOCK() spin_lock_init(&proc_fs_lock) +#define LOCK_PROCFS() spin_lock(&proc_fs_lock) +#define UNLOCK_PROCFS() spin_unlock(&proc_fs_lock) + +#else + +mutex_t proc_fs_lock; + +#define INIT_PROCFS_LOCK() init_mutex(&proc_fs_lock) +#define LOCK_PROCFS() mutex_down(&proc_fs_lock) +#define UNLOCK_PROCFS() mutex_up(&proc_fs_lock) + +#endif + +static ssize_t +proc_file_read(struct file * file, const char * buf, size_t nbytes, loff_t *ppos) +{ + char *page; + ssize_t retval=0; + int eof=0; + ssize_t n, count; + char *start; + cfs_proc_entry_t * dp; + + dp = (cfs_proc_entry_t *) file->private_data; + if (!(page = (char*) cfs_alloc(CFS_PAGE_SIZE, 0))) + return -ENOMEM; + + while ((nbytes > 0) && !eof) { + + count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); + + start = NULL; + if (dp->read_proc) { + n = dp->read_proc( page, &start, (long)*ppos, + count, &eof, dp->data); + } else + break; + + if (!start) { + /* + * For proc files that are less than 4k + */ + start = page + *ppos; + n -= (ssize_t)(*ppos); + if (n <= 0) + break; + if (n > count) + n = count; + } + if (n == 0) + break; /* End of file */ + if (n < 0) { + if (retval == 0) + retval = n; + break; + } + + n -= copy_to_user((void *)buf, start, n); + if (n == 0) { + if (retval == 0) + retval = -EFAULT; + break; + } + + *ppos += n; + nbytes -= n; + buf += n; + retval += n; + } + cfs_free(page); + + return retval; +} + +static ssize_t +proc_file_write(struct file * file, const char * buffer, + size_t count, loff_t *ppos) +{ + cfs_proc_entry_t * dp; + + dp = (cfs_proc_entry_t *) file->private_data; + + if (!dp->write_proc) + return -EIO; + + /* FIXME: does this routine need ppos? probably... 
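   Editor's note (not part of the original patch): as written the offset is
   neither read nor advanced here, and write_proc only receives the file,
   buffer, count and the dp->data cookie, so a handler that needs a file
   position has to track it itself (for instance in dp->data).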
*/ + return dp->write_proc(file, buffer, count, dp->data); +} + +struct file_operations proc_file_operations = { + /*lseek:*/ NULL, //proc_file_lseek, + /*read:*/ proc_file_read, + /*write:*/ proc_file_write, + /*ioctl:*/ NULL, + /*open:*/ NULL, + /*release:*/ NULL +}; + +/* allocate proc entry block */ + +cfs_proc_entry_t * +proc_alloc_entry() +{ + cfs_proc_entry_t * entry = NULL; + + entry = cfs_mem_cache_alloc(proc_entry_cache, 0); + if (!entry) { + return NULL; + } + + memset(entry, 0, sizeof(cfs_proc_entry_t)); + + entry->magic = CFS_PROC_ENTRY_MAGIC; + RtlInitializeSplayLinks(&(entry->s_link)); + entry->proc_fops = &proc_file_operations; + + return entry; +} + +/* free the proc entry block */ + +void +proc_free_entry(cfs_proc_entry_t * entry) + +{ + ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); + + cfs_mem_cache_free(proc_entry_cache, entry); +} + +/* dissect the path string for a given full proc path */ + +void +proc_dissect_name( + char *path, + char **first, + int *first_len, + char **remain + ) +{ + int i = 0, j = 0, len = 0; + + *first = *remain = NULL; + *first_len = 0; + + len = strlen(path); + + while (i < len && (path[i] == '/')) i++; + + if (i < len) { + + *first = path + i; + while (i < len && (path[i] != '/')) i++; + *first_len = (path + i - *first); + + if (i + 1 < len) { + *remain = path + i + 1; + } + } +} + +/* search the children entries of the parent entry */ + +cfs_proc_entry_t * +proc_search_splay ( + cfs_proc_entry_t * parent, + char * name + ) +{ + cfs_proc_entry_t * node; + PRTL_SPLAY_LINKS link; + + ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); + ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); + + link = parent->root; + + while (link) { + + ANSI_STRING ename,nname; + long result; + + node = CONTAINING_RECORD(link, cfs_proc_entry_t, s_link); + + ASSERT(node->magic == CFS_PROC_ENTRY_MAGIC); + + /* Compare the prefix in the tree with the full name */ + + RtlInitAnsiString(&ename, name); + RtlInitAnsiString(&nname, node->name); + + result = RtlCompareString(&nname, &ename,TRUE); + + if (result > 0) { + + /* The prefix is greater than the full name + so we go down the left child */ + + link = RtlLeftChild(link); + + } else if (result < 0) { + + /* The prefix is less than the full name + so we go down the right child */ + // + + link = RtlRightChild(link); + + } else { + + /* We got the entry in the splay tree and + make it root node instead */ + + parent->root = RtlSplay(link); + + return node; + } + + /* we need continue searching down the tree ... 
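As a worked example (not part of the patch), the splitting performed by proc_dissect_name() on a hypothetical path proceeds like this:

    char  path[] = "sys/lnet/debug";
    char *first, *remain;
    int   flen;

    proc_dissect_name(path, &first, &flen, &remain);
    /* first  -> "sys/lnet/debug", flen == 3 (the "sys" component)
     * remain -> "lnet/debug"; repeating the call on remain peels off
     * "lnet" and finally "debug", at which point remain stays NULL. */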
*/ + } + + /* There's no the exptected entry in the splay tree */ + + return NULL; +} + +int +proc_insert_splay ( + cfs_proc_entry_t * parent, + cfs_proc_entry_t * child + ) +{ + cfs_proc_entry_t * entry; + + ASSERT(parent != NULL && child != NULL); + ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); + ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); + ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); + + if (!parent->root) { + parent->root = &(child->s_link); + } else { + entry = CONTAINING_RECORD(parent->root, cfs_proc_entry_t, s_link); + while (TRUE) { + long result; + ANSI_STRING ename, cname; + + ASSERT(entry->magic == CFS_PROC_ENTRY_MAGIC); + + RtlInitAnsiString(&ename, entry->name); + RtlInitAnsiString(&cname, child->name); + + result = RtlCompareString(&ename, &cname,TRUE); + + if (result == 0) { + cfs_enter_debugger(); + if (entry == child) { + break; + } + return FALSE; + } + + if (result > 0) { + if (RtlLeftChild(&entry->s_link) == NULL) { + RtlInsertAsLeftChild(&entry->s_link, &child->s_link); + break; + } else { + entry = CONTAINING_RECORD( RtlLeftChild(&entry->s_link), + cfs_proc_entry_t, s_link); + } + } else { + if (RtlRightChild(&entry->s_link) == NULL) { + RtlInsertAsRightChild(&entry->s_link, &child->s_link); + break; + } else { + entry = CONTAINING_RECORD( RtlRightChild(&entry->s_link), + cfs_proc_entry_t, s_link ); + } + } + } + } + + cfs_set_flag(child->flags, CFS_PROC_FLAG_ATTACHED); + parent->nlink++; + + return TRUE; +} + + +/* remove a child entry from the splay tree */ +int +proc_remove_splay ( + cfs_proc_entry_t * parent, + cfs_proc_entry_t * child + ) +{ + cfs_proc_entry_t * entry = NULL; + + ASSERT(parent != NULL && child != NULL); + ASSERT(parent->magic == CFS_PROC_ENTRY_MAGIC); + ASSERT(child->magic == CFS_PROC_ENTRY_MAGIC); + ASSERT(cfs_is_flag_set(parent->flags, CFS_PROC_FLAG_DIRECTORY)); + ASSERT(cfs_is_flag_set(child->flags, CFS_PROC_FLAG_ATTACHED)); + + entry = proc_search_splay(parent, child->name); + + if (entry) { + ASSERT(entry == child); + parent->root = RtlDelete(&(entry->s_link)); + parent->nlink--; + } else { + cfs_enter_debugger(); + return FALSE; + } + + return TRUE; +} + + +/* search a node inside the proc fs tree */ + +cfs_proc_entry_t * +proc_search_entry( + char * name, + cfs_proc_entry_t * root + ) +{ + cfs_proc_entry_t * entry; + cfs_proc_entry_t * parent; + char *first, *remain; + int flen; + char *ename = NULL; + + parent = root; + entry = NULL; + + ename = cfs_alloc(0x21, CFS_ALLOC_ZERO); + + if (ename == NULL) { + goto errorout; + } + +again: + + /* dissect the file name string */ + proc_dissect_name(name, &first, &flen, &remain); + + if (first) { + + if (flen >= 0x20) { + cfs_enter_debugger(); + entry = NULL; + goto errorout; + } + + memset(ename, 0, 0x20); + memcpy(ename, first, flen); + + entry = proc_search_splay(parent, ename); + + if (!entry) { + goto errorout; + } + + if (remain) { + name = remain; + parent = entry; + + goto again; + } + } + +errorout: + + if (ename) { + cfs_free(ename); + } + + return entry; +} + +/* insert the path nodes to the proc fs tree */ + +cfs_proc_entry_t * +proc_insert_entry( + char * name, + cfs_proc_entry_t * root + ) +{ + cfs_proc_entry_t *entry; + cfs_proc_entry_t *parent; + char *first, *remain; + int flen; + char ename[0x20]; + + parent = root; + entry = NULL; + +again: + + proc_dissect_name(name, &first, &flen, &remain); + + if (first) { + + if (flen >= 0x20) { + return NULL; + } + + memset(ename, 0, 0x20); + memcpy(ename, first, flen); + + entry = proc_search_splay(parent, ename); + 
+ if (!entry) { + entry = proc_alloc_entry(); + memcpy(entry->name, ename, flen); + + if (entry) { + if(!proc_insert_splay(parent, entry)) { + proc_free_entry(entry); + entry = NULL; + } + } + } + + if (!entry) { + return NULL; + } + + if (remain) { + entry->mode |= S_IFDIR | S_IRUGO | S_IXUGO; + cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); + name = remain; + parent = entry; + goto again; + } + } + + return entry; +} + +/* remove the path nodes from the proc fs tree */ + +void +proc_remove_entry( + char * name, + cfs_proc_entry_t * root + ) +{ + cfs_proc_entry_t *entry; + char *first, *remain; + int flen; + char ename[0x20]; + + entry = NULL; + + proc_dissect_name(name, &first, &flen, &remain); + + if (first) { + + memset(ename, 0, 0x20); + memcpy(ename, first, flen); + + entry = proc_search_splay(root, ename); + + if (entry) { + + if (remain) { + ASSERT(S_ISDIR(entry->mode)); + proc_remove_entry(remain, entry); + } + + if (!entry->nlink) { + proc_remove_splay(root, entry); + proc_free_entry(entry); + } + } + } else { + cfs_enter_debugger(); + } +} + +/* create proc entry and insert it into the proc fs */ + +cfs_proc_entry_t * +create_proc_entry ( + char * name, + mode_t mode, + cfs_proc_entry_t * root + ) +{ + cfs_proc_entry_t *parent = root; + cfs_proc_entry_t *entry = NULL; + + if (S_ISDIR(mode)) { + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO | S_IXUGO; + } else { + if ((mode & S_IFMT) == 0) + mode |= S_IFREG; + if ((mode & S_IALLUGO) == 0) + mode |= S_IRUGO; + } + + LOCK_PROCFS(); + + ASSERT(NULL != proc_fs_root); + + if (!parent) { + parent = proc_fs_root; + } + + entry = proc_search_entry(name, parent); + + if (!entry) { + entry = proc_insert_entry(name, parent); + if (!entry) { + /* Failed to create/insert the splay node ... */ + cfs_enter_debugger(); + goto errorout; + } + /* Initializing entry ... 
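A hypothetical caller of create_proc_entry() as declared above; intermediate path components become directory nodes automatically, and the read handler wired up here is the illustrative one sketched earlier.

    cfs_proc_entry_t *ent;

    ent = create_proc_entry("sys/lnet/example_stat", 0644, NULL); /* NULL -> proc_fs_root */
    if (ent != NULL) {
        ent->read_proc = example_read_proc;   /* hypothetical handler     */
        ent->data      = &example_value;      /* hypothetical backing int */
    }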
*/ + entry->mode = mode; + + if (S_ISDIR(mode)) { + cfs_set_flag(entry->flags, CFS_PROC_FLAG_DIRECTORY); + } + } + +errorout: + + UNLOCK_PROCFS(); + + return entry; +} + + +/* search the specified entry form the proc fs */ + +cfs_proc_entry_t * +search_proc_entry( + char * name, + cfs_proc_entry_t * root + ) +{ + cfs_proc_entry_t * entry; + + LOCK_PROCFS(); + if (root == NULL) { + root = proc_fs_root; + } + entry = proc_search_entry(name, root); + UNLOCK_PROCFS(); + + return entry; +} + +/* remove the entry from the proc fs */ + +void +remove_proc_entry( + char * name, + cfs_proc_entry_t * parent + ) +{ + LOCK_PROCFS(); + if (parent == NULL) { + parent = proc_fs_root; + } + proc_remove_entry(name, parent); + UNLOCK_PROCFS(); +} + + +void proc_destroy_splay(cfs_proc_entry_t * entry) +{ + cfs_proc_entry_t * node; + + if (S_ISDIR(entry->mode)) { + + while (entry->root) { + node = CONTAINING_RECORD(entry->root, cfs_proc_entry_t, s_link); + entry->root = RtlDelete(&(node->s_link)); + proc_destroy_splay(node); + } + } + + proc_free_entry(entry); +} + + +/* destory the whole proc fs tree */ + +void proc_destroy_fs() +{ + LOCK_PROCFS(); + + if (proc_fs_root) { + proc_destroy_splay(proc_fs_root); + } + + if (proc_entry_cache) { + cfs_mem_cache_destroy(proc_entry_cache); + } + + UNLOCK_PROCFS(); +} + +/* initilaize / build the proc fs tree */ + +int proc_init_fs() +{ + cfs_proc_entry_t * root = NULL; + + memset(&(root_table_header), 0, sizeof(struct ctl_table_header)); + INIT_LIST_HEAD(&(root_table_header.ctl_entry)); + + INIT_PROCFS_LOCK(); + proc_entry_cache = cfs_mem_cache_create( + NULL, + sizeof(cfs_proc_entry_t), + 0, + 0 + ); + + if (!proc_entry_cache) { + return (-ENOMEM); + } + + root = proc_alloc_entry(); + + if (!root) { + proc_destroy_fs(); + return (-ENOMEM); + } + + root->magic = CFS_PROC_ENTRY_MAGIC; + root->flags = CFS_PROC_FLAG_DIRECTORY; + root->mode = S_IFDIR | S_IRUGO | S_IXUGO; + root->nlink = 3; // root should never be deleted. + + root->name[0]='p'; + root->name[1]='r'; + root->name[2]='o'; + root->name[3]='c'; + + proc_fs_root = root; + + proc_sys_root = create_proc_entry("sys", S_IFDIR, root); + + if (!proc_sys_root) { + proc_free_entry(root); + proc_fs_root = NULL; + proc_destroy_fs(); + return (-ENOMEM); + } + + proc_sys_root->nlink = 1; + + proc_dev_root = create_proc_entry("dev", S_IFDIR, root); + + if (!proc_dev_root) { + proc_free_entry(proc_sys_root); + proc_sys_root = NULL; + proc_free_entry(proc_fs_root); + proc_fs_root = NULL; + proc_destroy_fs(); + return (-ENOMEM); + } + + proc_dev_root->nlink = 1; + + return 0; +} + + +static ssize_t do_rw_proc(int write, struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + int op; + cfs_proc_entry_t *de; + struct ctl_table *table; + size_t res; + ssize_t error; + + de = (cfs_proc_entry_t *) file->proc_dentry; + + if (!de || !de->data) + return -ENOTDIR; + table = (struct ctl_table *) de->data; + if (!table || !table->proc_handler) + return -ENOTDIR; + op = (write ? 002 : 004); + +// if (ctl_perm(table, op)) +// return -EPERM; + + res = count; + + /* + * FIXME: we need to pass on ppos to the handler. 
+ */ + + error = (*table->proc_handler) (table, write, file, buf, &res); + if (error) + return error; + return res; +} + +static ssize_t proc_readsys(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + return do_rw_proc(0, file, buf, count, ppos); +} + +static ssize_t proc_writesys(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + return do_rw_proc(1, file, (char *) buf, count, ppos); +} + + +struct file_operations proc_sys_file_operations = { + /*lseek:*/ NULL, + /*read:*/ proc_readsys, + /*write:*/ proc_writesys, + /*ioctl:*/ NULL, + /*open:*/ NULL, + /*release:*/ NULL +}; + + +/* Scan the sysctl entries in table and add them all into /proc */ +void register_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t * root) +{ + cfs_proc_entry_t * de; + int len; + mode_t mode; + + for (; table->ctl_name; table++) { + /* Can't do anything without a proc name. */ + if (!table->procname) + continue; + /* Maybe we can't do anything with it... */ + if (!table->proc_handler && !table->child) { + printk(KERN_WARNING "SYSCTL: Can't register %s\n", + table->procname); + continue; + } + + len = strlen(table->procname); + mode = table->mode; + + de = NULL; + if (table->proc_handler) + mode |= S_IFREG; + else { + de = search_proc_entry(table->procname, root); + if (de) { + break; + } + /* If the subdir exists already, de is non-NULL */ + } + + if (!de) { + + de = create_proc_entry((char *)table->procname, mode, root); + if (!de) + continue; + de->data = (void *) table; + if (table->proc_handler) { + de->proc_fops = &proc_sys_file_operations; + } + } + table->de = de; + if (de->mode & S_IFDIR) + register_proc_table(table->child, de); + } +} + + +/* + * Unregister a /proc sysctl table and any subdirectories. + */ +void unregister_proc_table(cfs_sysctl_table_t * table, cfs_proc_entry_t *root) +{ + cfs_proc_entry_t *de; + for (; table->ctl_name; table++) { + if (!(de = table->de)) + continue; + if (de->mode & S_IFDIR) { + if (!table->child) { + printk (KERN_ALERT "Help - malformed sysctl tree on free\n"); + continue; + } + unregister_proc_table(table->child, de); + + /* Don't unregister directories which still have entries.. */ + if (de->nlink) + continue; + } + + /* Don't unregister proc entries that are still being used.. 
*/ + if (de->nlink) + continue; + + table->de = NULL; + remove_proc_entry((char *)table->procname, root); + } +} + +/* The generic string strategy routine: */ +int sysctl_string(cfs_sysctl_table_t *table, int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + int l, len; + + if (!table->data || !table->maxlen) + return -ENOTDIR; + + if (oldval && oldlenp) { + if(get_user(len, oldlenp)) + return -EFAULT; + if (len) { + l = strlen(table->data); + if (len > l) len = l; + if (len >= table->maxlen) + len = table->maxlen; + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(0, ((char *) oldval) + len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; + } + } + if (newval && newlen) { + len = newlen; + if (len > table->maxlen) + len = table->maxlen; + if(copy_from_user(table->data, newval, len)) + return -EFAULT; + if (len == table->maxlen) + len--; + ((char *) table->data)[len] = 0; + } + return 0; +} + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0, value; + + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +#define OP_SET 0 +#define OP_AND 1 +#define OP_OR 2 +#define OP_MAX 3 +#define OP_MIN 4 + + +static int do_proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, + void *buffer, size_t *lenp, int conv, int op) +{ + int *i, vleft, first=1, neg, val; + size_t left, len; + + #define TMPBUFLEN 20 + char buf[TMPBUFLEN], *p; + + if (!table->data || !table->maxlen || !*lenp) + { + *lenp = 0; + return 0; + } + + i = (int *) table->data; + vleft = table->maxlen / sizeof(int); + left = *lenp; + + for (; left && vleft--; i++, first=0) { + if (write) { + while (left) { + char c; + if(get_user(c,(char *) buffer)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + ((char *) buffer)++; + } + if (!left) + break; + neg = 0; + len = left; + if (len > TMPBUFLEN-1) + len = TMPBUFLEN-1; + if(copy_from_user(buf, buffer, len)) + return -EFAULT; + buf[len] = 0; + p = buf; + if (*p == '-' && left > 1) { + neg = 1; + left--, p++; + } + if (*p < '0' || *p > '9') + break; + val = simple_strtoul(p, &p, 0) * conv; + len = p-buf; + if ((len < left) && *p && !isspace(*p)) + break; + if (neg) + val = -val; + (char *)buffer += len; + left -= len; + switch(op) { + case OP_SET: *i = val; break; + case OP_AND: *i &= val; break; + case OP_OR: *i |= val; break; + case OP_MAX: if(*i < val) + *i = val; + break; + case OP_MIN: if(*i > val) + *i = val; + break; + } + } else { + p = buf; + if (!first) + *p++ = '\t'; + sprintf(p, "%d", (*i) / conv); + len = strlen(buf); + if (len > left) + len = left; + if(copy_to_user(buffer, buf, len)) + return -EFAULT; + left -= len; + (char *)buffer += len; + } + } + + if (!write && !first && left) { + if(put_user('\n', (char *) buffer)) + return -EFAULT; + left--, ((char *)buffer)++; + } + if (write) { + p = (char *) buffer; + while (left) { + char c; + if(get_user(c, p++)) + return -EFAULT; + if (!isspace(c)) + break; + left--; + } + } + if 
(write && first) + return -EINVAL; + *lenp -= left; + memset(&(filp->f_pos) , 0, sizeof(loff_t)); + filp->f_pos += (loff_t)(*lenp); + return 0; +} + +/** + * proc_dointvec - read a vector of integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. + */ +int proc_dointvec(cfs_sysctl_table_t *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + return do_proc_dointvec(table,write,filp,buffer,lenp,1,OP_SET); +} + + +/** + * proc_dostring - read a string sysctl + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @filp: the file structure + * @buffer: the user buffer + * @lenp: the size of the user buffer + * + * Reads/writes a string from/to the user buffer. If the kernel + * buffer provided is not large enough to hold the string, the + * string is truncated. The copied string is %NULL-terminated. + * If the string is being read by the user process, it is copied + * and a newline '\n' is added. It is truncated if the buffer is + * not large enough. + * + * Returns 0 on success. + */ +int proc_dostring(cfs_sysctl_table_t *table, int write, struct file *filp, + void *buffer, size_t *lenp) +{ + size_t len; + char *p, c; + + if (!table->data || !table->maxlen || !*lenp || + (filp->f_pos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = 0; + p = buffer; + while (len < *lenp) { + if(get_user(c, p++)) + return -EFAULT; + if (c == 0 || c == '\n') + break; + len++; + } + if (len >= (size_t)table->maxlen) + len = (size_t)table->maxlen-1; + if(copy_from_user(table->data, buffer, len)) + return -EFAULT; + ((char *) table->data)[len] = 0; + filp->f_pos += *lenp; + } else { + len = (size_t)strlen(table->data); + if (len > (size_t)table->maxlen) + len = (size_t)table->maxlen; + if (len > *lenp) + len = *lenp; + if (len) + if(copy_to_user(buffer, table->data, len)) + return -EFAULT; + if (len < *lenp) { + if(put_user('\n', ((char *) buffer) + len)) + return -EFAULT; + len++; + } + *lenp = len; + filp->f_pos += len; + } + return 0; +} + +/* Perform the actual read/write of a sysctl table entry. 
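A sysctl entry may also supply its own text handler; the sketch below (illustrative name only) matches the handler signature invoked through table->proc_handler in do_rw_proc() above and simply defers to the generic proc_dointvec().

    static int example_int_handler(cfs_sysctl_table_t *table, int write,
                                   struct file *filp, void *buffer, size_t *lenp)
    {
        /* a real handler could range-check writes here before falling
         * back to the generic integer handler defined above */
        return proc_dointvec(table, write, filp, buffer, lenp);
    }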
*/ +int do_sysctl_strategy (cfs_sysctl_table_t *table, + int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, void **context) +{ + int op = 0, rc; + size_t len; + + if (oldval) + op |= 004; + if (newval) + op |= 002; + + if (table->strategy) { + rc = table->strategy(table, name, nlen, oldval, oldlenp, + newval, newlen, context); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + } + + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { + if (oldval && oldlenp) { + get_user(len, oldlenp); + if (len) { + if (len > (size_t)table->maxlen) + len = (size_t)table->maxlen; + if(copy_to_user(oldval, table->data, len)) + return -EFAULT; + if(put_user(len, oldlenp)) + return -EFAULT; + } + } + if (newval && newlen) { + len = newlen; + if (len > (size_t)table->maxlen) + len = (size_t)table->maxlen; + if(copy_from_user(table->data, newval, len)) + return -EFAULT; + } + } + return 0; +} + +static int parse_table(int *name, int nlen, + void *oldval, size_t *oldlenp, + void *newval, size_t newlen, + cfs_sysctl_table_t *table, void **context) +{ + int n; + +repeat: + + if (!nlen) + return -ENOTDIR; + if (get_user(n, name)) + return -EFAULT; + for ( ; table->ctl_name; table++) { + if (n == table->ctl_name || table->ctl_name == CTL_ANY) { + int error; + if (table->child) { +/* + if (ctl_perm(table, 001)) + return -EPERM; +*/ + if (table->strategy) { + error = table->strategy( + table, name, nlen, + oldval, oldlenp, + newval, newlen, context); + if (error) + return error; + } + name++; + nlen--; + table = table->child; + goto repeat; + } + error = do_sysctl_strategy(table, name, nlen, + oldval, oldlenp, + newval, newlen, context); + return error; + } + } + return -ENOTDIR; +} + +int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp, + void *newval, size_t newlen) +{ + struct list_head *tmp; + + if (nlen <= 0 || nlen >= CTL_MAXNAME) + return -ENOTDIR; + if (oldval) { + int old_len; + if (!oldlenp || get_user(old_len, oldlenp)) + return -EFAULT; + } + tmp = &root_table_header.ctl_entry; + do { + struct ctl_table_header *head = + list_entry(tmp, struct ctl_table_header, ctl_entry); + void *context = NULL; + int error = parse_table(name, nlen, oldval, oldlenp, + newval, newlen, head->ctl_table, + &context); + if (context) + cfs_free(context); + if (error != -ENOTDIR) + return error; + tmp = tmp->next; + } while (tmp != &root_table_header.ctl_entry); + return -ENOTDIR; +} + +/** + * register_sysctl_table - register a sysctl heirarchy + * @table: the top-level table structure + * @insert_at_head: whether the entry should be inserted in front or at the end + * + * Register a sysctl table heirarchy. @table should be a filled in ctl_table + * array. An entry with a ctl_name of 0 terminates the table. + * + * The members of the &ctl_table structure are used as follows: + * + * ctl_name - This is the numeric sysctl value used by sysctl(2). The number + * must be unique within that level of sysctl + * + * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not + * enter a sysctl file + * + * data - a pointer to data for use by proc_handler + * + * maxlen - the maximum size in bytes of the data + * + * mode - the file permissions for the /proc/sys file, and for sysctl(2) + * + * child - a pointer to the child sysctl table if this entry is a directory, or + * %NULL. 
+ * + * proc_handler - the text handler routine (described below) + * + * strategy - the strategy routine (described below) + * + * de - for internal use by the sysctl routines + * + * extra1, extra2 - extra pointers usable by the proc handler routines + * + * Leaf nodes in the sysctl tree will be represented by a single file + * under /proc; non-leaf nodes will be represented by directories. + * + * sysctl(2) can automatically manage read and write requests through + * the sysctl table. The data and maxlen fields of the ctl_table + * struct enable minimal validation of the values being written to be + * performed, and the mode field allows minimal authentication. + * + * More sophisticated management can be enabled by the provision of a + * strategy routine with the table entry. This will be called before + * any automatic read or write of the data is performed. + * + * The strategy routine may return + * + * < 0 - Error occurred (error is passed to user process) + * + * 0 - OK - proceed with automatic read or write. + * + * > 0 - OK - read or write has been done by the strategy routine, so + * return immediately. + * + * There must be a proc_handler routine for any terminal nodes + * mirrored under /proc/sys (non-terminals are handled by a built-in + * directory handler). Several default handlers are available to + * cover common cases - + * + * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(), + * proc_dointvec_minmax(), proc_doulongvec_ms_jiffies_minmax(), + * proc_doulongvec_minmax() + * + * It is the handler's job to read the input buffer from user memory + * and process it. The handler should return 0 on success. + * + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ +struct ctl_table_header *register_sysctl_table(cfs_sysctl_table_t * table, + int insert_at_head) +{ + struct ctl_table_header *tmp; + tmp = cfs_alloc(sizeof(struct ctl_table_header), 0); + if (!tmp) + return NULL; + tmp->ctl_table = table; + + INIT_LIST_HEAD(&tmp->ctl_entry); + if (insert_at_head) + list_add(&tmp->ctl_entry, &root_table_header.ctl_entry); + else + list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); +#ifdef CONFIG_PROC_FS + register_proc_table(table, proc_sys_root); +#endif + return tmp; +} + +/** + * unregister_sysctl_table - unregister a sysctl table heirarchy + * @header: the header returned from register_sysctl_table + * + * Unregisters the sysctl table and all children. proc entries may not + * actually be removed until they are no longer used by anyone. 
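To make the strategy contract above concrete, a sketch (illustrative names) of a strategy routine that rejects oversized writes and otherwise lets the automatic copy proceed:

    static int example_strategy(cfs_sysctl_table_t *table, int *name, int nlen,
                                void *oldval, size_t *oldlenp,
                                void *newval, size_t newlen, void **context)
    {
        if (newval != NULL && newlen > (size_t)table->maxlen)
            return -EINVAL;   /* < 0: error is passed back to the caller    */

        return 0;             /*   0: proceed with the automatic read/write */
    }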
+ */ +void unregister_sysctl_table(struct ctl_table_header * header) +{ + list_del(&header->ctl_entry); +#ifdef CONFIG_PROC_FS + unregister_proc_table(header->ctl_table, proc_sys_root); +#endif + cfs_free(header); +} + + +int cfs_psdev_register(cfs_psdev_t * psdev) +{ + cfs_proc_entry_t * entry; + + entry = create_proc_entry ( + (char *)psdev->name, + S_IFREG, + proc_dev_root + ); + + if (!entry) { + return -ENOMEM; + } + + entry->flags |= CFS_PROC_FLAG_MISCDEV; + + entry->proc_fops = psdev->fops; + entry->data = (void *)psdev; + + return 0; +} + +int cfs_psdev_deregister(cfs_psdev_t * psdev) +{ + cfs_proc_entry_t * entry; + + entry = search_proc_entry ( + (char *)psdev->name, + proc_dev_root + ); + + if (entry) { + + ASSERT(entry->data == (void *)psdev); + ASSERT(entry->flags & CFS_PROC_FLAG_MISCDEV); + + remove_proc_entry( + (char *)psdev->name, + proc_dev_root + ); + } + + return 0; +} + +extern char debug_file_path[1024]; + +#define PSDEV_LNET (0x100) +enum { + PSDEV_DEBUG = 1, /* control debugging */ + PSDEV_SUBSYSTEM_DEBUG, /* control debugging */ + PSDEV_PRINTK, /* force all messages to console */ + PSDEV_CONSOLE_RATELIMIT, /* rate limit console messages */ + PSDEV_DEBUG_PATH, /* crashdump log location */ + PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */ + PSDEV_LIBCFS_MEMUSED, /* bytes currently PORTAL_ALLOCated */ +}; + +static struct ctl_table lnet_table[] = { + {PSDEV_DEBUG, "debug", &libcfs_debug, sizeof(int), 0644, NULL, + &proc_dointvec}, + {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &libcfs_subsystem_debug, + sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_PRINTK, "printk", &libcfs_printk, sizeof(int), 0644, NULL, + &proc_dointvec}, + {PSDEV_CONSOLE_RATELIMIT, "console_ratelimit", &libcfs_console_ratelimit, + sizeof(int), 0644, NULL, &proc_dointvec}, + {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, + sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, +/* + {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, + sizeof(portals_upcall), 0644, NULL, &proc_dostring, + &sysctl_string}, +*/ + {PSDEV_LIBCFS_MEMUSED, "memused", (int *)&libcfs_kmemory.counter, + sizeof(int), 0644, NULL, &proc_dointvec}, + {0} +}; + +static struct ctl_table top_table[2] = { + {PSDEV_LNET, "lnet", NULL, 0, 0555, lnet_table}, + {0} +}; + + +int trace_write_dump_kernel(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int rc = trace_dump_debug_buffer_usrstr(buffer, count); + + return (rc < 0) ? rc : count; +} + +int trace_write_daemon_file(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int rc = trace_daemon_command_usrstr(buffer, count); + + return (rc < 0) ? rc : count; +} + +int trace_read_daemon_file(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + + tracefile_read_lock(); + + rc = trace_copyout_string(page, count, tracefile, "\n"); + + tracefile_read_unlock(); + + return rc; +} + +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int rc = trace_set_debug_mb_userstr(buffer, count); + + return (rc < 0) ? 
rc : count; +} + +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + char str[32]; + + snprintf(str, sizeof(str), "%d\n", trace_get_debug_mb()); + + return trace_copyout_string(page, count, str, NULL); +} + +int insert_proc(void) +{ + cfs_proc_entry_t *ent; + + ent = create_proc_entry("sys/lnet/dump_kernel", 0, NULL); + if (ent == NULL) { + CERROR(("couldn't register dump_kernel\n")); + return -1; + } + ent->write_proc = trace_write_dump_kernel; + + ent = create_proc_entry("sys/lnet/daemon_file", 0, NULL); + if (ent == NULL) { + CERROR(("couldn't register daemon_file\n")); + return -1; + } + ent->write_proc = trace_write_daemon_file; + ent->read_proc = trace_read_daemon_file; + + ent = create_proc_entry("sys/lnet/debug_mb", 0, NULL); + if (ent == NULL) { + CERROR(("couldn't register debug_mb\n")); + return -1; + } + ent->write_proc = trace_write_debug_mb; + ent->read_proc = trace_read_debug_mb; + + return 0; +} + +void remove_proc(void) +{ + remove_proc_entry("sys/portals/dump_kernel", NULL); + remove_proc_entry("sys/portals/daemon_file", NULL); + remove_proc_entry("sys/portals/debug_mb", NULL); + +#ifdef CONFIG_SYSCTL + if (portals_table_header) + unregister_sysctl_table(portals_table_header); + portals_table_header = NULL; +#endif +} + + +/* + * proc process routines of kernel space + */ + +cfs_file_t * +lustre_open_file(char * filename) +{ + int rc = 0; + cfs_file_t * fh = NULL; + cfs_proc_entry_t * fp = NULL; + + fp = search_proc_entry(filename, proc_fs_root); + + if (!fp) { + rc = -ENOENT; + return NULL; + } + + fh = cfs_alloc(sizeof(cfs_file_t), CFS_ALLOC_ZERO); + + if (!fh) { + rc = -ENOMEM; + return NULL; + } + + fh->private_data = (void *)fp; + fh->f_op = fp->proc_fops; + + if (fh->f_op->open) { + rc = (fh->f_op->open)(fh); + } else { + fp->nlink++; + } + + if (0 != rc) { + cfs_free(fh); + return NULL; + } + + return fh; +} + +int +lustre_close_file(cfs_file_t * fh) +{ + int rc = 0; + cfs_proc_entry_t * fp = NULL; + + fp = (cfs_proc_entry_t *) fh->private_data; + + if (fh->f_op->release) { + rc = (fh->f_op->release)(fh); + } else { + fp->nlink--; + } + + cfs_free(fh); + + return rc; +} + +int +lustre_do_ioctl( cfs_file_t * fh, + unsigned long cmd, + ulong_ptr arg ) +{ + int rc = 0; + + if (fh->f_op->ioctl) { + rc = (fh->f_op->ioctl)(fh, cmd, arg); + } + + if (rc != 0) { + printk("lustre_do_ioctl: fialed: cmd = %xh arg = %xh rc = %d\n", + cmd, arg, rc); + } + + return rc; +} + +int +lustre_ioctl_file(cfs_file_t * fh, PCFS_PROC_IOCTL devctl) +{ + int rc = 0; + ulong_ptr data; + + data = (ulong_ptr)devctl + sizeof(CFS_PROC_IOCTL); + + /* obd ioctl code */ + if (_IOC_TYPE(devctl->cmd) == 'f') { +#if 0 + struct obd_ioctl_data * obd = (struct obd_ioctl_data *) data; + + if ( devctl->cmd != (ULONG)OBD_IOC_BRW_WRITE && + devctl->cmd != (ULONG)OBD_IOC_BRW_READ ) { + + unsigned long off = obd->ioc_len; + + if (obd->ioc_pbuf1) { + obd->ioc_pbuf1 = (char *)(data + off); + off += size_round(obd->ioc_plen1); + } + + if (obd->ioc_pbuf2) { + obd->ioc_pbuf2 = (char *)(data + off); + } + } + #endif + } + + rc = lustre_do_ioctl(fh, devctl->cmd, data); + + return rc; +} + + +size_t +lustre_read_file( + cfs_file_t * fh, + loff_t off, + size_t size, + char * buf + ) +{ + size_t rc = 0; + + if (fh->f_op->read) { + rc = (fh->f_op->read) (fh, buf, size, &off); + } + + return rc; +} + + +size_t +lustre_write_file( + cfs_file_t * fh, + loff_t off, + size_t size, + char * buf + ) +{ + size_t rc = 0; + + if (fh->f_op->write) { + rc = (fh->f_op->write)(fh, buf, 
size, &off); + } + + return rc; +} + +#else /* !__KERNEL__ */ + +#include +#include +#include + +/* + * proc process routines of user space + */ + +HANDLE cfs_proc_open (char * filename, int oflag) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + int rc; + + HANDLE FileHandle = INVALID_HANDLE_VALUE; + OBJECT_ATTRIBUTES ObjectAttributes; + ACCESS_MASK DesiredAccess; + ULONG CreateDisposition; + ULONG ShareAccess; + ULONG CreateOptions; + UNICODE_STRING UnicodeName; + USHORT NameLength; + + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + UCHAR EaBuffer[EA_MAX_LENGTH]; + + /* Check the filename: should start with "/proc" or "/dev" */ + NameLength = (USHORT)strlen(filename); + if (NameLength > 0x05) { + if (_strnicmp(filename, "/proc/", 6) == 0) { + filename += 6; + NameLength -=6; + if (NameLength <= 0) { + rc = -EINVAL; + goto errorout; + } + } else if (_strnicmp(filename, "/dev/", 5) == 0) { + } else { + rc = -EINVAL; + goto errorout; + } + } else { + rc = -EINVAL; + goto errorout; + } + + /* Analyze the flags settings */ + + if (cfs_is_flag_set(oflag, O_WRONLY)) { + DesiredAccess = (GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = 0; + } else if (cfs_is_flag_set(oflag, O_RDWR)) { + DesiredAccess = (GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ | FILE_SHARE_WRITE; + } else { + DesiredAccess = (GENERIC_READ | SYNCHRONIZE); + ShareAccess = FILE_SHARE_READ; + } + + if (cfs_is_flag_set(oflag, O_CREAT)) { + if (cfs_is_flag_set(oflag, O_EXCL)) { + CreateDisposition = FILE_CREATE; + rc = -EINVAL; + goto errorout; + } else { + CreateDisposition = FILE_OPEN_IF; + } + } else { + CreateDisposition = FILE_OPEN; + } + + if (cfs_is_flag_set(oflag, O_TRUNC)) { + if (cfs_is_flag_set(oflag, O_EXCL)) { + CreateDisposition = FILE_OVERWRITE; + } else { + CreateDisposition = FILE_OVERWRITE_IF; + } + } + + CreateOptions = 0; + + if (cfs_is_flag_set(oflag, O_DIRECTORY)) { + cfs_set_flag(CreateOptions, FILE_DIRECTORY_FILE); + } + + if (cfs_is_flag_set(oflag, O_SYNC)) { + cfs_set_flag(CreateOptions, FILE_WRITE_THROUGH); + } + + if (cfs_is_flag_set(oflag, O_DIRECT)) { + cfs_set_flag(CreateOptions, FILE_NO_INTERMEDIATE_BUFFERING); + } + + /* Initialize the unicode path name for the specified file */ + RtlInitUnicodeString(&UnicodeName, LUSTRE_PROC_SYMLNK); + + /* Setup the object attributes structure for the file. */ + InitializeObjectAttributes( + &ObjectAttributes, + &UnicodeName, + OBJ_CASE_INSENSITIVE, + NULL, + NULL ); + + /* building EA for the proc entry ... */ + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = (UCHAR)NameLength; + Ea->EaValueLength = 0; + RtlCopyMemory( + &(Ea->EaName), + filename, + NameLength + 1 + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + + Ea->EaNameLength + 1; + + /* Now to open or create the file now */ + status = ZwCreateFile( + &FileHandle, + DesiredAccess, + &ObjectAttributes, + &iosb, + 0, + FILE_ATTRIBUTE_NORMAL, + ShareAccess, + CreateDisposition, + CreateOptions, + Ea, + EaLength ); + + /* Check the returned status of Iosb ... 
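For reference, a user-space caller would drive the cfs_proc_* wrappers in this section roughly as below; the path refers to the lnet "debug" entry registered elsewhere in this patch, and the flag value 0 (plain read) is an assumption.

    char   buf[64];
    int    n;
    HANDLE h = cfs_proc_open("/proc/sys/lnet/debug", 0 /* read-only */);

    if (h != INVALID_HANDLE_VALUE) {
        n = cfs_proc_read(h, buf, sizeof(buf) - 1);
        if (n > 0)
            buf[n] = '\0';
        cfs_proc_close(h);
    }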
*/ + + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + goto errorout; + } + +errorout: + + return FileHandle; +} + +int cfs_proc_close(HANDLE handle) +{ + if (handle) { + NtClose((HANDLE)handle); + } + + return 0; +} + +int cfs_proc_read(HANDLE handle, void *buffer, unsigned int count) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + LARGE_INTEGER offset; + + + offset.QuadPart = 0; + + /* read file data */ + status = NtReadFile( + (HANDLE)handle, + 0, + NULL, + NULL, + &iosb, + buffer, + count, + &offset, + NULL); + + /* check the return status */ + if (!NT_SUCCESS(status)) { + printf("NtReadFile request failed 0x%0x\n", status); + goto errorout; + } + +errorout: + + if (NT_SUCCESS(status)) { + return iosb.Information; + } + + return cfs_error_code(status); +} + + +int cfs_proc_write(HANDLE handle, void *buffer, unsigned int count) +{ + NTSTATUS status; + IO_STATUS_BLOCK iosb; + LARGE_INTEGER offset; + + offset.QuadPart = -1; + + /* write buffer to the opened file */ + status = NtWriteFile( + (HANDLE)handle, + 0, + NULL, + NULL, + &iosb, + buffer, + count, + &offset, + NULL); + + /* check the return status */ + if (!NT_SUCCESS(status)) { + printf("NtWriteFile request failed 0x%0x\n", status); + goto errorout; + } + +errorout: + + if (NT_SUCCESS(status)) { + return iosb.Information; + } + + return cfs_error_code(status); +} + +int cfs_proc_ioctl(HANDLE handle, int cmd, void *buffer) +{ + PUCHAR procdat = NULL; + CFS_PROC_IOCTL procctl; + ULONG length = 0; + ULONG extra = 0; + + NTSTATUS status; + IO_STATUS_BLOCK iosb; + + procctl.cmd = cmd; + + if(_IOC_TYPE(cmd) == IOC_LIBCFS_TYPE) { + struct libcfs_ioctl_data * portal; + portal = (struct libcfs_ioctl_data *) buffer; + length = portal->ioc_len; + } else if (_IOC_TYPE(cmd) == 'f') { + struct obd_ioctl_data * obd; + obd = (struct obd_ioctl_data *) buffer; + length = obd->ioc_len; + extra = size_round(obd->ioc_plen1) + size_round(obd->ioc_plen2); + } else if(_IOC_TYPE(cmd) == 'u') { + length = 4; + extra = 0; + } else { + printf("user:winnt-proc:cfs_proc_ioctl: un-supported ioctl type ...\n"); + cfs_enter_debugger(); + status = STATUS_INVALID_PARAMETER; + goto errorout; + } + + procctl.len = length + extra; + procdat = malloc(length + extra + sizeof(CFS_PROC_IOCTL)); + + if (NULL == procdat) { + printf("user:winnt-proc:cfs_proc_ioctl: no enough memory ...\n"); + status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; + } + memset(procdat, 0, length + extra + sizeof(CFS_PROC_IOCTL)); + memcpy(procdat, &procctl, sizeof(CFS_PROC_IOCTL)); + memcpy(&procdat[sizeof(CFS_PROC_IOCTL)], buffer, length); + length += sizeof(CFS_PROC_IOCTL); + + if (_IOC_TYPE(cmd) == 'f') { + + char *ptr; + struct obd_ioctl_data * data; + struct obd_ioctl_data * obd; + + data = (struct obd_ioctl_data *) buffer; + obd = (struct obd_ioctl_data *) (procdat + sizeof(CFS_PROC_IOCTL)); + ptr = obd->ioc_bulk; + + if (data->ioc_inlbuf1) { + obd->ioc_inlbuf1 = ptr; + LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); + } + + if (data->ioc_inlbuf2) { + obd->ioc_inlbuf2 = ptr; + LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); + } + if (data->ioc_inlbuf3) { + obd->ioc_inlbuf3 = ptr; + LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); + } + if (data->ioc_inlbuf4) { + obd->ioc_inlbuf4 = ptr; + LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); + } + + if ( cmd != (ULONG)OBD_IOC_BRW_WRITE && + cmd != (ULONG)OBD_IOC_BRW_READ ) { + + if (data->ioc_pbuf1 && data->ioc_plen1) { + obd->ioc_pbuf1 = &procdat[length]; + memcpy(obd->ioc_pbuf1, data->ioc_pbuf1, 
data->ioc_plen1); + length += size_round(data->ioc_plen1); + } + + if (data->ioc_pbuf2 && data->ioc_plen2) { + obd->ioc_pbuf2 = &procdat[length]; + memcpy(obd->ioc_pbuf2, data->ioc_pbuf2, data->ioc_plen2); + length += size_round(data->ioc_plen2); + } + } + + if (obd_ioctl_is_invalid(obd)) { + cfs_enter_debugger(); + } + } + + status = NtDeviceIoControlFile( + (HANDLE)handle, + NULL, NULL, NULL, &iosb, + IOCTL_LIBCFS_ENTRY, + procdat, length, + procdat, length ); + + + if (NT_SUCCESS(status)) { + memcpy(buffer, &procdat[sizeof(CFS_PROC_IOCTL)], procctl.len); + } + +errorout: + + if (procdat) { + free(procdat); + } + + return cfs_error_code(status); +} + +#endif /* __KERNEL__ */ diff --git a/libcfs/libcfs/winnt/winnt-sync.c b/libcfs/libcfs/winnt/winnt-sync.c new file mode 100644 index 0000000..5094bef --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-sync.c @@ -0,0 +1,449 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +#define DEBUG_SUBSYSTEM S_LIBCFS + +#include +#include + + +/* + * Wait queue routines + */ + +/* + * cfs_waitq_init + * To initialize the wait queue + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_init(cfs_waitq_t *waitq) +{ + waitq->magic = CFS_WAITQ_MAGIC; + waitq->flags = 0; + INIT_LIST_HEAD(&(waitq->waiters)); + spin_lock_init(&(waitq->guard)); +} + +/* + * cfs_waitlink_init + * To initialize the wake link node + * + * Arguments: + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitlink_init(cfs_waitlink_t *link) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + cfs_assert(slot->Magic == TASKSLT_MAGIC); + + memset(link, 0, sizeof(cfs_waitlink_t)); + + link->magic = CFS_WAITLINK_MAGIC; + link->flags = 0; + + link->event = &(slot->Event); + link->hits = &(slot->hits); + + atomic_inc(&slot->count); + + INIT_LIST_HEAD(&(link->waitq[0].link)); + INIT_LIST_HEAD(&(link->waitq[1].link)); + + link->waitq[0].waitl = link->waitq[1].waitl = link; +} + + +/* + * cfs_waitlink_fini + * To finilize the wake link node + * + * Arguments: + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitlink_fini(cfs_waitlink_t *link) +{ + cfs_task_t * task = cfs_current(); + PTASK_SLOT slot = NULL; + + if (!task) { + /* should bugchk here */ + cfs_enter_debugger(); + return; + } + + slot = CONTAINING_RECORD(task, TASK_SLOT, task); + cfs_assert(slot->Magic == TASKSLT_MAGIC); + cfs_assert(link->magic == CFS_WAITLINK_MAGIC); + 
cfs_assert(link->waitq[0].waitq == NULL); + cfs_assert(link->waitq[1].waitq == NULL); + + atomic_dec(&slot->count); +} + + +/* + * cfs_waitq_add_internal + * To queue the wait link node to the wait queue + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * link: pointer to the cfs_waitlink_t structure + * int: queue no (Normal or Forward waitq) + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_add_internal(cfs_waitq_t *waitq, + cfs_waitlink_t *link, + __u32 waitqid ) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + LASSERT(waitq->magic == CFS_WAITQ_MAGIC); + LASSERT(link->magic == CFS_WAITLINK_MAGIC); + LASSERT(waitqid < CFS_WAITQ_CHANNELS); + + spin_lock(&(waitq->guard)); + LASSERT(link->waitq[waitqid].waitq == NULL); + link->waitq[waitqid].waitq = waitq; + if (link->flags & CFS_WAITQ_EXCLUSIVE) { + list_add_tail(&link->waitq[waitqid].link, &waitq->waiters); + } else { + list_add(&link->waitq[waitqid].link, &waitq->waiters); + } + spin_unlock(&(waitq->guard)); +} +/* + * cfs_waitq_add + * To queue the wait link node to the wait queue + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_add(cfs_waitq_t *waitq, + cfs_waitlink_t *link) +{ + cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_NORMAL); +} + +/* + * cfs_waitq_add_exclusive + * To set the wait link node to exclusive mode + * and queue it to the wait queue + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * link: pointer to the cfs_wait_link structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_add_exclusive( cfs_waitq_t *waitq, + cfs_waitlink_t *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + LASSERT(waitq->magic == CFS_WAITQ_MAGIC); + LASSERT(link->magic == CFS_WAITLINK_MAGIC); + + link->flags |= CFS_WAITQ_EXCLUSIVE; + cfs_waitq_add(waitq, link); +} + +/* + * cfs_waitq_forward + * To be determinated. + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_forward( cfs_waitlink_t *link, + cfs_waitq_t *waitq) +{ + cfs_waitq_add_internal(waitq, link, CFS_WAITQ_CHAN_FORWARD); +} + +/* + * cfs_waitq_del + * To remove the wait link node from the waitq + * + * Arguments: + * waitq: pointer to the cfs_ waitq_t structure + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_del( cfs_waitq_t *waitq, + cfs_waitlink_t *link) +{ + int i = 0; + + LASSERT(waitq != NULL); + LASSERT(link != NULL); + + LASSERT(waitq->magic == CFS_WAITQ_MAGIC); + LASSERT(link->magic == CFS_WAITLINK_MAGIC); + + spin_lock(&(waitq->guard)); + + for (i=0; i < CFS_WAITQ_CHANNELS; i++) { + if (link->waitq[i].waitq == waitq) + break; + } + + if (i < CFS_WAITQ_CHANNELS) { + link->waitq[i].waitq = NULL; + list_del_init(&link->waitq[i].link); + } else { + cfs_enter_debugger(); + } + + spin_unlock(&(waitq->guard)); +} + +/* + * cfs_waitq_active + * Is the waitq active (not empty) ? + * + * Arguments: + * waitq: pointer to the cfs_ waitq_t structure + * + * Return Value: + * Zero: the waitq is empty + * Non-Zero: the waitq is active + * + * Notes: + * We always returns TRUE here, the same to Darwin. 
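Taken together, the wait-queue primitives in this file support the usual pattern sketched here; the waitq, the condition predicate and the task-state constant are placeholders for whatever the caller uses.

    cfs_waitlink_t link;

    cfs_waitlink_init(&link);
    cfs_waitq_add(&waitq, &link);
    while (!condition)                          /* caller's own predicate */
        cfs_waitq_wait(&link, CFS_TASK_INTERRUPTIBLE);
    cfs_waitq_del(&waitq, &link);
    cfs_waitlink_fini(&link);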
+ */ + +int cfs_waitq_active(cfs_waitq_t *waitq) +{ + LASSERT(waitq != NULL); + LASSERT(waitq->magic == CFS_WAITQ_MAGIC); + + return (1); +} + +/* + * cfs_waitq_signal_nr + * To wake up all the non-exclusive tasks plus nr exclusive + * ones in the waitq + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * nr: number of exclusive tasks to be woken up + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + + +void cfs_waitq_signal_nr(cfs_waitq_t *waitq, int nr) +{ + int result; + cfs_waitlink_channel_t * scan; + + LASSERT(waitq != NULL); + LASSERT(waitq->magic == CFS_WAITQ_MAGIC); + + spin_lock(&waitq->guard); + + list_for_each_entry(scan, &waitq->waiters, cfs_waitlink_channel_t, link) { + + cfs_waitlink_t *waitl = scan->waitl; + + result = cfs_wake_event(waitl->event); + LASSERT( result == FALSE || result == TRUE ); + + if (result) { + atomic_inc(waitl->hits); + } + + if ((waitl->flags & CFS_WAITQ_EXCLUSIVE) && --nr == 0) + break; + } + + spin_unlock(&waitq->guard); + return; +} + +/* + * cfs_waitq_signal + * To wake up all the non-exclusive tasks and 1 exclusive + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_signal(cfs_waitq_t *waitq) +{ + cfs_waitq_signal_nr(waitq, 1); +} + + +/* + * cfs_waitq_broadcast + * To wake up all the tasks in the waitq + * + * Arguments: + * waitq: pointer to the cfs_waitq_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_broadcast(cfs_waitq_t *waitq) +{ + LASSERT(waitq != NULL); + LASSERT(waitq->magic ==CFS_WAITQ_MAGIC); + + cfs_waitq_signal_nr(waitq, 0); +} + +/* + * cfs_waitq_wait + * To wait on the link node until it is signaled. + * + * Arguments: + * link: pointer to the cfs_waitlink_t structure + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void cfs_waitq_wait(cfs_waitlink_t *link, cfs_task_state_t state) +{ + LASSERT(link != NULL); + LASSERT(link->magic == CFS_WAITLINK_MAGIC); + + if (atomic_read(link->hits) > 0) { + atomic_dec(link->hits); + LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); + } else { + cfs_wait_event(link->event, 0); + } +} + +/* + * cfs_waitq_timedwait + * To wait the link node to be signaled with a timeout limit + * + * Arguments: + * link: pointer to the cfs_waitlink_t structure + * timeout: the timeout limitation + * + * Return Value: + * Woken up: return the difference of the current time and + * the timeout + * Timeout: return 0 + * + * Notes: + * What if it happens to be woken up at the just timeout time !? + */ + +cfs_duration_t cfs_waitq_timedwait( cfs_waitlink_t *link, + cfs_task_state_t state, + cfs_duration_t timeout) +{ + + if (atomic_read(link->hits) > 0) { + atomic_dec(link->hits); + LASSERT((__u32)atomic_read(link->hits) < (__u32)0xFFFFFF00); + return TRUE; + } + + return (cfs_duration_t)cfs_wait_event(link->event, timeout); +} + + diff --git a/libcfs/libcfs/winnt/winnt-tcpip.c b/libcfs/libcfs/winnt/winnt-tcpip.c new file mode 100644 index 0000000..d0c725c --- /dev/null +++ b/libcfs/libcfs/winnt/winnt-tcpip.c @@ -0,0 +1,6706 @@ +/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=4:tabstop=4: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_LIBCFS + +#include +#include +#include + +#define TDILND_MODULE_NAME L"Tdilnd" + +ks_data_t ks_data; + +ULONG +ks_tdi_send_flags(ULONG SockFlags) +{ + ULONG TdiFlags = 0; + + if (cfs_is_flag_set(SockFlags, MSG_OOB)) { + cfs_set_flag(TdiFlags, TDI_SEND_EXPEDITED); + } + + if (cfs_is_flag_set(SockFlags, MSG_MORE)) { + cfs_set_flag(TdiFlags, TDI_SEND_PARTIAL); + } + + if (cfs_is_flag_set(SockFlags, MSG_DONTWAIT)) { + cfs_set_flag(TdiFlags, TDI_SEND_NON_BLOCKING); + } + + return TdiFlags; +} + +NTSTATUS +KsIrpCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + if (NULL != Context) { + KeSetEvent((PKEVENT)Context, IO_NETWORK_INCREMENT, FALSE); + } + + return STATUS_MORE_PROCESSING_REQUIRED; + + UNREFERENCED_PARAMETER(DeviceObject); + UNREFERENCED_PARAMETER(Irp); +} + + +/* + * KsBuildTdiIrp + * Allocate a new IRP and initialize it to be issued to tdi + * + * Arguments: + * DeviceObject: device object created by the underlying + * TDI transport driver + * + * Return Value: + * PRIP: the allocated Irp in success or NULL in failure. + * + * NOTES: + * N/A + */ + +PIRP +KsBuildTdiIrp( + IN PDEVICE_OBJECT DeviceObject + ) +{ + PIRP Irp; + PIO_STACK_LOCATION IrpSp; + + // + // Allocating the IRP ... + // + + Irp = IoAllocateIrp(DeviceObject->StackSize, FALSE); + + if (NULL != Irp) { + + // + // Getting the Next Stack Location ... + // + + IrpSp = IoGetNextIrpStackLocation(Irp); + + // + // Initializing Irp ... + // + + IrpSp->MajorFunction = IRP_MJ_INTERNAL_DEVICE_CONTROL; + IrpSp->Parameters.DeviceIoControl.IoControlCode = 0; + } + + return Irp; +} + +/* + * KsSubmitTdiIrp + * Issue the Irp to the underlying tdi driver + * + * Arguments: + * DeviceObject: the device object created by TDI driver + * Irp: the I/O request packet to be processed + * bSynchronous: synchronous or not. If true, we need wait + * until the process is finished. + * Information: returned info + * + * Return Value: + * NTSTATUS: kernel status code + * + * NOTES: + * N/A + */ + +NTSTATUS +KsSubmitTdiIrp( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN BOOLEAN bSynchronous, + OUT PULONG Information + ) +{ + NTSTATUS Status; + KEVENT Event; + + if (bSynchronous) { + + KeInitializeEvent( + &Event, + SynchronizationEvent, + FALSE + ); + + + IoSetCompletionRoutine( + Irp, + KsIrpCompletionRoutine, + &Event, + TRUE, + TRUE, + TRUE + ); + } + + Status = IoCallDriver(DeviceObject, Irp); + + if (bSynchronous) { + + if (STATUS_PENDING == Status) { + + Status = KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); + } + + Status = Irp->IoStatus.Status; + + if (Information) { + *Information = (ULONG)(Irp->IoStatus.Information); + } + + Irp->MdlAddress = NULL; + IoFreeIrp(Irp); + } + + if (!NT_SUCCESS(Status)) { + + KsPrint((2, "KsSubmitTdiIrp: Error when submitting the Irp: Status = %xh (%s) ...\n", + Status, KsNtStatusToString(Status))); + } + + return (Status); +} + + + +/* + * KsOpenControl + * Open the Control Channel Object ... 
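As a small example of the mapping performed by ks_tdi_send_flags() above (the caller's socket flags are assumed):

    ULONG TdiFlags = ks_tdi_send_flags(MSG_DONTWAIT | MSG_MORE);
    /* TdiFlags now carries TDI_SEND_NON_BLOCKING | TDI_SEND_PARTIAL;
     * MSG_OOB would have added TDI_SEND_EXPEDITED instead. */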
+ * + * Arguments: + * DeviceName: the device name to be opened + * Handle: opened handle in success case + * FileObject: the fileobject of the device + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsOpenControl( + IN PUNICODE_STRING DeviceName, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; + + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + // + // Initializing ... + // + + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + // + // Creating the Transport Address Object ... + // + + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + 0, + FILE_ATTRIBUTE_NORMAL, + FILE_SHARE_READ | FILE_SHARE_WRITE, + FILE_OPEN, + 0, + NULL, + 0 + ); + + + if (NT_SUCCESS(Status)) { + + // + // Now Obtaining the FileObject of the Transport Address ... + // + + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); + + if (!NT_SUCCESS(Status)) { + + cfs_enter_debugger(); + ZwClose(*Handle); + } + + } else { + + cfs_enter_debugger(); + } + + return (Status); +} + + +/* + * KsCloseControl + * Release the Control Channel Handle and FileObject + * + * Arguments: + * Handle: the channel handle to be released + * FileObject: the fileobject to be released + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsCloseControl( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + if (FileObject) { + + ObDereferenceObject(FileObject); + } + + if (Handle) { + + Status = ZwClose(Handle); + } + + ASSERT(NT_SUCCESS(Status)); + + return (Status); +} + + +/* + * KsOpenAddress + * Open the tdi address object + * + * Arguments: + * DeviceName: device name of the address object + * pAddress: tdi address of the address object + * AddressLength: length in bytes of the tdi address + * Handle: the newly opened handle + * FileObject: the newly opened fileobject + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsOpenAddress( + IN PUNICODE_STRING DeviceName, + IN PTRANSPORT_ADDRESS pAddress, + IN ULONG AddressLength, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + UCHAR EaBuffer[EA_MAX_LENGTH]; + + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; + + // + // Building EA for the Address Object to be Opened ... + // + + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = TDI_TRANSPORT_ADDRESS_LENGTH; + Ea->EaValueLength = (USHORT)AddressLength; + RtlCopyMemory( + &(Ea->EaName), + TdiTransportAddress, + Ea->EaNameLength + 1 + ); + RtlMoveMemory( + &(Ea->EaName[Ea->EaNameLength + 1]), + pAddress, + AddressLength + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) + + Ea->EaNameLength + AddressLength; + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + + // + // Initializing ... 
+ // + + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + // + // Creating the Transport Address Object ... + // + + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + 0, + FILE_ATTRIBUTE_NORMAL, + FILE_SHARE_READ | FILE_SHARE_WRITE, /* 0: DON'T REUSE */ + FILE_OPEN, + 0, + Ea, + EaLength + ); + + + if (NT_SUCCESS(Status)) { + + // + // Now Obtaining the FileObject of the Transport Address ... + // + + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); + + if (!NT_SUCCESS(Status)) { + + cfs_enter_debugger(); + ZwClose(*Handle); + } + + } else { + + cfs_enter_debugger(); + } + + return (Status); +} + +/* + * KsCloseAddress + * Release the Hanlde and FileObject of an opened tdi + * address object + * + * Arguments: + * Handle: the handle to be released + * FileObject: the fileobject to be released + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsCloseAddress( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject +) +{ + NTSTATUS Status = STATUS_SUCCESS; + + if (FileObject) { + + ObDereferenceObject(FileObject); + } + + if (Handle) { + + Status = ZwClose(Handle); + } + + ASSERT(NT_SUCCESS(Status)); + + return (Status); +} + + +/* + * KsOpenConnection + * Open a tdi connection object + * + * Arguments: + * DeviceName: device name of the connection object + * ConnectionContext: the connection context + * Handle: the newly opened handle + * FileObject: the newly opened fileobject + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsOpenConnection( + IN PUNICODE_STRING DeviceName, + IN CONNECTION_CONTEXT ConnectionContext, + OUT HANDLE * Handle, + OUT PFILE_OBJECT * FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + PFILE_FULL_EA_INFORMATION Ea = NULL; + ULONG EaLength; + UCHAR EaBuffer[EA_MAX_LENGTH]; + + OBJECT_ATTRIBUTES ObjectAttributes; + IO_STATUS_BLOCK IoStatus; + + // + // Building EA for the Address Object to be Opened ... + // + + Ea = (PFILE_FULL_EA_INFORMATION)EaBuffer; + Ea->NextEntryOffset = 0; + Ea->Flags = 0; + Ea->EaNameLength = TDI_CONNECTION_CONTEXT_LENGTH; + Ea->EaValueLength = (USHORT)sizeof(CONNECTION_CONTEXT); + RtlCopyMemory( + &(Ea->EaName), + TdiConnectionContext, + Ea->EaNameLength + 1 + ); + RtlMoveMemory( + &(Ea->EaName[Ea->EaNameLength + 1]), + &ConnectionContext, + sizeof(CONNECTION_CONTEXT) + ); + EaLength = sizeof(FILE_FULL_EA_INFORMATION) - 1 + + Ea->EaNameLength + 1 + sizeof(CONNECTION_CONTEXT); + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + + // + // Initializing ... + // + + InitializeObjectAttributes( + &ObjectAttributes, + DeviceName, + OBJ_CASE_INSENSITIVE | + OBJ_KERNEL_HANDLE, + NULL, + NULL + ); + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + // + // Creating the Connection Object ... + // + + Status = ZwCreateFile( + Handle, + FILE_READ_DATA | FILE_WRITE_DATA, + &ObjectAttributes, + &IoStatus, + NULL, + FILE_ATTRIBUTE_NORMAL, + 0, + FILE_OPEN, + 0, + Ea, + EaLength + ); + + + if (NT_SUCCESS(Status)) { + + // + // Now Obtaining the FileObject of the Transport Address ... 
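A sketch of how a caller might prepare the TRANSPORT_ADDRESS consumed by KsOpenAddress(), using the DDK's TA_IP_ADDRESS layout for a local IPv4 endpoint; TcpDeviceName is an assumed UNICODE_STRING naming the TCP transport device.

    TA_IP_ADDRESS ta;
    HANDLE        AddrHandle;
    PFILE_OBJECT  AddrObject;
    NTSTATUS      Status;

    RtlZeroMemory(&ta, sizeof(ta));
    ta.TAAddressCount                 = 1;
    ta.Address[0].AddressLength       = TDI_ADDRESS_LENGTH_IP;
    ta.Address[0].AddressType         = TDI_ADDRESS_TYPE_IP;
    ta.Address[0].Address[0].sin_port = 0;    /* any port   */
    ta.Address[0].Address[0].in_addr  = 0;    /* INADDR_ANY */

    Status = KsOpenAddress(&TcpDeviceName, (PTRANSPORT_ADDRESS)&ta,
                           sizeof(ta), &AddrHandle, &AddrObject);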
+ // + + Status = ObReferenceObjectByHandle( + *Handle, + FILE_ANY_ACCESS, + NULL, + KernelMode, + FileObject, + NULL + ); + + if (!NT_SUCCESS(Status)) { + + cfs_enter_debugger(); + ZwClose(*Handle); + } + + } else { + + cfs_enter_debugger(); + } + + return (Status); +} + +/* + * KsCloseConnection + * Release the Hanlde and FileObject of an opened tdi + * connection object + * + * Arguments: + * Handle: the handle to be released + * FileObject: the fileobject to be released + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsCloseConnection( + IN HANDLE Handle, + IN PFILE_OBJECT FileObject + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + if (FileObject) { + + ObDereferenceObject(FileObject); + } + + if (Handle) { + + Status = ZwClose(Handle); + } + + ASSERT(NT_SUCCESS(Status)); + + return (Status); +} + + +/* + * KsAssociateAddress + * Associate an address object with a connection object + * + * Arguments: + * AddressHandle: the handle of the address object + * ConnectionObject: the FileObject of the connection + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsAssociateAddress( + IN HANDLE AddressHandle, + IN PFILE_OBJECT ConnectionObject + ) +{ + NTSTATUS Status; + PDEVICE_OBJECT DeviceObject; + PIRP Irp; + + // + // Getting the DeviceObject from Connection FileObject + // + + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + // + // Building Tdi Internal Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Assocating the Address Object with the Connection Object + // + + TdiBuildAssociateAddress( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + AddressHandle + ); + + // + // Calling the Transprot Driver with the Prepared Irp + // + + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + } + + return (Status); +} + + +/* + * KsDisassociateAddress + * Disassociate the connection object (the relationship will + * the corresponding address object will be dismissed. ) + * + * Arguments: + * ConnectionObject: the FileObject of the connection + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsDisassociateAddress( + IN PFILE_OBJECT ConnectionObject + ) +{ + NTSTATUS Status; + PDEVICE_OBJECT DeviceObject; + PIRP Irp; + + // + // Getting the DeviceObject from Connection FileObject + // + + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + // + // Building Tdi Internal Irp ... 
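/*
 * Taken together, the helpers above give the usual TDI endpoint
 * setup/teardown sequence.  A rough sketch with error handling
 * omitted; the variable names are illustrative only, and the real
 * flow presumably lives in the tconn create/bind paths elsewhere in
 * this file:
 *
 *     KsOpenAddress(&TcpDevName, pTdiAddr, AddrLen,
 *                   &AddrHandle, &AddrObject);
 *     KsOpenConnection(&TcpDevName, (CONNECTION_CONTEXT)tconn,
 *                      &ConnHandle, &ConnObject);
 *     KsAssociateAddress(AddrHandle, ConnObject);
 *         ... connect / accept / send / receive ...
 *     KsDisassociateAddress(ConnObject);
 *     KsCloseConnection(ConnHandle, ConnObject);
 *     KsCloseAddress(AddrHandle, AddrObject);
 */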
+ // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Disassocating the Address Object with the Connection Object + // + + TdiBuildDisassociateAddress( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL + ); + + // + // Calling the Transprot Driver with the Prepared Irp + // + + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + } + + return (Status); +} + + +/* + +// +// Connection Control Event Callbacks +// + +TDI_EVENT_CONNECT +TDI_EVENT_DISCONNECT +TDI_EVENT_ERROR + +// +// Tcp Event Callbacks +// + +TDI_EVENT_RECEIVE +TDI_EVENT_RECEIVE_EXPEDITED +TDI_EVENT_CHAINED_RECEIVE +TDI_EVENT_CHAINED_RECEIVE_EXPEDITED + +// +// Udp Event Callbacks +// + +TDI_EVENT_RECEIVE_DATAGRAM +TDI_EVENT_CHAINED_RECEIVE_DATAGRAM + +*/ + + +/* + * KsSetEventHandlers + * Set the tdi event callbacks with an address object + * + * Arguments: + * AddressObject: the FileObject of the address object + * EventContext: the parameter for the callbacks + * Handlers: the handlers indictor array + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * NOTES: + * N/A + */ + +NTSTATUS +KsSetEventHandlers( + IN PFILE_OBJECT AddressObject, // Address File Object + IN PVOID EventContext, // Context for Handlers + IN PKS_EVENT_HANDLERS Handlers // Handlers Indictor + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + PDEVICE_OBJECT DeviceObject; + USHORT i = 0; + + DeviceObject = IoGetRelatedDeviceObject(AddressObject); + + for (i=0; i < TDI_EVENT_MAXIMUM_HANDLER; i++) { + + // + // Setup the tdi event callback handler if requested. + // + + if (Handlers->IsActive[i]) { + + PIRP Irp; + + // + // Building Tdi Internal Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Building the Irp to set the Event Handler ... 
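/*
 * KS_EVENT_HANDLERS pairs an IsActive[] flag array with a Handler[]
 * array, both indexed by TDI event type, which is what the loop above
 * iterates over.  The SetEventHandler() macro used later by
 * ks_set_handlers()/ks_reset_handlers() is assumed to expand to
 * something like the sketch below (marking the slot active even for a
 * NULL callback, so that resetting a handler still issues the IRP):
 *
 *     #define SetEventHandler(ha, et, cb)              \
 *             do {                                     \
 *                 (ha).IsActive[(et)] = TRUE;          \
 *                 (ha).Handler[(et)]  = (PVOID)(cb);   \
 *             } while (0)
 */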
+ // + + TdiBuildSetEventHandler( + Irp, + DeviceObject, + AddressObject, + NULL, + NULL, + i, /* tdi event type */ + Handlers->Handler[i], /* tdi event handler */ + EventContext /* context for the handler */ + ); + + // + // Calling the Transprot Driver with the Prepared Irp + // + + Status = KsSubmitTdiIrp(DeviceObject, Irp, TRUE, NULL); + + // + // tcp/ip tdi does not support these two event callbacks + // + + if ((!NT_SUCCESS(Status)) && ( i == TDI_EVENT_SEND_POSSIBLE || + i == TDI_EVENT_CHAINED_RECEIVE_EXPEDITED )) { + cfs_enter_debugger(); + Status = STATUS_SUCCESS; + } + } + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + } + } + + +errorout: + + if (!NT_SUCCESS(Status)) { + + KsPrint((2, "KsSetEventHandlers: Error Status = %xh (%s)\n", + Status, KsNtStatusToString(Status) )); + } + + return (Status); +} + + + +/* + * KsQueryAddressInfo + * Query the address of the FileObject specified + * + * Arguments: + * FileObject: the FileObject to be queried + * AddressInfo: buffer to contain the address info + * AddressSize: length of the AddressInfo buffer + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * Notes: + * N/A + */ + +NTSTATUS +KsQueryAddressInfo( + PFILE_OBJECT FileObject, + PTDI_ADDRESS_INFO AddressInfo, + PULONG AddressSize + ) +{ + NTSTATUS Status = STATUS_UNSUCCESSFUL; + PIRP Irp = NULL; + PMDL Mdl; + PDEVICE_OBJECT DeviceObject; + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + DeviceObject = IoGetRelatedDeviceObject(FileObject); + + RtlZeroMemory(AddressInfo, *(AddressSize)); + + // + // Allocating the Tdi Setting Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Locking the User Buffer / Allocating a MDL for it + // + + Status = KsLockUserBuffer( + AddressInfo, + FALSE, + *(AddressSize), + IoModifyAccess, + &Mdl + ); + + if (!NT_SUCCESS(Status)) { + + IoFreeIrp(Irp); + Irp = NULL; + } + } + + if (Irp) { + + LASSERT(NT_SUCCESS(Status)); + + TdiBuildQueryInformation( + Irp, + DeviceObject, + FileObject, + NULL, + NULL, + TDI_QUERY_ADDRESS_INFO, + Mdl + ); + + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + AddressSize + ); + + KsReleaseMdl(Mdl, FALSE); + } + + if (!NT_SUCCESS(Status)) { + + cfs_enter_debugger(); + //TDI_BUFFER_OVERFLOW + } + + return (Status); +} + +/* + * KsQueryProviderInfo + * Query the underlying transport device's information + * + * Arguments: + * TdiDeviceName: the transport device's name string + * ProviderInfo: TDI_PROVIDER_INFO struncture + * + * Return Value: + * NTSTATUS: Nt system status code + * + * NOTES: + * N/A + */ + +NTSTATUS +KsQueryProviderInfo( + PWSTR TdiDeviceName, + PTDI_PROVIDER_INFO ProviderInfo + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + PIRP Irp = NULL; + PMDL Mdl = NULL; + + UNICODE_STRING ControlName; + + HANDLE Handle; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + + ULONG ProviderSize = 0; + + RtlInitUnicodeString(&ControlName, TdiDeviceName); + + // + // Open the Tdi Control Channel + // + + Status = KsOpenControl( + &ControlName, + &Handle, + &FileObject + ); + + if (!NT_SUCCESS(Status)) { + + KsPrint((2, "KsQueryProviderInfo: Fail to open the tdi control channel.\n")); + return (Status); + } + + // + // Obtain The Related Device Object + // + + DeviceObject = IoGetRelatedDeviceObject(FileObject); + + ProviderSize = sizeof(TDI_PROVIDER_INFO); + RtlZeroMemory(ProviderInfo, ProviderSize); + + // + // Allocating the Tdi 
Setting Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Locking the User Buffer / Allocating a MDL for it + // + + Status = KsLockUserBuffer( + ProviderInfo, + FALSE, + ProviderSize, + IoModifyAccess, + &Mdl + ); + + if (!NT_SUCCESS(Status)) { + + IoFreeIrp(Irp); + Irp = NULL; + } + } + + if (Irp) { + + LASSERT(NT_SUCCESS(Status)); + + TdiBuildQueryInformation( + Irp, + DeviceObject, + FileObject, + NULL, + NULL, + TDI_QUERY_PROVIDER_INFO, + Mdl + ); + + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + &ProviderSize + ); + + KsReleaseMdl(Mdl, FALSE); + } + + if (!NT_SUCCESS(Status)) { + + cfs_enter_debugger(); + //TDI_BUFFER_OVERFLOW + } + + KsCloseControl(Handle, FileObject); + + return (Status); +} + +/* + * KsQueryConnectionInfo + * Query the connection info of the FileObject specified + * (some statics data of the traffic) + * + * Arguments: + * FileObject: the FileObject to be queried + * ConnectionInfo: buffer to contain the connection info + * ConnectionSize: length of the ConnectionInfo buffer + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * NOTES: + * N/A + */ + +NTSTATUS +KsQueryConnectionInfo( + PFILE_OBJECT ConnectionObject, + PTDI_CONNECTION_INFO ConnectionInfo, + PULONG ConnectionSize + ) +{ + NTSTATUS Status = STATUS_UNSUCCESSFUL; + PIRP Irp = NULL; + PMDL Mdl; + PDEVICE_OBJECT DeviceObject; + + LASSERT( KeGetCurrentIrql() < DISPATCH_LEVEL ); + + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + RtlZeroMemory(ConnectionInfo, *(ConnectionSize)); + + // + // Allocating the Tdi Query Irp ... + // + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + // + // Locking the User Buffer / Allocating a MDL for it + // + + Status = KsLockUserBuffer( + ConnectionInfo, + FALSE, + *(ConnectionSize), + IoModifyAccess, + &Mdl + ); + + if (NT_SUCCESS(Status)) { + + IoFreeIrp(Irp); + Irp = NULL; + } + } + + if (Irp) { + + LASSERT(NT_SUCCESS(Status)); + + TdiBuildQueryInformation( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + TDI_QUERY_CONNECTION_INFO, + Mdl + ); + + Status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + ConnectionSize + ); + + KsReleaseMdl(Mdl, FALSE); + } + + return (Status); +} + + +/* + * KsInitializeTdiAddress + * Initialize the tdi addresss + * + * Arguments: + * pTransportAddress: tdi address to be initialized + * IpAddress: the ip address of object + * IpPort: the ip port of the object + * + * Return Value: + * ULONG: the total size of the tdi address + * + * NOTES: + * N/A + */ + +ULONG +KsInitializeTdiAddress( + IN OUT PTA_IP_ADDRESS pTransportAddress, + IN ULONG IpAddress, + IN USHORT IpPort + ) +{ + pTransportAddress->TAAddressCount = 1; + pTransportAddress->Address[ 0 ].AddressLength = TDI_ADDRESS_LENGTH_IP; + pTransportAddress->Address[ 0 ].AddressType = TDI_ADDRESS_TYPE_IP; + pTransportAddress->Address[ 0 ].Address[ 0 ].sin_port = IpPort; + pTransportAddress->Address[ 0 ].Address[ 0 ].in_addr = IpAddress; + + return (FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + TDI_ADDRESS_LENGTH_IP); +} + +/* + * KsQueryTdiAddressLength + * Query the total size of the tdi address + * + * Arguments: + * pTransportAddress: tdi address to be queried + * + * Return Value: + * ULONG: the total size of the tdi address + * + * NOTES: + * N/A + */ + +ULONG +KsQueryTdiAddressLength( + PTRANSPORT_ADDRESS pTransportAddress + ) 
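/*
 * Example use of KsInitializeTdiAddress() above: fill a local
 * TA_IP_ADDRESS before opening an address object.  The ip/port values
 * are illustrative; TDI keeps both fields in network byte order, so
 * the caller converts first:
 *
 *     TA_IP_ADDRESS   ta;
 *     ULONG           len;
 *
 *     len = KsInitializeTdiAddress(&ta, htonl(ip), htons(port));
 *     // len now covers the TRANSPORT_ADDRESS header plus one
 *     // TDI_ADDRESS_IP, suitable as the AddressLength argument
 *     // of KsOpenAddress().
 */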
+{ + ULONG TotalLength = 0; + LONG i; + + PTA_ADDRESS UNALIGNED pTaAddress = NULL; + + ASSERT (NULL != pTransportAddress); + + TotalLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address) + + FIELD_OFFSET(TA_ADDRESS, Address) * pTransportAddress->TAAddressCount; + + pTaAddress = (TA_ADDRESS UNALIGNED *)pTransportAddress->Address; + + for (i = 0; i < pTransportAddress->TAAddressCount; i++) + { + TotalLength += pTaAddress->AddressLength; + pTaAddress = (TA_ADDRESS UNALIGNED *)((PCHAR)pTaAddress + + FIELD_OFFSET(TA_ADDRESS,Address) + + pTaAddress->AddressLength ); + } + + return (TotalLength); +} + + +/* + * KsQueryIpAddress + * Query the ip address of the tdi object + * + * Arguments: + * FileObject: tdi object to be queried + * TdiAddress: TdiAddress buffer, to store the queried + * tdi ip address + * AddressLength: buffer length of the TdiAddress + * + * Return Value: + * ULONG: the total size of the tdi ip address + * + * NOTES: + * N/A + */ + +NTSTATUS +KsQueryIpAddress( + PFILE_OBJECT FileObject, + PVOID TdiAddress, + ULONG* AddressLength + ) +{ + NTSTATUS Status; + + PTDI_ADDRESS_INFO TdiAddressInfo; + ULONG Length; + + + // + // Maximum length of TDI_ADDRESSS_INFO with one TRANSPORT_ADDRESS + // + + Length = MAX_ADDRESS_LENGTH; + + TdiAddressInfo = (PTDI_ADDRESS_INFO) + ExAllocatePoolWithTag( + NonPagedPool, + Length, + 'KSAI' ); + + if (NULL == TdiAddressInfo) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + + Status = KsQueryAddressInfo( + FileObject, + TdiAddressInfo, + &Length + ); + +errorout: + + if (NT_SUCCESS(Status)) + { + if (*AddressLength < Length) { + + Status = STATUS_BUFFER_TOO_SMALL; + + } else { + + *AddressLength = Length; + RtlCopyMemory( + TdiAddress, + &(TdiAddressInfo->Address), + Length + ); + + Status = STATUS_SUCCESS; + } + + } else { + + } + + + if (NULL != TdiAddressInfo) { + + ExFreePool(TdiAddressInfo); + } + + return Status; +} + + +/* + * KsErrorEventHandler + * the common error event handler callback + * + * Arguments: + * TdiEventContext: should be the socket + * Status: the error code + * + * Return Value: + * Status: STATS_SUCCESS + * + * NOTES: + * We need not do anything in such a severe + * error case. System will process it for us. + */ + +NTSTATUS +KsErrorEventHandler( + IN PVOID TdiEventContext, + IN NTSTATUS Status + ) +{ + KsPrint((2, "KsErrorEventHandler called at Irql = %xh ...\n", + KeGetCurrentIrql())); + + cfs_enter_debugger(); + + return (STATUS_SUCCESS); +} + + +/* + * ks_set_handlers + * setup all the event handler callbacks + * + * Arguments: + * tconn: the tdi connecton object + * + * Return Value: + * int: ks error code + * + * NOTES: + * N/A + */ + +int +ks_set_handlers( + ksock_tconn_t * tconn + ) +{ + NTSTATUS status = STATUS_SUCCESS; + KS_EVENT_HANDLERS handlers; + + /* to make sure the address object is opened already */ + if (tconn->kstc_addr.FileObject == NULL) { + goto errorout; + } + + /* initialize the handlers indictor array. for sender and listenr, + there are different set of callbacks. for child, we just return. 
*/ + + memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); + + SetEventHandler(handlers, TDI_EVENT_ERROR, KsErrorEventHandler); + SetEventHandler(handlers, TDI_EVENT_DISCONNECT, KsDisconnectEventHandler); + SetEventHandler(handlers, TDI_EVENT_RECEIVE, KsTcpReceiveEventHandler); + SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, KsTcpReceiveExpeditedEventHandler); + SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, KsTcpChainedReceiveEventHandler); + + // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, KsTcpChainedReceiveExpeditedEventHandler); + + if (tconn->kstc_type == kstt_listener) { + SetEventHandler(handlers, TDI_EVENT_CONNECT, KsConnectEventHandler); + } else if (tconn->kstc_type == kstt_child) { + goto errorout; + } + + /* set all the event callbacks */ + status = KsSetEventHandlers( + tconn->kstc_addr.FileObject, /* Address File Object */ + tconn, /* Event Context */ + &handlers /* Event callback handlers */ + ); + +errorout: + + return cfs_error_code(status); +} + + +/* + * ks_reset_handlers + * disable all the event handler callbacks (set to NULL) + * + * Arguments: + * tconn: the tdi connecton object + * + * Return Value: + * int: ks error code + * + * NOTES: + * N/A + */ + +int +ks_reset_handlers( + ksock_tconn_t * tconn + ) +{ + NTSTATUS status = STATUS_SUCCESS; + KS_EVENT_HANDLERS handlers; + + /* to make sure the address object is opened already */ + if (tconn->kstc_addr.FileObject == NULL) { + goto errorout; + } + + /* initialize the handlers indictor array. for sender and listenr, + there are different set of callbacks. for child, we just return. */ + + memset(&handlers, 0, sizeof(KS_EVENT_HANDLERS)); + + SetEventHandler(handlers, TDI_EVENT_ERROR, NULL); + SetEventHandler(handlers, TDI_EVENT_DISCONNECT, NULL); + SetEventHandler(handlers, TDI_EVENT_RECEIVE, NULL); + SetEventHandler(handlers, TDI_EVENT_RECEIVE_EXPEDITED, NULL); + SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE, NULL); + // SetEventHandler(handlers, TDI_EVENT_CHAINED_RECEIVE_EXPEDITED, NULL); + + if (tconn->kstc_type == kstt_listener) { + SetEventHandler(handlers, TDI_EVENT_CONNECT, NULL); + } else if (tconn->kstc_type == kstt_child) { + goto errorout; + } + + /* set all the event callbacks */ + status = KsSetEventHandlers( + tconn->kstc_addr.FileObject, /* Address File Object */ + tconn, /* Event Context */ + &handlers /* Event callback handlers */ + ); + +errorout: + + return cfs_error_code(status); +} + + +/* + * KsAcceptCompletionRoutine + * Irp completion routine for TdiBuildAccept (KsConnectEventHandler) + * + * Here system gives us a chance to check the conneciton is built + * ready or not. + * + * Arguments: + * DeviceObject: the device object of the transport driver + * Irp: the Irp is being completed. 
+ * Context: the context we specified when issuing the Irp + * + * Return Value: + * Nt status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsAcceptCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + ksock_tconn_t * child = (ksock_tconn_t *) Context; + ksock_tconn_t * parent = child->child.kstc_parent; + + KsPrint((2, "KsAcceptCompletionRoutine: called at Irql: %xh\n", + KeGetCurrentIrql() )); + + KsPrint((2, "KsAcceptCompletionRoutine: Context = %xh Status = %xh\n", + Context, Irp->IoStatus.Status)); + + LASSERT(child->kstc_type == kstt_child); + + spin_lock(&(child->kstc_lock)); + + LASSERT(parent->kstc_state == ksts_listening); + LASSERT(child->kstc_state == ksts_connecting); + + if (NT_SUCCESS(Irp->IoStatus.Status)) { + + child->child.kstc_accepted = TRUE; + + child->kstc_state = ksts_connected; + + /* wake up the daemon thread which waits on this event */ + KeSetEvent( + &(parent->listener.kstc_accept_event), + 0, + FALSE + ); + + spin_unlock(&(child->kstc_lock)); + + KsPrint((2, "KsAcceptCompletionRoutine: Get %xh now signal the event ...\n", parent)); + + } else { + + /* re-use this child connecton */ + child->child.kstc_accepted = FALSE; + child->child.kstc_busy = FALSE; + child->kstc_state = ksts_associated; + + spin_unlock(&(child->kstc_lock)); + } + + /* now free the Irp */ + IoFreeIrp(Irp); + + /* drop the refer count of the child */ + ks_put_tconn(child); + + return (STATUS_MORE_PROCESSING_REQUIRED); +} + + +/* + * ks_get_vacancy_backlog + * Get a vacancy listeing child from the backlog list + * + * Arguments: + * parent: the listener daemon connection + * + * Return Value: + * the child listening connection or NULL in failure + * + * Notes + * Parent's lock should be acquired before calling. 
+ */ + +ksock_tconn_t * +ks_get_vacancy_backlog( + ksock_tconn_t * parent + ) +{ + ksock_tconn_t * child; + + LASSERT(parent->kstc_type == kstt_listener); + LASSERT(parent->kstc_state == ksts_listening); + + if (list_empty(&(parent->listener.kstc_listening.list))) { + + child = NULL; + + } else { + + struct list_head * tmp; + + /* check the listening queue and try to get a free connecton */ + + list_for_each(tmp, &(parent->listener.kstc_listening.list)) { + child = list_entry (tmp, ksock_tconn_t, child.kstc_link); + spin_lock(&(child->kstc_lock)); + + if (!child->child.kstc_busy) { + LASSERT(child->kstc_state == ksts_associated); + child->child.kstc_busy = TRUE; + spin_unlock(&(child->kstc_lock)); + break; + } else { + spin_unlock(&(child->kstc_lock)); + child = NULL; + } + } + } + + return child; +} + +ks_addr_slot_t * +KsSearchIpAddress(PUNICODE_STRING DeviceName) +{ + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; + + spin_lock(&ks_data.ksnd_addrs_lock); + + list = ks_data.ksnd_addrs_list.Flink; + while (list != &ks_data.ksnd_addrs_list) { + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + if (RtlCompareUnicodeString( + DeviceName, + &slot->devname, + TRUE) == 0) { + break; + } + list = list->Flink; + slot = NULL; + } + + spin_unlock(&ks_data.ksnd_addrs_lock); + + return slot; +} + +void +KsCleanupIpAddresses() +{ + spin_lock(&ks_data.ksnd_addrs_lock); + + while (!IsListEmpty(&ks_data.ksnd_addrs_list)) { + + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; + + list = RemoveHeadList(&ks_data.ksnd_addrs_list); + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + cfs_free(slot); + ks_data.ksnd_naddrs--; + } + + cfs_assert(ks_data.ksnd_naddrs == 0); + spin_unlock(&ks_data.ksnd_addrs_lock); +} + +VOID +KsAddAddressHandler( + IN PTA_ADDRESS Address, + IN PUNICODE_STRING DeviceName, + IN PTDI_PNP_CONTEXT Context + ) +{ + PTDI_ADDRESS_IP IpAddress = NULL; + + if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && + Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { + + ks_addr_slot_t * slot = NULL; + + IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; + KsPrint((1, "KsAddAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", + DeviceName, Context, IpAddress->in_addr, + (IpAddress->in_addr & 0xFF000000) >> 24, + (IpAddress->in_addr & 0x00FF0000) >> 16, + (IpAddress->in_addr & 0x0000FF00) >> 8, + (IpAddress->in_addr & 0x000000FF) >> 0 )); + + slot = KsSearchIpAddress(DeviceName); + + if (slot != NULL) { + slot->up = TRUE; + slot->ip_addr = ntohl(IpAddress->in_addr); + } else { + slot = cfs_alloc(sizeof(ks_addr_slot_t) + DeviceName->Length, CFS_ALLOC_ZERO); + if (slot != NULL) { + spin_lock(&ks_data.ksnd_addrs_lock); + InsertTailList(&ks_data.ksnd_addrs_list, &slot->link); + sprintf(slot->iface, "eth%d", ks_data.ksnd_naddrs++); + slot->ip_addr = ntohl(IpAddress->in_addr); + slot->up = TRUE; + RtlMoveMemory(&slot->buffer[0], DeviceName->Buffer, DeviceName->Length); + slot->devname.Length = DeviceName->Length; + slot->devname.MaximumLength = DeviceName->Length + sizeof(WCHAR); + slot->devname.Buffer = slot->buffer; + spin_unlock(&ks_data.ksnd_addrs_lock); + } + } + } +} + +VOID +KsDelAddressHandler( + IN PTA_ADDRESS Address, + IN PUNICODE_STRING DeviceName, + IN PTDI_PNP_CONTEXT Context + ) +{ + PTDI_ADDRESS_IP IpAddress = NULL; + + if ( Address->AddressType == TDI_ADDRESS_TYPE_IP && + Address->AddressLength == TDI_ADDRESS_LENGTH_IP ) { + + ks_addr_slot_t * slot = NULL; + + slot = KsSearchIpAddress(DeviceName); + + if (slot != NULL) { + slot->up = FALSE; + 
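/*
 * The slot is only marked down here, never freed: the table built by
 * KsAddAddressHandler() above persists until KsCleanupIpAddresses().
 * A sketch of how a consumer would walk that table while holding the
 * same lock; the consumer itself is hypothetical:
 *
 *     PLIST_ENTRY      list;
 *     ks_addr_slot_t  *slot;
 *
 *     spin_lock(&ks_data.ksnd_addrs_lock);
 *     for (list = ks_data.ksnd_addrs_list.Flink;
 *          list != &ks_data.ksnd_addrs_list;
 *          list = list->Flink) {
 *         slot = CONTAINING_RECORD(list, ks_addr_slot_t, link);
 *         KsPrint((2, "%s: %xh %s\n", slot->iface, slot->ip_addr,
 *                  slot->up ? "up" : "down"));
 *     }
 *     spin_unlock(&ks_data.ksnd_addrs_lock);
 */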
} + + IpAddress = (PTDI_ADDRESS_IP) &Address->Address[0]; + KsPrint((1, "KsDelAddressHandle: Device=%wZ Context=%xh IpAddress=%xh(%d.%d.%d.%d)\n", + DeviceName, Context, IpAddress->in_addr, + (IpAddress->in_addr & 0xFF000000) >> 24, + (IpAddress->in_addr & 0x00FF0000) >> 16, + (IpAddress->in_addr & 0x0000FF00) >> 8, + (IpAddress->in_addr & 0x000000FF) >> 0 )); + } +} + +NTSTATUS +KsRegisterPnpHandlers() +{ + TDI20_CLIENT_INTERFACE_INFO ClientInfo; + + /* initialize the global ks_data members */ + RtlInitUnicodeString(&ks_data.ksnd_client_name, TDILND_MODULE_NAME); + spin_lock_init(&ks_data.ksnd_addrs_lock); + InitializeListHead(&ks_data.ksnd_addrs_list); + + /* register the pnp handlers */ + RtlZeroMemory(&ClientInfo, sizeof(ClientInfo)); + ClientInfo.TdiVersion = TDI_CURRENT_VERSION; + + ClientInfo.ClientName = &ks_data.ksnd_client_name; + ClientInfo.AddAddressHandlerV2 = KsAddAddressHandler; + ClientInfo.DelAddressHandlerV2 = KsDelAddressHandler; + + return TdiRegisterPnPHandlers(&ClientInfo, sizeof(ClientInfo), + &ks_data.ksnd_pnp_handle); +} + +VOID +KsDeregisterPnpHandlers() +{ + if (ks_data.ksnd_pnp_handle) { + + /* De-register the pnp handlers */ + + TdiDeregisterPnPHandlers(ks_data.ksnd_pnp_handle); + ks_data.ksnd_pnp_handle = NULL; + + /* cleanup all the ip address slots */ + KsCleanupIpAddresses(); + } +} + +/* + * KsConnectEventHandler + * Connect event handler event handler, called by the underlying TDI + * transport in response to an incoming request to the listening daemon. + * + * it will grab a vacancy backlog from the children tconn list, and + * build an acception Irp with it, then transfer the Irp to TDI driver. + * + * Arguments: + * TdiEventContext: the tdi connnection object of the listening daemon + * ...... + * + * Return Value: + * Nt kernel status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsConnectEventHandler( + IN PVOID TdiEventContext, + IN LONG RemoteAddressLength, + IN PVOID RemoteAddress, + IN LONG UserDataLength, + IN PVOID UserData, + IN LONG OptionsLength, + IN PVOID Options, + OUT CONNECTION_CONTEXT * ConnectionContext, + OUT PIRP * AcceptIrp + ) +{ + ksock_tconn_t * parent; + ksock_tconn_t * child; + + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + NTSTATUS Status; + + PIRP Irp = NULL; + PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; + + KsPrint((2,"KsConnectEventHandler: call at Irql: %u\n", KeGetCurrentIrql())); + parent = (ksock_tconn_t *) TdiEventContext; + + LASSERT(parent->kstc_type == kstt_listener); + + spin_lock(&(parent->kstc_lock)); + + if (parent->kstc_state == ksts_listening) { + + /* allocate a new ConnectionInfo to backup the peer's info */ + + ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( + NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + + RemoteAddressLength, 'iCsK' ); + + if (NULL == ConnectionInfo) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; + } + + /* initializing ConnectionInfo structure ... 
*/ + + ConnectionInfo->UserDataLength = UserDataLength; + ConnectionInfo->UserData = UserData; + ConnectionInfo->OptionsLength = OptionsLength; + ConnectionInfo->Options = Options; + ConnectionInfo->RemoteAddressLength = RemoteAddressLength; + ConnectionInfo->RemoteAddress = ConnectionInfo + 1; + + RtlCopyMemory( + ConnectionInfo->RemoteAddress, + RemoteAddress, + RemoteAddressLength + ); + + /* get the vacancy listening child tdi connections */ + + child = ks_get_vacancy_backlog(parent); + + if (child) { + + spin_lock(&(child->kstc_lock)); + child->child.kstc_info.ConnectionInfo = ConnectionInfo; + child->child.kstc_info.Remote = ConnectionInfo->RemoteAddress; + child->kstc_state = ksts_connecting; + spin_unlock(&(child->kstc_lock)); + + } else { + + KsPrint((2, "KsConnectEventHandler: No enough backlogs: Refsued the connectio: %xh\n", parent)); + + Status = STATUS_INSUFFICIENT_RESOURCES; + + goto errorout; + } + + FileObject = child->child.kstc_info.FileObject; + DeviceObject = IoGetRelatedDeviceObject (FileObject); + + Irp = KsBuildTdiIrp(DeviceObject); + + TdiBuildAccept( + Irp, + DeviceObject, + FileObject, + KsAcceptCompletionRoutine, + child, + NULL, + NULL + ); + + IoSetNextIrpStackLocation(Irp); + + /* grap the refer of the child tdi connection */ + ks_get_tconn(child); + + Status = STATUS_MORE_PROCESSING_REQUIRED; + + *AcceptIrp = Irp; + *ConnectionContext = child; + + } else { + + Status = STATUS_CONNECTION_REFUSED; + goto errorout; + } + + spin_unlock(&(parent->kstc_lock)); + + return Status; + +errorout: + + spin_unlock(&(parent->kstc_lock)); + + { + *AcceptIrp = NULL; + *ConnectionContext = NULL; + + if (ConnectionInfo) { + + ExFreePool(ConnectionInfo); + } + + if (Irp) { + + IoFreeIrp (Irp); + } + } + + return Status; +} + +/* + * KsDisconnectCompletionRoutine + * the Irp completion routine for TdiBuildDisconect + * + * We just signal the event and return MORE_PRO... to + * let the caller take the responsibility of the Irp. + * + * Arguments: + * DeviceObject: the device object of the transport + * Irp: the Irp is being completed. + * Context: the event specified by the caller + * + * Return Value: + * Nt status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsDisconectCompletionRoutine ( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + + KeSetEvent((PKEVENT) Context, 0, FALSE); + + return STATUS_MORE_PROCESSING_REQUIRED; + + UNREFERENCED_PARAMETER(DeviceObject); +} + + +/* + * KsDisconnectHelper + * the routine to be executed in the WorkItem procedure + * this routine is to disconnect a tdi connection + * + * Arguments: + * Workitem: the context transferred to the workitem + * + * Return Value: + * N/A + * + * Notes: + * tconn is already referred in abort_connecton ... + */ + +VOID +KsDisconnectHelper(PKS_DISCONNECT_WORKITEM WorkItem) +{ + ksock_tconn_t * tconn = WorkItem->tconn; + + DbgPrint("KsDisconnectHelper: disconnecting tconn=%p\n", tconn); + ks_disconnect_tconn(tconn, WorkItem->Flags); + + KeSetEvent(&(WorkItem->Event), 0, FALSE); + + spin_lock(&(tconn->kstc_lock)); + cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); +} + + +/* + * KsDisconnectEventHandler + * Disconnect event handler event handler, called by the underlying TDI transport + * in response to an incoming disconnection notification from a remote node. + * + * Arguments: + * ConnectionContext: tdi connnection object + * DisconnectFlags: specifies the nature of the disconnection + * ...... 
+ * + * Return Value: + * Nt kernel status code + * + * Notes: + * N/A + */ + + +NTSTATUS +KsDisconnectEventHandler( + IN PVOID TdiEventContext, + IN CONNECTION_CONTEXT ConnectionContext, + IN LONG DisconnectDataLength, + IN PVOID DisconnectData, + IN LONG DisconnectInformationLength, + IN PVOID DisconnectInformation, + IN ULONG DisconnectFlags + ) +{ + ksock_tconn_t * tconn; + NTSTATUS Status; + PKS_DISCONNECT_WORKITEM WorkItem; + + tconn = (ksock_tconn_t *)ConnectionContext; + + KsPrint((2, "KsTcpDisconnectEventHandler: called at Irql: %xh\n", + KeGetCurrentIrql() )); + + KsPrint((2, "tconn = %x DisconnectFlags= %xh\n", + tconn, DisconnectFlags)); + + ks_get_tconn(tconn); + spin_lock(&(tconn->kstc_lock)); + + WorkItem = &(tconn->kstc_disconnect); + + if (tconn->kstc_state != ksts_connected) { + + Status = STATUS_SUCCESS; + + } else { + + if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_ABORT)) { + + Status = STATUS_REMOTE_DISCONNECT; + + } else if (cfs_is_flag_set(DisconnectFlags, TDI_DISCONNECT_RELEASE)) { + + Status = STATUS_GRACEFUL_DISCONNECT; + } + + if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { + + ks_get_tconn(tconn); + + WorkItem->Flags = DisconnectFlags; + WorkItem->tconn = tconn; + + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); + + /* queue the workitem to call */ + ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue); + } + } + + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); + + return (Status); +} + +NTSTATUS +KsTcpReceiveCompletionRoutine( + IN PIRP Irp, + IN PKS_TCP_COMPLETION_CONTEXT Context + ) +{ + NTSTATUS Status = Irp->IoStatus.Status; + + if (NT_SUCCESS(Status)) { + + ksock_tconn_t *tconn = Context->tconn; + + PKS_TSDU_DAT KsTsduDat = Context->CompletionContext; + PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext; + + KsPrint((1, "KsTcpReceiveCompletionRoutine: Total %xh bytes.\n", + Context->KsTsduMgr->TotalBytes )); + + spin_lock(&(tconn->kstc_lock)); + + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { + if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { + cfs_clear_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); + } else { + cfs_enter_debugger(); + } + } else { + ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); + if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { + cfs_clear_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); + } else { + cfs_enter_debugger(); + } + } + + spin_unlock(&(tconn->kstc_lock)); + + /* wake up the thread waiting for the completion of this Irp */ + KeSetEvent(Context->Event, 0, FALSE); + + /* re-active the ks connection and wake up the scheduler */ + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb( tconn, FALSE, NULL, + Context->KsTsduMgr->TotalBytes ); + } + + } else { + + /* un-expected errors occur, we must abort the connection */ + ks_abort_tconn(Context->tconn); + } + + if (Context) { + + /* Freeing the Context structure... */ + ExFreePool(Context); + Context = NULL; + } + + + /* free the Irp */ + if (Irp) { + IoFreeIrp(Irp); + } + + return (Status); +} + + +/* + * KsTcpCompletionRoutine + * the Irp completion routine for TdiBuildSend and TdiBuildReceive ... + * We need call the use's own CompletionRoutine if specified. Or + * it's a synchronous case, we need signal the event. + * + * Arguments: + * DeviceObject: the device object of the transport + * Irp: the Irp is being completed. 
+ * Context: the context we specified when issuing the Irp + * + * Return Value: + * Nt status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsTcpCompletionRoutine( + IN PDEVICE_OBJECT DeviceObject, + IN PIRP Irp, + IN PVOID Context + ) +{ + if (Context) { + + PKS_TCP_COMPLETION_CONTEXT CompletionContext = NULL; + ksock_tconn_t * tconn = NULL; + + CompletionContext = (PKS_TCP_COMPLETION_CONTEXT) Context; + tconn = CompletionContext->tconn; + + /* release the chained mdl */ + KsReleaseMdl(Irp->MdlAddress, FALSE); + Irp->MdlAddress = NULL; + + if (CompletionContext->CompletionRoutine) { + + if ( CompletionContext->bCounted && + InterlockedDecrement(&CompletionContext->ReferCount) != 0 ) { + goto errorout; + } + + // + // Giving control to user specified CompletionRoutine ... + // + + CompletionContext->CompletionRoutine( + Irp, + CompletionContext + ); + + } else { + + // + // Signaling the Event ... + // + + KeSetEvent(CompletionContext->Event, 0, FALSE); + } + + /* drop the reference count of the tconn object */ + ks_put_tconn(tconn); + + } else { + + cfs_enter_debugger(); + } + +errorout: + + return STATUS_MORE_PROCESSING_REQUIRED; +} + +/* + * KsTcpSendCompletionRoutine + * the user specified Irp completion routine for asynchronous + * data transmission requests. + * + * It will do th cleanup job of the ksock_tx_t and wake up the + * ks scheduler thread + * + * Arguments: + * Irp: the Irp is being completed. + * Context: the context we specified when issuing the Irp + * + * Return Value: + * Nt status code + * + * Notes: + * N/A + */ + +NTSTATUS +KsTcpSendCompletionRoutine( + IN PIRP Irp, + IN PKS_TCP_COMPLETION_CONTEXT Context + ) +{ + NTSTATUS Status = Irp->IoStatus.Status; + ULONG rc = Irp->IoStatus.Information; + ksock_tconn_t * tconn = Context->tconn; + PKS_TSDUMGR KsTsduMgr = Context->KsTsduMgr; + + ENTRY; + + LASSERT(tconn) ; + + if (NT_SUCCESS(Status)) { + + if (Context->bCounted) { + PVOID tx = Context->CompletionContext; + + ASSERT(tconn->kstc_update_tx != NULL); + + /* update the tx, rebasing the kiov or iov pointers */ + tx = tconn->kstc_update_tx(tconn, tx, rc); + + /* update the KsTsudMgr total bytes */ + spin_lock(&tconn->kstc_lock); + KsTsduMgr->TotalBytes -= rc; + spin_unlock(&tconn->kstc_lock); + + /* + * now it's time to re-queue the conns into the + * scheduler queue and wake the scheduler thread. + */ + + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb( tconn, TRUE, tx, 0); + } + + } else { + + PKS_TSDU KsTsdu = Context->CompletionContext; + PKS_TSDU_BUF KsTsduBuf = Context->CompletionContext2; + PKS_TSDU_DAT KsTsduDat = Context->CompletionContext2; + + spin_lock(&tconn->kstc_lock); + /* This is bufferred sending ... */ + ASSERT(KsTsduBuf->StartOffset == 0); + + if (KsTsduBuf->DataLength > Irp->IoStatus.Information) { + /* not fully sent .... 
we have to abort the connection */ + spin_unlock(&tconn->kstc_lock); + ks_abort_tconn(tconn); + goto errorout; + } + + if (KsTsduBuf->TsduType == TSDU_TYPE_BUF) { + /* free the buffer */ + ExFreePool(KsTsduBuf->UserBuffer); + KsTsduMgr->TotalBytes -= KsTsduBuf->DataLength; + KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); + } else if (KsTsduDat->TsduType == TSDU_TYPE_DAT) { + KsTsduMgr->TotalBytes -= KsTsduDat->DataLength; + KsTsdu->StartOffset += KsTsduDat->TotalLength; + } else { + cfs_enter_debugger(); /* shoult not get here */ + } + + if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + + list_del(&KsTsdu->Link); + KsTsduMgr->NumOfTsdu--; + KsPutKsTsdu(KsTsdu); + } + + spin_unlock(&tconn->kstc_lock); + } + + } else { + + /* cfs_enter_debugger(); */ + + /* + * for the case that the transmission is ussuccessful, + * we need abort the tdi connection, but not destroy it. + * the socknal conn will drop the refer count, then the + * tdi connection will be freed. + */ + + ks_abort_tconn(tconn); + } + +errorout: + + /* freeing the Context structure... */ + + if (Context) { + ExFreePool(Context); + Context = NULL; + } + + /* it's our duty to free the Irp. */ + + if (Irp) { + IoFreeIrp(Irp); + Irp = NULL; + } + + EXIT; + + return Status; +} + +/* + * Normal receive event handler + * + * It will move data from system Tsdu to our TsduList + */ + +NTSTATUS +KsTcpReceiveEventHandler( + IN PVOID TdiEventContext, + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG BytesIndicated, + IN ULONG BytesAvailable, + OUT ULONG * BytesTaken, + IN PVOID Tsdu, + OUT PIRP * IoRequestPacket + ) +{ + NTSTATUS Status; + + ksock_tconn_t * tconn; + + PKS_CHAIN KsChain; + PKS_TSDUMGR KsTsduMgr; + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + + BOOLEAN bIsExpedited; + BOOLEAN bIsCompleteTsdu; + + BOOLEAN bNewTsdu = FALSE; + BOOLEAN bNewBuff = FALSE; + + PCHAR Buffer = NULL; + + PIRP Irp = NULL; + PMDL Mdl = NULL; + PFILE_OBJECT FileObject; + PDEVICE_OBJECT DeviceObject; + + ULONG BytesReceived = 0; + + PKS_TCP_COMPLETION_CONTEXT context = NULL; + + + tconn = (ksock_tconn_t *) ConnectionContext; + + ks_get_tconn(tconn); + + /* check whether the whole body of payload is received or not */ + if ( (cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_ENTIRE_MESSAGE)) && + (BytesIndicated == BytesAvailable) ) { + bIsCompleteTsdu = TRUE; + } else { + bIsCompleteTsdu = FALSE; + } + + bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); + + KsPrint((2, "KsTcpReceiveEventHandler BytesIndicated = %d BytesAvailable = %d ...\n", BytesIndicated, BytesAvailable)); + KsPrint((2, "bIsCompleteTsdu = %d bIsExpedited = %d\n", bIsCompleteTsdu, bIsExpedited )); + + spin_lock(&(tconn->kstc_lock)); + + /* check whether we are conntected or not listener ¡­*/ + if ( !((tconn->kstc_state == ksts_connected) && + (tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child))) { + + *BytesTaken = BytesIndicated; + + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); + + return (STATUS_SUCCESS); + } + + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_recv); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_recv); + } + + if (bIsExpedited) { + KsTsduMgr = &(KsChain->Expedited); + } else { + KsTsduMgr = &(KsChain->Normal); + } + + /* if the Tsdu is even larger than the biggest Tsdu, we have + to allocate new buffer and use TSDU_TYOE_BUF to store it */ + + if ( KS_TSDU_STRU_SIZE(BytesAvailable) > ks_data.ksnd_tsdu_size - + 
KS_DWORD_ALIGN(sizeof(KS_TSDU))) { + bNewBuff = TRUE; + } + + /* retrieve the latest Tsdu buffer form TsduMgr + list if the list is not empty. */ + + if (list_empty(&(KsTsduMgr->TsduList))) { + + LASSERT(KsTsduMgr->NumOfTsdu == 0); + KsTsdu = NULL; + + } else { + + LASSERT(KsTsduMgr->NumOfTsdu > 0); + KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); + + /* if this Tsdu does not contain enough space, we need + allocate a new Tsdu queue. */ + + if (bNewBuff) { + if ( KsTsdu->LastOffset + sizeof(KS_TSDU_BUF) > + KsTsdu->TotalLength ) { + KsTsdu = NULL; + } + } else { + if ( KS_TSDU_STRU_SIZE(BytesAvailable) > + KsTsdu->TotalLength - KsTsdu->LastOffset ) { + KsTsdu = NULL; + } + } + } + + /* allocating the buffer for TSDU_TYPE_BUF */ + if (bNewBuff) { + Buffer = ExAllocatePool(NonPagedPool, BytesAvailable); + if (NULL == Buffer) { + /* there's no enough memory for us. We just try to + receive maximum bytes with a new Tsdu */ + bNewBuff = FALSE; + KsTsdu = NULL; + } + } + + /* allocate a new Tsdu in case we are not statisfied. */ + + if (NULL == KsTsdu) { + + KsTsdu = KsAllocateKsTsdu(); + + if (NULL == KsTsdu) { + goto errorout; + } else { + bNewTsdu = TRUE; + } + } + + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + + if (bNewBuff) { + + /* setup up the KS_TSDU_BUF record */ + + KsTsduBuf->TsduType = TSDU_TYPE_BUF; + KsTsduBuf->TsduFlags = 0; + KsTsduBuf->StartOffset = 0; + KsTsduBuf->UserBuffer = Buffer; + KsTsduBuf->DataLength = BytesReceived = BytesAvailable; + + KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); + + } else { + + /* setup the KS_TSDU_DATA to contain all the messages */ + + KsTsduDat->TsduType = TSDU_TYPE_DAT; + KsTsduDat->TsduFlags = 0; + + if ( KsTsdu->TotalLength - KsTsdu->LastOffset >= + KS_TSDU_STRU_SIZE(BytesAvailable) ) { + BytesReceived = BytesAvailable; + } else { + BytesReceived = KsTsdu->TotalLength - KsTsdu->LastOffset - + FIELD_OFFSET(KS_TSDU_DAT, Data); + BytesReceived &= (~((ULONG)3)); + } + KsTsduDat->DataLength = BytesReceived; + KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE(BytesReceived); + KsTsduDat->StartOffset = 0; + + Buffer = &KsTsduDat->Data[0]; + + KsTsdu->LastOffset += KsTsduDat->TotalLength; + } + + KsTsduMgr->TotalBytes += BytesReceived; + + if (bIsCompleteTsdu) { + + /* It's a complete receive, we just move all + the data from system to our Tsdu */ + + RtlMoveMemory( + Buffer, + Tsdu, + BytesReceived + ); + + *BytesTaken = BytesReceived; + Status = STATUS_SUCCESS; + + if (bNewTsdu) { + list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); + KsTsduMgr->NumOfTsdu++; + } + + KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); + + /* re-active the ks connection and wake up the scheduler */ + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb( tconn, FALSE, NULL, + KsTsduMgr->TotalBytes ); + } + + } else { + + /* there's still data in tdi internal queue, we need issue a new + Irp to receive all of them. first allocate the tcp context */ + + context = ExAllocatePoolWithTag( + NonPagedPool, + sizeof(KS_TCP_COMPLETION_CONTEXT), + 'cTsK'); + + if (!context) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + /* setup the context */ + RtlZeroMemory(context, sizeof(KS_TCP_COMPLETION_CONTEXT)); + + context->tconn = tconn; + context->CompletionRoutine = KsTcpReceiveCompletionRoutine; + context->CompletionContext = KsTsdu; + context->CompletionContext = bNewBuff ? 
(PVOID)KsTsduBuf : (PVOID)KsTsduDat; + context->KsTsduMgr = KsTsduMgr; + context->Event = &(KsTsduMgr->Event); + + if (tconn->kstc_type == kstt_sender) { + FileObject = tconn->sender.kstc_info.FileObject; + } else { + FileObject = tconn->child.kstc_info.FileObject; + } + + DeviceObject = IoGetRelatedDeviceObject(FileObject); + + /* build new tdi Irp and setup it. */ + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + goto errorout; + } + + Status = KsLockUserBuffer( + Buffer, + FALSE, + BytesReceived, + IoModifyAccess, + &Mdl + ); + + if (!NT_SUCCESS(Status)) { + goto errorout; + } + + TdiBuildReceive( + Irp, + DeviceObject, + FileObject, + KsTcpCompletionRoutine, + context, + Mdl, + ReceiveFlags & (TDI_RECEIVE_NORMAL | TDI_RECEIVE_EXPEDITED), + BytesReceived + ); + + IoSetNextIrpStackLocation(Irp); + + /* return the newly built Irp to transport driver, + it will process it to receive all the data */ + + *IoRequestPacket = Irp; + *BytesTaken = 0; + + if (bNewTsdu) { + + list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); + KsTsduMgr->NumOfTsdu++; + } + + if (bNewBuff) { + cfs_set_flag(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING); + } else { + cfs_set_flag(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING); + } + ks_get_tconn(tconn); + Status = STATUS_MORE_PROCESSING_REQUIRED; + } + + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); + + return (Status); + +errorout: + + spin_unlock(&(tconn->kstc_lock)); + + if (bNewTsdu && (KsTsdu != NULL)) { + KsFreeKsTsdu(KsTsdu); + } + + if (Mdl) { + KsReleaseMdl(Mdl, FALSE); + } + + if (Irp) { + IoFreeIrp(Irp); + } + + if (context) { + ExFreePool(context); + } + + ks_abort_tconn(tconn); + ks_put_tconn(tconn); + + *BytesTaken = BytesAvailable; + Status = STATUS_SUCCESS; + + return (Status); +} + +/* + * Expedited receive event handler + */ + +NTSTATUS +KsTcpReceiveExpeditedEventHandler( + IN PVOID TdiEventContext, + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG BytesIndicated, + IN ULONG BytesAvailable, + OUT ULONG * BytesTaken, + IN PVOID Tsdu, + OUT PIRP * IoRequestPacket + ) +{ + return KsTcpReceiveEventHandler( + TdiEventContext, + ConnectionContext, + ReceiveFlags | TDI_RECEIVE_EXPEDITED, + BytesIndicated, + BytesAvailable, + BytesTaken, + Tsdu, + IoRequestPacket + ); +} + + +/* + * Bulk receive event handler + * + * It will queue all the system Tsdus to our TsduList. + * Then later ks_recv_mdl will release them. 
+ */ + +NTSTATUS +KsTcpChainedReceiveEventHandler ( + IN PVOID TdiEventContext, // the event context + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG ReceiveLength, + IN ULONG StartingOffset, // offset of start of client data in TSDU + IN PMDL Tsdu, // TSDU data chain + IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives + ) +{ + + NTSTATUS Status; + + ksock_tconn_t * tconn; + + PKS_CHAIN KsChain; + PKS_TSDUMGR KsTsduMgr; + PKS_TSDU KsTsdu; + PKS_TSDU_MDL KsTsduMdl; + + BOOLEAN bIsExpedited; + BOOLEAN bNewTsdu = FALSE; + + tconn = (ksock_tconn_t *) ConnectionContext; + + bIsExpedited = cfs_is_flag_set(ReceiveFlags, TDI_RECEIVE_EXPEDITED); + + KsPrint((2, "KsTcpChainedReceive: ReceiveLength = %xh bIsExpedited = %d\n", ReceiveLength, bIsExpedited)); + + ks_get_tconn(tconn); + spin_lock(&(tconn->kstc_lock)); + + /* check whether we are conntected or not listener ¡­*/ + if ( !((tconn->kstc_state == ksts_connected) && + (tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child))) { + + spin_unlock(&(tconn->kstc_lock)); + ks_put_tconn(tconn); + + return (STATUS_SUCCESS); + } + + /* get the latest Tsdu buffer form TsduMgr list. + just set NULL if the list is empty. */ + + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_recv); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_recv); + } + + if (bIsExpedited) { + KsTsduMgr = &(KsChain->Expedited); + } else { + KsTsduMgr = &(KsChain->Normal); + } + + if (list_empty(&(KsTsduMgr->TsduList))) { + + LASSERT(KsTsduMgr->NumOfTsdu == 0); + KsTsdu = NULL; + + } else { + + LASSERT(KsTsduMgr->NumOfTsdu > 0); + KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + + if (sizeof(KS_TSDU_MDL) > KsTsdu->TotalLength - KsTsdu->LastOffset) { + KsTsdu = NULL; + } + } + + /* if there's no Tsdu or the free size is not enough for this + KS_TSDU_MDL structure. We need re-allocate a new Tsdu. */ + + if (NULL == KsTsdu) { + + KsTsdu = KsAllocateKsTsdu(); + + if (NULL == KsTsdu) { + goto errorout; + } else { + bNewTsdu = TRUE; + } + } + + /* just queue the KS_TSDU_MDL to the Tsdu buffer */ + + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + + KsTsduMdl->TsduType = TSDU_TYPE_MDL; + KsTsduMdl->DataLength = ReceiveLength; + KsTsduMdl->StartOffset = StartingOffset; + KsTsduMdl->Mdl = Tsdu; + KsTsduMdl->Descriptor = TsduDescriptor; + + KsTsdu->LastOffset += sizeof(KS_TSDU_MDL); + KsTsduMgr->TotalBytes += ReceiveLength; + + KsPrint((2, "KsTcpChainedReceiveEventHandler: Total %xh bytes.\n", + KsTsduMgr->TotalBytes )); + + Status = STATUS_PENDING; + + /* attach it to the TsduMgr list if the Tsdu is newly created. */ + if (bNewTsdu) { + + list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); + KsTsduMgr->NumOfTsdu++; + } + + spin_unlock(&(tconn->kstc_lock)); + + /* wake up the threads waiing in ks_recv_mdl */ + KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); + + if (tconn->kstc_conn && tconn->kstc_sched_cb) { + tconn->kstc_sched_cb( tconn, FALSE, NULL, + KsTsduMgr->TotalBytes ); + } + + ks_put_tconn(tconn); + + /* Return STATUS_PENDING to system because we are still + owning the MDL resources. ks_recv_mdl is expected + to free the MDL resources. 
*/ + + return (Status); + +errorout: + + spin_unlock(&(tconn->kstc_lock)); + + if (bNewTsdu && (KsTsdu != NULL)) { + KsFreeKsTsdu(KsTsdu); + } + + /* abort the tdi connection */ + ks_abort_tconn(tconn); + ks_put_tconn(tconn); + + + Status = STATUS_SUCCESS; + + return (Status); +} + + +/* + * Expedited & Bulk receive event handler + */ + +NTSTATUS +KsTcpChainedReceiveExpeditedEventHandler ( + IN PVOID TdiEventContext, // the event context + IN CONNECTION_CONTEXT ConnectionContext, + IN ULONG ReceiveFlags, + IN ULONG ReceiveLength, + IN ULONG StartingOffset, // offset of start of client data in TSDU + IN PMDL Tsdu, // TSDU data chain + IN PVOID TsduDescriptor // for call to TdiReturnChainedReceives + ) +{ + return KsTcpChainedReceiveEventHandler( + TdiEventContext, + ConnectionContext, + ReceiveFlags | TDI_RECEIVE_EXPEDITED, + ReceiveLength, + StartingOffset, + Tsdu, + TsduDescriptor ); +} + + +VOID +KsPrintProviderInfo( + PWSTR DeviceName, + PTDI_PROVIDER_INFO ProviderInfo + ) +{ + KsPrint((2, "%ws ProviderInfo:\n", DeviceName)); + + KsPrint((2, " Version : 0x%4.4X\n", ProviderInfo->Version )); + KsPrint((2, " MaxSendSize : %d\n", ProviderInfo->MaxSendSize )); + KsPrint((2, " MaxConnectionUserData: %d\n", ProviderInfo->MaxConnectionUserData )); + KsPrint((2, " MaxDatagramSize : %d\n", ProviderInfo->MaxDatagramSize )); + KsPrint((2, " ServiceFlags : 0x%8.8X\n", ProviderInfo->ServiceFlags )); + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTION_MODE) { + KsPrint((2, " CONNECTION_MODE\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ORDERLY_RELEASE) { + KsPrint((2, " ORDERLY_RELEASE\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_CONNECTIONLESS_MODE) { + KsPrint((2, " CONNECTIONLESS_MODE\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ERROR_FREE_DELIVERY) { + KsPrint((2, " ERROR_FREE_DELIVERY\n")); + } + + if( ProviderInfo->ServiceFlags & TDI_SERVICE_SECURITY_LEVEL ) { + KsPrint((2, " SECURITY_LEVEL\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_BROADCAST_SUPPORTED) { + KsPrint((2, " BROADCAST_SUPPORTED\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_MULTICAST_SUPPORTED) { + KsPrint((2, " MULTICAST_SUPPORTED\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_DELAYED_ACCEPTANCE) { + KsPrint((2, " DELAYED_ACCEPTANCE\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_EXPEDITED_DATA) { + KsPrint((2, " EXPEDITED_DATA\n")); + } + + if( ProviderInfo->ServiceFlags & TDI_SERVICE_INTERNAL_BUFFERING) { + KsPrint((2, " INTERNAL_BUFFERING\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_ROUTE_DIRECTED) { + KsPrint((2, " ROUTE_DIRECTED\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_NO_ZERO_LENGTH) { + KsPrint((2, " NO_ZERO_LENGTH\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_POINT_TO_POINT) { + KsPrint((2, " POINT_TO_POINT\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_MESSAGE_MODE) { + KsPrint((2, " MESSAGE_MODE\n")); + } + + if (ProviderInfo->ServiceFlags & TDI_SERVICE_HALF_DUPLEX) { + KsPrint((2, " HALF_DUPLEX\n")); + } + + KsPrint((2, " MinimumLookaheadData : %d\n", ProviderInfo->MinimumLookaheadData )); + KsPrint((2, " MaximumLookaheadData : %d\n", ProviderInfo->MaximumLookaheadData )); + KsPrint((2, " NumberOfResources : %d\n", ProviderInfo->NumberOfResources )); +} + + +/* + * KsAllocateKsTsdu + * Reuse a Tsdu from the freelist or allocate a new Tsdu + * from the LookAsideList table or the NonPagedPool + * + * Arguments: + * N/A + * + * Return Value: + * PKS_Tsdu: the new Tsdu 
or NULL if it fails + * + * Notes: + * N/A + */ + +PKS_TSDU +KsAllocateKsTsdu() +{ + PKS_TSDU KsTsdu = NULL; + + spin_lock(&(ks_data.ksnd_tsdu_lock)); + + if (!list_empty (&(ks_data.ksnd_freetsdus))) { + + LASSERT(ks_data.ksnd_nfreetsdus > 0); + + KsTsdu = list_entry(ks_data.ksnd_freetsdus.next, KS_TSDU, Link); + list_del(&(KsTsdu->Link)); + ks_data.ksnd_nfreetsdus--; + + } else { + + KsTsdu = (PKS_TSDU) cfs_mem_cache_alloc( + ks_data.ksnd_tsdu_slab, 0); + } + + spin_unlock(&(ks_data.ksnd_tsdu_lock)); + + if (NULL != KsTsdu) { + KsInitializeKsTsdu(KsTsdu, ks_data.ksnd_tsdu_size); + } + + return (KsTsdu); +} + + +/* + * KsPutKsTsdu + * Move the Tsdu to the free tsdu list in ks_data. + * + * Arguments: + * KsTsdu: Tsdu to be moved. + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +VOID +KsPutKsTsdu( + PKS_TSDU KsTsdu + ) +{ + spin_lock(&(ks_data.ksnd_tsdu_lock)); + + list_add_tail( &(KsTsdu->Link), &(ks_data.ksnd_freetsdus)); + ks_data.ksnd_nfreetsdus++; + + spin_unlock(&(ks_data.ksnd_tsdu_lock)); +} + + +/* + * KsFreeKsTsdu + * Release a Tsdu: uninitialize then free it. + * + * Arguments: + * KsTsdu: Tsdu to be freed. + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +VOID +KsFreeKsTsdu( + PKS_TSDU KsTsdu + ) +{ + cfs_mem_cache_free( + ks_data.ksnd_tsdu_slab, + KsTsdu ); +} + + +/* + * KsInitializeKsTsdu + * Initialize the Tsdu buffer header + * + * Arguments: + * KsTsdu: the Tsdu to be initialized + * Length: the total length of the Tsdu + * + * Return Value: + * VOID + * + * NOTES: + * N/A + */ + +VOID +KsInitializeKsTsdu( + PKS_TSDU KsTsdu, + ULONG Length + ) +{ + RtlZeroMemory(KsTsdu, Length); + KsTsdu->Magic = KS_TSDU_MAGIC; + KsTsdu->TotalLength = Length; + KsTsdu->StartOffset = KsTsdu->LastOffset = + KS_DWORD_ALIGN(sizeof(KS_TSDU)); +} + + +/* + * KsInitializeKsTsduMgr + * Initialize the management structure of + * Tsdu buffers + * + * Arguments: + * TsduMgr: the TsduMgr to be initialized + * + * Return Value: + * VOID + * + * NOTES: + * N/A + */ + +VOID +KsInitializeKsTsduMgr( + PKS_TSDUMGR TsduMgr + ) +{ + KeInitializeEvent( + &(TsduMgr->Event), + NotificationEvent, + FALSE + ); + + CFS_INIT_LIST_HEAD( + &(TsduMgr->TsduList) + ); + + TsduMgr->NumOfTsdu = 0; + TsduMgr->TotalBytes = 0; +} + + +/* + * KsInitializeKsChain + * Initialize the China structure for receiving + * or transmitting + * + * Arguments: + * KsChain: the KsChain to be initialized + * + * Return Value: + * VOID + * + * NOTES: + * N/A + */ + +VOID +KsInitializeKsChain( + PKS_CHAIN KsChain + ) +{ + KsInitializeKsTsduMgr(&(KsChain->Normal)); + KsInitializeKsTsduMgr(&(KsChain->Expedited)); +} + + +/* + * KsCleanupTsduMgr + * Clean up all the Tsdus in the TsduMgr list + * + * Arguments: + * KsTsduMgr: the Tsdu list manager + * + * Return Value: + * NTSTATUS: nt status code + * + * NOTES: + * N/A + */ + +NTSTATUS +KsCleanupTsduMgr( + PKS_TSDUMGR KsTsduMgr + ) +{ + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; + + LASSERT(NULL != KsTsduMgr); + + KeSetEvent(&(KsTsduMgr->Event), 0, FALSE); + + while (!list_empty(&KsTsduMgr->TsduList)) { + + KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + + if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + + // + // KsTsdu is empty now, we need free it ... 
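/*
 * Layout reminder for this loop (see KsInitializeKsTsdu() above): a
 * KS_TSDU is one slab buffer holding packed KS_TSDU_DAT / KS_TSDU_BUF /
 * KS_TSDU_MDL records after its header.  Producers append records and
 * advance LastOffset, consumers advance StartOffset, so
 * StartOffset == LastOffset means the buffer is drained:
 *
 *     | KS_TSDU header | consumed ... | pending records | free |
 *     0                               ^StartOffset      ^LastOffset
 *
 * The usual lifecycle around that buffer is:
 *
 *     PKS_TSDU tsdu = KsAllocateKsTsdu();   // freelist or slab
 *     ...append records, advancing LastOffset...
 *     KsPutKsTsdu(tsdu);                    // park on the freelist
 */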
+ // + + list_del(&(KsTsdu->Link)); + KsTsduMgr->NumOfTsdu--; + + KsFreeKsTsdu(KsTsdu); + + } else { + + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { + + KsTsdu->StartOffset += KsTsduDat->TotalLength; + + } else if (TSDU_TYPE_BUF == KsTsduBuf->TsduType) { + + ASSERT(KsTsduBuf->UserBuffer != NULL); + + if (KsTsduBuf->DataLength > KsTsduBuf->StartOffset) { + ExFreePool(KsTsduBuf->UserBuffer); + } else { + cfs_enter_debugger(); + } + + KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); + + } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { + + // + // MDL Tsdu Unit ... + // + + TdiReturnChainedReceives( + &(KsTsduMdl->Descriptor), + 1 ); + + KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); + } + } + } + + return STATUS_SUCCESS; +} + + +/* + * KsCleanupKsChain + * Clean up the TsduMgrs of the KsChain + * + * Arguments: + * KsChain: the chain managing TsduMgr + * + * Return Value: + * NTSTATUS: nt status code + * + * NOTES: + * N/A + */ + +NTSTATUS +KsCleanupKsChain( + PKS_CHAIN KsChain + ) +{ + NTSTATUS Status; + + LASSERT(NULL != KsChain); + + Status = KsCleanupTsduMgr( + &(KsChain->Normal) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + Status = KsCleanupTsduMgr( + &(KsChain->Expedited) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + +errorout: + + return Status; +} + + +/* + * KsCleanupTsdu + * Clean up all the Tsdus of a tdi connected object + * + * Arguments: + * tconn: the tdi connection which is connected already. + * + * Return Value: + * Nt status code + * + * NOTES: + * N/A + */ + +NTSTATUS +KsCleanupTsdu( + ksock_tconn_t * tconn + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + + if (tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child ) { + + goto errorout; + } + + if (tconn->kstc_type == kstt_sender) { + + Status = KsCleanupKsChain( + &(tconn->sender.kstc_recv) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + Status = KsCleanupKsChain( + &(tconn->sender.kstc_send) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + } else { + + Status = KsCleanupKsChain( + &(tconn->child.kstc_recv) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + Status = KsCleanupKsChain( + &(tconn->child.kstc_send) + ); + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + + } + +errorout: + + return (Status); +} + + +/* + * KsCopyMdlChainToMdlChain + * Copy data from a [chained] Mdl to anther [chained] Mdl. + * Tdi library does not provide this function. We have to + * realize it ourselives. + * + * Arguments: + * SourceMdlChain: the source mdl + * SourceOffset: start offset of the source + * DestinationMdlChain: the dst mdl + * DestinationOffset: the offset where data are to be copied. 
+ * BytesTobecopied: the expteced bytes to be copied + * BytesCopied: to store the really copied data length + * + * Return Value: + * NTSTATUS: STATUS_SUCCESS or other error code + * + * NOTES: + * The length of source mdl must be >= SourceOffset + BytesTobecopied + */ + +NTSTATUS +KsCopyMdlChainToMdlChain( + IN PMDL SourceMdlChain, + IN ULONG SourceOffset, + IN PMDL DestinationMdlChain, + IN ULONG DestinationOffset, + IN ULONG BytesTobecopied, + OUT PULONG BytesCopied + ) +{ + PMDL SrcMdl = SourceMdlChain; + PMDL DstMdl = DestinationMdlChain; + + PUCHAR SrcBuf = NULL; + PUCHAR DstBuf = NULL; + + ULONG dwBytes = 0; + + NTSTATUS Status = STATUS_SUCCESS; + + + while (dwBytes < BytesTobecopied) { + + ULONG Length = 0; + + while (MmGetMdlByteCount(SrcMdl) <= SourceOffset) { + + SourceOffset -= MmGetMdlByteCount(SrcMdl); + + SrcMdl = SrcMdl->Next; + + if (NULL == SrcMdl) { + + Status = STATUS_INVALID_PARAMETER; + goto errorout; + } + } + + while (MmGetMdlByteCount(DstMdl) <= DestinationOffset) { + + DestinationOffset -= MmGetMdlByteCount(DstMdl); + + DstMdl = DstMdl->Next; + + if (NULL == DstMdl) { + + Status = STATUS_INVALID_PARAMETER; + goto errorout; + } + } + + DstBuf = (PUCHAR)KsMapMdlBuffer(DstMdl); + + if ((NULL == DstBuf)) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + // + // Here we need skip the OVERFLOW case via RtlCopyMemory :-( + // + + if ( KsQueryMdlsSize(SrcMdl) - SourceOffset > + MmGetMdlByteCount(DstMdl) - DestinationOffset ) { + + Length = BytesTobecopied - dwBytes; + + if (Length > KsQueryMdlsSize(SrcMdl) - SourceOffset) { + Length = KsQueryMdlsSize(SrcMdl) - SourceOffset; + } + + if (Length > MmGetMdlByteCount(DstMdl) - DestinationOffset) { + Length = MmGetMdlByteCount(DstMdl) - DestinationOffset; + } + + SrcBuf = (PUCHAR)KsMapMdlBuffer(SrcMdl); + + if ((NULL == DstBuf)) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + RtlCopyMemory( + DstBuf + DestinationOffset, + SrcBuf + SourceOffset, + Length + ); + + } else { + + Status = TdiCopyMdlToBuffer( + SrcMdl, + SourceOffset, + DstBuf, + DestinationOffset, + MmGetMdlByteCount(DstMdl), + &Length + ); + + if (STATUS_BUFFER_OVERFLOW == Status) { + cfs_enter_debugger(); + } else if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + goto errorout; + } + } + + SourceOffset += Length; + DestinationOffset += Length; + dwBytes += Length; + } + +errorout: + + if (NT_SUCCESS(Status)) { + *BytesCopied = dwBytes; + } else { + *BytesCopied = 0; + } + + return Status; +} + + + +/* + * KsQueryMdlSize + * Query the whole size of a MDL (may be chained) + * + * Arguments: + * Mdl: the Mdl to be queried + * + * Return Value: + * ULONG: the total size of the mdl + * + * NOTES: + * N/A + */ + +ULONG +KsQueryMdlsSize (PMDL Mdl) +{ + PMDL Next = Mdl; + ULONG Length = 0; + + + // + // Walking the MDL Chain ... 
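+    //
+    // (usage sketch for KsCopyMdlChainToMdlChain above -- the offsets and
+    //  the 1024-byte length are hypothetical, for illustration only:
+    //
+    //      ULONG copied = 0;
+    //      Status = KsCopyMdlChainToMdlChain(SrcChain, 0, DstChain, 0,
+    //                                        1024, &copied);
+    //
+    //  the source chain must hold at least SourceOffset + 1024 bytes.)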
+ // + + while (Next) { + Length += MmGetMdlByteCount(Next); + Next = Next->Next; + } + + return (Length); +} + + +/* + * KsLockUserBuffer + * Allocate MDL for the buffer and lock the pages into + * nonpaged pool + * + * Arguments: + * UserBuffer: the user buffer to be locked + * Length: length in bytes of the buffer + * Operation: read or write access + * pMdl: the result of the created mdl + * + * Return Value: + * NTSTATUS: kernel status code (STATUS_SUCCESS + * or other error code) + * + * NOTES: + * N/A + */ + +NTSTATUS +KsLockUserBuffer ( + IN PVOID UserBuffer, + IN BOOLEAN bPaged, + IN ULONG Length, + IN LOCK_OPERATION Operation, + OUT PMDL * pMdl + ) +{ + NTSTATUS Status; + PMDL Mdl = NULL; + + LASSERT(UserBuffer != NULL); + + *pMdl = NULL; + + Mdl = IoAllocateMdl( + UserBuffer, + Length, + FALSE, + FALSE, + NULL + ); + + if (Mdl == NULL) { + + Status = STATUS_INSUFFICIENT_RESOURCES; + + } else { + + __try { + + if (bPaged) { + MmProbeAndLockPages( + Mdl, + KernelMode, + Operation + ); + } else { + MmBuildMdlForNonPagedPool( + Mdl + ); + } + + Status = STATUS_SUCCESS; + + *pMdl = Mdl; + + } __except (EXCEPTION_EXECUTE_HANDLER) { + + IoFreeMdl(Mdl); + + Mdl = NULL; + + cfs_enter_debugger(); + + Status = STATUS_INVALID_USER_BUFFER; + } + } + + return Status; +} + +/* + * KsMapMdlBuffer + * Map the mdl into a buffer in kernel space + * + * Arguments: + * Mdl: the mdl to be mapped + * + * Return Value: + * PVOID: the buffer mapped or NULL in failure + * + * NOTES: + * N/A + */ + +PVOID +KsMapMdlBuffer (PMDL Mdl) +{ + LASSERT(Mdl != NULL); + + return MmGetSystemAddressForMdlSafe( + Mdl, + NormalPagePriority + ); +} + + +/* + * KsReleaseMdl + * Unlock all the pages in the mdl + * + * Arguments: + * Mdl: memory description list to be released + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +VOID +KsReleaseMdl (IN PMDL Mdl, + IN int Paged ) +{ + LASSERT(Mdl != NULL); + + while (Mdl) { + + PMDL Next; + + Next = Mdl->Next; + + if (Paged) { + MmUnlockPages(Mdl); + } + + IoFreeMdl(Mdl); + + Mdl = Next; + } +} + + +/* + * ks_lock_buffer + * allocate MDL for the user spepcified buffer and lock (paging-in) + * all the pages of the buffer into system memory + * + * Arguments: + * buffer: the user buffer to be locked + * length: length in bytes of the buffer + * access: read or write access + * mdl: the result of the created mdl + * + * Return Value: + * int: the ks error code: 0: success / -x: failture + * + * Notes: + * N/A + */ + +int +ks_lock_buffer ( + void * buffer, + int paged, + int length, + LOCK_OPERATION access, + ksock_mdl_t ** kmdl + ) +{ + NTSTATUS status; + + status = KsLockUserBuffer( + buffer, + paged !=0, + length, + access, + kmdl + ); + + return cfs_error_code(status); +} + + +/* + * ks_map_mdl + * Map the mdl pages into kernel space + * + * Arguments: + * mdl: the mdl to be mapped + * + * Return Value: + * void *: the buffer mapped or NULL in failure + * + * Notes: + * N/A + */ + +void * +ks_map_mdl (ksock_mdl_t * mdl) +{ + LASSERT(mdl != NULL); + + return KsMapMdlBuffer(mdl); +} + +/* + * ks_release_mdl + * Unlock all the pages in the mdl and release the mdl + * + * Arguments: + * mdl: memory description list to be released + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_release_mdl (ksock_mdl_t *mdl, int paged) +{ + LASSERT(mdl != NULL); + + KsReleaseMdl(mdl, paged); +} + + +/* + * ks_create_tconn + * allocate a new tconn structure from the SLAB cache or + * NonPaged sysetm pool + * + * Arguments: + * N/A + * + * Return Value: + * 
ksock_tconn_t *: the address of tconn or NULL if it fails + * + * NOTES: + * N/A + */ + +ksock_tconn_t * +ks_create_tconn() +{ + ksock_tconn_t * tconn = NULL; + + /* allocate ksoc_tconn_t from the slab cache memory */ + + tconn = (ksock_tconn_t *)cfs_mem_cache_alloc( + ks_data.ksnd_tconn_slab, CFS_ALLOC_ZERO); + + if (tconn) { + + /* zero tconn elements */ + memset(tconn, 0, sizeof(ksock_tconn_t)); + + /* initialize the tconn ... */ + tconn->kstc_magic = KS_TCONN_MAGIC; + + ExInitializeWorkItem( + &(tconn->kstc_disconnect.WorkItem), + KsDisconnectHelper, + &(tconn->kstc_disconnect) + ); + + KeInitializeEvent( + &(tconn->kstc_disconnect.Event), + SynchronizationEvent, + FALSE ); + + ExInitializeWorkItem( + &(tconn->kstc_destroy), + ks_destroy_tconn, + tconn + ); + + spin_lock_init(&(tconn->kstc_lock)); + + ks_get_tconn(tconn); + + spin_lock(&(ks_data.ksnd_tconn_lock)); + + /* attach it into global list in ks_data */ + + list_add(&(tconn->kstc_list), &(ks_data.ksnd_tconns)); + ks_data.ksnd_ntconns++; + spin_unlock(&(ks_data.ksnd_tconn_lock)); + + tconn->kstc_rcv_wnd = tconn->kstc_snd_wnd = 0x10000; + } + + return (tconn); +} + + +/* + * ks_free_tconn + * free the tconn structure to the SLAB cache or NonPaged + * sysetm pool + * + * Arguments: + * tconn: the tcon is to be freed + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_free_tconn(ksock_tconn_t * tconn) +{ + LASSERT(atomic_read(&(tconn->kstc_refcount)) == 0); + + spin_lock(&(ks_data.ksnd_tconn_lock)); + + /* remove it from the global list */ + list_del(&tconn->kstc_list); + ks_data.ksnd_ntconns--; + + /* if this is the last tconn, it would be safe for + ks_tdi_fini_data to quit ... */ + if (ks_data.ksnd_ntconns == 0) { + cfs_wake_event(&ks_data.ksnd_tconn_exit); + } + spin_unlock(&(ks_data.ksnd_tconn_lock)); + + /* free the structure memory */ + cfs_mem_cache_free(ks_data.ksnd_tconn_slab, tconn); +} + + +/* + * ks_init_listener + * Initialize the tconn as a listener (daemon) + * + * Arguments: + * tconn: the listener tconn + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_init_listener( + ksock_tconn_t * tconn + ) +{ + /* preparation: intialize the tconn members */ + + tconn->kstc_type = kstt_listener; + + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + + CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_listening.list)); + CFS_INIT_LIST_HEAD(&(tconn->listener.kstc_accepted.list)); + + cfs_init_event( &(tconn->listener.kstc_accept_event), + TRUE, + FALSE ); + + cfs_init_event( &(tconn->listener.kstc_destroy_event), + TRUE, + FALSE ); + + tconn->kstc_state = ksts_inited; +} + + +/* + * ks_init_sender + * Initialize the tconn as a sender + * + * Arguments: + * tconn: the sender tconn + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_init_sender( + ksock_tconn_t * tconn + ) +{ + tconn->kstc_type = kstt_sender; + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + + KsInitializeKsChain(&(tconn->sender.kstc_recv)); + KsInitializeKsChain(&(tconn->sender.kstc_send)); + + tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + + tconn->kstc_state = ksts_inited; +} + +/* + * ks_init_child + * Initialize the tconn as a child + * + * Arguments: + * tconn: the child tconn + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +void +ks_init_child( + ksock_tconn_t * tconn + ) +{ + tconn->kstc_type = kstt_child; + RtlInitUnicodeString(&(tconn->kstc_dev), TCP_DEVICE_NAME); + + KsInitializeKsChain(&(tconn->child.kstc_recv)); + 
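+    /*
+     * (structure note, for clarity: every sender/child tconn carries two
+     *  KS_CHAIN structures, kstc_recv and kstc_send, and each KS_CHAIN
+     *  holds a Normal and an Expedited KS_TSDUMGR; ks_query_data(),
+     *  ks_send_mdl() and ks_recv_mdl() below select the manager to use
+     *  from that (recv|send) x (normal|expedited) matrix.)
+     */
+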
KsInitializeKsChain(&(tconn->child.kstc_send)); + + tconn->kstc_snd_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + tconn->kstc_rcv_wnd = TDINAL_WINDOW_DEFAULT_SIZE; + + tconn->kstc_state = ksts_inited; +} + +/* + * ks_get_tconn + * increase the reference count of the tconn with 1 + * + * Arguments: + * tconn: the tdi connection to be referred + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +void +ks_get_tconn( + ksock_tconn_t * tconn + ) +{ + atomic_inc(&(tconn->kstc_refcount)); +} + +/* + * ks_put_tconn + * decrease the reference count of the tconn and destroy + * it if the refercount becomes 0. + * + * Arguments: + * tconn: the tdi connection to be dereferred + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +void +ks_put_tconn( + ksock_tconn_t *tconn + ) +{ + if (atomic_dec_and_test(&(tconn->kstc_refcount))) { + + spin_lock(&(tconn->kstc_lock)); + + if ( ( tconn->kstc_type == kstt_child || + tconn->kstc_type == kstt_sender ) && + ( tconn->kstc_state == ksts_connected ) ) { + + spin_unlock(&(tconn->kstc_lock)); + + ks_abort_tconn(tconn); + + } else { + + if (cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY)) { + cfs_enter_debugger(); + } else { + ExQueueWorkItem( + &(tconn->kstc_destroy), + DelayedWorkQueue + ); + + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DESTROY_BUSY); + } + + spin_unlock(&(tconn->kstc_lock)); + } + } +} + +/* + * ks_destroy_tconn + * cleanup the tdi connection and free it + * + * Arguments: + * tconn: the tdi connection to be cleaned. + * + * Return Value: + * N/A + * + * NOTES: + * N/A + */ + +void +ks_destroy_tconn( + ksock_tconn_t * tconn + ) +{ + LASSERT(tconn->kstc_refcount.counter == 0); + + if (tconn->kstc_type == kstt_listener) { + + ks_reset_handlers(tconn); + + /* for listener, we just need to close the address object */ + KsCloseAddress( + tconn->kstc_addr.Handle, + tconn->kstc_addr.FileObject + ); + + tconn->kstc_state = ksts_inited; + + } else if (tconn->kstc_type == kstt_child) { + + /* for child tdi conections */ + + /* disassociate the relation between it's connection object + and the address object */ + + if (tconn->kstc_state == ksts_associated) { + KsDisassociateAddress( + tconn->child.kstc_info.FileObject + ); + } + + /* release the connection object */ + + KsCloseConnection( + tconn->child.kstc_info.Handle, + tconn->child.kstc_info.FileObject + ); + + /* release it's refer of it's parent's address object */ + KsCloseAddress( + NULL, + tconn->kstc_addr.FileObject + ); + + spin_lock(&tconn->child.kstc_parent->kstc_lock); + spin_lock(&tconn->kstc_lock); + + tconn->kstc_state = ksts_inited; + + /* remove it frome it's parent's queues */ + + if (tconn->child.kstc_queued) { + + list_del(&(tconn->child.kstc_link)); + + if (tconn->child.kstc_queueno) { + + LASSERT(tconn->child.kstc_parent->listener.kstc_accepted.num > 0); + tconn->child.kstc_parent->listener.kstc_accepted.num -= 1; + + } else { + + LASSERT(tconn->child.kstc_parent->listener.kstc_listening.num > 0); + tconn->child.kstc_parent->listener.kstc_listening.num -= 1; + } + + tconn->child.kstc_queued = FALSE; + } + + spin_unlock(&tconn->kstc_lock); + spin_unlock(&tconn->child.kstc_parent->kstc_lock); + + /* drop the reference of the parent tconn */ + ks_put_tconn(tconn->child.kstc_parent); + + } else if (tconn->kstc_type == kstt_sender) { + + ks_reset_handlers(tconn); + + /* release the connection object */ + + KsCloseConnection( + tconn->sender.kstc_info.Handle, + tconn->sender.kstc_info.FileObject + ); + + /* release it's refer of it's parent's address object */ + 
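+        /*
+         * (reference-counting sketch, illustrative only -- the usual life
+         *  cycle of a tconn elsewhere in this file is:
+         *
+         *      tconn = ks_create_tconn();     initial reference
+         *      ks_get_tconn(tconn);           temporary reference
+         *      ...
+         *      ks_put_tconn(tconn);           drop temporary reference
+         *      ks_put_tconn(tconn);           last put on a disconnected
+         *                                     tconn queues ks_destroy_tconn
+         *                                     on a system worker thread
+         *
+         *  ks_destroy_tconn() then closes the TDI handles and returns the
+         *  memory to the slab via ks_free_tconn().)
+         */
+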
KsCloseAddress( + tconn->kstc_addr.Handle, + tconn->kstc_addr.FileObject + ); + + tconn->kstc_state = ksts_inited; + + } else { + cfs_enter_debugger(); + } + + /* free the tconn structure ... */ + + ks_free_tconn(tconn); +} + +int +ks_query_data( + ksock_tconn_t * tconn, + size_t * size, + int bIsExpedited ) +{ + int rc = 0; + + PKS_CHAIN KsChain; + PKS_TSDUMGR KsTsduMgr; + + *size = 0; + + ks_get_tconn(tconn); + spin_lock(&(tconn->kstc_lock)); + + if ( tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child) { + rc = -EINVAL; + spin_unlock(&(tconn->kstc_lock)); + goto errorout; + } + + if (tconn->kstc_state != ksts_connected) { + rc = -ENOTCONN; + spin_unlock(&(tconn->kstc_lock)); + goto errorout; + } + + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_recv); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_recv); + } + + if (bIsExpedited) { + KsTsduMgr = &(KsChain->Expedited); + } else { + KsTsduMgr = &(KsChain->Normal); + } + + *size = KsTsduMgr->TotalBytes; + spin_unlock(&(tconn->kstc_lock)); + +errorout: + + ks_put_tconn(tconn); + + return (rc); +} + +/* + * ks_get_tcp_option + * Query the the options of the tcp stream connnection + * + * Arguments: + * tconn: the tdi connection + * ID: option id + * OptionValue: buffer to store the option value + * Length: the length of the value, to be returned + * + * Return Value: + * int: ks return code + * + * NOTES: + * N/A + */ + +int +ks_get_tcp_option ( + ksock_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + PULONG Length + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + IO_STATUS_BLOCK IoStatus; + + TCP_REQUEST_QUERY_INFORMATION_EX QueryInfoEx; + + PFILE_OBJECT ConnectionObject; + PDEVICE_OBJECT DeviceObject = NULL; + + PIRP Irp = NULL; + PIO_STACK_LOCATION IrpSp = NULL; + + KEVENT Event; + + /* make sure the tdi connection is connected ? 
*/ + + ks_get_tconn(tconn); + + if (tconn->kstc_state != ksts_connected) { + Status = STATUS_INVALID_PARAMETER; + goto errorout; + } + + LASSERT(tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); + + if (tconn->kstc_type == kstt_sender) { + ConnectionObject = tconn->sender.kstc_info.FileObject; + } else { + ConnectionObject = tconn->child.kstc_info.FileObject; + } + + QueryInfoEx.ID.toi_id = ID; + QueryInfoEx.ID.toi_type = INFO_TYPE_CONNECTION; + QueryInfoEx.ID.toi_class = INFO_CLASS_PROTOCOL; + QueryInfoEx.ID.toi_entity.tei_entity = CO_TL_ENTITY; + QueryInfoEx.ID.toi_entity.tei_instance = 0; + + RtlZeroMemory(&(QueryInfoEx.Context), CONTEXT_SIZE); + + KeInitializeEvent(&Event, NotificationEvent, FALSE); + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + Irp = IoBuildDeviceIoControlRequest( + IOCTL_TCP_QUERY_INFORMATION_EX, + DeviceObject, + &QueryInfoEx, + sizeof(TCP_REQUEST_QUERY_INFORMATION_EX), + OptionValue, + *Length, + FALSE, + &Event, + &IoStatus + ); + + if (Irp == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + IrpSp = IoGetNextIrpStackLocation(Irp); + + if (IrpSp == NULL) { + + IoFreeIrp(Irp); + Irp = NULL; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + IrpSp->FileObject = ConnectionObject; + IrpSp->DeviceObject = DeviceObject; + + Status = IoCallDriver(DeviceObject, Irp); + + if (Status == STATUS_PENDING) { + + KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); + + Status = IoStatus.Status; + } + + + if (NT_SUCCESS(Status)) { + *Length = IoStatus.Information; + } else { + cfs_enter_debugger(); + memset(OptionValue, 0, *Length); + Status = STATUS_SUCCESS; + } + +errorout: + + ks_put_tconn(tconn); + + return cfs_error_code(Status); +} + +/* + * ks_set_tcp_option + * Set the the options for the tcp stream connnection + * + * Arguments: + * tconn: the tdi connection + * ID: option id + * OptionValue: buffer containing the new option value + * Length: the length of the value + * + * Return Value: + * int: ks return code + * + * NOTES: + * N/A + */ + +NTSTATUS +ks_set_tcp_option ( + ksock_tconn_t * tconn, + ULONG ID, + PVOID OptionValue, + ULONG Length + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + + IO_STATUS_BLOCK IoStatus; + + ULONG SetInfoExLength; + PTCP_REQUEST_SET_INFORMATION_EX SetInfoEx = NULL; + + PFILE_OBJECT ConnectionObject; + PDEVICE_OBJECT DeviceObject = NULL; + + PIRP Irp = NULL; + PIO_STACK_LOCATION IrpSp = NULL; + + PKEVENT Event; + + /* make sure the tdi connection is connected ? 
*/ + + ks_get_tconn(tconn); + + if (tconn->kstc_state != ksts_connected) { + Status = STATUS_INVALID_PARAMETER; + goto errorout; + } + + LASSERT(tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); + + if (tconn->kstc_type == kstt_sender) { + ConnectionObject = tconn->sender.kstc_info.FileObject; + } else { + ConnectionObject = tconn->child.kstc_info.FileObject; + } + + SetInfoExLength = sizeof(TCP_REQUEST_SET_INFORMATION_EX) - 1 + Length + sizeof(KEVENT); + + SetInfoEx = ExAllocatePoolWithTag( + NonPagedPool, + SetInfoExLength, + 'TSSK' + ); + + if (SetInfoEx == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + SetInfoEx->ID.toi_id = ID; + + SetInfoEx->ID.toi_type = INFO_TYPE_CONNECTION; + SetInfoEx->ID.toi_class = INFO_CLASS_PROTOCOL; + SetInfoEx->ID.toi_entity.tei_entity = CO_TL_ENTITY; + SetInfoEx->ID.toi_entity.tei_instance = TL_INSTANCE; + + SetInfoEx->BufferSize = Length; + RtlCopyMemory(&(SetInfoEx->Buffer[0]), OptionValue, Length); + + Event = (PKEVENT)(&(SetInfoEx->Buffer[Length])); + KeInitializeEvent(Event, NotificationEvent, FALSE); + + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + Irp = IoBuildDeviceIoControlRequest( + IOCTL_TCP_SET_INFORMATION_EX, + DeviceObject, + SetInfoEx, + SetInfoExLength, + NULL, + 0, + FALSE, + Event, + &IoStatus + ); + + if (Irp == NULL) { + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + IrpSp = IoGetNextIrpStackLocation(Irp); + + if (IrpSp == NULL) { + IoFreeIrp(Irp); + Irp = NULL; + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + IrpSp->FileObject = ConnectionObject; + IrpSp->DeviceObject = DeviceObject; + + Status = IoCallDriver(DeviceObject, Irp); + + if (Status == STATUS_PENDING) { + + KeWaitForSingleObject( + Event, + Executive, + KernelMode, + FALSE, + NULL + ); + + Status = IoStatus.Status; + } + +errorout: + + if (SetInfoEx) { + ExFreePool(SetInfoEx); + } + + if (!NT_SUCCESS(Status)) { + printk("ks_set_tcp_option: error setup tcp option: ID (%d), Status = %xh\n", + ID, Status); + Status = STATUS_SUCCESS; + } + + ks_put_tconn(tconn); + + return cfs_error_code(Status); +} + +/* + * ks_bind_tconn + * bind the tdi connection object with an address + * + * Arguments: + * tconn: tconn to be bound + * parent: the parent tconn object + * ipaddr: the ip address + * port: the port number + * + * Return Value: + * int: 0 for success or ks error codes. 
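+ *
+ * Example:
+ *   (hypothetical values, for illustration only; this mirrors the calls
+ *    made by libcfs_sock_listen() and ks_create_child_tconn() below)
+ *
+ *      rc = ks_bind_tconn(tconn, NULL, ip, 988);    listener or sender
+ *      rc = ks_bind_tconn(child, parent, 0, 0);     backlog child reuses
+ *                                                   the parent's address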
+ * + * NOTES: + * N/A + */ + +int +ks_bind_tconn ( + ksock_tconn_t * tconn, + ksock_tconn_t * parent, + ulong_ptr addr, + unsigned short port + ) +{ + NTSTATUS status; + int rc = 0; + + ksock_tdi_addr_t taddr; + + memset(&taddr, 0, sizeof(ksock_tdi_addr_t)); + + if (tconn->kstc_state != ksts_inited) { + + status = STATUS_INVALID_PARAMETER; + rc = cfs_error_code(status); + + goto errorout; + + } else if (tconn->kstc_type == kstt_child) { + + if (NULL == parent) { + status = STATUS_INVALID_PARAMETER; + rc = cfs_error_code(status); + + goto errorout; + } + + /* refer it's parent's address object */ + + taddr = parent->kstc_addr; + ObReferenceObject(taddr.FileObject); + + ks_get_tconn(parent); + + } else { + + PTRANSPORT_ADDRESS TdiAddress = &(taddr.Tdi); + ULONG AddrLen = 0; + + /* intialize the tdi address*/ + + TdiAddress->TAAddressCount = 1; + TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; + TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; + + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); + + memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); + + + /* open the transport address object */ + + AddrLen = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + + TDI_ADDRESS_LENGTH_IP; + + status = KsOpenAddress( + &(tconn->kstc_dev), + &(taddr.Tdi), + AddrLen, + &(taddr.Handle), + &(taddr.FileObject) + ); + + if (!NT_SUCCESS(status)) { + + KsPrint((0, "ks_bind_tconn: failed to open ip addr object (%x:%d), status = %xh\n", + addr, port, status )); + rc = cfs_error_code(status); + goto errorout; + } + } + + if (tconn->kstc_type == kstt_child) { + tconn->child.kstc_parent = parent; + } + + tconn->kstc_state = ksts_bind; + tconn->kstc_addr = taddr; + +errorout: + + return (rc); +} + +/* + * ks_build_tconn + * build tcp/streaming connection to remote peer + * + * Arguments: + * tconn: tconn to be connected to the peer + * addr: the peer's ip address + * port: the peer's port number + * + * Return Value: + * int: 0 for success or ks error codes. 
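+ *
+ * Example:
+ *   (illustrative only; libcfs_sock_connect() below performs exactly this
+ *    sequence on a tconn prepared with ks_init_sender())
+ *
+ *      rc = ks_bind_tconn(tconn, NULL, local_ip, local_port);
+ *      if (rc == 0)
+ *          rc = ks_build_tconn(tconn, peer_ip, peer_port);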
+ * + * Notes: + * N/A + */ + +int +ks_build_tconn( + ksock_tconn_t * tconn, + ulong_ptr addr, + unsigned short port + ) +{ + int rc = 0; + NTSTATUS status = STATUS_SUCCESS; + + + PFILE_OBJECT ConnectionObject = NULL; + PDEVICE_OBJECT DeviceObject = NULL; + + PTDI_CONNECTION_INFORMATION ConnectionInfo = NULL; + ULONG AddrLength; + + PIRP Irp = NULL; + + LASSERT(tconn->kstc_type == kstt_sender); + LASSERT(tconn->kstc_state == ksts_bind); + + ks_get_tconn(tconn); + + { + /* set the event callbacks */ + rc = ks_set_handlers(tconn); + + if (rc < 0) { + cfs_enter_debugger(); + goto errorout; + } + } + + /* create the connection file handle / object */ + status = KsOpenConnection( + &(tconn->kstc_dev), + (CONNECTION_CONTEXT)tconn, + &(tconn->sender.kstc_info.Handle), + &(tconn->sender.kstc_info.FileObject) + ); + + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } + + /* associdate the the connection with the adress object of the tconn */ + + status = KsAssociateAddress( + tconn->kstc_addr.Handle, + tconn->sender.kstc_info.FileObject + ); + + if (!NT_SUCCESS(status)) { + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } + + tconn->kstc_state = ksts_associated; + + /* Allocating Connection Info Together with the Address */ + AddrLength = FIELD_OFFSET(TRANSPORT_ADDRESS, Address->Address) + + TDI_ADDRESS_LENGTH_IP; + + ConnectionInfo = (PTDI_CONNECTION_INFORMATION)ExAllocatePoolWithTag( + NonPagedPool, sizeof(TDI_CONNECTION_INFORMATION) + AddrLength, 'iCsK'); + + if (NULL == ConnectionInfo) { + + status = STATUS_INSUFFICIENT_RESOURCES; + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } + + /* Initializing ConnectionInfo ... */ + { + PTRANSPORT_ADDRESS TdiAddress; + + /* ConnectionInfo settings */ + + ConnectionInfo->UserDataLength = 0; + ConnectionInfo->UserData = NULL; + ConnectionInfo->OptionsLength = 0; + ConnectionInfo->Options = NULL; + ConnectionInfo->RemoteAddressLength = AddrLength; + ConnectionInfo->RemoteAddress = ConnectionInfo + 1; + + + /* intialize the tdi address*/ + + TdiAddress = ConnectionInfo->RemoteAddress; + + TdiAddress->TAAddressCount = 1; + TdiAddress->Address[0].AddressLength = TDI_ADDRESS_LENGTH_IP; + TdiAddress->Address[0].AddressType = TDI_ADDRESS_TYPE_IP; + + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_port = htons(port); + ((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->in_addr = htonl(addr); + + memset(&(((PTDI_ADDRESS_IP)&(TdiAddress->Address[0].Address))->sin_zero[0]),0,8); + } + + /* Now prepare to connect the remote peer ... */ + + ConnectionObject = tconn->sender.kstc_info.FileObject; + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + /* allocate a new Irp */ + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + status = STATUS_INSUFFICIENT_RESOURCES; + rc = cfs_error_code(status); + cfs_enter_debugger(); + goto errorout; + } + + /* setup the Irp */ + + TdiBuildConnect( + Irp, + DeviceObject, + ConnectionObject, + NULL, + NULL, + NULL, + ConnectionInfo, + NULL + ); + + + /* sumbit the Irp to the underlying transport driver */ + status = KsSubmitTdiIrp( + DeviceObject, + Irp, + TRUE, + NULL + ); + + spin_lock(&(tconn->kstc_lock)); + + if (NT_SUCCESS(status)) { + + /* Connected! the conneciton is built successfully. 
*/ + + tconn->kstc_state = ksts_connected; + + tconn->sender.kstc_info.ConnectionInfo = ConnectionInfo; + tconn->sender.kstc_info.Remote = ConnectionInfo->RemoteAddress; + + spin_unlock(&(tconn->kstc_lock)); + + } else { + + /* Not connected! Abort it ... */ + + if (rc != 0) { + cfs_enter_debugger(); + } + + Irp = NULL; + rc = cfs_error_code(status); + + tconn->kstc_state = ksts_associated; + spin_unlock(&(tconn->kstc_lock)); + + /* disassocidate the connection and the address object, + after cleanup, it's safe to set the state to abort ... */ + + if ( NT_SUCCESS(KsDisassociateAddress( + tconn->sender.kstc_info.FileObject))) { + tconn->kstc_state = ksts_aborted; + } + + /* reset the event callbacks */ + rc = ks_reset_handlers(tconn); + + goto errorout; + } + +errorout: + + if (NT_SUCCESS(status)) { + + ks_query_local_ipaddr(tconn); + + } else { + + if (ConnectionInfo) { + ExFreePool(ConnectionInfo); + } + if (Irp) { + IoFreeIrp(Irp); + } + } + + ks_put_tconn(tconn); + + return (rc); +} + + +/* + * ks_disconnect_tconn + * disconnect the tconn from a connection + * + * Arguments: + * tconn: the tdi connecton object connected already + * flags: flags & options for disconnecting + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ + +int +ks_disconnect_tconn( + ksock_tconn_t * tconn, + ulong_ptr flags + ) +{ + NTSTATUS status = STATUS_SUCCESS; + + ksock_tconn_info_t * info; + + PFILE_OBJECT ConnectionObject; + PDEVICE_OBJECT DeviceObject = NULL; + + PIRP Irp = NULL; + + KEVENT Event; + + ks_get_tconn(tconn); + + /* make sure tt's connected already and it + must be a sender or a child ... */ + + LASSERT(tconn->kstc_state == ksts_connected); + LASSERT( tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child); + + /* reset all the event handlers to NULL */ + + if (tconn->kstc_type != kstt_child) { + ks_reset_handlers (tconn); + } + + /* Disconnecting to the remote peer ... 
*/ + + if (tconn->kstc_type == kstt_sender) { + info = &(tconn->sender.kstc_info); + } else { + info = &(tconn->child.kstc_info); + } + + ConnectionObject = info->FileObject; + DeviceObject = IoGetRelatedDeviceObject(ConnectionObject); + + /* allocate an Irp and setup it */ + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + status = STATUS_INSUFFICIENT_RESOURCES; + cfs_enter_debugger(); + goto errorout; + } + + KeInitializeEvent( + &Event, + SynchronizationEvent, + FALSE + ); + + TdiBuildDisconnect( + Irp, + DeviceObject, + ConnectionObject, + KsDisconectCompletionRoutine, + &Event, + NULL, + flags, + NULL, + NULL + ); + + /* issue the Irp to the underlying transport + driver to disconnect the connection */ + + status = IoCallDriver(DeviceObject, Irp); + + if (STATUS_PENDING == status) { + + status = KeWaitForSingleObject( + &Event, + Executive, + KernelMode, + FALSE, + NULL + ); + + status = Irp->IoStatus.Status; + } + + KsPrint((2, "KsDisconnect: Disconnection is done with Status = %xh (%s) ...\n", + status, KsNtStatusToString(status))); + + IoFreeIrp(Irp); + + if (info->ConnectionInfo) { + + /* disassociate the association between connection/address objects */ + + status = KsDisassociateAddress(ConnectionObject); + + if (!NT_SUCCESS(status)) { + cfs_enter_debugger(); + } + + spin_lock(&(tconn->kstc_lock)); + + /* cleanup the tsdumgr Lists */ + KsCleanupTsdu (tconn); + + /* set the state of the tconn */ + if (NT_SUCCESS(status)) { + tconn->kstc_state = ksts_disconnected; + } else { + tconn->kstc_state = ksts_associated; + } + + /* free the connection info to system pool*/ + ExFreePool(info->ConnectionInfo); + info->ConnectionInfo = NULL; + info->Remote = NULL; + + spin_unlock(&(tconn->kstc_lock)); + } + + status = STATUS_SUCCESS; + +errorout: + + ks_put_tconn(tconn); + + return cfs_error_code(status); +} + + +/* + * ks_abort_tconn + * The connection is broken un-expectedly. We need do + * some cleanup. 
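+ *    The abort itself is deferred: this routine only flags the tconn and
+ *    queues its pre-initialized disconnect work item, roughly
+ *
+ *        WorkItem->Flags = TDI_DISCONNECT_ABORT;
+ *        ExQueueWorkItem(&(WorkItem->WorkItem), DelayedWorkQueue);
+ *
+ *    so the actual TDI disconnect runs later in a system worker thread.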
+ * + * Arguments: + * tconn: the tdi connection + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_abort_tconn( + ksock_tconn_t * tconn + ) +{ + PKS_DISCONNECT_WORKITEM WorkItem = NULL; + + WorkItem = &(tconn->kstc_disconnect); + + ks_get_tconn(tconn); + spin_lock(&(tconn->kstc_lock)); + + if (tconn->kstc_state != ksts_connected) { + ks_put_tconn(tconn); + } else { + + if (!cfs_is_flag_set(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY)) { + + WorkItem->Flags = TDI_DISCONNECT_ABORT; + WorkItem->tconn = tconn; + + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DISCONNECT_BUSY); + + ExQueueWorkItem( + &(WorkItem->WorkItem), + DelayedWorkQueue + ); + } + } + + spin_unlock(&(tconn->kstc_lock)); +} + + +/* + * ks_query_local_ipaddr + * query the local connection ip address + * + * Arguments: + * tconn: the tconn which is connected + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ + +int +ks_query_local_ipaddr( + ksock_tconn_t * tconn + ) +{ + PFILE_OBJECT FileObject = NULL; + NTSTATUS status; + + PTRANSPORT_ADDRESS TdiAddress; + ULONG AddressLength; + + if (tconn->kstc_type == kstt_sender) { + FileObject = tconn->sender.kstc_info.FileObject; + } else if (tconn->kstc_type == kstt_child) { + FileObject = tconn->child.kstc_info.FileObject; + } else { + status = STATUS_INVALID_PARAMETER; + goto errorout; + } + + TdiAddress = &(tconn->kstc_addr.Tdi); + AddressLength = MAX_ADDRESS_LENGTH; + + status = KsQueryIpAddress(FileObject, TdiAddress, &AddressLength); + + if (NT_SUCCESS(status)) { + + KsPrint((0, "ks_query_local_ipaddr: Local ip address = %xh port = %xh\n", + ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->in_addr, + ((PTDI_ADDRESS_IP)(&(TdiAddress->Address[0].Address)))->sin_port )); + } else { + KsPrint((0, "KsQueryonnectionIpAddress: Failed to query the connection local ip address.\n")); + } + +errorout: + + return cfs_error_code(status); +} + +/* + * ks_send_mdl + * send MDL chain to the peer for a stream connection + * + * Arguments: + * tconn: tdi connection object + * tx: the transmit context + * mdl: the mdl chain containing the data + * len: length of the data + * flags: flags of the transmission + * + * Return Value: + * ks return code + * + * Notes: + * N/A + */ + +int +ks_send_mdl( + ksock_tconn_t * tconn, + void * tx, + ksock_mdl_t * mdl, + int len, + int flags + ) +{ + NTSTATUS Status; + int rc = 0; + ulong_ptr length; + ulong_ptr tflags; + ksock_tdi_tx_t * context; + + PKS_CHAIN KsChain; + PKS_TSDUMGR KsTsduMgr; + PKS_TSDU KsTsdu; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_DAT KsTsduDat; + + BOOLEAN bNewTsdu = FALSE; /* newly allocated */ + BOOLEAN bNewBuff = FALSE; /* newly allocated */ + + BOOLEAN bBuffed; /* bufferred sending */ + + PUCHAR Buffer = NULL; + ksock_mdl_t * NewMdl = NULL; + + PIRP Irp = NULL; + PFILE_OBJECT ConnObject; + PDEVICE_OBJECT DeviceObject; + + BOOLEAN bIsNonBlock; + + ks_get_tconn(tconn); + + tflags = ks_tdi_send_flags(flags); + bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); + + spin_lock(&tconn->kstc_lock); + + LASSERT( tconn->kstc_type == kstt_sender || + tconn->kstc_type == kstt_child ); + + if (tconn->kstc_state != ksts_connected) { + spin_unlock(&tconn->kstc_lock); + ks_put_tconn(tconn); + return -ENOTCONN; + } + + /* get the latest Tsdu buffer form TsduMgr list. + just set NULL if the list is empty. 
*/ + + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_send); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_send); + } + + if (cfs_is_flag_set(tflags, TDI_SEND_EXPEDITED)) { + KsTsduMgr = &(KsChain->Expedited); + } else { + KsTsduMgr = &(KsChain->Normal); + } + + if (KsTsduMgr->TotalBytes + len <= tconn->kstc_snd_wnd) { + bBuffed = TRUE; + } else { + bBuffed = FALSE; + } + + /* do the preparation work for bufferred sending */ + + if (bBuffed) { + + /* if the data is even larger than the biggest Tsdu, we have + to allocate new buffer and use TSDU_TYOE_BUF to store it */ + + if ( KS_TSDU_STRU_SIZE((ULONG)len) > ks_data.ksnd_tsdu_size + - KS_DWORD_ALIGN(sizeof(KS_TSDU))) { + bNewBuff = TRUE; + } + + if (list_empty(&(KsTsduMgr->TsduList))) { + + LASSERT(KsTsduMgr->NumOfTsdu == 0); + KsTsdu = NULL; + + } else { + + LASSERT(KsTsduMgr->NumOfTsdu > 0); + KsTsdu = list_entry(KsTsduMgr->TsduList.prev, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + + + /* check whether KsTsdu free space is enough, or we need alloc new Tsdu */ + if (bNewBuff) { + if (sizeof(KS_TSDU_BUF) + KsTsdu->LastOffset > KsTsdu->TotalLength) { + KsTsdu = NULL; + } + } else { + if ( KS_TSDU_STRU_SIZE((ULONG)len) > + KsTsdu->TotalLength - KsTsdu->LastOffset ) { + KsTsdu = NULL; + } + } + } + + /* if there's no Tsdu or the free size is not enough for the + KS_TSDU_BUF or KS_TSDU_DAT. We need re-allocate a new Tsdu. */ + + if (NULL == KsTsdu) { + + KsTsdu = KsAllocateKsTsdu(); + + if (NULL == KsTsdu) { + bBuffed = FALSE; + bNewBuff = FALSE; + } else { + bNewTsdu = TRUE; + } + } + + /* process the case that a new buffer is to be allocated from system memory */ + if (bNewBuff) { + + /* now allocating internal buffer to contain the payload */ + Buffer = ExAllocatePool(NonPagedPool, len); + + if (NULL == Buffer) { + bBuffed = FALSE; + } + } + } + + if (bBuffed) { + + if (bNewBuff) { + + /* queue a new KS_TSDU_BUF to the Tsdu buffer */ + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + + KsTsduBuf->TsduFlags = 0; + KsTsduBuf->DataLength = (ULONG)len; + KsTsduBuf->StartOffset = 0; + KsTsduBuf->UserBuffer = Buffer; + } else { + /* queue a new KS_TSDU_BUF to the Tsdu buffer */ + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->LastOffset); + + KsTsduDat->TsduFlags = 0; + KsTsduDat->DataLength = (ULONG)len; + KsTsduDat->StartOffset = 0; + KsTsduDat->TotalLength = KS_TSDU_STRU_SIZE((ULONG)len); + + Buffer = &KsTsduDat->Data[0]; + } + + /* now locking the Buffer and copy user payload into the buffer */ + ASSERT(Buffer != NULL); + + rc = ks_lock_buffer(Buffer, FALSE, len, IoReadAccess, &NewMdl); + if (rc != 0) { + printk("ks_send_mdl: bufferred: error allocating mdl.\n"); + bBuffed = FALSE; + } else { + ULONG BytesCopied = 0; + TdiCopyMdlToBuffer(mdl, 0, Buffer, 0, (ULONG)len, &BytesCopied); + if (BytesCopied != (ULONG) len) { + bBuffed = FALSE; + } + } + + /* Do the finializing job if we succeed to to lock the buffer and move + user data. Or we need do cleaning up ... */ + if (bBuffed) { + + if (bNewBuff) { + KsTsduBuf->TsduType = TSDU_TYPE_BUF; + KsTsdu->LastOffset += sizeof(KS_TSDU_BUF); + + } else { + KsTsduDat->TsduType = TSDU_TYPE_DAT; + KsTsdu->LastOffset += KsTsduDat->TotalLength; + } + + /* attach it to the TsduMgr list if the Tsdu is newly created. 
*/ + if (bNewTsdu) { + + list_add_tail(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); + KsTsduMgr->NumOfTsdu++; + } + + } else { + + if (NewMdl) { + ks_release_mdl(NewMdl, FALSE); + NewMdl = NULL; + } + + if (bNewBuff) { + ExFreePool(Buffer); + Buffer = NULL; + bNewBuff = FALSE; + } + } + } + + /* update the TotalBytes being in sending */ + KsTsduMgr->TotalBytes += (ULONG)len; + + spin_unlock(&tconn->kstc_lock); + + /* cleanup the Tsdu if not successful */ + if (!bBuffed && bNewTsdu) { + KsPutKsTsdu(KsTsdu); + bNewTsdu = FALSE; + KsTsdu = NULL; + } + + /* we need allocate the ksock_tx_t structure from memory pool. */ + + context = cfs_alloc(sizeof(ksock_tdi_tx_t) + sizeof(KEVENT),0); + if (!context) { + /* release the chained mdl */ + ks_release_mdl(mdl, FALSE); + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + /* intialize the TcpContext */ + + memset(context,0, sizeof(ksock_tdi_tx_t) + sizeof(KEVENT)); + + context->tconn = tconn; + context->Event = (PKEVENT) ((PUCHAR)context + sizeof(ksock_tdi_tx_t)); + + KeInitializeEvent(context->Event, SynchronizationEvent, FALSE); + + if (bBuffed) { + + /* for bufferred transmission, we need set + the internal completion routine. */ + + context->CompletionRoutine = KsTcpSendCompletionRoutine; + context->KsTsduMgr = KsTsduMgr; + context->CompletionContext = KsTsdu; + context->CompletionContext2 = (bNewBuff ? (PVOID)KsTsduBuf : (PVOID)KsTsduDat); + context->bCounted = FALSE; + + } else if (bIsNonBlock) { + + /* for non-blocking transmission, we need set + the internal completion routine too. */ + + context->CompletionRoutine = KsTcpSendCompletionRoutine; + context->CompletionContext = tx; + context->KsTsduMgr = KsTsduMgr; + context->bCounted = TRUE; + context->ReferCount = 2; + } + + if (tconn->kstc_type == kstt_sender) { + ConnObject = tconn->sender.kstc_info.FileObject; + } else { + LASSERT(tconn->kstc_type == kstt_child); + ConnObject = tconn->child.kstc_info.FileObject; + } + + DeviceObject = IoGetRelatedDeviceObject(ConnObject); + + Irp = KsBuildTdiIrp(DeviceObject); + + if (NULL == Irp) { + + /* release the chained mdl */ + ks_release_mdl(mdl, FALSE); + + Status = STATUS_INSUFFICIENT_RESOURCES; + goto errorout; + } + + length = KsQueryMdlsSize(mdl); + + LASSERT((ULONG)len <= length); + + ks_get_tconn(tconn); + + TdiBuildSend( + Irp, + DeviceObject, + ConnObject, + KsTcpCompletionRoutine, + context, + (bBuffed ? NewMdl : mdl), + (bBuffed ? 
(tflags | TDI_SEND_NON_BLOCKING) : tflags), + (ULONG)len; + ); + + Status = IoCallDriver(DeviceObject, Irp); + + if (bBuffed) { + ks_release_mdl(mdl, FALSE); + NewMdl = NULL; + } + + if (!NT_SUCCESS(Status)) { + cfs_enter_debugger(); + rc = cfs_error_code(Status); + goto errorout; + } + + if (bBuffed) { + Status = STATUS_SUCCESS; + rc = len; + context = NULL; + } else { + if (bIsNonBlock) { + if (InterlockedDecrement(&context->ReferCount) == 0) { + Status = Irp->IoStatus.Status; + } else { + Status = STATUS_PENDING; + context = NULL; + } + } else { + if (STATUS_PENDING == Status) { + Status = KeWaitForSingleObject( + context->Event, + Executive, + KernelMode, + FALSE, + NULL + ); + + if (NT_SUCCESS(Status)) { + Status = Irp->IoStatus.Status; + } + } + } + + if (Status == STATUS_SUCCESS) { + rc = (int)(Irp->IoStatus.Information); + + spin_lock(&tconn->kstc_lock); + KsTsduMgr->TotalBytes -= rc; + spin_unlock(&tconn->kstc_lock); + + } else { + rc = cfs_error_code(Status); + } + } + +errorout: + + if (bBuffed) { + + if (NewMdl) { + ks_release_mdl(NewMdl, FALSE); + NewMdl = NULL; + } + + if (bNewBuff) { + if (!NT_SUCCESS(Status)) { + ExFreePool(Buffer); + Buffer = NULL; + } + } + + } else { + + if (Status != STATUS_PENDING) { + + if (Irp) { + + /* Freeing the Irp ... */ + + IoFreeIrp(Irp); + Irp = NULL; + } + } + } + + if (!NT_SUCCESS(Status)) { + + spin_lock(&tconn->kstc_lock); + + KsTsduMgr->TotalBytes -= (ULONG)len; + + if (bBuffed) { + + /* attach it to the TsduMgr list if the Tsdu is newly created. */ + if (bNewTsdu) { + + list_del(&(KsTsdu->Link)); + KsTsduMgr->NumOfTsdu--; + + KsPutKsTsdu(KsTsdu); + } else { + if (bNewBuff) { + if ( (ulong_ptr)KsTsduBuf + sizeof(KS_TSDU_BUF) == + (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { + KsTsdu->LastOffset -= sizeof(KS_TSDU_BUF); + KsTsduBuf->TsduType = 0; + } else { + cfs_enter_debugger(); + KsTsduBuf->StartOffset = KsTsduBuf->DataLength; + } + } else { + if ( (ulong_ptr)KsTsduDat + KsTsduDat->TotalLength == + (ulong_ptr)KsTsdu + KsTsdu->LastOffset) { + KsTsdu->LastOffset -= KsTsduDat->TotalLength; + KsTsduDat->TsduType = 0; + } else { + cfs_enter_debugger(); + KsTsduDat->StartOffset = KsTsduDat->DataLength; + } + } + } + } + + spin_unlock(&tconn->kstc_lock); + } + + /* free the context if is not used at all */ + if (context) { + cfs_free(context); + } + + ks_put_tconn(tconn); + + return rc; +} + +/* + * ks_recv_mdl + * Receive data from the peer for a stream connection + * + * Arguments: + * tconn: tdi connection object + * mdl: the mdl chain to contain the incoming data + * len: length of the data + * flags: flags of the receiving + * + * Return Value: + * ks return code + * + * Notes: + * N/A + */ + +int +ks_recv_mdl( + ksock_tconn_t * tconn, + ksock_mdl_t * mdl, + int size, + int flags + ) +{ + NTSTATUS Status = STATUS_SUCCESS; + int rc = 0; + + BOOLEAN bIsNonBlock; + BOOLEAN bIsExpedited; + + PKS_CHAIN KsChain; + PKS_TSDUMGR KsTsduMgr; + PKS_TSDU KsTsdu; + PKS_TSDU_DAT KsTsduDat; + PKS_TSDU_BUF KsTsduBuf; + PKS_TSDU_MDL KsTsduMdl; + + PUCHAR Buffer; + + ULONG BytesRecved = 0; + ULONG RecvedOnce; + + bIsNonBlock = cfs_is_flag_set(flags, MSG_DONTWAIT); + bIsExpedited = cfs_is_flag_set(flags, MSG_OOB); + + ks_get_tconn(tconn); + +Again: + + RecvedOnce = 0; + + spin_lock(&(tconn->kstc_lock)); + + if ( tconn->kstc_type != kstt_sender && + tconn->kstc_type != kstt_child) { + + rc = -EINVAL; + spin_unlock(&(tconn->kstc_lock)); + + goto errorout; + } + + if (tconn->kstc_state != ksts_connected) { + + rc = -ENOTCONN; + spin_unlock(&(tconn->kstc_lock)); + 
+ goto errorout; + } + + if (tconn->kstc_type == kstt_sender) { + KsChain = &(tconn->sender.kstc_recv); + } else { + LASSERT(tconn->kstc_type == kstt_child); + KsChain = &(tconn->child.kstc_recv); + } + + if (bIsExpedited) { + KsTsduMgr = &(KsChain->Expedited); + } else { + KsTsduMgr = &(KsChain->Normal); + } + +NextTsdu: + + if (list_empty(&(KsTsduMgr->TsduList))) { + + // + // It's a notification event. We need reset it to + // un-signaled state in case there no any tsdus. + // + + KeResetEvent(&(KsTsduMgr->Event)); + + } else { + + KsTsdu = list_entry(KsTsduMgr->TsduList.next, KS_TSDU, Link); + LASSERT(KsTsdu->Magic == KS_TSDU_MAGIC); + + /* remove the KsTsdu from TsduMgr list to release the lock */ + list_del(&(KsTsdu->Link)); + KsTsduMgr->NumOfTsdu--; + + spin_unlock(&(tconn->kstc_lock)); + + while ((ULONG)size > BytesRecved) { + + ULONG BytesCopied = 0; + ULONG BytesToCopy = 0; + ULONG StartOffset = 0; + + KsTsduDat = (PKS_TSDU_DAT)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduBuf = (PKS_TSDU_BUF)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + KsTsduMdl = (PKS_TSDU_MDL)((PUCHAR)KsTsdu + KsTsdu->StartOffset); + + if ( TSDU_TYPE_DAT == KsTsduDat->TsduType || + TSDU_TYPE_BUF == KsTsduBuf->TsduType ) { + + + // + // Data Tsdu Unit ... + // + + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { + + if (cfs_is_flag_set(KsTsduDat->TsduFlags, KS_TSDU_DAT_RECEIVING)) { + /* data is not ready yet*/ + KeResetEvent(&(KsTsduMgr->Event)); + printk("ks_recv_mdl: KsTsduDat (%xh) is not ready yet !!!!!!!\n", KsTsduDat); + break; + } + + Buffer = &KsTsduDat->Data[0]; + StartOffset = KsTsduDat->StartOffset; + if (KsTsduDat->DataLength - KsTsduDat->StartOffset > size - BytesRecved) { + /* Recvmsg requst could be statisfied ... */ + BytesToCopy = size - BytesRecved; + } else { + BytesToCopy = KsTsduDat->DataLength - KsTsduDat->StartOffset; + } + + } else { + + if (cfs_is_flag_set(KsTsduBuf->TsduFlags, KS_TSDU_BUF_RECEIVING)) { + /* data is not ready yet*/ + KeResetEvent(&(KsTsduMgr->Event)); + DbgPrint("ks_recv_mdl: KsTsduBuf (%xh) is not ready yet !!!!!!!\n", KsTsduBuf); + break; + } + + ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); + Buffer = KsTsduBuf->UserBuffer; + StartOffset = KsTsduBuf->StartOffset; + + if (KsTsduBuf->DataLength - KsTsduBuf->StartOffset > size - BytesRecved) { + /* Recvmsg requst could be statisfied ... */ + BytesToCopy = size - BytesRecved; + } else { + BytesToCopy = KsTsduBuf->DataLength - KsTsduBuf->StartOffset; + } + } + + if (BytesToCopy > 0) { + Status = TdiCopyBufferToMdl( + Buffer, + StartOffset, + BytesToCopy, + mdl, + BytesRecved, + &BytesCopied + ); + + if (NT_SUCCESS(Status)) { + + if (BytesToCopy != BytesCopied) { + cfs_enter_debugger(); + } + + BytesRecved += BytesCopied; + RecvedOnce += BytesCopied; + + } else { + + cfs_enter_debugger(); + + if (STATUS_BUFFER_OVERFLOW == Status) { + } + } + } + + if (TSDU_TYPE_DAT == KsTsduDat->TsduType) { + + KsTsduDat->StartOffset += BytesCopied; + + if (KsTsduDat->StartOffset == KsTsduDat->DataLength) { + KsTsdu->StartOffset += KsTsduDat->TotalLength; + } + + } else { + + ASSERT(TSDU_TYPE_BUF == KsTsduBuf->TsduType); + KsTsduBuf->StartOffset += BytesCopied; + if (KsTsduBuf->StartOffset == KsTsduBuf->DataLength) { + KsTsdu->StartOffset += sizeof(KS_TSDU_BUF); + /* now we need release the buf to system pool */ + ExFreePool(KsTsduBuf->UserBuffer); + } + } + + } else if (TSDU_TYPE_MDL == KsTsduMdl->TsduType) { + + // + // MDL Tsdu Unit ... + // + + if (KsTsduMdl->DataLength > size - BytesRecved) { + + /* Recvmsg requst could be statisfied ... 
*/ + + BytesToCopy = size - BytesRecved; + + } else { + + BytesToCopy = KsTsduMdl->DataLength; + } + + Status = KsCopyMdlChainToMdlChain( + KsTsduMdl->Mdl, + KsTsduMdl->StartOffset, + mdl, + BytesRecved, + BytesToCopy, + &BytesCopied + ); + + if (NT_SUCCESS(Status)) { + + if (BytesToCopy != BytesCopied) { + cfs_enter_debugger(); + } + + KsTsduMdl->StartOffset += BytesCopied; + KsTsduMdl->DataLength -= BytesCopied; + + BytesRecved += BytesCopied; + RecvedOnce += BytesCopied; + } else { + cfs_enter_debugger(); + } + + if (0 == KsTsduMdl->DataLength) { + + // + // Call TdiReturnChainedReceives to release the Tsdu memory + // + + TdiReturnChainedReceives( + &(KsTsduMdl->Descriptor), + 1 ); + + KsTsdu->StartOffset += sizeof(KS_TSDU_MDL); + } + + } else { + printk("ks_recv_mdl: unknown tsdu slot: slot = %x type = %x Start= %x\n", + KsTsduDat, KsTsduDat->TsduType, KsTsduDat->StartOffset, KsTsduDat->DataLength); + printk(" Tsdu = %x Magic=%x: Start = %x Last = %x Length = %x", + KsTsdu, KsTsdu->Magic, KsTsdu->StartOffset, KsTsdu->LastOffset, KsTsdu->TotalLength); + cfs_enter_debugger(); + } + + if (KsTsdu->StartOffset == KsTsdu->LastOffset) { + + // + // KsTsdu is empty now, we need free it ... + // + + KsPutKsTsdu(KsTsdu); + KsTsdu = NULL; + + break; + } + } + + spin_lock(&(tconn->kstc_lock)); + + /* we need attach the KsTsdu to the list header */ + if (KsTsdu) { + KsTsduMgr->NumOfTsdu++; + list_add(&(KsTsdu->Link), &(KsTsduMgr->TsduList)); + } else if ((ULONG)size > BytesRecved) { + goto NextTsdu; + } + } + + if (KsTsduMgr->TotalBytes < RecvedOnce) { + cfs_enter_debugger(); + KsTsduMgr->TotalBytes = 0; + } else { + KsTsduMgr->TotalBytes -= RecvedOnce; + } + + spin_unlock(&(tconn->kstc_lock)); + + if (NT_SUCCESS(Status)) { + + if ((BytesRecved < (ulong_ptr)size) && (!bIsNonBlock)) { + + KeWaitForSingleObject( + &(KsTsduMgr->Event), + Executive, + KernelMode, + FALSE, + NULL + ); + + goto Again; + } + + if (bIsNonBlock && (BytesRecved == 0)) { + rc = -EAGAIN; + } else { + rc = BytesRecved; + } + } + +errorout: + + ks_put_tconn(tconn); + + if (rc > 0) { + KsPrint((1, "ks_recv_mdl: recvieving %d bytes ...\n", rc)); + } else { + KsPrint((0, "ks_recv_mdl: recvieving error code = %d Stauts = %xh ...\n", rc, Status)); + } + + /* release the chained mdl */ + ks_release_mdl(mdl, FALSE); + + return (rc); +} + + +/* + * ks_init_tdi_data + * initialize the global data in ksockal_data + * + * Arguments: + * N/A + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ + +int +ks_init_tdi_data() +{ + int rc = 0; + + /* initialize tconn related globals */ + RtlZeroMemory(&ks_data, sizeof(ks_data_t)); + + spin_lock_init(&ks_data.ksnd_tconn_lock); + CFS_INIT_LIST_HEAD(&ks_data.ksnd_tconns); + cfs_init_event(&ks_data.ksnd_tconn_exit, TRUE, FALSE); + + ks_data.ksnd_tconn_slab = cfs_mem_cache_create( + "tcon", sizeof(ksock_tconn_t) , 0, 0); + + if (!ks_data.ksnd_tconn_slab) { + rc = -ENOMEM; + goto errorout; + } + + /* initialize tsdu related globals */ + + spin_lock_init(&ks_data.ksnd_tsdu_lock); + CFS_INIT_LIST_HEAD(&ks_data.ksnd_freetsdus); + ks_data.ksnd_tsdu_size = TDINAL_TSDU_DEFAULT_SIZE; /* 64k */ + ks_data.ksnd_tsdu_slab = cfs_mem_cache_create( + "tsdu", ks_data.ksnd_tsdu_size, 0, 0); + + if (!ks_data.ksnd_tsdu_slab) { + rc = -ENOMEM; + cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); + ks_data.ksnd_tconn_slab = NULL; + goto errorout; + } + + /* initialize daemon related globals */ + + spin_lock_init(&ks_data.ksnd_daemon_lock); + CFS_INIT_LIST_HEAD(&ks_data.ksnd_daemons); + 
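+    /*
+     * (pairing note, illustrative only: a caller such as the module
+     *  load/unload path is expected to do
+     *
+     *      rc = ks_init_tdi_data();       on startup
+     *      ...
+     *      ks_fini_tdi_data();            on shutdown, once every tconn
+     *                                     has been released
+     *
+     *  ks_fini_tdi_data() below waits on ksnd_tconn_exit until the last
+     *  tconn is freed before destroying the slab caches.)
+     */
+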
cfs_init_event(&ks_data.ksnd_daemon_exit, TRUE, FALSE); + + KsRegisterPnpHandlers(); + +errorout: + + return rc; +} + + +/* + * ks_fini_tdi_data + * finalize the global data in ksockal_data + * + * Arguments: + * N/A + * + * Return Value: + * int: ks error code + * + * Notes: + * N/A + */ + +void +ks_fini_tdi_data() +{ + PKS_TSDU KsTsdu = NULL; + struct list_head * list = NULL; + + /* clean up the pnp handler and address slots */ + KsDeregisterPnpHandlers(); + + /* we need wait until all the tconn are freed */ + spin_lock(&(ks_data.ksnd_tconn_lock)); + + if (list_empty(&(ks_data.ksnd_tconns))) { + cfs_wake_event(&ks_data.ksnd_tconn_exit); + } + spin_unlock(&(ks_data.ksnd_tconn_lock)); + + /* now wait on the tconn exit event */ + cfs_wait_event(&ks_data.ksnd_tconn_exit, 0); + + /* it's safe to delete the tconn slab ... */ + cfs_mem_cache_destroy(ks_data.ksnd_tconn_slab); + ks_data.ksnd_tconn_slab = NULL; + + /* clean up all the tsud buffers in the free list */ + spin_lock(&(ks_data.ksnd_tsdu_lock)); + list_for_each (list, &ks_data.ksnd_freetsdus) { + KsTsdu = list_entry (list, KS_TSDU, Link); + + cfs_mem_cache_free( + ks_data.ksnd_tsdu_slab, + KsTsdu ); + } + spin_unlock(&(ks_data.ksnd_tsdu_lock)); + + /* it's safe to delete the tsdu slab ... */ + cfs_mem_cache_destroy(ks_data.ksnd_tsdu_slab); + ks_data.ksnd_tsdu_slab = NULL; + + /* good! it's smooth to do the cleaning up...*/ +} + +/* + * ks_create_child_tconn + * Create the backlog child connection for a listener + * + * Arguments: + * parent: the listener daemon connection + * + * Return Value: + * the child connection or NULL in failure + * + * Notes: + * N/A + */ + +ksock_tconn_t * +ks_create_child_tconn( + ksock_tconn_t * parent + ) +{ + NTSTATUS status; + ksock_tconn_t * backlog; + + /* allocate the tdi connecton object */ + backlog = ks_create_tconn(); + + if (!backlog) { + goto errorout; + } + + /* initialize the tconn as a child */ + ks_init_child(backlog); + + + /* now bind it */ + if (ks_bind_tconn(backlog, parent, 0, 0) < 0) { + ks_free_tconn(backlog); + backlog = NULL; + goto errorout; + } + + /* open the connection object */ + status = KsOpenConnection( + &(backlog->kstc_dev), + (PVOID)backlog, + &(backlog->child.kstc_info.Handle), + &(backlog->child.kstc_info.FileObject) + ); + + if (!NT_SUCCESS(status)) { + + ks_put_tconn(backlog); + backlog = NULL; + cfs_enter_debugger(); + goto errorout; + } + + /* associate it now ... */ + status = KsAssociateAddress( + backlog->kstc_addr.Handle, + backlog->child.kstc_info.FileObject + ); + + if (!NT_SUCCESS(status)) { + + ks_put_tconn(backlog); + backlog = NULL; + cfs_enter_debugger(); + goto errorout; + } + + backlog->kstc_state = ksts_associated; + +errorout: + + return backlog; +} + +/* + * ks_replenish_backlogs( + * to replenish the backlogs listening... 
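+ *    (it tops the listener up to 'nbacklog' pending children, creating
+ *     roughly
+ *
+ *         n = nbacklog - (kstc_listening.num + kstc_accepted.num)
+ *
+ *     new child tconns and queueing them on the listening list)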
+ * + * Arguments: + * tconn: the parent listen tdi connect + * nbacklog: number fo child connections in queue + * + * Return Value: + * N/A + * + * Notes: + * N/A + */ + +void +ks_replenish_backlogs( + ksock_tconn_t * parent, + int nbacklog + ) +{ + ksock_tconn_t * backlog; + int n = 0; + + /* calculate how many backlogs needed */ + if ( ( parent->listener.kstc_listening.num + + parent->listener.kstc_accepted.num ) < nbacklog ) { + n = nbacklog - ( parent->listener.kstc_listening.num + + parent->listener.kstc_accepted.num ); + } else { + n = 0; + } + + while (n--) { + + /* create the backlog child tconn */ + backlog = ks_create_child_tconn(parent); + + spin_lock(&(parent->kstc_lock)); + + if (backlog) { + spin_lock(&backlog->kstc_lock); + /* attch it into the listing list of daemon */ + list_add( &backlog->child.kstc_link, + &parent->listener.kstc_listening.list ); + parent->listener.kstc_listening.num++; + + backlog->child.kstc_queued = TRUE; + spin_unlock(&backlog->kstc_lock); + } else { + cfs_enter_debugger(); + } + + spin_unlock(&(parent->kstc_lock)); + } +} + +/* + * ks_start_listen + * setup the listener tdi connection and make it listen + * on the user specified ip address and port. + * + * Arguments: + * tconn: the parent listen tdi connect + * nbacklog: number fo child connections in queue + * + * Return Value: + * ks error code >=: success; otherwise error. + * + * Notes: + * N/A + */ + +int +ks_start_listen(ksock_tconn_t *tconn, int nbacklog) +{ + int rc = 0; + + /* now replenish the backlogs */ + ks_replenish_backlogs(tconn, nbacklog); + + /* set the event callback handlers */ + rc = ks_set_handlers(tconn); + + if (rc < 0) { + return rc; + } + + spin_lock(&(tconn->kstc_lock)); + tconn->listener.nbacklog = nbacklog; + tconn->kstc_state = ksts_listening; + cfs_set_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); + spin_unlock(&(tconn->kstc_lock)); + + return rc; +} + +void +ks_stop_listen(ksock_tconn_t *tconn) +{ + struct list_head * list; + ksock_tconn_t * backlog; + + /* reset all tdi event callbacks to NULL */ + ks_reset_handlers (tconn); + + spin_lock(&tconn->kstc_lock); + + cfs_clear_flag(tconn->kstc_flags, KS_TCONN_DAEMON_STARTED); + + /* cleanup all the listening backlog child connections */ + list_for_each (list, &(tconn->listener.kstc_listening.list)) { + backlog = list_entry(list, ksock_tconn_t, child.kstc_link); + + /* destory and free it */ + ks_put_tconn(backlog); + } + + spin_unlock(&tconn->kstc_lock); + + /* wake up it from the waiting on new incoming connections */ + KeSetEvent(&tconn->listener.kstc_accept_event, 0, FALSE); + + /* free the listening daemon tconn */ + ks_put_tconn(tconn); +} + + +/* + * ks_wait_child_tconn + * accept a child connection from peer + * + * Arguments: + * parent: the daemon tdi connection listening + * child: to contain the accepted connection + * + * Return Value: + * ks error code; + * + * Notes: + * N/A + */ + +int +ks_wait_child_tconn( + ksock_tconn_t * parent, + ksock_tconn_t ** child + ) +{ + struct list_head * tmp; + ksock_tconn_t * backlog = NULL; + + ks_replenish_backlogs(parent, parent->listener.nbacklog); + + spin_lock(&(parent->kstc_lock)); + + if (parent->listener.kstc_listening.num <= 0) { + spin_unlock(&(parent->kstc_lock)); + return -1; + } + +again: + + /* check the listening queue and try to search the accepted connecton */ + + list_for_each(tmp, &(parent->listener.kstc_listening.list)) { + backlog = list_entry (tmp, ksock_tconn_t, child.kstc_link); + + spin_lock(&(backlog->kstc_lock)); + + if 
(backlog->child.kstc_accepted) { + + LASSERT(backlog->kstc_state == ksts_connected); + LASSERT(backlog->child.kstc_busy); + + list_del(&(backlog->child.kstc_link)); + list_add(&(backlog->child.kstc_link), + &(parent->listener.kstc_accepted.list)); + parent->listener.kstc_accepted.num++; + parent->listener.kstc_listening.num--; + backlog->child.kstc_queueno = 1; + + spin_unlock(&(backlog->kstc_lock)); + + break; + } else { + spin_unlock(&(backlog->kstc_lock)); + backlog = NULL; + } + } + + spin_unlock(&(parent->kstc_lock)); + + /* we need wait until new incoming connections are requested + or the case of shuting down the listenig daemon thread */ + if (backlog == NULL) { + + NTSTATUS Status; + + Status = KeWaitForSingleObject( + &(parent->listener.kstc_accept_event), + Executive, + KernelMode, + FALSE, + NULL + ); + + spin_lock(&(parent->kstc_lock)); + + /* check whether it's exptected to exit ? */ + if (!cfs_is_flag_set(parent->kstc_flags, KS_TCONN_DAEMON_STARTED)) { + spin_unlock(&(parent->kstc_lock)); + } else { + goto again; + } + } + + if (backlog) { + /* query the local ip address of the connection */ + ks_query_local_ipaddr(backlog); + } + + *child = backlog; + + return 0; +} + +int libcfs_ipif_query(char *name, int *up, __u32 *ip, __u32 *mask) +{ + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; + + spin_lock(&ks_data.ksnd_addrs_lock); + + list = ks_data.ksnd_addrs_list.Flink; + while (list != &ks_data.ksnd_addrs_list) { + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + if (_stricmp(name, &slot->iface[0]) == 0) { + *up = slot->up; + *ip = slot->ip_addr; + *mask = slot->netmask; + break; + } + list = list->Flink; + slot = NULL; + } + + spin_unlock(&ks_data.ksnd_addrs_lock); + + return (int)(slot == NULL); +} + +int libcfs_ipif_enumerate(char ***names) +{ + ks_addr_slot_t * slot = NULL; + PLIST_ENTRY list = NULL; + int nips = 0; + + spin_lock(&ks_data.ksnd_addrs_lock); + + *names = cfs_alloc(sizeof(char *) * ks_data.ksnd_naddrs, CFS_ALLOC_ZERO); + if (*names == NULL) { + goto errorout; + } + + list = ks_data.ksnd_addrs_list.Flink; + while (list != &ks_data.ksnd_addrs_list) { + slot = CONTAINING_RECORD(list, ks_addr_slot_t, link); + list = list->Flink; + (*names)[nips++] = slot->iface; + cfs_assert(nips <= ks_data.ksnd_naddrs); + } + + cfs_assert(nips == ks_data.ksnd_naddrs); + +errorout: + + spin_unlock(&ks_data.ksnd_addrs_lock); + return nips; +} + +void libcfs_ipif_free_enumeration(char **names, int n) +{ + if (names) { + cfs_free(names); + } +} + +int libcfs_sock_listen(struct socket **sockp, __u32 ip, int port, int backlog) +{ + int rc = 0; + ksock_tconn_t * parent; + + parent = ks_create_tconn(); + if (!parent) { + rc = -ENOMEM; + goto errorout; + } + + /* initialize the tconn as a listener */ + ks_init_listener(parent); + + /* bind the daemon->tconn */ + rc = ks_bind_tconn(parent, NULL, ip, (unsigned short)port); + + if (rc < 0) { + ks_free_tconn(parent); + goto errorout; + } + + /* create listening children and make it to listen state*/ + rc = ks_start_listen(parent, backlog); + if (rc < 0) { + ks_stop_listen(parent); + goto errorout; + } + + *sockp = parent; + +errorout: + + return rc; +} + +int libcfs_sock_accept(struct socket **newsockp, struct socket *sock) +{ + /* wait for incoming connecitons */ + return ks_wait_child_tconn(sock, newsockp); +} + +void libcfs_sock_abort_accept(struct socket *sock) +{ + LASSERT(sock->kstc_type == kstt_listener); + + spin_lock(&(sock->kstc_lock)); + + /* clear the daemon flag */ + cfs_clear_flag(sock->kstc_flags, 
+    cfs_clear_flag(sock->kstc_flags, KS_TCONN_DAEMON_STARTED);
+
+    /* wake it up from waiting for new incoming connections */
+    KeSetEvent(&sock->listener.kstc_accept_event, 0, FALSE);
+
+    spin_unlock(&(sock->kstc_lock));
+}
+
+/*
+ * libcfs_sock_connect
+ *   build a connection between the local ip/port and the peer ip/port.
+ *
+ * Arguments:
+ *   laddr: local ip address
+ *   lport: local port number
+ *   paddr: peer's ip address
+ *   pport: peer's port number
+ *
+ * Return Value:
+ *   int: return code ...
+ *
+ * Notes:
+ *   N/A
+ */
+
+
+int libcfs_sock_connect(struct socket **sockp, int *fatal,
+                        __u32 local_ip, int local_port,
+                        __u32 peer_ip, int peer_port)
+{
+    ksock_tconn_t * tconn = NULL;
+    int             rc = 0;
+
+    *sockp = NULL;
+
+    KsPrint((1, "libcfs_sock_connect: connecting to %x:%d with %x:%d...\n",
+             peer_ip, peer_port, local_ip, local_port ));
+
+    /* create the tdi connection structure */
+    tconn = ks_create_tconn();
+    if (!tconn) {
+        rc = -ENOMEM;
+        goto errorout;
+    }
+
+    /* initialize the tdi sender connection */
+    ks_init_sender(tconn);
+
+    /* bind the local ip address to the tconn */
+    rc = ks_bind_tconn(tconn, NULL, local_ip, (unsigned short)local_port);
+    if (rc < 0) {
+        KsPrint((0, "libcfs_sock_connect: failed to bind address %x:%d...\n",
+                 local_ip, local_port ));
+        ks_free_tconn(tconn);
+        goto errorout;
+    }
+
+    /* connect to the remote peer */
+    rc = ks_build_tconn(tconn, peer_ip, (unsigned short)peer_port);
+    if (rc < 0) {
+        KsPrint((0, "libcfs_sock_connect: failed to connect %x:%d ...\n",
+                 peer_ip, peer_port ));
+
+        ks_put_tconn(tconn);
+        goto errorout;
+    }
+
+    *sockp = tconn;
+
+errorout:
+
+    return rc;
+}
+
+int libcfs_sock_setbuf(struct socket *socket, int txbufsize, int rxbufsize)
+{
+    return 0;
+}
+
+int libcfs_sock_getbuf(struct socket *socket, int *txbufsize, int *rxbufsize)
+{
+    return 0;
+}
+
+int libcfs_sock_getaddr(struct socket *socket, int remote, __u32 *ip, int *port)
+{
+    PTRANSPORT_ADDRESS taddr = NULL;
+
+    spin_lock(&socket->kstc_lock);
+    if (remote) {
+        if (socket->kstc_type == kstt_sender) {
+            taddr = socket->sender.kstc_info.Remote;
+        } else if (socket->kstc_type == kstt_child) {
+            taddr = socket->child.kstc_info.Remote;
+        }
+    } else {
+        taddr = &(socket->kstc_addr.Tdi);
+    }
+
+    if (taddr) {
+        PTDI_ADDRESS_IP addr = (PTDI_ADDRESS_IP)(&(taddr->Address[0].Address));
+        if (ip != NULL)
+            *ip = ntohl (addr->in_addr);
+        if (port != NULL)
+            *port = ntohs (addr->sin_port);
+    } else {
+        spin_unlock(&socket->kstc_lock);
+        return -ENOTCONN;
+    }
+
+    spin_unlock(&socket->kstc_lock);
+    return 0;
+}
+
+int libcfs_sock_write(struct socket *sock, void *buffer, int nob, int timeout)
+{
+    int           rc;
+    ksock_mdl_t * mdl;
+
+    int           offset = 0;
+
+    while (nob > offset) {
+
+        /* lock the user buffer */
+        rc = ks_lock_buffer( (char *)buffer + offset,
+                             FALSE, nob - offset, IoReadAccess, &mdl );
+
+        if (rc < 0) {
+            return (rc);
+        }
+
+        /* send out the whole mdl */
+        rc = ks_send_mdl( sock, NULL, mdl, nob - offset, 0 );
+
+        if (rc > 0) {
+            offset += rc;
+        } else {
+            return (rc);
+        }
+    }
+
+    return (0);
+}
+
+int libcfs_sock_read(struct socket *sock, void *buffer, int nob, int timeout)
+{
+    int           rc;
+    ksock_mdl_t * mdl;
+
+    int           offset = 0;
+
+    while (nob > offset) {
+
+        /* lock the user buffer */
+        rc = ks_lock_buffer( (char *)buffer + offset,
+                             FALSE, nob - offset, IoWriteAccess, &mdl );
+
+        if (rc < 0) {
+            return (rc);
+        }
+
+        /* recv into the requested buffer */
+        rc = ks_recv_mdl( sock, mdl, nob - offset, 0 );
+
+        if (rc > 0) {
+            offset += rc;
+        } else {
+            return (rc);
+        }
+    }
+
+    return (0);
+}
+
+void
+libcfs_sock_release(struct socket *sock)
+{
+    if (sock->kstc_type == kstt_listener &&
+        sock->kstc_state == ksts_listening) {
+        ks_stop_listen(sock);
+    } else {
+        ks_put_tconn(sock);
+    }
+}
diff --git a/libcfs/libcfs/winnt/winnt-tracefile.c b/libcfs/libcfs/winnt/winnt-tracefile.c
new file mode 100644
index 0000000..61ba735
--- /dev/null
+++ b/libcfs/libcfs/winnt/winnt-tracefile.c
@@ -0,0 +1,224 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_LNET
+#define LUSTRE_TRACEFILE_PRIVATE
+
+#include
+#include
+#include "tracefile.h"
+
+#ifndef get_cpu
+#define get_cpu() smp_processor_id()
+#define put_cpu() do { } while (0)
+#endif
+
+#define TCD_TYPE_MAX 1
+
+event_t tracefile_event;
+
+int tracefile_init_arch()
+{
+    int i;
+    int j;
+    struct trace_cpu_data *tcd;
+
+    cfs_init_event(&tracefile_event, TRUE, TRUE);
+
+    /* initialize trace_data */
+    memset(trace_data, 0, sizeof(trace_data));
+    for (i = 0; i < TCD_TYPE_MAX; i++) {
+        trace_data[i]=cfs_alloc(sizeof(struct trace_data_union)*NR_CPUS, 0);
+        if (trace_data[i] == NULL)
+            goto out;
+    }
+
+    /* initialize arch-related info */
+    tcd_for_each(tcd, i, j) {
+        tcd->tcd_pages_factor = 100; /* Only one type */
+        tcd->tcd_cpu = j;
+        tcd->tcd_type = i;
+    }
+
+    memset(trace_console_buffers, 0, sizeof(trace_console_buffers));
+
+    for (i = 0; i < NR_CPUS; i++) {
+        for (j = 0; j < 1; j++) {
+            trace_console_buffers[i][j] =
+                cfs_alloc(TRACE_CONSOLE_BUFFER_SIZE,
+                          CFS_ALLOC_ZERO);
+
+            if (trace_console_buffers[i][j] == NULL)
+                goto out;
+        }
+    }
+
+    return 0;
+
+out:
+    tracefile_fini_arch();
+    KsPrint((0, "lnet: Not enough memory\n"));
+    return -ENOMEM;
+}
+
+void tracefile_fini_arch()
+{
+    int i;
+    int j;
+
+    for (i = 0; i < NR_CPUS; i++) {
+        for (j = 0; j < 2; j++) {
+            if (trace_console_buffers[i][j] != NULL) {
+                cfs_free(trace_console_buffers[i][j]);
+                trace_console_buffers[i][j] = NULL;
+            }
+        }
+    }
+
+    for (i = 0; trace_data[i] != NULL; i++) {
+        cfs_free(trace_data[i]);
+        trace_data[i] = NULL;
+    }
+}
+
+void tracefile_read_lock()
+{
+    cfs_wait_event(&tracefile_event, 0);
+}
+
+void tracefile_read_unlock()
+{
+    cfs_wake_event(&tracefile_event);
+}
+
+void tracefile_write_lock()
+{
+    cfs_wait_event(&tracefile_event, 0);
+}
+
+void tracefile_write_unlock()
+{
+    cfs_wake_event(&tracefile_event);
+}
+
+char *
+trace_get_console_buffer(void)
+{
+#pragma message ("is there a possible problem with pre-emption ?")
+    int cpu = (int) KeGetCurrentProcessorNumber();
+    return trace_console_buffers[cpu][0];
+}
+
+void
+trace_put_console_buffer(char *buffer)
+{
+}
+
+struct trace_cpu_data *
+trace_get_tcd(void)
+{
+#pragma message("todo: return NULL if in interrupt context")
+
+    int cpu = (int) KeGetCurrentProcessorNumber();
+    return &(*trace_data[0])[cpu].tcd;
+}
+
+void
+trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags)
+{
+}
+
+int
+trace_lock_tcd(struct trace_cpu_data *tcd)
+{
+    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+    return 1;
+}
+
+void
+trace_unlock_tcd(struct trace_cpu_data *tcd)
+{
+    __LASSERT(tcd->tcd_type < TCD_TYPE_MAX);
+}
+
+void
+set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask,
+                    const int line, unsigned long stack)
+{
+    struct timeval tv;
+
+    do_gettimeofday(&tv);
+
+    header->ph_subsys = subsys;
+    header->ph_mask = mask;
+    header->ph_cpu_id = smp_processor_id();
+    header->ph_sec = (__u32)tv.tv_sec;
+    header->ph_usec = tv.tv_usec;
+    header->ph_stack = stack;
+    header->ph_pid = current->pid;
+    header->ph_line_num = line;
+    header->ph_extern_pid = 0;
+    return;
+}
+
+void print_to_console(struct ptldebug_header *hdr, int mask, const char *buf,
+                      int len, const char *file, const char *fn)
+{
+    char *prefix = NULL, *ptype = NULL;
+
+    if ((mask & D_EMERG) != 0) {
+        prefix = "LustreError";
+        ptype = KERN_EMERG;
+    } else if ((mask & D_ERROR) != 0) {
+        prefix = "LustreError";
+        ptype = KERN_ERR;
+    } else if ((mask & D_WARNING) != 0) {
+        prefix = "Lustre";
+        ptype = KERN_WARNING;
+    } else if ((mask & libcfs_printk) != 0 || (mask & D_CONSOLE)) {
+        prefix = "Lustre";
+        ptype = KERN_INFO;
+    }
+
+    if ((mask & D_CONSOLE) != 0) {
+        printk("%s%s: %s", ptype, prefix, buf);
+    } else {
+        printk("%s%s: %d:%d:(%s:%d:%s()) %s", ptype, prefix, hdr->ph_pid,
+               hdr->ph_extern_pid, file, hdr->ph_line_num, fn, buf);
+    }
+    return;
+}
+
+int tcd_owns_tage(struct trace_cpu_data *tcd, struct trace_page *tage)
+{
+    return 1;
+}
+
+int trace_max_debug_mb(void)
+{
+    int total_mb = (num_physpages >> (20 - CFS_PAGE_SHIFT));
+
+    return MAX(512, (total_mb * 80)/100);
+}
+
+void
+trace_call_on_all_cpus(void (*fn)(void *arg), void *arg)
+{
+#error "tbd"
+}
+
diff --git a/libcfs/libcfs/winnt/winnt-usr.c b/libcfs/libcfs/winnt/winnt-usr.c
new file mode 100644
index 0000000..f79347b
--- /dev/null
+++ b/libcfs/libcfs/winnt/winnt-usr.c
@@ -0,0 +1,85 @@
+
+#ifndef __KERNEL__
+
+#include
+#include
+#include
+#include
+#include
+
+void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
+                       const int line, unsigned long stack,
+                       char *format, ...) {
+    }
+
+int cfs_proc_mknod(const char *path, unsigned short mode, unsigned int dev)
+{
+    return 0;
+}
+
+
+void print_last_error(char* Prefix)
+{
+    LPVOID lpMsgBuf;
+
+    FormatMessage(
+        FORMAT_MESSAGE_ALLOCATE_BUFFER |
+        FORMAT_MESSAGE_FROM_SYSTEM |
+        FORMAT_MESSAGE_IGNORE_INSERTS,
+        NULL,
+        GetLastError(),
+        0,
+        (LPTSTR) &lpMsgBuf,
+        0,
+        NULL
+        );
+
+    printf("%s %s", Prefix, (LPTSTR) lpMsgBuf);
+
+    LocalFree(lpMsgBuf);
+}
+
+//
+// The following declarations are defined in io.h of VC.
+// sys/types.h will conflict with io.h, so we need to place
+// these declarations here.
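+//
+// Note: _chkesp below is the stub referenced by MSVC when run-time stack
+// checks are enabled (e.g. /GZ); the compiler sets the zero flag when ESP
+// is consistent after a call, so this stub only breaks into the debugger
+// (int 3) when that check fails.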
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+    void
+    __declspec (naked) __cdecl _chkesp(void)
+    {
+#if _X86_
+        __asm { jz exit_chkesp };
+        __asm { int 3 };
+    exit_chkesp:
+        __asm { ret };
+#endif
+    }
+#ifdef __cplusplus
+}
+#endif
+
+unsigned int sleep (unsigned int seconds)
+{
+    Sleep(seconds * 1000);
+    return 0;
+}
+
+int gethostname(char * name, int namelen)
+{
+    return 0;
+}
+
+int ioctl (
+    int  handle,
+    int  cmd,
+    void *buffer
+    )
+{
+    printf("hello, world\n");
+    return 0;
+}
+
+#endif /* __KERNEL__ */
\ No newline at end of file
diff --git a/libcfs/libcfs/winnt/winnt-utils.c b/libcfs/libcfs/winnt/winnt-utils.c
new file mode 100644
index 0000000..cd33aa2
--- /dev/null
+++ b/libcfs/libcfs/winnt/winnt-utils.c
@@ -0,0 +1,158 @@
+/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=4:tabstop=4:
+ *
+ * Copyright (c) 2004 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under
+ * the terms of version 2 of the GNU General Public License as published by
+ * the Free Software Foundation. Lustre is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details. You should have received a
+ * copy of the GNU General Public License along with Lustre; if not, write
+ * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ */
+
+
+/*
+ * miscellaneous libcfs stuff
+ */
+#define DEBUG_SUBSYSTEM S_LNET
+#include
+
+/*
+ * Convert server error code to client format. Error codes are from
+ * Linux errno.h, so for a Linux client the mapping is the identity.
+ */
+int convert_server_error(__u64 ecode)
+{
+    return cfs_error_code((NTSTATUS)ecode);
+}
+
+/*
+ * convert open flags from client to server format.
+ *
+ * the nt kernel uses several members to describe the open flags,
+ * such as DesiredAccess/ShareAccess/CreateDisposition/CreateOptions,
+ * so it is better to convert them at the point of use, not here.
+ */
+
+int convert_client_oflag(int cflag, int *result)
+{
+    *result = 0;
+    return 0;
+}
+
+
+int cfs_error_code(NTSTATUS Status)
+{
+    switch (Status) {
+
+    case STATUS_ACCESS_DENIED:
+        return (-EACCES);
+
+    case STATUS_ACCESS_VIOLATION:
+        return (-EFAULT);
+
+    case STATUS_BUFFER_TOO_SMALL:
+        return (-ETOOSMALL);
+
+    case STATUS_INVALID_PARAMETER:
+        return (-EINVAL);
+
+    case STATUS_NOT_IMPLEMENTED:
+    case STATUS_NOT_SUPPORTED:
+        return (-EOPNOTSUPP);
+
+    case STATUS_INVALID_ADDRESS:
+    case STATUS_INVALID_ADDRESS_COMPONENT:
+        return (-EADDRNOTAVAIL);
+
+    case STATUS_NO_SUCH_DEVICE:
+    case STATUS_NO_SUCH_FILE:
+    case STATUS_OBJECT_NAME_NOT_FOUND:
+    case STATUS_OBJECT_PATH_NOT_FOUND:
+    case STATUS_NETWORK_BUSY:
+    case STATUS_INVALID_NETWORK_RESPONSE:
+    case STATUS_UNEXPECTED_NETWORK_ERROR:
+        return (-ENETDOWN);
+
+    case STATUS_BAD_NETWORK_PATH:
+    case STATUS_NETWORK_UNREACHABLE:
+    case STATUS_PROTOCOL_UNREACHABLE:
+        return (-ENETUNREACH);
+
+    case STATUS_LOCAL_DISCONNECT:
+    case STATUS_TRANSACTION_ABORTED:
+    case STATUS_CONNECTION_ABORTED:
+        return (-ECONNABORTED);
+
+    case STATUS_REMOTE_DISCONNECT:
+    case STATUS_LINK_FAILED:
+    case STATUS_CONNECTION_DISCONNECTED:
+    case STATUS_CONNECTION_RESET:
+    case STATUS_PORT_UNREACHABLE:
+        return (-ECONNRESET);
+
+    case STATUS_PAGEFILE_QUOTA:
+    case STATUS_NO_MEMORY:
+    case STATUS_CONFLICTING_ADDRESSES:
+    case STATUS_QUOTA_EXCEEDED:
+    case STATUS_TOO_MANY_PAGING_FILES:
+    case STATUS_INSUFFICIENT_RESOURCES:
+    case STATUS_WORKING_SET_QUOTA:
+    case STATUS_COMMITMENT_LIMIT:
+    case STATUS_TOO_MANY_ADDRESSES:
+    case STATUS_REMOTE_RESOURCES:
+        return (-ENOBUFS);
+
+    case STATUS_INVALID_CONNECTION:
+        return (-ENOTCONN);
+
+    case STATUS_PIPE_DISCONNECTED:
+        return (-ESHUTDOWN);
+
+    case STATUS_TIMEOUT:
+    case STATUS_IO_TIMEOUT:
+    case STATUS_LINK_TIMEOUT:
+        return (-ETIMEDOUT);
+
+    case STATUS_REMOTE_NOT_LISTENING:
+    case STATUS_CONNECTION_REFUSED:
+        return (-ECONNREFUSED);
+
+    case STATUS_HOST_UNREACHABLE:
+        return (-EHOSTUNREACH);
+
+    case STATUS_PENDING:
+    case STATUS_DEVICE_NOT_READY:
+        return (-EAGAIN);
+
+    case STATUS_CANCELLED:
+    case STATUS_REQUEST_ABORTED:
+        return (-EINTR);
+
+    case STATUS_BUFFER_OVERFLOW:
+    case STATUS_INVALID_BUFFER_SIZE:
+        return (-EMSGSIZE);
+
+    }
+
+    if (NT_SUCCESS(Status))
+        return 0;
+
+    return (-EINVAL);
+}
+
+
+void cfs_stack_trace_fill(struct cfs_stack_trace *trace)
+{
+}
+
+void *cfs_stack_trace_frame(struct cfs_stack_trace *trace, int frame_no)
+{
+    return NULL;
+}
-- 
1.8.3.1
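
Editor's sketch (not part of the patch): the snippet below illustrates how the
libcfs_sock_* wrappers added in winnt-tcpip.c fit together on the active
(connecting) side. The function name, addresses, port and payload are
placeholders chosen for illustration only, and error handling is reduced to
the bare minimum.

    /* hypothetical caller: connect, echo four bytes, release */
    static int sketch_sock_echo_once(__u32 local_ip, __u32 peer_ip)
    {
        struct socket *conn  = NULL;
        int            fatal = 0;
        char           reply[4];
        int            rc;

        /* bind local_ip:988 and connect to peer_ip:988 (placeholder port) */
        rc = libcfs_sock_connect(&conn, &fatal, local_ip, 988, peer_ip, 988);
        if (rc < 0)
            return rc;

        /* both calls loop internally until the full buffer is transferred;
         * the timeout argument is ignored by the winnt implementation above */
        rc = libcfs_sock_write(conn, "ping", 4, 0);
        if (rc == 0)
            rc = libcfs_sock_read(conn, reply, sizeof(reply), 0);

        /* drops the tconn reference; a listening socket would instead have
         * its daemon stopped by libcfs_sock_release() */
        libcfs_sock_release(conn);
        return rc;
    }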