From 00f255b8c00dff66481a6ab22391869217b5d8af Mon Sep 17 00:00:00 2001 From: eeb Date: Fri, 4 Feb 2005 18:50:00 +0000 Subject: [PATCH] * Landed portals:b_port_step as follows... - removed CFS_DECL_SPIN* just use 'spinlock_t' and initialise with spin_lock_init() - removed CFS_DECL_MUTEX* just use 'struct semaphore' and initialise with init_mutex() - removed CFS_DECL_RWSEM* just use 'struct rw_semaphore' and initialise with init_rwsem() - renamed cfs_sleep_chan -> cfs_waitq cfs_sleep_link -> cfs_waitlink - fixed race in linux version of arch-independent socknal (the ENOMEM/EAGAIN decision). - Didn't fix problems in Darwin version of arch-independent socknal (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision) - removed libcfs types from non-socknal header files (only some types in the header files had been changed; the .c files hadn't been updated at all). - Updated lustre b1_4 to match --- lnet/ChangeLog | 25 + lnet/autoMakefile.am | 20 +- lnet/autoconf/lustre-lnet.m4 | 39 +- lnet/include/Makefile.am | 2 +- lnet/include/libcfs/.cvsignore | 2 + lnet/include/libcfs/Makefile.am | 3 + lnet/include/libcfs/curproc.h | 62 ++ lnet/include/libcfs/darwin/.cvsignore | 2 + lnet/include/libcfs/darwin/Makefile.am | 3 + lnet/include/libcfs/darwin/darwin-fs.h | 131 +++ lnet/include/libcfs/darwin/darwin-lock.h | 264 +++++ lnet/include/libcfs/darwin/darwin-mem.h | 206 ++++ lnet/include/libcfs/darwin/darwin-prim.h | 554 +++++++++++ lnet/include/libcfs/darwin/darwin-sync.h | 276 ++++++ lnet/include/libcfs/darwin/darwin-time.h | 257 +++++ lnet/include/libcfs/darwin/darwin-types.h | 82 ++ lnet/include/libcfs/darwin/darwin-utils.h | 60 ++ lnet/include/libcfs/darwin/kp30.h | 90 ++ lnet/include/libcfs/darwin/libcfs.h | 173 ++++ lnet/include/libcfs/darwin/lltrace.h | 26 + lnet/include/libcfs/darwin/portals_lib.h | 34 + lnet/include/libcfs/darwin/portals_utils.h | 18 + lnet/include/{linux => libcfs}/kp30.h | 400 ++------ lnet/include/{linux => libcfs}/libcfs.h | 317 +++--- 
lnet/include/libcfs/linux/.cvsignore | 2 + lnet/include/libcfs/linux/Makefile.am | 3 + lnet/include/libcfs/linux/kp30.h | 322 +++++++ lnet/include/libcfs/linux/libcfs.h | 151 +++ lnet/include/libcfs/linux/linux-fs.h | 70 ++ lnet/include/libcfs/linux/linux-lock.h | 118 +++ lnet/include/libcfs/linux/linux-mem.h | 110 +++ lnet/include/libcfs/linux/linux-prim.h | 173 ++++ lnet/include/libcfs/linux/linux-time.h | 292 ++++++ lnet/include/libcfs/linux/lltrace.h | 28 + lnet/include/{ => libcfs}/linux/portals_compat25.h | 4 +- lnet/include/libcfs/linux/portals_lib.h | 38 + lnet/include/libcfs/linux/portals_utils.h | 51 + .../include/{linux/lustre_list.h => libcfs/list.h} | 62 +- lnet/include/{lnet => libcfs}/lltrace.h | 29 +- lnet/include/{linux => libcfs}/portals_lib.h | 14 +- lnet/include/libcfs/portals_utils.h | 19 + lnet/include/libcfs/user-lock.h | 171 ++++ lnet/include/libcfs/user-prim.h | 182 ++++ lnet/include/libcfs/user-time.h | 198 ++++ lnet/include/linux/Makefile.am | 4 - lnet/include/lnet/Makefile.am | 10 +- lnet/include/lnet/api-support.h | 6 +- lnet/include/{linux => lnet/darwin}/.cvsignore | 0 lnet/include/lnet/darwin/Makefile.am | 1 + lnet/include/lnet/darwin/lib-lnet.h | 14 + lnet/include/lnet/darwin/lib-p30.h | 14 + lnet/include/lnet/darwin/lib-types.h | 15 + lnet/include/lnet/darwin/lnet.h | 20 + lnet/include/lnet/darwin/p30.h | 20 + lnet/include/{linux => lnet}/kpr.h | 4 +- lnet/include/lnet/lib-lnet.h | 18 +- lnet/include/lnet/lib-p30.h | 18 +- lnet/include/lnet/lib-types.h | 24 +- lnet/include/lnet/linux/.cvsignore | 2 + lnet/include/lnet/linux/Makefile.am | 1 + lnet/include/lnet/linux/lib-lnet.h | 20 + lnet/include/lnet/linux/lib-p30.h | 20 + lnet/include/lnet/linux/lib-types.h | 20 + lnet/include/lnet/linux/lnet.h | 25 + lnet/include/lnet/linux/p30.h | 25 + lnet/include/lnet/list.h | 243 ----- lnet/include/lnet/lnet.h | 15 +- lnet/include/lnet/lnetctl.h | 4 +- lnet/include/lnet/p30.h | 15 +- lnet/include/lnet/ptlctl.h | 4 +- lnet/include/lnet/types.h 
| 4 +- lnet/klnds/gmlnd/gmlnd.h | 2 +- lnet/klnds/iiblnd/iiblnd.h | 74 +- lnet/klnds/lolnd/autoMakefile.am | 2 + lnet/klnds/lolnd/lolnd.h | 2 +- lnet/klnds/openiblnd/openiblnd.h | 34 +- lnet/klnds/qswlnd/qswlnd.h | 26 +- lnet/klnds/ralnd/ralnd.h | 3 +- lnet/klnds/socklnd/Info.plist | 37 + lnet/klnds/socklnd/Makefile.in | 3 +- lnet/klnds/socklnd/autoMakefile.am | 26 +- lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj | 287 ++++++ lnet/klnds/socklnd/socklnd.c | 526 +++------- lnet/klnds/socklnd/socklnd.h | 170 +--- lnet/klnds/socklnd/socklnd_cb.c | 1017 ++++---------------- lnet/klnds/socklnd/socklnd_lib-darwin.c | 1011 +++++++++++++++++++ lnet/klnds/socklnd/socklnd_lib-darwin.h | 50 + lnet/klnds/socklnd/socklnd_lib-linux.c | 977 +++++++++++++++++++ lnet/klnds/socklnd/socklnd_lib-linux.h | 125 +++ lnet/klnds/viblnd/viblnd.h | 42 +- lnet/libcfs/Info.plist | 33 + lnet/libcfs/Makefile.in | 31 +- lnet/libcfs/autoMakefile.am | 31 +- lnet/libcfs/darwin/.cvsignore | 2 + lnet/libcfs/darwin/Makefile.am | 11 + lnet/libcfs/darwin/darwin-curproc.c | 124 +++ lnet/libcfs/darwin/darwin-debug.c | 25 + lnet/libcfs/darwin/darwin-fs.c | 330 +++++++ lnet/libcfs/darwin/darwin-mem.c | 455 +++++++++ lnet/libcfs/darwin/darwin-module.c | 159 +++ lnet/libcfs/darwin/darwin-prim.c | 402 ++++++++ lnet/libcfs/darwin/darwin-proc.c | 129 +++ lnet/libcfs/darwin/darwin-sync.c | 868 +++++++++++++++++ lnet/libcfs/darwin/darwin-tracefile.c | 159 +++ lnet/libcfs/darwin/darwin-utils.c | 482 ++++++++++ lnet/libcfs/debug.c | 153 +-- lnet/libcfs/libcfs.xcode/project.pbxproj | 439 +++++++++ lnet/libcfs/linux/.cvsignore | 3 + lnet/libcfs/linux/Makefile.am | 4 + lnet/libcfs/linux/linux-curproc.c | 133 +++ lnet/libcfs/linux/linux-debug.c | 151 +++ lnet/libcfs/linux/linux-fs.c | 31 + lnet/libcfs/linux/linux-lock.c | 4 + lnet/libcfs/linux/linux-lwt.c | 2 + lnet/libcfs/linux/linux-mem.c | 175 ++++ lnet/libcfs/linux/linux-module.c | 170 ++++ lnet/libcfs/linux/linux-prim.c | 19 + lnet/libcfs/{proc.c => 
linux/linux-proc.c} | 2 +- lnet/libcfs/linux/linux-sync.c | 2 + lnet/libcfs/linux/linux-tracefile.c | 205 ++++ lnet/libcfs/linux/linux-utils.c | 47 + lnet/libcfs/lwt.c | 2 +- lnet/libcfs/module.c | 250 ++--- lnet/libcfs/tracefile.c | 610 ++++-------- lnet/libcfs/tracefile.h | 78 +- lnet/libcfs/user-lock.c | 242 +++++ lnet/libcfs/user-prim.c | 266 +++++ lnet/libcfs/watchdog.c | 18 +- lnet/lnet/Info.plist | 35 + lnet/lnet/api-ni.c | 6 +- lnet/lnet/api-wrap.c | 64 +- lnet/lnet/autoMakefile.am | 25 +- lnet/lnet/lib-eq.c | 22 +- lnet/lnet/lib-init.c | 19 +- lnet/lnet/lib-md.c | 51 +- lnet/lnet/lib-me.c | 19 +- lnet/lnet/lib-move.c | 171 ++-- lnet/lnet/lib-msg.c | 19 +- lnet/lnet/lib-pid.c | 1 - lnet/lnet/module.c | 33 +- lnet/lnet/portals.xcode/project.pbxproj | 430 +++++++++ lnet/router/autoMakefile.am | 4 + lnet/router/router.h | 4 +- lnet/tests/arch-linux/ping.h | 22 + lnet/tests/arch-xnu/ping.h | 8 + lnet/tests/autoMakefile.am | 4 + lnet/tests/build-osx | 159 +++ lnet/tests/build.seq | 5 + lnet/tests/ping.h | 11 +- lnet/tests/ping_cli.c | 52 +- lnet/tests/ping_cli/Info.plist | 37 + lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj | 255 +++++ lnet/tests/ping_srv.c | 56 +- lnet/tests/ping_srv/Info.plist | 37 + lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj | 255 +++++ lnet/tests/sping_cli.c | 2 +- lnet/tests/sping_srv.c | 2 +- lnet/ulnds/address.c | 16 +- lnet/ulnds/connection.c | 3 +- lnet/ulnds/select.c | 12 +- lnet/ulnds/socklnd/address.c | 16 +- lnet/ulnds/socklnd/connection.c | 3 +- lnet/ulnds/socklnd/select.c | 12 +- lnet/utils/acceptor.c | 2 +- lnet/utils/debug.c | 18 +- lnet/utils/gmlndnid.c | 7 - lnet/utils/l_ioctl.c | 8 +- lnet/utils/portals.c | 11 +- lnet/utils/wirecheck.c | 7 +- 169 files changed, 14919 insertions(+), 3314 deletions(-) create mode 100644 lnet/include/libcfs/.cvsignore create mode 100644 lnet/include/libcfs/Makefile.am create mode 100644 lnet/include/libcfs/curproc.h create mode 100644 lnet/include/libcfs/darwin/.cvsignore create mode 
100644 lnet/include/libcfs/darwin/Makefile.am create mode 100644 lnet/include/libcfs/darwin/darwin-fs.h create mode 100644 lnet/include/libcfs/darwin/darwin-lock.h create mode 100644 lnet/include/libcfs/darwin/darwin-mem.h create mode 100644 lnet/include/libcfs/darwin/darwin-prim.h create mode 100644 lnet/include/libcfs/darwin/darwin-sync.h create mode 100644 lnet/include/libcfs/darwin/darwin-time.h create mode 100644 lnet/include/libcfs/darwin/darwin-types.h create mode 100644 lnet/include/libcfs/darwin/darwin-utils.h create mode 100644 lnet/include/libcfs/darwin/kp30.h create mode 100644 lnet/include/libcfs/darwin/libcfs.h create mode 100644 lnet/include/libcfs/darwin/lltrace.h create mode 100644 lnet/include/libcfs/darwin/portals_lib.h create mode 100644 lnet/include/libcfs/darwin/portals_utils.h rename lnet/include/{linux => libcfs}/kp30.h (54%) rename lnet/include/{linux => libcfs}/libcfs.h (63%) create mode 100644 lnet/include/libcfs/linux/.cvsignore create mode 100644 lnet/include/libcfs/linux/Makefile.am create mode 100644 lnet/include/libcfs/linux/kp30.h create mode 100644 lnet/include/libcfs/linux/libcfs.h create mode 100644 lnet/include/libcfs/linux/linux-fs.h create mode 100644 lnet/include/libcfs/linux/linux-lock.h create mode 100644 lnet/include/libcfs/linux/linux-mem.h create mode 100644 lnet/include/libcfs/linux/linux-prim.h create mode 100644 lnet/include/libcfs/linux/linux-time.h create mode 100644 lnet/include/libcfs/linux/lltrace.h rename lnet/include/{ => libcfs}/linux/portals_compat25.h (97%) create mode 100644 lnet/include/libcfs/linux/portals_lib.h create mode 100644 lnet/include/libcfs/linux/portals_utils.h rename lnet/include/{linux/lustre_list.h => libcfs/list.h} (87%) rename lnet/include/{lnet => libcfs}/lltrace.h (90%) rename lnet/include/{linux => libcfs}/portals_lib.h (91%) create mode 100644 lnet/include/libcfs/portals_utils.h create mode 100644 lnet/include/libcfs/user-lock.h create mode 100644 lnet/include/libcfs/user-prim.h create 
mode 100644 lnet/include/libcfs/user-time.h delete mode 100644 lnet/include/linux/Makefile.am rename lnet/include/{linux => lnet/darwin}/.cvsignore (100%) create mode 100644 lnet/include/lnet/darwin/Makefile.am create mode 100644 lnet/include/lnet/darwin/lib-lnet.h create mode 100644 lnet/include/lnet/darwin/lib-p30.h create mode 100644 lnet/include/lnet/darwin/lib-types.h create mode 100644 lnet/include/lnet/darwin/lnet.h create mode 100644 lnet/include/lnet/darwin/p30.h rename lnet/include/{linux => lnet}/kpr.h (99%) create mode 100644 lnet/include/lnet/linux/.cvsignore create mode 100644 lnet/include/lnet/linux/Makefile.am create mode 100644 lnet/include/lnet/linux/lib-lnet.h create mode 100644 lnet/include/lnet/linux/lib-p30.h create mode 100644 lnet/include/lnet/linux/lib-types.h create mode 100644 lnet/include/lnet/linux/lnet.h create mode 100644 lnet/include/lnet/linux/p30.h delete mode 100644 lnet/include/lnet/list.h create mode 100644 lnet/klnds/socklnd/Info.plist create mode 100644 lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj create mode 100644 lnet/klnds/socklnd/socklnd_lib-darwin.c create mode 100644 lnet/klnds/socklnd/socklnd_lib-darwin.h create mode 100644 lnet/klnds/socklnd/socklnd_lib-linux.c create mode 100644 lnet/klnds/socklnd/socklnd_lib-linux.h create mode 100644 lnet/libcfs/Info.plist create mode 100644 lnet/libcfs/darwin/.cvsignore create mode 100644 lnet/libcfs/darwin/Makefile.am create mode 100644 lnet/libcfs/darwin/darwin-curproc.c create mode 100644 lnet/libcfs/darwin/darwin-debug.c create mode 100644 lnet/libcfs/darwin/darwin-fs.c create mode 100644 lnet/libcfs/darwin/darwin-mem.c create mode 100644 lnet/libcfs/darwin/darwin-module.c create mode 100644 lnet/libcfs/darwin/darwin-prim.c create mode 100644 lnet/libcfs/darwin/darwin-proc.c create mode 100644 lnet/libcfs/darwin/darwin-sync.c create mode 100644 lnet/libcfs/darwin/darwin-tracefile.c create mode 100644 lnet/libcfs/darwin/darwin-utils.c create mode 100644 
lnet/libcfs/libcfs.xcode/project.pbxproj create mode 100644 lnet/libcfs/linux/.cvsignore create mode 100644 lnet/libcfs/linux/Makefile.am create mode 100644 lnet/libcfs/linux/linux-curproc.c create mode 100644 lnet/libcfs/linux/linux-debug.c create mode 100644 lnet/libcfs/linux/linux-fs.c create mode 100644 lnet/libcfs/linux/linux-lock.c create mode 100644 lnet/libcfs/linux/linux-lwt.c create mode 100644 lnet/libcfs/linux/linux-mem.c create mode 100644 lnet/libcfs/linux/linux-module.c create mode 100644 lnet/libcfs/linux/linux-prim.c rename lnet/libcfs/{proc.c => linux/linux-proc.c} (99%) create mode 100644 lnet/libcfs/linux/linux-sync.c create mode 100644 lnet/libcfs/linux/linux-tracefile.c create mode 100644 lnet/libcfs/linux/linux-utils.c create mode 100644 lnet/libcfs/user-lock.c create mode 100644 lnet/libcfs/user-prim.c create mode 100644 lnet/lnet/Info.plist create mode 100644 lnet/lnet/portals.xcode/project.pbxproj create mode 100644 lnet/tests/arch-linux/ping.h create mode 100644 lnet/tests/arch-xnu/ping.h create mode 100644 lnet/tests/build-osx create mode 100644 lnet/tests/build.seq create mode 100644 lnet/tests/ping_cli/Info.plist create mode 100644 lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj create mode 100644 lnet/tests/ping_srv/Info.plist create mode 100644 lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj diff --git a/lnet/ChangeLog b/lnet/ChangeLog index e69de29..3d7f2b0 100644 --- a/lnet/ChangeLog +++ b/lnet/ChangeLog @@ -0,0 +1,25 @@ +2005-02-04 Eric Barton + + * Landed portals:b_port_step as follows... 
+ + - removed CFS_DECL_SPIN* + just use 'spinlock_t' and initialise with spin_lock_init() + + - removed CFS_DECL_MUTEX* + just use 'struct semaphore' and initialise with init_mutex() + + - removed CFS_DECL_RWSEM* + just use 'struct rw_semaphore' and initialise with init_rwsem() + + - renamed cfs_sleep_chan -> cfs_waitq + cfs_sleep_link -> cfs_waitlink + + - fixed race in linux version of arch-independent socknal + (the ENOMEM/EAGAIN decision). + + - Didn't fix problems in Darwin version of arch-independent socknal + (resetting socket callbacks, eager ack hack, ENOMEM/EAGAIN decision) + + - removed libcfs types from non-socknal header files (only some types + in the header files had been changed; the .c files hadn't been + updated at all). diff --git a/lnet/autoMakefile.am b/lnet/autoMakefile.am index f2ba240..50d8298 100644 --- a/lnet/autoMakefile.am +++ b/lnet/autoMakefile.am @@ -3,7 +3,23 @@ # This code is issued under the GNU General Public License. # See the file COPYING in this distribution -SUBDIRS = portals libcfs knals unals router tests doc utils include \ +SUBDIRS = libcfs portals knals unals router tests doc utils include \ autoconf -sources: +sources: include/libcfs/arch + $(MAKE) sources -C libcfs + +all-recursive: include/libcfs/arch + +include/libcfs/arch: + case `uname` in \ + Linux) \ + ln -s linux include/libcfs/arch \ + ;; \ + Darwin) \ + ln -s darwin include/libcfs/arch \ + ;; \ + *) \ + echo "Platform `uname` is not supported" \ + ;; \ + esac diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index 06bdf39..b2abf0a 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -428,6 +428,15 @@ LP_FUNC_SHOW_TASK ]) # +# LP_PROG_DARWIN +# +# Darwin checks +# +AC_DEFUN([LP_PROG_DARWIN], +[LB_DARWIN_CHECK_FUNCS([get_preemption_level]) +]) + +# # LP_PATH_DEFAULTS # # default paths for installed files @@ -443,7 +452,7 @@ AC_DEFUN([LP_PATH_DEFAULTS], # AC_DEFUN([LP_CONFIGURE], [# portals/utils/portals.c 
-AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h]) +AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h endian.h]) AC_CHECK_FUNCS([gethostbyname socket connect]) # portals/utils/debug.c @@ -454,22 +463,20 @@ AC_CHECK_TYPE([spinlock_t], [], [#include ]) +# portals/utils/wirecheck.c +AC_CHECK_FUNCS([strnlen]) + # -------- Check for required packages -------------- -# this doesn't seem to work on older autoconf -# AC_CHECK_LIB(readline, readline,,) -AC_MSG_CHECKING([for readline support]) -AC_ARG_ENABLE(readline, - AC_HELP_STRING([--disable-readline], - [do not use readline library]), - [],[enable_readline='yes']) -AC_MSG_RESULT([$enable_readline]) -if test x$enable_readline = xyes ; then +LIBS_save="$LIBS" +LIBS="-lncurses $LIBS" +AC_CHECK_LIB([readline],[readline],[ LIBREADLINE="-lreadline -lncurses" AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available]) -else +],[ LIBREADLINE="" -fi +]) +LIBS="$LIBS_save" AC_SUBST(LIBREADLINE) AC_MSG_CHECKING([if efence debugging support is requested]) @@ -573,8 +580,12 @@ portals/autoMakefile portals/autoconf/Makefile portals/doc/Makefile portals/include/Makefile -portals/include/linux/Makefile +portals/include/libcfs/Makefile +portals/include/libcfs/darwin/Makefile +portals/include/libcfs/linux/Makefile portals/include/portals/Makefile +portals/include/portals/darwin/Makefile +portals/include/portals/linux/Makefile portals/knals/Makefile portals/knals/autoMakefile portals/knals/gmnal/Makefile @@ -595,6 +606,8 @@ portals/knals/socknal/Makefile portals/knals/socknal/autoMakefile portals/libcfs/Makefile portals/libcfs/autoMakefile +portals/libcfs/darwin/Makefile +portals/libcfs/linux/Makefile portals/portals/Makefile portals/portals/autoMakefile portals/router/Makefile diff --git a/lnet/include/Makefile.am b/lnet/include/Makefile.am index 2b3eb8c..dd6db1d 100644 --- a/lnet/include/Makefile.am +++ b/lnet/include/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = linux portals +SUBDIRS = libcfs portals EXTRA_DIST = cygwin-ioctl.h 
diff --git a/lnet/include/libcfs/.cvsignore b/lnet/include/libcfs/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/lnet/include/libcfs/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/lnet/include/libcfs/Makefile.am b/lnet/include/libcfs/Makefile.am new file mode 100644 index 0000000..1e928ba --- /dev/null +++ b/lnet/include/libcfs/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS := darwin linux + +EXTRA_DIST := libcfs.h list.h lltrace.h kp30.h portals_utils.h portals_lib.h diff --git a/lnet/include/libcfs/curproc.h b/lnet/include/libcfs/curproc.h new file mode 100644 index 0000000..630912d --- /dev/null +++ b/lnet/include/libcfs/curproc.h @@ -0,0 +1,62 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API declaration + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#ifndef __LIBCFS_CURPROC_H__ +#define __LIBCFS_CURPROC_H__ + +/* + * Portable API to access common characteristics of "current" UNIX process. 
+ * + * Implemented in portals/include/libcfs// + */ +uid_t cfs_curproc_uid(void); +gid_t cfs_curproc_gid(void); +uid_t cfs_curproc_fsuid(void); +gid_t cfs_curproc_fsgid(void); +pid_t cfs_curproc_pid(void); +int cfs_curproc_groups_nr(void); +int cfs_curproc_is_in_groups(gid_t group); +void cfs_curproc_groups_dump(gid_t *array, int size); +mode_t cfs_curproc_umask(void); +char *cfs_curproc_comm(void); + + +/* + * Plus, platform-specific constant + * + * CFS_CURPROC_COMM_MAX, + * + * and opaque scalar type + * + * cfs_kernel_cap_t + */ +cfs_kernel_cap_t cfs_curproc_cap_get(void); +void cfs_curproc_cap_set(cfs_kernel_cap_t cap); + +/* __LIBCFS_CURPROC_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/libcfs/darwin/.cvsignore b/lnet/include/libcfs/darwin/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/lnet/include/libcfs/darwin/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/lnet/include/libcfs/darwin/Makefile.am b/lnet/include/libcfs/darwin/Makefile.am new file mode 100644 index 0000000..4ff2072 --- /dev/null +++ b/lnet/include/libcfs/darwin/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST := darwin-mem.h darwin-types.h libcfs.h portals_utils.h \ + darwin-fs.h darwin-prim.h darwin-utils.h lltrace.h \ + darwin-lock.h darwin-sync.h kp30.h portals_lib.h diff --git a/lnet/include/libcfs/darwin/darwin-fs.h b/lnet/include/libcfs/darwin/darwin-fs.h new file mode 100644 index 0000000..32244e7 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-fs.h @@ -0,0 +1,131 @@ +#ifndef __LIBCFS_DARWIN_CFS_FS_H__ +#define __LIBCFS_DARWIN_CFS_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ + +#include +#include +/* + * __APPLE_API_PRIVATE is defined before include user.h + * Doing this way to get the define of uthread, it's not good + * but I do need to know what's inside uthread. + */ +#ifndef __APPLE_API_PRIVATE +#define __APPLE_API_PRIVATE +#include +#undef __APPLE_API_PRIVATE +#else +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * File operating APIs in kernel + */ +typedef struct file cfs_file_t; + +int filp_node_size(cfs_file_t *fp, off_t *size); +#define cfs_filp_size(fp) \ + ({ \ + off_t __size; \ + filp_node_size((fp), &__size); \ + __size; \ + }) +#define cfs_filp_poff(fp) (NULL) + +cfs_file_t *filp_open(const char *name, int flags, int mode, int *err); +int filp_close(cfs_file_t *fp); +int filp_read(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); +int filp_write(cfs_file_t *fp, void *buf, size_t nbytes, off_t *pos); +int filp_fsync(cfs_file_t *fp); + +#define cfs_filp_open(n, f, m, e) filp_open(n, f, m, e) +#define cfs_filp_close(f) filp_close(f) +#define cfs_filp_read(f, b, n, p) filp_read(f, b, n, p) +#define cfs_filp_write(f, b, n, p) filp_write(f, b, n, p) +#define cfs_filp_fsync(f) filp_fsync(f) + +int ref_file(cfs_file_t *fp); +int rele_file(cfs_file_t *fp); +int file_count(cfs_file_t *fp); +#define cfs_get_file(f) ref_file(f) +#define cfs_put_file(f) rele_file(f) +#define cfs_file_count(f) file_count(f) + +#define CFS_INT_LIMIT(x) (~((x)1 << (sizeof(x)*8 - 1))) +#define CFS_OFFSET_MAX CFS_INT_LIMIT(loff_t) + +typedef struct flock cfs_flock_t; +#define CFS_FLOCK_TYPE(fl) ((fl)->l_type) +#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->l_type = (type); } while(0) +#define CFS_FLOCK_PID(fl) ((fl)->l_pid) +#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->l_pid = (pid); } 
while(0) +#define CFS_FLOCK_START(fl) ((fl)->l_start) +#define CFS_FLOCK_SET_START(fl, start) do { (fl)->l_start = (start); } while(0) +#define CFS_FLOCK_END(fl) ((fl)->l_len == 0? CFS_OFFSET_MAX: ((fl)->l_start + (fl)->l_len)) /* l_len == 0 maps to CFS_OFFSET_MAX */ +#define CFS_FLOCK_SET_END(fl, end) \ + do { \ + if ((end) == CFS_OFFSET_MAX) \ + (fl)->l_len = 0; \ + else \ + (fl)->l_len = (end) - (fl)->l_start;\ + } while(0) + +typedef struct { + void *d; +} cfs_dentry_t; +typedef unsigned short umode_t; + +#define ATTR_MODE 0x0001 +#define ATTR_UID 0x0002 +#define ATTR_GID 0x0004 +#define ATTR_SIZE 0x0008 +#define ATTR_ATIME 0x0010 +#define ATTR_MTIME 0x0020 +#define ATTR_CTIME 0x0040 +#define ATTR_ATIME_SET 0x0080 +#define ATTR_MTIME_SET 0x0100 +#define ATTR_FORCE 0x0200 /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG 0x0400 +#define ATTR_RAW 0x0800 /* file system, not vfs will massage attrs */ +#define ATTR_FROM_OPEN 0x1000 /* called from open path, ie O_TRUNC */ +#define ATTR_CTIME_SET 0x2000 + +#define in_group_p(x) (0) + +#endif + +#define O_SYNC 0 +#define O_DIRECTORY 0 +#define O_LARGEFILE 0 + +#endif diff --git a/lnet/include/libcfs/darwin/darwin-lock.h b/lnet/include/libcfs/darwin/darwin-lock.h new file mode 100644 index 0000000..da16418 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-lock.h @@ -0,0 +1,264 @@ +#ifndef __LIBCFS_DARWIN_CFS_LOCK_H__ +#define __LIBCFS_DARWIN_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include + +/* spin lock types and operations */ +#include +#include +#include + +#include +#include + +/* + * spin_lock (use Linux kernel's primitives) + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + */ +struct spin_lock { + struct kspin spin; +}; + +typedef struct spin_lock spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + kspin_init(&lock->spin); +} + +static inline void spin_lock(spinlock_t *lock) +{ + kspin_lock(&lock->spin); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + kspin_unlock(&lock->spin); +} + +static inline int spin_trylock(spinlock_t *lock) +{ + return kspin_trylock(&lock->spin); +} + +#define spin_lock_bh(x) spin_lock(x) +#define spin_unlock_bh(x) spin_unlock(x) +#define spin_lock_bh_init(x) spin_lock_init(x) + +extern boolean_t ml_set_interrupts_enabled(boolean_t enable); +#define __disable_irq() (spl_t) ml_set_interrupts_enabled(FALSE) +#define __enable_irq(x) (void) ml_set_interrupts_enabled(x) + +#define spin_lock_irqsave(s, f) do{ \ + f = __disable_irq(); \ + spin_lock(s); }while(0) + +#define spin_unlock_irqrestore(s, f) do{ \ + spin_unlock(s); \ + __enable_irq(f);}while(0) + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +struct semaphore { + struct ksem sem; +}; + +static inline void sema_init(struct semaphore *s, int val) +{ + ksem_init(&s->sem, val); +} + +static inline void __down(struct semaphore *s) +{ + ksem_down(&s->sem, 1); +} + +static inline void __up(struct semaphore *s) +{ + ksem_up(&s->sem, 1); +} + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ + +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * 
Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +struct completion { + /* + * Emulate completion by semaphore for now. + * + * XXX nikita: this is not safe if completion is used to synchronize + * exit from kernel daemon thread and kext unloading. In this case + * some core function (a la complete_and_exit()) is needed. + */ + struct ksem sem; +}; + +static inline void init_completion(struct completion *c) +{ + ksem_init(&c->sem, 0); +} + +static inline void complete(struct completion *c) +{ + ksem_up(&c->sem, 1); +} + +static inline void wait_for_completion(struct completion *c) +{ + ksem_down(&c->sem, 1); +} + +/* + * rw_semaphore: + * + * - DECLARE_RWSEM(x) + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore { + struct krw_sem s; +}; + +static inline void init_rwsem(struct rw_semaphore *s) +{ + krw_sem_init(&s->s); +} + +static inline void down_read(struct rw_semaphore *s) +{ + krw_sem_down_r(&s->s); +} + +static inline int down_read_trylock(struct rw_semaphore *s) +{ + int ret = krw_sem_down_r_try(&s->s); + return ret == 0? 1: 0; +} + +static inline void down_write(struct rw_semaphore *s) +{ + krw_sem_down_w(&s->s); +} + +static inline int down_write_trylock(struct rw_semaphore *s) +{ + int ret = krw_sem_down_w_try(&s->s); + return ret == 0? 1: 0; +} + +static inline void up_read(struct rw_semaphore *s) +{ + krw_sem_up_r(&s->s); +} + +static inline void up_write(struct rw_semaphore *s) +{ + krw_sem_up_w(&s->s); +} + +/* + * read-write lock : Need to be investigated more!! 
+ * XXX nikita: for now, let rwlock_t to be identical to rw_semaphore + * + * - DECLARE_RWLOCK(l) + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ +typedef struct rw_semaphore rwlock_t; + +#define rwlock_init(pl) init_rwsem(pl) + +#define read_lock(l) down_read(l) +#define read_unlock(l) up_read(l) +#define write_lock(l) down_write(l) +#define write_unlock(l) up_write(l) + +#define write_lock_irqsave(l, f) do{ \ + f = __disable_irq(); \ + write_lock(l); }while(0) + +#define write_unlock_irqrestore(l, f) do{ \ + write_unlock(l); \ + __enable_irq(f);}while(0) + +#define read_lock_irqsave(l, f) do{ \ + f = __disable_irq(); \ + read_lock(l); }while(0) + +#define read_unlock_irqrestore(l, f) do{ \ + read_unlock(l); \ + __enable_irq(f);}while(0) + +/* + * Funnel: + * + * Safe funnel in/out + */ + +#define CFS_DECL_FUNNEL_DATA \ + boolean_t __funnel_state = FALSE; \ + funnel_t *__funnel +#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA +#define CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA + +void lustre_cone_in(boolean_t *state, funnel_t **cone); +void lustre_cone_ex(boolean_t state, funnel_t *cone); + +#define CFS_CONE_IN lustre_cone_in(&__funnel_state, &__funnel) +#define CFS_CONE_EX lustre_cone_ex(__funnel_state, __funnel) + +void lustre_net_in(boolean_t *state, funnel_t **cone); +void lustre_net_ex(boolean_t state, funnel_t *cone); + +#define CFS_NET_IN lustre_net_in(&__funnel_state, &__funnel) +#define CFS_NET_EX lustre_net_ex(__funnel_state, __funnel) + +/* __KERNEL__ */ +#endif + +/* __XNU_CFS_LOCK_H */ +#endif diff --git a/lnet/include/libcfs/darwin/darwin-mem.h b/lnet/include/libcfs/darwin/darwin-mem.h new file mode 100644 index 0000000..922a1b8 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-mem.h @@ -0,0 +1,206 @@ +#ifndef __LIBCFS_DARWIN_CFS_MEM_H__ +#define __LIBCFS_DARWIN_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Page of OSX + * + * There is no page in OSX, however, we need page in lustre. + */ +#define PAGE_MASK (~(PAGE_SIZE-1)) +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +/* + * Basic xnu_page struct, should be binary compatibility with + * all page types in xnu (we have only xnu_raw_page, xll_page now) + */ + +/* Variable sized pages are not supported */ + +#define CFS_PAGE_SHIFT 12 +#define CFS_PAGE_SIZE (1 << CFS_PAGE_SHIFT) +#define PAGE_CACHE_SIZE CFS_PAGE_SIZE +#define CFS_PAGE_MASK (~(CFS_PAGE_SIZE - 1)) + +enum { + XNU_PAGE_RAW, + XNU_PAGE_XLL, + XNU_PAGE_NTYPES +}; + +typedef __u32 page_off_t; + +/* + * For XNU we have our own page cache built on top of underlying BSD/MACH + * infrastructure. In particular, we have two disjoint types of pages: + * + * - "raw" pages (XNU_PAGE_RAW): these are just buffers mapped into KVM, + * based on UPLs, and + * + * - "xll" pages (XNU_PAGE_XLL): these are used by file system to cache + * file data, owned by file system objects, hashed, lrued, etc. + * + * cfs_page_t has to cover both of them, because core Lustre code is based on + * the Linux assumption that page is _both_ memory buffer and file system + * caching entity. + * + * To achieve this, all types of pages supported on XNU has to start from + * common header that contains only "page type". Common cfs_page_t operations + * dispatch through operation vector based on page type. 
+ * + */ +typedef struct xnu_page { + int type; +} cfs_page_t; + +struct xnu_page_ops { + void *(*page_map) (cfs_page_t *); + void (*page_unmap) (cfs_page_t *); + void *(*page_address) (cfs_page_t *); +}; + +void xnu_page_ops_register(int type, struct xnu_page_ops *ops); +void xnu_page_ops_unregister(int type); + +/* + * raw page, no cache object, just like buffer + */ +struct xnu_raw_page { + struct xnu_page header; + vm_address_t virtual; + upl_t upl; + int order; + atomic_t count; + void *private; +}; + +/* + * Public interface to lustre + * + * - cfs_alloc_pages(f, o) + * - cfs_alloc_page(f) + * - cfs_free_pages(p, o) + * - cfs_free_page(p) + * - cfs_kmap(p) + * - cfs_kunmap(p) + * - cfs_page_address(p) + */ + +/* + * Of all functions above only cfs_kmap(), cfs_kunmap(), and + * cfs_page_address() can be called on file system pages. The rest is for raw + * pages only. + */ + +cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order); +cfs_page_t *cfs_alloc_page(u_int32_t flags); +void cfs_free_pages(cfs_page_t *pages, int order); +void cfs_free_page(cfs_page_t *page); +void cfs_get_page(cfs_page_t *page); +int cfs_put_page_testzero(cfs_page_t *page); +int cfs_page_count(cfs_page_t *page); +void cfs_set_page_count(cfs_page_t *page, int v); + +void *cfs_page_address(cfs_page_t *pg); +void *cfs_kmap(cfs_page_t *pg); +void cfs_kunmap(cfs_page_t *pg); + +/* + * Memory allocator + */ + +extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); +extern void cfs_free(void *addr); + +extern void *cfs_alloc_large(size_t nr_bytes); +extern void cfs_free_large(void *addr); + +/* + * Slab: + * + * No slab in OSX, use zone allocator to fake slab + */ +#define SLAB_HWCACHE_ALIGN 0 + +typedef struct cfs_mem_cache { + struct list_head link; + zone_t zone; + int size; + char name [ZONE_NAME_MAX_LEN]; +} cfs_mem_cache_t; + +#define KMEM_CACHE_MAX_COUNT 64 +#define KMEM_MAX_ZONE 8192 + +extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long, 
+ void (*)(void *, cfs_mem_cache_t *, unsigned long), + void (*)(void *, cfs_mem_cache_t *, unsigned long)); +extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); +extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); +extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); + +/* + * Misc + */ +/* XXX fix me */ +#define num_physpages (64 * 1024) + +#define CFS_DECL_MMSPACE +#define CFS_MMSPACE_OPEN do {} while(0) +#define CFS_MMSPACE_CLOSE do {} while(0) + +#define copy_from_user(kaddr, uaddr, size) copyin((caddr_t)uaddr, (caddr_t)kaddr, size) +#define copy_to_user(uaddr, kaddr, size) copyout((caddr_t)kaddr, (caddr_t)uaddr, size) + +#if defined (__ppc__) +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define wmb() __asm__ __volatile__ ("eieio" : : : "memory") +#elif defined (__i386__) +#define mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory") +#define rmb() mb() +#define wmb() __asm__ __volatile__ ("": : :"memory") +#else +#error architecture not supported +#endif + +#else /* !__KERNEL__ */ + +typedef struct cfs_page{ + void *foo; +} cfs_page_t; +#endif /* __KERNEL__ */ + +#endif /* __XNU_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-prim.h b/lnet/include/libcfs/darwin/darwin-prim.h new file mode 100644 index 0000000..ce9078d --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-prim.h @@ -0,0 +1,554 @@ +#ifndef __LIBCFS_DARWIN_CFS_PRIM_H__ +#define __LIBCFS_DARWIN_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include + +#ifndef __APPLE_API_PRIVATE +#define __APPLE_API_PRIVATE +#include +#undef __APPLE_API_PRIVATE +#else +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Symbol functions for libcfs + * + * OSX has no facility for use to register symbol. + * So we have to implement it. + */ +#define CFS_SYMBOL_LEN 64 + +struct cfs_symbol { + char name[CFS_SYMBOL_LEN]; + void *value; + int ref; + struct list_head sym_list; +}; + +extern kern_return_t cfs_symbol_register(const char *, const void *); +extern kern_return_t cfs_symbol_unregister(const char *); +extern void * cfs_symbol_get(const char *); +extern kern_return_t cfs_symbol_put(const char *); + +/* + * sysctl typedef + * + * User can register/unregister a list of sysctl_oids + * sysctl_oid is data struct of osx's sysctl-entry + */ +typedef struct sysctl_oid * cfs_sysctl_table_t; +typedef cfs_sysctl_table_t cfs_sysctl_table_header_t; +cfs_sysctl_table_header_t *register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg); +void unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table); + +/* + * Proc file system APIs, no /proc fs support in OSX + */ +typedef struct cfs_proc_dir_entry{ + void *data; +}cfs_proc_dir_entry_t; + +cfs_proc_dir_entry_t * cfs_create_proc_entry(char *name, int mod, + cfs_proc_dir_entry_t *parent); +void cfs_free_proc_entry(cfs_proc_dir_entry_t *de); +void cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry); + +typedef int (cfs_read_proc_t)(char *page, char **start, off_t off, + int count, int *eof, void *data); +typedef int (cfs_write_proc_t)(struct file *file, const char *buffer, + unsigned long count, void *data); + +/* + * cfs pseudo device + * + * cfs_psdev_t + * cfs_psdev_register: + * cfs_psdev_deregister: + */ +typedef struct { + int index; + void *handle; + 
const char *name; + struct cdevsw *devsw; + void *private; +} cfs_psdev_t; + +extern kern_return_t cfs_psdev_register(cfs_psdev_t *); +extern kern_return_t cfs_psdev_deregister(cfs_psdev_t *); + +/* + * Task struct and ... + * + * Using BSD current_proc in Darwin + */ +extern boolean_t assert_wait_possible(void); +extern void *get_bsdtask_info(task_t); + +typedef struct uthread cfs_task_t; +#define current_uthread() ((struct uthread *)get_bsdthread_info(current_act())) +#define cfs_current() current_uthread() + +#define set_current_state(s) do {;} while (0) +#define reparent_to_init() do {;} while (0) + +#define CFS_DECL_JOURNAL_DATA +#define CFS_PUSH_JOURNAL do {;} while(0) +#define CFS_POP_JOURNAL do {;} while(0) + +#define THREAD_NAME(comm, fmt, a...) +/* + * Kernel thread: + * + * OSX kernel thread can not be created with args, + * so we have to implement new APIs to create thread with args + * + * All requests to create kernel thread will create a new + * thread instance of cfs_thread_agent, one by one. + * cfs_thread_agent will call the caller's thread function + * with argument supplied by caller. + */ + +typedef int (*cfs_thread_t)(void *); + +extern task_t kernel_task; + +struct kernel_thread_arg +{ + spinlock_t lock; + atomic_t inuse; + cfs_thread_t func; + void *arg; +}; + +extern struct kernel_thread_arg cfs_thread_arg; +extern void cfs_thread_agent(void); + +#define THREAD_ARG_FREE 0 +#define THREAD_ARG_HOLD 1 +#define THREAD_ARG_RECV 2 + +#define set_targ_stat(a, v) atomic_set(&(a)->inuse, v) +#define get_targ_stat(a) atomic_read(&(a)->inuse) + +/* + * Hold the thread argument and set the status of thread_status + * to THREAD_ARG_HOLD, if the thread argument is held by other + * threads (It's THREAD_ARG_HOLD already), current-thread has to wait. 
+ * Expands to a single statement without a trailing semicolon; the + * caller supplies it. + */ +#define thread_arg_hold(pta, _func, _arg) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_FREE) { \ + set_targ_stat((pta), THREAD_ARG_HOLD); \ + (pta)->arg = (void *)(_arg); \ + (pta)->func = (_func); \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + schedule(); \ + } while(1) + +/* + * Release the thread argument if the thread argument has been + * received by the child-thread (Status of thread_args is + * THREAD_ARG_RECV), otherwise current-thread has to wait. + * After release, the thread_args' status will be set to + * THREAD_ARG_FREE, and others can re-use the thread_args to + * create new kernel_thread. + */ +#define thread_arg_release(pta) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_RECV) { \ + (pta)->arg = NULL; \ + (pta)->func = NULL; \ + set_targ_stat(pta, THREAD_ARG_FREE); \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + schedule(); \ + } while(1) + +/* + * Receive thread argument (Used in child thread), set the status + * of thread_args to THREAD_ARG_RECV.
+ * When fin is 0 the status is left at THREAD_ARG_HOLD (see + * thread_arg_keep). Expands to a single statement without a trailing + * semicolon; the caller supplies it. + */ +#define __thread_arg_recv_fin(pta, _func, _arg, fin) \ + do { \ + spin_lock(&(pta)->lock); \ + if (get_targ_stat(pta) == THREAD_ARG_HOLD) { \ + if (fin) \ + set_targ_stat(pta, THREAD_ARG_RECV);\ + (_arg) = (pta)->arg; \ + (_func) = (pta)->func; \ + spin_unlock(&(pta)->lock); \ + break; \ + } \ + spin_unlock(&(pta)->lock); \ + schedule(); \ + } while (1) + +/* + * Just set the thread_args' status to THREAD_ARG_RECV + */ +#define thread_arg_fin(pta) \ + do { \ + spin_lock(&(pta)->lock); \ + assert( get_targ_stat(pta) == THREAD_ARG_HOLD); \ + set_targ_stat(pta, THREAD_ARG_RECV); \ + spin_unlock(&(pta)->lock); \ + } while(0) + +#define thread_arg_recv(pta, f, a) __thread_arg_recv_fin(pta, f, a, 1) +#define thread_arg_keep(pta, f, a) __thread_arg_recv_fin(pta, f, a, 0) + +/* + * cloning flags, no use in OSX, just copy them from Linux + */ +#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */ +#define CLONE_VM 0x00000100 /* set if VM shared between processes */ +#define CLONE_FS 0x00000200 /* set if fs info shared between processes */ +#define CLONE_FILES 0x00000400 /* set if open files shared between processes */ +#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */ +#define CLONE_PID 0x00001000 /* set if pid shared */ +#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */ +#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */ +#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */ +#define CLONE_THREAD 0x00010000 /* Same thread group? */ +#define CLONE_NEWNS 0x00020000 /* New namespace group?
*/ + +#define CLONE_SIGNAL (CLONE_SIGHAND | CLONE_THREAD) + +extern int cfs_kernel_thread(cfs_thread_t func, void *arg, int flag); + + +/* + * Wait Queue implementation + * + * Like wait_queue in Linux + */ +typedef struct cfs_waitq { + struct ksleep_chan wq_ksleep_chan; +} cfs_waitq_t; + +typedef struct cfs_waitlink { + struct cfs_waitq *wl_waitq; + struct ksleep_link wl_ksleep_link; +} cfs_waitlink_t; + +void cfs_waitq_init(struct cfs_waitq *waitq); +void cfs_waitlink_init(struct cfs_waitlink *link); + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link); +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link); +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq); +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link); +int cfs_waitq_active(struct cfs_waitq *waitq); + +void cfs_waitq_signal(struct cfs_waitq *waitq); +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr); +void cfs_waitq_broadcast(struct cfs_waitq *waitq); + +void cfs_waitq_wait(struct cfs_waitlink *link); +cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, + cfs_duration_t timeout); + +/* + * Thread schedule APIs. 
+ */ +#define MAX_SCHEDULE_TIMEOUT ((long)(~0UL>>12)) + +/* + * Wait for at most timeout nanoseconds (the value is handed to + * nanoseconds_to_absolutetime()). Returns 0 if the wait ended with + * THREAD_TIMED_OUT and 1 otherwise. + */ +static inline int schedule_timeout(int64_t timeout) +{ + int result; + + AbsoluteTime clock_current; + AbsoluteTime clock_delay; + result = assert_wait((event_t)current_uthread(), THREAD_UNINT); + clock_get_uptime(&clock_current); + nanoseconds_to_absolutetime(timeout, &clock_delay); + ADD_ABSOLUTETIME(&clock_current, &clock_delay); + thread_set_timer_deadline(clock_current); + if (result == THREAD_WAITING) + result = thread_block(THREAD_CONTINUE_NULL); + thread_cancel_timer(); + if (result == THREAD_TIMED_OUT) + result = 0; + else + result = 1; + return result; +} + +#define schedule() \ + do { \ + if (assert_wait_possible()) \ + schedule_timeout(1); \ + else \ + schedule_timeout(0); \ + } while (0) + +/* wq is a cfs_waitq_t lvalue (its address is taken), not a pointer */ +#define __wait_event(wq, condition) \ +do { \ + struct cfs_waitlink __wait; \ + \ + cfs_waitlink_init(&__wait); \ + for (;;) { \ + cfs_waitq_add(&(wq), &__wait); \ + if (condition) \ + break; \ + cfs_waitq_wait(&__wait); \ + cfs_waitq_del(&(wq), &__wait); \ + } \ + cfs_waitq_del(&(wq), &__wait); \ +} while (0) + +#define wait_event(wq, condition) \ +do { \ + if (condition) \ + break; \ + __wait_event(wq, condition); \ +} while (0) + +/* never interrupted here: always evaluates to 0 */ +#define wait_event_interruptible(wq, condition) \ +({ \ + wait_event(wq, condition); \ + 0; \ +}) + +extern void wakeup_one __P((void * chan)); +/* only used in tests */ +#define wake_up_process(p) \ + do { \ + wakeup_one(p); \ + } while (0) + +/* used in couple of places */ +static inline void sleep_on(cfs_waitq_t *waitq) +{ + cfs_waitlink_t link; + + cfs_waitlink_init(&link); + cfs_waitq_add(waitq, &link); + cfs_waitq_wait(&link); + cfs_waitq_del(waitq, &link); +} + +/* + * XXX + * Signal + */ +#define cfs_sigmask_lock(t, f) do { (f) = 0; } while(0) +#define cfs_sigmask_unlock(t, f) do { (f) = 0; } while(0) +#define cfs_signal_pending(t) (0) + +#define cfs_siginitset(pmask, sigs) \ + do { \ + sigset_t __sigs = (sigs) & (~sigcantmask); \ + *(pmask) = __sigs; \ + } while(0) + +#define
cfs_siginitsetinv(pmask, sigs) \ + do { \ + sigset_t __sigs = ~(sigs | sigcantmask); \ + *(pmask) = __sigs; \ + } while(0) + +#define cfs_recalc_sigpending(ut) \ + do { \ + (ut)->uu_siglist = (ut)->uu_siglist & ~(ut)->uu_sigmask;\ + } while (0) +#define cfs_sigfillset(s) \ + do { \ + memset((s), -1, sizeof(sigset_t)); \ + } while(0) + +#define cfs_set_sig_blocked(ut, b) do {(ut)->uu_sigmask = b;} while(0) +#define cfs_get_sig_blocked(ut) (&(ut)->uu_sigmask) + +#define SIGNAL_MASK_ASSERT() + +/* + * Timer + */ + +typedef struct cfs_timer { + struct ktimer t; +} cfs_timer_t; + +#define cfs_init_timer(t) do {} while(0) +void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg); +void cfs_timer_done(struct cfs_timer *t); +void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline); +void cfs_timer_disarm(struct cfs_timer *t); +int cfs_timer_is_armed(struct cfs_timer *t); + +cfs_time_t cfs_timer_deadline(struct cfs_timer *t); + +/* + * Ioctl + * We don't need to copy out everything in osx + */ +#define cfs_ioctl_data_out(a, d, l) \ + ({ \ + int __size; \ + int __rc = 0; \ + assert((l) >= sizeof(*d)); \ + __size = (l) - sizeof(*d); \ + if (__size > 0) \ + __rc = copy_to_user((void *)a + __size, \ + (void *)d + __size, \ + __size); \ + __rc; \ + }) + +/* + * CPU + */ +#include +/* Run in PowerG5 who is PPC64 */ +#define SMP_CACHE_BYTES 128 +#define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +/* XXX How to get the value of NCPUS from xnu ? 
*/ +#define NR_CPUS 2 +#define smp_processor_id() cpu_number() +#define smp_num_cpus NR_CPUS +/* XXX smp_call_function is not supported in xnu */ +#define smp_call_function(f, a, n, w) do {} while(0) + +/* + * Misc + */ +#ifndef likely +#define likely(exp) (exp) +#endif +#ifndef unlikely +#define unlikely(exp) (exp) +#endif + +#define lock_kernel() do {} while(0) +#define unlock_kernel() do {} while(0) + +#define exit_mm(t) do {} while(0) +#define exit_files(t) do {} while(0) + +#define CAP_SYS_ADMIN 0 +#define capable(a) suser(current_proc()->p_ucred, &(current_proc()->p_acflag)) + +#define USERMODEHELPER(path, argv, envp) (0) + +#define cfs_module(name, version, init, fini) \ +extern kern_return_t _start(kmod_info_t *ki, void *data); \ +extern kern_return_t _stop(kmod_info_t *ki, void *data); \ +__private_extern__ kern_return_t name##_start(kmod_info_t *ki, void *data); \ +__private_extern__ kern_return_t name##_stop(kmod_info_t *ki, void *data); \ + \ +kmod_info_t KMOD_INFO_NAME = { 0, KMOD_INFO_VERSION, -1, \ + { "com.clusterfs.lustre." #name }, { version }, \ + -1, 0, 0, 0, 0, name##_start, name##_stop }; \ + \ +__private_extern__ kmod_start_func_t *_realmain = name##_start; \ +__private_extern__ kmod_stop_func_t *_antimain = name##_stop; \ +__private_extern__ int _kext_apple_cc = __APPLE_CC__ ; \ + \ +kern_return_t name##_start(kmod_info_t *ki, void *d) \ +{ \ + return init(); \ +} \ + \ +kern_return_t name##_stop(kmod_info_t *ki, void *d) \ +{ \ + fini(); \ + return KERN_SUCCESS; \ +} \ + \ +/* \ + * to allow semicolon after cfs_module(...) 
\ + */ \ +struct __dummy_ ## name ## _struct {} + +#define inter_module_get(n) cfs_symbol_get(n) +#define inter_module_put(n) cfs_symbol_put(n) + +#ifndef __exit +#define __exit +#endif +#ifndef __init +#define __init +#endif + +#define EXPORT_SYMBOL(s) +#define MODULE_AUTHOR(s) +#define MODULE_DESCRIPTION(s) +#define MODULE_LICENSE(s) +#define MODULE_PARM(a, b) +#define MODULE_PARM_DESC(a, b) + +#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+(c)) +#define LINUX_VERSION_CODE (2*200+5*10+0) /* NOTE(review): 2*200 does not follow KERNEL_VERSION's (a)*100 weight - confirm intended */ + +#define NR_IRQS 512 +#define in_interrupt() (0) + +#define KERN_EMERG "<0>" /* system is unusable */ +#define KERN_ALERT "<1>" /* action must be taken immediately */ +#define KERN_CRIT "<2>" /* critical conditions */ +#define KERN_ERR "<3>" /* error conditions */ +#define KERN_WARNING "<4>" /* warning conditions */ +#define KERN_NOTICE "<5>" /* normal but significant condition */ +#define KERN_INFO "<6>" /* informational */ +#define KERN_DEBUG "<7>" /* debug-level messages */ + +static inline long PTR_ERR(const void *ptr) +{ + return (long) ptr; +} + +#define ERR_PTR(err) ((void *)(err)) + +/* XXX always false: error pointers are not detected here */ +#define IS_ERR(p) (0) + +/* + * Error number + */ +#define EBADR EBADRPC +#define ERESTARTSYS ERESTART +#define EDEADLOCK EDEADLK +#define ECOMM EINVAL +#define ENODATA EINVAL + +#else +#define __WORDSIZE 32 +#endif /* __KERNEL__ */ + +#endif /* __LIBCFS_DARWIN_CFS_PRIM_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-sync.h b/lnet/include/libcfs/darwin/darwin-sync.h new file mode 100644 index 0000000..3374f43 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-sync.h @@ -0,0 +1,276 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org.
+ * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +/* + * xnu_sync.h + * + * Created by nikita on Sun Jul 18 2004. + * + * Prototypes of XNU synchronization primitives. + */ + +#ifndef __LIBCFS_DARWIN_XNU_SYNC_H__ +#define __LIBCFS_DARWIN_XNU_SYNC_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#define XNU_SYNC_DEBUG (0) + +#if XNU_SYNC_DEBUG +#define ON_SYNC_DEBUG(e) e +#else +#define ON_SYNC_DEBUG(e) +#endif + +enum { + /* "egrep -i '^(o?x)?[abcdeflo]*$' /usr/dict/words" is your friend */ + KMUT_MAGIC = 0x0bac0cab, /* [a, [b, c]] = b (a, c) - c (a, b) */ + KSEM_MAGIC = 0x1abe11ed, + KCOND_MAGIC = 0xb01dface, + KRW_MAGIC = 0xdabb1edd, + KSPIN_MAGIC = 0xca11ab1e, + KSLEEP_CHAN_MAGIC = 0x0debac1e, + KSLEEP_LINK_MAGIC = 0xacc01ade, + KTIMER_MAGIC = 0xbefadd1e +}; + +/* ------------------------- spin lock ------------------------- */ + +/* + * XXX nikita: don't use NCPUS it's hardcoded to (1) in cpus.h + */ +#define SMP (1) + +#include + +#include + +struct kspin { +#if SMP + hw_lock_data_t lock; +#endif +#if XNU_SYNC_DEBUG + unsigned magic; + thread_t owner; +#endif +}; + +/* + * XXX nikita: we cannot use simple_* functions, because bsd/sys/lock.h + * redefines them to nothing. Use low-level hw_lock_* instead. 
+ */ + +void kspin_init(struct kspin *spin); +void kspin_done(struct kspin *spin); +void kspin_lock(struct kspin *spin); +void kspin_unlock(struct kspin *spin); +int kspin_trylock(struct kspin *spin); + +#if XNU_SYNC_DEBUG +/* + * two functions below are for use in assertions + */ +/* true, iff spin-lock is locked by the current thread */ +int kspin_islocked(struct kspin *spin); +/* true, iff spin-lock is not locked by the current thread */ +int kspin_isnotlocked(struct kspin *spin); +#else +#define kspin_islocked(s) (1) +#define kspin_isnotlocked(s) (1) +#endif + +/* ------------------------- semaphore ------------------------- */ + +struct ksem { + struct kspin guard; + struct wait_queue q; + int value; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void ksem_init(struct ksem *sem, int value); +void ksem_done(struct ksem *sem); +int ksem_up (struct ksem *sem, int value); +void ksem_down(struct ksem *sem, int value); +int ksem_trydown(struct ksem *sem, int value); + +/* ------------------------- mutex ------------------------- */ + +struct kmut { + struct ksem s; +#if XNU_SYNC_DEBUG + unsigned magic; + thread_t owner; +#endif +}; + +void kmut_init(struct kmut *mut); +void kmut_done(struct kmut *mut); + +void kmut_lock (struct kmut *mut); +void kmut_unlock (struct kmut *mut); +int kmut_trylock(struct kmut *mut); + +#if XNU_SYNC_DEBUG +/* + * two functions below are for use in assertions + */ +/* true, iff mutex is locked by the current thread */ +int kmut_islocked(struct kmut *mut); +/* true, iff mutex is not locked by the current thread */ +int kmut_isnotlocked(struct kmut *mut); +#else +#define kmut_islocked(m) (1) +#define kmut_isnotlocked(m) (1) +#endif + +/* ------------------------- condition variable ------------------------- */ + +struct kcond_link { + struct kcond_link *next; + struct ksem sem; +}; + +struct kcond { + struct kspin guard; + struct kcond_link *waiters; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void kcond_init(struct kcond 
*cond); +void kcond_done(struct kcond *cond); +void kcond_wait(struct kcond *cond, struct kspin *lock); +void kcond_signal(struct kcond *cond); +void kcond_broadcast(struct kcond *cond); + +void kcond_wait_guard(struct kcond *cond); +void kcond_signal_guard(struct kcond *cond); +void kcond_broadcast_guard(struct kcond *cond); + +/* ------------------------- read-write semaphore ------------------------- */ + +struct krw_sem { + int count; + struct kcond cond; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void krw_sem_init(struct krw_sem *sem); +void krw_sem_done(struct krw_sem *sem); +void krw_sem_down_r(struct krw_sem *sem); +int krw_sem_down_r_try(struct krw_sem *sem); +void krw_sem_down_w(struct krw_sem *sem); +int krw_sem_down_w_try(struct krw_sem *sem); +void krw_sem_up_r(struct krw_sem *sem); +void krw_sem_up_w(struct krw_sem *sem); + +/* ------------------------- sleep-channel ------------------------- */ + +struct ksleep_chan { + struct kspin guard; + struct list_head waiters; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +#define KSLEEP_CHAN_INITIALIZER {{{0}}} + +struct ksleep_link { + int flags; + event_t event; + int hits; + struct ksleep_chan *forward; + struct list_head linkage; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +enum { + KSLEEP_EXCLUSIVE = 1 +}; + +void ksleep_chan_init(struct ksleep_chan *chan); +void ksleep_chan_done(struct ksleep_chan *chan); + +void ksleep_link_init(struct ksleep_link *link); +void ksleep_link_done(struct ksleep_link *link); + +void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link); +void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link); + +void ksleep_wait(struct ksleep_chan *chan); +int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout); + +void ksleep_wake(struct ksleep_chan *chan); +void ksleep_wake_all(struct ksleep_chan *chan); +void ksleep_wake_nr(struct ksleep_chan *chan, int nr); + +#define KSLEEP_LINK_DECLARE(name) \ +{ \ + .flags = 0, \ + .event = 
0, \ + .hits = 0, \ + .linkage = PTL_LIST_HEAD_INIT(name.linkage), \ + ON_SYNC_DEBUG(.magic = KSLEEP_LINK_MAGIC) \ +} +/* the magic field exists only when XNU_SYNC_DEBUG is set, hence ON_SYNC_DEBUG() */ + +/* ------------------------- timer ------------------------- */ + +struct ktimer { + struct kspin guard; + void (*func)(void *); + void *arg; + u_int64_t deadline; /* timer deadline in absolute nanoseconds */ + int armed; +#if XNU_SYNC_DEBUG + unsigned magic; +#endif +}; + +void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg); +void ktimer_done(struct ktimer *t); +void ktimer_arm(struct ktimer *t, u_int64_t deadline); +void ktimer_disarm(struct ktimer *t); +int ktimer_is_armed(struct ktimer *t); + +u_int64_t ktimer_deadline(struct ktimer *t); + +/* __LIBCFS_DARWIN_XNU_SYNC_H__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/libcfs/darwin/darwin-time.h b/lnet/include/libcfs/darwin/darwin-time.h new file mode 100644 index 0000000..d6230ad --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-time.h @@ -0,0 +1,257 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA.
+ * + * Implementation of portable time API for XNU kernel + * + */ + +#ifndef __LIBCFS_DARWIN_DARWIN_TIME_H__ +#define __LIBCFS_DARWIN_DARWIN_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents a point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents a time interval with the resolution of the + * internal platform clock + * + * cfs_fs_time_t represents an instant in world-visible time. This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * cfs_duration_t cfs_time_minimal_timeout(void) + * + * CFS_TIME_T (printf format for cfs_time_t) + * CFS_DURATION_T (printf format for cfs_duration_t) + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION ((u_int64_t) 1000000) + +#ifdef __KERNEL__ +#include +#include + +#ifndef __APPLE_API_PRIVATE +#define __APPLE_API_PRIVATE +#include +#undef __APPLE_API_PRIVATE +#else +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include
+#include +#include +#include +#include + +#include +#include +#include + +typedef u_int64_t cfs_time_t; /* nanoseconds */ +typedef int64_t cfs_duration_t; + +#define CFS_TIME_T "%llu" +#define CFS_DURATION_T "%lld" + +typedef struct timeval cfs_fs_time_t; + +static inline cfs_time_t cfs_time_current(void) +{ + struct timespec instant; + + nanotime(&instant); + return ((u_int64_t)instant.tv_sec) * ONE_BILLION + instant.tv_nsec; +} + +static inline time_t cfs_time_current_sec(void) +{ + struct timespec instant; + + nanotime(&instant); + return instant.tv_sec; +} + +static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d) +{ + return t + d; +} + +static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2) +{ + return t1 - t2; +} + +static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2) +{ + return (int64_t)t1 - (int64_t)t2 < 0; +} + +static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2) +{ + return (int64_t)t1 - (int64_t)t2 <= 0; +} + +static inline void cfs_fs_time_current(cfs_fs_time_t *t) +{ + *t = time; +} + +static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t) +{ + return t->tv_sec; +} + +static inline cfs_duration_t cfs_duration_build(int64_t nano) +{ + return nano; +} + + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + *v = *t; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = t->tv_sec; + s->tv_nsec = t->tv_usec * 1000; +} + +static inline cfs_duration_t cfs_time_seconds(int seconds) +{ + return cfs_duration_build(ONE_BILLION * (int64_t)seconds); +} + +static inline cfs_time_t cfs_time_shift(int seconds) +{ + return cfs_time_add(cfs_time_current(), cfs_time_seconds(seconds)); +} + +/* + * internal helper function used by cfs_fs_time_before*(): flattens a + * timeval into nanoseconds (tv_usec is microseconds, hence * 1000) + */ +static inline int64_t __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return ((int64_t)t->tv_sec) * ONE_BILLION + t->tv_usec * 1000; +} + +static inline int cfs_fs_time_before(cfs_fs_time_t *t1,
cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) < 0; +} + +static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2) +{ + return __cfs_fs_time_flat(t1) - __cfs_fs_time_flat(t2) <= 0; +} + +/* + * ONE_BILLION is unsigned; the conversions below cast it to the signed + * cfs_duration_t so that a negative duration is divided as a signed + * value instead of being converted to a huge unsigned one. + */ +static inline time_t cfs_duration_sec(cfs_duration_t d) +{ + return d / (cfs_duration_t)ONE_BILLION; +} + +static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s) +{ + s->tv_sec = d / (cfs_duration_t)ONE_BILLION; + s->tv_usec = (d - s->tv_sec * (cfs_duration_t)ONE_BILLION) / 1000; +} + +static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s) +{ + s->tv_sec = d / (cfs_duration_t)ONE_BILLION; + s->tv_nsec = d - ((int64_t)s->tv_sec) * (cfs_duration_t)ONE_BILLION; +} + +static inline cfs_duration_t cfs_time_minimal_timeout(void) +{ + return ONE_BILLION / (u_int64_t)hz; +} + +/* inline function cfs_time_minimal_timeout() can not be used to + * initialize static variable */ +#define CFS_MIN_DELAY (ONE_BILLION / (u_int64_t)100) + +#define LTIME_S(t) (t) + +/* __KERNEL__ */ +#else + +/* + * User level + */ +#include + +/* __KERNEL__ */ +#endif + +/* __LIBCFS_DARWIN_DARWIN_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/libcfs/darwin/darwin-types.h b/lnet/include/libcfs/darwin/darwin-types.h new file mode 100644 index 0000000..b2762c0 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-types.h @@ -0,0 +1,82 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation.
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_DARWIN_XNU_TYPES_H__ +#define __LIBCFS_DARWIN_XNU_TYPES_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +typedef u_int8_t __u8; +typedef u_int16_t __u16; +typedef u_int32_t __u32; +typedef u_int64_t __u64; + +#ifdef __KERNEL__ + +#include + +#ifndef __s32 +typedef __signed__ int __s32; +#endif +#ifndef __s64 +typedef __signed__ long long __s64; +#endif + +typedef struct { int e; } event_chan_t; +typedef dev_t kdev_t; + +/* + * Atmoic define + */ +#include + +typedef struct { volatile uint32_t counter; } atomic_t; + +#define ATOMIC_INIT(i) { (i) } +#define atomic_read(a) ((a)->counter) +#define atomic_set(a, v) (((a)->counter) = (v)) +#define atomic_add(v, a) hw_atomic_add((uint32_t *)&((a)->counter), v) +#define atomic_sub(v, a) hw_atomic_sub((uint32_t *)&((a)->counter), v) +#define atomic_inc(a) atomic_add(1, a) +#define atomic_dec(a) atomic_sub(1, a) +#define atomic_sub_and_test(v, a) ( atomic_sub(v, a) == 0 ) +#define atomic_dec_and_test(a) ( atomic_dec(a) == 0 ) + +#include +typedef uint64_t loff_t; + +#else /* !__KERNEL__ */ + +#include + +typedef uint64_t loff_t; + +#endif /* __KERNEL END */ + +#endif /* __XNU_CFS_TYPES_H__ */ diff --git a/lnet/include/libcfs/darwin/darwin-utils.h b/lnet/include/libcfs/darwin/darwin-utils.h new file mode 100644 index 0000000..4e91db9 --- /dev/null +++ b/lnet/include/libcfs/darwin/darwin-utils.h @@ -0,0 +1,60 @@ +#ifndef __LIBCFS_DARWIN_XNU_UTILS_H__ 
+#define __LIBCFS_DARWIN_XNU_UTILS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#include + +#ifdef __KERNEL__ +inline int isspace(char c); +char *strpbrk(const char *cs, const char *ct); +char * strsep(char **s, const char *ct); +size_t strnlen(const char * s, size_t count); +char * strstr(const char *in, const char *str); +char * strrchr(const char *p, int ch); +char * ul2dstr(unsigned long address, char *buf, int len); + +#define simple_strtol(a1, a2, a3) strtol(a1, a2, a3) +#define simple_strtoul(a1, a2, a3) strtoul(a1, a2, a3) +#define simple_strtoll(a1, a2, a3) strtoq(a1, a2, a3) +#define simple_strtoull(a1, a2, a3) strtouq(a1, a2, a3) + +#define test_bit(i, a) isset(a, i) +#define set_bit(i, a) setbit(a, i) +#define clear_bit(i, a) clrbit(a, i) + +#define get_random_bytes(buf, len) read_random(buf, len) + +#endif /* __KERNEL__ */ + +#ifndef min_t +#define min_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) +#endif +#ifndef max_t +#define max_t(type,x,y) \ + ({ type __x = (x); type __y = (y); __x > __y ? __x: __y; }) +#endif + +/* divides the lvalue n in place by base and evaluates to the remainder */ +#define do_div(n,base) \ + ({ \ + __u64 __n = (n); \ + __u32 __base = (base); \ + __u32 __mod; \ + \ + __mod = __n % __base; \ + (n) = __n / __base; \ + __mod; \ + }) + +/* expands to the four bytes of addr in memory order, for "%u.%u.%u.%u" style formats */ +#define NIPQUAD(addr) \ + ((unsigned char *)&(addr))[0], \ + ((unsigned char *)&(addr))[1], \ + ((unsigned char *)&(addr))[2], \ + ((unsigned char *)&(addr))[3] + +#define HIPQUAD NIPQUAD /* NOTE(review): same byte order as NIPQUAD - presumably only right on big-endian hosts; confirm for i386 */ + +#endif /* __LIBCFS_DARWIN_XNU_UTILS_H__ */ diff --git a/lnet/include/libcfs/darwin/kp30.h b/lnet/include/libcfs/darwin/kp30.h new file mode 100644 index 0000000..885a78a --- /dev/null +++ b/lnet/include/libcfs/darwin/kp30.h @@ -0,0 +1,90 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_DARWIN_KP30__ +#define __LIBCFS_DARWIN_KP30__ + +#ifndef __LIBCFS_KP30_H__ +#error Do not #include this file directly.
#include instead
+#endif
+
+#ifdef __KERNEL__
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * Stand-in for the Linux our_cond_resched() helper. No trailing ';' in the
+ * expansion, so "if (x) our_cond_resched(); else ..." parses as intended.
+ */
+#define our_cond_resched() schedule_timeout(1)
+
+#ifdef CONFIG_SMP
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0) /* XXX */
+#else
+#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
+#endif
+
+/* NOTE(review): expands to nothing here, so LBUG() does not stop execution on this platform */
+#define LBUG_WITH_LOC(file, func, line) do {} while(0)
+
+/* --------------------------------------------------------------------- */
+
+/* Symbol registration is routed through the cfs_symbol_*() calls, keyed by the symbol's name. */
+#define PORTAL_SYMBOL_REGISTER(x) cfs_symbol_register(#x, &x)
+#define PORTAL_SYMBOL_UNREGISTER(x) cfs_symbol_unregister(#x)
+
+#define PORTAL_SYMBOL_GET(x) ((typeof(&x))cfs_symbol_get(#x))
+#define PORTAL_SYMBOL_PUT(x) cfs_symbol_put(#x)
+
+/* Module use counting is a placeholder: each macro only touches a local int. */
+#define PORTAL_MODULE_USE do{int i = 0; i++;}while(0)
+#define PORTAL_MODULE_UNUSE do{int i = 0; i--;}while(0)
+
+#define printk(format, args...) printf(format, ## args)
+
+#else /* !__KERNEL__ */
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+#endif
+
+/******************************************************************************/
+/* Light-weight trace
+ * Support for temporary event tracing with minimal Heisenberg effect. 
*/ +#define LWT_SUPPORT 0 + +typedef struct { + long long lwte_when; + char *lwte_where; + void *lwte_task; + long lwte_p1; + long lwte_p2; + long lwte_p3; + long lwte_p4; +} lwt_event_t; + +# define LWT_EVENT(p1,p2,p3,p4) /* no lwt implementation yet */ + +/* -------------------------------------------------------------------------- */ + +#define IOCTL_PORTAL_TYPE struct portal_ioctl_data + +#define LPU64 "%llu" +#define LPD64 "%lld" +#define LPX64 "%llx" +#define LPSZ "%lu" +#define LPSSZ "%ld" +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) + +#endif diff --git a/lnet/include/libcfs/darwin/libcfs.h b/lnet/include/libcfs/darwin/libcfs.h new file mode 100644 index 0000000..8e4eb89 --- /dev/null +++ b/lnet/include/libcfs/darwin/libcfs.h @@ -0,0 +1,173 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_DARWIN_LIBCFS_H__ +#define __LIBCFS_DARWIN_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef __KERNEL__
+# include
+# include
+/* NOTE(review): microuptime() - presumably time since boot rather than wall-clock; confirm intent */
+# define do_gettimeofday(tv) microuptime(tv)
+#else
+# include
+/* No trailing ';' in the expansion: callers supply their own. */
+# define do_gettimeofday(tv) gettimeofday(tv, NULL)
+typedef unsigned long long cycles_t;
+#endif
+
+/* Little-endian conversions mapped onto the OSSwap*() primitives. */
+#define __cpu_to_le64(x) OSSwapHostToLittleInt64(x)
+#define __cpu_to_le32(x) OSSwapHostToLittleInt32(x)
+#define __cpu_to_le16(x) OSSwapHostToLittleInt16(x)
+
+#define __le16_to_cpu(x) OSSwapLittleToHostInt16(x)
+#define __le32_to_cpu(x) OSSwapLittleToHostInt32(x)
+#define __le64_to_cpu(x) OSSwapLittleToHostInt64(x)
+
+#define cpu_to_le64(x) __cpu_to_le64(x)
+#define cpu_to_le32(x) __cpu_to_le32(x)
+#define cpu_to_le16(x) __cpu_to_le16(x)
+
+#define le64_to_cpu(x) __le64_to_cpu(x)
+#define le32_to_cpu(x) __le32_to_cpu(x)
+#define le16_to_cpu(x) __le16_to_cpu(x)
+
+/* Unconditional byte swaps; the *s() forms swap in place through a pointer. */
+#define __swab16(x) OSSwapInt16(x)
+#define __swab32(x) OSSwapInt32(x)
+#define __swab64(x) OSSwapInt64(x)
+#define __swab16s(x) do { *(x) = __swab16(*(x)); } while (0)
+#define __swab32s(x) do { *(x) = __swab32(*(x)); } while (0)
+#define __swab64s(x) do { *(x) = __swab64(*(x)); } while (0)
+
+/* Debug header; packed, so the fields are laid out without padding. */
+struct ptldebug_header {
+        __u32 ph_len;
+        __u32 ph_flags;
+        __u32 ph_subsys;
+        __u32 ph_mask;
+        __u32 ph_cpu_id;
+        __u32 ph_sec;
+        __u64 ph_usec;
+        __u32 ph_stack;
+        __u32 ph_pid;
+        __u32 ph_extern_pid;
+        __u32 ph_line_num;
+} __attribute__((packed));
+
+
+#ifdef __KERNEL__
+# include
+# include
+/* Fix me */
+# define THREAD_SIZE 8192
+#else
+# define THREAD_SIZE 8192
+#endif
+#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
+
+/* Stack-usage checking is compiled out in this header. */
+#define CHECK_STACK(stack) do { } while(0)
+#define CDEBUG_STACK (0L)
+
+/* Darwin has defined RETURN, so we have to undef it in lustre */
+#ifdef RETURN
+#undef RETURN
+#endif
+
+/*
+ * When this is enabled debugging messages are indented according to the
+ * current "nesting level". Nesting level is increased when ENTRY macro
+ * is executed, and decreased on EXIT and RETURN. 
+ */
+#ifdef __KERNEL__
+#define ENTRY_NESTING_SUPPORT (0)
+#endif
+
+#if ENTRY_NESTING_SUPPORT
+
+/*
+ * Currently ENTRY_NESTING_SUPPORT is only supported for the XNU port. The
+ * basic idea is to keep a per-thread pointer to a small data structure
+ * (struct cfs_debug_data) describing the current nesting level. In XNU the
+ * unused proc->p_wmesg field is hijacked for this. On Linux
+ * current->journal_info can be used. In user space,
+ * pthread_{g,s}etspecific().
+ *
+ * The ENTRY macro allocates a new cfs_debug_data on the stack and installs
+ * it as the current nesting level, storing the old data in the
+ * cfs_debug_data it just created.
+ *
+ * EXIT pops the old value back.
+ *
+ */
+
+/*
+ * One problem with this approach is that there is a lot of code that
+ * does ENTRY and then escapes scope without doing EXIT/RETURN. In this
+ * case the per-thread current nesting level pointer is dangling (it points
+ * to a stack area that has possibly already been overwritten). To detect
+ * such cases, we add two magic fields to the cfs_debug_data and check
+ * them whenever the current nesting level pointer is dereferenced. While
+ * looking flaky this works because stack is always consumed
+ * "continuously". 
+ */
+enum {
+        CDD_MAGIC1 = 0x02128506,
+        CDD_MAGIC2 = 0x42424242
+};
+
+struct cfs_debug_data {
+        unsigned int           magic1;        /* CDD_MAGIC1 as set up by ENTRY_NESTING */
+        struct cfs_debug_data *parent;        /* NULL at ENTRY_NESTING; presumably linked by __entry_nesting() - confirm */
+        int                    nesting_level; /* 0 at ENTRY_NESTING */
+        unsigned int           magic2;        /* CDD_MAGIC2 as set up by ENTRY_NESTING */
+};
+
+void __entry_nesting(struct cfs_debug_data *child);
+void __exit_nesting(struct cfs_debug_data *child);
+unsigned int __current_nesting_level(void);
+
+#define ENTRY_NESTING \
+struct cfs_debug_data __cdd = { .magic1 = CDD_MAGIC1, \
+                                .parent = NULL, \
+                                .nesting_level = 0, \
+                                .magic2 = CDD_MAGIC2 }; \
+__entry_nesting(&__cdd);
+
+#define EXIT_NESTING __exit_nesting(&__cdd)
+
+/* ENTRY_NESTING_SUPPORT; below: nesting disabled, the macros are no-ops */
+#else
+
+#define ENTRY_NESTING do {;} while (0)
+#define EXIT_NESTING do {;} while (0)
+#define __current_nesting_level() (0)
+
+/* ENTRY_NESTING_SUPPORT */
+#endif
+
+#define LUSTRE_PTL_PID 12345
+
+#define _XNU_LIBCFS_H
+
+/*
+ * Platform specific declarations for cfs_curproc API (libcfs/curproc.h)
+ *
+ * Implementation is in darwin-curproc.c
+ *
+ * CFS_CURPROC_COMM_MAX is the size of the p_comm member of struct proc.
+ */
+#define CFS_CURPROC_COMM_MAX (sizeof ((struct proc *)0)->p_comm)
+/*
+ * XNU has no capabilities
+ */
+typedef int cfs_kernel_cap_t;
+
+#endif /* __LIBCFS_DARWIN_LIBCFS_H__ */
diff --git a/lnet/include/libcfs/darwin/lltrace.h b/lnet/include/libcfs/darwin/lltrace.h
new file mode 100644
index 0000000..bb0dc91
--- /dev/null
+++ b/lnet/include/libcfs/darwin/lltrace.h
@@ -0,0 +1,26 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ */
+#ifndef __LIBCFS_DARWIN_LLTRACE_H__
+#define __LIBCFS_DARWIN_LLTRACE_H__
+
+#ifndef __LIBCFS_LLTRACE_H__
+#error Do not #include this file directly. 
#include instead +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/lnet/include/libcfs/darwin/portals_lib.h b/lnet/include/libcfs/darwin/portals_lib.h new file mode 100644 index 0000000..dde962a --- /dev/null +++ b/lnet/include/libcfs/darwin/portals_lib.h @@ -0,0 +1,34 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_DARWIN_PORTALS_LIB_H__ +#define __LIBCFS_DARWIN_PORTALS_LIB_H__ + +#ifndef __LIBCFS_PORTALS_LIB_H__ +#error Do not #include this file directly. #include instead +#endif + +#include + +#endif diff --git a/lnet/include/libcfs/darwin/portals_utils.h b/lnet/include/libcfs/darwin/portals_utils.h new file mode 100644 index 0000000..4907cb1 --- /dev/null +++ b/lnet/include/libcfs/darwin/portals_utils.h @@ -0,0 +1,18 @@ +#ifndef __LIBCFS_DARWIN_PORTALS_UTILS_H__ +#define __LIBCFS_DARWIN_PORTALS_UTILS_H__ + +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#include +#ifdef __KERNEL__ +#include +#include +#else /* !__KERNEL__ */ +#include +#include +#include +#endif /* !__KERNEL__ */ + +#endif diff --git a/lnet/include/linux/kp30.h b/lnet/include/libcfs/kp30.h similarity index 54% rename from lnet/include/linux/kp30.h rename to lnet/include/libcfs/kp30.h index e590514..cdd1bff 100644 --- a/lnet/include/linux/kp30.h +++ b/lnet/include/libcfs/kp30.h @@ -1,58 +1,19 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _KP30_INCLUDED -#define _KP30_INCLUDED +#ifndef __LIBCFS_KP30_H__ +#define __LIBCFS_KP30_H__ -#include #define PORTAL_DEBUG +#include -#ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define schedule_work schedule_task -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_TQUEUE((wq), 0, 0); \ - PREPARE_TQUEUE((wq), (cb), (cbdata)); \ -} while (0) - -#define PageUptodate Page_Uptodate -#define our_recalc_sigpending(current) recalc_sigpending(current) -#define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} -#define work_struct_t struct tq_struct +#include -#else - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ -} while (0) -#define wait_on_page wait_on_page_locked -#define our_recalc_sigpending(current) recalc_sigpending() -#define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - cond_resched(); -} -#define work_struct_t struct work_struct +#ifdef __KERNEL__ -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ +# ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED +# endif #ifdef PORTAL_DEBUG extern void kportal_assertion_failed(char *expr, char *file, const char *func, @@ -74,32 +35,7 @@ extern void 
kportal_assertion_failed(char *expr, char *file, const char *func, #define LASSERTF(cond, fmt...) do { } while (0) #endif -#ifdef CONFIG_SMP -#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) -#else -#define LASSERT_SPIN_LOCKED(lock) do {} while(0) -#endif - -#ifdef __arch_um__ -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - panic("LBUG"); \ -} while (0) -#else -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG\n"); \ - portals_debug_dumpstack(NULL); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - set_task_state(current, TASK_UNINTERRUPTIBLE); \ - schedule(); \ -} while (0) -#endif /* __arch_um__ */ - +/* LBUG_WITH_LOC defined in portals//kp30.h */ #define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__) /* @@ -127,11 +63,11 @@ do { \ #define PORTAL_ALLOC_GFP(ptr, size, mask) \ do { \ LASSERT(!in_interrupt() || \ - (size <= PORTAL_VMALLOC_SIZE && mask == GFP_ATOMIC)); \ + (size <= PORTAL_VMALLOC_SIZE && mask == CFS_ALLOC_ATOMIC));\ if ((size) > PORTAL_VMALLOC_SIZE) \ - (ptr) = vmalloc(size); \ + (ptr) = cfs_alloc_large(size); \ else \ - (ptr) = kmalloc((size), (mask)); \ + (ptr) = cfs_alloc((size), (mask)); \ if ((ptr) == NULL) { \ CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ @@ -139,17 +75,18 @@ do { \ atomic_read(&portal_kmemory)); \ } else { \ portal_kmem_inc((ptr), (size)); \ - memset((ptr), 0, (size)); \ + if (!((mask) & CFS_ALLOC_ZERO)) \ + memset((ptr), 0, (size)); \ } \ CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ (int)(size), (ptr), atomic_read (&portal_kmemory)); \ } while (0) #define PORTAL_ALLOC(ptr, size) \ - PORTAL_ALLOC_GFP(ptr, size, GFP_NOFS) + PORTAL_ALLOC_GFP(ptr, size, CFS_ALLOC_IO) #define PORTAL_ALLOC_ATOMIC(ptr, size) \ - PORTAL_ALLOC_GFP(ptr, size, 
GFP_ATOMIC) + PORTAL_ALLOC_GFP(ptr, size, CFS_ALLOC_ATOMIC) #define PORTAL_FREE(ptr, size) \ do { \ @@ -160,39 +97,14 @@ do { \ break; \ } \ if (s > PORTAL_VMALLOC_SIZE) \ - vfree(ptr); \ + cfs_free_large(ptr); \ else \ - kfree(ptr); \ + cfs_free(ptr); \ portal_kmem_dec((ptr), s); \ CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ s, (ptr), atomic_read(&portal_kmemory)); \ } while (0) -/* ------------------------------------------------------------------- */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#define PORTAL_MODULE_USE MOD_INC_USE_COUNT -#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT -#else - -#define PORTAL_SYMBOL_REGISTER(x) -#define PORTAL_SYMBOL_UNREGISTER(x) - -#define PORTAL_SYMBOL_GET(x) symbol_get(x) -#define PORTAL_SYMBOL_PUT(x) symbol_put(x) - -#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) -#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) - -#endif - /******************************************************************************/ #ifdef PORTALS_PROFILING @@ -243,7 +155,9 @@ extern struct prof_ent prof_ents[MAX_PROFS]; #endif /* PORTALS_PROFILING */ /* debug.c */ -void portals_debug_dumpstack(struct task_struct *tsk); +extern spinlock_t stack_backtrace_lock; + +void portals_debug_dumpstack(cfs_task_t *tsk); void portals_run_upcall(char **argv); void portals_run_lbug_upcall(char * file, const char *fn, const int line); void portals_debug_dumplog(void); @@ -254,44 +168,20 @@ int portals_debug_mark_buffer(char *text); int portals_debug_set_daemon(unsigned int cmd, unsigned int length, char *file, unsigned int size); __s32 portals_debug_copy_to_user(char *buf, unsigned long len); -#if (__GNUC__) /* Use the special GNU C __attribute__ hack to have the compiler 
check the * printf style argument string against the actual argument count and * types. */ -#ifdef printf -# warning printf has been defined as a macro... -# undef printf -#endif void portals_debug_msg(int subsys, int mask, char *file, const char *fn, const int line, unsigned long stack, char *format, ...) __attribute__ ((format (printf, 7, 8))); -#else -void portals_debug_msg(int subsys, int mask, char *file, const char *fn, - const int line, unsigned long stack, - const char *format, ...); -#endif /* __GNUC__ */ void portals_debug_set_level(unsigned int debug_level); -# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) -# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) -# define time(a) CURRENT_TIME - extern void kportal_daemonize (char *name); extern void kportal_blockallsigs (void); #else /* !__KERNEL__ */ -# include -# include -#ifndef __CYGWIN__ -# include -#else -# include -#endif -# include -# include -# include # ifndef DEBUG_SUBSYSTEM # define DEBUG_SUBSYSTEM S_UNDEFINED # endif @@ -324,6 +214,22 @@ void portals_debug_dumplog(void); #define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) #endif +/* + * compile-time assertions. @cond has to be constant expression. + * ISO C Standard: + * + * 6.8.4.2 The switch statement + * + * .... + * + * [#3] The expression of each case label shall be an integer + * constant expression and no two of the case constant + * expressions in the same switch statement shall have the same + * value after conversion... 
+ * + */ +#define CLASSERT(cond) ({ switch(42) { case (cond): case 0: break; } }) + /* support decl needed both by kernel and liblustre */ char *portals_nid2str(int nal, ptl_nid_t nid, char *str); char *portals_id2str(int nal, ptl_process_id_t nid, char *str); @@ -332,119 +238,19 @@ char *portals_id2str(int nal, ptl_process_id_t nid, char *str); # define CURRENT_TIME time(0) #endif -/******************************************************************************/ -/* Light-weight trace - * Support for temporary event tracing with minimal Heisenberg effect. */ -#define LWT_SUPPORT 0 - -#define LWT_MEMORY (16<<20) - -#if !KLWT_SUPPORT -# if defined(__KERNEL__) -# if !defined(BITS_PER_LONG) -# error "BITS_PER_LONG not defined" -# endif -# elif !defined(__WORDSIZE) -# error "__WORDSIZE not defined" -# else -# define BITS_PER_LONG __WORDSIZE -# endif - -/* kernel hasn't defined this? */ -typedef struct { - long long lwte_when; - char *lwte_where; - void *lwte_task; - long lwte_p1; - long lwte_p2; - long lwte_p3; - long lwte_p4; -# if BITS_PER_LONG > 32 - long lwte_pad; -# endif -} lwt_event_t; -#endif /* !KLWT_SUPPORT */ - -#if LWT_SUPPORT -# ifdef __KERNEL__ -# if !KLWT_SUPPORT - -typedef struct _lwt_page { - struct list_head lwtp_list; - struct page *lwtp_page; - lwt_event_t *lwtp_events; -} lwt_page_t; - -typedef struct { - int lwtc_current_index; - lwt_page_t *lwtc_current_page; -} lwt_cpu_t; - -extern int lwt_enabled; -extern lwt_cpu_t lwt_cpus[]; - -/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. - * This stuff is meant for finding specific problems; it never stays in - * production code... 
*/ - -#define LWTSTR(n) #n -#define LWTWHERE(f,l) f ":" LWTSTR(l) -#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t)) - -#define LWT_EVENT(p1, p2, p3, p4) \ -do { \ - unsigned long flags; \ - lwt_cpu_t *cpu; \ - lwt_page_t *p; \ - lwt_event_t *e; \ - \ - if (lwt_enabled) { \ - local_irq_save (flags); \ - \ - cpu = &lwt_cpus[smp_processor_id()]; \ - p = cpu->lwtc_current_page; \ - e = &p->lwtp_events[cpu->lwtc_current_index++]; \ - \ - if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ - cpu->lwtc_current_page = \ - list_entry (p->lwtp_list.next, \ - lwt_page_t, lwtp_list); \ - cpu->lwtc_current_index = 0; \ - } \ - \ - e->lwte_when = get_cycles(); \ - e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ - e->lwte_task = current; \ - e->lwte_p1 = (long)(p1); \ - e->lwte_p2 = (long)(p2); \ - e->lwte_p3 = (long)(p3); \ - e->lwte_p4 = (long)(p4); \ - \ - local_irq_restore (flags); \ - } \ -} while (0) - -#endif /* !KLWT_SUPPORT */ - -extern int lwt_init (void); -extern void lwt_fini (void); -extern int lwt_lookup_string (int *size, char *knlptr, - char *usrptr, int usrsize); -extern int lwt_control (int enable, int clear); -extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, - void *user_ptr, int user_size); -# else /* __KERNEL__ */ -# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ -# endif /* __KERNEL__ */ -#endif /* LWT_SUPPORT */ +/* -------------------------------------------------------------------- + * Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. 
+ * All stuff about lwt are put in arch/kp30.h + * -------------------------------------------------------------------- */ struct portals_device_userstate { int pdu_memhog_pages; - struct page *pdu_memhog_root_page; + cfs_page_t *pdu_memhog_root_page; }; -#include +#include /* * USER LEVEL STUFF BELOW @@ -569,77 +375,30 @@ static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf, return 0; } -#else -#include - -/* buffer MUST be at least the size of portal_ioctl_hdr */ -static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct portal_ioctl_hdr *hdr; - struct portal_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct portal_ioctl_hdr *)buf; - data = (struct portal_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if (err) - RETURN(err); - if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { - CERROR("PORTALS: version mismatch kernel vs application\n"); - RETURN(-EINVAL); - } - - if (hdr->ioc_len + buf >= end) { - CERROR("PORTALS: user buffer exceeds kernel buffer\n"); - RETURN(-EINVAL); - } - - - if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { - CERROR("PORTALS: user buffer too small for ioctl\n"); - RETURN(-EINVAL); - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if (err) - RETURN(err); - - if (portal_ioctl_is_invalid(data)) { - CERROR("PORTALS: ioctl not correctly formatted\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1) - data->ioc_inlbuf1 = &data->ioc_bulk[0]; +#else - if (data->ioc_inllen2) - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); +extern inline int portal_ioctl_getdata(char *buf, char *end, void *arg); - RETURN(0); -} #endif /* ioctls for manipulating snapshots 30- */ #define IOC_PORTAL_TYPE 'e' #define IOC_PORTAL_MIN_NR 30 -#define IOC_PORTAL_PING _IOWR('e', 30, long) +#define IOC_PORTAL_PING _IOWR('e', 30, IOCTL_PORTAL_TYPE) -#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long) -#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 
33, long) -#define IOC_PORTAL_PANIC _IOWR('e', 34, long) -#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, long) -#define IOC_PORTAL_GET_NID _IOWR('e', 36, long) -#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, long) +#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_PANIC _IOWR('e', 34, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_GET_NID _IOWR('e', 36, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, IOCTL_PORTAL_TYPE) -#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long) -#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long) -#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long) -#define IOC_PORTAL_MEMHOG _IOWR('e', 42, long) +#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, IOCTL_PORTAL_TYPE) +#define IOC_PORTAL_MEMHOG _IOWR('e', 42, IOCTL_PORTAL_TYPE) #define IOC_PORTAL_MAX_NR 42 enum { @@ -694,49 +453,6 @@ enum cfg_record_type { typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); -#ifdef __CYGWIN__ -# ifndef BITS_PER_LONG -# if (~0UL) == 0xffffffffUL -# define BITS_PER_LONG 32 -# else -# define BITS_PER_LONG 64 -# endif -# endif -#endif - -#if BITS_PER_LONG > 32 -# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) -#else -# define LI_POISON ((int)0x5a5a5a5a) -# define LL_POISON ((long)0x5a5a5a5a) -# define LP_POISON ((void *)(long)0x5a5a5a5a) -#endif - -#if defined(__x86_64__) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPSZ "%u" -# define LPSSZ "%d" -#elif 
(BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - /* lustre_id output helper macros */ #define DLID4 "%lu/%lu/%lu/%lu" @@ -745,5 +461,5 @@ typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data); (unsigned long)(id)->li_fid.lf_group, \ (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \ (unsigned long)(id)->li_stc.u.e3s.l3s_gen - + #endif diff --git a/lnet/include/linux/libcfs.h b/lnet/include/libcfs/libcfs.h similarity index 63% rename from lnet/include/linux/libcfs.h rename to lnet/include/libcfs/libcfs.h index d8f5349..2fabeab 100644 --- a/lnet/include/linux/libcfs.h +++ b/lnet/include/libcfs/libcfs.h @@ -1,23 +1,15 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _LIBCFS_H -#define _LIBCFS_H +#ifndef __LIBCFS_LIBCFS_H__ +#define __LIBCFS_LIBCFS_H__ -#ifdef HAVE_ASM_TYPES_H -#include -#else -#include "types.h" +#if !__GNUC__ +#define __attribute__(x) #endif -#ifdef __KERNEL__ -# include -# include -#else -# include -# define do_gettimeofday(tv) gettimeofday(tv, NULL); -typedef unsigned long long cycles_t; -#endif +#include +#include "curproc.h" #define PORTAL_DEBUG @@ -27,37 +19,6 @@ typedef unsigned long long cycles_t; #define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) -#ifndef __KERNEL__ -/* Userpace byte flipping */ -# include -# include -# define __swab16(x) bswap_16(x) -# define __swab32(x) bswap_32(x) -# define __swab64(x) bswap_64(x) -# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) -# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) -# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) -# if __BYTE_ORDER == __LITTLE_ENDIAN -# define le16_to_cpu(x) (x) -# define cpu_to_le16(x) (x) -# define le32_to_cpu(x) (x) -# define cpu_to_le32(x) (x) -# define le64_to_cpu(x) (x) -# define cpu_to_le64(x) 
(x) -# else -# if __BYTE_ORDER == __BIG_ENDIAN -# define le16_to_cpu(x) bswap_16(x) -# define cpu_to_le16(x) bswap_16(x) -# define le32_to_cpu(x) bswap_32(x) -# define cpu_to_le32(x) bswap_32(x) -# define le64_to_cpu(x) bswap_64(x) -# define cpu_to_le64(x) bswap_64(x) -# else -# error "Unknown byte order" -# endif /* __BIG_ENDIAN */ -# endif /* __LITTLE_ENDIAN */ -#endif /* ! __KERNEL__ */ - /* * Debugging */ @@ -66,19 +27,9 @@ extern unsigned int portal_stack; extern unsigned int portal_debug; extern unsigned int portal_printk; -struct ptldebug_header { - __u32 ph_len; - __u32 ph_flags; - __u32 ph_subsys; - __u32 ph_mask; - __u32 ph_cpu_id; - __u32 ph_sec; - __u64 ph_usec; - __u32 ph_stack; - __u32 ph_pid; - __u32 ph_extern_pid; - __u32 ph_line_num; -} __attribute__((packed)); +/* + * struct ptldebug_header is defined in libcfs//libcfs.h + */ #define PH_FLAG_FIRST_RECORD 1 @@ -144,42 +95,10 @@ struct ptldebug_header { * utils/lconf */ -#ifdef __KERNEL__ -# include /* THREAD_SIZE */ -#else -# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ -# define THREAD_SIZE 8192 -# endif +#ifndef DEBUG_SUBSYSTEM +# define DEBUG_SUBSYSTEM S_UNDEFINED #endif -#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) - -#ifdef __KERNEL__ -# ifdef __ia64__ -# define CDEBUG_STACK (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -# else -# define CDEBUG_STACK (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -# endif /* __ia64__ */ - -#define CHECK_STACK(stack) \ - do { \ - if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \ - __FILE__, __FUNCTION__, __LINE__, \ - (stack),"maximum lustre stack %u\n",\ - portal_stack = (stack)); \ - /*panic("LBUG");*/ \ - } \ - } while (0) -#else /* !__KERNEL__ */ -#define CHECK_STACK(stack) do { } while(0) -#define CDEBUG_STACK (0L) -#endif /* __KERNEL__ */ - #if 1 #define CDEBUG(mask, format, a...) 
\ do { \ @@ -195,11 +114,12 @@ do { \ #define CDEBUG_MAX_LIMIT 600 #define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...) \ do { \ - static unsigned long cdebug_next = 0; \ - static int cdebug_count = 0, cdebug_delay = 1; \ + static cfs_time_t cdebug_next = 0; \ + static int cdebug_count = 0; \ + static cfs_duration_t cdebug_delay = CFS_MIN_DELAY; \ \ CHECK_STACK(CDEBUG_STACK); \ - if (time_after(jiffies, cdebug_next)) { \ + if (cfs_time_after(cfs_time_current(), cdebug_next)) { \ portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__, \ __FUNCTION__, __LINE__, CDEBUG_STACK, \ cdebug_format, ## a); \ @@ -210,12 +130,16 @@ do { \ cdebug_count); \ cdebug_count = 0; \ } \ - if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\ - cdebug_delay = cdebug_delay > 8 ? cdebug_delay/8 : 1; \ + if (cfs_time_after(cfs_time_current(), \ + cdebug_next + \ + cfs_time_seconds(CDEBUG_MAX_LIMIT+10))) \ + cdebug_delay = cdebug_delay > (8 * CFS_MIN_DELAY)? \ + cdebug_delay/8 : CFS_MIN_DELAY; \ else \ - cdebug_delay = cdebug_delay*2 >= CDEBUG_MAX_LIMIT*HZ ?\ - CDEBUG_MAX_LIMIT*HZ : cdebug_delay*2; \ - cdebug_next = jiffies + cdebug_delay; \ + cdebug_delay = cdebug_delay*2 >= cfs_time_seconds(CDEBUG_MAX_LIMIT)?\ + cfs_time_seconds(CDEBUG_MAX_LIMIT) : \ + cdebug_delay*2; \ + cdebug_next = cfs_time_current() + cdebug_delay; \ } else { \ portals_debug_msg(DEBUG_SUBSYSTEM, \ portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\ @@ -238,15 +162,25 @@ do { \ goto label; \ } while (0) +#define CDEBUG_ENTRY_EXIT (0) + +#ifdef CDEBUG_ENTRY_EXIT + +/* + * if rc == NULL, we need to code as RETURN((void *)NULL), otherwise + * there will be a warning in osx. 
+ */ #define RETURN(rc) \ do { \ typeof(rc) RETURN__ret = (rc); \ CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ + EXIT_NESTING; \ return RETURN__ret; \ } while (0) #define ENTRY \ +ENTRY_NESTING; \ do { \ CDEBUG(D_TRACE, "Process entered\n"); \ } while (0) @@ -254,8 +188,17 @@ do { \ #define EXIT \ do { \ CDEBUG(D_TRACE, "Process leaving\n"); \ + EXIT_NESTING; \ } while(0) #else + +#define RETURN(rc) return (rc) +#define ENTRY +#define EXIT + +#endif + +#else #define CDEBUG(mask, format, a...) do { } while (0) #define CWARN(format, a...) printk(KERN_WARNING format, ## a) #define CERROR(format, a...) printk(KERN_ERR format, ## a) @@ -266,23 +209,7 @@ do { \ #define EXIT do { } while (0) #endif -/* initial pid */ -# if CRAY_PORTALS -/* - * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this - * is too big. - * - * 2) the implementation of ernal in cray portals further restricts the pid - * space that may be used to 0 <= pid <= 255 (an 8 bit value). Returns - * an error at nal init time for any pid outside this range. Other nals - * in cray portals don't have this restriction. 
- * */ -#define LUSTRE_PTL_PID 9 -# else -#define LUSTRE_PTL_PID 12345 -# endif - -#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID +#define LUSTRE_SRV_PTL_PID LUSTRE_PTL_PID #define PORTALS_CFG_VERSION 0x00010001; @@ -358,7 +285,7 @@ struct portal_ioctl_data { #ifdef __KERNEL__ -#include +#include struct libcfs_ioctl_handler { struct list_head item; @@ -368,16 +295,13 @@ struct libcfs_ioctl_handler { #define DECLARE_IOCTL_HANDLER(ident, func) \ struct libcfs_ioctl_handler ident = { \ - .item = LIST_HEAD_INIT(ident.item), \ + .item = CFS_LIST_HEAD_INIT(ident.item), \ .handle_ioctl = func \ } int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand); int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand); -#endif - -#ifdef __KERNEL__ /* libcfs watchdogs */ struct lc_watchdog; @@ -386,7 +310,7 @@ struct lc_watchdog; /* Add a watchdog which fires after "time" milliseconds of delay. You have to * touch it once to enable it. */ -struct lc_watchdog *lc_watchdog_add(int time, +struct lc_watchdog *lc_watchdog_add(int time, void (*cb)(struct lc_watchdog *, struct task_struct *, void *), @@ -405,6 +329,151 @@ void lc_watchdog_delete(struct lc_watchdog *lcw); void lc_watchdog_dumplog(struct lc_watchdog *lcw, struct task_struct *tsk, void *data); -#endif /* !__KERNEL__ */ + +/* __KERNEL__ */ +#endif + +/* + * libcfs pseudo device operations + * + * struct cfs_psdev_t and + * cfs_psdev_register() and + * cfs_psdev_deregister() are declared in + * libcfs//cfs_prim.h + * + * It's just draft now. + */ + +struct cfs_psdev_file { + unsigned long off; + void *private_data; + unsigned long reserved1; + unsigned long reserved2; +}; + +struct cfs_psdev_ops { + int (*p_open)(unsigned long, void *); + int (*p_close)(unsigned long, void *); + int (*p_read)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_write)(struct cfs_psdev_file *, char *, unsigned long); + int (*p_ioctl)(struct cfs_psdev_file *, unsigned long, void *); +}; + +/* + * generic time manipulation functions. 
+ */ + +static inline int cfs_time_after(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_before(t2, t1); +} + +static inline int cfs_time_aftereq(cfs_time_t t1, cfs_time_t t2) +{ + return cfs_time_beforeq(t2, t1); +} + +/* + * return seconds since UNIX epoch + */ +static inline time_t cfs_unix_seconds(void) +{ + cfs_fs_time_t t; + + cfs_fs_time_current(&t); + return cfs_fs_time_sec(&t); +} + +#define CFS_RATELIMIT(seconds) \ +({ \ + /* \ + * XXX nikita: non-portable initializer \ + */ \ + static time_t __next_message = 0; \ + int result; \ + \ + if (cfs_time_after(cfs_time_current(), __next_message)) \ + result = 1; \ + else { \ + __next_message = cfs_time_shift(seconds); \ + result = 0; \ + } \ + result; \ +}) + +extern void portals_debug_msg(int subsys, int mask, char *file, const char *fn, + const int line, unsigned long stack, + char *format, ...) + __attribute__ ((format (printf, 7, 8))); + + +static inline void cfs_slow_warning(cfs_time_t now, int seconds, char *msg) +{ + if (cfs_time_after(cfs_time_current(), + cfs_time_add(now, cfs_time_seconds(15)))) + CERROR("slow %s %lu sec\n", msg, + cfs_duration_sec(cfs_time_sub(cfs_time_current(), now))); +} + +/* + * helper function similar to do_gettimeofday() of Linux kernel + */ +static inline void cfs_fs_timeval(struct timeval *tv) +{ + cfs_fs_time_t time; + + cfs_fs_time_current(&time); + cfs_fs_time_usec(&time, tv); +} + +/* + * return valid time-out based on user supplied one. Currently we only check + * that time-out is not shorted than allowed. 
+ */ +static inline cfs_duration_t cfs_timeout_cap(cfs_duration_t timeout) +{ + if (timeout < cfs_time_minimal_timeout()) + timeout = cfs_time_minimal_timeout(); + return timeout; +} + +/* + * Portable memory allocator API (draft) + */ +enum cfs_alloc_flags { + /* allocation is not allowed to block */ + CFS_ALLOC_ATOMIC = (1 << 0), + /* allocation is allowed to block */ + CFS_ALLOC_WAIT = (1 << 1), + /* allocation should return zeroed memory */ + CFS_ALLOC_ZERO = (1 << 2), + /* allocation is allowed to call file-system code to free/clean + * memory */ + CFS_ALLOC_FS = (1 << 3), + /* allocation is allowed to do io to free/clean memory */ + CFS_ALLOC_IO = (1 << 4), + /* standard allocator flag combination */ + CFS_ALLOC_STD = CFS_ALLOC_FS | CFS_ALLOC_IO, + CFS_ALLOC_USER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO, +}; + +#define CFS_SLAB_ATOMIC CFS_ALLOC_ATOMIC +#define CFS_SLAB_WAIT CFS_ALLOC_WAIT +#define CFS_SLAB_ZERO CFS_ALLOC_ZERO +#define CFS_SLAB_FS CFS_ALLOC_FS +#define CFS_SLAB_IO CFS_ALLOC_IO +#define CFS_SLAB_STD CFS_ALLOC_STD +#define CFS_SLAB_USER CFS_ALLOC_USER + +/* flags for cfs_page_alloc() in addition to enum cfs_alloc_flags */ +enum cfs_page_alloc_flags { + /* allow to return page beyond KVM. 
It has to be mapped into KVM by + * cfs_page_map(); */ + CFS_ALLOC_HIGH = (1 << 5), + CFS_ALLOC_HIGHUSER = CFS_ALLOC_WAIT | CFS_ALLOC_FS | CFS_ALLOC_IO | CFS_ALLOC_HIGH, +}; + + +#define _LIBCFS_H #endif /* _LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/.cvsignore b/lnet/include/libcfs/linux/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/lnet/include/libcfs/linux/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/lnet/include/libcfs/linux/Makefile.am b/lnet/include/libcfs/linux/Makefile.am new file mode 100644 index 0000000..f340c87 --- /dev/null +++ b/lnet/include/libcfs/linux/Makefile.am @@ -0,0 +1,3 @@ +EXTRA_DIST := linux-fs.h linux-prim.h portals_lib.h kp30.h \ + linux-lock.h lltrace.h portals_utils.h libcfs.h linux-mem.h \ + portals_compat25.h diff --git a/lnet/include/libcfs/linux/kp30.h b/lnet/include/libcfs/linux/kp30.h new file mode 100644 index 0000000..344264d --- /dev/null +++ b/lnet/include/libcfs/linux/kp30.h @@ -0,0 +1,322 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_KP30_H__ +#define __LIBCFS_LINUX_KP30_H__ + +#ifndef __LIBCFS_KP30_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# ifdef HAVE_MM_INLINE +# include +# endif +# if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +# include +# endif + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define schedule_work schedule_task +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_TQUEUE((wq), 0, 0); \ + PREPARE_TQUEUE((wq), (cb), (cbdata)); \ +} while (0) + +#define PageUptodate Page_Uptodate +#define our_recalc_sigpending(current) recalc_sigpending(current) +#define num_online_cpus() smp_num_cpus +static inline void our_cond_resched(void) +{ + if (current->need_resched) + schedule (); +} +#define work_struct_t struct tq_struct + +#else + +#define prepare_work(wq,cb,cbdata) \ +do { \ + INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ +} while (0) +#define wait_on_page wait_on_page_locked +#define our_recalc_sigpending(current) recalc_sigpending() +#define strtok(a,b) strpbrk(a, b) +static inline void our_cond_resched(void) +{ + cond_resched(); +} +#define work_struct_t struct work_struct + +#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ + +#ifdef CONFIG_SMP +#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock)) +#else +#define LASSERT_SPIN_LOCKED(lock) do {} while(0) +#endif + +#ifdef __arch_um__ +#define LBUG_WITH_LOC(file, func, line) \ +do { \ + CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ + portals_debug_dumplog(); \ + portals_run_lbug_upcall(file, func, line); \ + panic("LBUG"); \ +} while (0) +#else +#define LBUG_WITH_LOC(file, func, line) \ +do { \ + CEMERG("LBUG\n"); \ + portals_debug_dumpstack(NULL); \ + portals_debug_dumplog(); \ + portals_run_lbug_upcall(file, 
func, line); \ + set_task_state(current, TASK_UNINTERRUPTIBLE); \ + schedule(); \ +} while (0) +#endif /* __arch_um__ */ + +/* ------------------------------------------------------------------- */ + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + +#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) +#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) + +#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) +#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) + +#define PORTAL_MODULE_USE MOD_INC_USE_COUNT +#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT +#else + +#define PORTAL_SYMBOL_REGISTER(x) +#define PORTAL_SYMBOL_UNREGISTER(x) + +#define PORTAL_SYMBOL_GET(x) symbol_get(x) +#define PORTAL_SYMBOL_PUT(x) symbol_put(x) + +#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) +#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) + +#endif + +/******************************************************************************/ + +#if (__GNUC__) +/* Use the special GNU C __attribute__ hack to have the compiler check the + * printf style argument string against the actual argument count and + * types. + */ +#ifdef printf +# warning printf has been defined as a macro... +# undef printf +#endif + +#endif /* __GNUC__ */ + +# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) +# define printf(format, b...) CDEBUG(D_OTHER, format , ## b) +# define time(a) CURRENT_TIME + +#else /* !__KERNEL__ */ +# include +# include +#ifndef __CYGWIN__ +# include +#else +# include +#endif +# include +# include +# include + +#endif /* End of !__KERNEL__ */ + +/******************************************************************************/ +/* Light-weight trace + * Support for temporary event tracing with minimal Heisenberg effect. 
*/ +#define LWT_SUPPORT 0 + +#define LWT_MEMORY (16<<20) + +#if !KLWT_SUPPORT +# if defined(__KERNEL__) +# if !defined(BITS_PER_LONG) +# error "BITS_PER_LONG not defined" +# endif +# elif !defined(__WORDSIZE) +# error "__WORDSIZE not defined" +# else +# define BITS_PER_LONG __WORDSIZE +# endif + +/* kernel hasn't defined this? */ +typedef struct { + long long lwte_when; + char *lwte_where; + void *lwte_task; + long lwte_p1; + long lwte_p2; + long lwte_p3; + long lwte_p4; +# if BITS_PER_LONG > 32 + long lwte_pad; +# endif +} lwt_event_t; +#endif /* !KLWT_SUPPORT */ + +#if LWT_SUPPORT +# ifdef __KERNEL__ +# if !KLWT_SUPPORT + +typedef struct _lwt_page { + struct list_head lwtp_list; + struct page *lwtp_page; + lwt_event_t *lwtp_events; +} lwt_page_t; + +typedef struct { + int lwtc_current_index; + lwt_page_t *lwtc_current_page; +} lwt_cpu_t; + +extern int lwt_enabled; +extern lwt_cpu_t lwt_cpus[]; + +/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set. + * This stuff is meant for finding specific problems; it never stays in + * production code... 
*/ + +#define LWTSTR(n) #n +#define LWTWHERE(f,l) f ":" LWTSTR(l) +#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t)) + +#define LWT_EVENT(p1, p2, p3, p4) \ +do { \ + unsigned long flags; \ + lwt_cpu_t *cpu; \ + lwt_page_t *p; \ + lwt_event_t *e; \ + \ + if (lwt_enabled) { \ + local_irq_save (flags); \ + \ + cpu = &lwt_cpus[smp_processor_id()]; \ + p = cpu->lwtc_current_page; \ + e = &p->lwtp_events[cpu->lwtc_current_index++]; \ + \ + if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) { \ + cpu->lwtc_current_page = \ + list_entry (p->lwtp_list.next, \ + lwt_page_t, lwtp_list); \ + cpu->lwtc_current_index = 0; \ + } \ + \ + e->lwte_when = get_cycles(); \ + e->lwte_where = LWTWHERE(__FILE__,__LINE__); \ + e->lwte_task = current; \ + e->lwte_p1 = (long)(p1); \ + e->lwte_p2 = (long)(p2); \ + e->lwte_p3 = (long)(p3); \ + e->lwte_p4 = (long)(p4); \ + \ + local_irq_restore (flags); \ + } \ +} while (0) + +#endif /* !KLWT_SUPPORT */ + +extern int lwt_init (void); +extern void lwt_fini (void); +extern int lwt_lookup_string (int *size, char *knlptr, + char *usrptr, int usrsize); +extern int lwt_control (int enable, int clear); +extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, + void *user_ptr, int user_size); +# else /* __KERNEL__ */ +# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */ +# endif /* __KERNEL__ */ +#endif /* LWT_SUPPORT */ + +/* ------------------------------------------------------------------ */ + +#define IOCTL_PORTAL_TYPE long + +#ifdef __CYGWIN__ +# ifndef BITS_PER_LONG +# if (~0UL) == 0xffffffffUL +# define BITS_PER_LONG 32 +# else +# define BITS_PER_LONG 64 +# endif +# endif +#endif + +#if BITS_PER_LONG > 32 +# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a) +#else +# define LI_POISON ((int)0x5a5a5a5a) +# define LL_POISON ((long)0x5a5a5a5a) +# define LP_POISON ((void *)(long)0x5a5a5a5a) +#endif + +#if 
defined(__x86_64__) +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32) +# define LPU64 "%Lu" +# define LPD64 "%Ld" +# define LPX64 "%#Lx" +# define LPSZ "%u" +# define LPSSZ "%d" +#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64) +# define LPU64 "%lu" +# define LPD64 "%ld" +# define LPX64 "%#lx" +# define LPSZ "%lu" +# define LPSSZ "%ld" +#endif +#ifndef LPU64 +# error "No word size defined" +#endif + +#endif diff --git a/lnet/include/libcfs/linux/libcfs.h b/lnet/include/libcfs/linux/libcfs.h new file mode 100644 index 0000000..208b89e --- /dev/null +++ b/lnet/include/libcfs/linux/libcfs.h @@ -0,0 +1,151 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_LIBCFS_H__ +#define __LIBCFS_LINUX_LIBCFS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include +#include +#include +#include + +#ifdef HAVE_ASM_TYPES_H +#include +#else +#include "types.h" +#endif + + +#ifdef __KERNEL__ +# include +# include +# include +#else +# include +# include +/* userspace stand-in for the kernel call; no trailing ';' so that it + * expands to a plain expression and stays safe inside if/else */ +# define do_gettimeofday(tv) gettimeofday(tv, NULL) +typedef unsigned long long cycles_t; +#endif + +#ifndef __KERNEL__ +/* Userspace byte flipping */ +# include +# include +# define __swab16(x) bswap_16(x) +# define __swab32(x) bswap_32(x) +# define __swab64(x) bswap_64(x) +# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0) +# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0) +# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0) +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define le16_to_cpu(x) (x) +# define cpu_to_le16(x) (x) +# define le32_to_cpu(x) (x) +# define cpu_to_le32(x) (x) +# define le64_to_cpu(x) (x) +# define cpu_to_le64(x) (x) +# else +# if __BYTE_ORDER == __BIG_ENDIAN +# define le16_to_cpu(x) bswap_16(x) +# define cpu_to_le16(x) bswap_16(x) 
+# define le32_to_cpu(x) bswap_32(x) +# define cpu_to_le32(x) bswap_32(x) +# define le64_to_cpu(x) bswap_64(x) +# define cpu_to_le64(x) bswap_64(x) +# else +# error "Unknown byte order" +# endif /* __BIG_ENDIAN */ +# endif /* __LITTLE_ENDIAN */ +#endif /* ! __KERNEL__ */ + +struct ptldebug_header { + __u32 ph_len; + __u32 ph_flags; + __u32 ph_subsys; + __u32 ph_mask; + __u32 ph_cpu_id; + __u32 ph_sec; + __u64 ph_usec; + __u32 ph_stack; + __u32 ph_pid; + __u32 ph_extern_pid; + __u32 ph_line_num; +} __attribute__((packed)); + +#ifdef __KERNEL__ +# include /* THREAD_SIZE */ +#else +# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */ +# define THREAD_SIZE 8192 +# endif +#endif + +#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5) + +#ifdef __KERNEL__ +# ifdef __ia64__ +# define CDEBUG_STACK (THREAD_SIZE - \ + ((unsigned long)__builtin_dwarf_cfa() & \ + (THREAD_SIZE - 1))) +# else +# define CDEBUG_STACK (THREAD_SIZE - \ + ((unsigned long)__builtin_frame_address(0) & \ + (THREAD_SIZE - 1))) +# endif /* __ia64__ */ + +#define CHECK_STACK(stack) \ + do { \ + if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ + portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \ + __FILE__, __FUNCTION__, __LINE__, \ + (stack),"maximum lustre stack %u\n",\ + portal_stack = (stack)); \ + /*panic("LBUG");*/ \ + } \ + } while (0) +#else /* !__KERNEL__ */ +#define CHECK_STACK(stack) do { } while(0) +#define CDEBUG_STACK (0L) +#endif /* __KERNEL__ */ + +/* initial pid */ +# if CRAY_PORTALS +/* + * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this + * is too big. + * + * 2) the implementation of ernal in cray portals further restricts the pid + * space that may be used to 0 <= pid <= 255 (an 8 bit value). Returns + * an error at nal init time for any pid outside this range. Other nals + * in cray portals don't have this restriction. 
+ * */ +#define LUSTRE_PTL_PID 9 +# else +#define LUSTRE_PTL_PID 12345 +# endif + +#define ENTRY_NESTING_SUPPORT (0) +#define ENTRY_NESTING do {;} while (0) +#define EXIT_NESTING do {;} while (0) +#define __current_nesting_level() (0) + +/* + * Platform specific declarations for cfs_curproc API (libcfs/curproc.h) + * + * Implementation is in linux-curproc.c + */ +#define CFS_CURPROC_COMM_MAX (sizeof ((struct task_struct *)0)->comm) + +#if defined(__KERNEL__) +#include +typedef kernel_cap_t cfs_kernel_cap_t; +#else +typedef __u32 cfs_kernel_cap_t; +#endif + +#endif /* _LINUX_LIBCFS_H */ diff --git a/lnet/include/libcfs/linux/linux-fs.h b/lnet/include/libcfs/linux/linux-fs.h new file mode 100644 index 0000000..9530360 --- /dev/null +++ b/lnet/include/libcfs/linux/linux-fs.h @@ -0,0 +1,70 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_FS_H__ +#define __LIBCFS_LINUX_CFS_FS_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#endif + +typedef struct file cfs_file_t; +typedef struct dentry cfs_dentry_t; + +#ifdef __KERNEL__ +#define cfs_filp_size(f) ((f)->f_dentry->d_inode->i_size) +#define cfs_filp_poff(f) (&(f)->f_pos) + +/* + * XXX Do we need to parse flags and mode in cfs_filp_open? + */ +cfs_file_t *cfs_filp_open (const char *name, int flags, int mode, int *err); +#define cfs_filp_close(f) filp_close(f, NULL) +#define cfs_filp_read(fp, buf, size, pos) (fp)->f_op->read((fp), (buf), (size), pos) +#define cfs_filp_write(fp, buf, size, pos) (fp)->f_op->write((fp), (buf), (size), pos) +#define cfs_filp_fsync(fp) (fp)->f_op->fsync((fp), (fp)->f_dentry, 1) + +#define cfs_get_file(f) get_file(f) +#define cfs_put_file(f) fput(f) +#define cfs_file_count(f) file_count(f) + +typedef struct file_lock cfs_flock_t; +#define CFS_FLOCK_TYPE(fl) ((fl)->fl_type) +#define CFS_FLOCK_SET_TYPE(fl, type) do { (fl)->fl_type = (type); } while(0) +#define CFS_FLOCK_PID(fl) ((fl)->fl_pid) +#define CFS_FLOCK_SET_PID(fl, pid) do { (fl)->fl_pid = (pid); } while(0) +#define CFS_FLOCK_START(fl) ((fl)->fl_start) +#define CFS_FLOCK_SET_START(fl, start) do { (fl)->fl_start = (start); } while(0) +#define CFS_FLOCK_END(fl) ((fl)->fl_end) +#define CFS_FLOCK_SET_END(fl, end) do { (fl)->fl_end = (end); } while(0) + +#endif + +#endif diff --git a/lnet/include/libcfs/linux/linux-lock.h b/lnet/include/libcfs/linux/linux-lock.h new file mode 100644 index 0000000..ce097e9 --- /dev/null +++ b/lnet/include/libcfs/linux/linux-lock.h @@ -0,0 +1,118 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_LOCK_H__ +#define __LIBCFS_LINUX_CFS_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +#include + +/* + * IMPORTANT !!!!!!!! + * + * Lock declarations are not guaranteed to be initialized, + * although some of them are initialized in Linux. All locks + * declared by CFS_DECL_* should be initialized explicitly. + */ + + +/* + * spin_lock (use Linux kernel's primitives) + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + */ + +/* + * rw_semaphore (use Linux kernel's primitives) + * + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ + +/* + * rwlock_t (use Linux kernel's primitives) + * + * - rwlock_init(x) + * - read_lock(x) + * - read_unlock(x) + * - write_lock(x) + * - write_unlock(x) + */ + +/* + * mutex_t: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ +#define init_mutex(x) init_MUTEX(x) +#define init_mutex_locked(x) init_MUTEX_LOCKED(x) +#define mutex_up(x) up(x) +#define mutex_down(x) down(x) + +/* + * completion (use Linux kernel's primitives) + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ + +/* + * OSX funnels: + * + * No funnels needed in Linux, so every declaration macro expands + * to nothing (via the empty CFS_DECL_FUNNEL_DATA) + */ +#define CFS_DECL_FUNNEL_DATA +#define CFS_DECL_CONE_DATA CFS_DECL_FUNNEL_DATA +#define 
CFS_DECL_NET_DATA CFS_DECL_FUNNEL_DATA +#define CFS_CONE_IN do {} while(0) +#define CFS_CONE_EX do {} while(0) + +#define CFS_NET_IN do {} while(0) +#define CFS_NET_EX do {} while(0) + +/* __KERNEL__ */ +#else + +//#include "../user-lock.h" + +/* __KERNEL__ */ +#endif +#endif diff --git a/lnet/include/libcfs/linux/linux-mem.h b/lnet/include/libcfs/linux/linux-mem.h new file mode 100644 index 0000000..94b764f --- /dev/null +++ b/lnet/include/libcfs/linux/linux-mem.h @@ -0,0 +1,110 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_MEM_H__ +#define __LIBCFS_LINUX_CFS_MEM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +# include +# include +# include +# include +# ifdef HAVE_MM_INLINE +# include +# endif + +typedef struct page cfs_page_t; +#define CFS_PAGE_SIZE PAGE_CACHE_SIZE +#define CFS_PAGE_SHIFT PAGE_CACHE_SHIFT +#define CFS_PAGE_MASK PAGE_CACHE_MASK + +cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order); +#define cfs_alloc_page(f) cfs_alloc_pages(f, 0) +#define cfs_free_pages(p, o) __free_pages(p, o) +#define cfs_free_page(p) __free_pages(p, 0) + +static inline void *cfs_page_address(cfs_page_t *page) +{ + return page_address(page); +} + +static inline void *cfs_kmap(cfs_page_t *page) +{ + return kmap(page); +} + +static inline void cfs_kunmap(cfs_page_t *page) +{ + kunmap(page); +} + +static inline void cfs_get_page(cfs_page_t *page) +{ + get_page(page); +} + +static inline int cfs_page_count(cfs_page_t *page) +{ + return page_count(page); +} + +static inline void cfs_set_page_count(cfs_page_t *page, int v) +{ + set_page_count(page, v); +} + +/* + * Memory allocator + */ +extern void *cfs_alloc(size_t nr_bytes, u_int32_t flags); +extern void cfs_free(void *addr); + +extern void *cfs_alloc_large(size_t nr_bytes); +extern void cfs_free_large(void *addr); + +/* + * SLAB allocator + */ +typedef kmem_cache_t cfs_mem_cache_t; +extern cfs_mem_cache_t * cfs_mem_cache_create (const char *, size_t, size_t, unsigned long, + void (*)(void *, cfs_mem_cache_t *, unsigned long), + void (*)(void *, cfs_mem_cache_t *, unsigned long)); +extern int cfs_mem_cache_destroy ( cfs_mem_cache_t * ); +extern void *cfs_mem_cache_alloc ( cfs_mem_cache_t *, int); +extern void cfs_mem_cache_free ( cfs_mem_cache_t *, void *); + +/* + */ +#define CFS_DECL_MMSPACE mm_segment_t __oldfs +#define CFS_MMSPACE_OPEN do { __oldfs = get_fs(); set_fs(get_ds());} while(0) +#define CFS_MMSPACE_CLOSE set_fs(__oldfs) + +/* __KERNEL__ */ +#endif + +#endif /* __LINUX_CFS_MEM_H__ */ diff --git a/lnet/include/libcfs/linux/linux-prim.h 
b/lnet/include/libcfs/linux/linux-prim.h new file mode 100644 index 0000000..69bda36 --- /dev/null +++ b/lnet/include/libcfs/linux/linux-prim.h @@ -0,0 +1,173 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_CFS_PRIM_H__ +#define __LIBCFS_LINUX_CFS_PRIM_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Pseudo device register + */ +typedef struct miscdevice cfs_psdev_t; +#define cfs_psdev_register(dev) misc_register(dev) +#define cfs_psdev_deregister(dev) misc_deregister(dev) + +/* + * Sysctl register + * + * unregister_cfs_sysctl_table() has a single parameter, so only (t) + * is forwarded to the kernel call. + */ +typedef struct ctl_table cfs_sysctl_table_t; +typedef struct ctl_table_header cfs_sysctl_table_header_t; + +#define register_cfs_sysctl_table(t, a) register_sysctl_table(t, a) +#define unregister_cfs_sysctl_table(t) unregister_sysctl_table(t) + +/* + * Proc file system APIs + */ +typedef read_proc_t cfs_read_proc_t; +typedef write_proc_t cfs_write_proc_t; +typedef struct proc_dir_entry cfs_proc_dir_entry_t; +#define cfs_create_proc_entry(n, m, p) create_proc_entry(n, m, p) +#define cfs_free_proc_entry(e) free_proc_entry(e) +#define cfs_remove_proc_entry(n, e) remove_proc_entry(n, e) + +/* + * Wait Queue + */ +typedef wait_queue_t cfs_waitlink_t; +typedef wait_queue_head_t cfs_waitq_t; + +#define cfs_waitq_init(w) init_waitqueue_head(w) +#define cfs_waitlink_init(l) init_waitqueue_entry(l, current) +#define cfs_waitq_add(w, l) add_wait_queue(w, l) +#define cfs_waitq_add_exclusive(w, l) add_wait_queue_exclusive(w, l) +#define cfs_waitq_forward(l, w) do {} while(0) +#define cfs_waitq_del(w, l) remove_wait_queue(w, l) +#define cfs_waitq_active(w) waitqueue_active(w) +#define cfs_waitq_signal(w) wake_up(w) +#define cfs_waitq_signal_nr(w,n) wake_up_nr(w, n) +#define cfs_waitq_broadcast(w) wake_up_all(w) +#define cfs_waitq_wait(l) schedule() +#define cfs_waitq_timedwait(l, t) schedule_timeout(t) + +/* Kernel thread */ +typedef int (*cfs_thread_t)(void *); +#define cfs_kernel_thread(func, a, f) kernel_thread(func, a, f) + +/* + * Task struct + */ +typedef struct task_struct cfs_task_t; +#define cfs_current() current +#define CFS_DECL_JOURNAL_DATA void *journal_info +#define CFS_PUSH_JOURNAL do { \ + 
journal_info = current->journal_info; \ + current->journal_info = NULL; \ + } while(0) +#define CFS_POP_JOURNAL do { \ + current->journal_info = journal_info; \ + } while(0) + +/* Module interfaces */ +#define cfs_module(name, version, init, fini) \ +module_init(init); \ +module_exit(fini) + +/* + * Signal + */ +#define cfs_sigmask_lock(t, f) SIGNAL_MASK_LOCK(t, f) +#define cfs_sigmask_unlock(t, f) SIGNAL_MASK_UNLOCK(t, f) +#define cfs_recalc_sigpending(t) RECALC_SIGPENDING +#define cfs_signal_pending(t) signal_pending(t) +#define cfs_sigfillset(s) sigfillset(s) + +#define cfs_set_sig_blocked(t, b) do { (t)->blocked = b; } while(0) +#define cfs_get_sig_blocked(t) (&(t)->blocked) + +/* + * Timer + */ +typedef struct timer_list cfs_timer_t; +typedef void (*timer_func_t)(unsigned long); + +#define cfs_init_timer(t) init_timer(t) + +static inline void cfs_timer_init(cfs_timer_t *t, void (*func)(unsigned long), void *arg) +{ + init_timer(t); + t->function = (timer_func_t)func; + t->data = (unsigned long)arg; +} + +static inline void cfs_timer_done(cfs_timer_t *t) +{ + return; +} + +static inline void cfs_timer_arm(cfs_timer_t *t, cfs_time_t deadline) +{ + mod_timer(t, deadline); +} + +static inline void cfs_timer_disarm(cfs_timer_t *t) +{ + del_timer(t); +} + +static inline int cfs_timer_is_armed(cfs_timer_t *t) +{ + return timer_pending(t); +} + +static inline cfs_time_t cfs_timer_deadline(cfs_timer_t *t) +{ + return t->expires; +} + +#else /* !__KERNEL__ */ + +#include "../user-prim.h" + +#endif /* __KERNEL__ */ + +#endif diff --git a/lnet/include/libcfs/linux/linux-time.h b/lnet/include/libcfs/linux/linux-time.h new file mode 100644 index 0000000..f18e7d9 --- /dev/null +++ b/lnet/include/libcfs/linux/linux-time.h @@ -0,0 +1,292 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for Linux (kernel and user-level). + * + */ + +#ifndef __LIBCFS_LINUX_LINUX_TIME_H__ +#define __LIBCFS_LINUX_LINUX_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. 
This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * cfs_duration_t cfs_time_minimal_timeout(void) + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION ((u_int64_t) 1000000) + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include +#include + +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + +/* + * old kernels---CURRENT_TIME is struct timeval + */ +typedef struct timeval cfs_fs_time_t; + +static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v) +{ + *v = *t; +} + +static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s) +{ + s->tv_sec = t->tv_sec; + s->tv_nsec = t->tv_usec * 1000; +} + +/* + * internal helper function used by cfs_fs_time_before*() + * + * flattens a timeval into one microsecond count: seconds are scaled by + * ONE_MILLION, so tv_usec is added unscaled to keep both terms in the + * same unit. + */ +static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t) +{ + return ((unsigned long)t->tv_sec) * ONE_MILLION + t->tv_usec; +} + +#define CURRENT_KERN_TIME xtime + +/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */ +#else + +/* + * post 2.5 kernels. 
+ */
+
+#include
+
+/* file-system time-stamps are struct timespec (seconds + nanoseconds) here */
+typedef struct timespec cfs_fs_time_t;
+
+/* convert a file-system time-stamp to struct timeval; the integer division
+ * drops the sub-microsecond part of tv_nsec */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        v->tv_sec  = t->tv_sec;
+        v->tv_usec = t->tv_nsec / 1000;
+}
+
+/* copy a file-system time-stamp out as struct timespec (same representation) */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        *s = *t;
+}
+
+/*
+ * internal helper function used by cfs_fs_time_before*()
+ *
+ * Flattens a time-stamp into a single count of nanoseconds.  The sum is
+ * computed in 64 bits (ONE_BILLION is u_int64_t) and then converted to the
+ * unsigned long return type, so where long is narrower than 64 bits only
+ * the low-order bits are kept.
+ */
+static inline unsigned long __cfs_fs_time_flat(cfs_fs_time_t *t)
+{
+        return ((unsigned long)t->tv_sec) * ONE_BILLION + t->tv_nsec;
+}
+
+#define CURRENT_KERN_TIME        CURRENT_TIME
+
+/* (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) */
+#endif
+
+/*
+ * Generic kernel stuff
+ */
+
+typedef unsigned long cfs_time_t;      /* jiffies */
+typedef long cfs_duration_t;           /* signed difference of two cfs_time_t */
+
+
+/* current value of the kernel's jiffies counter */
+static inline cfs_time_t cfs_time_current(void)
+{
+        return jiffies;
+}
+
+/* NOTE(review): CURRENT_SECONDS is not defined in this header; presumably it
+ * comes from one of the headers included above -- verify */
+static inline time_t cfs_time_current_sec(void)
+{
+        return CURRENT_SECONDS;
+}
+
+/* point in time @d after @t */
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+        return t + d;
+}
+
+/* t1 - t2; the unsigned difference is converted to the signed duration type */
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 - t2;
+}
+
+/* non-zero iff @t1 is before @t2, as decided by the kernel's time_before() */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+        return time_before(t1, t2);
+}
+
+/* non-zero iff @t1 is before or equal to @t2 (kernel's time_before_eq()) */
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+        return time_before_eq(t1, t2);
+}
+
+/* store the current file-system time (CURRENT_KERN_TIME) in *@t */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+        *t = CURRENT_KERN_TIME;
+}
+
+/* whole-seconds part of a file-system time-stamp */
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+        return t->tv_sec;
+}
+
+/* ordering of file-system time-stamps: both are flattened with
+ * __cfs_fs_time_flat() and compared with time_before() */
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return time_before(__cfs_fs_time_flat(t1), __cfs_fs_time_flat(t2));
+}
+
+/* as cfs_fs_time_before(), but equal time-stamps also compare true */
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return time_before_eq(__cfs_fs_time_flat(t1), __cfs_fs_time_flat(t2));
+}
+
+/* cfs_duration_build() is compiled out in the kernel build */
+#if 0
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+#if (BITS_PER_LONG == 32)
+        /* We cannot use do_div(t, ONE_BILLION), do_div can only process
+         * 64 bits n and 32 bits base */
+        int64_t  t = nano *
HZ;
+        do_div(t, 1000);
+        do_div(t, 1000000);
+        return (cfs_duration_t)t;
+#else
+        return (nano * HZ / ONE_BILLION);
+#endif
+}
+#endif
+
+/* duration of @seconds seconds, expressed in clock ticks (seconds * HZ) */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+        return seconds * HZ;
+}
+
+/* point in time @seconds seconds from now */
+static inline cfs_time_t cfs_time_shift(int seconds)
+{
+        return jiffies + seconds * HZ;
+}
+
+/* whole seconds contained in duration @d */
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+        return d / HZ;
+}
+
+/*
+ * Split duration @d (in units of 1/HZ second) into whole seconds and the
+ * remaining microseconds, stored in *@s.
+ *
+ * tv_sec is assigned first because the sub-second part is derived from it;
+ * *@s is an output only and is never read before being written.
+ * On 32-bit the 64-bit value is divided with do_div(), which leaves the
+ * quotient in its first argument and returns the remainder, so it is the
+ * updated t (not do_div()'s return value) that goes into tv_usec.
+ */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+#if (BITS_PER_LONG == 32)
+        uint64_t t;
+
+        s->tv_sec = d / HZ;
+        t = (d - s->tv_sec * HZ) * ONE_MILLION;
+        do_div(t, HZ);
+        s->tv_usec = t;
+#else
+        s->tv_sec = d / HZ;
+        s->tv_usec = (d - s->tv_sec * HZ) * ONE_MILLION / HZ;
+#endif
+}
+
+/*
+ * Split duration @d (in units of 1/HZ second) into whole seconds and the
+ * remaining nanoseconds, stored in *@s.  Same structure as
+ * cfs_duration_usec(): tv_sec first, quotient of do_div() into tv_nsec.
+ */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+#if (BITS_PER_LONG == 32)
+        uint64_t t;
+
+        s->tv_sec = d / HZ;
+        t = (d - s->tv_sec * HZ) * ONE_BILLION;
+        do_div(t, HZ);
+        s->tv_nsec = t;
+#else
+        s->tv_sec = d / HZ;
+        s->tv_nsec = (d - s->tv_sec * HZ) * ONE_BILLION / HZ;
+#endif
+}
+
+/* shortest timeout that can be expressed: one clock tick */
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+        return 1;
+}
+
+/* inline function cfs_time_minimal_timeout() can not be used
+ * to initialize static variable */
+#define CFS_MIN_DELAY                   (1)
+
+/* printf formats matching cfs_time_t (unsigned long) and cfs_duration_t (long) */
+#define CFS_TIME_T              "%lu"
+#define CFS_DURATION_T          "%ld"
+
+#else   /* !__KERNEL__ */
+
+/*
+ * Liblustre. time(2) based implementation.
+ */ +#include +#endif /* __KERNEL__ */ + +/* __LIBCFS_LINUX_LINUX_TIME_H__ */ +#endif +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/libcfs/linux/lltrace.h b/lnet/include/libcfs/linux/lltrace.h new file mode 100644 index 0000000..5050abc --- /dev/null +++ b/lnet/include/libcfs/linux/lltrace.h @@ -0,0 +1,28 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_LINUX_LLTRACE_H__ +#define __LIBCFS_LINUX_LLTRACE_H__ + +#ifndef __LIBCFS_LLTRACE_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/libcfs/linux/portals_compat25.h similarity index 97% rename from lnet/include/linux/portals_compat25.h rename to lnet/include/libcfs/linux/portals_compat25.h index fa2709e..148a401 100644 --- a/lnet/include/linux/portals_compat25.h +++ b/lnet/include/libcfs/linux/portals_compat25.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _PORTALS_COMPAT_H -#define _PORTALS_COMPAT_H +#ifndef __LIBCFS_LINUX_PORTALS_COMPAT_H__ +#define __LIBCFS_LINUX_PORTALS_COMPAT_H__ // XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved #if SPINLOCK_DEBUG diff --git a/lnet/include/libcfs/linux/portals_lib.h b/lnet/include/libcfs/linux/portals_lib.h new file mode 100644 index 0000000..99fd1bd --- /dev/null +++ b/lnet/include/libcfs/linux/portals_lib.h @@ -0,0 +1,38 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. 
+ * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Basic library routines. + * + */ + +#ifndef __LIBCFS_LINUX_PORTALS_LIB_H__ +#define __LIBCFS_LINUX_PORTALS_LIB_H__ + +#ifndef __LIBCFS_PORTALS_LIB_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifndef __KERNEL__ +# include +#else +# include +#endif + +#endif diff --git a/lnet/include/libcfs/linux/portals_utils.h b/lnet/include/libcfs/linux/portals_utils.h new file mode 100644 index 0000000..0dd6c7e --- /dev/null +++ b/lnet/include/libcfs/linux/portals_utils.h @@ -0,0 +1,51 @@ +#ifndef __LIBCFS_LINUX_PORTALS_UTILS_H__ +#define __LIBCFS_LINUX_PORTALS_UTILS_H__ + +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# include +#else /* (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) */ +# include +#endif +#include +#include +#else /* !__KERNEL__ */ + +#include +#include + +#ifdef HAVE_LINUX_VERSION_H +# include + +# if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +# define BUG() /* workaround for module.h includes */ +# include +# endif +#endif /* !HAVE_LINUX_VERSION_H */ + +#ifndef __CYGWIN__ +# include +#else /* __CYGWIN__ */ +# include +# include +# include +#endif /* __CYGWIN__ */ + +#endif /* !__KERNEL__ */ +#endif diff --git a/lnet/include/linux/lustre_list.h b/lnet/include/libcfs/list.h similarity index 87% rename from lnet/include/linux/lustre_list.h rename to lnet/include/libcfs/list.h index a218f2c..eebb7b6 100644 --- a/lnet/include/linux/lustre_list.h +++ b/lnet/include/libcfs/list.h @@ -1,9 +1,16 @@ -#ifndef _LUSTRE_LIST_H -#define _LUSTRE_LIST_H +#ifndef __LIBCFS_LIST_H__ +#define __LIBCFS_LIST_H__ + +#if defined (__linux__) && defined(__KERNEL__) -#ifdef __KERNEL__ #include -#else + +#define CFS_LIST_HEAD_INIT(n) LIST_HEAD_INIT(n) +#define CFS_LIST_HEAD(n) LIST_HEAD(n) +#define CFS_INIT_LIST_HEAD(p) INIT_LIST_HEAD(p) + +#else /* !defined (__linux__) && defined(__KERNEL__) */ + /* * Simple doubly linked list implementation. 
* @@ -22,15 +29,22 @@ struct list_head { typedef struct list_head list_t; -#define LIST_HEAD_INIT(name) { &(name), &(name) } +#define CFS_LIST_HEAD_INIT(name) { &(name), &(name) } -#define LIST_HEAD(name) \ +#define CFS_LIST_HEAD(name) \ struct list_head name = LIST_HEAD_INIT(name) -#define INIT_LIST_HEAD(ptr) do { \ +#define CFS_INIT_LIST_HEAD(ptr) do { \ (ptr)->next = (ptr); (ptr)->prev = (ptr); \ } while (0) +#ifndef __APPLE__ +#define LIST_HEAD(n) CFS_LIST_HEAD(n) +#endif + +#define LIST_HEAD_INIT(n) CFS_LIST_HEAD_INIT(n) +#define INIT_LIST_HEAD(p) CFS_INIT_LIST_HEAD(p) + /* * Insert a new entry between two known consecutive entries. * @@ -103,7 +117,7 @@ static inline void list_del(struct list_head *entry) static inline void list_del_init(struct list_head *entry) { __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); + CFS_INIT_LIST_HEAD(entry); } /** @@ -175,7 +189,7 @@ static inline void list_splice_init(struct list_head *list, { if (!list_empty(list)) { __list_splice(list, head); - INIT_LIST_HEAD(list); + CFS_INIT_LIST_HEAD(list); } } @@ -198,15 +212,6 @@ static inline void list_splice_init(struct list_head *list, pos = pos->next, prefetch(pos->next)) /** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -/** * list_for_each_safe - iterate over a list safe against removal of list entry * @pos: the &struct list_head to use as a loop counter. 
* @n: another &struct list_head to use as temporary storage @@ -216,6 +221,21 @@ static inline void list_splice_init(struct list_head *list, for (pos = (head)->next, n = pos->next; pos != (head); \ pos = n, n = pos->next) +#endif /* __linux__*/ + +#ifndef list_for_each_prev +/** + * list_for_each_prev - iterate over a list in reverse order + * @pos: the &struct list_head to use as a loop counter. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ + pos = pos->prev, prefetch(pos->prev)) + +#endif /* list_for_each_prev */ + +#ifndef list_for_each_entry /** * list_for_each_entry - iterate over list of given type * @pos: the type * to use as a loop counter. @@ -228,7 +248,9 @@ static inline void list_splice_init(struct list_head *list, &pos->member != (head); \ pos = list_entry(pos->member.next, typeof(*pos), member), \ prefetch(pos->member.next)) +#endif /* list_for_each_entry */ +#ifndef list_for_each_entry_safe /** * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry * @pos: the type * to use as a loop counter. 
@@ -241,6 +263,6 @@ static inline void list_splice_init(struct list_head *list, n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) +#endif /* list_for_each_entry_safe */ -#endif /* if !__KERNEL__*/ -#endif /* if !_LUSTRE_LIST_H */ +#endif /* __LIBCFS_LUSTRE_LIST_H__ */ diff --git a/lnet/include/lnet/lltrace.h b/lnet/include/libcfs/lltrace.h similarity index 90% rename from lnet/include/lnet/lltrace.h rename to lnet/include/libcfs/lltrace.h index 3e01df1..4f386c5 100644 --- a/lnet/include/lnet/lltrace.h +++ b/lnet/include/libcfs/lltrace.h @@ -4,25 +4,16 @@ * Compile with: * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl */ -#ifndef __LTRACE_H_ -#define __LTRACE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#ifndef __LIBCFS_LLTRACE_H__ +#define __LIBCFS_LLTRACE_H__ + +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#else +#error Unsupported Operating System +#endif static inline int ltrace_write_file(char* fname) { diff --git a/lnet/include/linux/portals_lib.h b/lnet/include/libcfs/portals_lib.h similarity index 91% rename from lnet/include/linux/portals_lib.h rename to lnet/include/libcfs/portals_lib.h index 8778a52..f6b12c2 100644 --- a/lnet/include/linux/portals_lib.h +++ b/lnet/include/libcfs/portals_lib.h @@ -22,13 +22,15 @@ * */ -#ifndef _PORTALS_LIB_H -#define _PORTALS_LIB_H +#ifndef __LIBCFS_PORTALS_LIB_H__ +#define __LIBCFS_PORTALS_LIB_H__ -#ifndef __KERNEL__ -# include -#else -# include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#else +#error Unsupported Operating System #endif #undef MIN diff --git a/lnet/include/libcfs/portals_utils.h b/lnet/include/libcfs/portals_utils.h new file mode 100644 index 0000000..932caaf --- /dev/null +++ 
b/lnet/include/libcfs/portals_utils.h @@ -0,0 +1,19 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __LIBCFS_PORTALS_UTILS_H__ +#define __LIBCFS_PORTALS_UTILS_H__ + +/* + * portals_utils.h + * + */ +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include +#else +#error Unsupported Operating System +#endif + +#endif diff --git a/lnet/include/libcfs/user-lock.h b/lnet/include/libcfs/user-lock.h new file mode 100644 index 0000000..e57200f --- /dev/null +++ b/lnet/include/libcfs/user-lock.h @@ -0,0 +1,171 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_LOCK_H__ +#define __LIBCFS_USER_LOCK_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Implementations of portable synchronization APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + */ + +#ifndef __KERNEL__ + +/* + * Optional debugging (magic stamping and checking ownership) can be added. 
+ */ + +/* + * spin_lock + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + * + * No-op implementation. + */ +struct spin_lock {}; + +typedef struct spin_lock spinlock_t; + +void spin_lock_init(spinlock_t *lock); +void spin_lock(spinlock_t *lock); +void spin_unlock(spinlock_t *lock); +int spin_trylock(spinlock_t *lock); +void spin_lock_bh_init(spinlock_t *lock); +void spin_lock_bh(spinlock_t *lock); +void spin_unlock_bh(spinlock_t *lock); + +#define spin_lock_irqsave(l, flags) ({ spin_lock(l); (void)flags; }) +#define spin_unlock_irqrestore(l, flags) ({ spin_unlock(l); (void)flags; }) + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +struct semaphore {}; + +void sema_init(struct semaphore *s, int val); +void __down(struct semaphore *s); +void __up(struct semaphore *s); + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +struct completion {}; + +void init_completion(struct completion *c); +void complete(struct completion *c); +void wait_for_completion(struct completion *c); + +/* + * rw_semaphore: + * + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore {}; + +void init_rwsem(struct rw_semaphore *s); +void down_read(struct rw_semaphore *s); +int down_read_trylock(struct rw_semaphore *s); +void down_write(struct rw_semaphore *s); +int down_write_trylock(struct rw_semaphore *s); +void up_read(struct rw_semaphore *s); +void up_write(struct rw_semaphore *s); + +/* + * read-write lock : Need to be investigated more!! 
+ * XXX nikita: for now, let rwlock_t be identical to rw_semaphore
+ *
+ * - DECLARE_RWLOCK(l)          (listed for reference; not defined in this header)
+ * - rwlock_init(x)
+ * - read_lock(x)               / read_lock_irqsave(x, f)
+ * - read_unlock(x)             / read_unlock_irqrestore(x, f)
+ * - write_lock(x)              / write_lock_irqsave(x, f)
+ * - write_unlock(x)            / write_unlock_irqrestore(x, f)
+ */
+typedef struct rw_semaphore rwlock_t;
+
+#define rwlock_init(pl)         init_rwsem(pl)
+
+#define read_lock(l)            down_read(l)
+#define read_unlock(l)          up_read(l)
+#define write_lock(l)           down_write(l)
+#define write_unlock(l)         up_write(l)
+
+/* the flags argument is dropped by these macros: it is never evaluated */
+#define write_lock_irqsave(l, f)        write_lock(l)
+#define write_unlock_irqrestore(l, f)   write_unlock(l)
+
+#define read_lock_irqsave(l, f)         read_lock(l)
+#define read_unlock_irqrestore(l, f)    read_unlock(l)
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_LOCK_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8
+ * fill-column: 80
+ * scroll-step: 1
+ * End:
+ */
diff --git a/lnet/include/libcfs/user-prim.h b/lnet/include/libcfs/user-prim.h
new file mode 100644
index 0000000..6c3410b
--- /dev/null
+++ b/lnet/include/libcfs/user-prim.h
@@ -0,0 +1,182 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2004 Cluster File Systems, Inc.
+ * Author: Nikita Danilov
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 of the GNU General Public License as published by the
+ * Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass
+ * Ave, Cambridge, MA 02139, USA.
+ *
+ * Implementation of portable primitives API for user-level.
+ *
+ */
+
+#ifndef __LIBCFS_USER_PRIM_H__
+#define __LIBCFS_USER_PRIM_H__
+
+#ifndef __LIBCFS_LIBCFS_H__
+#error Do not #include this file directly. #include instead
+#endif
+
+/* Implementations of portable APIs for liblustre */
+
+/*
+ * liblustre is single-threaded, so most "synchronization" APIs are trivial.
+ */
+
+#ifndef __KERNEL__
+
+#include
+
+/*
+ * Wait Queue. No-op implementation.
+ *
+ * Only the (empty) types and the prototypes are declared here; the function
+ * bodies are not part of this header.
+ */
+
+typedef struct cfs_waitlink {} cfs_waitlink_t;
+typedef struct cfs_waitq {} cfs_waitq_t;
+
+void cfs_waitq_init(struct cfs_waitq *waitq);
+void cfs_waitlink_init(struct cfs_waitlink *link);
+void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+void cfs_waitq_add_exclusive(struct cfs_waitq *waitq,
+                             struct cfs_waitlink *link);
+void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq);
+void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link);
+int  cfs_waitq_active(struct cfs_waitq *waitq);
+void cfs_waitq_signal(struct cfs_waitq *waitq);
+void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr);
+void cfs_waitq_broadcast(struct cfs_waitq *waitq);
+void cfs_waitq_wait(struct cfs_waitlink *link);
+int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout);
+
+/*
+ * Allocator
+ */
+
+/* 2.4 defines */
+
+/* XXX
+ * for the moment, liblustre will not rely on the OST for non-page-aligned
+ * writes
+ */
+#define LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+
+/* user-level stand-in for the kernel's struct page */
+struct page {
+        void   *addr;
+        unsigned long index;
+        struct list_head list;
+        unsigned long private;
+
+        /* internally used by liblustre file i/o */
+        int     _offset;
+        int     _count;
+#ifdef LIBLUSTRE_HANDLE_UNALIGNED_PAGE
+        /* present only while LIBLUSTRE_HANDLE_UNALIGNED_PAGE is defined */
+        int     _managed;
+#endif
+};
+
+typedef struct page cfs_page_t;
+
+/* NOTE(review): the PAGE_CACHE_* macros are not defined in this header;
+ * they must be provided by whatever includes it -- verify */
+#define CFS_PAGE_SIZE                   PAGE_CACHE_SIZE
+#define CFS_PAGE_SHIFT                  PAGE_CACHE_SHIFT
+#define CFS_PAGE_MASK                   PAGE_CACHE_MASK
+
+cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order);
+void cfs_free_pages(struct page *pg, int what);
+
+cfs_page_t *cfs_alloc_page(unsigned int flags);
+void cfs_free_page(cfs_page_t *pg, int what);
+void *cfs_page_address(cfs_page_t *pg);
+void *cfs_kmap(cfs_page_t *pg);
+void cfs_kunmap(cfs_page_t *pg);
+
+/* page reference counting is not supported at user level: these expand to a
+ * call of __I_should_not_be_called__, which this header does not declare */
+#define cfs_get_page(p)                 __I_should_not_be_called__(at_all)
+#define cfs_page_count(p)               __I_should_not_be_called__(at_all)
+#define cfs_set_page_count(p, v)        __I_should_not_be_called__(at_all)
+
+/*
+ * Memory allocator
+ */
+void *cfs_alloc(size_t nr_bytes, u_int32_t flags);
+void cfs_free(void *addr);
+void *cfs_alloc_large(size_t nr_bytes);
+void cfs_free_large(void *addr);
+
+/*
+ * SLAB allocator
+ */
+typedef struct {
+        int size;
+} cfs_mem_cache_t;
+
+#define SLAB_HWCACHE_ALIGN 0
+
+/* NOTE(review): parameters are unnamed; the order presumably mirrors the
+ * kernel's kmem_cache_create() (name, size, offset, flags, ctor, dtor) --
+ * confirm against the implementation */
+cfs_mem_cache_t *
+cfs_mem_cache_create(const char *, size_t, size_t, unsigned long,
+                     void (*)(void *, cfs_mem_cache_t *, unsigned long),
+                     void (*)(void *, cfs_mem_cache_t *, unsigned long));
+int cfs_mem_cache_destroy(cfs_mem_cache_t *c);
+void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp);
+void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr);
+
+/* signatures of the read/write handlers for proc-style entries */
+typedef int (cfs_read_proc_t)(char *page, char **start, off_t off,
+                              int count, int *eof, void *data);
+
+struct file; /* forward ref */
+typedef int (cfs_write_proc_t)(struct file *file, const char *buffer,
+                               unsigned long count, void *data);
+
+/*
+ * Timer
+ */
+
+typedef struct cfs_timer {} cfs_timer_t;
+
+/* the timer API below is compiled out; only the empty type is provided */
+#if 0
+#define cfs_init_timer(t)       do {} while(0)
+void cfs_timer_init(struct cfs_timer *t, void (*func)(unsigned long), void *arg);
+void cfs_timer_done(struct cfs_timer *t);
+void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline);
+void cfs_timer_disarm(struct cfs_timer *t);
+int  cfs_timer_is_armed(struct cfs_timer *t);
+
+cfs_time_t cfs_timer_deadline(struct cfs_timer *t);
+#endif
+
+typedef void cfs_psdev_t;
+
+/* stub: does nothing with @foo and always returns 0 */
+static inline int cfs_psdev_register(cfs_psdev_t *foo)
+{
+        return 0;
+}
+
+/* stub: does nothing with @foo and always returns 0 */
+static inline int cfs_psdev_deregister(cfs_psdev_t *foo)
+{
+        return 0;
+}
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_PRIM_H__ */
+#endif
+/*
+ * Local variables:
+ *
c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/libcfs/user-time.h b/lnet/include/libcfs/user-time.h new file mode 100644 index 0000000..7abc9e8 --- /dev/null +++ b/lnet/include/libcfs/user-time.h @@ -0,0 +1,198 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +#ifndef __LIBCFS_USER_TIME_H__ +#define __LIBCFS_USER_TIME_H__ + +#ifndef __LIBCFS_LIBCFS_H__ +#error Do not #include this file directly. #include instead +#endif + +/* Portable time API */ + +/* + * Platform provides three opaque data-types: + * + * cfs_time_t represents point in time. This is internal kernel + * time rather than "wall clock". This time bears no + * relation to gettimeofday(). + * + * cfs_duration_t represents time interval with resolution of internal + * platform clock + * + * cfs_fs_time_t represents instance in world-visible time. 
This is + * used in file-system time-stamps + * + * cfs_time_t cfs_time_current(void); + * cfs_time_t cfs_time_add (cfs_time_t, cfs_duration_t); + * cfs_duration_t cfs_time_sub (cfs_time_t, cfs_time_t); + * int cfs_time_before (cfs_time_t, cfs_time_t); + * int cfs_time_beforeq(cfs_time_t, cfs_time_t); + * + * cfs_duration_t cfs_duration_build(int64_t); + * + * time_t cfs_duration_sec (cfs_duration_t); + * void cfs_duration_usec(cfs_duration_t, struct timeval *); + * void cfs_duration_nsec(cfs_duration_t, struct timespec *); + * + * void cfs_fs_time_current(cfs_fs_time_t *); + * time_t cfs_fs_time_sec (cfs_fs_time_t *); + * void cfs_fs_time_usec (cfs_fs_time_t *, struct timeval *); + * void cfs_fs_time_nsec (cfs_fs_time_t *, struct timespec *); + * int cfs_fs_time_before (cfs_fs_time_t *, cfs_fs_time_t *); + * int cfs_fs_time_beforeq(cfs_fs_time_t *, cfs_fs_time_t *); + * + * cfs_duration_t cfs_time_minimal_timeout(void) + * + * CFS_TIME_FORMAT + * CFS_DURATION_FORMAT + * + */ + +#define ONE_BILLION ((u_int64_t)1000000000) +#define ONE_MILLION ((u_int64_t) 1000000) + +#ifndef __KERNEL__ + +/* + * Liblustre. time(2) based implementation. 
+ */
+
+#include
+#include
+#include
+
+/* at user level every time type has one-second resolution */
+typedef time_t cfs_fs_time_t;
+typedef time_t cfs_time_t;
+typedef long cfs_duration_t;
+
+/* current time as returned by time(2) */
+static inline cfs_time_t cfs_time_current(void)
+{
+        return time(NULL);
+}
+
+/* duration of @seconds seconds (durations are already in seconds here) */
+static inline cfs_duration_t cfs_time_seconds(int seconds)
+{
+        return seconds;
+}
+
+/* non-zero iff @t1 is strictly before @t2 */
+static inline int cfs_time_before(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 < t2;
+}
+
+/* non-zero iff @t1 is before or equal to @t2 */
+static inline int cfs_time_beforeq(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 <= t2;
+}
+
+/*
+ * Build a duration from a nanosecond count, truncating toward zero.
+ *
+ * ONE_BILLION is an unsigned 64-bit constant; dividing the signed @nano by
+ * it directly would convert a negative @nano to unsigned first and yield a
+ * huge bogus value, so the divisor is cast to the signed type.
+ */
+static inline cfs_duration_t cfs_duration_build(int64_t nano)
+{
+        return nano / (int64_t)ONE_BILLION;
+}
+
+/* whole seconds contained in duration @d */
+static inline time_t cfs_duration_sec(cfs_duration_t d)
+{
+        return d;
+}
+
+/* duration as struct timeval; the sub-second part is always 0 */
+static inline void cfs_duration_usec(cfs_duration_t d, struct timeval *s)
+{
+        s->tv_sec = d;
+        s->tv_usec = 0;
+}
+
+/* duration as struct timespec; the sub-second part is always 0 */
+static inline void cfs_duration_nsec(cfs_duration_t d, struct timespec *s)
+{
+        s->tv_sec = d;
+        s->tv_nsec = 0;
+}
+
+/* store the current time(2) value in *@t */
+static inline void cfs_fs_time_current(cfs_fs_time_t *t)
+{
+        time(t);
+}
+
+/* seconds part of a file-system time-stamp (the whole value here) */
+static inline time_t cfs_fs_time_sec(cfs_fs_time_t *t)
+{
+        return *t;
+}
+
+/* file-system time-stamp as struct timeval; tv_usec is always 0 */
+static inline void cfs_fs_time_usec(cfs_fs_time_t *t, struct timeval *v)
+{
+        v->tv_sec = *t;
+        v->tv_usec = 0;
+}
+
+/* file-system time-stamp as struct timespec; tv_nsec is always 0 */
+static inline void cfs_fs_time_nsec(cfs_fs_time_t *t, struct timespec *s)
+{
+        s->tv_sec = *t;
+        s->tv_nsec = 0;
+}
+
+/* non-zero iff *@t1 is strictly before *@t2 */
+static inline int cfs_fs_time_before(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return *t1 < *t2;
+}
+
+/* non-zero iff *@t1 is before or equal to *@t2 */
+static inline int cfs_fs_time_beforeq(cfs_fs_time_t *t1, cfs_fs_time_t *t2)
+{
+        return *t1 <= *t2;
+}
+
+/* shortest timeout that can be expressed: one second */
+static inline cfs_duration_t cfs_time_minimal_timeout(void)
+{
+        return 1;
+}
+
+/* constant twin of cfs_time_minimal_timeout(), usable in static initializers */
+#define CFS_MIN_DELAY           (1)
+
+/* point in time @d after @t */
+static inline cfs_time_t cfs_time_add(cfs_time_t t, cfs_duration_t d)
+{
+        return t + d;
+}
+
+/* interval from @t2 to @t1 */
+static inline cfs_duration_t cfs_time_sub(cfs_time_t t1, cfs_time_t t2)
+{
+        return t1 - t2;
+}
+
+/* printf formats used for cfs_time_t and cfs_duration_t values */
+#define CFS_TIME_T              "%lu"
+#define CFS_DURATION_T          "%ld"
+
+/* !__KERNEL__ */
+#endif
+
+/* __LIBCFS_USER_TIME_H__ */
+#endif
+/*
+ * Local variables:
+ * c-indentation-style: "K&R"
+ * c-basic-offset: 8
+ * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/include/linux/Makefile.am b/lnet/include/linux/Makefile.am deleted file mode 100644 index 3c28c6e8..0000000 --- a/lnet/include/linux/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -linuxdir = $(includedir)/linux - -EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h \ - portals_lib.h diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am index 4043f66..55cd240 100644 --- a/lnet/include/lnet/Makefile.am +++ b/lnet/include/lnet/Makefile.am @@ -1,10 +1,8 @@ portalsdir=$(includedir)/portals -if UTILS -portals_HEADERS = list.h -endif +SUBDIRS := darwin linux -EXTRA_DIST = api.h api-support.h build_check.h errno.h \ - internal.h lib-p30.h lib-types.h list.h \ - lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h \ +EXTRA_DIST = api.h api-support.h build_check.h errno.h \ + internal.h kpr.h lib-p30.h lib-types.h \ + myrnal.h nal.h nalids.h p30.h ptlctl.h \ socknal.h stringtab.h types.h diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h index c5994c6..848cf40 100644 --- a/lnet/include/lnet/api-support.h +++ b/lnet/include/lnet/api-support.h @@ -1,4 +1,5 @@ - +#ifndef __API_SUPPORT_H__ +#define __API_SUPPORT_H__ #include "build_check.h" #ifndef __KERNEL__ @@ -14,9 +15,10 @@ #endif #include -#include +#include #include #include #include +#endif diff --git a/lnet/include/linux/.cvsignore b/lnet/include/lnet/darwin/.cvsignore similarity index 100% rename from lnet/include/linux/.cvsignore rename to lnet/include/lnet/darwin/.cvsignore diff --git a/lnet/include/lnet/darwin/Makefile.am b/lnet/include/lnet/darwin/Makefile.am new file mode 100644 index 0000000..b6e7daf --- /dev/null +++ b/lnet/include/lnet/darwin/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST := lib-p30.h lib-types.h p30.h diff --git a/lnet/include/lnet/darwin/lib-lnet.h b/lnet/include/lnet/darwin/lib-lnet.h new file mode 100644 index 0000000..d3b1ba9 --- /dev/null +++ 
b/lnet/include/lnet/darwin/lib-lnet.h @@ -0,0 +1,14 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_DARWIN_LIB_P30_H__ +#define __PORTALS_DARWIN_LIB_P30_H__ + +#ifndef __PORTALS_LIB_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#endif diff --git a/lnet/include/lnet/darwin/lib-p30.h b/lnet/include/lnet/darwin/lib-p30.h new file mode 100644 index 0000000..d3b1ba9 --- /dev/null +++ b/lnet/include/lnet/darwin/lib-p30.h @@ -0,0 +1,14 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_DARWIN_LIB_P30_H__ +#define __PORTALS_DARWIN_LIB_P30_H__ + +#ifndef __PORTALS_LIB_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include + +#endif diff --git a/lnet/include/lnet/darwin/lib-types.h b/lnet/include/lnet/darwin/lib-types.h new file mode 100644 index 0000000..744e566 --- /dev/null +++ b/lnet/include/lnet/darwin/lib-types.h @@ -0,0 +1,15 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_DARWIN_LIB_TYPES_H__ +#define __PORTALS_DARWIN_LIB_TYPES_H__ + +#ifndef __PORTALS_LIB_TYPES_H__ +#error Do not #include this file directly. #include instead +#endif + +#include +#include +#include + +#endif diff --git a/lnet/include/lnet/darwin/lnet.h b/lnet/include/lnet/darwin/lnet.h new file mode 100644 index 0000000..e619fa7 --- /dev/null +++ b/lnet/include/lnet/darwin/lnet.h @@ -0,0 +1,20 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _PORTALS_DARWIN_P30_H_ +#define _PORTALS_DARWIN_P30_H_ + +#ifndef __PORTALS_P30_H__ +#error Do not #include this file directly. 
#include instead +#endif + +/* + * p30.h + * + * User application interface file + */ + +#include +#include + +#endif diff --git a/lnet/include/lnet/darwin/p30.h b/lnet/include/lnet/darwin/p30.h new file mode 100644 index 0000000..e619fa7 --- /dev/null +++ b/lnet/include/lnet/darwin/p30.h @@ -0,0 +1,20 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef _PORTALS_DARWIN_P30_H_ +#define _PORTALS_DARWIN_P30_H_ + +#ifndef __PORTALS_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +/* + * p30.h + * + * User application interface file + */ + +#include +#include + +#endif diff --git a/lnet/include/linux/kpr.h b/lnet/include/lnet/kpr.h similarity index 99% rename from lnet/include/linux/kpr.h rename to lnet/include/lnet/kpr.h index 1127698..23d6e7c 100644 --- a/lnet/include/linux/kpr.h +++ b/lnet/include/lnet/kpr.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _KPR_H -#define _KPR_H +#ifndef __PORTALS_KPR_H__ +#define __PORTALS_KPR_H__ # include /* for ptl_hdr_t */ diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h index b710569..a407379 100644 --- a/lnet/include/lnet/lib-lnet.h +++ b/lnet/include/lnet/lib-lnet.h @@ -6,21 +6,21 @@ * Top level include for library side routines */ -#ifndef _LIB_P30_H_ -#define _LIB_P30_H_ +#ifndef __PORTALS_LIB_P30_H__ +#define __PORTALS_LIB_P30_H__ #include "build_check.h" -#ifdef __KERNEL__ -# include -# include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include #else -# include -# include -# include +#error Unsupported Operating System #endif + #include -#include +#include #include #include #include diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h index b710569..a407379 100644 --- a/lnet/include/lnet/lib-p30.h +++ b/lnet/include/lnet/lib-p30.h @@ -6,21 +6,21 @@ * Top level include for library 
side routines */ -#ifndef _LIB_P30_H_ -#define _LIB_P30_H_ +#ifndef __PORTALS_LIB_P30_H__ +#define __PORTALS_LIB_P30_H__ #include "build_check.h" -#ifdef __KERNEL__ -# include -# include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include #else -# include -# include -# include +#error Unsupported Operating System #endif + #include -#include +#include #include #include #include diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h index cfcef2b..1ac2c56 100644 --- a/lnet/include/lnet/lib-types.h +++ b/lnet/include/lnet/lib-types.h @@ -7,22 +7,24 @@ * exposed to the user application */ -#ifndef _LIB_TYPES_H_ -#define _LIB_TYPES_H_ +#ifndef __PORTALS_LIB_TYPES_H__ +#define __PORTALS_LIB_TYPES_H__ #include "build_check.h" -#include -#include -#ifdef __KERNEL__ -# include -# include -# include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include #else -# define PTL_USE_LIB_FREELIST -# include +#error Unsupported Operating System #endif +#include +#include +#include +#include + typedef char *user_ptr; typedef struct lib_msg_t lib_msg_t; typedef struct lib_ptl_t lib_ptl_t; @@ -271,7 +273,7 @@ typedef struct lib_ni #ifdef __KERNEL__ spinlock_t ni_lock; - wait_queue_head_t ni_waitq; + cfs_waitq_t ni_waitq; #else pthread_mutex_t ni_mutex; pthread_cond_t ni_cond; diff --git a/lnet/include/lnet/linux/.cvsignore b/lnet/include/lnet/linux/.cvsignore new file mode 100644 index 0000000..3dda729 --- /dev/null +++ b/lnet/include/lnet/linux/.cvsignore @@ -0,0 +1,2 @@ +Makefile.in +Makefile diff --git a/lnet/include/lnet/linux/Makefile.am b/lnet/include/lnet/linux/Makefile.am new file mode 100644 index 0000000..b6e7daf --- /dev/null +++ b/lnet/include/lnet/linux/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST := lib-p30.h lib-types.h p30.h diff --git a/lnet/include/lnet/linux/lib-lnet.h b/lnet/include/lnet/linux/lib-lnet.h new file mode 100644 index 0000000..1c88080 --- /dev/null +++ b/lnet/include/lnet/linux/lib-lnet.h @@ -0,0 +1,20 @@ 
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_LINUX_LIB_P30_H__ +#define __PORTALS_LINUX_LIB_P30_H__ + +#ifndef __PORTALS_LIB_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +# include +# include +#else +# include +# include +# include +#endif + +#endif diff --git a/lnet/include/lnet/linux/lib-p30.h b/lnet/include/lnet/linux/lib-p30.h new file mode 100644 index 0000000..1c88080 --- /dev/null +++ b/lnet/include/lnet/linux/lib-p30.h @@ -0,0 +1,20 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_LINUX_LIB_P30_H__ +#define __PORTALS_LINUX_LIB_P30_H__ + +#ifndef __PORTALS_LIB_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +#ifdef __KERNEL__ +# include +# include +#else +# include +# include +# include +#endif + +#endif diff --git a/lnet/include/lnet/linux/lib-types.h b/lnet/include/lnet/linux/lib-types.h new file mode 100644 index 0000000..f896b4b --- /dev/null +++ b/lnet/include/lnet/linux/lib-types.h @@ -0,0 +1,20 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_LINUX_LIB_TYPES_H__ +#define __PORTALS_LINUX_LIB_TYPES_H__ + +#ifndef __PORTALS_LIB_TYPES_H__ +#error Do not #include this file directly. 
#include instead +#endif + +#ifdef __KERNEL__ +# include +# include +# include +#else +# define PTL_USE_LIB_FREELIST +# include +#endif + +#endif diff --git a/lnet/include/lnet/linux/lnet.h b/lnet/include/lnet/linux/lnet.h new file mode 100644 index 0000000..b074837 --- /dev/null +++ b/lnet/include/lnet/linux/lnet.h @@ -0,0 +1,25 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_LINUX_P30_H__ +#define __PORTALS_LINUX_P30_H__ + +#ifndef __PORTALS_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +/* + * p30.h + * + * User application interface file + */ + +#if defined (__KERNEL__) +#include +#include +#else +#include +#include +#endif + +#endif diff --git a/lnet/include/lnet/linux/p30.h b/lnet/include/lnet/linux/p30.h new file mode 100644 index 0000000..b074837 --- /dev/null +++ b/lnet/include/lnet/linux/p30.h @@ -0,0 +1,25 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + */ +#ifndef __PORTALS_LINUX_P30_H__ +#define __PORTALS_LINUX_P30_H__ + +#ifndef __PORTALS_P30_H__ +#error Do not #include this file directly. #include instead +#endif + +/* + * p30.h + * + * User application interface file + */ + +#if defined (__KERNEL__) +#include +#include +#else +#include +#include +#endif + +#endif diff --git a/lnet/include/lnet/list.h b/lnet/include/lnet/list.h deleted file mode 100644 index 37d9952..0000000 --- a/lnet/include/lnet/list.h +++ /dev/null @@ -1,243 +0,0 @@ -#ifndef _LINUX_LIST_H -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. 
- */ - -struct list_head { - struct list_head *next, *prev; -}; - -typedef struct list_head list_t; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head * new, - struct list_head * prev, - struct list_head * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is in an undefined state. 
- */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} -#endif - -#ifndef list_for_each_entry -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} -#endif - -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(struct list_head *head) -{ - return head->next == head; -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. 
- * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next ; pos != (head); pos = pos->next ) - -/** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev ; pos != (head); pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -#endif - -#ifndef list_for_each_entry -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. 
- */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) -#endif - -#ifndef list_for_each_entry_safe -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) -#endif diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h index 4b8631d..9be79b8 100644 --- a/lnet/include/lnet/lnet.h +++ b/lnet/include/lnet/lnet.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _P30_H_ -#define _P30_H_ +#ifndef __PORTALS_P30_H__ +#define __PORTALS_P30_H__ #include "build_check.h" @@ -11,13 +11,12 @@ * * User application interface file */ - -#if defined (__KERNEL__) -#include -#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include #else -#include -#include +#error Unsupported Operating System #endif #include diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h index 6a7df9a..99da747 100644 --- a/lnet/include/lnet/lnetctl.h +++ b/lnet/include/lnet/lnetctl.h @@ -22,8 +22,8 @@ #define _PTLCTL_H_ #include -#include -#include +#include +#include #define PORTALS_DEV_ID 0 #define PORTALS_DEV_PATH "/dev/portals" diff --git a/lnet/include/lnet/p30.h b/lnet/include/lnet/p30.h index 4b8631d..9be79b8 100644 --- a/lnet/include/lnet/p30.h +++ b/lnet/include/lnet/p30.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 
8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: */ -#ifndef _P30_H_ -#define _P30_H_ +#ifndef __PORTALS_P30_H__ +#define __PORTALS_P30_H__ #include "build_check.h" @@ -11,13 +11,12 @@ * * User application interface file */ - -#if defined (__KERNEL__) -#include -#include +#if defined(__linux__) +#include +#elif defined(__APPLE__) +#include #else -#include -#include +#error Unsupported Operating System #endif #include diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h index 6a7df9a..99da747 100644 --- a/lnet/include/lnet/ptlctl.h +++ b/lnet/include/lnet/ptlctl.h @@ -22,8 +22,8 @@ #define _PTLCTL_H_ #include -#include -#include +#include +#include #define PORTALS_DEV_ID 0 #define PORTALS_DEV_PATH "/dev/portals" diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h index 0bada40..6b31187 100644 --- a/lnet/include/lnet/types.h +++ b/lnet/include/lnet/types.h @@ -3,7 +3,7 @@ #include "build_check.h" -#include +#include #include /* This implementation uses the same type for API function return codes and @@ -95,7 +95,7 @@ typedef struct { typedef struct iovec ptl_md_iovec_t; typedef struct { - struct page *kiov_page; + cfs_page_t *kiov_page; unsigned int kiov_len; unsigned int kiov_offset; } ptl_kiov_t; diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h index f45eab7..a3492f4 100644 --- a/lnet/klnds/gmlnd/gmlnd.h +++ b/lnet/klnds/gmlnd/gmlnd.h @@ -52,7 +52,7 @@ #include "portals/nal.h" #include "portals/api.h" #include "portals/errno.h" -#include "linux/kp30.h" +#include "libcfs/kp30.h" #include "portals/p30.h" #include "portals/nal.h" diff --git a/lnet/klnds/iiblnd/iiblnd.h b/lnet/klnds/iiblnd/iiblnd.h index 4f04542..82cdd3c 100644 --- a/lnet/klnds/iiblnd/iiblnd.h +++ b/lnet/klnds/iiblnd/iiblnd.h @@ -50,7 +50,7 @@ #define DEBUG_SUBSYSTEM S_NAL -#include +#include #include #include #include @@ -137,7 +137,7 @@ /* XXX I have no idea. 
*/ #define IBNAL_STARTING_PSN 1 -typedef struct +typedef struct { int kib_io_timeout; /* comms timeout (seconds) */ struct ctl_table_header *kib_sysctl; /* sysctl interface */ @@ -163,8 +163,8 @@ typedef struct __u32 md_rkey; __u64 md_addr; } kib_md_t __attribute__((packed)); - -typedef struct + +typedef struct { int kib_init; /* initialisation state */ __u64 kib_incarnation; /* which one am I */ @@ -196,7 +196,7 @@ typedef struct struct list_head kib_sched_txq; /* tx requiring attention */ struct list_head kib_sched_rxq; /* rx requiring attention */ spinlock_t kib_sched_lock; /* serialise */ - + struct kib_tx *kib_tx_descs; /* all the tx descriptors */ kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ @@ -205,7 +205,7 @@ typedef struct wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */ __u64 kib_next_tx_cookie; /* RDMA completion cookie */ spinlock_t kib_tx_lock; /* serialise */ - + IB_HANDLE kib_hca; /* The HCA */ int kib_port; /* port on the device */ IB_HANDLE kib_pd; /* protection domain */ @@ -232,9 +232,9 @@ typedef struct #define IBNAL_INIT_PD 7 #define IBNAL_INIT_FMR 8 #define IBNAL_INIT_MR 9 -#define IBNAL_INIT_TXD 10 -#define IBNAL_INIT_CQ 11 -#define IBNAL_INIT_ALL 12 +#define IBNAL_INIT_TXD 10 +#define IBNAL_INIT_CQ 11 +#define IBNAL_INIT_ALL 12 /************************************************************************ * Wire message structs. @@ -259,7 +259,7 @@ typedef struct /* these arrays serve two purposes during rdma. they are built on the passive * side and sent to the active side as remote arguments. On the active side - * the descs are used as a data structure on the way to local gather items. + * the descs are used as a data structure on the way to local gather items. 
* the different roles result in split local/remote meaning of desc->rd_key */ typedef struct { @@ -373,7 +373,7 @@ typedef struct kib_connreq } kib_connreq_t; typedef struct kib_conn -{ +{ struct kib_peer *ibc_peer; /* owning peer */ struct list_head ibc_list; /* stash on peer's conn list */ __u64 ibc_incarnation; /* which instance of the peer */ @@ -445,7 +445,7 @@ iibt_get_hca_guids(uint32 *hca_count, EUI64 *hca_guid_list) } static inline FSTATUS -iibt_open_hca(EUI64 hca_guid, +iibt_open_hca(EUI64 hca_guid, IB_COMPLETION_CALLBACK completion_callback, IB_ASYNC_EVENT_CALLBACK async_event_callback, void *arg, @@ -480,55 +480,55 @@ iibt_pd_free(IB_HANDLE pd_handle) } static inline FSTATUS -iibt_register_physical_memory(IB_HANDLE hca_handle, +iibt_register_physical_memory(IB_HANDLE hca_handle, IB_VIRT_ADDR requested_io_va, void *phys_buffers, uint64 nphys_buffers, uint32 io_va_offset, IB_HANDLE pd_handle, IB_ACCESS_CONTROL access, - IB_HANDLE *mem_handle, + IB_HANDLE *mem_handle, IB_VIRT_ADDR *actual_io_va, IB_L_KEY *lkey, IB_R_KEY *rkey) { return IIBT_IF.Vpi.RegisterPhysMemRegion(hca_handle, requested_io_va, phys_buffers, nphys_buffers, - io_va_offset, pd_handle, + io_va_offset, pd_handle, access, mem_handle, actual_io_va, lkey, rkey); } static inline FSTATUS -iibt_register_contig_physical_memory(IB_HANDLE hca_handle, +iibt_register_contig_physical_memory(IB_HANDLE hca_handle, IB_VIRT_ADDR requested_io_va, - IB_MR_PHYS_BUFFER *phys_buffers, + IB_MR_PHYS_BUFFER *phys_buffers, uint64 nphys_buffers, uint32 io_va_offset, IB_HANDLE pd_handle, IB_ACCESS_CONTROL access, - IB_HANDLE *mem_handle, + IB_HANDLE *mem_handle, IB_VIRT_ADDR *actual_io_va, IB_L_KEY *lkey, IB_R_KEY *rkey) { - return IIBT_IF.Vpi.RegisterContigPhysMemRegion(hca_handle, + return IIBT_IF.Vpi.RegisterContigPhysMemRegion(hca_handle, requested_io_va, - phys_buffers, + phys_buffers, nphys_buffers, - io_va_offset, pd_handle, + io_va_offset, pd_handle, access, mem_handle, actual_io_va, lkey, rkey); } static 
inline FSTATUS -iibt_register_memory(IB_HANDLE hca_handle, +iibt_register_memory(IB_HANDLE hca_handle, void *virt_addr, unsigned int length, IB_HANDLE pd_handle, IB_ACCESS_CONTROL access, - IB_HANDLE *mem_handle, + IB_HANDLE *mem_handle, IB_L_KEY *lkey, IB_R_KEY *rkey) { - return IIBT_IF.Vpi.RegisterMemRegion(hca_handle, + return IIBT_IF.Vpi.RegisterMemRegion(hca_handle, virt_addr, length, - pd_handle, + pd_handle, access, mem_handle, lkey, rkey); @@ -568,10 +568,10 @@ iibt_cq_destroy(IB_HANDLE cq_handle) static inline FSTATUS iibt_qp_create(IB_HANDLE hca_handle, IB_QP_ATTRIBUTES_CREATE *create_attr, - void *arg, IB_HANDLE *cq_handle, + void *arg, IB_HANDLE *cq_handle, IB_QP_ATTRIBUTES_QUERY *query_attr) { - return IIBT_IF.Vpi.CreateQP(hca_handle, create_attr, arg, cq_handle, + return IIBT_IF.Vpi.CreateQP(hca_handle, create_attr, arg, cq_handle, query_attr); } @@ -672,7 +672,7 @@ iibt_cm_cancel(IB_HANDLE cep) } static inline FSTATUS -iibt_cm_accept(IB_HANDLE cep, +iibt_cm_accept(IB_HANDLE cep, CM_CONN_INFO *send_info, CM_CONN_INFO *recv_info, PFN_CM_CALLBACK callback, void *arg, IB_HANDLE *new_cep) @@ -736,10 +736,10 @@ static inline int wrq_signals_completion(IB_WORK_REQ *wrq) /******************************************************************************/ static inline struct list_head * -kibnal_nid2peerlist (ptl_nid_t nid) +kibnal_nid2peerlist (ptl_nid_t nid) { unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - + return (&kibnal_data.kib_peers [hash]); } @@ -797,14 +797,14 @@ kibnal_show_rdma_attr (kib_conn_t *conn) { struct ib_qp_attribute qp_attr; int rc; - + memset (&qp_attr, 0, sizeof(qp_attr)); rc = ib_qp_query(conn->ibc_qp, &qp_attr); if (rc != 0) { CERROR ("Can't get qp attrs: %d\n", rc); return; } - + CWARN ("RDMA CAPABILITY: write %s read %s\n", (qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ? (qp_attr.enable_rdma_write ? 
"enabled" : "disabled") : "invalid", @@ -818,7 +818,7 @@ static inline __u64 kibnal_page2phys (struct page *p) { __u64 page_number = p - mem_map; - + return (page_number << PAGE_SHIFT); } #else @@ -863,7 +863,7 @@ extern void kibnal_destroy_peer (kib_peer_t *peer); extern int kibnal_del_peer (ptl_nid_t nid, int single_share); extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid); extern void kibnal_unlink_peer_locked (kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, +extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation); extern kib_conn_t *kibnal_create_conn (void); extern void kibnal_put_conn (kib_conn_t *conn); @@ -881,9 +881,9 @@ extern int kibnal_scheduler(void *arg); extern int kibnal_connd (void *arg); extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); extern void kibnal_close_conn (kib_conn_t *conn, int why); -extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, - unsigned int niov, +extern void kibnal_start_active_rdma (int type, int status, + kib_rx_t *rx, lib_msg_t *libmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t nob); diff --git a/lnet/klnds/lolnd/autoMakefile.am b/lnet/klnds/lolnd/autoMakefile.am index d8f2639..ee6aa98 100644 --- a/lnet/klnds/lolnd/autoMakefile.am +++ b/lnet/klnds/lolnd/autoMakefile.am @@ -5,9 +5,11 @@ if MODULES if !CRAY_PORTALS +if LINUX modulenet_DATA = klonal$(KMODEXT) endif endif +endif MOSTLYCLEANFILES = *.o *.ko *.mod.c DIST_SOURCES = $(klonal-objs:%.o=%.c) lonal.h diff --git a/lnet/klnds/lolnd/lolnd.h b/lnet/klnds/lolnd/lolnd.h index 2beb40d..6d8d77d 100644 --- a/lnet/klnds/lolnd/lolnd.h +++ b/lnet/klnds/lolnd/lolnd.h @@ -35,7 +35,7 @@ #define DEBUG_SUBSYSTEM S_NAL -#include +#include #include #include #include diff --git a/lnet/klnds/openiblnd/openiblnd.h b/lnet/klnds/openiblnd/openiblnd.h index c3e9837..3170c63 100644 --- a/lnet/klnds/openiblnd/openiblnd.h +++ 
b/lnet/klnds/openiblnd/openiblnd.h @@ -53,7 +53,7 @@ #define DEBUG_SUBSYSTEM S_NAL -#include +#include #include #include #include @@ -121,7 +121,7 @@ //#define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_PROCESS #define IBNAL_CALLBACK_CTXT IB_CQ_CALLBACK_INTERRUPT -typedef struct +typedef struct { int kib_io_timeout; /* comms timeout (seconds) */ int kib_listener_timeout; /* listener's timeout */ @@ -140,8 +140,8 @@ typedef struct struct ib_mr *ibp_handle; /* mapped region handle */ struct page *ibp_pages[0]; } kib_pages_t; - -typedef struct + +typedef struct { int kib_init; /* initialisation state */ __u64 kib_incarnation; /* which one am I */ @@ -180,7 +180,7 @@ typedef struct struct list_head kib_sched_txq; /* tx requiring attention */ struct list_head kib_sched_rxq; /* rx requiring attention */ spinlock_t kib_sched_lock; /* serialise */ - + struct kib_tx *kib_tx_descs; /* all the tx descriptors */ kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ @@ -189,7 +189,7 @@ typedef struct wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */ __u64 kib_next_tx_cookie; /* RDMA completion cookie */ spinlock_t kib_tx_lock; /* serialise */ - + struct ib_device *kib_device; /* "the" device */ struct ib_device_properties kib_device_props; /* its properties */ int kib_port; /* port on the device */ @@ -199,7 +199,7 @@ typedef struct struct ib_fmr_pool *kib_fmr_pool; /* fast memory region pool */ #endif struct ib_cq *kib_cq; /* completion queue */ - + } kib_data_t; #define IBNAL_INIT_NOTHING 0 @@ -362,7 +362,7 @@ typedef struct kib_connreq } kib_connreq_t; typedef struct kib_conn -{ +{ struct kib_peer *ibc_peer; /* owning peer */ struct list_head ibc_list; /* stash on peer's conn list */ __u64 ibc_incarnation; /* which instance of the peer */ @@ -412,10 +412,10 @@ extern kib_data_t kibnal_data; extern kib_tunables_t kibnal_tunables; static inline struct list_head * -kibnal_nid2peerlist (ptl_nid_t nid) +kibnal_nid2peerlist (ptl_nid_t nid) { unsigned int hash = 
((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - + return (&kibnal_data.kib_peers [hash]); } @@ -445,14 +445,14 @@ kibnal_show_rdma_attr (kib_conn_t *conn) { struct ib_qp_attribute qp_attr; int rc; - + memset (&qp_attr, 0, sizeof(qp_attr)); rc = ib_qp_query(conn->ibc_qp, &qp_attr); if (rc != 0) { CERROR ("Can't get qp attrs: %d\n", rc); return; } - + CWARN ("RDMA CAPABILITY: write %s read %s\n", (qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ? (qp_attr.enable_rdma_write ? "enabled" : "disabled") : "invalid", @@ -522,7 +522,7 @@ extern void kibnal_put_peer (kib_peer_t *peer); extern int kibnal_del_peer (ptl_nid_t nid, int single_share); extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid); extern void kibnal_unlink_peer_locked (kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, +extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation); extern kib_conn_t *kibnal_create_conn (void); extern void kibnal_put_conn (kib_conn_t *conn); @@ -535,7 +535,7 @@ extern void kibnal_check_sends (kib_conn_t *conn); extern tTS_IB_CM_CALLBACK_RETURN kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg); -extern tTS_IB_CM_CALLBACK_RETURN +extern tTS_IB_CM_CALLBACK_RETURN kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid, void *param, void *arg); @@ -548,9 +548,9 @@ extern int kibnal_reaper (void *arg); extern void kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg); extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); extern int kibnal_close_conn (kib_conn_t *conn, int why); -extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, - unsigned int niov, +extern void kibnal_start_active_rdma (int type, int status, + kib_rx_t *rx, lib_msg_t *libmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, int offset, int nob); diff --git a/lnet/klnds/qswlnd/qswlnd.h 
b/lnet/klnds/qswlnd/qswlnd.h index 6e04752..c138be4 100644 --- a/lnet/klnds/qswlnd/qswlnd.h +++ b/lnet/klnds/qswlnd/qswlnd.h @@ -18,7 +18,7 @@ * along with Lustre; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * - * Basic library routines. + * Basic library routines. * */ @@ -74,8 +74,8 @@ #define DEBUG_SUBSYSTEM S_NAL -#include -#include +#include +#include #include #include #include @@ -215,7 +215,7 @@ typedef struct int kqn_optimized_gets; /* optimized GETs? */ #if CONFIG_SYSCTL struct ctl_table_header *kqn_sysctl; /* sysctl interface */ -#endif +#endif } kqswnal_tunables_t; typedef struct @@ -234,7 +234,7 @@ typedef struct wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ atomic_t kqn_pending_txs; /* # transmits being prepped */ - + spinlock_t kqn_sched_lock; /* serialise packet schedulers */ wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ @@ -282,24 +282,24 @@ extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); extern void kqswnal_rx_done (kqswnal_rx_t *krx); static inline ptl_nid_t -kqswnal_elanid2nid (int elanid) +kqswnal_elanid2nid (int elanid) { return (kqswnal_data.kqn_nid_offset + elanid); } static inline int -kqswnal_nid2elanid (ptl_nid_t nid) +kqswnal_nid2elanid (ptl_nid_t nid) { /* not in this cluster? 
*/ if (nid < kqswnal_data.kqn_nid_offset || nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) return (-1); - + return (nid - kqswnal_data.kqn_nid_offset); } static inline ptl_nid_t -kqswnal_rx_nid(kqswnal_rx_t *krx) +kqswnal_rx_nid(kqswnal_rx_t *krx) { return (kqswnal_elanid2nid(ep_rxd_node(krx->krx_rxd))); } @@ -318,10 +318,10 @@ kqswnal_pages_spanned (void *base, int nob) static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob) { unsigned char *ptr = (unsigned char *)base; - + while (nob-- > 0) sum += *ptr++; - + return (sum); } #endif @@ -349,7 +349,7 @@ static inline void kqswnal_rx_decref (kqswnal_rx_t *krx) #define EP_ENOMEM ENOMEM static inline EP_XMTR * -ep_alloc_xmtr(EP_DEV *e) +ep_alloc_xmtr(EP_DEV *e) { return (ep_alloc_large_xmtr(e)); } @@ -361,7 +361,7 @@ ep_alloc_rcvr(EP_DEV *e, int svc, int nenv) } static inline void -ep_free_xmtr(EP_XMTR *x) +ep_free_xmtr(EP_XMTR *x) { ep_free_large_xmtr(x); } diff --git a/lnet/klnds/ralnd/ralnd.h b/lnet/klnds/ralnd/ralnd.h index e6602dd..0843058 100644 --- a/lnet/klnds/ralnd/ralnd.h +++ b/lnet/klnds/ralnd/ralnd.h @@ -53,8 +53,7 @@ #define DEBUG_SUBSYSTEM S_NAL -#include -#include +#include #include #include #include diff --git a/lnet/klnds/socklnd/Info.plist b/lnet/klnds/socklnd/Info.plist new file mode 100644 index 0000000..11be93d --- /dev/null +++ b/lnet/klnds/socklnd/Info.plist @@ -0,0 +1,37 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ksocknal + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.ksocknal + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kernel.bsd + 1.1 + com.apple.kernel.iokit + 1.0.0b1 + com.apple.kernel.mach + 1.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + com.clusterfs.lustre.portals + 1.0.0 + + + diff --git a/lnet/klnds/socklnd/Makefile.in b/lnet/klnds/socklnd/Makefile.in index 633b455..7fe9638 100644 --- a/lnet/klnds/socklnd/Makefile.in +++ b/lnet/klnds/socklnd/Makefile.in @@ -1,5 +1,6 @@ MODULES := ksocknal -ksocknal-objs := socknal.o socknal_cb.o + +ksocknal-objs := socknal.o socknal_cb.o socknal_lib-linux.o # If you don't build with -O2, your modules won't insert, becahse htonl is # just special that way. diff --git a/lnet/klnds/socklnd/autoMakefile.am b/lnet/klnds/socklnd/autoMakefile.am index 070b649..63c095d 100644 --- a/lnet/klnds/socklnd/autoMakefile.am +++ b/lnet/klnds/socklnd/autoMakefile.am @@ -5,9 +5,31 @@ if MODULES if !CRAY_PORTALS + +if LINUX modulenet_DATA = ksocknal$(KMODEXT) endif + +if DARWIN +macos_PROGRAMS := ksocknal + +ksocknal_SOURCES := socknal.c socknal_cb.c socknal_lib-darwin.c + +ksocknal_CFLAGS := $(EXTRA_KCFLAGS) +ksocknal_LDFLAGS := $(EXTRA_KLDFLAGS) +ksocknal_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install-data-hook: fix-kext-ownership + +endif + endif +endif + +EXTRA_DIST := Info.plist -MOSTLYCLEANFILES = *.o *.ko *.mod.c -DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h +MOSTLYCLEANFILES = *.o *.ko *.mod.c socknal_lib.c +DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal_lib-darwin.c \ + socknal_lib-darwin.h socknal_lib-linux.h socknal.h diff --git a/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj b/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj new file mode 100644 index 0000000..cab8b43 --- /dev/null +++ b/lnet/klnds/socklnd/ksocklnd.xcode/project.pbxproj @@ -0,0 +1,287 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = ksocknal; + refType = 4; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 1957C5680737C71F00425049 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = socknal.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 1957C5690737C71F00425049 = { + fileRef = 1957C5680737C71F00425049; + isa = PBXBuildFile; + settings = { + }; + }; + 1957C56A0737C72F00425049 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = socknal_cb.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 1957C56B0737C72F00425049 = { + fileRef = 1957C56A0737C72F00425049; + isa = 
PBXBuildFile; + settings = { + }; + }; + 1957C5B20737C78E00425049 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = socknal_lib.c; + path = arch/xnu/socknal_lib.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 1957C5B30737C78E00425049 = { + fileRef = 1957C5B20737C78E00425049; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 247142CAFF3F8F9811CA285C = { + children = ( + 1957C5B20737C78E00425049, + 1957C56A0737C72F00425049, + 1957C5680737C71F00425049, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType = 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = "../../include ./arch/xnu"; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.knals.ksocknal; + MODULE_START = ksocknal_start; + MODULE_STOP = ksocknal_stop; + MODULE_VERSION = 1.0.1; + OTHER_CFLAGS = "-D__KERNEL__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = ksocknal; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = ksocknal; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = ksocknal; + productReference = 32A4FEC40562C75800D090E7; + 
productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 1957C5690737C71F00425049, + 1957C56B0737C72F00425049, + 1957C5B30737C78E00425049, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = ksocknal.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 7642770..07078ca 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -37,68 +37,6 @@ kpr_nal_interface_t ksocknal_router_interface = { kprni_notify: ksocknal_notify, }; -#ifdef CONFIG_SYSCTL -#define SOCKNAL_SYSCTL 200 - -#define SOCKNAL_SYSCTL_TIMEOUT 1 -#define SOCKNAL_SYSCTL_EAGER_ACK 2 -#define SOCKNAL_SYSCTL_ZERO_COPY 3 -#define SOCKNAL_SYSCTL_TYPED 4 -#define SOCKNAL_SYSCTL_MIN_BULK 5 -#define SOCKNAL_SYSCTL_BUFFER_SIZE 6 -#define SOCKNAL_SYSCTL_NAGLE 7 -#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8 -#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9 -#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10 -#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11 - -static ctl_table ksocknal_ctl_table[] = { - {SOCKNAL_SYSCTL_TIMEOUT, "timeout", - &ksocknal_tunables.ksnd_io_timeout, sizeof (int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", - &ksocknal_tunables.ksnd_eager_ack, sizeof (int), - 0644, NULL, &proc_dointvec}, -#if SOCKNAL_ZC - {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", - &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), - 0644, NULL, &proc_dointvec}, -#endif - {SOCKNAL_SYSCTL_TYPED, "typed", - &ksocknal_tunables.ksnd_typed_conns, sizeof (int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", - &ksocknal_tunables.ksnd_min_bulk, sizeof (int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size", - &ksocknal_tunables.ksnd_buffer_size, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_NAGLE, 
"nagle", - &ksocknal_tunables.ksnd_nagle, sizeof(int), - 0644, NULL, &proc_dointvec}, -#if CPU_AFFINITY - {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity", - &ksocknal_tunables.ksnd_irq_affinity, sizeof(int), - 0644, NULL, &proc_dointvec}, -#endif - {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle", - &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count", - &ksocknal_tunables.ksnd_keepalive_count, sizeof(int), - 0644, NULL, &proc_dointvec}, - {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl", - &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int), - 0644, NULL, &proc_dointvec}, - { 0 } -}; - -static ctl_table ksocknal_top_ctl_table[] = { - {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, - { 0 } -}; -#endif - int ksocknal_set_mynid(ptl_nid_t nid) { @@ -117,54 +55,6 @@ ksocknal_set_mynid(ptl_nid_t nid) return (0); } -void -ksocknal_bind_irq (unsigned int irq) -{ -#if (defined(CONFIG_SMP) && CPU_AFFINITY) - int bind; - int cpu; - unsigned long flags; - char cmdline[64]; - ksock_irqinfo_t *info; - char *argv[] = {"/bin/sh", - "-c", - cmdline, - NULL}; - char *envp[] = {"HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - LASSERT (irq < NR_IRQS); - if (irq == 0) /* software NIC or affinity disabled */ - return; - - info = &ksocknal_data.ksnd_irqinfo[irq]; - - write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - - LASSERT (info->ksni_valid); - bind = !info->ksni_bound; - info->ksni_bound = 1; - - write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); - - if (!bind) /* bound already */ - return; - - cpu = ksocknal_irqsched2cpu(info->ksni_sched); - snprintf (cmdline, sizeof (cmdline), - "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); - - printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n", - irq, cpu, cmdline); - - /* FIXME: Find a better method of setting IRQ affinity... 
- */ - - USERMODEHELPER(argv[0], argv, envp); -#endif -} - ksock_interface_t * ksocknal_ip2iface(__u32 ip) { @@ -174,11 +64,11 @@ ksocknal_ip2iface(__u32 ip) for (i = 0; i < ksocknal_data.ksnd_ninterfaces; i++) { LASSERT(i < SOCKNAL_MAX_INTERFACES); iface = &ksocknal_data.ksnd_interfaces[i]; - + if (iface->ksni_ipaddr == ip) return (iface); } - + return (NULL); } @@ -193,7 +83,7 @@ ksocknal_create_route (__u32 ipaddr, int port) atomic_set (&route->ksnr_refcount, 1); route->ksnr_peer = NULL; - route->ksnr_timeout = jiffies; + route->ksnr_timeout = cfs_time_current(); route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; route->ksnr_ipaddr = ipaddr; route->ksnr_port = port; @@ -244,9 +134,9 @@ ksocknal_create_peer (ptl_nid_t nid) peer->ksnp_nid = nid; atomic_set (&peer->ksnp_refcount, 1); /* 1 ref for caller */ peer->ksnp_closing = 0; - INIT_LIST_HEAD (&peer->ksnp_conns); - INIT_LIST_HEAD (&peer->ksnp_routes); - INIT_LIST_HEAD (&peer->ksnp_tx_queue); + CFS_INIT_LIST_HEAD (&peer->ksnp_conns); + CFS_INIT_LIST_HEAD (&peer->ksnp_routes); + CFS_INIT_LIST_HEAD (&peer->ksnp_tx_queue); atomic_inc (&ksocknal_data.ksnd_npeers); return (peer); @@ -346,7 +236,7 @@ ksocknal_unlink_peer_locked (ksock_peer_t *peer) int ksocknal_get_peer_info (int index, ptl_nid_t *nid, - __u32 *myip, __u32 *peer_ip, int *port, + __u32 *myip, __u32 *peer_ip, int *port, int *conn_count, int *share_count) { ksock_peer_t *peer; @@ -360,7 +250,7 @@ ksocknal_get_peer_info (int index, ptl_nid_t *nid, read_lock (&ksocknal_data.ksnd_global_lock); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) { - + list_for_each (ptmp, &ksocknal_data.ksnd_peers[i]) { peer = list_entry (ptmp, ksock_peer_t, ksnp_list); @@ -368,7 +258,7 @@ ksocknal_get_peer_info (int index, ptl_nid_t *nid, list_empty(&peer->ksnp_routes)) { if (index-- > 0) continue; - + *nid = peer->ksnp_nid; *myip = 0; *peer_ip = 0; @@ -382,7 +272,7 @@ ksocknal_get_peer_info (int index, ptl_nid_t *nid, for (j = 0; j < peer->ksnp_n_passive_ips; 
j++) { if (index-- > 0) continue; - + *nid = peer->ksnp_nid; *myip = peer->ksnp_passive_ips[j]; *peer_ip = 0; @@ -392,7 +282,7 @@ ksocknal_get_peer_info (int index, ptl_nid_t *nid, rc = 0; goto out; } - + list_for_each (rtmp, &peer->ksnp_routes) { if (index-- > 0) continue; @@ -430,24 +320,24 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) if (route->ksnr_myipaddr == 0) { /* route wasn't bound locally yet (the initial route) */ CWARN("Binding "LPX64" %u.%u.%u.%u to %u.%u.%u.%u\n", - peer->ksnp_nid, + peer->ksnp_nid, HIPQUAD(route->ksnr_ipaddr), HIPQUAD(conn->ksnc_myipaddr)); } else { CWARN("Rebinding "LPX64" %u.%u.%u.%u from " "%u.%u.%u.%u to %u.%u.%u.%u\n", - peer->ksnp_nid, + peer->ksnp_nid, HIPQUAD(route->ksnr_ipaddr), HIPQUAD(route->ksnr_myipaddr), HIPQUAD(conn->ksnc_myipaddr)); - + iface = ksocknal_ip2iface(route->ksnr_myipaddr); - if (iface != NULL) + if (iface != NULL) iface->ksni_nroutes--; } route->ksnr_myipaddr = conn->ksnc_myipaddr; iface = ksocknal_ip2iface(route->ksnr_myipaddr); - if (iface != NULL) + if (iface != NULL) iface->ksni_nroutes++; } @@ -457,7 +347,7 @@ ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn) /* Successful connection => further attempts can * proceed immediately */ - route->ksnr_timeout = jiffies; + route->ksnr_timeout = cfs_time_current(); route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL; } @@ -488,7 +378,7 @@ ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route) atomic_inc (&peer->ksnp_refcount); /* peer's routelist takes over my ref on 'route' */ list_add_tail(&route->ksnr_list, &peer->ksnp_routes); - + list_for_each(tmp, &peer->ksnp_conns) { conn = list_entry(tmp, ksock_conn_t, ksnc_list); type = conn->ksnc_type; @@ -518,7 +408,7 @@ ksocknal_del_route_locked (ksock_route_t *route) if (conn->ksnc_route != route) continue; - + ksocknal_close_conn_locked (conn, 0); } @@ -549,7 +439,7 @@ ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port) 
ksock_peer_t *peer2; ksock_route_t *route; ksock_route_t *route2; - + if (nid == PTL_NID_ANY) return (-EINVAL); @@ -579,10 +469,10 @@ ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port) route2 = NULL; list_for_each (tmp, &peer->ksnp_routes) { route2 = list_entry(tmp, ksock_route_t, ksnr_list); - + if (route2->ksnr_ipaddr == ipaddr) break; - + route2 = NULL; } if (route2 == NULL) { @@ -628,7 +518,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) /* This deletes associated conns too */ ksocknal_del_route_locked (route); } - + if (single_share) break; } @@ -638,7 +528,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) route = list_entry(tmp, ksock_route_t, ksnr_list); nshared += route->ksnr_share_count; } - + if (nshared == 0) { /* remove everything else if there are no explicit entries * left */ @@ -657,7 +547,7 @@ ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share) ksocknal_close_conn_locked(conn, 0); } } - + /* NB peer unlinks itself when last conn/route is removed */ } @@ -735,62 +625,6 @@ ksocknal_get_conn_by_idx (int index) return (NULL); } -int -ksocknal_get_conn_addrs (ksock_conn_t *conn) -{ - struct sockaddr_in sin; - int len = sizeof (sin); - int rc; - - rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, - (struct sockaddr *)&sin, &len, 2); - /* Didn't need the {get,put}connsock dance to deref ksnc_sock... 
*/ - LASSERT (!conn->ksnc_closing); - - if (rc != 0) { - CERROR ("Error %d getting sock peer IP\n", rc); - return rc; - } - - conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr); - conn->ksnc_port = ntohs (sin.sin_port); - - rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, - (struct sockaddr *)&sin, &len, 0); - if (rc != 0) { - CERROR ("Error %d getting sock local IP\n", rc); - return rc; - } - - conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr); - - return 0; -} - -unsigned int -ksocknal_sock_irq (struct socket *sock) -{ - int irq = 0; - struct dst_entry *dst; - - if (!ksocknal_tunables.ksnd_irq_affinity) - return 0; - - dst = sk_dst_get (sock->sk); - if (dst != NULL) { - if (dst->dev != NULL) { - irq = dst->dev->irq; - if (irq >= NR_IRQS) { - CERROR ("Unexpected IRQ %x\n", irq); - irq = 0; - } - } - dst_release (dst); - } - - return (irq); -} - ksock_sched_t * ksocknal_choose_scheduler_locked (unsigned int irq) { @@ -840,7 +674,7 @@ ksocknal_local_ipvec (__u32 *ipaddrs) ipaddrs[i] = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr; LASSERT (ipaddrs[i] != 0); } - + read_unlock (&ksocknal_data.ksnd_global_lock); return (nip); } @@ -854,25 +688,25 @@ ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips) int this_xor; int this_netmatch; int i; - + for (i = 0; i < nips; i++) { if (ips[i] == 0) continue; this_xor = (ips[i] ^ iface->ksni_ipaddr); this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 
1 : 0; - + if (!(best < 0 || best_netmatch < this_netmatch || - (best_netmatch == this_netmatch && + (best_netmatch == this_netmatch && best_xor > this_xor))) continue; - + best = i; best_netmatch = this_netmatch; best_xor = this_xor; } - + LASSERT (best >= 0); return (best); } @@ -901,7 +735,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) /* Also note that I'm not going to return more than n_peerips * interfaces, even if I have more myself */ - + write_lock_irqsave(global_lock, flags); LASSERT (n_peerips <= SOCKNAL_MAX_INTERFACES); @@ -914,10 +748,10 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) /* If we have any new interfaces, first tick off all the * peer IPs that match old interfaces, then choose new - * interfaces to match the remaining peer IPS. + * interfaces to match the remaining peer IPS. * We don't forget interfaces we've stopped using; we might * start using them again... */ - + if (i < peer->ksnp_n_passive_ips) { /* Old interface. 
*/ ip = peer->ksnp_passive_ips[i]; @@ -932,7 +766,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) best_iface = NULL; best_netmatch = 0; best_npeers = 0; - + for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) { iface = &ksocknal_data.ksnd_interfaces[j]; ip = iface->ksni_ipaddr; @@ -940,7 +774,7 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) for (k = 0; k < peer->ksnp_n_passive_ips; k++) if (peer->ksnp_passive_ips[k] == ip) break; - + if (k < peer->ksnp_n_passive_ips) /* using it already */ continue; @@ -964,24 +798,24 @@ ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips) peer->ksnp_passive_ips[i] = ip; peer->ksnp_n_passive_ips = i+1; } - + LASSERT (best_iface != NULL); /* mark the best matching peer IP used */ j = ksocknal_match_peerip(best_iface, peerips, n_peerips); peerips[j] = 0; } - + /* Overwrite input peer IP addresses */ memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips)); - + write_unlock_irqrestore(global_lock, flags); - + return (n_ips); } void -ksocknal_create_routes(ksock_peer_t *peer, int port, +ksocknal_create_routes(ksock_peer_t *peer, int port, __u32 *peer_ipaddrs, int npeer_ipaddrs) { ksock_route_t *newroute = NULL; @@ -1005,7 +839,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, write_lock_irqsave(global_lock, flags); LASSERT (npeer_ipaddrs <= SOCKNAL_MAX_INTERFACES); - + for (i = 0; i < npeer_ipaddrs; i++) { if (newroute != NULL) { newroute->ksnr_ipaddr = peer_ipaddrs[i]; @@ -1018,7 +852,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, write_lock_irqsave(global_lock, flags); } - + /* Already got a route? 
*/ route = NULL; list_for_each(rtmp, &peer->ksnp_routes) { @@ -1026,7 +860,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, if (route->ksnr_ipaddr == newroute->ksnr_ipaddr) break; - + route = NULL; } if (route != NULL) @@ -1054,21 +888,21 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, if (route != NULL) continue; - this_netmatch = (((iface->ksni_ipaddr ^ - newroute->ksnr_ipaddr) & + this_netmatch = (((iface->ksni_ipaddr ^ + newroute->ksnr_ipaddr) & iface->ksni_netmask) == 0) ? 1 : 0; - + if (!(best_iface == NULL || best_netmatch < this_netmatch || (best_netmatch == this_netmatch && best_nroutes > iface->ksni_nroutes))) continue; - + best_iface = iface; best_netmatch = this_netmatch; best_nroutes = iface->ksni_nroutes; } - + if (best_iface == NULL) continue; @@ -1078,7 +912,7 @@ ksocknal_create_routes(ksock_peer_t *peer, int port, ksocknal_add_route_locked(peer, newroute); newroute = NULL; } - + write_unlock_irqrestore(global_lock, flags); if (newroute != NULL) ksocknal_put_route(newroute); @@ -1108,15 +942,15 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) * have been created in userland and (b) we need to refcount the * socket so that we don't close it while I/O is being done on * it, and sock->file has that pre-cooked... 
*/ - LASSERT (sock->file != NULL); - LASSERT (file_count(sock->file) > 0); + LASSERT (KSN_SOCK2FILE(sock) != NULL); + LASSERT (cfs_file_count(KSN_SOCK2FILE(sock)) > 0); LASSERT (route == NULL || !passive); - rc = ksocknal_setup_sock (sock); + rc = ksocknal_lib_setup_sock (sock); if (rc != 0) return (rc); - irq = ksocknal_sock_irq (sock); + irq = ksocknal_lib_sock_irq (sock); PORTAL_ALLOC(conn, sizeof(*conn)); if (conn == NULL) @@ -1127,21 +961,20 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) conn->ksnc_route = NULL; conn->ksnc_sock = sock; conn->ksnc_type = type; - conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; - conn->ksnc_saved_write_space = sock->sk->sk_write_space; + ksocknal_lib_save_callback(sock, conn); atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for me */ conn->ksnc_rx_ready = 0; conn->ksnc_rx_scheduled = 0; ksocknal_new_packet (conn, 0); - INIT_LIST_HEAD (&conn->ksnc_tx_queue); + CFS_INIT_LIST_HEAD (&conn->ksnc_tx_queue); conn->ksnc_tx_ready = 0; conn->ksnc_tx_scheduled = 0; atomic_set (&conn->ksnc_tx_nob, 0); /* stash conn's local and remote addrs */ - rc = ksocknal_get_conn_addrs (conn); + rc = ksocknal_lib_get_conn_addrs (conn); if (rc != 0) goto failed_0; @@ -1193,9 +1026,9 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) write_unlock_irqrestore(global_lock, flags); } - + if (!passive) { - ksocknal_create_routes(peer, conn->ksnc_port, + ksocknal_create_routes(peer, conn->ksnc_port, ipaddrs, nipaddrs); rc = 0; } else { @@ -1205,7 +1038,7 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) } if (rc < 0) goto failed_1; - + write_lock_irqsave (global_lock, flags); if (peer->ksnp_closing || @@ -1255,17 +1088,17 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) if (route->ksnr_ipaddr != conn->ksnc_ipaddr) continue; - + ksocknal_associate_route_conn_locked(route, conn); break; } /* Give conn a ref on sock->file since we're going to 
return success */ - get_file(sock->file); + cfs_get_file(KSN_SOCK2FILE(sock)); conn->ksnc_peer = peer; /* conn takes my ref on peer */ conn->ksnc_incarnation = incarnation; - peer->ksnp_last_alive = jiffies; + peer->ksnp_last_alive = cfs_time_current(); peer->ksnp_error = 0; sched = ksocknal_choose_scheduler_locked (irq); @@ -1273,18 +1106,15 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) conn->ksnc_scheduler = sched; /* Set the deadline for the outgoing HELLO to drain */ - conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued; - conn->ksnc_tx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; + conn->ksnc_tx_bufnob = SOCK_WMEM_QUEUED(sock); + conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout); mb(); /* order with adding to peer's conn list */ list_add (&conn->ksnc_list, &peer->ksnp_conns); atomic_inc (&conn->ksnc_refcount); /* NB my callbacks block while I hold ksnd_global_lock */ - sock->sk->sk_user_data = conn; - sock->sk->sk_data_ready = ksocknal_data_ready; - sock->sk->sk_write_space = ksocknal_write_space; + ksocknal_lib_set_callback(sock, conn); /* Take all the packets blocking for a connection. * NB, it might be nicer to share these blocked packets among any @@ -1305,18 +1135,17 @@ ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type) write_unlock_irqrestore (global_lock, flags); - ksocknal_bind_irq (irq); + ksocknal_lib_bind_irq (irq); /* Call the callbacks right now to get things going. 
*/ if (ksocknal_getconnsock(conn) == 0) { - ksocknal_data_ready (sock->sk, 0); - ksocknal_write_space (sock->sk); + ksocknal_lib_act_callback(sock, conn); ksocknal_putconnsock(conn); } CWARN("New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d" " incarnation:"LPX64" sched[%d]/%d\n", - nid, HIPQUAD(conn->ksnc_myipaddr), + nid, HIPQUAD(conn->ksnc_myipaddr), HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation, (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq); @@ -1355,7 +1184,7 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) LASSERT (!conn->ksnc_closing); conn->ksnc_closing = 1; atomic_inc (&ksocknal_data.ksnd_nclosing_conns); - + /* ksnd_deathrow_conns takes over peer's ref */ list_del (&conn->ksnc_list); @@ -1369,11 +1198,11 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) conn2 = NULL; list_for_each(tmp, &peer->ksnp_conns) { conn2 = list_entry(tmp, ksock_conn_t, ksnc_list); - + if (conn2->ksnc_route == route && conn2->ksnc_type == conn->ksnc_type) break; - + conn2 = NULL; } if (conn2 == NULL) @@ -1403,8 +1232,8 @@ ksocknal_close_conn_locked (ksock_conn_t *conn, int error) spin_lock (&ksocknal_data.ksnd_reaper_lock); list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns); - wake_up (&ksocknal_data.ksnd_reaper_waitq); - + cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); + spin_unlock (&ksocknal_data.ksnd_reaper_lock); } @@ -1437,7 +1266,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) /* extra ref for scheduler */ atomic_inc (&conn->ksnc_refcount); - wake_up (&sched->kss_waitq); + cfs_waitq_signal (&sched->kss_waitq); } spin_unlock_irqrestore (&sched->kss_lock, flags); @@ -1445,16 +1274,7 @@ ksocknal_terminate_conn (ksock_conn_t *conn) /* serialise with callbacks */ write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); - /* Remove conn's network callbacks. - * NB I _have_ to restore the callback, rather than storing a noop, - * since the socket could survive past this module being unloaded!! 
*/ - conn->ksnc_sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; - conn->ksnc_sock->sk->sk_write_space = conn->ksnc_saved_write_space; - - /* A callback could be in progress already; they hold a read lock - * on ksnd_global_lock (to serialise with me) and NOOP if - * sk_user_data is NULL. */ - conn->ksnc_sock->sk->sk_user_data = NULL; + ksocknal_lib_reset_callback(conn->ksnc_sock, conn); /* OK, so this conn may not be completely disengaged from its * scheduler yet, but it _has_ committed to terminate... */ @@ -1463,13 +1283,14 @@ ksocknal_terminate_conn (ksock_conn_t *conn) if (peer->ksnp_error != 0) { /* peer's last conn closed in error */ LASSERT (list_empty (&peer->ksnp_conns)); - + /* convert peer's last-known-alive timestamp from jiffies */ do_gettimeofday (&now); - then = now.tv_sec - (jiffies - peer->ksnp_last_alive)/HZ; + then = now.tv_sec - cfs_duration_sec(cfs_time_sub(cfs_time_current(), + peer->ksnp_last_alive)); notify = 1; } - + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); /* The socket is closed on the final put; either here, or in @@ -1538,7 +1359,7 @@ ksocknal_put_conn (ksock_conn_t *conn) spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns); - wake_up (&ksocknal_data.ksnd_reaper_waitq); + cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); } @@ -1582,7 +1403,7 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) "incarnation:"LPX64"("LPX64")\n", peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port, conn->ksnc_incarnation, incarnation); - + count++; ksocknal_close_conn_locked (conn, -ESTALE); } @@ -1591,7 +1412,7 @@ ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation) } int -ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) +ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) { ksock_peer_t *peer = conn->ksnc_peer; __u32 
ipaddr = conn->ksnc_ipaddr; @@ -1601,7 +1422,7 @@ ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why) write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); count = ksocknal_close_peer_conns_locked (peer, ipaddr, why); - + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); return (count); @@ -1645,7 +1466,7 @@ ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr) /* wildcards always succeed */ if (nid == PTL_NID_ANY || ipaddr == 0) return (0); - + return (count == 0 ? -ENOENT : 0); } @@ -1662,62 +1483,11 @@ ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive) ksocknal_close_matching_conns (gw_nid, 0); return; } - + /* ...otherwise do nothing. We can only establish new connections * if we have autroutes, and these connect on demand. */ } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - return &(sk->tp_pinfo.af_tcp); -} -#else -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - struct tcp_sock *s = (struct tcp_sock *)sk; - return &s->tcp; -} -#endif - -void -ksocknal_push_conn (ksock_conn_t *conn) -{ - struct sock *sk; - struct tcp_opt *tp; - int nonagle; - int val = 1; - int rc; - mm_segment_t oldmm; - - rc = ksocknal_getconnsock (conn); - if (rc != 0) /* being shut down */ - return; - - sk = conn->ksnc_sock->sk; - tp = sock2tcp_opt(sk); - - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); - - oldmm = get_fs (); - set_fs (KERNEL_DS); - - rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof (val)); - LASSERT (rc == 0); - - set_fs (oldmm); - - lock_sock (sk); - tp->nonagle = nonagle; - release_sock (sk); - - ksocknal_putconnsock (conn); -} - void ksocknal_push_peer (ksock_peer_t *peer) { @@ -1745,7 +1515,7 @@ ksocknal_push_peer (ksock_peer_t *peer) if (conn == NULL) break; - ksocknal_push_conn (conn); + ksocknal_lib_push_conn (conn); ksocknal_put_conn (conn); } } @@ -1841,10 +1611,10 @@ ksocknal_add_interface(__u32 
ipaddress, __u32 netmask) for (j = 0; i < peer->ksnp_n_passive_ips; j++) if (peer->ksnp_passive_ips[j] == ipaddress) iface->ksni_npeers++; - + list_for_each(rtmp, &peer->ksnp_routes) { route = list_entry(rtmp, ksock_route_t, ksnr_list); - + if (route->ksnr_myipaddr == ipaddress) iface->ksni_nroutes++; } @@ -1854,7 +1624,7 @@ ksocknal_add_interface(__u32 ipaddress, __u32 netmask) rc = 0; /* NB only new connections will pay attention to the new interface! */ } - + write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); return (rc); @@ -1881,10 +1651,10 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) list_for_each_safe(tmp, nxt, &peer->ksnp_routes) { route = list_entry (tmp, ksock_route_t, ksnr_list); - + if (route->ksnr_myipaddr != ipaddr) continue; - + if (route->ksnr_share_count != 0) { /* Manually created; keep, but unbind */ route->ksnr_myipaddr = 0; @@ -1892,10 +1662,10 @@ ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr) ksocknal_del_route_locked(route); } } - + list_for_each_safe(tmp, nxt, &peer->ksnp_conns) { conn = list_entry(tmp, ksock_conn_t, ksnc_list); - + if (conn->ksnc_myipaddr == ipaddr) ksocknal_close_conn_locked (conn, 0); } @@ -1927,20 +1697,20 @@ ksocknal_del_interface(__u32 ipaddress) for (j = i+1; j < ksocknal_data.ksnd_ninterfaces; j++) ksocknal_data.ksnd_interfaces[j-1] = ksocknal_data.ksnd_interfaces[j]; - + ksocknal_data.ksnd_ninterfaces--; for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) { list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) { peer = list_entry(tmp, ksock_peer_t, ksnp_list); - + ksocknal_peer_del_interface_locked(peer, this_ip); } } } - + write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags); - + return (rc); } @@ -1967,7 +1737,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_fd = iface->ksni_npeers; pcfg->pcfg_count = iface->ksni_nroutes; } - + read_unlock (&ksocknal_data.ksnd_global_lock); break; } @@ -1987,7 +1757,7 @@ 
ksocknal_cmd(struct portals_cfg *pcfg, void * private) int port = 0; int conn_count = 0; int share_count = 0; - + rc = ksocknal_get_peer_info(pcfg->pcfg_count, &nid, &myip, &ip, &port, &conn_count, &share_count); @@ -2000,13 +1770,13 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) break; } case NAL_CMD_ADD_PEER: { - rc = ksocknal_add_peer (pcfg->pcfg_nid, + rc = ksocknal_add_peer (pcfg->pcfg_nid, pcfg->pcfg_id, /* IP */ pcfg->pcfg_misc); /* port */ break; } case NAL_CMD_DEL_PEER: { - rc = ksocknal_del_peer (pcfg->pcfg_nid, + rc = ksocknal_del_peer (pcfg->pcfg_nid, pcfg->pcfg_id, /* IP */ pcfg->pcfg_flags); /* single_share? */ break; @@ -2029,7 +1799,7 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) pcfg->pcfg_misc = conn->ksnc_port; pcfg->pcfg_fd = conn->ksnc_myipaddr; pcfg->pcfg_flags = conn->ksnc_type; - pcfg->pcfg_gw_nal = conn->ksnc_scheduler - + pcfg->pcfg_gw_nal = conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers; pcfg->pcfg_count = txmem; pcfg->pcfg_size = rxmem; @@ -2057,11 +1827,11 @@ ksocknal_cmd(struct portals_cfg *pcfg, void * private) rc = -EINVAL; break; } - fput (sock->file); + cfs_put_file (KSN_SOCK2FILE(sock)); break; } case NAL_CMD_CLOSE_CONNECTION: { - rc = ksocknal_close_matching_conns (pcfg->pcfg_nid, + rc = ksocknal_close_matching_conns (pcfg->pcfg_nid, pcfg->pcfg_id); break; } @@ -2090,15 +1860,15 @@ ksocknal_free_fmbs (ksock_fmb_pool_t *p) LASSERT (list_empty(&p->fmp_blocked_conns)); LASSERT (p->fmp_nactive_fmbs == 0); - + while (!list_empty(&p->fmp_idle_fmbs)) { fmb = list_entry(p->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); - + for (i = 0; i < npages; i++) if (fmb->fmb_kiov[i].kiov_page != NULL) - __free_page(fmb->fmb_kiov[i].kiov_page); + cfs_free_page(fmb->fmb_kiov[i].kiov_page); list_del(&fmb->fmb_list); PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages])); @@ -2118,7 +1888,7 @@ ksocknal_free_buffers (void) sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers); PORTAL_FREE (ksocknal_data.ksnd_peers, - 
sizeof (struct list_head) * + sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); } @@ -2164,7 +1934,7 @@ ksocknal_api_shutdown (nal_t *nal) "waiting for %d peers to disconnect\n", atomic_read (&ksocknal_data.ksnd_npeers)); set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); + schedule_timeout (cfs_time_seconds(1)); } /* Tell lib we've stopped calling into her. */ @@ -2200,12 +1970,12 @@ ksocknal_api_shutdown (nal_t *nal) /* flag threads to terminate; wake and wait for them to die */ ksocknal_data.ksnd_shuttingdown = 1; - wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq); - wake_up_all (&ksocknal_data.ksnd_reaper_waitq); + cfs_waitq_broadcast (&ksocknal_data.ksnd_autoconnectd_waitq); + cfs_waitq_broadcast (&ksocknal_data.ksnd_reaper_waitq); for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) { sched = &ksocknal_data.ksnd_schedulers[i]; - wake_up_all(&sched->kss_waitq); + cfs_waitq_broadcast(&sched->kss_waitq); } i = 4; @@ -2217,7 +1987,7 @@ ksocknal_api_shutdown (nal_t *nal) ksocknal_data.ksnd_nthreads); read_unlock(&ksocknal_data.ksnd_global_lock); set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); + schedule_timeout (cfs_time_seconds(1)); read_lock(&ksocknal_data.ksnd_global_lock); } read_unlock(&ksocknal_data.ksnd_global_lock); @@ -2250,10 +2020,10 @@ ksocknal_init_incarnation (void) * identifies this particular instance of the socknal. 
Hopefully * we won't be able to reboot more frequently than 1MHz for the * forseeable future :) */ - + do_gettimeofday(&tv); - - ksocknal_data.ksnd_incarnation = + + ksocknal_data.ksnd_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec; } @@ -2283,7 +2053,7 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ ksocknal_init_incarnation(); - + ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE; PORTAL_ALLOC (ksocknal_data.ksnd_peers, sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size); @@ -2291,29 +2061,29 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, return (-ENOMEM); for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) - INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); + CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]); rwlock_init(&ksocknal_data.ksnd_global_lock); spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); + CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); + CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES; spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); + CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); + CFS_INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES; spin_lock_init (&ksocknal_data.ksnd_reaper_lock); - INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns); - INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); - INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); - init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); + CFS_INIT_LIST_HEAD 
(&ksocknal_data.ksnd_enomem_conns); + CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns); + CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns); + cfs_waitq_init(&ksocknal_data.ksnd_reaper_waitq); spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock); - INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes); - init_waitqueue_head(&ksocknal_data.ksnd_autoconnectd_waitq); + CFS_INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes); + cfs_waitq_init(&ksocknal_data.ksnd_autoconnectd_waitq); /* NB memset above zeros whole of ksocknal_data, including * ksocknal_data.ksnd_irqinfo[all].ksni_valid */ @@ -2333,18 +2103,18 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; spin_lock_init (&kss->kss_lock); - INIT_LIST_HEAD (&kss->kss_rx_conns); - INIT_LIST_HEAD (&kss->kss_tx_conns); + CFS_INIT_LIST_HEAD (&kss->kss_rx_conns); + CFS_INIT_LIST_HEAD (&kss->kss_tx_conns); #if SOCKNAL_ZC - INIT_LIST_HEAD (&kss->kss_zctxdone_list); + CFS_INIT_LIST_HEAD (&kss->kss_zctxdone_list); #endif - init_waitqueue_head (&kss->kss_waitq); + cfs_waitq_init (&kss->kss_waitq); } /* NB we have to wait to be told our true NID... 
*/ - process_id.pid = requested_pid; + process_id.pid = requested_pid; process_id.nid = 0; - + rc = lib_init(&ksocknal_lib, nal, process_id, requested_limits, actual_limits); if (rc != PTL_OK) { @@ -2394,14 +2164,14 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, SOCKNAL_LARGE_FWD_NMSGS); i++) { ksock_fmb_t *fmb; ksock_fmb_pool_t *pool; - + if (i < SOCKNAL_SMALL_FWD_NMSGS) pool = &ksocknal_data.ksnd_small_fmp; else pool = &ksocknal_data.ksnd_large_fmp; - - PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, + + PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, fmb_kiov[pool->fmp_buff_pages])); if (fmb == NULL) { ksocknal_api_shutdown(nal); @@ -2409,16 +2179,16 @@ ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid, } fmb->fmb_pool = pool; - + for (j = 0; j < pool->fmp_buff_pages; j++) { - fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL); + fmb->fmb_kiov[j].kiov_page = cfs_alloc_page(CFS_ALLOC_STD); if (fmb->fmb_kiov[j].kiov_page == NULL) { ksocknal_api_shutdown (nal); return (-ENOMEM); } - LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL); + LASSERT(cfs_page_address(fmb->fmb_kiov[j].kiov_page) != NULL); } list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs); @@ -2455,6 +2225,8 @@ ksocknal_module_fini (void) ptl_unregister_nal(SOCKNAL); } +extern cfs_sysctl_table_t ksocknal_top_ctl_table[]; + int __init ksocknal_module_init (void) { @@ -2480,7 +2252,7 @@ ksocknal_module_init (void) #endif /* check ksnr_connected/connecting field large enough */ LASSERT(SOCKNAL_CONN_NTYPES <= 4); - + ksocknal_api.nal_ni_init = ksocknal_api_startup; ksocknal_api.nal_ni_fini = ksocknal_api_shutdown; @@ -2513,19 +2285,17 @@ ksocknal_module_init (void) ptl_unregister_nal(SOCKNAL); return (-ENODEV); } - + #ifdef CONFIG_SYSCTL /* Press on regardless even if registering sysctl doesn't work */ - ksocknal_tunables.ksnd_sysctl = + ksocknal_tunables.ksnd_sysctl = register_sysctl_table (ksocknal_top_ctl_table, 0); #endif return (0); } MODULE_AUTHOR("Cluster File Systems, Inc. 
"); -MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); +MODULE_DESCRIPTION("Kernel TCP Socket NAL v1.0.0"); MODULE_LICENSE("GPL"); -module_init(ksocknal_module_init); -module_exit(ksocknal_module_fini); - +cfs_module(ksocknal, "1.0.0", ksocknal_module_init, ksocknal_module_fini); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index f1f3de0..3442c25 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -28,39 +28,18 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #define DEBUG_SUBSYSTEM S_NAL -#include -#include -#include +#if defined(__linux__) +#include "socknal_lib-linux.h" +#elif defined(__APPLE__) +#include "socknal_lib-darwin.h" +#else +#error Unsupported Operating System +#endif + +#include +#include #include #include #include @@ -68,12 +47,12 @@ #define SOCKNAL_N_AUTOCONNECTD 4 /* # socknal autoconnect daemons */ -#define SOCKNAL_MIN_RECONNECT_INTERVAL HZ /* first failed connection retry... */ -#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */ +#define SOCKNAL_MIN_RECONNECT_INTERVAL cfs_time_seconds(1) /* first failed connection retry... */ +#define SOCKNAL_MAX_RECONNECT_INTERVAL cfs_time_seconds(60) /* ...exponentially increasing to this */ /* default vals for runtime tunables */ #define SOCKNAL_IO_TIMEOUT 50 /* default comms timeout (seconds) */ -#define SOCKNAL_EAGER_ACK 0 /* default eager ack (boolean) */ +#define SOCKNAL_EAGER_ACK SOCKNAL_ARCH_EAGER_ACK /* default eager ack (boolean) */ #define SOCKNAL_TYPED_CONNS 1 /* unidirectional large, bidirectional small? 
*/ #define SOCKNAL_ZC_MIN_FRAG (2<<10) /* default smallest zerocopy fragment */ #define SOCKNAL_MIN_BULK (1<<10) /* smallest "large" message */ @@ -95,14 +74,12 @@ /* # pages in a large message fwd buffer */ #define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ -#define SOCKNAL_ENOMEM_RETRY 1 /* jiffies between retries */ +#define SOCKNAL_ENOMEM_RETRY CFS_MIN_DELAY /* jiffies between retries */ #define SOCKNAL_MAX_INTERFACES 16 /* Largest number of interfaces we bind */ #define SOCKNAL_ROUND_ROBIN 0 /* round robin / load balance */ -#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10) - #define SOCKNAL_SINGLE_FRAG_TX 0 /* disable multi-fragment sends */ #define SOCKNAL_SINGLE_FRAG_RX 0 /* disable multi-fragment receives */ @@ -114,21 +91,6 @@ # define SOCKNAL_RISK_KMAP_DEADLOCK 1 #endif -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) -# define sk_allocation allocation -# define sk_data_ready data_ready -# define sk_write_space write_space -# define sk_user_data user_data -# define sk_prot prot -# define sk_sndbuf sndbuf -# define sk_socket socket -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) -# define sk_wmem_queued wmem_queued -# define sk_err err -#endif - typedef struct /* pool of forwarding buffers */ { spinlock_t fmp_lock; /* serialise */ @@ -147,7 +109,7 @@ typedef struct /* per scheduler state */ #if SOCKNAL_ZC struct list_head kss_zctxdone_list; /* completed ZC transmits */ #endif - wait_queue_head_t kss_waitq; /* where scheduler sleeps */ + cfs_waitq_t kss_waitq; /* where scheduler sleeps */ int kss_nconns; /* # connections assigned to this scheduler */ } ksock_sched_t; @@ -181,7 +143,7 @@ typedef struct #if SOCKNAL_ZC unsigned int ksnd_zc_min_frag; /* minimum zero copy frag size */ #endif - struct ctl_table_header *ksnd_sysctl; /* sysctl interface */ + cfs_sysctl_table_header_t *ksnd_sysctl; /* sysctl interface */ } ksock_tunables_t; typedef struct @@ -211,8 +173,8 @@ typedef struct struct list_head 
ksnd_deathrow_conns; /* conns to be closed */ struct list_head ksnd_zombie_conns; /* conns to be freed */ struct list_head ksnd_enomem_conns; /* conns to be retried */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - unsigned long ksnd_reaper_waketime; /* when reaper will wake */ + cfs_waitq_t ksnd_reaper_waitq; /* reaper sleeps here */ + cfs_time_t ksnd_reaper_waketime; /* when reaper will wake */ spinlock_t ksnd_reaper_lock; /* serialise */ int ksnd_enomem_tx; /* test ENOMEM sender */ @@ -220,7 +182,7 @@ typedef struct int ksnd_stall_rx; /* test sluggish receiver */ struct list_head ksnd_autoconnectd_routes; /* routes waiting to be connected */ - wait_queue_head_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */ + cfs_waitq_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */ spinlock_t ksnd_autoconnectd_lock; /* serialise */ ksock_irqinfo_t ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */ @@ -336,7 +298,7 @@ typedef struct ksock_conn /* reader */ struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times out */ + cfs_time_t ksnc_rx_deadline; /* when (in jiffies) receive times out */ int ksnc_rx_started; /* started receiving a message */ int ksnc_rx_ready; /* data ready to read */ int ksnc_rx_scheduled; /* being progressed */ @@ -354,7 +316,7 @@ typedef struct ksock_conn /* WRITER */ struct list_head ksnc_tx_list; /* where I enq waiting for output space */ struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out */ + cfs_time_t ksnc_tx_deadline; /* when (in jiffies) tx times out */ int ksnc_tx_bufnob; /* send buffer marker */ atomic_t ksnc_tx_nob; /* # bytes queued */ int ksnc_tx_ready; /* write space */ @@ -378,8 +340,8 @@ typedef struct ksock_route struct list_head ksnr_connect_list; /* chain on autoconnect list */ struct ksock_peer *ksnr_peer; /* owning peer 
*/ atomic_t ksnr_refcount; /* # users */ - unsigned long ksnr_timeout; /* when (in jiffies) reconnection can happen next */ - unsigned int ksnr_retry_interval; /* how long between retries */ + cfs_time_t ksnr_timeout; /* when (in jiffies) reconnection can happen next */ + cfs_duration_t ksnr_retry_interval; /* how long between retries */ __u32 ksnr_myipaddr; /* my IP */ __u32 ksnr_ipaddr; /* IP address to connect to */ int ksnr_port; /* port to connect to */ @@ -401,7 +363,7 @@ typedef struct ksock_peer struct list_head ksnp_conns; /* all active connections */ struct list_head ksnp_routes; /* routes */ struct list_head ksnp_tx_queue; /* waiting packets */ - unsigned long ksnp_last_alive; /* when (in jiffies) I was last alive */ + cfs_time_t ksnp_last_alive; /* when (in jiffies) I was last alive */ int ksnp_n_passive_ips; /* # of... */ __u32 ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */ } ksock_peer_t; @@ -427,7 +389,7 @@ ksocknal_getconnsock (ksock_conn_t *conn) read_lock (&ksocknal_data.ksnd_global_lock); if (!conn->ksnc_closing) { rc = 0; - get_file (conn->ksnc_sock->file); + cfs_get_file (KSN_CONN2FILE(conn)); } read_unlock (&ksocknal_data.ksnd_global_lock); @@ -437,64 +399,9 @@ ksocknal_getconnsock (ksock_conn_t *conn) static inline void ksocknal_putconnsock (ksock_conn_t *conn) { - fput (conn->ksnc_sock->file); -} - -#ifndef CONFIG_SMP -static inline -int ksocknal_nsched(void) -{ - return 1; -} -#else -#include -# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT)) -static inline int -ksocknal_nsched(void) -{ - return num_online_cpus(); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - return i; + cfs_put_file (KSN_CONN2FILE(conn)); } -static inline int -ksocknal_irqsched2cpu(int i) -{ - return i; -} -# else -static inline int -ksocknal_nsched(void) -{ - if 
(smp_num_siblings == 1) - return (num_online_cpus()); - - /* We need to know if this assumption is crap */ - LASSERT (smp_num_siblings == 2); - return (num_online_cpus()/2); -} - -static inline int -ksocknal_sched2cpu(int i) -{ - if (smp_num_siblings == 1) - return i; - - return (i * 2); -} - -static inline int -ksocknal_irqsched2cpu(int i) -{ - return (ksocknal_sched2cpu(i) + 1); -} -# endif -#endif - extern void ksocknal_put_route (ksock_route_t *route); extern void ksocknal_put_peer (ksock_peer_t *peer); extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid); @@ -519,8 +426,6 @@ extern void ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive); extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); extern int ksocknal_scheduler (void *arg); -extern void ksocknal_data_ready(struct sock *sk, int n); -extern void ksocknal_write_space(struct sock *sk); extern int ksocknal_autoconnectd (void *arg); extern int ksocknal_reaper (void *arg); extern int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, @@ -529,3 +434,26 @@ extern int ksocknal_setup_sock (struct socket *sock); extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs); extern int ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs); + +extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn); +extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn); +extern void ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn); +extern void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn); +extern void ksocknal_lib_push_conn (ksock_conn_t *conn); +extern void ksocknal_lib_bind_irq (unsigned int irq); +extern int ksocknal_lib_get_conn_addrs (ksock_conn_t *conn); +extern unsigned int ksocknal_lib_sock_irq (struct socket *sock); +extern int ksocknal_lib_setup_sock (struct socket *so); 
+extern int ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx); +extern int ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx); +extern void ksocknal_lib_eager_ack (ksock_conn_t *conn); +extern int ksocknal_lib_recv_iov (ksock_conn_t *conn); +extern int ksocknal_lib_recv_kiov (ksock_conn_t *conn); +extern int ksocknal_lib_sock_write (struct socket *sock, + void *buffer, int nob); +extern int ksocknal_lib_sock_read (struct socket *sock, + void *buffer, int nob); +extern int ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, + int *rxmem, int *nagle); +extern int ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, + ksock_route_t *route, int local_port); diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c index 3dcaa6c..2b08e1c 100644 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ b/lnet/klnds/socklnd/socklnd_cb.c @@ -24,9 +24,6 @@ */ #include "socknal.h" -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -# include -#endif /* * LIB functions follow @@ -53,221 +50,76 @@ ksocknal_free_ltx (ksock_ltx_t *ltx) PORTAL_FREE(ltx, ltx->ltx_desc_size); } -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) -struct page * -ksocknal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#if CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (NULL); - - return (page); -} -#endif - int ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; +{ struct iovec *iov = tx->tx_iov; -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - unsigned long vaddr = (unsigned long)iov->iov_base - int offset = vaddr & (PAGE_SIZE - 1); - int zcsize = MIN (iov->iov_len, PAGE_SIZE - 
offset); - struct page *page; -#endif - int nob; - int rc; + int nob; + int rc; - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ LASSERT (tx->tx_niov > 0); - -#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) - if (zcsize >= ksocknal_data.ksnd_zc_min_frag && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", - (void *)vaddr, page, page_address(page), offset, zcsize); - - if (!list_empty (&conn->ksnc_tx_queue) || - zcsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd); - } else -#endif - { -#if SOCKNAL_SINGLE_FRAG_TX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - int niov = tx->tx_niov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = tx->tx_iov[i]; - nob += scratchiov[i].iov_len; - } - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_MORE; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - } + /* Never touch tx->tx_iov inside ksocknal_lib_send_iov() */ + rc = ksocknal_lib_send_iov(conn, tx); - if (rc <= 0) /* sent nothing? */ + if (rc <= 0) /* sent nothing? 
*/ return (rc); - nob = rc; - LASSERT (nob <= tx->tx_resid); + nob = rc; + LASSERT (nob <= tx->tx_resid); tx->tx_resid -= nob; - /* "consume" iov */ - do { - LASSERT (tx->tx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); - iov->iov_len -= nob; - return (rc); - } + /* "consume" iov */ + do { + LASSERT (tx->tx_niov > 0); + + if (nob < iov->iov_len) { + iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob); + iov->iov_len -= nob; + return (rc); + } - nob -= iov->iov_len; - tx->tx_iov = ++iov; - tx->tx_niov--; + nob -= iov->iov_len; + tx->tx_iov = ++iov; + tx->tx_niov--; } while (nob != 0); - + return (rc); } int ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) -{ - struct socket *sock = conn->ksnc_sock; +{ ptl_kiov_t *kiov = tx->tx_kiov; - int rc; - int nob; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - LASSERT (tx->tx_niov == 0); - LASSERT (tx->tx_nkiov > 0); - -#if SOCKNAL_ZC - if (kiov->kiov_len >= ksocknal_tunables.ksnd_zc_min_frag && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - int msgflg = MSG_DONTWAIT; - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, kiov->kiov_len); - - if (!list_empty(&conn->ksnc_tx_queue) || - fragsize < tx->tx_resid) - msgflg |= MSG_MORE; - - rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg, - &tx->tx_zccd); - } else -#endif - { -#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." 
-#endif - struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; - int niov = tx->tx_nkiov; -#endif - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - int i; - - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + - kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - - if (!list_empty(&conn->ksnc_tx_queue) || - nob < tx->tx_resid) - msg.msg_flags |= MSG_DONTWAIT; - - set_fs (KERNEL_DS); - rc = sock_sendmsg(sock, &msg, nob); - set_fs (oldmm); - - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - } + int nob; + int rc; - if (rc <= 0) /* sent nothing? */ - return (rc); + LASSERT (tx->tx_niov == 0); + LASSERT (tx->tx_nkiov > 0); - nob = rc; - LASSERT (nob <= tx->tx_resid); - tx->tx_resid -= nob; + /* Never touch tx->tx_kiov inside ksocknal_lib_send_kiov() */ + rc = ksocknal_lib_send_kiov(conn, tx); - do { - LASSERT(tx->tx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return rc; - } - - nob -= kiov->kiov_len; - tx->tx_kiov = ++kiov; - tx->tx_nkiov--; + if (rc <= 0) /* sent nothing? 
*/ + return (rc); + + nob = rc; + LASSERT (nob <= tx->tx_resid); + tx->tx_resid -= nob; + + /* "consume" kiov */ + do { + LASSERT(tx->tx_nkiov > 0); + + if (nob < kiov->kiov_len) { + kiov->kiov_offset += nob; + kiov->kiov_len -= nob; + return rc; + } + + nob -= kiov->kiov_len; + tx->tx_kiov = ++kiov; + tx->tx_nkiov--; } while (nob != 0); return (rc); @@ -281,7 +133,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) if (ksocknal_data.ksnd_stall_tx != 0) { set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (ksocknal_data.ksnd_stall_tx * HZ); + schedule_timeout (cfs_time_seconds(ksocknal_data.ksnd_stall_tx)); } LASSERT (tx->tx_resid != 0); @@ -303,16 +155,15 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) rc = ksocknal_send_kiov (conn, tx); } - bufnob = conn->ksnc_sock->sk->sk_wmem_queued; + bufnob = SOCK_WMEM_QUEUED(conn->ksnc_sock); if (rc > 0) /* sent something? */ conn->ksnc_tx_bufnob += rc; /* account it */ if (bufnob < conn->ksnc_tx_bufnob) { /* allocated send buffer bytes < computed; infer * something got ACKed */ - conn->ksnc_tx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; - conn->ksnc_peer->ksnp_last_alive = jiffies; + conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout); + conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); conn->ksnc_tx_bufnob = bufnob; mb(); } @@ -332,7 +183,7 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) sched = conn->ksnc_scheduler; spin_lock_irqsave(&sched->kss_lock, flags); - if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) && + if (!SOCK_TEST_NOSPACE(conn->ksnc_sock) && !conn->ksnc_tx_ready) { /* SOCK_NOSPACE is set when the socket fills * and cleared in the write_space callback @@ -359,92 +210,45 @@ ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx) return (rc); } -void -ksocknal_eager_ack (ksock_conn_t *conn) -{ - int opt = 1; - mm_segment_t oldmm = get_fs(); - struct socket *sock = conn->ksnc_sock; - - /* Remind the socket to ACK eagerly. 
If I don't, the socket might - * think I'm about to send something it could piggy-back the ACK - * on, introducing delay in completing zero-copy sends in my - * peer. */ - - set_fs(KERNEL_DS); - sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, - (char *)&opt, sizeof (opt)); - set_fs(oldmm); -} - int ksocknal_recv_iov (ksock_conn_t *conn) -{ -#if SOCKNAL_SINGLE_FRAG_RX - struct iovec scratch; - struct iovec *scratchiov = &scratch; - int niov = 1; -#else - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - int niov = conn->ksnc_rx_niov; -#endif +{ struct iovec *iov = conn->ksnc_rx_iov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. */ - LASSERT (niov > 0); - - for (nob = i = 0; i < niov; i++) { - scratchiov[i] = iov[i]; - nob += scratchiov[i].iov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean..........................^ */ - set_fs (oldmm); - - if (rc <= 0) - return (rc); + int nob; + int rc; - /* received something... */ - nob = rc; + LASSERT (conn->ksnc_rx_niov > 0); - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_rx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; + /* Never touch conn->ksnc_rx_iov or change connection + * status inside ksocknal_lib_recv_iov */ + rc = ksocknal_lib_recv_iov(conn); + + if (rc <= 0) + return (rc); - conn->ksnc_rx_nob_wanted -= nob; + /* received something... 
*/ + nob = rc; + + conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); + conn->ksnc_rx_deadline = cfs_time_shift (ksocknal_tunables.ksnd_io_timeout); + mb(); /* order with setting rx_started */ + conn->ksnc_rx_started = 1; + + conn->ksnc_rx_nob_wanted -= nob; conn->ksnc_rx_nob_left -= nob; - do { - LASSERT (conn->ksnc_rx_niov > 0); - - if (nob < iov->iov_len) { - iov->iov_len -= nob; - iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); - return (-EAGAIN); - } - - nob -= iov->iov_len; - conn->ksnc_rx_iov = ++iov; - conn->ksnc_rx_niov--; + do { + LASSERT (conn->ksnc_rx_niov > 0); + + if (nob < iov->iov_len) { + iov->iov_len -= nob; + iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); + return (-EAGAIN); + } + + nob -= iov->iov_len; + conn->ksnc_rx_iov = ++iov; + conn->ksnc_rx_niov--; } while (nob != 0); return (rc); @@ -453,77 +257,41 @@ ksocknal_recv_iov (ksock_conn_t *conn) int ksocknal_recv_kiov (ksock_conn_t *conn) { -#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK - struct iovec scratch; - struct iovec *scratchiov = &scratch; - int niov = 1; -#else -#ifdef CONFIG_HIGHMEM -#warning "XXX risk of kmap deadlock on multiple frags..." -#endif - struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; - int niov = conn->ksnc_rx_nkiov; -#endif ptl_kiov_t *kiov = conn->ksnc_rx_kiov; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = scratchiov, - .msg_iovlen = niov, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int nob; - int i; - int rc; - + int nob; + int rc; LASSERT (conn->ksnc_rx_nkiov > 0); - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone. 
*/ - for (nob = i = 0; i < niov; i++) { - scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; - nob += scratchiov[i].iov_len = kiov[i].kiov_len; - } - LASSERT (nob <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); - /* NB this is just a boolean.......................^ */ - set_fs (oldmm); - - for (i = 0; i < niov; i++) - kunmap(kiov[i].kiov_page); - - if (rc <= 0) - return (rc); + /* Never touch conn->ksnc_rx_kiov or change connection + * status inside ksocknal_lib_recv_iov */ + rc = ksocknal_lib_recv_kiov(conn); - /* received something... */ - nob = rc; + if (rc <= 0) + return (rc); + + /* received something... */ + nob = rc; - conn->ksnc_peer->ksnp_last_alive = jiffies; - conn->ksnc_rx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; - mb(); /* order with setting rx_started */ + conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); + conn->ksnc_rx_deadline = cfs_time_shift (ksocknal_tunables.ksnd_io_timeout); + mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; - conn->ksnc_rx_nob_wanted -= nob; - conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT (conn->ksnc_rx_nkiov > 0); - - if (nob < kiov->kiov_len) { - kiov->kiov_offset += nob; - kiov->kiov_len -= nob; - return -EAGAIN; - } - - nob -= kiov->kiov_len; - conn->ksnc_rx_kiov = ++kiov; - conn->ksnc_rx_nkiov--; + conn->ksnc_rx_nob_wanted -= nob; + conn->ksnc_rx_nob_left -= nob; + + do { + LASSERT (conn->ksnc_rx_nkiov > 0); + + if (nob < kiov->kiov_len) { + kiov->kiov_offset += nob; + kiov->kiov_len -= nob; + return -EAGAIN; + } + + nob -= kiov->kiov_len; + conn->ksnc_rx_kiov = ++kiov; + conn->ksnc_rx_nkiov--; } while (nob != 0); return 1; @@ -540,7 +308,7 @@ ksocknal_receive (ksock_conn_t *conn) if (ksocknal_data.ksnd_stall_rx != 0) { set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (ksocknal_data.ksnd_stall_rx * HZ); + schedule_timeout(cfs_time_seconds (ksocknal_data.ksnd_stall_rx)); 
} rc = ksocknal_getconnsock (conn); @@ -574,7 +342,7 @@ ksocknal_receive (ksock_conn_t *conn) (conn->ksnc_rx_state == SOCKNAL_RX_BODY || conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) { /* Remind the socket to ack eagerly... */ - ksocknal_eager_ack(conn); + ksocknal_lib_eager_ack(conn); } rc = 1; break; @@ -599,7 +367,7 @@ ksocknal_zc_callback (zccd_t *zcd) spin_lock_irqsave (&sched->kss_lock, flags); list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list); - wake_up (&sched->kss_waitq); + cfs_waitq_signal (&sched->kss_waitq); spin_unlock_irqrestore (&sched->kss_lock, flags); EXIT; @@ -699,9 +467,10 @@ ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx) LASSERT (conn->ksnc_tx_scheduled); list_add_tail(&conn->ksnc_tx_list, &ksocknal_data.ksnd_enomem_conns); - if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY, + if (!cfs_time_aftereq(cfs_time_add(cfs_time_current(), + SOCKNAL_ENOMEM_RETRY), ksocknal_data.ksnd_reaper_waketime)) - wake_up (&ksocknal_data.ksnd_reaper_waitq); + cfs_waitq_signal (&ksocknal_data.ksnd_reaper_waitq); spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags); return (rc); @@ -747,7 +516,7 @@ ksocknal_launch_autoconnect_locked (ksock_route_t *route) list_add_tail (&route->ksnr_connect_list, &ksocknal_data.ksnd_autoconnectd_routes); - wake_up (&ksocknal_data.ksnd_autoconnectd_waitq); + cfs_waitq_signal (&ksocknal_data.ksnd_autoconnectd_waitq); spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags); } @@ -803,7 +572,7 @@ ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer) const int nob = 0; #else int nob = atomic_read(&c->ksnc_tx_nob) + - c->ksnc_sock->sk->sk_wmem_queued; + SOCK_WMEM_QUEUED(c->ksnc_sock); #endif LASSERT (!c->ksnc_closing); @@ -880,10 +649,9 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) spin_lock_irqsave (&sched->kss_lock, flags); if (list_empty(&conn->ksnc_tx_queue) && - conn->ksnc_sock->sk->sk_wmem_queued == 0) { + SOCK_WMEM_QUEUED(conn->ksnc_sock) == 0) { /* First 
packet starts the timeout */ - conn->ksnc_tx_deadline = jiffies + - ksocknal_tunables.ksnd_io_timeout * HZ; + conn->ksnc_tx_deadline = cfs_time_shift(ksocknal_tunables.ksnd_io_timeout); conn->ksnc_tx_bufnob = 0; mb(); /* order with adding to tx_queue */ } @@ -897,7 +665,7 @@ ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn) list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); conn->ksnc_tx_scheduled = 1; - wake_up (&sched->kss_waitq); + cfs_waitq_signal (&sched->kss_waitq); } spin_unlock_irqrestore (&sched->kss_lock, flags); @@ -927,7 +695,7 @@ ksocknal_find_connectable_route_locked (ksock_peer_t *peer) continue; /* too soon to retry this guy? */ - if (!time_after_eq (jiffies, route->ksnr_timeout)) + if (!cfs_time_aftereq (cfs_time_current(), route->ksnr_timeout)) continue; return (route); @@ -1196,7 +964,7 @@ ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) int ksocknal_thread_start (int (*fn)(void *arg), void *arg) { - long pid = kernel_thread (fn, arg, 0); + long pid = cfs_kernel_thread (fn, arg, 0); unsigned long flags; if (pid < 0) @@ -1272,7 +1040,7 @@ ksocknal_fmb_callback (void *arg, int error) spin_lock_irqsave (&sched->kss_lock, flags); list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); - wake_up (&sched->kss_waitq); + cfs_waitq_signal (&sched->kss_waitq); spin_unlock_irqrestore (&sched->kss_lock, flags); } @@ -1288,7 +1056,7 @@ ksocknal_get_idle_fmb (ksock_conn_t *conn) LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); LASSERT (kpr_routing(&ksocknal_data.ksnd_router)); - if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) + if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * CFS_PAGE_SIZE) pool = &ksocknal_data.ksnd_small_fmp; else pool = &ksocknal_data.ksnd_large_fmp; @@ -1328,8 +1096,8 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); LASSERT (payload_nob >= 0); - LASSERT (payload_nob <= 
fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE); - LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); + LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * CFS_PAGE_SIZE); + LASSERT (sizeof (ptl_hdr_t) < CFS_PAGE_SIZE); LASSERT (fmb->fmb_kiov[0].kiov_offset == 0); /* Take a ref on the conn's peer to prevent module unload before @@ -1346,8 +1114,8 @@ ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) while (nob > 0) { LASSERT (niov < fmb->fmb_pool->fmp_buff_pages); LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0); - fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob); - nob -= PAGE_SIZE; + fmb->fmb_kiov[niov].kiov_len = MIN (CFS_PAGE_SIZE, nob); + nob -= CFS_PAGE_SIZE; niov++; } @@ -1876,176 +1644,78 @@ int ksocknal_scheduler (void *arg) return (0); } -void -ksocknal_data_ready (struct sock *sk, int n) +/* + * Add connection to kss_rx_conns of scheduler + * and wakeup the scheduler. + */ +void ksocknal_read_callback (ksock_conn_t *conn) { + ksock_sched_t *sched; unsigned long flags; - ksock_conn_t *conn; - ksock_sched_t *sched; ENTRY; - /* interleave correctly with closing sockets... 
*/ - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_data_ready != &ksocknal_data_ready); - sk->sk_data_ready (sk, n); - } else { - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); + sched = conn->ksnc_scheduler; - conn->ksnc_rx_ready = 1; + spin_lock_irqsave (&sched->kss_lock, flags); - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - wake_up (&sched->kss_waitq); - } + conn->ksnc_rx_ready = 1; - spin_unlock_irqrestore (&sched->kss_lock, flags); - } + if (!conn->ksnc_rx_scheduled) { /* not being progressed */ + list_add_tail(&conn->ksnc_rx_list, + &sched->kss_rx_conns); + conn->ksnc_rx_scheduled = 1; + /* extra ref for scheduler */ + atomic_inc (&conn->ksnc_refcount); - read_unlock (&ksocknal_data.ksnd_global_lock); + cfs_waitq_signal (&sched->kss_waitq); + } + spin_unlock_irqrestore (&sched->kss_lock, flags); EXIT; -} +} -void -ksocknal_write_space (struct sock *sk) -{ +/* + * Add connection to kss_tx_conns of scheduler + * and wakeup the scheduler. + */ +void ksocknal_write_callback (ksock_conn_t *conn) +{ + ksock_sched_t *sched; unsigned long flags; - ksock_conn_t *conn; - ksock_sched_t *sched; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_global_lock); - - conn = sk->sk_user_data; - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn, - (conn == NULL) ? "" : (conn->ksnc_tx_ready ? - " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? 
- " empty" : " queued")); - - if (conn == NULL) { /* raced with ksocknal_terminate_conn */ - LASSERT (sk->sk_write_space != &ksocknal_write_space); - sk->sk_write_space (sk); - - read_unlock (&ksocknal_data.ksnd_global_lock); - return; - } - - if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */ - sched = conn->ksnc_scheduler; + ENTRY; + + sched = conn->ksnc_scheduler; - spin_lock_irqsave (&sched->kss_lock, flags); + spin_lock_irqsave (&sched->kss_lock, flags); - clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags); - conn->ksnc_tx_ready = 1; + conn->ksnc_tx_ready = 1; - if (!conn->ksnc_tx_scheduled && // not being progressed - !list_empty(&conn->ksnc_tx_queue)){//packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); + if (!conn->ksnc_tx_scheduled && // not being progressed + !list_empty(&conn->ksnc_tx_queue)){//packets to send + list_add_tail (&conn->ksnc_tx_list, + &sched->kss_tx_conns); + conn->ksnc_tx_scheduled = 1; + /* extra ref for scheduler */ + atomic_inc (&conn->ksnc_refcount); - wake_up (&sched->kss_waitq); - } + cfs_waitq_signal (&sched->kss_waitq); + } - spin_unlock_irqrestore (&sched->kss_lock, flags); - } + spin_unlock_irqrestore (&sched->kss_lock, flags); - read_unlock (&ksocknal_data.ksnd_global_lock); + EXIT; } int ksocknal_sock_write (struct socket *sock, void *buffer, int nob) { - int rc; - mm_segment_t oldmm = get_fs(); - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - - set_fs (KERNEL_DS); - rc = sock_sendmsg (sock, &msg, iov.iov_len); - set_fs (oldmm); - - if (rc < 0) - return (rc); - - if (rc == 0) { - CERROR ("Unexpected zero rc\n"); - return (-ECONNABORTED); - } - - buffer = ((char *)buffer) + 
rc; - nob -= rc; - } - - return (0); + return ksocknal_lib_sock_write(sock, buffer, nob); } int ksocknal_sock_read (struct socket *sock, void *buffer, int nob) { - int rc; - mm_segment_t oldmm = get_fs(); - - while (nob > 0) { - struct iovec iov = { - .iov_base = buffer, - .iov_len = nob - }; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - - set_fs (KERNEL_DS); - rc = sock_recvmsg (sock, &msg, iov.iov_len, 0); - set_fs (oldmm); - - if (rc < 0) - return (rc); - - if (rc == 0) - return (-ECONNABORTED); - - buffer = ((char *)buffer) + rc; - nob -= rc; - } - - return (0); + return ksocknal_lib_sock_read(sock, buffer, nob); } int @@ -2251,293 +1921,7 @@ ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid, int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) { - mm_segment_t oldmm = get_fs (); - struct socket *sock = conn->ksnc_sock; - int len; - int rc; - - rc = ksocknal_getconnsock (conn); - if (rc != 0) { - LASSERT (conn->ksnc_closing); - *txmem = *rxmem = *nagle = 0; - return (-ESHUTDOWN); - } - - set_fs (KERNEL_DS); - - len = sizeof(*txmem); - rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF, - (char *)txmem, &len); - if (rc == 0) { - len = sizeof(*rxmem); - rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF, - (char *)rxmem, &len); - } - if (rc == 0) { - len = sizeof(*nagle); - rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, - (char *)nagle, &len); - } - - set_fs (oldmm); - ksocknal_putconnsock (conn); - - if (rc == 0) - *nagle = !*nagle; - else - *txmem = *rxmem = *nagle = 0; - - return (rc); -} - -int -ksocknal_setup_sock (struct socket *sock) -{ - mm_segment_t oldmm = get_fs (); - int rc; - int option; - int keep_idle; - int keep_intvl; - int keep_count; - int do_keepalive; - struct linger linger; - - sock->sk->sk_allocation = GFP_NOFS; - - /* Ensure this socket aborts active sends immediately 
when we close - * it. */ - - linger.l_onoff = 0; - linger.l_linger = 0; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, - (char *)&linger, sizeof (linger)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER: %d\n", rc); - return (rc); - } - - option = -1; - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_LINGER2: %d\n", rc); - return (rc); - } - - if (!ksocknal_tunables.ksnd_nagle) { - option = 1; - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't disable nagle: %d\n", rc); - return (rc); - } - } - - if (ksocknal_tunables.ksnd_buffer_size > 0) { - option = ksocknal_tunables.ksnd_buffer_size; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send buffer %d: %d\n", - option, rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set receive buffer %d: %d\n", - option, rc); - return (rc); - } - } - - /* snapshot tunables */ - keep_idle = ksocknal_tunables.ksnd_keepalive_idle; - keep_count = ksocknal_tunables.ksnd_keepalive_count; - keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl; - - do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); - - option = (do_keepalive ? 
1 : 0); - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); - return (rc); - } - - if (!do_keepalive) - return (0); - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, - (char *)&keep_idle, sizeof (keep_idle)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, - (char *)&keep_intvl, sizeof (keep_intvl)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); - return (rc); - } - - set_fs (KERNEL_DS); - rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, - (char *)&keep_count, sizeof (keep_count)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); - return (rc); - } - - return (0); -} - -static int -ksocknal_connect_sock(struct socket **sockp, int *may_retry, - ksock_route_t *route, int local_port) -{ - struct sockaddr_in locaddr; - struct sockaddr_in srvaddr; - struct socket *sock; - int rc; - int option; - mm_segment_t oldmm = get_fs(); - struct timeval tv; - - memset(&locaddr, 0, sizeof(locaddr)); - locaddr.sin_family = AF_INET; - locaddr.sin_port = htons(local_port); - locaddr.sin_addr.s_addr = - (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr) - : INADDR_ANY; - - memset (&srvaddr, 0, sizeof (srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons (route->ksnr_port); - srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); - - *may_retry = 0; - - rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); - *sockp = sock; - if (rc != 0) { - CERROR ("Can't create autoconnect socket: %d\n", rc); - return (rc); - } - - /* Ugh; have to map_fd for compatibility with sockets passed in - * from userspace. 
And we actually need the sock->file refcounting - * that this gives you :) */ - - rc = sock_map_fd (sock); - if (rc < 0) { - sock_release (sock); - CERROR ("sock_map_fd error %d\n", rc); - return (rc); - } - - /* NB the file descriptor (rc) now owns the ref on sock->file */ - LASSERT (sock->file != NULL); - LASSERT (file_count(sock->file) == 1); - - get_file(sock->file); /* extra ref makes sock->file */ - sys_close(rc); /* survive this close */ - - /* Still got a single ref on sock->file */ - LASSERT (file_count(sock->file) == 1); - - /* Set the socket timeouts, so our connection attempt completes in - * finite time */ - tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; - tv.tv_usec = 0; - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO, - (char *)&tv, sizeof (tv)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set send timeout %d: %d\n", - ksocknal_tunables.ksnd_io_timeout, rc); - goto failed; - } - - set_fs (KERNEL_DS); - rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO, - (char *)&tv, sizeof (tv)); - set_fs (oldmm); - if (rc != 0) { - CERROR ("Can't set receive timeout %d: %d\n", - ksocknal_tunables.ksnd_io_timeout, rc); - goto failed; - } - - set_fs (KERNEL_DS); - option = 1; - rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, - (char *)&option, sizeof (option)); - set_fs (oldmm); - if (rc != 0) { - CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); - goto failed; - } - - rc = sock->ops->bind(sock, - (struct sockaddr *)&locaddr, sizeof(locaddr)); - if (rc == -EADDRINUSE) { - CDEBUG(D_NET, "Port %d already in use\n", local_port); - *may_retry = 1; - goto failed; - } - if (rc != 0) { - CERROR("Error trying to bind to reserved port %d: %d\n", - local_port, rc); - goto failed; - } - - rc = sock->ops->connect(sock, - (struct sockaddr *)&srvaddr, sizeof(srvaddr), - sock->file->f_flags); - if (rc == 0) - return 0; - - /* EADDRNOTAVAIL probably means we're already connected to the same - * peer/port on the same local port on a 
differently typed - * connection. Let our caller retry with a different local - * port... */ - *may_retry = (rc == -EADDRNOTAVAIL); - - CDEBUG(*may_retry ? D_NET : D_ERROR, - "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, - HIPQUAD(route->ksnr_myipaddr), local_port, - HIPQUAD(route->ksnr_ipaddr), route->ksnr_port); - - failed: - fput(sock->file); - return rc; + return ksocknal_lib_get_conn_tunables(conn, txmem, rxmem, nagle); } int @@ -2555,11 +1939,11 @@ ksocknal_connect_peer (ksock_route_t *route, int type) for (port = 1023; port > 512; --port) { - rc = ksocknal_connect_sock(&sock, &may_retry, route, port); + rc = ksocknal_lib_connect_sock(&sock, &may_retry, route, port); if (rc == 0) { rc = ksocknal_create_conn(route, sock, type); - fput(sock->file); + cfs_put_file(KSN_SOCK2FILE(sock)); return rc; } @@ -2574,7 +1958,7 @@ ksocknal_connect_peer (ksock_route_t *route, int type) void ksocknal_autoconnect (ksock_route_t *route) { - LIST_HEAD (zombies); + CFS_LIST_HEAD (zombies); ksock_tx_t *tx; ksock_peer_t *peer; unsigned long flags; @@ -2609,7 +1993,8 @@ ksocknal_autoconnect (ksock_route_t *route) /* This is a retry rather than a new connection */ LASSERT (route->ksnr_retry_interval != 0); - route->ksnr_timeout = jiffies + route->ksnr_retry_interval; + route->ksnr_timeout = cfs_time_add(cfs_time_current(), + route->ksnr_retry_interval); route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2, SOCKNAL_MAX_RECONNECT_INTERVAL); @@ -2720,17 +2105,18 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) /* Don't need the {get,put}connsock dance to deref ksnc_sock... */ LASSERT (!conn->ksnc_closing); - if (conn->ksnc_sock->sk->sk_err != 0) { + if (SOCK_ERROR(conn->ksnc_sock) != 0) { /* Something (e.g. 
failed keepalive) set the socket error */ atomic_inc (&conn->ksnc_refcount); CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n", - conn->ksnc_sock->sk->sk_err, peer->ksnp_nid, + SOCK_ERROR(conn->ksnc_sock), peer->ksnp_nid, conn, HIPQUAD(conn->ksnc_ipaddr)); return (conn); } if (conn->ksnc_rx_started && - time_after_eq (jiffies, conn->ksnc_rx_deadline)) { + cfs_time_aftereq (cfs_time_current(), + conn->ksnc_rx_deadline)) { /* Timed out incomplete incoming message */ atomic_inc (&conn->ksnc_refcount); CERROR ("Timed out RX from "LPX64" %p %d.%d.%d.%d\n", @@ -2739,15 +2125,16 @@ ksocknal_find_timed_out_conn (ksock_peer_t *peer) } if ((!list_empty (&conn->ksnc_tx_queue) || - conn->ksnc_sock->sk->sk_wmem_queued != 0) && - time_after_eq (jiffies, conn->ksnc_tx_deadline)) { + SOCK_WMEM_QUEUED(conn->ksnc_sock) != 0) && + cfs_time_aftereq (cfs_time_current(), + conn->ksnc_tx_deadline)) { /* Timed out messages queued for sending or * buffered in the socket's send buffer */ atomic_inc (&conn->ksnc_refcount); CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n", peer->ksnp_nid, list_empty (&conn->ksnc_tx_queue) ? 
"" : "Q ", - conn->ksnc_sock->sk->sk_wmem_queued, conn, + SOCK_WMEM_QUEUED(conn->ksnc_sock), conn, HIPQUAD(conn->ksnc_ipaddr)); return (conn); } @@ -2797,22 +2184,22 @@ ksocknal_check_peer_timeouts (int idx) int ksocknal_reaper (void *arg) { - wait_queue_t wait; + cfs_waitlink_t wait; unsigned long flags; ksock_conn_t *conn; ksock_sched_t *sched; struct list_head enomem_conns; int nenomem_conns; - int timeout; + cfs_duration_t timeout; int i; int peer_index = 0; - unsigned long deadline = jiffies; + cfs_time_t deadline = cfs_time_current(); kportal_daemonize ("ksocknal_reaper"); kportal_blockallsigs (); - INIT_LIST_HEAD(&enomem_conns); - init_waitqueue_entry (&wait, current); + CFS_INIT_LIST_HEAD(&enomem_conns); + cfs_waitlink_init (&wait); spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); @@ -2866,14 +2253,15 @@ ksocknal_reaper (void *arg) LASSERT (conn->ksnc_tx_scheduled); conn->ksnc_tx_ready = 1; list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); - wake_up (&sched->kss_waitq); + cfs_waitq_signal (&sched->kss_waitq); spin_unlock_irqrestore (&sched->kss_lock, flags); nenomem_conns++; } /* careful with the jiffy wrap... */ - while ((timeout = (int)(deadline - jiffies)) <= 0) { + while ((timeout = cfs_time_sub(deadline, + cfs_time_current())) <= 0) { const int n = 4; const int p = 1; int chunk = ksocknal_data.ksnd_peer_hash_size; @@ -2897,7 +2285,7 @@ ksocknal_reaper (void *arg) ksocknal_data.ksnd_peer_hash_size; } - deadline += p * HZ; + deadline = cfs_time_add(deadline, cfs_time_seconds(p)); } if (nenomem_conns != 0) { @@ -2906,18 +2294,19 @@ ksocknal_reaper (void *arg) * if any go back on my enomem list. 
*/ timeout = SOCKNAL_ENOMEM_RETRY; } - ksocknal_data.ksnd_reaper_waketime = jiffies + timeout; + ksocknal_data.ksnd_reaper_waketime = + cfs_time_add(cfs_time_current(), timeout); set_current_state (TASK_INTERRUPTIBLE); - add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); + cfs_waitq_add (&ksocknal_data.ksnd_reaper_waitq, &wait); if (!ksocknal_data.ksnd_shuttingdown && list_empty (&ksocknal_data.ksnd_deathrow_conns) && list_empty (&ksocknal_data.ksnd_zombie_conns)) - schedule_timeout (timeout); + cfs_waitq_timedwait (&wait, timeout); set_current_state (TASK_RUNNING); - remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait); + cfs_waitq_del (&ksocknal_data.ksnd_reaper_waitq, &wait); spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); } diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.c b/lnet/klnds/socklnd/socklnd_lib-darwin.c new file mode 100644 index 0000000..ada5b64 --- /dev/null +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.c @@ -0,0 +1,1011 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Darwin porting library + * Make things easy to port + */ +#include +#include +#include +#include +#include + +#include "socknal.h" + +#if 0 +#undef SOCKNAL_SINGLE_FRAG_TX +#define SOCKNAL_SINGLE_FRAG_TX 1 +#undef SOCKNAL_SINGLE_FRAG_RX +#define SOCKNAL_SINGLE_FRAG_RX 1 +#endif + +SYSCTL_DECL(_portals); + +SYSCTL_NODE (_portals, OID_AUTO, ksocknal, CTLFLAG_RW, + 0, "ksocknal_sysctl"); + +SYSCTL_INT(_portals_ksocknal, OID_AUTO, timeout, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_io_timeout, + 0, "timeout"); +SYSCTL_INT(_portals_ksocknal, OID_AUTO, eager_ack, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_eager_ack, + 0, "eager_ack"); +SYSCTL_INT(_portals_ksocknal, OID_AUTO, typed, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_typed_conns, + 0, "typed"); +SYSCTL_INT(_portals_ksocknal, OID_AUTO, min_bulk, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_min_bulk, + 0, "min_bulk"); +SYSCTL_INT(_portals_ksocknal, OID_AUTO, buffer_size, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_buffer_size, + 0, "buffer_size"); +SYSCTL_INT(_portals_ksocknal, OID_AUTO, nagle, + CTLTYPE_INT | CTLFLAG_RW , &ksocknal_tunables.ksnd_nagle, + 0, "nagle"); + +cfs_sysctl_table_t ksocknal_top_ctl_table [] = { + &sysctl__portals_ksocknal, + &sysctl__portals_ksocknal_timeout, + &sysctl__portals_ksocknal_eager_ack, + &sysctl__portals_ksocknal_typed, + &sysctl__portals_ksocknal_min_bulk, + &sysctl__portals_ksocknal_buffer_size, + &sysctl__portals_ksocknal_nagle, + NULL +}; + +static unsigned long ksocknal_mbuf_size = (u_quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); + +struct socket * +sockfd_lookup(int fd, void *foo) +{ + struct socket *so; + struct file *fp; + CFS_DECL_FUNNEL_DATA; + + CFS_NET_IN; + getsock(current_proc()->p_fd, fd, &fp); + CFS_NET_EX; + so = (struct socket *)fp->f_data; + so->reserved4 = fp; + CFS_CONE_IN; + fref(fp); + CFS_CONE_EX; + return so; +} + +extern struct fileops socketops; + +static int +sock_map_fd (struct socket *so) +{ + 
struct file *fp; + int fd; + CFS_DECL_FUNNEL_DATA; + + CFS_CONE_IN; + falloc(current_proc(), &fp, &fd); + fp->f_flag = FREAD|FWRITE; + fp->f_type = DTYPE_SOCKET; + fp->f_ops = &socketops; + fp->f_data = (caddr_t)so; + so->reserved4 = fp; + *fdflags(current_proc(), fd) &= ~UF_RESERVED; + CFS_CONE_EX; + + return fd; +} + +static void +sock_release(struct socket *so) +{ + struct file *fp; + CFS_DECL_FUNNEL_DATA; + + fp = (struct file *)so->reserved4; + so->reserved4 = NULL; + fp->f_data = NULL; + CFS_CONE_IN; + frele(fp); + CFS_CONE_EX; + CFS_NET_IN; + soshutdown(so, 0); + CFS_NET_EX; +} + +static void +sock_fdrelse(int fd) +{ + CFS_DECL_FUNNEL_DATA; + + CFS_CONE_IN; + fdrelse(current_proc(), fd); + CFS_CONE_EX; +} + +void +ksocknal_lib_bind_irq (unsigned int irq) +{ + return; +} + +unsigned int +ksocknal_lib_sock_irq (struct socket *sock) +{ + return 0; +} + +int +ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) +{ + struct sockaddr_in *sin; + struct sockaddr *sa; + int rc; + CFS_DECL_NET_DATA; + + CFS_NET_IN; + rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_peeraddr(conn->ksnc_sock, &sa); + LASSERT (!conn->ksnc_closing); + if (rc != 0) { + CFS_NET_EX; + if (sa) FREE(sa, M_SONAME); + CERROR ("Error %d getting sock peer IP\n", rc); + return rc; + } + sin = (struct sockaddr_in *)sa; + conn->ksnc_ipaddr = ntohl (sin->sin_addr.s_addr); + conn->ksnc_port = ntohs (sin->sin_port); + if (sa) FREE(sa, M_SONAME); + rc = conn->ksnc_sock->so_proto->pr_usrreqs->pru_sockaddr(conn->ksnc_sock, &sa); + CFS_NET_EX; + if (rc != 0) { + if (sa) FREE(sa, M_SONAME); + CERROR ("Error %d getting sock local IP\n", rc); + return rc; + } + conn->ksnc_myipaddr = ntohl (sin->sin_addr.s_addr); + + return 0; +} + +int +ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) +{ +#if SOCKNAL_SINGLE_FRAG_TX + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + int niov = tx->tx_niov; +#endif + struct socket 
*sock = conn->ksnc_sock; + int nob; + int rc; + int i; + struct uio suio = { + .uio_iov = scratchiov, + .uio_iovcnt = niov, + .uio_offset = 0, + .uio_resid = 0, /* This will be valued after a while */ + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_WRITE, + .uio_procp = NULL + }; + int flags = MSG_DONTWAIT; + CFS_DECL_NET_DATA; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i] = tx->tx_iov[i]; + nob += scratchiov[i].iov_len; + } + suio.uio_resid = nob; + + CFS_NET_IN; + rc = sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, flags); + CFS_NET_EX; + + /* NB there is no return value can indicate how many + * have been sent and how many resid, we have to get + * sent bytes from suio. */ + if (rc != 0) { + if (suio.uio_resid != nob &&\ + (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) + /* We have sent something */ + rc = nob - suio.uio_resid; + else if ( rc == EWOULDBLOCK ) + /* Actually, EAGAIN and EWOULDBLOCK have same value in OSX */ + rc = -EAGAIN; + else + rc = -rc; + } else /* rc == 0 */ + rc = nob - suio.uio_resid; + + return rc; +} + +int +ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) +{ +#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + int niov = tx->tx_nkiov; +#endif + struct socket *sock = conn->ksnc_sock; + ptl_kiov_t *kiov = tx->tx_kiov; + int nob; + int rc; + int i; + struct uio suio = { + .uio_iov = scratchiov, + .uio_iovcnt = niov, + .uio_offset = 0, + .uio_resid = 0, /* It should be valued after a while */ + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_WRITE, + .uio_procp = NULL + }; + int flags = MSG_DONTWAIT; + CFS_DECL_NET_DATA; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + + kiov[i].kiov_offset; + nob += scratchiov[i].iov_len = kiov[i].kiov_len; + } + suio.uio_resid = nob; + + CFS_NET_IN; + rc = sosend(sock, NULL, 
&suio, (struct mbuf *)0, (struct mbuf *)0, flags); + CFS_NET_EX; + + for (i = 0; i < niov; i++) + cfs_kunmap(kiov[i].kiov_page); + + if (rc != 0) { + if (suio.uio_resid != nob &&\ + (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) + /* We have sent something */ + rc = nob - suio.uio_resid; + else if ( rc == EWOULDBLOCK ) + /* EAGAIN and EWOULD BLOCK have same value in OSX */ + rc = -EAGAIN; + else + rc = -rc; + } else /* rc == 0 */ + rc = nob - suio.uio_resid; + + return rc; +} + +/* + * liang: Hack of inpcb and tcpcb. + * To get tcpcb of a socket, and call tcp_output + * to send quick ack. + */ +struct ks_tseg_qent{ + int foo; +}; + +struct ks_tcptemp{ + int foo; +}; + +LIST_HEAD(ks_tsegqe_head, ks_tseg_qent); + +struct ks_tcpcb { + struct ks_tsegqe_head t_segq; + int t_dupacks; + struct ks_tcptemp *unused; + int t_timer[4]; + struct inpcb *t_inpcb; + int t_state; + u_int t_flags; + /* + * There are more fields but we dont need + * ...... + */ +}; + +#define TF_ACKNOW 0x00001 +#define TF_DELACK 0x00002 + +struct ks_inpcb { + LIST_ENTRY(ks_inpcb) inp_hash; + struct in_addr reserved1; + struct in_addr reserved2; + u_short inp_fport; + u_short inp_lport; + LIST_ENTRY(inpcb) inp_list; + caddr_t inp_ppcb; + /* + * There are more fields but we dont need + * ...... 
+ */ +}; + +#define ks_sotoinpcb(so) ((struct ks_inpcb *)(so)->so_pcb) +#define ks_intotcpcb(ip) ((struct ks_tcpcb *)(ip)->inp_ppcb) +#define ks_sototcpcb(so) (intotcpcb(sotoinpcb(so))) + +void +ksocknal_lib_eager_ack (ksock_conn_t *conn) +{ + struct socket *sock = conn->ksnc_sock; + struct ks_inpcb *inp = ks_sotoinpcb(sock); + struct ks_tcpcb *tp = ks_intotcpcb(inp); + int s; + CFS_DECL_NET_DATA; + + extern int tcp_output(register struct ks_tcpcb *tp); + + CFS_NET_IN; + s = splnet(); + + if (tp && tp->t_flags & TF_DELACK){ + tp->t_flags &= ~TF_DELACK; + tp->t_flags |= TF_ACKNOW; + (void) tcp_output(tp); + } + splx(s); + + /* + * No TCP_QUICKACK supported in BSD, so I have to call tcp_fasttimo + * to send immediate ACK. It's not the best resolution because + * tcp_fasttimo will send out ACK for all delayed-ack tcp socket. + * Anyway, it's working now. + * extern void tcp_fasttimo(); + * tcp_fasttimo(); + */ + CFS_NET_EX; + + return; +} + +int +ksocknal_lib_recv_iov (ksock_conn_t *conn) +{ +#if SOCKNAL_SINGLE_FRAG_RX + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + int niov = conn->ksnc_rx_niov; +#endif + struct iovec *iov = conn->ksnc_rx_iov; + int nob; + int rc; + int i; + struct uio ruio = { + .uio_iov = scratchiov, + .uio_iovcnt = niov, + .uio_offset = 0, + .uio_resid = 0, /* It should be valued after a while */ + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_READ, + .uio_procp = NULL + }; + int flags = MSG_DONTWAIT; + CFS_DECL_NET_DATA; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i] = iov[i]; + nob += scratchiov[i].iov_len; + } + LASSERT (nob <= conn->ksnc_rx_nob_wanted); + + ruio.uio_resid = nob; + + CFS_NET_IN; + rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, &flags); + CFS_NET_EX; + if (rc){ + if (ruio.uio_resid != nob && \ + (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK || rc == EAGAIN)) + /* data 
particially received */ + rc = nob - ruio.uio_resid; + else if (rc == EWOULDBLOCK) + /* EAGAIN and EWOULD BLOCK have same value in OSX */ + rc = -EAGAIN; + else + rc = -rc; + } else + rc = nob - ruio.uio_resid; + + return (rc); +} + +int +ksocknal_lib_recv_kiov (ksock_conn_t *conn) +{ +#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + int niov = conn->ksnc_rx_nkiov; +#endif + ptl_kiov_t *kiov = conn->ksnc_rx_kiov; + int nob; + int rc; + int i; + struct uio ruio = { + .uio_iov = scratchiov, + .uio_iovcnt = niov, + .uio_offset = 0, + .uio_resid = 0, + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_READ, + .uio_procp = NULL + }; + int flags = MSG_DONTWAIT; + CFS_DECL_NET_DATA; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i].iov_base = cfs_kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; + nob += scratchiov[i].iov_len = kiov[i].kiov_len; + } + LASSERT (nob <= conn->ksnc_rx_nob_wanted); + + ruio.uio_resid = nob; + + CFS_NET_IN; + rc = soreceive(conn->ksnc_sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, NULL, &flags); + CFS_NET_EX; + + for (i = 0; i < niov; i++) + cfs_kunmap(kiov[i].kiov_page); + + if (rc){ + if (ruio.uio_resid != nob && \ + (rc == ERESTART || rc == EINTR || rc == EWOULDBLOCK)) + /* data particially received */ + rc = nob - ruio.uio_resid; + else if (rc == EWOULDBLOCK) + /* receive blocked, EWOULDBLOCK == EAGAIN */ + rc = -EAGAIN; + else + rc = -rc; + } else + rc = nob - ruio.uio_resid; + + return (rc); +} + +int +ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob) +{ + int rc; + CFS_DECL_NET_DATA; + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct uio suio = { + .uio_iov = &iov, + .uio_iovcnt = 1, + .uio_offset = 0, + .uio_resid = nob, + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_WRITE, + .uio_procp = NULL + }; + + CFS_NET_IN; + rc 
= sosend(sock, NULL, &suio, (struct mbuf *)0, (struct mbuf *)0, 0); + CFS_NET_EX; + + if (rc != 0) { + if ( suio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ + rc == EWOULDBLOCK)) + rc = 0; + if ( rc != 0 ) + return -rc; + rc = nob - suio.uio_resid; + buffer = ((char *)buffer) + rc; + nob = suio.uio_resid; + continue; + } + break; + } + + return (0); +} + +/* + * Read exactly 'nob' bytes from 'sock' into 'buffer'. + * Returns 0 once the whole buffer has been filled, -ECONNABORTED if + * soreceive() reports success but delivers no data, or the negated + * errno returned by soreceive(). + */ +int +ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob) +{ + int rc; + CFS_DECL_NET_DATA; + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct uio ruio = { + .uio_iov = &iov, + .uio_iovcnt = 1, + .uio_offset = 0, + .uio_resid = nob, + .uio_segflg = UIO_SYSSPACE, + .uio_rw = UIO_READ, + .uio_procp = NULL + }; + + CFS_NET_IN; + rc = soreceive(sock, (struct sockaddr **)0, &ruio, (struct mbuf **)0, (struct mbuf **)0, (int *)0); + CFS_NET_EX; + + if (rc != 0) { + /* An interrupted/blocked call that still moved data + * is a partial read, not a failure. */ + if ( ruio.uio_resid != nob && ( rc == ERESTART || rc == EINTR ||\ + rc == EWOULDBLOCK)) + rc = 0; + if (rc != 0) + return -rc; + } else if (ruio.uio_resid == nob) { + /* Success but nothing received: report it the same + * way the Linux ksocknal_lib_sock_read does. */ + return (-ECONNABORTED); + } + + /* A successful call may still leave part of the buffer + * unfilled; advance and keep reading until nob is 0. */ + rc = nob - ruio.uio_resid; + buffer = ((char *)buffer) + rc; + nob = ruio.uio_resid; + } + + return (0); +} + +int +ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) +{ + struct sockopt sopt; + struct socket *sock = conn->ksnc_sock; + int len; + int rc; + CFS_DECL_NET_DATA; + + rc = ksocknal_getconnsock (conn); + if (rc != 0) { + LASSERT (conn->ksnc_closing); + *txmem = *rxmem = *nagle = 0; + /* rc is negated at 'out', so store the positive errno + * here to hand -ESHUTDOWN back to the caller. */ + rc = ESHUTDOWN; + goto out; + } + len = sizeof(*txmem); + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_SNDBUF; + sopt.sopt_val = txmem; + sopt.sopt_valsize = len; + + CFS_NET_IN; + rc = sogetopt(sock, &sopt); + if (rc == 0) { + len = sizeof(*rxmem); + sopt.sopt_name = SO_RCVBUF; + sopt.sopt_val = rxmem; + rc = sogetopt(sock, &sopt); + } + if (rc == 0) { + len = sizeof(*nagle); + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; +
sopt.sopt_val = nagle; + rc = sogetopt(sock, &sopt); + } + CFS_NET_EX; + + ksocknal_putconnsock (conn); + + if (rc == 0) + *nagle = !*nagle; + else + *txmem = *rxmem = *nagle = 0; +out: + return (-rc); +} + +int +ksocknal_lib_setup_sock (struct socket *so) +{ + struct sockopt sopt; + int rc; + int option; + int keep_idle; + int keep_intvl; + int keep_count; + int do_keepalive; + struct linger linger; + CFS_DECL_NET_DATA; + + /* Ensure this socket aborts active sends immediately when we close + * it. */ + + bzero(&sopt, sizeof sopt); + + linger.l_onoff = 0; + linger.l_linger = 0; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_LINGER; + sopt.sopt_val = &linger; + sopt.sopt_valsize = sizeof(linger); + + CFS_NET_IN; + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't set SO_LINGER: %d\n", rc); + goto out; + } + + + if (!ksocknal_tunables.ksnd_nagle) { + option = 1; + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't disable nagle: %d\n", rc); + goto out; + } + } + if (ksocknal_tunables.ksnd_buffer_size > 0) { + option = ksocknal_tunables.ksnd_buffer_size; + if (option > ksocknal_mbuf_size) + option = ksocknal_mbuf_size; + + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_SNDBUF; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't set send buffer %d: %d\n", + option, rc); + goto out; + } + + sopt.sopt_name = SO_RCVBUF; + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't set receive buffer %d: %d\n", + option, rc); + goto out; + } + } + /* snapshot tunables */ + keep_idle = ksocknal_tunables.ksnd_keepalive_idle; + keep_count = ksocknal_tunables.ksnd_keepalive_count; + keep_intvl = 
ksocknal_tunables.ksnd_keepalive_intvl; + + do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); + option = (do_keepalive ? 1 : 0); + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_KEEPALIVE; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); + goto out; + } + + if (!do_keepalive) { + /* no more setting, just return */ + rc = 0; + goto out; + } + + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_KEEPALIVE; + sopt.sopt_val = &keep_idle; + sopt.sopt_valsize = sizeof(keep_idle); + rc = sosetopt(so, &sopt); + if (rc != 0) { + CERROR ("Can't set TCP_KEEPALIVE : %d\n", rc); + goto out; + } +out: + CFS_NET_EX; + return (-rc); +} + +int +ksocknal_lib_connect_sock (struct socket **sockp, int *may_retry, + ksock_route_t *route, int local_port) +{ + struct sockaddr_in locaddr; + struct sockaddr_in srvaddr; + struct timeval tv; + int fd; + struct socket *so; + struct sockopt sopt; + int option; + int rc; + int s; + CFS_DECL_FUNNEL_DATA; + + ENTRY; + bzero (&locaddr, sizeof (locaddr)); + locaddr.sin_len = sizeof(struct sockaddr_in); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons (local_port); + locaddr.sin_addr.s_addr = + (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr) + : INADDR_ANY; + bzero(&srvaddr, sizeof(srvaddr)); + srvaddr.sin_len = sizeof(struct sockaddr_in); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons (route->ksnr_port); + srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); + + *may_retry = 0; + + CFS_NET_IN; + rc = socreate(PF_INET, &so, SOCK_STREAM, 0); + CFS_NET_EX; + *sockp = so; + if (rc != 0) { + CERROR ("Can't create autoconnect socket: %d\n", rc); + return (-rc); + } + + /* + * XXX + * Liang: what do we need here? 
+ */ + fd = sock_map_fd (so); + if (fd < 0) { + sock_release (so); + CERROR ("sock_map_fd error %d\n", fd); + return (fd); + } + sock_fdrelse(fd); + + /* Set the socket timeouts, so our connection attempt completes in + * finite time */ + tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; + tv.tv_usec = 0; + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_SNDTIMEO; + sopt.sopt_val = &tv; + sopt.sopt_valsize = sizeof(tv); + + CFS_NET_IN; + rc = sosetopt(so, &sopt); + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't set send timeout %d: %d\n", + ksocknal_tunables.ksnd_io_timeout, rc); + goto out; + } + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_RCVTIMEO; + rc = sosetopt(so, &sopt); + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't set receive timeout %d: %d\n", + ksocknal_tunables.ksnd_io_timeout, rc); + goto out; + } + option = 1; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_REUSEADDR; + sopt.sopt_val = &option; + sopt.sopt_valsize = sizeof(option); + rc = sosetopt(so, &sopt); + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't set sock reuse address: %d\n", rc); + goto out; + } + rc = sobind(so, (struct sockaddr *)&locaddr); + if (rc == EADDRINUSE) { + CFS_NET_EX; + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *may_retry = 1; + goto out; + } + if (rc != 0) { + CFS_NET_EX; + CERROR ("Can't bind to local IP Address %u.%u.%u.%u: %d\n", + HIPQUAD(route->ksnr_myipaddr), rc); + goto out; + } + rc = soconnect(so, (struct sockaddr *)&srvaddr); + *may_retry = (rc == EADDRNOTAVAIL || rc == EADDRINUSE); + if (rc != 0) { + CFS_NET_EX; + if (rc != EADDRNOTAVAIL && rc != EADDRINUSE) + CERROR ("Can't connect to nid "LPX64 + " local IP: %u.%u.%u.%u," + " remote IP: %u.%u.%u.%u/%d: %d\n", + route->ksnr_peer->ksnp_nid, + HIPQUAD(route->ksnr_myipaddr), + HIPQUAD(route->ksnr_ipaddr), + route->ksnr_port, rc); + goto out; + } + + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + 
CDEBUG(D_NET, "ksocknal sleep for waiting auto_connect.\n"); + (void) tsleep((caddr_t)&so->so_timeo, PSOCK, "ksocknal_conn", hz); + } + /* The wait loop also ends when so_error becomes non-zero, in + * which case the socket need not be connected. Only insist on + * SS_ISCONNECTED when no error was recorded, so a failed + * connect reaches the so_error check below instead of + * tripping the assertion. */ + LASSERT(so->so_error != 0 || (so->so_state & SS_ISCONNECTED)); + splx(s); + CFS_NET_EX; + + rc = so->so_error; + if (rc != 0) { + CERROR ("Error %d waiting for connection to nid "LPX64 + " local IP: %u.%u.%u.%u," + " remote IP: %u.%u.%u.%u/%d: %d\n", rc, + route->ksnr_peer->ksnp_nid, + HIPQUAD(route->ksnr_myipaddr), + HIPQUAD(route->ksnr_ipaddr), + route->ksnr_port, rc); + goto out; + } + return (-rc); + + out: + rele_file(KSN_SOCK2FILE(so)); + + return (-rc); +} + +void +ksocknal_lib_push_conn(ksock_conn_t *conn) +{ + struct socket *sock; + struct sockopt sopt; + int val = 1; + int rc; + CFS_DECL_NET_DATA; + + rc = ksocknal_getconnsock (conn); + if (rc != 0) /* being shut down */ + return; + sock = conn->ksnc_sock; + bzero(&sopt, sizeof sopt); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = &val; + sopt.sopt_valsize = sizeof val; + + CFS_NET_IN; + sosetopt(sock, &sopt); + CFS_NET_EX; + + ksocknal_putconnsock (conn); + return; +} + +extern void ksocknal_read_callback (ksock_conn_t *conn); +extern void ksocknal_write_callback (ksock_conn_t *conn); + +static void +ksocknal_upcall(struct socket *so, caddr_t arg, int waitf) +{ + ksock_conn_t *conn; + CFS_DECL_NET_DATA; + ENTRY; + + read_lock (&ksocknal_data.ksnd_global_lock); + conn = so->reserved3; + + if (conn == NULL){ + /* More processing is needed? */ + goto out; + } + if ((so->so_rcv.sb_flags & SB_UPCALL) || !arg ) { + extern int soreadable(struct socket *so); + CFS_NET_IN; + if (conn->ksnc_rx_nob_wanted && soreadable(so)){ + /* To verify whether the upcall is for receive */ + CFS_NET_EX; + ksocknal_read_callback (conn); + }else + CFS_NET_EX; + } + /* go foward?
*/ + if ((so->so_snd.sb_flags & SB_UPCALL) || !arg){ + extern int sowriteable(struct socket *so); + CFS_NET_IN; + if (sowriteable(so)){ + /* socket is writable */ + CFS_NET_EX; + ksocknal_write_callback(conn); + } else + CFS_NET_EX; + } +out: + read_unlock (&ksocknal_data.ksnd_global_lock); + + EXIT; +} + +void +ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) +{ + /* No callback need to save in osx */ + return; +} + +void +ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) +{ + CFS_DECL_NET_DATA; + + CFS_NET_IN; + sock->so_upcallarg = (void *)sock; /* anything not NULL */ + sock->so_upcall = ksocknal_upcall; + sock->so_snd.sb_timeo = 0; + sock->so_rcv.sb_timeo = 2 * HZ; + sock->so_rcv.sb_flags |= SB_UPCALL; + sock->so_snd.sb_flags |= SB_UPCALL; + sock->reserved3 = conn; + CFS_NET_EX; + return; +} + +void +ksocknal_lib_act_callback(struct socket *sock) +{ + /* upcall will take the network funnel */ + ksocknal_upcall (sock, 0, 0); +} + +void +ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) +{ + CFS_DECL_NET_DATA; + + CFS_NET_IN; + sock->so_upcall = NULL; + sock->so_upcallarg = NULL; + sock->so_rcv.sb_flags &= ~SB_UPCALL; + sock->so_snd.sb_flags &= ~SB_UPCALL; + CFS_NET_EX; +} + + diff --git a/lnet/klnds/socklnd/socklnd_lib-darwin.h b/lnet/klnds/socklnd/socklnd_lib-darwin.h new file mode 100644 index 0000000..e3b286bc --- /dev/null +++ b/lnet/klnds/socklnd/socklnd_lib-darwin.h @@ -0,0 +1,50 @@ +#ifndef __XNU_SOCKNAL_LIB_H__ +#define __XNU_SOCKNAL_LIB_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define SOCKNAL_ARCH_EAGER_ACK 1 + +#define KSN_SOCK2FILE(so) ((struct file *)(so)->reserved4) +#define KSN_CONN2FILE(conn) ((struct file *)(conn)->ksnc_sock->reserved4) + +#define 
SOCK_WMEM_QUEUED(so) ((so)->so_snd.sb_cc) +#define SOCK_ERROR(so) ((so)->so_error) + +#define SOCK_TEST_NOSPACE(so) (sbspace(&(so)->so_snd) < (so)->so_snd.sb_lowat) +extern struct socket * sockfd_lookup(int fd, void *foo); + +static inline +int ksocknal_nsched(void) +{ + return 1; +} + +#endif diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.c b/lnet/klnds/socklnd/socklnd_lib-linux.c new file mode 100644 index 0000000..3a962ac --- /dev/null +++ b/lnet/klnds/socklnd/socklnd_lib-linux.c @@ -0,0 +1,977 @@ +#include "socknal.h" + +#ifdef CONFIG_SYSCTL +#define SOCKNAL_SYSCTL 200 + +#define SOCKNAL_SYSCTL_TIMEOUT 1 +#define SOCKNAL_SYSCTL_EAGER_ACK 2 +#define SOCKNAL_SYSCTL_ZERO_COPY 3 +#define SOCKNAL_SYSCTL_TYPED 4 +#define SOCKNAL_SYSCTL_MIN_BULK 5 +#define SOCKNAL_SYSCTL_BUFFER_SIZE 6 +#define SOCKNAL_SYSCTL_NAGLE 7 +#define SOCKNAL_SYSCTL_IRQ_AFFINITY 8 +#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE 9 +#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10 +#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11 + +static ctl_table ksocknal_ctl_table[] = { + {SOCKNAL_SYSCTL_TIMEOUT, "timeout", + &ksocknal_tunables.ksnd_io_timeout, sizeof (int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", + &ksocknal_tunables.ksnd_eager_ack, sizeof (int), + 0644, NULL, &proc_dointvec}, +#if SOCKNAL_ZC + {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", + &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int), + 0644, NULL, &proc_dointvec}, +#endif + {SOCKNAL_SYSCTL_TYPED, "typed", + &ksocknal_tunables.ksnd_typed_conns, sizeof (int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", + &ksocknal_tunables.ksnd_min_bulk, sizeof (int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size", + &ksocknal_tunables.ksnd_buffer_size, sizeof(int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_NAGLE, "nagle", + &ksocknal_tunables.ksnd_nagle, sizeof(int), + 0644, NULL, &proc_dointvec}, +#if CPU_AFFINITY + {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity", + 
&ksocknal_tunables.ksnd_irq_affinity, sizeof(int), + 0644, NULL, &proc_dointvec}, +#endif + {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle", + &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count", + &ksocknal_tunables.ksnd_keepalive_count, sizeof(int), + 0644, NULL, &proc_dointvec}, + {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl", + &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int), + 0644, NULL, &proc_dointvec}, + { 0 } +}; + +ctl_table ksocknal_top_ctl_table[] = { + {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table}, + { 0 } +}; +#endif + +void +ksocknal_lib_bind_irq (unsigned int irq) +{ +#if (defined(CONFIG_SMP) && CPU_AFFINITY) + int bind; + int cpu; + unsigned long flags; + char cmdline[64]; + ksock_irqinfo_t *info; + char *argv[] = {"/bin/sh", + "-c", + cmdline, + NULL}; + char *envp[] = {"HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + + LASSERT (irq < NR_IRQS); + if (irq == 0) /* software NIC or affinity disabled */ + return; + + info = &ksocknal_data.ksnd_irqinfo[irq]; + + write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags); + + LASSERT (info->ksni_valid); + bind = !info->ksni_bound; + info->ksni_bound = 1; + + write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags); + + if (!bind) /* bound already */ + return; + + cpu = ksocknal_irqsched2cpu(info->ksni_sched); + snprintf (cmdline, sizeof (cmdline), + "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); + + printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n", + irq, cpu, cmdline); + + /* FIXME: Find a better method of setting IRQ affinity... 
+ */ + + USERMODEHELPER(argv[0], argv, envp); +#endif +} + +int +ksocknal_lib_get_conn_addrs (ksock_conn_t *conn) +{ + struct sockaddr_in sin; + int len = sizeof (sin); + int rc; + + rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, + (struct sockaddr *)&sin, &len, 2); + /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */ + LASSERT (!conn->ksnc_closing); + + if (rc != 0) { + CERROR ("Error %d getting sock peer IP\n", rc); + return rc; + } + + conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr); + conn->ksnc_port = ntohs (sin.sin_port); + + rc = conn->ksnc_sock->ops->getname (conn->ksnc_sock, + (struct sockaddr *)&sin, &len, 0); + if (rc != 0) { + CERROR ("Error %d getting sock local IP\n", rc); + return rc; + } + + conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr); + + return 0; +} + +unsigned int +ksocknal_lib_sock_irq (struct socket *sock) +{ + int irq = 0; + struct dst_entry *dst; + + if (!ksocknal_tunables.ksnd_irq_affinity) + return 0; + + dst = sk_dst_get (sock->sk); + if (dst != NULL) { + if (dst->dev != NULL) { + irq = dst->dev->irq; + if (irq >= NR_IRQS) { + CERROR ("Unexpected IRQ %x\n", irq); + irq = 0; + } + } + dst_release (dst); + } + + return (irq); +} + +#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) +static struct page * +ksocknal_kvaddr_to_page (unsigned long vaddr) +{ + struct page *page; + + if (vaddr >= VMALLOC_START && + vaddr < VMALLOC_END) + page = vmalloc_to_page ((void *)vaddr); +#if CONFIG_HIGHMEM + else if (vaddr >= PKMAP_BASE && + vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) + page = vmalloc_to_page ((void *)vaddr); + /* in 2.4 ^ just walks the page tables */ +#endif + else + page = virt_to_page (vaddr); + + if (page == NULL || + !VALID_PAGE (page)) + return (NULL); + + return (page); +} +#endif + +int +ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx) +{ + struct socket *sock = conn->ksnc_sock; +#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) + unsigned long vaddr = (unsigned long)iov->iov_base + int offset = vaddr & (PAGE_SIZE - 
1); + int zcsize = MIN (iov->iov_len, PAGE_SIZE - offset); + struct page *page; +#endif + int nob; + int rc; + + /* NB we can't trust socket ops to either consume our iovs + * or leave them alone. */ + +#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC) + if (zcsize >= ksocknal_data.ksnd_zc_min_frag && + (sock->sk->route_caps & NETIF_F_SG) && + (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && + (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { + int msgflg = MSG_DONTWAIT; + + CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", + (void *)vaddr, page, page_address(page), offset, zcsize); + + if (!list_empty (&conn->ksnc_tx_queue) || + zcsize < tx->tx_resid) + msgflg |= MSG_MORE; + + rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd); + } else +#endif + { +#if SOCKNAL_SINGLE_FRAG_TX + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + int niov = tx->tx_niov; +#endif + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = scratchiov, + .msg_iovlen = niov, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = MSG_DONTWAIT + }; + mm_segment_t oldmm = get_fs(); + int i; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i] = tx->tx_iov[i]; + nob += scratchiov[i].iov_len; + } + + if (!list_empty(&conn->ksnc_tx_queue) || + nob < tx->tx_resid) + msg.msg_flags |= MSG_MORE; + + set_fs (KERNEL_DS); + rc = sock_sendmsg(sock, &msg, nob); + set_fs (oldmm); + } + return rc; +} + +int +ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx) +{ + struct socket *sock = conn->ksnc_sock; + ptl_kiov_t *kiov = tx->tx_kiov; + int rc; + int nob; + + /* NB we can't trust socket ops to either consume our iovs + * or leave them alone. 
*/ + +#if SOCKNAL_ZC + if (kiov->kiov_len >= ksocknal_tunables.ksnd_zc_min_frag && + (sock->sk->route_caps & NETIF_F_SG) && + (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) { + struct page *page = kiov->kiov_page; + int offset = kiov->kiov_offset; + int fragsize = kiov->kiov_len; + int msgflg = MSG_DONTWAIT; + + CDEBUG(D_NET, "page %p + offset %x for %d\n", + page, offset, kiov->kiov_len); + + if (!list_empty(&conn->ksnc_tx_queue) || + fragsize < tx->tx_resid) + msgflg |= MSG_MORE; + + rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg, + &tx->tx_zccd); + } else +#endif + { +#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else +#ifdef CONFIG_HIGHMEM +#warning "XXX risk of kmap deadlock on multiple frags..." +#endif + struct iovec *scratchiov = conn->ksnc_tx_scratch_iov; + int niov = tx->tx_nkiov; +#endif + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = scratchiov, + .msg_iovlen = niov, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = MSG_DONTWAIT + }; + mm_segment_t oldmm = get_fs(); + int i; + + for (nob = i = 0; i < niov; i++) { + scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + + kiov[i].kiov_offset; + nob += scratchiov[i].iov_len = kiov[i].kiov_len; + } + + /* More data follows this send (further queued txs or + * remaining fragments of this tx): flag it with MSG_MORE, + * as ksocknal_lib_send_iov and the zero-copy branch above + * do. MSG_DONTWAIT is already set in the initialiser. */ + if (!list_empty(&conn->ksnc_tx_queue) || + nob < tx->tx_resid) + msg.msg_flags |= MSG_MORE; + + set_fs (KERNEL_DS); + rc = sock_sendmsg(sock, &msg, nob); + set_fs (oldmm); + + for (i = 0; i < niov; i++) + kunmap(kiov[i].kiov_page); + } + return rc; +} + +void +ksocknal_lib_eager_ack (ksock_conn_t *conn) +{ + int opt = 1; + mm_segment_t oldmm = get_fs(); + struct socket *sock = conn->ksnc_sock; + + /* Remind the socket to ACK eagerly. If I don't, the socket might + * think I'm about to send something it could piggy-back the ACK + * on, introducing delay in completing zero-copy sends in my + * peer.
*/ + + set_fs(KERNEL_DS); + sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK, + (char *)&opt, sizeof (opt)); + set_fs(oldmm); +} + +int +ksocknal_lib_recv_iov (ksock_conn_t *conn) +{ +#if SOCKNAL_SINGLE_FRAG_RX + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else + struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + int niov = conn->ksnc_rx_niov; +#endif + struct iovec *iov = conn->ksnc_rx_iov; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = scratchiov, + .msg_iovlen = niov, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + mm_segment_t oldmm = get_fs(); + int nob; + int i; + int rc; + + /* NB we can't trust socket ops to either consume our iovs + * or leave them alone. */ + LASSERT (niov > 0); + + for (nob = i = 0; i < niov; i++) { + scratchiov[i] = iov[i]; + nob += scratchiov[i].iov_len; + } + LASSERT (nob <= conn->ksnc_rx_nob_wanted); + + set_fs (KERNEL_DS); + rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); + /* NB this is just a boolean..........................^ */ + set_fs (oldmm); + + return rc; +} + +int +ksocknal_lib_recv_kiov (ksock_conn_t *conn) +{ +#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK + struct iovec scratch; + struct iovec *scratchiov = &scratch; + int niov = 1; +#else +#ifdef CONFIG_HIGHMEM +#warning "XXX risk of kmap deadlock on multiple frags..." +#endif + struct iovec *scratchiov = conn->ksnc_rx_scratch_iov; + int niov = conn->ksnc_rx_nkiov; +#endif + ptl_kiov_t *kiov = conn->ksnc_rx_kiov; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = scratchiov, + .msg_iovlen = niov, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + mm_segment_t oldmm = get_fs(); + int nob; + int i; + int rc; + + /* NB we can't trust socket ops to either consume our iovs + * or leave them alone. 
*/ + for (nob = i = 0; i < niov; i++) { + scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset; + nob += scratchiov[i].iov_len = kiov[i].kiov_len; + } + LASSERT (nob <= conn->ksnc_rx_nob_wanted); + + set_fs (KERNEL_DS); + rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT); + /* NB this is just a boolean.......................^ */ + set_fs (oldmm); + + for (i = 0; i < niov; i++) + kunmap(kiov[i].kiov_page); + + return (rc); +} + +int +ksocknal_lib_sock_write (struct socket *sock, void *buffer, int nob) +{ + int rc; + mm_segment_t oldmm = get_fs(); + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + + set_fs (KERNEL_DS); + rc = sock_sendmsg (sock, &msg, iov.iov_len); + set_fs (oldmm); + + if (rc < 0) + return (rc); + + if (rc == 0) { + CERROR ("Unexpected zero rc\n"); + return (-ECONNABORTED); + } + + buffer = ((char *)buffer) + rc; + nob -= rc; + } + + return (0); +} + +int +ksocknal_lib_sock_read (struct socket *sock, void *buffer, int nob) +{ + int rc; + mm_segment_t oldmm = get_fs(); + + while (nob > 0) { + struct iovec iov = { + .iov_base = buffer, + .iov_len = nob + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = NULL, + .msg_controllen = 0, + .msg_flags = 0 + }; + + set_fs (KERNEL_DS); + rc = sock_recvmsg (sock, &msg, iov.iov_len, 0); + set_fs (oldmm); + + if (rc < 0) + return (rc); + + if (rc == 0) + return (-ECONNABORTED); + + buffer = ((char *)buffer) + rc; + nob -= rc; + } + + return (0); +} + +int +ksocknal_lib_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle) +{ + mm_segment_t oldmm = get_fs (); + struct socket *sock = conn->ksnc_sock; + int len; + int rc; + + rc = ksocknal_getconnsock (conn); + if (rc != 0) { + LASSERT 
(conn->ksnc_closing); + *txmem = *rxmem = *nagle = 0; + return (-ESHUTDOWN); + } + + set_fs (KERNEL_DS); + + len = sizeof(*txmem); + rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF, + (char *)txmem, &len); + if (rc == 0) { + len = sizeof(*rxmem); + rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF, + (char *)rxmem, &len); + } + if (rc == 0) { + len = sizeof(*nagle); + rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY, + (char *)nagle, &len); + } + + set_fs (oldmm); + ksocknal_putconnsock (conn); + + if (rc == 0) + *nagle = !*nagle; + else + *txmem = *rxmem = *nagle = 0; + + return (rc); +} + +int +ksocknal_lib_setup_sock (struct socket *sock) +{ + mm_segment_t oldmm = get_fs (); + int rc; + int option; + int keep_idle; + int keep_intvl; + int keep_count; + int do_keepalive; + struct linger linger; + + sock->sk->sk_allocation = GFP_NOFS; + + /* Ensure this socket aborts active sends immediately when we close + * it. */ + + linger.l_onoff = 0; + linger.l_linger = 0; + + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER, + (char *)&linger, sizeof (linger)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set SO_LINGER: %d\n", rc); + return (rc); + } + + option = -1; + set_fs (KERNEL_DS); + rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set SO_LINGER2: %d\n", rc); + return (rc); + } + + if (!ksocknal_tunables.ksnd_nagle) { + option = 1; + + set_fs (KERNEL_DS); + rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't disable nagle: %d\n", rc); + return (rc); + } + } + + if (ksocknal_tunables.ksnd_buffer_size > 0) { + option = ksocknal_tunables.ksnd_buffer_size; + + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set send buffer %d: %d\n", + 
option, rc); + return (rc); + } + + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set receive buffer %d: %d\n", + option, rc); + return (rc); + } + } + + /* snapshot tunables */ + keep_idle = ksocknal_tunables.ksnd_keepalive_idle; + keep_count = ksocknal_tunables.ksnd_keepalive_count; + keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl; + + do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0); + + option = (do_keepalive ? 1 : 0); + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set SO_KEEPALIVE: %d\n", rc); + return (rc); + } + + if (!do_keepalive) + return (0); + + set_fs (KERNEL_DS); + rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE, + (char *)&keep_idle, sizeof (keep_idle)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc); + return (rc); + } + + set_fs (KERNEL_DS); + rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL, + (char *)&keep_intvl, sizeof (keep_intvl)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc); + return (rc); + } + + set_fs (KERNEL_DS); + rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT, + (char *)&keep_count, sizeof (keep_count)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set TCP_KEEPCNT: %d\n", rc); + return (rc); + } + + return (0); +} + +int +ksocknal_lib_connect_sock(struct socket **sockp, int *may_retry, + ksock_route_t *route, int local_port) +{ + struct sockaddr_in locaddr; + struct sockaddr_in srvaddr; + struct socket *sock; + int rc; + int option; + mm_segment_t oldmm = get_fs(); + struct timeval tv; + + memset(&locaddr, 0, sizeof(locaddr)); + locaddr.sin_family = AF_INET; + locaddr.sin_port = htons(local_port); + locaddr.sin_addr.s_addr = + (route->ksnr_myipaddr != 0) ? 
htonl(route->ksnr_myipaddr) + : INADDR_ANY; + + memset (&srvaddr, 0, sizeof (srvaddr)); + srvaddr.sin_family = AF_INET; + srvaddr.sin_port = htons (route->ksnr_port); + srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr); + + *may_retry = 0; + + rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock); + *sockp = sock; + if (rc != 0) { + CERROR ("Can't create autoconnect socket: %d\n", rc); + return (rc); + } + + /* Ugh; have to map_fd for compatibility with sockets passed in + * from userspace. And we actually need the sock->file refcounting + * that this gives you :) */ + + rc = sock_map_fd (sock); + if (rc < 0) { + sock_release (sock); + CERROR ("sock_map_fd error %d\n", rc); + return (rc); + } + + /* NB the file descriptor (rc) now owns the ref on sock->file */ + LASSERT (sock->file != NULL); + LASSERT (file_count(sock->file) == 1); + + get_file(sock->file); /* extra ref makes sock->file */ + sys_close(rc); /* survive this close */ + + /* Still got a single ref on sock->file */ + LASSERT (file_count(sock->file) == 1); + + /* Set the socket timeouts, so our connection attempt completes in + * finite time */ + tv.tv_sec = ksocknal_tunables.ksnd_io_timeout; + tv.tv_usec = 0; + + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO, + (char *)&tv, sizeof (tv)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set send timeout %d: %d\n", + ksocknal_tunables.ksnd_io_timeout, rc); + goto failed; + } + + set_fs (KERNEL_DS); + rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO, + (char *)&tv, sizeof (tv)); + set_fs (oldmm); + if (rc != 0) { + CERROR ("Can't set receive timeout %d: %d\n", + ksocknal_tunables.ksnd_io_timeout, rc); + goto failed; + } + + set_fs (KERNEL_DS); + option = 1; + rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&option, sizeof (option)); + set_fs (oldmm); + if (rc != 0) { + CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc); + goto failed; + } + + rc = sock->ops->bind(sock, + (struct sockaddr *)&locaddr, 
sizeof(locaddr)); + if (rc == -EADDRINUSE) { + CDEBUG(D_NET, "Port %d already in use\n", local_port); + *may_retry = 1; + goto failed; + } + if (rc != 0) { + CERROR("Error trying to bind to reserved port %d: %d\n", + local_port, rc); + goto failed; + } + + rc = sock->ops->connect(sock, + (struct sockaddr *)&srvaddr, sizeof(srvaddr), + sock->file->f_flags); + if (rc == 0) + return 0; + + /* EADDRNOTAVAIL probably means we're already connected to the same + * peer/port on the same local port on a differently typed + * connection. Let our caller retry with a different local + * port... */ + *may_retry = (rc == -EADDRNOTAVAIL); + + CDEBUG(*may_retry ? D_NET : D_ERROR, + "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc, + HIPQUAD(route->ksnr_myipaddr), local_port, + HIPQUAD(route->ksnr_ipaddr), route->ksnr_port); + + failed: + fput(sock->file); + return rc; +} + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +struct tcp_opt *sock2tcp_opt(struct sock *sk) +{ + return &(sk->tp_pinfo.af_tcp); +} +#else +struct tcp_opt *sock2tcp_opt(struct sock *sk) +{ + struct tcp_sock *s = (struct tcp_sock *)sk; + return &s->tcp; +} +#endif + +void +ksocknal_lib_push_conn (ksock_conn_t *conn) +{ + struct sock *sk; + struct tcp_opt *tp; + int nonagle; + int val = 1; + int rc; + mm_segment_t oldmm; + + rc = ksocknal_getconnsock (conn); + if (rc != 0) /* being shut down */ + return; + + sk = conn->ksnc_sock->sk; + tp = sock2tcp_opt(sk); + + lock_sock (sk); + nonagle = tp->nonagle; + tp->nonagle = 1; + release_sock (sk); + + oldmm = get_fs (); + set_fs (KERNEL_DS); + + rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, + (char *)&val, sizeof (val)); + LASSERT (rc == 0); + + set_fs (oldmm); + + lock_sock (sk); + tp->nonagle = nonagle; + release_sock (sk); + + ksocknal_putconnsock (conn); +} + +extern void ksocknal_read_callback (ksock_conn_t *conn); +extern void ksocknal_write_callback (ksock_conn_t *conn); +/* + * socket call back in Linux + */ +static void 
+ksocknal_data_ready (struct sock *sk, int n) +{ + ksock_conn_t *conn; + ENTRY; + + /* interleave correctly with closing sockets... */ + read_lock (&ksocknal_data.ksnd_global_lock); + + conn = sk->sk_user_data; + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ + LASSERT (sk->sk_data_ready != &ksocknal_data_ready); + sk->sk_data_ready (sk, n); + } else + ksocknal_read_callback(conn); + + read_unlock (&ksocknal_data.ksnd_global_lock); + + EXIT; +} + +static void +ksocknal_write_space (struct sock *sk) +{ + ksock_conn_t *conn; + + /* interleave correctly with closing sockets... */ + read_lock (&ksocknal_data.ksnd_global_lock); + + conn = sk->sk_user_data; + + CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", + sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn, + (conn == NULL) ? "" : (conn->ksnc_tx_ready ? + " ready" : " blocked"), + (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? + " scheduled" : " idle"), + (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? + " empty" : " queued")); + + if (conn == NULL) { /* raced with ksocknal_terminate_conn */ + LASSERT (sk->sk_write_space != &ksocknal_write_space); + sk->sk_write_space (sk); + + read_unlock (&ksocknal_data.ksnd_global_lock); + return; + } + + if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */ + ksocknal_write_callback(conn); + + /* Clear SOCK_NOSPACE _after_ ksocknal_write_callback so the + * ENOMEM check in ksocknal_transmit is race-free (think about + * it). 
*/ + + clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags); + } + + read_unlock (&ksocknal_data.ksnd_global_lock); +} + +void +ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn) +{ + conn->ksnc_saved_data_ready = sock->sk->sk_data_ready; + conn->ksnc_saved_write_space = sock->sk->sk_write_space; +} + +void +ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn) +{ + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = ksocknal_data_ready; + sock->sk->sk_write_space = ksocknal_write_space; + return; +} + +void +ksocknal_lib_act_callback(struct socket *sock, ksock_conn_t *conn) +{ + ksocknal_data_ready (sock->sk, 0); + ksocknal_write_space (sock->sk); + return; +} + +void +ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn) +{ + /* Remove conn's network callbacks. + * NB I _have_ to restore the callback, rather than storing a noop, + * since the socket could survive past this module being unloaded!! */ + sock->sk->sk_data_ready = conn->ksnc_saved_data_ready; + sock->sk->sk_write_space = conn->ksnc_saved_write_space; + + /* A callback could be in progress already; they hold a read lock + * on ksnd_global_lock (to serialise with me) and NOOP if + * sk_user_data is NULL. 
*/ + sock->sk->sk_user_data = NULL; + + return ; +} + diff --git a/lnet/klnds/socklnd/socklnd_lib-linux.h b/lnet/klnds/socklnd/socklnd_lib-linux.h new file mode 100644 index 0000000..6129fdc --- /dev/null +++ b/lnet/klnds/socklnd/socklnd_lib-linux.h @@ -0,0 +1,125 @@ +#define DEBUG_PORTAL_ALLOC +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#ifndef __LINUX_SOCKNAL_LIB_H__ +#define __LINUX_SOCKNAL_LIB_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +# include +#endif + +#include +#include + +#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10) + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72)) +# define sk_allocation allocation +# define sk_data_ready data_ready +# define sk_write_space write_space +# define sk_user_data user_data +# define sk_prot prot +# define sk_sndbuf sndbuf +# define sk_socket socket +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) +# define sk_wmem_queued wmem_queued +# define sk_err err +#endif + +#define SOCKNAL_ARCH_EAGER_ACK 0 +#define SOCK_WMEM_QUEUED(so) ((so)->sk->sk_wmem_queued) +#define SOCK_ERROR(so) ((so)->sk->sk_err) +#define SOCK_TEST_NOSPACE(so) test_bit(SOCK_NOSPACE, &(so)->flags) + +#define KSN_SOCK2FILE(so) ((so)->file) +#define KSN_CONN2FILE(conn) ((conn)->ksnc_sock->file) + +#ifndef CONFIG_SMP +static inline +int ksocknal_nsched(void) +{ + return 1; +} +#else +#include +# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT)) +static inline int +ksocknal_nsched(void) +{ + return num_online_cpus(); +} + +static inline int +ksocknal_sched2cpu(int i) +{ + return i; +} + 
+static inline int +ksocknal_irqsched2cpu(int i) +{ + return i; +} +# else +static inline int +ksocknal_nsched(void) +{ + if (smp_num_siblings == 1) + return (num_online_cpus()); + + /* We need to know if this assumption is crap */ + LASSERT (smp_num_siblings == 2); + return (num_online_cpus()/2); +} + +static inline int +ksocknal_sched2cpu(int i) +{ + if (smp_num_siblings == 1) + return i; + + return (i * 2); +} + +static inline int +ksocknal_irqsched2cpu(int i) +{ + return (ksocknal_sched2cpu(i) + 1); +} +# endif +#endif + +#endif diff --git a/lnet/klnds/viblnd/viblnd.h b/lnet/klnds/viblnd/viblnd.h index 7866aba..cf90aed 100644 --- a/lnet/klnds/viblnd/viblnd.h +++ b/lnet/klnds/viblnd/viblnd.h @@ -53,7 +53,7 @@ #define IBNAL_CHECK_ADVERT -#include +#include #include #include #include @@ -159,7 +159,7 @@ #define GSI_TIMEOUT 5 #define GSI_RETRY 10 -typedef struct +typedef struct { int kib_io_timeout; /* comms timeout (seconds) */ struct ctl_table_header *kib_sysctl; /* sysctl interface */ @@ -185,8 +185,8 @@ typedef struct __u32 md_rkey; __u64 md_addr; } kib_md_t __attribute__((packed)); - -typedef struct + +typedef struct { /* initialisation state. These values are sorted by their initialization order. */ enum { @@ -235,7 +235,7 @@ typedef struct struct list_head kib_sched_txq; /* tx requiring attention */ struct list_head kib_sched_rxq; /* rx requiring attention */ spinlock_t kib_sched_lock; /* serialise */ - + struct kib_tx *kib_tx_descs; /* all the tx descriptors */ kib_pages_t *kib_tx_pages; /* premapped tx msg pages */ @@ -244,7 +244,7 @@ typedef struct wait_queue_head_t kib_idle_tx_waitq; /* block here for tx descriptor */ __u64 kib_next_tx_cookie; /* RDMA completion cookie */ spinlock_t kib_tx_lock; /* serialise */ - + vv_hca_h_t kib_hca; /* The HCA */ vv_hca_attrib_t kib_hca_attrs; /* HCA attributes */ @@ -257,7 +257,7 @@ typedef struct void *kib_listen_handle; /* where I listen for connections */ /* These fields are left untouched, so they can be shared. 
*/ - union { + union { cm_drequest_data_t dreq_data; cm_dreply_data_t drep_data; } cm_data; @@ -293,7 +293,7 @@ typedef struct /* these arrays serve two purposes during rdma. they are built on the passive * side and sent to the active side as remote arguments. On the active side - * the descs are used as a data structure on the way to local gather items. + * the descs are used as a data structure on the way to local gather items. * the different roles result in split local/remote meaning of desc->rd_key */ typedef struct { @@ -412,7 +412,7 @@ typedef struct kib_connreq } kib_connreq_t; typedef struct kib_conn -{ +{ struct kib_peer *ibc_peer; /* owning peer */ struct list_head ibc_list; /* stash on peer's conn list */ __u64 ibc_incarnation; /* which instance of the peer */ @@ -539,10 +539,10 @@ static inline int wrq_signals_completion(vv_wr_t *wrq) /******************************************************************************/ static inline struct list_head * -kibnal_nid2peerlist (ptl_nid_t nid) +kibnal_nid2peerlist (ptl_nid_t nid) { unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size; - + return (&kibnal_data.kib_peers [hash]); } @@ -589,7 +589,7 @@ static inline __u64 kibnal_page2phys (struct page *p) { __u64 page_number = p - mem_map; - + return (page_number << PAGE_SHIFT); } #else @@ -649,7 +649,7 @@ static void dump_qp(kib_conn_t *conn) void *qp_context; vv_return_t retval; - CERROR("QP dumping %p\n", conn); + CERROR("QP dumping %p\n", conn); retval = vv_qp_query(kibnal_data.kib_hca, conn->ibc_qp, &qp_context, &conn->ibc_qp_attrs); if (retval) { @@ -691,7 +691,7 @@ static void dump_qp(kib_conn_t *conn) static void dump_wqe(vv_wr_t *wr) { CERROR("Dumping send WR %p\n", wr); - + CERROR(" wr_id = %llx\n", wr->wr_id); CERROR(" completion_notification = %d\n", wr->completion_notification); CERROR(" scatgat_list = %p\n", wr->scatgat_list); @@ -704,14 +704,14 @@ static void dump_wqe(vv_wr_t *wr) } CERROR(" wr_type = %d\n", wr->wr_type); - + 
switch(wr->wr_type) { case vv_wr_send: CERROR(" send\n"); - + CERROR(" fance_indicator = %d\n", wr->type.send.send_qp_type.rc_type.fance_indicator); break; - + case vv_wr_receive: break; @@ -785,7 +785,7 @@ extern void kibnal_destroy_peer (kib_peer_t *peer); extern int kibnal_del_peer (ptl_nid_t nid, int single_share); extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid); extern void kibnal_unlink_peer_locked (kib_peer_t *peer); -extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, +extern int kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation); extern kib_conn_t *kibnal_create_conn (void); extern void kibnal_put_conn (kib_conn_t *conn); @@ -803,9 +803,9 @@ extern int kibnal_scheduler(void *arg); extern int kibnal_connd (void *arg); extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob); extern void kibnal_close_conn (kib_conn_t *conn, int why); -extern void kibnal_start_active_rdma (int type, int status, - kib_rx_t *rx, lib_msg_t *libmsg, - unsigned int niov, +extern void kibnal_start_active_rdma (int type, int status, + kib_rx_t *rx, lib_msg_t *libmsg, + unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov, size_t offset, size_t nob); diff --git a/lnet/libcfs/Info.plist b/lnet/libcfs/Info.plist new file mode 100644 index 0000000..7e3cc08 --- /dev/null +++ b/lnet/libcfs/Info.plist @@ -0,0 +1,33 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + libcfs + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.libcfs + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kernel.bsd + 1.1 + com.apple.kernel.iokit + 1.0.0b1 + com.apple.kernel.mach + 1.0.0b1 + + + diff --git a/lnet/libcfs/Makefile.in b/lnet/libcfs/Makefile.in index 15fff12..aaaad93 100644 --- a/lnet/libcfs/Makefile.in +++ b/lnet/libcfs/Makefile.in @@ -1,4 +1,33 @@ MODULES = libcfs -libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o + +libcfs-linux-objs := linux-tracefile.o linux-debug.o +libcfs-linux-objs += linux-prim.o linux-mem.o +libcfs-linux-objs += linux-fs.o linux-sync.o +libcfs-linux-objs += linux-lwt.o linux-proc.o linux-curproc.o +libcfs-linux-objs += linux-utils.o linux-module.o + +ifeq ($(PATCHLEVEL),6) +libcfs-linux-objs := $(addprefix linux/,$(libcfs-linux-objs)) +endif + +default: all + +ifeq (@linux25@,no) +sources: + @for i in $(libcfs-linux-objs:%.o=%.c) ; do \ + echo "ln -s @srcdir@/linux/$$i ." ; \ + ln -sf @srcdir@/linux/$$i . || exit 1 ; \ + done + +else +sources: + +endif + +libcfs-all-objs := debug.o lwt.o module.o tracefile.o watchdog.o + +libcfs-objs := $(libcfs-linux-objs) $(libcfs-all-objs) + +EXTRA_PRE_CFLAGS := -I@LUSTRE@/../portals/libcfs @INCLUDE_RULES@ diff --git a/lnet/libcfs/autoMakefile.am b/lnet/libcfs/autoMakefile.am index 9c27693..824ab1a 100644 --- a/lnet/libcfs/autoMakefile.am +++ b/lnet/libcfs/autoMakefile.am @@ -3,9 +3,36 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution +SUBDIRS := darwin linux + if MODULES + +if LINUX modulenet_DATA := libcfs$(KMODEXT) endif -MOSTLYCLEANFILES = *.o *.ko *.mod.c -DIST_SOURCES = $(libcfs-objs:%.o=%.c) tracefile.h +if DARWIN +macos_PROGRAMS := libcfs + +libcfs_SOURCES := debug.c module.c tracefile.c darwin/darwin-debug.c \ + darwin/darwin-fs.c darwin/darwin-mem.c darwin/darwin-module.c \ + darwin/darwin-prim.c darwin/darwin-proc.c \ + darwin/darwin-tracefile.c darwin/darwin-utils.c \ + darwin/darwin-sync.c darwin/darwin-curproc.c user-prim.c user-lock.c + +libcfs_CFLAGS := $(EXTRA_KCFLAGS) +libcfs_LDFLAGS := $(EXTRA_KLDFLAGS) +libcfs_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install-data-hook: fix-kext-ownership + +endif + +endif + +EXTRA_DIST := Info.plist + +MOSTLYCLEANFILES = *.o *.ko *.mod.c linux-*.c linux/*.o darwin/*.o libcfs +DIST_SOURCES = $(libcfs-all-objs:%.o=%.c) tracefile.h diff --git a/lnet/libcfs/darwin/.cvsignore b/lnet/libcfs/darwin/.cvsignore new file mode 100644 index 0000000..282522d --- /dev/null +++ b/lnet/libcfs/darwin/.cvsignore @@ -0,0 +1,2 @@ +Makefile +Makefile.in diff --git a/lnet/libcfs/darwin/Makefile.am b/lnet/libcfs/darwin/Makefile.am new file mode 100644 index 0000000..8e77294 --- /dev/null +++ b/lnet/libcfs/darwin/Makefile.am @@ -0,0 +1,11 @@ +EXTRA_DIST := \ + darwin-mem.c \ + darwin-proc.c \ + darwin-utils.c \ + darwin-debug.c \ + darwin-module.c \ + darwin-sync.c \ + darwin-fs.c \ + darwin-prim.c \ + darwin-tracefile.c \ + darwin-curproc.c diff --git a/lnet/libcfs/darwin/darwin-curproc.c b/lnet/libcfs/darwin/darwin-curproc.c new file mode 100644 index 0000000..d930051 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-curproc.c @@ -0,0 +1,124 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API implementation for XNU kernel + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for XNU kernel. + */ + +static inline struct ucred *curproc_ucred(void) +{ + return current_proc()->p_cred->pc_ucred; +} + +uid_t cfs_curproc_uid(void) +{ + return curproc_ucred()->cr_uid; +} + +gid_t cfs_curproc_gid(void) +{ + LASSERT(curproc_ucred()->cr_ngroups > 0); + return curproc_ucred()->cr_groups[0]; +} + +uid_t cfs_curproc_fsuid(void) +{ + return current_proc()->p_cred->p_ruid; +} + +gid_t cfs_curproc_fsgid(void) +{ + return current_proc()->p_cred->p_rgid; +} + +pid_t cfs_curproc_pid(void) +{ + return current_proc()->p_pid; +} + +int cfs_curproc_groups_nr(void) +{ + LASSERT(curproc_ucred()->cr_ngroups > 0); + return curproc_ucred()->cr_ngroups - 1; +} + +int cfs_curproc_is_in_groups(gid_t gid) +{ + int i; + struct ucred *cr; + + cr = curproc_ucred(); + LASSERT(cr != NULL); + + for (i = 0; i < cr->cr_ngroups; ++ i) { + if (cr->cr_groups[i] == gid) + return 1; + } + return 0; +} + +/* + * Copy at most @size group ids, starting at cr_groups[1], into @array. + * cr_groups[0] is the gid returned by cfs_curproc_gid(), so only + * cr_ngroups - 1 entries follow it (the count cfs_curproc_groups_nr() + * reports); bounding by cr_ngroups would copy one entry too many. + */ +void cfs_curproc_groups_dump(gid_t *array, int size) +{ + struct ucred *cr; + + cr = curproc_ucred(); + LASSERT(cr != NULL); + CLASSERT(sizeof array[0] == sizeof (__u32)); + + size = min_t(int, size, cr->cr_ngroups - 1); + memcpy(array, 
&cr->cr_groups[1], size * sizeof(gid_t)); +} + +mode_t cfs_curproc_umask(void) +{ + return current_proc()->p_fd->fd_cmask; +} + +char *cfs_curproc_comm(void) +{ + return current_proc()->p_comm; +} + +cfs_kernel_cap_t cfs_curproc_cap_get(void) +{ + return 0; +} + +void cfs_curproc_cap_set(cfs_kernel_cap_t cap) +{ + return; +} + + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/libcfs/darwin/darwin-debug.c b/lnet/libcfs/darwin/darwin-debug.c new file mode 100644 index 0000000..970c5b9 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-debug.c @@ -0,0 +1,25 @@ +# define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include "tracefile.h" + +void portals_debug_dumpstack(cfs_task_t *tsk) +{ + return; +} + +cfs_task_t *portals_current(void) +{ + return cfs_current(); +} + +int portals_arch_debug_init(unsigned long bufsize) +{ + return 0; +} + +int portals_arch_debug_cleanup(void) +{ + return 0; +} diff --git a/lnet/libcfs/darwin/darwin-fs.c b/lnet/libcfs/darwin/darwin-fs.c new file mode 100644 index 0000000..5b0f44c --- /dev/null +++ b/lnet/libcfs/darwin/darwin-fs.c @@ -0,0 +1,330 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Darwin porting library + * Make things easy to port + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +/* + * Kernel APIs for file system in xnu + * + * Public functions + */ +int +filp_node_size(struct file *fp, off_t *size) +{ + struct vnode *vp = (struct vnode *)fp->f_data; + struct stat sb; + int rc; + + rc = vn_stat(vp, &sb, current_proc()); + if (rc) { + *size = 0; + return rc; + } + *size = sb.st_size; + return 0; +} + +cfs_file_t * +filp_open(const char * filename, int flags, int mode, int *err) +{ + struct nameidata nd; + register cfs_file_t *fp; + register struct vnode *vp; + cfs_file_t *nfp; + int rc; + extern struct fileops vnops; + extern int nfiles; + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + nfiles++; + MALLOC_ZONE(nfp, cfs_file_t *, sizeof(cfs_file_t), M_FILE, M_WAITOK|M_ZERO); + bzero(nfp, sizeof(cfs_file_t)); + nfp->f_count = 1; + fp = nfp; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)filename, current_proc()); + if ((rc = vn_open(&nd, flags, mode)) != 0){ + printf("filp_open failed at (%d)\n", rc); + if (err != NULL) + *err = rc; + ffree(fp); + CFS_CONE_EX; + return NULL; + } + vp = nd.ni_vp; + fp->f_flag = flags & FMASK; + fp->f_type = DTYPE_VNODE; + fp->f_ops = &vnops; + fp->f_data = (caddr_t)vp; + fp->f_cred = current_proc()->p_ucred; + /* + * Hold cred to increase reference + */ + crhold(fp->f_cred); + /* + * vnode is locked inside vn_open for lookup, + * we should release the lock before return + */ + VOP_UNLOCK(vp, 0, current_proc()); + CFS_CONE_EX; + + return fp; +} + +static int +frele_internal(cfs_file_t *fp) +{ + if (fp->f_count == (short)0xffff) + panic("frele of lustre: stale"); + if (--fp->f_count < 0) + panic("frele 
of lustre: count < 0"); + return ((int)fp->f_count); +} + +int +filp_close (cfs_file_t *fp) +{ + struct vnode *vp; + CFS_DECL_CONE_DATA; + + if (fp == NULL) + return 0; + + CFS_CONE_IN; + if (frele_internal(fp) > 0) + goto out; + vp = (struct vnode *)fp->f_data; + (void )vn_close(vp, fp->f_flag, fp->f_cred, current_proc()); + /* + * ffree(fp); + * Dont use ffree to release fp!!!! + * ffree will call LIST_REMOVE(fp), + * but fp is not in any list, this will + * cause kernel panic + */ + struct ucred *cred; + cred = fp->f_cred; + if (cred != NOCRED) { + fp->f_cred = NOCRED; + crfree(cred); + } + extern int nfiles; + nfiles--; + memset(fp, 0xff, sizeof *fp); + fp->f_count = (short)0xffff; + FREE_ZONE(fp, sizeof *fp, M_FILE); +out: + CFS_CONE_EX; + return 0; +} + +extern void bwillwrite(void); + +/* + * Write buffer to filp inside kernel + */ +int +filp_write (cfs_file_t *fp, void *buf, size_t nbyte, off_t *pos) +{ + struct uio auio; + struct iovec aiov; + struct proc *p = current_proc(); + long cnt, error = 0; + CFS_DECL_CONE_DATA; + + aiov.iov_base = (void *)(uintptr_t)buf; + aiov.iov_len = nbyte; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + if (pos != NULL) + auio.uio_offset = *pos; + else + auio.uio_offset = (off_t)-1; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + cnt = nbyte; + CFS_CONE_IN; + if (fp->f_type == DTYPE_VNODE) + bwillwrite(); /* empty stuff now */ + if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { + if (auio.uio_resid != cnt && (error == ERESTART ||\ + error == EINTR || error == EWOULDBLOCK)) + error = 0; + /* The socket layer handles SIGPIPE */ + if (error == EPIPE && fp->f_type != DTYPE_SOCKET) + psignal(p, SIGPIPE); + } + CFS_CONE_EX; + if (error != 0) + cnt = -error; + else + cnt -= auio.uio_resid; + if (pos != NULL) + *pos = auio.uio_offset; + return cnt; +} + +/* + * Read from filp inside kernel + */ +int +filp_read (cfs_file_t 
*fp, void *buf, size_t nbyte, off_t *pos) +{ + struct uio auio; + struct iovec aiov; + struct proc *p = current_proc(); + long cnt, error = 0; + CFS_DECL_CONE_DATA; + + aiov.iov_base = (caddr_t)buf; + aiov.iov_len = nbyte; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + if (pos != NULL) + auio.uio_offset = *pos; + else + auio.uio_offset = (off_t)-1; + if (nbyte > INT_MAX) + return (EINVAL); + auio.uio_resid = nbyte; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + cnt = nbyte; + CFS_CONE_IN; + if ((error = fo_read(fp, &auio, fp->f_cred, 0, p)) != 0) { + if (auio.uio_resid != cnt && (error == ERESTART || + error == EINTR || error == EWOULDBLOCK)) + error = 0; + } + CFS_CONE_EX; + if (error != 0) + cnt = -error; + else + cnt -= auio.uio_resid; + if (pos != NULL) + *pos = auio.uio_offset; + + return cnt; +} + +int +filp_fsync (cfs_file_t *fp) +{ + struct vnode *vp = (struct vnode *)fp->f_data; + struct proc *p = current_proc(); + int error = 0; + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + if (fref(fp) == -1) { + CFS_CONE_EX; + return (-EBADF); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p); + VOP_UNLOCK(vp, 0, p); + frele(fp); + CFS_CONE_EX; + + return error; +} + +int +ref_file(cfs_file_t *fp) +{ + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + fref(fp); + CFS_CONE_EX; + return 0; +} + +int +rele_file(cfs_file_t *fp) +{ + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + frele(fp); + CFS_CONE_EX; + return 0; +} + +/* + * Private functions + */ +void vrele_safe(struct vnode *nd) +{ + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + vrele(nd); + CFS_CONE_EX; +} + +int +path_lookup(const char *path, unsigned int flags, struct nameidata *nd) +{ + int ret = 0; + CFS_DECL_CONE_DATA; + + CFS_CONE_IN; + NDINIT(nd, LOOKUP, FOLLOW, UIO_SYSSPACE, (char *)path, current_proc()); + if ((ret = namei(nd)) != 0){ + CERROR("path_lookup fail!\n"); + } + CFS_CONE_EX; + + return ret; +} + +int +file_count(struct file *fp) +{ + return 
fcount(fp); +} + + diff --git a/lnet/libcfs/darwin/darwin-mem.c b/lnet/libcfs/darwin/darwin-mem.c new file mode 100644 index 0000000..4cf16d7 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-mem.c @@ -0,0 +1,455 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Darwin porting library + * Make things easy to port + */ +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * Definition of struct zone, copied from osfmk/kern/zalloc.h. + */ +struct zone_hack { + int count; /* Number of elements used now */ + vm_offset_t free_elements; + vm_size_t cur_size; /* current memory utilization */ + vm_size_t max_size; /* how large can this zone grow */ + vm_size_t elem_size; /* size of an element */ + vm_size_t alloc_size; /* size used for more memory */ + char *zone_name; /* a name for the zone */ + unsigned int + /* boolean_t */ exhaustible :1, /* (F) merely return if empty? 
*/ + /* boolean_t */ collectable :1, /* (F) garbage collect empty pages */ + /* boolean_t */ expandable :1, /* (T) expand zone (with message)? */ + /* boolean_t */ allows_foreign :1,/* (F) allow non-zalloc space */ + /* boolean_t */ doing_alloc :1, /* is zone expanding now? */ + /* boolean_t */ waiting :1, /* is thread waiting for expansion? */ + /* boolean_t */ async_pending :1; /* asynchronous allocation pending? */ + struct zone_hack * next_zone; /* Link for all-zones list */ + /* + * more fields follow, but we don't need them. We only need + * offset from the beginning of struct zone to ->next_zone + * field: it allows us to scan the list of all zones. + */ +}; + +decl_simple_lock_data(extern, all_zones_lock) + +/* + * returns true iff zone with name @name already exists. + * + * XXX nikita: this function is defined in this file only because there is no + * better place to put it in. + */ +zone_t cfs_find_zone(const char *name) +{ + struct zone_hack *scan; + + /* from osfmk/kern/zalloc.c */ + extern zone_t first_zone; + + LASSERT(name != NULL); + + simple_lock(&all_zones_lock); + for (scan = (struct zone_hack *)first_zone; + scan != NULL; scan = scan->next_zone) { + if (!strcmp(scan->zone_name, name)) + break; + } + simple_unlock(&all_zones_lock); + return((zone_t)scan); +} + +/* + * our wrapper around kern/zalloc.c:zinit() + * + * Creates copy of name and calls zinit() to do real work. Needed because zone + * survives kext unloading, so that @name cannot be just static string + * embedded into kext image. 
+ */ +zone_t cfs_zinit(vm_size_t size, vm_size_t max, int alloc, const char *name) +{ + char *cname; + + cname = _MALLOC(strlen(name) + 1, M_TEMP, M_WAITOK); + LASSERT(cname != NULL); + return zinit(size, max, alloc, strcpy(cname, name)); +} + +/* + * Create a cache descriptor for objects of @objsize bytes, backed by the + * zone called @name (an existing zone of that name is reused, otherwise + * one is created). @off and @arg1..@arg3 are not used by this + * implementation. Returns NULL on failure. + */ +cfs_mem_cache_t * +cfs_mem_cache_create (const char *name, size_t objsize, size_t off, unsigned long arg1, + void (*arg2)(void *, cfs_mem_cache_t *, unsigned long), + void (*arg3)(void *, cfs_mem_cache_t *, unsigned long)) +{ + cfs_mem_cache_t *new = NULL; + + /* size the descriptor itself: objsize is the size of the cached + * objects and may be smaller than cfs_mem_cache_t */ + MALLOC(new, cfs_mem_cache_t *, sizeof(*new), M_TEMP, M_WAITOK|M_ZERO); + if (new == NULL){ + CERROR("cfs_mem_cache created fail!\n"); + return NULL; + } + new->size = objsize; + CFS_INIT_LIST_HEAD(&new->link); + strncpy(new->name, name, 1 + strlen(name)); + new->zone = cfs_find_zone(name); + if (new->zone == NULL) { + new->zone = cfs_zinit (objsize, KMEM_MAX_ZONE * objsize, 0, name); + if (new->zone == NULL) { + CERROR("zone create fault!\n"); + FREE (new, M_TEMP); + return NULL; + } + } + return new; +} + +int +cfs_mem_cache_destroy (cfs_mem_cache_t *cachep) +{ + FREE (cachep, M_TEMP); + return 0; +} + +void * +cfs_mem_cache_alloc (cfs_mem_cache_t *cachep, int flags) +{ + return (void *)zalloc(cachep->zone); +} + +void +cfs_mem_cache_free (cfs_mem_cache_t *cachep, void *objp) +{ + zfree (cachep->zone, (vm_address_t)objp); +} + +/* --------------------------------------------------------------------------- + * Page operations + * + * --------------------------------------------------------------------------- */ + +/* + * "Raw" pages + */ + +extern vm_map_t zone_map; +static inline vm_map_t page_map(struct xnu_raw_page *pg) +{ + LASSERT(pg != NULL); + + return pg->order == 0 ? zone_map : kernel_map; +} + +static int raw_page_init(struct xnu_raw_page *pg) +{ + vm_size_t size = (1UL << pg->order) * PAGE_SIZE; + int upl_flags = UPL_SET_INTERNAL | + UPL_SET_LITE | UPL_SET_IO_WIRE | UPL_COPYOUT_FROM; + int kr = 0; + + /* XXX is it necessary? 
*/ + kr = vm_map_get_upl(page_map(pg), + pg->virtual, &size, &pg->upl, 0, 0, &upl_flags, 0); + return kr; +} + +static void raw_page_done(struct xnu_raw_page *pg) +{ + ubc_upl_abort(pg->upl, UPL_ABORT_FREE_ON_EMPTY); + return; +} + +static struct xnu_page_ops raw_page_ops; +static struct xnu_page_ops *page_ops[XNU_PAGE_NTYPES] = { + [XNU_PAGE_RAW] = &raw_page_ops +}; + +static int page_type_is_valid(cfs_page_t *page) +{ + LASSERT(page != NULL); + return 0 <= page->type && page->type < XNU_PAGE_NTYPES; +} + +static int page_is_raw(cfs_page_t *page) +{ + return page->type == XNU_PAGE_RAW; +} + +static struct xnu_raw_page *as_raw(cfs_page_t *page) +{ + LASSERT(page_is_raw(page)); + return list_entry(page, struct xnu_raw_page, header); +} + +static void *raw_page_address(cfs_page_t *pg) +{ + return (void *)as_raw(pg)->virtual; +} + +static void *raw_page_map(cfs_page_t *pg) +{ + return (void *)as_raw(pg)->virtual; +} + +static void raw_page_unmap(cfs_page_t *pg) +{ +} + +static struct xnu_page_ops raw_page_ops = { + .page_map = raw_page_map, + .page_unmap = raw_page_unmap, + .page_address = raw_page_address +}; + + +extern vm_size_t kalloc_max; +extern vm_size_t kalloc_max_prerounded; +extern int first_k_zone; +extern struct zone *k_zone[16]; +extern vm_offset_t zalloc_canblock( register zone_t, boolean_t ); +extern vm_map_t zone_map; + +static inline vm_address_t +page_zone_alloc(int flags, int order) +{ + register int zindex; + register vm_size_t allocsize; + vm_size_t size = (1UL << order) * PAGE_SIZE; + vm_address_t addr; + kern_return_t kr; + + assert(order >= 0); + if (size > PAGE_SIZE){ + /* XXX Liang: + * zalloc_canblock() call kernel_memory_allocate to allocate + * pages, kernel_memory_allocate cannot guarantee contig pages! + * So any request bigger then PAGE_SIZE should not call zalloc() + * + * NB. kmem_alloc_contig could be very slow!!!! 
Anyway, I dont + * know what will happen if order >= 1 :-( + * */ + CDEBUG(D_MALLOC, "Allocate contig pages!\n"); + kr = kmem_alloc_contig(kernel_map, &addr, size, 0, 0); + if (kr) + return 0; + return addr; + } + allocsize = KALLOC_MINSIZE; + zindex = first_k_zone; + while (allocsize < size) { + allocsize <<= 1; + zindex++; + } + assert(allocsize < kalloc_max); + /* parenthesised: '!=' binds tighter than '&' */ + if ((flags & M_NOWAIT) != 0) + addr = zalloc_canblock(k_zone[zindex], FALSE); + else + addr = zalloc_canblock(k_zone[zindex], TRUE); + return addr; +} + +/* + * Allocate a "page", actually upl of darwin + * + * @flags are CFS_ALLOC_* bits; they are translated to M_* bits before + * being handed to the BSD allocator and to page_zone_alloc(). Returns + * NULL on failure, with nothing left allocated. + */ +struct xnu_raw_page *alloc_raw_pages(u_int32_t flags, u_int32_t order) +{ + kern_return_t kr; + vm_size_t size = (1UL << order) * PAGE_SIZE; + u_int32_t mflags = 0; + struct xnu_raw_page *pg; + + /* parenthesised: '!=' binds tighter than '&' */ + if ((flags & CFS_ALLOC_ATOMIC) != 0) + mflags |= M_NOWAIT; + else + mflags |= M_WAITOK; + if ((flags & CFS_ALLOC_ZERO) != 0) + mflags |= M_ZERO; + + MALLOC (pg, struct xnu_raw_page *, sizeof *pg, M_TEMP, mflags); + if (pg == NULL) + return NULL; + pg->header.type = XNU_PAGE_RAW; + pg->order = order; + cfs_set_page_count(&pg->header, 1); + /* page_zone_alloc() tests M_NOWAIT, so pass the translated flags */ + pg->virtual = page_zone_alloc(mflags, order); + if (!pg->virtual) { + /* don't leak the descriptor when no page could be had */ + FREE(pg, M_TEMP); 
+ return NULL; + } + + kr = raw_page_init(pg); + if (kr != 0) { + size = (1UL << order) * PAGE_SIZE; + kmem_free(page_map(pg), pg->virtual, size); + /* descriptor is no longer referenced by anyone */ + FREE(pg, M_TEMP); + return NULL; + } + return pg; +} + +/* Free a "page" */ +void free_raw_pages(struct xnu_raw_page *pg, u_int32_t order) +{ + vm_size_t size = (1UL << order) * PAGE_SIZE; + + if (!atomic_dec_and_test(&pg->count)) + return; + raw_page_done(pg); + kmem_free(page_map(pg), pg->virtual, size); + FREE(pg, M_TEMP); +} + +cfs_page_t *cfs_alloc_pages(u_int32_t flags, u_int32_t order) +{ + struct xnu_raw_page *pg = alloc_raw_pages(flags, order); + + /* propagate allocation failure instead of forming &NULL->header */ + return pg != NULL ? &pg->header : NULL; +} + +cfs_page_t *cfs_alloc_page(u_int32_t flags) +{ + return cfs_alloc_pages(flags, 0); +} + +void cfs_free_pages(cfs_page_t *pages, int order) +{ + free_raw_pages(as_raw(pages), order); +} + +void cfs_free_page(cfs_page_t *page) +{ + cfs_free_pages(page, 0); +} + +void cfs_get_page(cfs_page_t *p) +{ + atomic_inc(&as_raw(p)->count); +} + +int cfs_put_page_testzero(cfs_page_t *p) +{ + return atomic_dec_and_test(&as_raw(p)->count); +} + +int cfs_page_count(cfs_page_t *p) +{ + return atomic_read(&as_raw(p)->count); +} + +void cfs_set_page_count(cfs_page_t *p, int v) +{ + atomic_set(&as_raw(p)->count, v); +} + +/* + * Generic page operations + */ + +void *cfs_page_address(cfs_page_t *pg) +{ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_address(pg); +} + +void *cfs_kmap(cfs_page_t *pg) +{ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_map(pg); +} + +void cfs_kunmap(cfs_page_t *pg) +{ + LASSERT(page_type_is_valid(pg)); + return page_ops[pg->type]->page_unmap(pg); +} + +void xnu_page_ops_register(int type, struct xnu_page_ops *ops) +{ + LASSERT(0 <= type && type < XNU_PAGE_NTYPES); + LASSERT(ops != NULL); + LASSERT(page_ops[type] == NULL); + + page_ops[type] = ops; +} + +void xnu_page_ops_unregister(int type) +{ + LASSERT(0 <= type && type < XNU_PAGE_NTYPES); + LASSERT(page_ops[type] != NULL); + + page_ops[type] = NULL; +} + +/* + * Portable memory allocator 
API + */ +#ifdef HAVE_GET_PREEMPTION_LEVEL +extern int get_preemption_level(void); +#else +#define get_preemption_level() (0) +#endif + +void *cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + int mflags; + + mflags = 0; + if (flags & CFS_ALLOC_ATOMIC != 0) { + mflags |= 0 /* M_NOWAIT */; + } else { + LASSERT(get_preemption_level() == 0); + mflags |= M_WAITOK; + } + + if (flags & CFS_ALLOC_ZERO != 0) + mflags |= M_ZERO; + + return _MALLOC(nr_bytes, M_TEMP, mflags); +} + +void cfs_free(void *addr) +{ + return _FREE(addr, M_TEMP); +} + +void *cfs_alloc_large(size_t nr_bytes) +{ + LASSERT(get_preemption_level() == 0); + return _MALLOC(nr_bytes, M_TEMP, M_WAITOK); +} + +void cfs_free_large(void *addr) +{ + return _FREE(addr, M_TEMP); +} diff --git a/lnet/libcfs/darwin/darwin-module.c b/lnet/libcfs/darwin/darwin-module.c new file mode 100644 index 0000000..4f858624 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-module.c @@ -0,0 +1,159 @@ +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_PORTALS +#include +#include + +int portal_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct portal_ioctl_hdr *hdr; + struct portal_ioctl_data *data; + int err = 0; + ENTRY; + + hdr = (struct portal_ioctl_hdr *)buf; + data = (struct portal_ioctl_data *)buf; + /* portals_ioctl_data has been copied in by ioctl of osx */ + memcpy(buf, arg, sizeof(struct portal_ioctl_data)); + + if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { + CERROR("PORTALS: version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len + buf >= end) { + CERROR("PORTALS: user buffer exceeds kernel buffer\n"); + RETURN(-EINVAL); + } + + if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { + CERROR("PORTALS: user buffer too small for ioctl\n"); + RETURN(-EINVAL); + } + buf += size_round(sizeof(*data)); + + if (data->ioc_inllen1) { + err = copy_from_user(buf, data->ioc_inlbuf1, size_round(data->ioc_inllen1)); + if (err) + RETURN(err); + data->ioc_inlbuf1 = buf; + 
buf += size_round(data->ioc_inllen1); + } + + if (data->ioc_inllen2) { + copy_from_user(buf, data->ioc_inlbuf2, size_round(data->ioc_inllen2)); + if (err) + RETURN(err); + data->ioc_inlbuf2 = buf; + } + + RETURN(err); +} + +extern struct cfs_psdev_ops libcfs_psdev_ops; +struct portals_device_userstate *mdev_state[16]; + +static int +libcfs_psdev_open(dev_t dev, int flags, int devtype, struct proc *p) +{ + struct portals_device_userstate *mstat = NULL; + int rc = 0; + int devid; + devid = minor(dev); + + if (devid > 16) return (-ENXIO); + + if (libcfs_psdev_ops.p_open != NULL) + rc = libcfs_psdev_ops.p_open(0, &mstat); + else + rc = -EPERM; + if (!rc) + return rc; + mdev_state[devid] = mstat; + return rc; +} + +static int +libcfs_psdev_close(dev_t dev, int flags, int mode, struct proc *p) +{ + int devid; + devid = minor(dev); + int rc = 0; + + if (devid > 16) return (-ENXIO); + + if (libcfs_psdev_ops.p_close != NULL) + rc = libcfs_psdev_ops.p_close(0, mdev_state[devid]); + else + rc = -EPERM; + if (rc) + return rc; + mdev_state[devid] = NULL; + return rc; +} + +static int +libcfs_ioctl (dev_t dev, u_long cmd, caddr_t arg, int flag, struct proc *p) +{ + int rc = 0; + struct cfs_psdev_file pfile; + int devid; + devid = minor(dev); + + if (devid > 16) return (-ENXIO); + + if (suser(p->p_ucred, &p->p_acflag)) + return (-EPERM); + + pfile.off = 0; + pfile.private_data = mdev_state[devid]; + + if (libcfs_psdev_ops.p_ioctl != NULL) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); + else + rc = -EPERM; + return rc; +} + +static struct cdevsw libcfs_devsw = +{ + libcfs_psdev_open, /* open */ + libcfs_psdev_close, /* close */ + NULL, /* read */ + NULL, /* write */ + libcfs_ioctl, /* ioctl */ + NULL, /* stop */ + NULL, /* reset */ + NULL, /* tty's */ + NULL, /* select */ + NULL, /* mmap */ + NULL, /* strategy */ + NULL, /* getc */ + NULL, /* putc */ + 0 /* type */ +}; + +cfs_psdev_t libcfs_dev = { + -1, + NULL, + "portals", + &libcfs_devsw, + NULL +}; + +void 
+kportal_daemonize (char *str) +{ + printf("Daemonize request: %s.\n", str); + return; +} + +void +kportal_blockallsigs(void) +{ + return; +} diff --git a/lnet/libcfs/darwin/darwin-prim.c b/lnet/libcfs/darwin/darwin-prim.c new file mode 100644 index 0000000..fd2d120 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-prim.c @@ -0,0 +1,402 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Darwin porting library + * Make things easy to port + */ +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +void *darwin_current_journal_info = NULL; +int darwin_current_cap_effective = -1; + +/* + * cfs pseudo device, actually pseudo char device in darwin + */ +#define KPORTAL_MAJOR -1 + +kern_return_t cfs_psdev_register(cfs_psdev_t *dev) { + dev->index = cdevsw_add(KPORTAL_MAJOR, dev->devsw); + if (dev->index < 0) { + printf("portal_init: failed to allocate a major number!\n"); + return KERN_FAILURE; + } + dev->handle = devfs_make_node(makedev (dev->index, 0), + DEVFS_CHAR, UID_ROOT, + GID_WHEEL, 0666, (char *)dev->name, 0); + return KERN_SUCCESS; +} + +kern_return_t cfs_psdev_deregister(cfs_psdev_t *dev) { + devfs_remove(dev->handle); + cdevsw_remove(dev->index, dev->devsw); + return KERN_SUCCESS; +} + +/* + * KPortal symbol register / unregister support + */ +static struct rw_semaphore cfs_symbol_lock; +struct list_head cfs_symbol_list; + +void * +cfs_symbol_get(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + sym->ref ++; + break; + } + } + up_read(&cfs_symbol_lock); + if (sym != NULL) + return sym->value; + return NULL; +} + +kern_return_t +cfs_symbol_put(const char *name) +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_read(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + sym->ref --; + LASSERT(sym->ref >= 0); + break; + } + } + up_read(&cfs_symbol_lock); + LASSERT(sym != NULL); + + return 0; +} + +kern_return_t +cfs_symbol_register(const char *name, const void *value) +{ 
+ struct list_head *walker; + struct cfs_symbol *sym = NULL; + struct cfs_symbol *new = NULL; + + MALLOC(new, struct cfs_symbol *, sizeof(struct cfs_symbol), M_TEMP, M_WAITOK|M_ZERO); + strncpy(new->name, name, CFS_SYMBOL_LEN); + new->value = (void *)value; + new->ref = 0; + CFS_INIT_LIST_HEAD(&new->sym_list); + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + up_write(&cfs_symbol_lock); + FREE(new, M_TEMP); + return KERN_NAME_EXISTS; + } + + } + list_add_tail(&new->sym_list, &cfs_symbol_list); + up_write(&cfs_symbol_lock); + + return KERN_SUCCESS; +} + +kern_return_t +cfs_symbol_unregister(const char *name) +{ + struct list_head *walker; + struct list_head *nxt; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each_safe(walker, nxt, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + if (!strcmp(sym->name, name)) { + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + FREE(sym, M_TEMP); + break; + } + } + up_write(&cfs_symbol_lock); + + return KERN_SUCCESS; +} + +void +cfs_symbol_clean() +{ + struct list_head *walker; + struct cfs_symbol *sym = NULL; + + down_write(&cfs_symbol_lock); + list_for_each(walker, &cfs_symbol_list) { + sym = list_entry (walker, struct cfs_symbol, sym_list); + LASSERT(sym->ref == 0); + list_del (&sym->sym_list); + FREE(sym, M_TEMP); + } + up_write(&cfs_symbol_lock); + return; +} + +/* + * Register sysctl table + */ +cfs_sysctl_table_header_t * +register_cfs_sysctl_table (cfs_sysctl_table_t *table, int arg) +{ + cfs_sysctl_table_t item; + int i = 0; + + while ((item = table[i++]) != NULL) { + sysctl_register_oid(item); + } + return table; +} + +/* + * Unregister sysctl table + */ +void +unregister_cfs_sysctl_table (cfs_sysctl_table_header_t *table) { + int i = 0; + cfs_sysctl_table_t item; + + while ((item = table[i++]) != NULL) { + sysctl_unregister_oid(item); 
+ } + return; +} + +struct kernel_thread_arg cfs_thread_arg; + +void +cfs_thread_agent_init() +{ + set_targ_stat(&cfs_thread_arg, THREAD_ARG_FREE); + spin_lock_init(&cfs_thread_arg.lock); + cfs_thread_arg.arg = NULL; + cfs_thread_arg.func = NULL; +} + +void +cfs_thread_agent (void) +{ + cfs_thread_t func = NULL; + void *arg = NULL; + + thread_arg_recv(&cfs_thread_arg, func, arg); + printf("entry of thread agent (func: %08lx).\n", (void *)func); + assert(func != NULL); + func(arg); + printf("thread agent exit. (func: %08lx)\n", (void *)func); + (void) thread_terminate(current_act()); +} + +int +cfs_kernel_thread(cfs_thread_t func, void *arg, int flag) +{ + int ret = 0; + thread_t th = NULL; + + thread_arg_hold(&cfs_thread_arg, func, arg); + th = kernel_thread(kernel_task, cfs_thread_agent); + thread_arg_release(&cfs_thread_arg); + if (th == THREAD_NULL) + ret = -1; + return ret; +} + +void lustre_cone_in(boolean_t *state, funnel_t **cone) +{ + *cone = thread_funnel_get(); + if (*cone == network_flock) + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + else if (*cone == NULL) + *state = thread_funnel_set(kernel_flock, TRUE); +} + +void lustre_cone_ex(boolean_t state, funnel_t *cone) +{ + if (cone == network_flock) + thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + else if (cone == NULL) + (void) thread_funnel_set(kernel_flock, state); +} + +void lustre_net_in(boolean_t *state, funnel_t **cone) +{ + *cone = thread_funnel_get(); + if (*cone == kernel_flock) + thread_funnel_switch(KERNEL_FUNNEL, NETWORK_FUNNEL); + else if (*cone == NULL) + *state = thread_funnel_set(network_flock, TRUE); +} + +void lustre_net_ex(boolean_t state, funnel_t *cone) +{ + if (cone == kernel_flock) + thread_funnel_switch(NETWORK_FUNNEL, KERNEL_FUNNEL); + else if (cone == NULL) + (void) thread_funnel_set(network_flock, state); +} + + +void cfs_waitq_init(struct cfs_waitq *waitq) +{ + ksleep_chan_init(&waitq->wq_ksleep_chan); +} + +void cfs_waitlink_init(struct cfs_waitlink *link) +{ 
+ ksleep_link_init(&link->wl_ksleep_link); +} + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + link->wl_waitq = waitq; + ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, + struct cfs_waitlink *link) +{ + link->wl_waitq = waitq; + link->wl_ksleep_link.flags |= KSLEEP_EXCLUSIVE; + ksleep_add(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +void cfs_waitq_forward(struct cfs_waitlink *link, + struct cfs_waitq *waitq) +{ + link->wl_ksleep_link.forward = &waitq->wq_ksleep_chan; +} + +void cfs_waitq_del(struct cfs_waitq *waitq, + struct cfs_waitlink *link) +{ + ksleep_del(&waitq->wq_ksleep_chan, &link->wl_ksleep_link); +} + +int cfs_waitq_active(struct cfs_waitq *waitq) +{ + return (1); +} + +void cfs_waitq_signal(struct cfs_waitq *waitq) +{ + ksleep_wake(&waitq->wq_ksleep_chan); +} + +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) +{ + ksleep_wake_nr(&waitq->wq_ksleep_chan, nr); +} + +void cfs_waitq_broadcast(struct cfs_waitq *waitq) +{ + ksleep_wake_all(&waitq->wq_ksleep_chan); +} + +void cfs_waitq_wait(struct cfs_waitlink *link) +{ + ksleep_wait(&link->wl_waitq->wq_ksleep_chan); +} + +cfs_duration_t cfs_waitq_timedwait(struct cfs_waitlink *link, + cfs_duration_t timeout) +{ + CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout); + return ksleep_timedwait(&link->chan->c, timeout); +} + +typedef void (*ktimer_func_t)(void *); +void cfs_timer_init(cfs_timer_t *t, void (* func)(unsigned long), void *arg) +{ + ktimer_init(&t->t, (ktimer_func_t)func, arg); +} + +void cfs_timer_done(struct cfs_timer *t) +{ + ktimer_done(&t->t); +} + +void cfs_timer_arm(struct cfs_timer *t, cfs_time_t deadline) +{ + ktimer_arm(&t->t, deadline); +} + +void cfs_timer_disarm(struct cfs_timer *t) +{ + ktimer_disarm(&t->t); +} + +int cfs_timer_is_armed(struct cfs_timer *t) +{ + return ktimer_is_armed(&t->t); +} + +cfs_time_t cfs_timer_deadline(struct cfs_timer *t) +{ + 
return ktimer_deadline(&t->t); +} + +int +libcfs_arch_init(void) +{ + init_rwsem(&cfs_symbol_lock); + CFS_INIT_LIST_HEAD(&cfs_symbol_list); + cfs_thread_agent_init(); + return 0; +} + +void +libcfs_arch_cleanup(void) +{ + cfs_symbol_clean(); +} + diff --git a/lnet/libcfs/darwin/darwin-proc.c b/lnet/libcfs/darwin/darwin-proc.c new file mode 100644 index 0000000..efa51aa --- /dev/null +++ b/lnet/libcfs/darwin/darwin-proc.c @@ -0,0 +1,129 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_PORTALS +#include + +static cfs_sysctl_table_header_t *portals_table_header = NULL; +extern unsigned int portal_debug; +extern char debug_file_path[1024]; +extern unsigned int portal_subsystem_debug; +extern unsigned int portal_printk; +extern atomic_t portal_kmemory; + +extern long max_debug_mb; +extern int cfs_trace_daemon SYSCTL_HANDLER_ARGS; +extern int cfs_debug_mb SYSCTL_HANDLER_ARGS; +/* + * sysctl table for portals + */ +SYSCTL_NODE (, OID_AUTO, portals, CTLFLAG_RW, + 0, "portals sysctl top"); + +SYSCTL_INT(_portals, OID_AUTO, debug, + CTLTYPE_INT | CTLFLAG_RW , &portal_debug, + 0, "debug"); +SYSCTL_INT(_portals, OID_AUTO, subsystem_debug, + CTLTYPE_INT | CTLFLAG_RW, &portal_subsystem_debug, + 0, "subsystem debug"); +SYSCTL_INT(_portals, OID_AUTO, printk, + CTLTYPE_INT | CTLFLAG_RW, &portal_printk, + 0, "printk"); +SYSCTL_STRING(_portals, OID_AUTO, debug_path, + CTLTYPE_STRING | CTLFLAG_RW, debug_file_path, + 1024, "debug path"); +SYSCTL_INT(_portals, OID_AUTO, memused, + CTLTYPE_INT | CTLFLAG_RW, (int *)&portal_kmemory.counter, + 0, "memused"); +SYSCTL_PROC(_portals, OID_AUTO, trace_daemon, + CTLTYPE_STRING | CTLFLAG_RW, 0, + 0, &cfs_trace_daemon, "A", "trace daemon"); +SYSCTL_PROC(_portals, OID_AUTO, debug_mb, + CTLTYPE_INT | CTLFLAG_RW, &max_debug_mb, + 0, &cfs_debug_mb, "L", "max debug size"); + + +static cfs_sysctl_table_t top_table[] = { + &sysctl__portals, + &sysctl__portals_debug, + &sysctl__portals_subsystem_debug, + &sysctl__portals_printk, + &sysctl__portals_debug_path, + &sysctl__portals_memused, + &sysctl__portals_trace_daemon, + &sysctl__portals_debug_mb, + NULL +}; + +/* no proc in osx */ +cfs_proc_dir_entry_t * +cfs_create_proc_entry(char *name, int mod, cfs_proc_dir_entry_t *parent) +{ + cfs_proc_dir_entry_t *entry; + MALLOC(entry, cfs_proc_dir_entry_t *, sizeof(cfs_proc_dir_entry_t), M_TEMP, M_WAITOK|M_ZERO); + + return 
entry; +} + +void +cfs_free_proc_entry(cfs_proc_dir_entry_t *de){ + FREE(de, M_TEMP); + return; +}; + +void +cfs_remove_proc_entry(char *name, cfs_proc_dir_entry_t *entry) +{ + cfs_free_proc_entry(entry); + return; +} + +int +insert_proc(void) +{ +#if 1 + if (!portals_table_header) + portals_table_header = register_cfs_sysctl_table(top_table, 0); +#endif + return 0; +} + +void +remove_proc(void) +{ +#if 1 + if (portals_table_header != NULL) + unregister_cfs_sysctl_table(portals_table_header); + portals_table_header = NULL; +#endif + return; +} + + diff --git a/lnet/libcfs/darwin/darwin-sync.c b/lnet/libcfs/darwin/darwin-sync.c new file mode 100644 index 0000000..7ac24f6 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-sync.c @@ -0,0 +1,868 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre Light Super operations + * + * Copyright (c) 2004 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under + * the terms of version 2 of the GNU General Public License as published by + * the Free Software Foundation. Lustre is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. You should have received a + * copy of the GNU General Public License along with Lustre; if not, write + * to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, + * USA. + */ + +/* + * xnu_sync.c + * + * Created by nikita on Sun Jul 18 2004. + * + * Prototypes of XNU synchronization primitives. 
+ */ + +/* + * This file contains very simplistic implementations of (saner) API for + * basic synchronization primitives: + * + * - spin-lock (kspin) + * + * - semaphore (ksem) + * + * - mutex (kmut) + * + * - condition variable (kcond) + * + * - wait-queue (ksleep_chan and ksleep_link) + * + * - timer (ktimer) + * + * A lot can be optimized here. + */ + +#include +#include +#include + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +#define SLASSERT(e) ON_SYNC_DEBUG(LASSERT(e)) + +#ifdef HAVE_GET_PREEMPTION_LEVEL +extern int get_preemption_level(void); +#else +#define get_preemption_level() (0) +#endif + +/* + * Warning: low level portals debugging code (portals_debug_msg(), for + * example), uses spin-locks, so debugging output here may lead to nasty + * surprises. + */ + +#if SMP + +extern void hw_lock_init(hw_lock_t); +extern void hw_lock_lock(hw_lock_t); +extern void hw_lock_unlock(hw_lock_t); +extern unsigned int hw_lock_to(hw_lock_t, unsigned int); +extern unsigned int hw_lock_try(hw_lock_t); +extern unsigned int hw_lock_held(hw_lock_t); + +void kspin_init(struct kspin *spin) +{ + SLASSERT(spin != NULL); + hw_lock_init(&spin->lock); + ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC); + ON_SYNC_DEBUG(spin->owner = NULL); +} + +void kspin_done(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == NULL); +} + +void kspin_lock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner != current_thread); + + hw_lock_lock(&spin->lock); + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread); +} + +void kspin_unlock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == current_thread); + ON_SYNC_DEBUG(spin->owner = NULL); + hw_lock_unlock(&spin->lock); +} + +int kspin_trylock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + 
+ if (hw_lock_try(&spin->lock)) { + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread); + return 1; + } else + return 0; +} + +/* SMP */ +#else + +/* + * uniprocessor version of spin-lock. Only checks. + */ + +void kspin_init(struct kspin *spin) +{ + SLASSERT(spin != NULL); + ON_SYNC_DEBUG(spin->magic = KSPIN_MAGIC); + ON_SYNC_DEBUG(spin->owner = NULL); +} + +void kspin_done(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == NULL); +} + +void kspin_lock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread); +} + +void kspin_unlock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == current_thread); + ON_SYNC_DEBUG(spin->owner = NULL); +} + +int kspin_trylock(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + SLASSERT(spin->owner == NULL); + ON_SYNC_DEBUG(spin->owner = current_thread); + return 1; +} + +/* SMP */ +#endif + +#if XNU_SYNC_DEBUG +int kspin_islocked(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + return spin->owner == current_thread; +} + +int kspin_isnotlocked(struct kspin *spin) +{ + SLASSERT(spin != NULL); + SLASSERT(spin->magic == KSPIN_MAGIC); + return spin->owner != current_thread; +} +#endif + +void ksem_init(struct ksem *sem, int value) +{ + SLASSERT(sem != NULL); + kspin_init(&sem->guard); + wait_queue_init(&sem->q, SYNC_POLICY_FIFO); + sem->value = value; + ON_SYNC_DEBUG(sem->magic = KSEM_MAGIC); +} + +void ksem_done(struct ksem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + /* + * XXX nikita: cannot check that &sem->q is empty because + * wait_queue_empty() is Apple private API. 
+ */ + kspin_done(&sem->guard); +} + +int ksem_up(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + + kspin_lock(&sem->guard); + sem->value += value; + if (sem->value == 0) + result = wait_queue_wakeup_one(&sem->q, (event_t)sem, + THREAD_AWAKENED); + else + result = wait_queue_wakeup_all(&sem->q, (event_t)sem, + THREAD_AWAKENED); + kspin_unlock(&sem->guard); + SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); + return (result == KERN_SUCCESS) ? 0 : 1; +} + +void ksem_down(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->guard); + while (sem->value < value) { + result = wait_queue_assert_wait(&sem->q, (event_t)sem, + THREAD_UNINT); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + kspin_unlock(&sem->guard); + if (result == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + kspin_lock(&sem->guard); + } + sem->value -= value; + kspin_unlock(&sem->guard); +} + +int ksem_trydown(struct ksem *sem, int value) +{ + int result; + + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KSEM_MAGIC); + SLASSERT(value >= 0); + + kspin_lock(&sem->guard); + if (sem->value >= value) { + sem->value -= value; + result = 0; + } else + result = -EBUSY; + kspin_unlock(&sem->guard); + return result; +} + +void kmut_init(struct kmut *mut) +{ + SLASSERT(mut != NULL); + ksem_init(&mut->s, 1); + ON_SYNC_DEBUG(mut->magic = KMUT_MAGIC); + ON_SYNC_DEBUG(mut->owner = NULL); +} + +void kmut_done(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner == NULL); + ksem_done(&mut->s); +} + +void kmut_lock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner != current_thread); + SLASSERT(get_preemption_level() == 0); + + 
ksem_down(&mut->s, 1); + ON_SYNC_DEBUG(mut->owner = current_thread); +} + +void kmut_unlock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + SLASSERT(mut->owner == current_thread); + + ON_SYNC_DEBUG(mut->owner = NULL); + ksem_up(&mut->s, 1); +} + +int kmut_trylock(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return ksem_trydown(&mut->s, 1); +} + +#if XNU_SYNC_DEBUG +int kmut_islocked(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return mut->owner == current_thread; +} + +int kmut_isnotlocked(struct kmut *mut) +{ + SLASSERT(mut != NULL); + SLASSERT(mut->magic == KMUT_MAGIC); + return mut->owner != current_thread; +} +#endif + + +void kcond_init(struct kcond *cond) +{ + SLASSERT(cond != NULL); + + kspin_init(&cond->guard); + cond->waiters = NULL; + ON_SYNC_DEBUG(cond->magic = KCOND_MAGIC); +} + +void kcond_done(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(cond->waiters == NULL); + kspin_done(&cond->guard); +} + +void kcond_wait(struct kcond *cond, struct kspin *lock) +{ + struct kcond_link link; + + SLASSERT(cond != NULL); + SLASSERT(lock != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(lock)); + + ksem_init(&link.sem, 0); + kspin_lock(&cond->guard); + link.next = cond->waiters; + cond->waiters = &link; + kspin_unlock(&cond->guard); + kspin_unlock(lock); + + ksem_down(&link.sem, 1); + + kspin_lock(&cond->guard); + kspin_unlock(&cond->guard); + kspin_lock(lock); +} + +void kcond_wait_guard(struct kcond *cond) +{ + struct kcond_link link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + ksem_init(&link.sem, 0); + link.next = cond->waiters; + cond->waiters = &link; + kspin_unlock(&cond->guard); + + ksem_down(&link.sem, 1); + + kspin_lock(&cond->guard); +} + +void kcond_signal_guard(struct kcond *cond) +{ + 
struct kcond_link *link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + link = cond->waiters; + if (link != NULL) { + cond->waiters = link->next; + ksem_up(&link->sem, 1); + } +} + +void kcond_signal(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + + kspin_lock(&cond->guard); + kcond_signal_guard(cond); + kspin_unlock(&cond->guard); +} + +void kcond_broadcast_guard(struct kcond *cond) +{ + struct kcond_link *link; + + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + SLASSERT(kspin_islocked(&cond->guard)); + + for (link = cond->waiters; link != NULL; link = link->next) + ksem_up(&link->sem, 1); + cond->waiters = NULL; +} + +void kcond_broadcast(struct kcond *cond) +{ + SLASSERT(cond != NULL); + SLASSERT(cond->magic == KCOND_MAGIC); + + kspin_lock(&cond->guard); + kcond_broadcast_guard(cond); + kspin_unlock(&cond->guard); +} + +void krw_sem_init(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + + kcond_init(&sem->cond); + sem->count = 0; + ON_SYNC_DEBUG(sem->magic = KRW_MAGIC); +} + +void krw_sem_done(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count == 0); + kcond_done(&sem->cond); +} + +void krw_sem_down_r(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->cond.guard); + while (sem->count < 0) + kcond_wait_guard(&sem->cond); + ++ sem->count; + kspin_unlock(&sem->cond.guard); +} + +int krw_sem_down_r_try(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + + kspin_lock(&sem->cond.guard); + if (sem->count < 0) { + kspin_unlock(&sem->cond.guard); + return -EBUSY; + } + ++ sem->count; + kspin_unlock(&sem->cond.guard); + return 0; +} + +void krw_sem_down_w(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + 
SLASSERT(get_preemption_level() == 0); + + kspin_lock(&sem->cond.guard); + while (sem->count != 0) + kcond_wait_guard(&sem->cond); + sem->count = -1; + kspin_unlock(&sem->cond.guard); +} + +int krw_sem_down_w_try(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + + kspin_lock(&sem->cond.guard); + if (sem->count != 0) { + kspin_unlock(&sem->cond.guard); + return -EBUSY; + } + sem->count = -1; + kspin_unlock(&sem->cond.guard); + return 0; +} + +void krw_sem_up_r(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count > 0); + + kspin_lock(&sem->cond.guard); + -- sem->count; + if (sem->count == 0) + kcond_broadcast_guard(&sem->cond); + kspin_unlock(&sem->cond.guard); +} + +void krw_sem_up_w(struct krw_sem *sem) +{ + SLASSERT(sem != NULL); + SLASSERT(sem->magic == KRW_MAGIC); + SLASSERT(sem->count == -1); + + kspin_lock(&sem->cond.guard); + sem->count = 0; + kspin_unlock(&sem->cond.guard); + kcond_broadcast(&sem->cond); +} + +void ksleep_chan_init(struct ksleep_chan *chan) +{ + SLASSERT(chan != NULL); + + kspin_init(&chan->guard); + CFS_INIT_LIST_HEAD(&chan->waiters); + ON_SYNC_DEBUG(chan->magic = KSLEEP_CHAN_MAGIC); +} + +void ksleep_chan_done(struct ksleep_chan *chan) +{ + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(list_empty(&chan->waiters)); + kspin_done(&chan->guard); +} + +void ksleep_link_init(struct ksleep_link *link) +{ + SLASSERT(link != NULL); + + CFS_INIT_LIST_HEAD(&link->linkage); + link->flags = 0; + link->event = current_thread; + link->hits = 0; + link->forward = NULL; + ON_SYNC_DEBUG(link->magic = KSLEEP_LINK_MAGIC); +} + +void ksleep_link_done(struct ksleep_link *link) +{ + SLASSERT(link != NULL); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + SLASSERT(list_empty(&link->linkage)); +} + +void ksleep_add(struct ksleep_chan *chan, struct ksleep_link *link) +{ + SLASSERT(chan != NULL); + SLASSERT(link != NULL); + 
SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + SLASSERT(list_empty(&link->linkage)); + + kspin_lock(&chan->guard); + if (link->flags & KSLEEP_EXCLUSIVE) + list_add_tail(&link->linkage, &chan->waiters); + else + list_add(&link->linkage, &chan->waiters); + kspin_unlock(&chan->guard); +} + +void ksleep_del(struct ksleep_chan *chan, struct ksleep_link *link) +{ + SLASSERT(chan != NULL); + SLASSERT(link != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(link->magic == KSLEEP_LINK_MAGIC); + + kspin_lock(&chan->guard); + list_del_init(&link->linkage); + kspin_unlock(&chan->guard); +} + +static int has_hits(struct ksleep_chan *chan, event_t event) +{ + struct ksleep_link *scan; + + SLASSERT(kspin_islocked(&chan->guard)); + list_for_each_entry(scan, &chan->waiters, linkage) { + if (scan->event == event && scan->hits > 0) { + /* consume hit */ + -- scan->hits; + return 1; + } + } + return 0; +} + +static void add_hit(struct ksleep_chan *chan, event_t event) +{ + struct ksleep_link *scan; + + SLASSERT(kspin_islocked(&chan->guard)); + list_for_each_entry(scan, &chan->waiters, linkage) { + if (scan->event == event) { + ++ scan->hits; + break; + } + } +} + +void ksleep_wait(struct ksleep_chan *chan) +{ + event_t event; + int result; + + ENTRY; + + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(get_preemption_level() == 0); + + event = current_thread; + kspin_lock(&chan->guard); + if (!has_hits(chan, event)) { + result = assert_wait(event, THREAD_UNINT); + kspin_unlock(&chan->guard); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + if (result == THREAD_WAITING) + thread_block(THREAD_CONTINUE_NULL); + } else + kspin_unlock(&chan->guard); + EXIT; +} + +int64_t ksleep_timedwait(struct ksleep_chan *chan, uint64_t timeout) +{ + event_t event; + int64_t result; + AbsoluteTime clock_current; + AbsoluteTime clock_delay; + + ENTRY; + + SLASSERT(chan != NULL); + 
SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + SLASSERT(get_preemption_level() == 0); + + CDEBUG(D_TRACE, "timeout: %llu\n", (long long unsigned)timeout); + + event = current_thread; + result = 0; + kspin_lock(&chan->guard); + if (!has_hits(chan, event)) { + result = assert_wait(event, THREAD_UNINT); + if (timeout > 0) { + /* + * arm a timer. thread_set_timer()'s first argument is + * uint32_t, so we have to cook deadline ourselves. + */ + clock_get_uptime(&clock_current); + nanoseconds_to_absolutetime(timeout, &clock_delay); + ADD_ABSOLUTETIME(&clock_current, &clock_delay); + thread_set_timer_deadline(clock_current); + } + kspin_unlock(&chan->guard); + SLASSERT(result == THREAD_AWAKENED || result == THREAD_WAITING); + if (result == THREAD_WAITING) + result = thread_block(THREAD_CONTINUE_NULL); + thread_cancel_timer(); + + clock_get_uptime(&clock_delay); + SUB_ABSOLUTETIME(&clock_delay, &clock_current); + if (result == THREAD_TIMED_OUT) + result = 0; + else { + absolutetime_to_nanoseconds(clock_delay, &result); + if (result < 0) + result = 0; + } + } else + kspin_unlock(&chan->guard); + + RETURN(result); +} + +/* + * wake up single exclusive waiter (plus some arbitrary number of * + * non-exclusive) + */ +void ksleep_wake(struct ksleep_chan *chan) +{ + ENTRY; + ksleep_wake_nr(chan, 1); + EXIT; +} + +/* + * wake up all waiters on @chan + */ +void ksleep_wake_all(struct ksleep_chan *chan) +{ + ENTRY; + ksleep_wake_nr(chan, 0); + EXIT; +} + +/* + * wakeup no more than @nr exclusive waiters from @chan, plus some arbitrary + * number of non-exclusive. If @nr is 0, wake up all waiters. 
+ */ +void ksleep_wake_nr(struct ksleep_chan *chan, int nr) +{ + struct ksleep_link *scan; + int result; + + ENTRY; + + SLASSERT(chan != NULL); + SLASSERT(chan->magic == KSLEEP_CHAN_MAGIC); + + kspin_lock(&chan->guard); + list_for_each_entry(scan, &chan->waiters, linkage) { + struct ksleep_chan *forward; + + forward = scan->forward; + if (forward != NULL) + kspin_lock(&forward->guard); + result = thread_wakeup(scan->event); + CDEBUG(D_INFO, "waking 0x%x: %d\n", + (unsigned int)scan->event, result); + SLASSERT(result == KERN_SUCCESS || result == KERN_NOT_WAITING); + if (result == KERN_NOT_WAITING) { + ++ scan->hits; + if (forward != NULL) + add_hit(forward, scan->event); + } + if (forward != NULL) + kspin_unlock(&forward->guard); + if ((scan->flags & KSLEEP_EXCLUSIVE) && --nr == 0) + break; + } + kspin_unlock(&chan->guard); + EXIT; +} + +void ktimer_init(struct ktimer *t, void (*func)(void *), void *arg) +{ + SLASSERT(t != NULL); + SLASSERT(func != NULL); + + kspin_init(&t->guard); + t->func = func; + t->arg = arg; + ON_SYNC_DEBUG(t->magic = KTIMER_MAGIC); +} + +void ktimer_done(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + kspin_done(&t->guard); + ON_SYNC_DEBUG(t->magic = 0); +} + +static void ktimer_actor(void *arg0, void *arg1) +{ + struct ktimer *t; + int armed; + + t = arg0; + /* + * this assumes that ktimer's are never freed. 
+ */ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + /* + * call actual timer function + */ + kspin_lock(&t->guard); + armed = t->armed; + t->armed = 0; + kspin_unlock(&t->guard); + + if (armed) + t->func(t->arg); +} + +static void ktimer_disarm_locked(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + thread_call_func_cancel(ktimer_actor, t, FALSE); +} + +void ktimer_arm(struct ktimer *t, u_int64_t deadline) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + kspin_lock(&t->guard); + ktimer_disarm_locked(t); + t->armed = 1; + thread_call_func_delayed(ktimer_actor, t, *(AbsoluteTime *)&deadline); + kspin_unlock(&t->guard); +} + +void ktimer_disarm(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + kspin_lock(&t->guard); + t->armed = 0; + ktimer_disarm_locked(t); + kspin_unlock(&t->guard); +} + +int ktimer_is_armed(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + /* + * no locking---result is only a hint anyway. + */ + return t->armed; +} + +u_int64_t ktimer_deadline(struct ktimer *t) +{ + SLASSERT(t != NULL); + SLASSERT(t->magic == KTIMER_MAGIC); + + return t->deadline; +} + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/libcfs/darwin/darwin-tracefile.c b/lnet/libcfs/darwin/darwin-tracefile.c new file mode 100644 index 0000000..9f58267 --- /dev/null +++ b/lnet/libcfs/darwin/darwin-tracefile.c @@ -0,0 +1,159 @@ + +#define DEBUG_SUBSYSTEM S_PORTALS +#define LUSTRE_TRACEFILE_PRIVATE +#include +#include +#include "tracefile.h" + +/* + * We can't support smp tracefile currently. + * Everything is put on one cpu. 
+ */ + +#define M_TCD_MAX_PAGES (128 * 1280) +extern union trace_data_union trace_data[NR_CPUS]; +extern char *tracefile; +extern long long tracefile_size; +extern struct rw_semaphore tracefile_sem; +extern int trace_start_thread(void); +extern void trace_stop_thread(void); + +long max_debug_mb = M_TCD_MAX_PAGES; +static long max_permit_mb = (64 * 1024); + +inline struct trace_cpu_data * +__trace_get_tcd (unsigned long *flags) +{ + return &trace_data[0].tcd; +} + +inline void +__trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags) +{ + return; +} + +void +set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, + const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = 0; + header->ph_line_num = line; + header->ph_extern_pid = 0; +} + +void print_to_console(struct ptldebug_header *hdr, int mask, char *buf, + int len, char *file, const char *fn) +{ + char *prefix = NULL, *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + prefix = "LustreError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "LustreError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "Lustre"; + ptype = KERN_WARNING; + } else if (portal_printk != 0) { + prefix = "Lustre"; + ptype = KERN_INFO; + } + printk("%s%s: %d:%d:(%s:%d:%s()) %*s", ptype, prefix, hdr->ph_pid, + hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); +} + +/* + * Sysctl handle of libcfs + */ +int cfs_trace_daemon SYSCTL_HANDLER_ARGS +{ + int error = 0; + char *name = NULL; + + MALLOC(name, char *, req->newlen + 1, M_TEMP, M_WAITOK | M_ZERO); + if (name == NULL) + return -ENOMEM; + down_write(&tracefile_sem); + error = sysctl_handle_string(oidp, name, req->newlen + 1, req); + if (!error || req->newptr != NULL) { 
+ /* write */ + if (strcmp(name, "stop") == 0) { + /* stop tracefile daemon */ + tracefile = NULL; + trace_stop_thread(); + goto out; + }else if (strncmp(name, "size=", 5) == 0) { + tracefile_size = simple_strtoul(name + 5, NULL, 0); + if (tracefile_size < 10 || tracefile_size > 20480) + tracefile_size = TRACEFILE_SIZE; + else + tracefile_size <<= 20; + goto out; + + } + if (name[0] != '/') { + error = -EINVAL; + goto out; + } + if (tracefile != NULL) + cfs_free(tracefile); + tracefile = name; + name = NULL; + trace_start_thread(); + } else if (req->newptr != NULL) { + /* Something was wrong with the write request */ + printf("sysctl debug daemon failed: %d.\n", error); + goto out; + } else { + /* Read request */ + SYSCTL_OUT(req, tracefile, sizeof(tracefile)); + } +out: + if (name != NULL) + FREE(name, M_TEMP); + up_write(&tracefile_sem); + return error; +} + + +int cfs_debug_mb SYSCTL_HANDLER_ARGS +{ + int i; + int error = 0; + + error = sysctl_handle_long(oidp, oidp->oid_arg1, oidp->oid_arg2, req); + if (!error && req->newptr != NULL) { + /* We have a new value stored in the standard location */ + if (max_debug_mb <= 0) + return -EINVAL; + if (max_debug_mb > max_permit_mb) { + printf("sysctl debug_mb is too big: %d.\n", max_debug_mb); + return 0; + } + for (i = 0; i < NR_CPUS; i++) { + struct trace_cpu_data *tcd; + tcd = &trace_data[i].tcd; + tcd->tcd_max_pages = max_debug_mb; + } + } else if (req->newptr != NULL) { + /* Something was wrong with the write request */ + printf ("sysctl debug_mb fault: %d.\n", error); + } else { + /* Read request */ + error = SYSCTL_OUT(req, &max_debug_mb, sizeof max_debug_mb); + } + return error; +} + + diff --git a/lnet/libcfs/darwin/darwin-utils.c b/lnet/libcfs/darwin/darwin-utils.c new file mode 100644 index 0000000..630db6b --- /dev/null +++ b/lnet/libcfs/darwin/darwin-utils.c @@ -0,0 +1,482 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 
#ifndef isspace
/*
 * Minimal isspace() for environments that do not supply one as a macro.
 *
 * Returns non-zero for the standard C white-space characters: space,
 * horizontal tab, new-line, vertical tab, form feed and carriage return.
 */
inline int
isspace(char c)
{
        return (c == ' ' || c == '\t' || c == '\n' ||
                c == '\v' || c == '\f' || c == '\r');
}
#endif
/*
 * Format @address as an unsigned decimal number into the tail of @buf.
 *
 * The digits are written right-aligned, ending with a terminating NUL in the
 * last byte of the @len-byte buffer.  When the buffer is too small only the
 * low-order digits that fit are produced.
 *
 * Returns a pointer to the first digit (somewhere inside @buf), or NULL when
 * @buf is NULL or @len is not positive.
 */
char *
ul2dstr(unsigned long address, char *buf, int len)
{
        char *pos;

        /* validate before doing any pointer arithmetic on buf */
        if (buf == NULL || len <= 0)
                return NULL;

        pos = buf + len - 1;
        *pos = '\0';
        /* do/while so that zero is rendered as "0" rather than "" */
        do {
                if (--len == 0)
                        break;
                *--pos = (char)(address % 10 + '0');
                address /= 10;
        } while (address != 0);
        return pos;
}
LINUX_EADV = 68, + LINUX_ESRMNT = 69, + LINUX_ECOMM = 70, + LINUX_EPROTO = 71, + LINUX_EMULTIHOP = 72, + LINUX_EDOTDOT = 73, + LINUX_EBADMSG = 74, + LINUX_EOVERFLOW = 75, + LINUX_ENOTUNIQ = 76, + LINUX_EBADFD = 77, + LINUX_EREMCHG = 78, + LINUX_ELIBACC = 79, + LINUX_ELIBBAD = 80, + LINUX_ELIBSCN = 81, + LINUX_ELIBMAX = 82, + LINUX_ELIBEXEC = 83, + LINUX_EILSEQ = 84, + LINUX_ERESTART = 85, + LINUX_ESTRPIPE = 86, + LINUX_EUSERS = 87, + LINUX_ENOTSOCK = 88, + LINUX_EDESTADDRREQ = 89, + LINUX_EMSGSIZE = 90, + LINUX_EPROTOTYPE = 91, + LINUX_ENOPROTOOPT = 92, + LINUX_EPROTONOSUPPORT = 93, + LINUX_ESOCKTNOSUPPORT = 94, + LINUX_EOPNOTSUPP = 95, + LINUX_EPFNOSUPPORT = 96, + LINUX_EAFNOSUPPORT = 97, + LINUX_EADDRINUSE = 98, + LINUX_EADDRNOTAVAIL = 99, + LINUX_ENETDOWN = 100, + LINUX_ENETUNREACH = 101, + LINUX_ENETRESET = 102, + LINUX_ECONNABORTED = 103, + LINUX_ECONNRESET = 104, + LINUX_ENOBUFS = 105, + LINUX_EISCONN = 106, + LINUX_ENOTCONN = 107, + LINUX_ESHUTDOWN = 108, + LINUX_ETOOMANYREFS = 109, + LINUX_ETIMEDOUT = 110, + LINUX_ECONNREFUSED = 111, + LINUX_EHOSTDOWN = 112, + LINUX_EHOSTUNREACH = 113, + LINUX_EALREADY = 114, + LINUX_EINPROGRESS = 115, + LINUX_ESTALE = 116, + LINUX_EUCLEAN = 117, + LINUX_ENOTNAM = 118, + LINUX_ENAVAIL = 119, + LINUX_EISNAM = 120, + LINUX_EREMOTEIO = 121, + LINUX_EDQUOT = 122, + LINUX_ENOMEDIUM = 123, + LINUX_EMEDIUMTYPE = 124, + + /* + * we don't need these, but for completeness.. 
+ */ + LINUX_EDEADLOCK = LINUX_EDEADLK, + LINUX_EWOULDBLOCK = LINUX_EAGAIN +}; + +int convert_server_error(__u64 ecode) +{ + int sign; + int code; + + static int errno_xlate[] = { + /* success is always success */ + [0] = 0, + [LINUX_EPERM] = EPERM, + [LINUX_ENOENT] = ENOENT, + [LINUX_ESRCH] = ESRCH, + [LINUX_EINTR] = EINTR, + [LINUX_EIO] = EIO, + [LINUX_ENXIO] = ENXIO, + [LINUX_E2BIG] = E2BIG, + [LINUX_ENOEXEC] = ENOEXEC, + [LINUX_EBADF] = EBADF, + [LINUX_ECHILD] = ECHILD, + [LINUX_EAGAIN] = EAGAIN, + [LINUX_ENOMEM] = ENOMEM, + [LINUX_EACCES] = EACCES, + [LINUX_EFAULT] = EFAULT, + [LINUX_ENOTBLK] = ENOTBLK, + [LINUX_EBUSY] = EBUSY, + [LINUX_EEXIST] = EEXIST, + [LINUX_EXDEV] = EXDEV, + [LINUX_ENODEV] = ENODEV, + [LINUX_ENOTDIR] = ENOTDIR, + [LINUX_EISDIR] = EISDIR, + [LINUX_EINVAL] = EINVAL, + [LINUX_ENFILE] = ENFILE, + [LINUX_EMFILE] = EMFILE, + [LINUX_ENOTTY] = ENOTTY, + [LINUX_ETXTBSY] = ETXTBSY, + [LINUX_EFBIG] = EFBIG, + [LINUX_ENOSPC] = ENOSPC, + [LINUX_ESPIPE] = ESPIPE, + [LINUX_EROFS] = EROFS, + [LINUX_EMLINK] = EMLINK, + [LINUX_EPIPE] = EPIPE, + [LINUX_EDOM] = EDOM, + [LINUX_ERANGE] = ERANGE, + [LINUX_EDEADLK] = EDEADLK, + [LINUX_ENAMETOOLONG] = ENAMETOOLONG, + [LINUX_ENOLCK] = ENOLCK, + [LINUX_ENOSYS] = ENOSYS, + [LINUX_ENOTEMPTY] = ENOTEMPTY, + [LINUX_ELOOP] = ELOOP, + [LINUX_ENOMSG] = ENOMSG, + [LINUX_EIDRM] = EIDRM, + [LINUX_ECHRNG] = EINVAL /* ECHRNG */, + [LINUX_EL2NSYNC] = EINVAL /* EL2NSYNC */, + [LINUX_EL3HLT] = EINVAL /* EL3HLT */, + [LINUX_EL3RST] = EINVAL /* EL3RST */, + [LINUX_ELNRNG] = EINVAL /* ELNRNG */, + [LINUX_EUNATCH] = EINVAL /* EUNATCH */, + [LINUX_ENOCSI] = EINVAL /* ENOCSI */, + [LINUX_EL2HLT] = EINVAL /* EL2HLT */, + [LINUX_EBADE] = EINVAL /* EBADE */, + [LINUX_EBADR] = EBADRPC, + [LINUX_EXFULL] = EINVAL /* EXFULL */, + [LINUX_ENOANO] = EINVAL /* ENOANO */, + [LINUX_EBADRQC] = EINVAL /* EBADRQC */, + [LINUX_EBADSLT] = EINVAL /* EBADSLT */, + [LINUX_EBFONT] = EINVAL /* EBFONT */, + [LINUX_ENOSTR] = EINVAL /* ENOSTR */, + 
[LINUX_ENODATA] = EINVAL /* ENODATA */, + [LINUX_ETIME] = EINVAL /* ETIME */, + [LINUX_ENOSR] = EINVAL /* ENOSR */, + [LINUX_ENONET] = EINVAL /* ENONET */, + [LINUX_ENOPKG] = EINVAL /* ENOPKG */, + [LINUX_EREMOTE] = EREMOTE, + [LINUX_ENOLINK] = EINVAL /* ENOLINK */, + [LINUX_EADV] = EINVAL /* EADV */, + [LINUX_ESRMNT] = EINVAL /* ESRMNT */, + [LINUX_ECOMM] = EINVAL /* ECOMM */, + [LINUX_EPROTO] = EPROTOTYPE, + [LINUX_EMULTIHOP] = EINVAL /* EMULTIHOP */, + [LINUX_EDOTDOT] = EINVAL /* EDOTDOT */, + [LINUX_EBADMSG] = EINVAL /* EBADMSG */, + [LINUX_EOVERFLOW] = EOVERFLOW, + [LINUX_ENOTUNIQ] = EINVAL /* ENOTUNIQ */, + [LINUX_EBADFD] = EINVAL /* EBADFD */, + [LINUX_EREMCHG] = EINVAL /* EREMCHG */, + [LINUX_ELIBACC] = EINVAL /* ELIBACC */, + [LINUX_ELIBBAD] = EINVAL /* ELIBBAD */, + [LINUX_ELIBSCN] = EINVAL /* ELIBSCN */, + [LINUX_ELIBMAX] = EINVAL /* ELIBMAX */, + [LINUX_ELIBEXEC] = EINVAL /* ELIBEXEC */, + [LINUX_EILSEQ] = EILSEQ, + [LINUX_ERESTART] = ERESTART, + [LINUX_ESTRPIPE] = EINVAL /* ESTRPIPE */, + [LINUX_EUSERS] = EUSERS, + [LINUX_ENOTSOCK] = ENOTSOCK, + [LINUX_EDESTADDRREQ] = EDESTADDRREQ, + [LINUX_EMSGSIZE] = EMSGSIZE, + [LINUX_EPROTOTYPE] = EPROTOTYPE, + [LINUX_ENOPROTOOPT] = ENOPROTOOPT, + [LINUX_EPROTONOSUPPORT] = EPROTONOSUPPORT, + [LINUX_ESOCKTNOSUPPORT] = ESOCKTNOSUPPORT, + [LINUX_EOPNOTSUPP] = EOPNOTSUPP, + [LINUX_EPFNOSUPPORT] = EPFNOSUPPORT, + [LINUX_EAFNOSUPPORT] = EAFNOSUPPORT, + [LINUX_EADDRINUSE] = EADDRINUSE, + [LINUX_EADDRNOTAVAIL] = EADDRNOTAVAIL, + [LINUX_ENETDOWN] = ENETDOWN, + [LINUX_ENETUNREACH] = ENETUNREACH, + [LINUX_ENETRESET] = ENETRESET, + [LINUX_ECONNABORTED] = ECONNABORTED, + [LINUX_ECONNRESET] = ECONNRESET, + [LINUX_ENOBUFS] = ENOBUFS, + [LINUX_EISCONN] = EISCONN, + [LINUX_ENOTCONN] = ENOTCONN, + [LINUX_ESHUTDOWN] = ESHUTDOWN, + [LINUX_ETOOMANYREFS] = ETOOMANYREFS, + [LINUX_ETIMEDOUT] = ETIMEDOUT, + [LINUX_ECONNREFUSED] = ECONNREFUSED, + [LINUX_EHOSTDOWN] = EHOSTDOWN, + [LINUX_EHOSTUNREACH] = EHOSTUNREACH, + [LINUX_EALREADY] = 
/*
 * Translate one open-flag bit group.
 *
 * If any bit of @cmask is set in *@cflag, set @smask in *@sflag and clear
 * @cmask from *@cflag, so that whatever remains in *@cflag afterwards is
 * exactly the set of flags that had no translation.
 */
static inline void obit_convert(int *cflag, int *sflag,
                                unsigned cmask, unsigned smask)
{
        /* parentheses are required: != binds tighter than & */
        if ((*cflag & cmask) != 0) {
                *sflag |= smask;
                *cflag &= ~cmask;
        }
}
+ */ +int convert_client_oflag(int cflag, int *result) +{ + int sflag; + + cflag = 0; + obit_convert(&cflag, &sflag, O_RDONLY, LINUX_O_RDONLY); + obit_convert(&cflag, &sflag, O_WRONLY, LINUX_O_WRONLY); + obit_convert(&cflag, &sflag, O_RDWR, LINUX_O_RDWR); + obit_convert(&cflag, &sflag, O_NONBLOCK, LINUX_O_NONBLOCK); + obit_convert(&cflag, &sflag, O_APPEND, LINUX_O_APPEND); + obit_convert(&cflag, &sflag, O_ASYNC, LINUX_O_ASYNC); + obit_convert(&cflag, &sflag, O_FSYNC, LINUX_O_FSYNC); + obit_convert(&cflag, &sflag, O_NOFOLLOW, LINUX_O_NOFOLLOW); + obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); + obit_convert(&cflag, &sflag, O_TRUNC, LINUX_O_TRUNC); + obit_convert(&cflag, &sflag, O_EXCL, LINUX_O_EXCL); + obit_convert(&cflag, &sflag, O_CREAT, LINUX_O_CREAT); + obit_convert(&cflag, &sflag, O_NDELAY, LINUX_O_NDELAY); + obit_convert(&cflag, &sflag, O_NOCTTY, LINUX_O_NOCTTY); + /* + * Some more obscure BSD flags have no Linux counterparts: + * + * O_SHLOCK 0x0010 + * O_EXLOCK 0x0020 + * O_EVTONLY 0x8000 + * O_POPUP 0x80000000 + * O_ALERT 0x20000000 + */ + if (cflag == 0) { + *result = sflag; + return 0; + } else + return -EINVAL; +} diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c index 6f2b894..8cbb117 100644 --- a/lnet/libcfs/debug.c +++ b/lnet/libcfs/debug.c @@ -24,41 +24,13 @@ # define EXPORT_SYMTAB #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - # define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include +#include +#include #include "tracefile.h" -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#endif - unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL); EXPORT_SYMBOL(portal_subsystem_debug); @@ -77,23 +49,23 @@ atomic_t portal_kmemory = ATOMIC_INIT(0); EXPORT_SYMBOL(portal_kmemory); #endif -static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq); +static cfs_waitq_t 
debug_ctlwq; char debug_file_path[1024] = "/tmp/lustre-log"; static char debug_file_name[1024]; -char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; void portals_debug_dumplog_internal(void *arg) { - void *journal_info = current->journal_info; - current->journal_info = NULL; + CFS_DECL_JOURNAL_DATA; + + CFS_PUSH_JOURNAL; snprintf(debug_file_name, sizeof(debug_file_path) - 1, - "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg); + "%s.%ld.%ld", debug_file_path, cfs_time_current_sec(), (long)arg); printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name); tracefile_dump_all_pages(debug_file_name); - current->journal_info = journal_info; + CFS_POP_JOURNAL; } int portals_debug_dumplog_thread(void *arg) @@ -101,25 +73,26 @@ int portals_debug_dumplog_thread(void *arg) kportal_daemonize(""); reparent_to_init(); portals_debug_dumplog_internal(arg); - wake_up(&debug_ctlwq); + cfs_waitq_signal(&debug_ctlwq); return 0; } void portals_debug_dumplog(void) { - int rc; - DECLARE_WAITQUEUE(wait, current); + int rc; + cfs_waitlink_t wait; ENTRY; /* we're being careful to ensure that the kernel thread is * able to set our state to running as it exits before we * get to schedule() */ + cfs_waitlink_init(&wait); set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_add(&debug_ctlwq, &wait); - rc = kernel_thread(portals_debug_dumplog_thread, - (void *)(long)current->pid, - CLONE_VM | CLONE_FS | CLONE_FILES); + rc = cfs_kernel_thread(portals_debug_dumplog_thread, + (void *)(long)cfs_curproc_pid(), + CLONE_VM | CLONE_FS | CLONE_FILES); if (rc < 0) printk(KERN_ERR "LustreError: cannot start log dump thread: " "%d\n", rc); @@ -127,7 +100,7 @@ void portals_debug_dumplog(void) schedule(); /* be sure to teardown if kernel_thread() failed */ - remove_wait_queue(&debug_ctlwq, &wait); + cfs_waitq_del(&debug_ctlwq, &wait); set_current_state(TASK_RUNNING); } @@ -166,6 +139,7 @@ extern void *lus_portals_debug; int 
portals_debug_init(unsigned long bufsize) { + cfs_waitq_init(&debug_ctlwq); #ifdef CRAY_PORTALS lus_portals_debug = &portals_debug_msg; #endif @@ -217,59 +191,6 @@ void portals_debug_set_level(unsigned int debug_level) portal_debug = debug_level; } -void portals_run_upcall(char **argv) -{ - int rc; - int argc; - char *envp[] = { - "HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - ENTRY; - - argv[0] = portals_upcall; - argc = 1; - while (argv[argc] != NULL) - argc++; - - LASSERT(argc >= 2); - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " - "check /proc/sys/portals/upcall\n", - rc, argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } else { - CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", - argv[0], argv[1], - argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], - argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], - argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], - argc < 6 ? "" : ",..."); - } -} - -void portals_run_lbug_upcall(char *file, const char *fn, const int line) -{ - char *argv[6]; - char buf[32]; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = (char *)fn; - argv[4] = buf; - argv[5] = NULL; - - portals_run_upcall (argv); -} - char *portals_nid2str(int nal, ptl_nid_t nid, char *str) { if (nid == PTL_NID_ANY) { @@ -280,7 +201,7 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) switch(nal){ /* XXX this could be a nal method of some sort, 'cept it's config * dependent whether (say) socknal NIDs are actually IP addresses... 
*/ -#if !CRAY_PORTALS +#if !CRAY_PORTALS case TCPNAL: /* userspace NAL */ case IIBNAL: @@ -309,48 +230,14 @@ char *portals_nid2str(int nal, ptl_nid_t nid, char *str) char *portals_id2str(int nal, ptl_process_id_t id, char *str) { int len; - + portals_nid2str(nal, id.nid, str); len = strlen(str); snprintf(str + len, PTL_NALFMT_SIZE - len, "-%u", id.pid); return str; } -#ifdef __KERNEL__ - -void portals_debug_dumpstack(struct task_struct *tsk) -{ -#if defined(__arch_um__) - if (tsk != NULL) - CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", - tsk->pid, UML_PID(tsk)); - asm("int $3"); -#elif defined(HAVE_SHOW_TASK) - /* this is exported by lustre kernel version 42 */ - extern void show_task(struct task_struct *); - - if (tsk == NULL) - tsk = current; - CWARN("showing stack for process %d\n", tsk->pid); - show_task(tsk); -#else - CWARN("can't show stack: kernel doesn't export show_task\n"); -#endif -} - -struct task_struct *portals_current(void) -{ - CWARN("current task struct is %p\n", current); - return current; -} - -EXPORT_SYMBOL(portals_debug_dumpstack); -EXPORT_SYMBOL(portals_current); -#endif /* __KERNEL__ */ - EXPORT_SYMBOL(portals_debug_dumplog); EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_upcall); -EXPORT_SYMBOL(portals_run_lbug_upcall); EXPORT_SYMBOL(portals_nid2str); EXPORT_SYMBOL(portals_id2str); diff --git a/lnet/libcfs/libcfs.xcode/project.pbxproj b/lnet/libcfs/libcfs.xcode/project.pbxproj new file mode 100644 index 0000000..479c21b --- /dev/null +++ b/lnet/libcfs/libcfs.xcode/project.pbxproj @@ -0,0 +1,439 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = libcfs; + refType = 4; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 19444794072D07AD00DAF9BC = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = tracefile.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19444795072D07AD00DAF9BC = { + fileRef = 19444794072D07AD00DAF9BC; + isa = PBXBuildFile; + settings = { + }; + }; + 19444796072D08AA00DAF9BC = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = debug.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19444797072D08AA00DAF9BC = { + fileRef = 19444796072D08AA00DAF9BC; + isa = 
PBXBuildFile; + settings = { + }; + }; + 19509C03072CD5FF00A958C3 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = module.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19509C04072CD5FF00A958C3 = { + fileRef = 19509C03072CD5FF00A958C3; + isa = PBXBuildFile; + settings = { + }; + }; + 19713B76072E8274004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_prim.c; + path = arch/xnu/cfs_prim.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713B77072E8274004E8469 = { + fileRef = 19713B76072E8274004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713BB7072E8281004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_mem.c; + path = arch/xnu/cfs_mem.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713BB8072E8281004E8469 = { + fileRef = 19713BB7072E8281004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713BF7072E828E004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_proc.c; + path = arch/xnu/cfs_proc.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713BF8072E828E004E8469 = { + fileRef = 19713BF7072E828E004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713C7A072E82B2004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_utils.c; + path = arch/xnu/cfs_utils.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713C7B072E82B2004E8469 = { + fileRef = 19713C7A072E82B2004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713CD6072E8A56004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_module.c; + path = arch/xnu/cfs_module.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713CD7072E8A56004E8469 = { + fileRef = 19713CD6072E8A56004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 
19713D1B072E8E39004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_fs.c; + path = arch/xnu/cfs_fs.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713D1C072E8E39004E8469 = { + fileRef = 19713D1B072E8E39004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713D60072E9109004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = xnu_sync.c; + path = arch/xnu/xnu_sync.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713D61072E9109004E8469 = { + fileRef = 19713D60072E9109004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713DC2072F994D004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_tracefile.c; + path = arch/xnu/cfs_tracefile.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713DC3072F994D004E8469 = { + fileRef = 19713DC2072F994D004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19713E1C072FAFB5004E8469 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = cfs_debug.c; + path = arch/xnu/cfs_debug.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19713E1D072FAFB5004E8469 = { + fileRef = 19713E1C072FAFB5004E8469; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 247142CAFF3F8F9811CA285C = { + children = ( + 19713E1C072FAFB5004E8469, + 19713DC2072F994D004E8469, + 19713D60072E9109004E8469, + 19713D1B072E8E39004E8469, + 19713CD6072E8A56004E8469, + 19713C7A072E82B2004E8469, + 19713BF7072E828E004E8469, + 19713BB7072E8281004E8469, + 19713B76072E8274004E8469, + 19444796072D08AA00DAF9BC, + 19444794072D07AD00DAF9BC, + 19509C03072CD5FF00A958C3, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType 
= 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = ../include; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.libcfs; + MODULE_START = libcfs_start; + MODULE_STOP = libcfs_stop; + MODULE_VERSION = 1.0.1; + OTHER_CFLAGS = "-D__KERNEL__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = libcfs; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = libcfs; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = libcfs; + productReference = 32A4FEC40562C75800D090E7; + productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 19509C04072CD5FF00A958C3, + 19444795072D07AD00DAF9BC, + 19444797072D08AA00DAF9BC, + 19713B77072E8274004E8469, + 19713BB8072E8281004E8469, + 19713BF8072E828E004E8469, + 19713C7B072E82B2004E8469, + 19713CD7072E8A56004E8469, + 19713D1C072E8E39004E8469, + 19713D61072E9109004E8469, + 19713DC3072F994D004E8469, + 19713E1D072FAFB5004E8469, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = libcfs.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/lnet/libcfs/linux/.cvsignore b/lnet/libcfs/linux/.cvsignore new file mode 100644 index 0000000..2bc4137 --- /dev/null +++ b/lnet/libcfs/linux/.cvsignore @@ -0,0 +1,3 @@ +Makefile +Makefile.in +*.o.cmd diff --git a/lnet/libcfs/linux/Makefile.am b/lnet/libcfs/linux/Makefile.am new file mode 100644 index 0000000..49f8e87 --- /dev/null +++ b/lnet/libcfs/linux/Makefile.am @@ -0,0 +1,4 @@ +EXTRA_DIST := linux-debug.c linux-lwt.c linux-prim.c linux-tracefile.c \ + linux-fs.c linux-mem.c linux-proc.c linux-utils.c linux-lock.c \ + linux-module.c linux-sync.c linux-curproc.c + diff --git a/lnet/libcfs/linux/linux-curproc.c b/lnet/libcfs/linux/linux-curproc.c new file mode 100644 index 0000000..719e48b --- /dev/null +++ b/lnet/libcfs/linux/linux-curproc.c @@ -0,0 +1,133 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Lustre curproc API implementation for Linux kernel + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. Lustre is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + * Public License for more details. 
You should have received a copy of the GNU + * General Public License along with Lustre; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +/* + * Implementation of cfs_curproc API (see portals/include/libcfs/curproc.h) + * for Linux kernel. + */ + +uid_t cfs_curproc_uid(void) +{ + return current->uid; +} + +gid_t cfs_curproc_gid(void) +{ + return current->gid; +} + +uid_t cfs_curproc_fsuid(void) +{ + return current->fsuid; +} + +gid_t cfs_curproc_fsgid(void) +{ + return current->fsgid; +} + +pid_t cfs_curproc_pid(void) +{ + return current->pid; +} + +int cfs_curproc_groups_nr(void) +{ + int nr; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + nr = current->group_info->ngroups; + task_unlock(current); +#else + nr = current->ngroups; +#endif + return nr; +} + +void cfs_curproc_groups_dump(gid_t *array, int size) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + task_lock(current); + for (size = min_t(int, size, current->group_info->ngroups); size-- > 0; ) + array[size] = GROUP_AT(current->group_info, size); /* gids are chunked across blocks[], not contiguous in blocks[0] */ + task_unlock(current); +#else + LASSERT(size <= NGROUPS); + size = min_t(int, size, current->ngroups); + memcpy(array, current->groups, size * sizeof(*array)); +#endif +} + + +int cfs_curproc_is_in_groups(gid_t gid) +{ + return in_group_p(gid); +} + +mode_t cfs_curproc_umask(void) +{ + return current->fs->umask; +} + +char *cfs_curproc_comm(void) +{ + return current->comm; +} + +cfs_kernel_cap_t cfs_curproc_cap_get(void) +{ + return current->cap_effective; +} + +void cfs_curproc_cap_set(cfs_kernel_cap_t cap) +{ + current->cap_effective = cap; +} + +EXPORT_SYMBOL(cfs_curproc_uid); +EXPORT_SYMBOL(cfs_curproc_pid); +EXPORT_SYMBOL(cfs_curproc_gid); +EXPORT_SYMBOL(cfs_curproc_fsuid); +EXPORT_SYMBOL(cfs_curproc_fsgid); +EXPORT_SYMBOL(cfs_curproc_umask); +EXPORT_SYMBOL(cfs_curproc_comm); 
+EXPORT_SYMBOL(cfs_curproc_groups_nr); +EXPORT_SYMBOL(cfs_curproc_groups_dump); +EXPORT_SYMBOL(cfs_curproc_is_in_groups); +EXPORT_SYMBOL(cfs_curproc_cap_get); +EXPORT_SYMBOL(cfs_curproc_cap_set); + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/libcfs/linux/linux-debug.c b/lnet/libcfs/linux/linux-debug.c new file mode 100644 index 0000000..8ac8d55 --- /dev/null +++ b/lnet/libcfs/linux/linux-debug.c @@ -0,0 +1,151 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef EXPORT_SYMTAB +# define EXPORT_SYMTAB +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +# define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include + +#include "tracefile.h" + +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +#include +#endif + +static int handled_panic; /* to avoid recursive calls to notifiers */ +char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; + +void portals_run_upcall(char **argv) +{ + int rc; + int argc; + char *envp[] = { + "HOME=/", + "PATH=/sbin:/bin:/usr/sbin:/usr/bin", + NULL}; + ENTRY; + + argv[0] = portals_upcall; + argc = 1; + while (argv[argc] != NULL) + argc++; + + LASSERT(argc >= 2); + + rc = USERMODEHELPER(argv[0], argv, envp); + if (rc < 0) { + CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; " + "check /proc/sys/portals/upcall\n", + rc, argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? "" : ",..."); + } else { + CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n", + argv[0], argv[1], + argc < 3 ? "" : ",", argc < 3 ? "" : argv[2], + argc < 4 ? "" : ",", argc < 4 ? "" : argv[3], + argc < 5 ? "" : ",", argc < 5 ? "" : argv[4], + argc < 6 ? 
"" : ",..."); + } +} + +void portals_run_lbug_upcall(char *file, const char *fn, const int line) +{ + char *argv[6]; + char buf[32]; + + ENTRY; + snprintf (buf, sizeof buf, "%d", line); + + argv[1] = "LBUG"; + argv[2] = file; + argv[3] = (char *)fn; + argv[4] = buf; + argv[5] = NULL; + + portals_run_upcall (argv); +} + +#ifdef __KERNEL__ + +void portals_debug_dumpstack(struct task_struct *tsk) +{ +#if defined(__arch_um__) + if (tsk != NULL) + CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n", + tsk->pid, UML_PID(tsk)); + asm("int $3"); +#elif defined(HAVE_SHOW_TASK) + /* this is exported by lustre kernel version 42 */ + extern void show_task(struct task_struct *); + + if (tsk == NULL) + tsk = current; + CWARN("showing stack for process %d\n", tsk->pid); + show_task(tsk); +#else + CWARN("can't show stack: kernel doesn't export show_task\n"); +#endif +} + +cfs_task_t *portals_current(void) +{ + CWARN("current task struct is %p\n", current); + return current; +} +EXPORT_SYMBOL(portals_debug_dumpstack); +EXPORT_SYMBOL(portals_current); + +#endif /* __KERNEL__ */ + +EXPORT_SYMBOL(portals_run_upcall); +EXPORT_SYMBOL(portals_run_lbug_upcall); diff --git a/lnet/libcfs/linux/linux-fs.c b/lnet/libcfs/linux/linux-fs.c new file mode 100644 index 0000000..61b7166 --- /dev/null +++ b/lnet/libcfs/linux/linux-fs.c @@ -0,0 +1,31 @@ +# define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include + +#include + +cfs_file_t * +cfs_filp_open (const char *name, int flags, int mode, int *err) +{ + /* XXX + * Maybe we need to handle flags and mode in the future + */ + cfs_file_t *filp = NULL; + + filp = filp_open(name, flags, mode); + if (IS_ERR(filp)) { + int rc; + + rc = PTR_ERR(filp); + printk(KERN_ERR "LustreError: can't open %s file: err %d\n", + name, rc); + if (err) + *err = rc; + filp = NULL; + } + return filp; +} + +EXPORT_SYMBOL(cfs_filp_open); diff --git a/lnet/libcfs/linux/linux-lock.c b/lnet/libcfs/linux/linux-lock.c new file mode 100644 index 0000000..a1d713e 
--- /dev/null +++ b/lnet/libcfs/linux/linux-lock.c @@ -0,0 +1,4 @@ +# define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include diff --git a/lnet/libcfs/linux/linux-lwt.c b/lnet/libcfs/linux/linux-lwt.c new file mode 100644 index 0000000..32adc80 --- /dev/null +++ b/lnet/libcfs/linux/linux-lwt.c @@ -0,0 +1,2 @@ +# define DEBUG_SUBSYSTEM S_PORTALS + diff --git a/lnet/libcfs/linux/linux-mem.c b/lnet/libcfs/linux/linux-mem.c new file mode 100644 index 0000000..fb2c6a0 --- /dev/null +++ b/lnet/libcfs/linux/linux-mem.c @@ -0,0 +1,175 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001, 2002 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include +#include +#include +#include + +void * +cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + void *ptr = NULL; + unsigned int mflags = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else if (flags & CFS_ALLOC_WAIT) + mflags |= __GFP_WAIT; + else + mflags |= (__GFP_HIGH | __GFP_WAIT); + + if (flags & CFS_ALLOC_FS) + mflags |= __GFP_FS; + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO | __GFP_HIGHIO; +#else + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else + mflags |= __GFP_WAIT; + if (flags & CFS_ALLOC_FS) + mflags |= __GFP_FS; + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO; +#endif + + ptr = kmalloc(nr_bytes, mflags); + if (ptr != NULL && (flags & CFS_ALLOC_ZERO)) + memset(ptr, 0, nr_bytes); + return ptr; +} + +void +cfs_free(void *addr) +{ + kfree(addr); +} + +void * +cfs_alloc_large(size_t nr_bytes) +{ + return vmalloc(nr_bytes); +} + +void +cfs_free_large(void *addr) +{ + vfree(addr); +} + +cfs_page_t * +cfs_alloc_pages(unsigned int flags, unsigned int order) +{ + unsigned int mflags = 0; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else if (flags & CFS_ALLOC_WAIT) + mflags |= __GFP_WAIT; + else + mflags |= (__GFP_HIGH | __GFP_WAIT); + + if (flags & CFS_ALLOC_FS) + mflags |= __GFP_FS; + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO | __GFP_HIGHIO; + if (flags & CFS_ALLOC_HIGH) + mflags |= __GFP_HIGHMEM; +#else + if (flags & CFS_ALLOC_ATOMIC) + mflags |= __GFP_HIGH; + else + mflags |= __GFP_WAIT; + if (flags & CFS_ALLOC_FS) + mflags |= __GFP_FS; + if (flags & CFS_ALLOC_IO) + mflags |= __GFP_IO; + if (flags & CFS_ALLOC_HIGH) + mflags |= __GFP_HIGHMEM; +#endif + + return alloc_pages(mflags, order); +} + +cfs_mem_cache_t * +cfs_mem_cache_create (const char *name, size_t size, size_t offset, + unsigned long flags, void (*ctor)(void*, kmem_cache_t *, unsigned long), 
+ void (*dtor)(void*, cfs_mem_cache_t *, unsigned long)) +{ + return kmem_cache_create(name, size, offset, flags, ctor, dtor); +} + +int +cfs_mem_cache_destroy (cfs_mem_cache_t * cachep) +{ + return kmem_cache_destroy(cachep); +} + +void * +cfs_mem_cache_alloc(cfs_mem_cache_t *cachep, int flags) +{ + unsigned int mflags = 0; +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + if (flags & CFS_SLAB_ATOMIC) + mflags |= __GFP_HIGH; + else if (flags & CFS_ALLOC_WAIT) + mflags |= __GFP_WAIT; + else + mflags |= (__GFP_HIGH | __GFP_WAIT); + + if (flags & CFS_SLAB_FS) + mflags |= __GFP_FS; + if (flags & CFS_SLAB_IO) + mflags |= __GFP_IO | __GFP_HIGHIO; +#else + if (flags & CFS_SLAB_ATOMIC) + mflags |= __GFP_HIGH; + else + mflags |= __GFP_WAIT; + if (flags & CFS_SLAB_FS) + mflags |= __GFP_FS; + if (flags & CFS_SLAB_IO) + mflags |= __GFP_IO; +#endif + + return kmem_cache_alloc(cachep, mflags); +} + +void +cfs_mem_cache_free(cfs_mem_cache_t *cachep, void *objp) +{ + kmem_cache_free(cachep, objp); +} + +EXPORT_SYMBOL(cfs_alloc); +EXPORT_SYMBOL(cfs_free); +EXPORT_SYMBOL(cfs_alloc_large); +EXPORT_SYMBOL(cfs_free_large); +EXPORT_SYMBOL(cfs_alloc_pages); +EXPORT_SYMBOL(cfs_mem_cache_create); +EXPORT_SYMBOL(cfs_mem_cache_destroy); +EXPORT_SYMBOL(cfs_mem_cache_alloc); +EXPORT_SYMBOL(cfs_mem_cache_free); diff --git a/lnet/libcfs/linux/linux-module.c b/lnet/libcfs/linux/linux-module.c new file mode 100644 index 0000000..4b2558b --- /dev/null +++ b/lnet/libcfs/linux/linux-module.c @@ -0,0 +1,170 @@ +#define DEBUG_SUBSYSTEM S_PORTALS + +#include +#include + +#define PORTAL_MINOR 240 + + +void +kportal_daemonize (char *str) +{ +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) + daemonize(str); +#else + daemonize(); + snprintf (current->comm, sizeof (current->comm), "%s", str); +#endif +} + +void +kportal_blockallsigs () +{ + unsigned long flags; + + SIGNAL_MASK_LOCK(current, flags); + sigfillset(&current->blocked); /* add every signal to this task's blocked set */ + RECALC_SIGPENDING; + SIGNAL_MASK_UNLOCK(current, flags); +} + +int 
portal_ioctl_getdata(char *buf, char *end, void *arg) +{ + struct portal_ioctl_hdr *hdr; + struct portal_ioctl_data *data; + int err; + ENTRY; + + hdr = (struct portal_ioctl_hdr *)buf; + data = (struct portal_ioctl_data *)buf; + + err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); + if (err) + RETURN(-EFAULT); /* copy_from_user() returns bytes left uncopied, not an errno */ + + if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { + CERROR("PORTALS: version mismatch kernel vs application\n"); + RETURN(-EINVAL); + } + + if (end <= buf || hdr->ioc_len >= (size_t)(end - buf)) { /* compare lengths: buf + ioc_len could wrap */ + CERROR("PORTALS: user buffer exceeds kernel buffer\n"); + RETURN(-EINVAL); + } + + + if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { + CERROR("PORTALS: user buffer too small for ioctl\n"); + RETURN(-EINVAL); + } + + err = copy_from_user(buf, (void *)arg, hdr->ioc_len); + if (err) + RETURN(-EFAULT); + + if (portal_ioctl_is_invalid(data)) { + CERROR("PORTALS: ioctl not correctly formatted\n"); + RETURN(-EINVAL); + } + + if (data->ioc_inllen1) + data->ioc_inlbuf1 = &data->ioc_bulk[0]; + + if (data->ioc_inllen2) + data->ioc_inlbuf2 = &data->ioc_bulk[0] + + size_round(data->ioc_inllen1); + + RETURN(0); +} + +extern struct cfs_psdev_ops libcfs_psdev_ops; + +static int +libcfs_psdev_open(struct inode * inode, struct file * file) +{ + struct portals_device_userstate **pdu = NULL; + int rc = 0; + + if (!inode) + return (-EINVAL); + pdu = (struct portals_device_userstate **)&file->private_data; + if (libcfs_psdev_ops.p_open != NULL) + rc = libcfs_psdev_ops.p_open(0, (void *)pdu); + else + return (-EPERM); + return rc; +} + +/* called when closing /dev/device */ +static int +libcfs_psdev_release(struct inode * inode, struct file * file) +{ + struct portals_device_userstate *pdu; + int rc = 0; + + if (!inode) + return (-EINVAL); + pdu = file->private_data; + if (libcfs_psdev_ops.p_close != NULL) + rc = libcfs_psdev_ops.p_close(0, (void *)pdu); + else + rc = -EPERM; + return rc; +} + +static int +libcfs_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + 
struct cfs_psdev_file pfile; + int rc = 0; + + if (current->fsuid != 0) + return -EACCES; + + if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE || + _IOC_NR(cmd) < IOC_PORTAL_MIN_NR || + _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) { + CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", + _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); + return (-EINVAL); + } + + /* Handle platform-dependent IOC requests */ + switch (cmd) { + case IOC_PORTAL_PANIC: + if (!capable (CAP_SYS_BOOT)) + return (-EPERM); + panic("debugctl-invoked panic"); + return (0); + case IOC_PORTAL_MEMHOG: + if (!capable (CAP_SYS_ADMIN)) + return -EPERM; + /* fall through to the generic handler below */ + } + + pfile.off = 0; + pfile.private_data = file->private_data; + if (libcfs_psdev_ops.p_ioctl != NULL) + rc = libcfs_psdev_ops.p_ioctl(&pfile, cmd, (void *)arg); + else + rc = -EPERM; + return (rc); +} + +static struct file_operations libcfs_fops = { + ioctl: libcfs_ioctl, + open: libcfs_psdev_open, + release: libcfs_psdev_release +}; + +cfs_psdev_t libcfs_dev = { + PORTAL_MINOR, + "portals", + &libcfs_fops +}; + +EXPORT_SYMBOL(kportal_blockallsigs); +EXPORT_SYMBOL(kportal_daemonize); + + diff --git a/lnet/libcfs/linux/linux-prim.c b/lnet/libcfs/linux/linux-prim.c new file mode 100644 index 0000000..95365ee --- /dev/null +++ b/lnet/libcfs/linux/linux-prim.c @@ -0,0 +1,19 @@ +#define DEBUG_SUBSYSTEM S_PORTALS +#include +#include +#include + +int +libcfs_arch_init(void) +{ + return 0; +} + +void +libcfs_arch_cleanup(void) +{ + return; +} + +EXPORT_SYMBOL(libcfs_arch_init); +EXPORT_SYMBOL(libcfs_arch_cleanup); diff --git a/lnet/libcfs/proc.c b/lnet/libcfs/linux/linux-proc.c similarity index 99% rename from lnet/libcfs/proc.c rename to lnet/libcfs/linux/linux-proc.c index 08446a0..70f4059 100644 --- a/lnet/libcfs/proc.c +++ b/lnet/libcfs/linux/linux-proc.c @@ -53,7 +53,7 @@ # define DEBUG_SUBSYSTEM S_PORTALS -#include +#include #include #include "tracefile.h" diff --git a/lnet/libcfs/linux/linux-sync.c b/lnet/libcfs/linux/linux-sync.c new file 
mode 100644 index 0000000..32adc80 --- /dev/null +++ b/lnet/libcfs/linux/linux-sync.c @@ -0,0 +1,2 @@ +# define DEBUG_SUBSYSTEM S_PORTALS + diff --git a/lnet/libcfs/linux/linux-tracefile.c b/lnet/libcfs/linux/linux-tracefile.c new file mode 100644 index 0000000..0c134ee --- /dev/null +++ b/lnet/libcfs/linux/linux-tracefile.c @@ -0,0 +1,205 @@ +#define DEBUG_SUBSYSTEM S_PORTALS +#define LUSTRE_TRACEFILE_PRIVATE + +#include +#include +#include "tracefile.h" + +#ifndef get_cpu +#define get_cpu() smp_processor_id() +#define put_cpu() do { } while (0) +#endif + +extern union trace_data_union trace_data[NR_CPUS]; +extern char *tracefile; +extern long long tracefile_size; +extern struct rw_semaphore tracefile_sem; + +inline struct trace_cpu_data * +__trace_get_tcd(unsigned long *flags) +{ + struct trace_cpu_data *ret; + + int cpu = get_cpu(); + local_irq_save(*flags); + ret = &trace_data[cpu].tcd; + + return ret; +} + +inline void +trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags) +{ + local_irq_restore(flags); + put_cpu(); +} + +void +set_ptldebug_header(struct ptldebug_header *header, int subsys, int mask, + const int line, unsigned long stack) +{ + struct timeval tv; + + do_gettimeofday(&tv); + + header->ph_subsys = subsys; + header->ph_mask = mask; + header->ph_cpu_id = smp_processor_id(); + header->ph_sec = (__u32)tv.tv_sec; + header->ph_usec = tv.tv_usec; + header->ph_stack = stack; + header->ph_pid = current->pid; + header->ph_line_num = line; +#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) + header->ph_extern_pid = current->thread.extern_pid; +#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + header->ph_extern_pid = current->thread.mode.tt.extern_pid; +#else + header->ph_extern_pid = 0; +#endif + return; +} + +void print_to_console(struct ptldebug_header *hdr, int mask, char *buf, + int len, char *file, const char *fn) +{ + char *prefix = NULL, *ptype = NULL; + + if ((mask & D_EMERG) != 0) { + 
prefix = "LustreError"; + ptype = KERN_EMERG; + } else if ((mask & D_ERROR) != 0) { + prefix = "LustreError"; + ptype = KERN_ERR; + } else if ((mask & D_WARNING) != 0) { + prefix = "Lustre"; + ptype = KERN_WARNING; + } else if (portal_printk) { + prefix = "Lustre"; + ptype = KERN_INFO; + } + printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, + hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); + return; +} + +int trace_write_daemon_file(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char *name; + unsigned long off; + int rc = 0; + + name = kmalloc(count + 1, GFP_KERNEL); + if (name == NULL) + return -ENOMEM; + + if (copy_from_user(name, buffer, count)) { + rc = -EFAULT; + goto out; + } + + /* be nice and strip out trailing '\n' */ + for (off = count ; off > 2 && isspace(name[off - 1]); off--) + ; + + name[off] = '\0'; + + down_write(&tracefile_sem); + if (strcmp(name, "stop") == 0) { + tracefile = NULL; + trace_stop_thread(); + goto out_sem; + } else if (strncmp(name, "size=", 5) == 0) { + tracefile_size = simple_strtoul(name + 5, NULL, 0); + if (tracefile_size < 10 || tracefile_size > 20480) + tracefile_size = TRACEFILE_SIZE; + else + tracefile_size <<= 20; + goto out_sem; + } + + if (name[0] != '/') { + rc = -EINVAL; + goto out_sem; + } + + if (tracefile != NULL) + kfree(tracefile); + + tracefile = name; + name = NULL; + printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " + "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); + + trace_start_thread(); +out_sem: + up_write(&tracefile_sem); +out: + kfree(name); + return rc < 0 ? rc : (int)count; /* surface -EFAULT/-EINVAL instead of always reporting the write as consumed */ +} + +int trace_read_daemon_file(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int rc; + + down_read(&tracefile_sem); + rc = snprintf(page, count, "%s", tracefile); + up_read(&tracefile_sem); + + return rc; +} + +int trace_write_debug_mb(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + char 
string[32] = { 0 }; /* zero-filled: count < sizeof(string) below, so the copied text stays NUL-terminated */ + int i; + unsigned max; + + if (count >= sizeof(string)) { + printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", + count); + return -EOVERFLOW; + } + + if (copy_from_user(string, buffer, count)) + return -EFAULT; + + max = simple_strtoul(string, NULL, 0); + if (max == 0) + return -EINVAL; + + if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) { + printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " + "%dMB, which is more than 80%% of available RAM (%lu)\n", + max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); + return -EINVAL; + } + + max /= smp_num_cpus; + + for (i = 0; i < NR_CPUS; i++) { + struct trace_cpu_data *tcd; + tcd = &trace_data[i].tcd; + tcd->tcd_max_pages = max << (20 - PAGE_SHIFT); + } + return count; +} + +int trace_read_debug_mb(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct trace_cpu_data *tcd; + unsigned long flags; + int rc; + + tcd = trace_get_tcd(flags); + rc = snprintf(page, count, "%lu\n", + (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus); + trace_put_tcd(tcd, flags); + return rc; +} + diff --git a/lnet/libcfs/linux/linux-utils.c b/lnet/libcfs/linux/linux-utils.c new file mode 100644 index 0000000..67ecb0c --- /dev/null +++ b/lnet/libcfs/linux/linux-utils.c @@ -0,0 +1,47 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2002 Cluster File Systems, Inc. + * Author: Phil Schwan + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* + * miscellaneous libcfs stuff + */ +#define DEBUG_SUBSYSTEM S_PORTALS +#include + +/* + * Convert server error code to client format. Error codes are from + * Linux errno.h, so for Linux client---identity. + */ +int convert_server_error(__u64 ecode) +{ + return ecode; +} + +/* + * convert flag from client to server. + */ +int convert_client_oflag(int cflag) +{ + return cflag; +} + + diff --git a/lnet/libcfs/lwt.c b/lnet/libcfs/lwt.c index 3f6a9c2..b4ae10f5 100644 --- a/lnet/libcfs/lwt.c +++ b/lnet/libcfs/lwt.c @@ -41,7 +41,7 @@ #define DEBUG_SUBSYSTEM S_PORTALS -#include +#include #if LWT_SUPPORT diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c index 9358af2..5fe401b 100644 --- a/lnet/libcfs/module.c +++ b/lnet/libcfs/module.c @@ -24,32 +24,9 @@ #endif #define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - #include #include -#include -#include - -#define PORTAL_MINOR 240 +#include struct nal_cmd_handler { int nch_number; @@ -58,7 +35,7 @@ struct nal_cmd_handler { }; static struct nal_cmd_handler nal_cmd[16]; -static DECLARE_MUTEX(nal_cmd_sem); +struct semaphore nal_cmd_mutex; #ifdef PORTAL_DEBUG void kportal_assertion_failed(char *expr, char *file, const char *func, @@ -71,69 +48,58 @@ void kportal_assertion_failed(char *expr, char *file, const char *func, #endif void -kportal_daemonize (char *str) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) - daemonize(str); -#else - daemonize(); - snprintf (current->comm, sizeof (current->comm), "%s", str); -#endif -} - -void kportal_memhog_free (struct portals_device_userstate *pdu) { - struct page **level0p = &pdu->pdu_memhog_root_page; - struct page 
**level1p; - struct page **level2p; + cfs_page_t **level0p = &pdu->pdu_memhog_root_page; + cfs_page_t **level1p; + cfs_page_t **level2p; int count1; int count2; - + if (*level0p != NULL) { - level1p = (struct page **)page_address(*level0p); + level1p = (cfs_page_t **)cfs_page_address(*level0p); count1 = 0; - - while (count1 < PAGE_SIZE/sizeof(struct page *) && + + while (count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && *level1p != NULL) { - level2p = (struct page **)page_address(*level1p); + level2p = (cfs_page_t **)cfs_page_address(*level1p); count2 = 0; - - while (count2 < PAGE_SIZE/sizeof(struct page *) && + + while (count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *) && *level2p != NULL) { - - __free_page(*level2p); + + cfs_free_page(*level2p); pdu->pdu_memhog_pages--; level2p++; count2++; } - - __free_page(*level1p); + + cfs_free_page(*level1p); pdu->pdu_memhog_pages--; level1p++; count1++; } - - __free_page(*level0p); + + cfs_free_page(*level0p); pdu->pdu_memhog_pages--; *level0p = NULL; } - + LASSERT (pdu->pdu_memhog_pages == 0); } int kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags) { - struct page **level0p; - struct page **level1p; - struct page **level2p; + cfs_page_t **level0p; + cfs_page_t **level1p; + cfs_page_t **level2p; int count1; int count2; - + LASSERT (pdu->pdu_memhog_pages == 0); LASSERT (pdu->pdu_memhog_root_page == NULL); @@ -144,45 +110,45 @@ kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flag return 0; level0p = &pdu->pdu_memhog_root_page; - *level0p = alloc_page(flags); + *level0p = cfs_alloc_page(flags); if (*level0p == NULL) return -ENOMEM; pdu->pdu_memhog_pages++; - level1p = (struct page **)page_address(*level0p); + level1p = (cfs_page_t **)cfs_page_address(*level0p); count1 = 0; - memset(level1p, 0, PAGE_SIZE); - + memset(level1p, 0, CFS_PAGE_SIZE); + while (pdu->pdu_memhog_pages < npages && - count1 < PAGE_SIZE/sizeof(struct page *)) { + count1 < CFS_PAGE_SIZE/sizeof(cfs_page_t 
*)) { - if (signal_pending(current)) + if (cfs_signal_pending(cfs_current())) return (-EINTR); - - *level1p = alloc_page(flags); + + *level1p = cfs_alloc_page(flags); if (*level1p == NULL) return -ENOMEM; pdu->pdu_memhog_pages++; - level2p = (struct page **)page_address(*level1p); + level2p = (cfs_page_t **)cfs_page_address(*level1p); count2 = 0; - memset(level2p, 0, PAGE_SIZE); - + memset(level2p, 0, CFS_PAGE_SIZE); + while (pdu->pdu_memhog_pages < npages && - count2 < PAGE_SIZE/sizeof(struct page *)) { - - if (signal_pending(current)) + count2 < CFS_PAGE_SIZE/sizeof(cfs_page_t *)) { + + if (cfs_signal_pending(cfs_current())) return (-EINTR); - *level2p = alloc_page(flags); + *level2p = cfs_alloc_page(flags); if (*level2p == NULL) return (-ENOMEM); pdu->pdu_memhog_pages++; - + level2p++; count2++; } - + level1p++; count1++; } @@ -190,25 +156,11 @@ kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flag return 0; } -void -kportal_blockallsigs () -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - /* called when opening /dev/device */ -static int libcfs_psdev_open(struct inode * inode, struct file * file) +static int libcfs_psdev_open(unsigned long flags, void *args) { struct portals_device_userstate *pdu; ENTRY; - - if (!inode) - RETURN(-EINVAL); PORTAL_MODULE_USE; @@ -217,26 +169,23 @@ static int libcfs_psdev_open(struct inode * inode, struct file * file) pdu->pdu_memhog_pages = 0; pdu->pdu_memhog_root_page = NULL; } - file->private_data = pdu; - + *(struct portals_device_userstate **)args = pdu; + RETURN(0); } /* called when closing /dev/device */ -static int libcfs_psdev_release(struct inode * inode, struct file * file) +static int libcfs_psdev_release(unsigned long flags, void *args) { struct portals_device_userstate *pdu; ENTRY; - if (!inode) - RETURN(-EINVAL); - - pdu = file->private_data; + pdu = (struct portals_device_userstate 
*)args; if (pdu != NULL) { kportal_memhog_free(pdu); PORTAL_FREE(pdu, sizeof(*pdu)); } - + PORTAL_MODULE_UNUSE; RETURN(0); } @@ -268,10 +217,10 @@ libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) CDEBUG(D_IOCTL, "Register NAL %x, handler: %p\n", nal, handler); - down(&nal_cmd_sem); + mutex_down(&nal_cmd_mutex); if (libcfs_find_nal_cmd_handler(nal) != NULL) { - up (&nal_cmd_sem); + mutex_up (&nal_cmd_mutex); return (-EBUSY); } @@ -281,7 +230,7 @@ libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) cmd = &nal_cmd[i]; break; } - + if (cmd == NULL) { rc = -EBUSY; } else { @@ -291,7 +240,7 @@ libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private) cmd->nch_private = private; } - up(&nal_cmd_sem); + mutex_up(&nal_cmd_mutex); return rc; } @@ -304,12 +253,12 @@ libcfs_nal_cmd_unregister(int nal) CDEBUG(D_IOCTL, "Unregister NAL %x\n", nal); - down(&nal_cmd_sem); + mutex_down(&nal_cmd_mutex); cmd = libcfs_find_nal_cmd_handler(nal); LASSERT (cmd != NULL); cmd->nch_handler = NULL; cmd->nch_private = NULL; - up(&nal_cmd_sem); + mutex_up(&nal_cmd_mutex); } EXPORT_SYMBOL(libcfs_nal_cmd_unregister); @@ -325,24 +274,24 @@ libcfs_nal_cmd(struct portals_cfg *pcfg) int rc = -EINVAL; ENTRY; - down(&nal_cmd_sem); + mutex_down(&nal_cmd_mutex); cmd = libcfs_find_nal_cmd_handler(nal); if (cmd != NULL) { - CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal, + CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); rc = cmd->nch_handler(pcfg, cmd->nch_private); } else { CERROR("invalid nal: %x, cmd: %d\n", nal, pcfg->pcfg_command); } - up(&nal_cmd_sem); + mutex_up(&nal_cmd_mutex); RETURN(rc); #endif } EXPORT_SYMBOL(libcfs_nal_cmd); -static DECLARE_RWSEM(ioctl_list_sem); -static LIST_HEAD(ioctl_list); +static struct rw_semaphore ioctl_list_sem; +static struct list_head ioctl_list; int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand) { @@ -378,41 +327,29 @@ int 
libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand) } EXPORT_SYMBOL(libcfs_deregister_ioctl); -static int libcfs_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg) { + char buf[1024]; int err = -EINVAL; - char buf[1024]; struct portal_ioctl_data *data; ENTRY; - if (current->fsuid != 0) - RETURN(err = -EACCES); - - if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE || - _IOC_NR(cmd) < IOC_PORTAL_MIN_NR || - _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - RETURN(-EINVAL); - } + /* 'cmd' and permissions get checked in our arch-specific caller */ if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) { CERROR("PORTALS ioctl: data error\n"); - RETURN(-EINVAL); + return (-EINVAL); } - data = (struct portal_ioctl_data *)buf; switch (cmd) { case IOC_PORTAL_CLEAR_DEBUG: portals_debug_clear_buffer(); RETURN(0); - case IOC_PORTAL_PANIC: - if (!capable (CAP_SYS_BOOT)) - RETURN (-EPERM); - panic("debugctl-invoked panic"); - RETURN(0); + /* + * case IOC_PORTAL_PANIC: + * Handled in arch/cfs_module.c + */ case IOC_PORTAL_MARK_DEBUG: if (data->ioc_inlbuf1 == NULL || data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') @@ -481,17 +418,16 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, } case IOC_PORTAL_MEMHOG: - if (!capable (CAP_SYS_ADMIN)) - err = -EPERM; - else if (file->private_data == NULL) { + if (pfile->private_data == NULL) { err = -EINVAL; } else { - kportal_memhog_free(file->private_data); - err = kportal_memhog_alloc(file->private_data, + kportal_memhog_free(pfile->private_data); + /* XXX The ioc_flags is not GFP flags now, need to be fixed */ + err = kportal_memhog_alloc(pfile->private_data, data->ioc_count, data->ioc_flags); if (err != 0) - kportal_memhog_free(file->private_data); + kportal_memhog_free(pfile->private_data); } break; @@ 
-500,7 +436,7 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, err = -EINVAL; down_read(&ioctl_list_sem); list_for_each_entry(hand, &ioctl_list, item) { - err = hand->handle_ioctl(data, cmd, arg); + err = hand->handle_ioctl(data, cmd, (unsigned long)arg); if (err != -EINVAL) break; } @@ -511,18 +447,12 @@ static int libcfs_ioctl(struct inode *inode, struct file *file, RETURN(err); } - -static struct file_operations libcfs_fops = { - ioctl: libcfs_ioctl, - open: libcfs_psdev_open, - release: libcfs_psdev_release -}; - - -static struct miscdevice libcfs_dev = { - PORTAL_MINOR, - "portals", - &libcfs_fops +struct cfs_psdev_ops libcfs_psdev_ops = { + libcfs_psdev_open, + libcfs_psdev_release, + NULL, + NULL, + libcfs_ioctl }; extern int insert_proc(void); @@ -531,10 +461,24 @@ MODULE_AUTHOR("Peter J. Braam "); MODULE_DESCRIPTION("Portals v3.1"); MODULE_LICENSE("GPL"); +extern cfs_psdev_t libcfs_dev; +extern struct rw_semaphore tracefile_sem; +extern struct semaphore trace_thread_sem; + +extern int libcfs_arch_init(void); +extern void libcfs_arch_cleanup(void); + static int init_libcfs_module(void) { int rc; + libcfs_arch_init(); + init_rwsem(&tracefile_sem); + init_mutex(&trace_thread_sem); + init_mutex(&nal_cmd_mutex); + init_rwsem(&ioctl_list_sem); + CFS_INIT_LIST_HEAD(&ioctl_list); + rc = portals_debug_init(5 * 1024 * 1024); if (rc < 0) { printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc); @@ -548,7 +492,7 @@ static int init_libcfs_module(void) goto cleanup_debug; } #endif - rc = misc_register(&libcfs_dev); + rc = cfs_psdev_register(&libcfs_dev); if (rc) { CERROR("misc_register: error %d\n", rc); goto cleanup_lwt; @@ -564,7 +508,7 @@ static int init_libcfs_module(void) return (0); cleanup_deregister: - misc_deregister(&libcfs_dev); + cfs_psdev_deregister(&libcfs_dev); cleanup_lwt: #if LWT_SUPPORT lwt_fini(); @@ -583,7 +527,7 @@ static void exit_libcfs_module(void) CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", 
atomic_read(&portal_kmemory)); - rc = misc_deregister(&libcfs_dev); + rc = cfs_psdev_deregister(&libcfs_dev); if (rc) CERROR("misc_deregister error %d\n", rc); @@ -598,11 +542,9 @@ static void exit_libcfs_module(void) rc = portals_debug_cleanup(); if (rc) printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc); + libcfs_arch_cleanup(); } -EXPORT_SYMBOL(kportal_daemonize); -EXPORT_SYMBOL(kportal_blockallsigs); EXPORT_SYMBOL(kportal_assertion_failed); -module_init(init_libcfs_module); -module_exit(exit_libcfs_module); +cfs_module(libcfs, "1.0.0", init_libcfs_module, exit_libcfs_module); diff --git a/lnet/libcfs/tracefile.c b/lnet/libcfs/tracefile.c index 3bd2c01..439589f 100644 --- a/lnet/libcfs/tracefile.c +++ b/lnet/libcfs/tracefile.c @@ -21,178 +21,138 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef HAVE_MM_INLINE -#include -#endif #define DEBUG_SUBSYSTEM S_PORTALS +#define LUSTRE_TRACEFILE_PRIVATE +#include "tracefile.h" -#include -#include -#include - -#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) +#include +#include /* XXX move things up to the top, comment */ +union trace_data_union trace_data[NR_CPUS] __cacheline_aligned; -static union { - struct trace_cpu_data { - struct list_head tcd_pages; - unsigned long tcd_cur_pages; - - struct list_head tcd_daemon_pages; - unsigned long tcd_cur_daemon_pages; - - unsigned long tcd_max_pages; - int tcd_shutting_down; - } tcd; - char __pad[SMP_CACHE_BYTES]; -} trace_data[NR_CPUS] __cacheline_aligned; - -struct page_collection { - struct list_head pc_pages; - spinlock_t pc_lock; - int pc_want_daemon_pages; -}; - -struct tracefiled_ctl { - struct completion tctl_start; - struct completion tctl_stop; - wait_queue_head_t tctl_waitq; - pid_t tctl_pid; - atomic_t tctl_shutdown; -}; - -#define TRACEFILE_SIZE (500 << 20) -static DECLARE_RWSEM(tracefile_sem); -static char *tracefile = NULL; -static long long 
tracefile_size = TRACEFILE_SIZE; +struct rw_semaphore tracefile_sem; +char *tracefile = NULL; +long long tracefile_size = TRACEFILE_SIZE; static struct tracefiled_ctl trace_tctl; -static DECLARE_MUTEX(trace_thread_sem); +struct semaphore trace_thread_sem; static int thread_running = 0; -#ifndef get_cpu -#define get_cpu() smp_processor_id() -#define put_cpu() do { } while (0) -#endif +static void put_pages_on_daemon_list_on_cpu(void *info); + +static inline struct trace_page *tage_from_list(struct list_head *list) +{ + return list_entry(list, struct trace_page, linkage); +} -#define trace_get_tcd(FLAGS) ({ \ - struct trace_cpu_data *__ret; \ - int __cpu = get_cpu(); \ - local_irq_save(FLAGS); \ - __ret = &trace_data[__cpu].tcd; \ - __ret; \ -}) +static struct trace_page *tage_alloc(int gfp) +{ + cfs_page_t *page; + struct trace_page *tage; + + page = cfs_alloc_page(gfp); + if (page != NULL) { + tage = cfs_alloc(sizeof *tage, gfp); + if (tage == NULL) + cfs_free_page(page); + tage->page = page; + } else + tage = NULL; + return tage; +} -#define trace_put_tcd(TCD, FLAGS) do { \ - local_irq_restore(FLAGS); \ - put_cpu(); \ -} while (0) +static void tage_free(struct trace_page *tage) +{ + LASSERT(tage != NULL); -static void put_pages_on_daemon_list_on_cpu(void *info); + if (tage->page != NULL) + cfs_free_page(tage->page); + cfs_free(tage); +} + +static void tage_to_tail(struct trace_page *tage, struct list_head *queue) +{ + LASSERT(tage != NULL); + LASSERT(queue != NULL); + + list_move_tail(&tage->linkage, queue); +} + +static int tage_invariant(struct trace_page *tage) +{ + return + tage != NULL && + tage->used <= CFS_PAGE_SIZE && + cfs_page_count(tage->page) > 0; +} /* return a page that has 'len' bytes left at the end */ -static struct page *trace_get_page(struct trace_cpu_data *tcd, - unsigned long len) +static struct trace_page *trace_get_tage(struct trace_cpu_data *tcd, + unsigned long len) { - struct page *page = NULL; + struct trace_page *tage; - if (len > 
PAGE_SIZE) { + if (len > CFS_PAGE_SIZE) { printk(KERN_ERR "cowardly refusing to write %lu bytes in a " "page\n", len); return NULL; } if (!list_empty(&tcd->tcd_pages)) { - page = list_entry(tcd->tcd_pages.prev, struct page, - PAGE_LIST_ENTRY); - if (page->index + len <= PAGE_SIZE) - return page; + tage = tage_from_list(tcd->tcd_pages.prev); + if (tage->used + len <= CFS_PAGE_SIZE) + return tage; } if (tcd->tcd_cur_pages < tcd->tcd_max_pages) { - page = alloc_page(GFP_ATOMIC); - if (page == NULL) { + tage = tage_alloc(CFS_ALLOC_ATOMIC); + if (tage == NULL) { /* the kernel should print a message for us. fall back * to using the last page in the ring buffer. */ goto ring_buffer; } - page->index = 0; - page->mapping = (void *)(long)smp_processor_id(); - list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages); + tage->used = 0; + tage->cpu = smp_processor_id(); + list_add_tail(&tage->linkage, &tcd->tcd_pages); tcd->tcd_cur_pages++; if (tcd->tcd_cur_pages > 8 && thread_running) { struct tracefiled_ctl *tctl = &trace_tctl; - wake_up(&tctl->tctl_waitq); + cfs_waitq_signal(&tctl->tctl_waitq); } - return page; + return tage; } ring_buffer: if (thread_running) { int pgcount = tcd->tcd_cur_pages / 10; struct page_collection pc; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; + printk(KERN_WARNING "debug daemon buffer overflowed; discarding" " 10%% of pages (%d)\n", pgcount + 1); - INIT_LIST_HEAD(&pc.pc_pages); + CFS_INIT_LIST_HEAD(&pc.pc_pages); spin_lock_init(&pc.pc_lock); - list_for_each_safe(pos, tmp, &tcd->tcd_pages) { - struct page *page; - + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { if (pgcount-- == 0) break; - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - list_del(&PAGE_LIST(page)); - list_add_tail(&PAGE_LIST(page), &pc.pc_pages); + list_move_tail(&tage->linkage, &pc.pc_pages); tcd->tcd_cur_pages--; } put_pages_on_daemon_list_on_cpu(&pc); } LASSERT(!list_empty(&tcd->tcd_pages)); - page = 
list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY); - page->index = 0; + tage = tage_from_list(tcd->tcd_pages.next); + tage->used = 0; + tage_to_tail(tage, &tcd->tcd_pages); - list_del(&PAGE_LIST(page)); - list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages); - return page; -} - -static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf, - int len, char *file, const char *fn) -{ - char *prefix = NULL, *ptype = NULL; - - if ((mask & D_EMERG) != 0) { - prefix = "LustreError"; - ptype = KERN_EMERG; - } else if ((mask & D_ERROR) != 0) { - prefix = "LustreError"; - ptype = KERN_ERR; - } else if ((mask & D_WARNING) != 0) { - prefix = "Lustre"; - ptype = KERN_WARNING; - } else if (portal_printk) { - prefix = "Lustre"; - ptype = KERN_INFO; - } - - printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid, - hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf); + return tage; } void portals_debug_msg(int subsys, int mask, char *file, const char *fn, @@ -200,12 +160,11 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, { struct trace_cpu_data *tcd; struct ptldebug_header header; - struct page *page; + struct trace_page *tage; char *debug_buf = format; int known_size, needed = 85 /* average message length */, max_nob; va_list ap; unsigned long flags; - struct timeval tv; #ifdef CRAY_PORTALS if (mask == D_PORTALS && !(portal_debug & D_PORTALS)) @@ -222,40 +181,22 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, if (tcd->tcd_shutting_down) goto out; - do_gettimeofday(&tv); - - header.ph_subsys = subsys; - header.ph_mask = mask; - header.ph_cpu_id = smp_processor_id(); - header.ph_sec = (__u32)tv.tv_sec; - header.ph_usec = tv.tv_usec; - header.ph_stack = stack; - header.ph_pid = current->pid; - header.ph_line_num = line; - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - header.ph_extern_pid = current->thread.extern_pid; -#elif defined(__arch_um__) && 
(LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - header.ph_extern_pid = current->thread.mode.tt.extern_pid; -#else - header.ph_extern_pid = 0; -#endif - + set_ptldebug_header(&header, subsys, mask, line, stack); known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls retry: - page = trace_get_page(tcd, needed + known_size); - if (page == NULL) { + tage = trace_get_tage(tcd, needed + known_size); + if (tage == NULL) { debug_buf = format; - if (needed + known_size > PAGE_SIZE) + if (needed + known_size > CFS_PAGE_SIZE) mask |= D_ERROR; needed = strlen(format); goto out; } - debug_buf = page_address(page) + page->index + known_size; + debug_buf = cfs_page_address(tage->page) + tage->used + known_size; - max_nob = PAGE_SIZE - page->index - known_size; + max_nob = CFS_PAGE_SIZE - tage->used - known_size; LASSERT(max_nob > 0); va_start(ap, format); needed = vsnprintf(debug_buf, max_nob, format, ap); @@ -265,24 +206,24 @@ void portals_debug_msg(int subsys, int mask, char *file, const char *fn, goto retry; header.ph_len = known_size + needed; - debug_buf = page_address(page) + page->index; + debug_buf = cfs_page_address(tage->page) + tage->used; memcpy(debug_buf, &header, sizeof(header)); - page->index += sizeof(header); + tage->used += sizeof(header); debug_buf += sizeof(header); strcpy(debug_buf, file); - page->index += strlen(file) + 1; + tage->used += strlen(file) + 1; debug_buf += strlen(file) + 1; strcpy(debug_buf, fn); - page->index += strlen(fn) + 1; + tage->used += strlen(fn) + 1; debug_buf += strlen(fn) + 1; - page->index += needed; - if (page->index > PAGE_SIZE) - printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n", - page->index); + tage->used += needed; + if (tage->used > CFS_PAGE_SIZE) + printk(KERN_EMERG + "tage->used == %u in portals_debug_msg\n", tage->used); out: if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk) @@ -301,12 +242,10 @@ static void collect_pages_on_cpu(void *info) tcd = trace_get_tcd(flags); 
spin_lock(&pc->pc_lock); - list_splice(&tcd->tcd_pages, &pc->pc_pages); - INIT_LIST_HEAD(&tcd->tcd_pages); + list_splice_init(&tcd->tcd_pages, &pc->pc_pages); tcd->tcd_cur_pages = 0; if (pc->pc_want_daemon_pages) { - list_splice(&tcd->tcd_daemon_pages, &pc->pc_pages); - INIT_LIST_HEAD(&tcd->tcd_daemon_pages); + list_splice_init(&tcd->tcd_daemon_pages, &pc->pc_pages); tcd->tcd_cur_daemon_pages = 0; } spin_unlock(&pc->pc_lock); @@ -317,7 +256,7 @@ static void collect_pages_on_cpu(void *info) static void collect_pages(struct page_collection *pc) { /* needs to be fixed up for preempt */ - INIT_LIST_HEAD(&pc->pc_pages); + CFS_INIT_LIST_HEAD(&pc->pc_pages); collect_pages_on_cpu(pc); smp_call_function(collect_pages_on_cpu, pc, 0, 1); } @@ -326,26 +265,24 @@ static void put_pages_back_on_cpu(void *info) { struct page_collection *pc = info; struct trace_cpu_data *tcd; - struct list_head *pos, *tmp, *cur_head; + struct list_head *cur_head; unsigned long flags; + struct trace_page *tage; + struct trace_page *tmp; tcd = trace_get_tcd(flags); cur_head = tcd->tcd_pages.next; spin_lock(&pc->pc_lock); - list_for_each_safe(pos, tmp, &pc->pc_pages) { - struct page *page; + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + LASSERT(tage_invariant(tage)); - if ((unsigned long)page->mapping != smp_processor_id()) + if (tage->cpu != smp_processor_id()) continue; - list_del(&PAGE_LIST(page)); - list_add_tail(&PAGE_LIST(page), cur_head); + tage_to_tail(tage, cur_head); tcd->tcd_cur_pages++; } spin_unlock(&pc->pc_lock); @@ -368,37 +305,33 @@ static void put_pages_on_daemon_list_on_cpu(void *info) { struct page_collection *pc = info; struct trace_cpu_data *tcd; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; unsigned long flags; tcd = trace_get_tcd(flags); spin_lock(&pc->pc_lock); - list_for_each_safe(pos, tmp, 
&pc->pc_pages) { - struct page *page; + list_for_each_entry_safe(tage, tmp, &pc->pc_pages, linkage) { + + LASSERT(tage_invariant(tage)); - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); - if ((unsigned long)page->mapping != smp_processor_id()) + if (tage->cpu != smp_processor_id()) continue; - list_del(&PAGE_LIST(page)); - list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages); + tage_to_tail(tage, &tcd->tcd_daemon_pages); tcd->tcd_cur_daemon_pages++; if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) { + struct trace_page *victim; + LASSERT(!list_empty(&tcd->tcd_daemon_pages)); - page = list_entry(tcd->tcd_daemon_pages.next, - struct page, PAGE_LIST_ENTRY); + victim = tage_from_list(tcd->tcd_daemon_pages.next); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + LASSERT(tage_invariant(victim)); - page->index = 0; - list_del(&PAGE_LIST(page)); - page->mapping = NULL; - __free_page(page); + list_del(&victim->linkage); + tage_free(victim); tcd->tcd_cur_daemon_pages--; } } @@ -416,21 +349,21 @@ static void put_pages_on_daemon_list(struct page_collection *pc) void trace_debug_print(void) { struct page_collection pc; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; spin_lock_init(&pc.pc_lock); collect_pages(&pc); - list_for_each_safe(pos, tmp, &pc.pc_pages) { - struct page *page; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { char *p, *file, *fn; + cfs_page_t *page; - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + LASSERT(tage_invariant(tage)); - p = page_address(page); - while (p < ((char *)page_address(page) + PAGE_SIZE)) { + page = tage->page; + p = cfs_page_address(page); + while (p < ((char *)cfs_page_address(page) + CFS_PAGE_SIZE)) { struct ptldebug_header *hdr; int len; hdr = (void *)p; @@ -444,25 +377,25 @@ void trace_debug_print(void) 
print_to_console(hdr, D_EMERG, p, len, file, fn); } - list_del(&PAGE_LIST(page)); - page->mapping = NULL; - __free_page(page); + list_del(&tage->linkage); + tage_free(tage); } } int tracefile_dump_all_pages(char *filename) { struct page_collection pc; - struct file *filp; - struct list_head *pos, *tmp; - mm_segment_t oldfs; + cfs_file_t *filp; + struct trace_page *tage; + struct trace_page *tmp; + CFS_DECL_MMSPACE; int rc; down_write(&tracefile_sem); - filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); + filp = cfs_filp_open(filename, + O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600, &rc); + if (!filp) { printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n", filename, rc); goto out; @@ -478,33 +411,28 @@ int tracefile_dump_all_pages(char *filename) /* ok, for now, just write the pages. in the future we'll be building * iobufs with the pages and calling generic_direct_IO */ - oldfs = get_fs(); - set_fs(get_ds()); - list_for_each_safe(pos, tmp, &pc.pc_pages) { - struct page *page; - - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); - - rc = filp->f_op->write(filp, page_address(page), page->index, - &filp->f_pos); - if (rc != page->index) { - printk(KERN_WARNING "wanted to write %lu but wrote " - "%d\n", page->index, rc); + CFS_MMSPACE_OPEN; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { + + LASSERT(tage_invariant(tage)); + + rc = cfs_filp_write(filp, cfs_page_address(tage->page), + tage->used, cfs_filp_poff(filp)); + if (rc != tage->used) { + printk(KERN_WARNING "wanted to write %u but wrote " + "%d\n", tage->used, rc); put_pages_back(&pc); break; } - list_del(&PAGE_LIST(page)); - page->mapping = NULL; - __free_page(page); + list_del(&tage->linkage); + tage_free(tage); } - set_fs(oldfs); - rc = filp->f_op->fsync(filp, filp->f_dentry, 1); + CFS_MMSPACE_CLOSE; + rc = cfs_filp_fsync(filp); if (rc) printk(KERN_ERR 
"sync returns %d\n", rc); close: - filp_close(filp, 0); + cfs_filp_close(filp); out: up_write(&tracefile_sem); return rc; @@ -513,21 +441,18 @@ int tracefile_dump_all_pages(char *filename) void trace_flush_pages(void) { struct page_collection pc; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; spin_lock_init(&pc.pc_lock); collect_pages(&pc); - list_for_each_safe(pos, tmp, &pc.pc_pages) { - struct page *page; + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + LASSERT(tage_invariant(tage)); - list_del(&PAGE_LIST(page)); - page->mapping = NULL; - __free_page(page); + list_del(&tage->linkage); + tage_free(tage); } } @@ -538,7 +463,7 @@ int trace_dk(struct file *file, const char *buffer, unsigned long count, unsigned long off; int rc; - name = kmalloc(count + 1, GFP_KERNEL); + name = cfs_alloc(count + 1, CFS_ALLOC_STD); if (name == NULL) return -ENOMEM; @@ -560,7 +485,7 @@ int trace_dk(struct file *file, const char *buffer, unsigned long count, rc = tracefile_dump_all_pages(name); out: if (name) - kfree(name); + cfs_free(name); return count; } EXPORT_SYMBOL(trace_dk); @@ -569,11 +494,11 @@ static int tracefiled(void *arg) { struct page_collection pc; struct tracefiled_ctl *tctl = arg; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; struct ptldebug_header *hdr; - struct file *filp; - struct page *page; - mm_segment_t oldfs; + cfs_file_t *filp; + CFS_DECL_MMSPACE; int rc; /* we're started late enough that we pick up init's fs context */ @@ -585,13 +510,13 @@ static int tracefiled(void *arg) complete(&tctl->tctl_start); while (1) { - wait_queue_t __wait; + cfs_waitlink_t __wait; - init_waitqueue_entry(&__wait, current); - add_wait_queue(&tctl->tctl_waitq, &__wait); + cfs_waitlink_init(&__wait); + cfs_waitq_add(&tctl->tctl_waitq, &__wait); 
set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ); - remove_wait_queue(&tctl->tctl_waitq, &__wait); + cfs_waitq_timedwait(&__wait, cfs_time_seconds(1)); + cfs_waitq_del(&tctl->tctl_waitq, &__wait); if (atomic_read(&tctl->tctl_shutdown)) break; @@ -604,13 +529,10 @@ static int tracefiled(void *arg) filp = NULL; down_read(&tracefile_sem); if (tracefile != NULL) { - filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE, - 0600); - if (IS_ERR(filp)) { - printk("couldn't open %s: %ld\n", tracefile, - PTR_ERR(filp)); - filp = NULL; - } + filp = cfs_filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE, + 0600, &rc); + if (!(filp)) + printk("couldn't open %s: %d\n", tracefile, rc); } up_read(&tracefile_sem); if (filp == NULL) { @@ -618,39 +540,35 @@ static int tracefiled(void *arg) continue; } - oldfs = get_fs(); - set_fs(get_ds()); + CFS_MMSPACE_OPEN; /* mark the first header, so we can sort in chunks */ - page = list_entry(pc.pc_pages.next, struct page, - PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + tage = tage_from_list(pc.pc_pages.next); + LASSERT(tage_invariant(tage)); - hdr = page_address(page); + hdr = cfs_page_address(tage->page); hdr->ph_flags |= PH_FLAG_FIRST_RECORD; - list_for_each_safe(pos, tmp, &pc.pc_pages) { + list_for_each_entry_safe(tage, tmp, &pc.pc_pages, linkage) { static loff_t f_pos; - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + + LASSERT(tage_invariant(tage)); if (f_pos >= tracefile_size) f_pos = 0; - else if (f_pos > filp->f_dentry->d_inode->i_size) - f_pos = filp->f_dentry->d_inode->i_size; - - rc = filp->f_op->write(filp, page_address(page), - page->index, &f_pos); - if (rc != page->index) { - printk(KERN_WARNING "wanted to write %lu but " - "wrote %d\n", page->index, rc); + else if (f_pos > cfs_filp_size(filp)) + f_pos = cfs_filp_size(filp); + + rc = cfs_filp_write(filp, cfs_page_address(tage->page), + tage->used, 
&f_pos); + if (rc != tage->used) { + printk(KERN_WARNING "wanted to write %u but " + "wrote %d\n", tage->used, rc); put_pages_back(&pc); } } - set_fs(oldfs); - filp_close(filp, 0); + CFS_MMSPACE_CLOSE; + cfs_filp_close(filp); put_pages_on_daemon_list(&pc); } @@ -663,16 +581,16 @@ int trace_start_thread(void) struct tracefiled_ctl *tctl = &trace_tctl; int rc = 0; - down(&trace_thread_sem); + mutex_down(&trace_thread_sem); if (thread_running) goto out; init_completion(&tctl->tctl_start); init_completion(&tctl->tctl_stop); - init_waitqueue_head(&tctl->tctl_waitq); + cfs_waitq_init(&tctl->tctl_waitq); atomic_set(&tctl->tctl_shutdown, 0); - if (kernel_thread(tracefiled, tctl, 0) < 0) { + if (cfs_kernel_thread(tracefiled, tctl, 0) < 0) { rc = -ECHILD; goto out; } @@ -680,7 +598,7 @@ int trace_start_thread(void) wait_for_completion(&tctl->tctl_start); thread_running = 1; out: - up(&trace_thread_sem); + mutex_up(&trace_thread_sem); return rc; } @@ -688,138 +606,14 @@ void trace_stop_thread(void) { struct tracefiled_ctl *tctl = &trace_tctl; - down(&trace_thread_sem); + mutex_down(&trace_thread_sem); if (thread_running) { printk(KERN_INFO "Shutting down debug daemon thread...\n"); atomic_set(&tctl->tctl_shutdown, 1); wait_for_completion(&tctl->tctl_stop); thread_running = 0; } - up(&trace_thread_sem); -} - -int trace_write_daemon_file(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char *name; - unsigned long off; - int rc; - - name = kmalloc(count + 1, GFP_KERNEL); - if (name == NULL) - return -ENOMEM; - - if (copy_from_user(name, buffer, count)) { - rc = -EFAULT; - goto out; - } - - /* be nice and strip out trailing '\n' */ - for (off = count ; off > 2 && isspace(name[off - 1]); off--) - ; - - name[off] = '\0'; - - down_write(&tracefile_sem); - if (strcmp(name, "stop") == 0) { - tracefile = NULL; - trace_stop_thread(); - goto out_sem; - } else if (strncmp(name, "size=", 5) == 0) { - tracefile_size = simple_strtoul(name + 5, NULL, 0); - if 
(tracefile_size < 10 || tracefile_size > 20480) - tracefile_size = TRACEFILE_SIZE; - else - tracefile_size <<= 20; - goto out_sem; - } - - if (name[0] != '/') { - rc = -EINVAL; - goto out_sem; - } - - if (tracefile != NULL) - kfree(tracefile); - - tracefile = name; - name = NULL; - - printk(KERN_INFO "Lustre: debug daemon will attempt to start writing " - "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10)); - - trace_start_thread(); - - out_sem: - up_write(&tracefile_sem); - - out: - kfree(name); - return count; -} - -int trace_read_daemon_file(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - int rc; - - down_read(&tracefile_sem); - rc = snprintf(page, count, "%s", tracefile); - up_read(&tracefile_sem); - - return rc; -} - -int trace_write_debug_mb(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - char string[32]; - int i; - unsigned max; - - if (count >= sizeof(string)) { - printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n", - count); - return -EOVERFLOW; - } - - if (copy_from_user(string, buffer, count)) - return -EFAULT; - - max = simple_strtoul(string, NULL, 0); - if (max == 0) - return -EINVAL; - - if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) { - printk(KERN_ERR "Lustre: Refusing to set debug buffer size to " - "%dMB, which is more than 80%% of available RAM (%lu)\n", - max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5); - return -EINVAL; - } - - max /= smp_num_cpus; - - for (i = 0; i < NR_CPUS; i++) { - struct trace_cpu_data *tcd; - tcd = &trace_data[i].tcd; - tcd->tcd_max_pages = max << (20 - PAGE_SHIFT); - } - return count; -} - -int trace_read_debug_mb(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct trace_cpu_data *tcd; - unsigned long flags; - int rc; - - tcd = trace_get_tcd(flags); - rc = snprintf(page, count, "%lu\n", - (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus); - trace_put_tcd(tcd, flags); - - 
return rc; + mutex_up(&trace_thread_sem); } int tracefile_init(void) @@ -829,8 +623,8 @@ int tracefile_init(void) for (i = 0; i < NR_CPUS; i++) { tcd = &trace_data[i].tcd; - INIT_LIST_HEAD(&tcd->tcd_pages); - INIT_LIST_HEAD(&tcd->tcd_daemon_pages); + CFS_INIT_LIST_HEAD(&tcd->tcd_pages); + CFS_INIT_LIST_HEAD(&tcd->tcd_daemon_pages); tcd->tcd_cur_pages = 0; tcd->tcd_cur_daemon_pages = 0; tcd->tcd_max_pages = TCD_MAX_PAGES; @@ -842,23 +636,19 @@ int tracefile_init(void) static void trace_cleanup_on_cpu(void *info) { struct trace_cpu_data *tcd; - struct list_head *pos, *tmp; + struct trace_page *tage; + struct trace_page *tmp; unsigned long flags; tcd = trace_get_tcd(flags); tcd->tcd_shutting_down = 1; - list_for_each_safe(pos, tmp, &tcd->tcd_pages) { - struct page *page; - - page = list_entry(pos, struct page, PAGE_LIST_ENTRY); - LASSERT(page->index <= PAGE_SIZE); - LASSERT(page_count(page) > 0); + list_for_each_entry_safe(tage, tmp, &tcd->tcd_pages, linkage) { + LASSERT(tage_invariant(tage)); - list_del(&PAGE_LIST(page)); - page->mapping = NULL; - __free_page(page); + list_del(&tage->linkage); + tage_free(tage); } tcd->tcd_cur_pages = 0; @@ -869,7 +659,7 @@ static void trace_cleanup(void) { struct page_collection pc; - INIT_LIST_HEAD(&pc.pc_pages); + CFS_INIT_LIST_HEAD(&pc.pc_pages); spin_lock_init(&pc.pc_lock); trace_cleanup_on_cpu(&pc); diff --git a/lnet/libcfs/tracefile.h b/lnet/libcfs/tracefile.h index f581257..4e7fdde 100644 --- a/lnet/libcfs/tracefile.h +++ b/lnet/libcfs/tracefile.h @@ -1,5 +1,7 @@ -#ifndef __PORTALS_TRACEFILE_H -#define __PORTALS_TRACEFILE_H +#ifndef __LIBCFS_TRACEFILE_H__ +#define __LIBCFS_TRACEFILE_H__ + +#include int tracefile_dump_all_pages(char *filename); void trace_debug_print(void); @@ -19,4 +21,76 @@ int trace_read_debug_mb(char *page, char **start, off_t off, int count, int trace_dk(struct file *file, const char *buffer, unsigned long count, void *data); +#ifdef LUSTRE_TRACEFILE_PRIVATE +/* + * Private declare for tracefile + */ 
+#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT)) + +#define TRACEFILE_SIZE (500 << 20) + +union trace_data_union { + struct trace_cpu_data { + struct list_head tcd_pages; + unsigned long tcd_cur_pages; + + struct list_head tcd_daemon_pages; + unsigned long tcd_cur_daemon_pages; + + unsigned long tcd_max_pages; + int tcd_shutting_down; + } tcd; + char __pad[SMP_CACHE_BYTES]; +}; + +struct page_collection { + struct list_head pc_pages; + spinlock_t pc_lock; + int pc_want_daemon_pages; +}; + +struct tracefiled_ctl { + struct completion tctl_start; + struct completion tctl_stop; + cfs_waitq_t tctl_waitq; + pid_t tctl_pid; + atomic_t tctl_shutdown; +}; + +/* + * small data-structure for each page owned by tracefiled. + */ +struct trace_page { + /* + * page itself + */ + cfs_page_t *page; + /* + * linkage into one of the lists in trace_data_union or + * page_collection + */ + struct list_head linkage; + /* + * number of bytes used within this page + */ + unsigned int used; + /* + * cpu that owns this page + */ + int cpu; +}; + +extern void set_ptldebug_header(struct ptldebug_header *header, + int subsys, int mask, const int line, + unsigned long stack); +extern void print_to_console(struct ptldebug_header *hdr, int mask, + char *buf, int len, char *file, const char *fn); +extern struct trace_cpu_data * __trace_get_tcd (unsigned long *flags); +extern void __trace_put_tcd (struct trace_cpu_data *tcd, unsigned long flags); + +#define trace_get_tcd(f) __trace_get_tcd(&(f)) +#define trace_put_tcd(t, f) __trace_put_tcd(t, f) + +#endif /* LUSTRE_TRACEFILE_PRIVATE */ + #endif /* __PORTALS_TRACEFILE_H */ diff --git a/lnet/libcfs/user-lock.c b/lnet/libcfs/user-lock.c new file mode 100644 index 0000000..99dcd7f --- /dev/null +++ b/lnet/libcfs/user-lock.c @@ -0,0 +1,242 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. 
+ * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable time API for user-level. + * + */ + +/* Implementations of portable synchronization APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. + */ + +#ifndef __KERNEL__ + +/* + * Optional debugging (magic stamping and checking ownership) can be added. + */ + +/* + * spin_lock + * + * - spin_lock_init(x) + * - spin_lock(x) + * - spin_unlock(x) + * - spin_trylock(x) + * + * - spin_lock_irqsave(x, f) + * - spin_unlock_irqrestore(x, f) + * + * No-op implementation. 
+ */ + +void spin_lock_init(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_lock(spinlock_t *lock) +{ + (void)lock; +} + +void spin_unlock(spinlock_t *lock) +{ + (void)lock; +} + +int spin_trylock(spinlock_t *lock) +{ + (void)lock; + return 1; +} + +void spin_lock_bh_init(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_lock_bh(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_unlock_bh(spinlock_t *lock) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_lock_irqsave(spinlock_t *lock, unsigned long flags) +{ + LASSERT(lock != NULL); + (void)lock; +} + +void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +{ + LASSERT(lock != NULL); + (void)lock; +} + + +/* + * Semaphore + * + * - sema_init(x, v) + * - __down(x) + * - __up(x) + */ +struct semaphore {}; + +void sema_init(struct semaphore *s, int val) +{ + LASSERT(s != NULL); + (void)s; + (void)val; +} + +void __down(struct semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +void __up(struct semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +/* + * Mutex: + * + * - init_mutex(x) + * - init_mutex_locked(x) + * - mutex_up(x) + * - mutex_down(x) + */ + +#define mutex_up(s) __up(s) +#define mutex_down(s) __down(s) + +#define init_mutex(x) sema_init(x, 1) +#define init_mutex_locked(x) sema_init(x, 0) + +/* + * Completion: + * + * - init_completion(c) + * - complete(c) + * - wait_for_completion(c) + */ +struct completion {}; + +void init_completion(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +void complete(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +void wait_for_completion(struct completion *c) +{ + LASSERT(c != NULL); + (void)c; +} + +/* + * rw_semaphore: + * + * - DECLARE_RWSEM(x) + * - init_rwsem(x) + * - down_read(x) + * - up_read(x) + * - down_write(x) + * - up_write(x) + */ +struct rw_semaphore {}; + +void init_rwsem(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + 
(void)s; +} + +void down_read(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +int down_read_trylock(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; + return 1; +} + +void down_write(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +int down_write_trylock(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; + return 1; +} + +void up_read(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +void up_write(struct rw_semaphore *s) +{ + LASSERT(s != NULL); + (void)s; +} + +/* !__KERNEL__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/libcfs/user-prim.c b/lnet/libcfs/user-prim.c new file mode 100644 index 0000000..ddc994c --- /dev/null +++ b/lnet/libcfs/user-prim.c @@ -0,0 +1,266 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2004 Cluster File Systems, Inc. + * Author: Nikita Danilov + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or modify it under the + * terms of version 2 of the GNU General Public License as published by the + * Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along + * with Lustre; if not, write to the Free Software Foundation, Inc., 675 Mass + * Ave, Cambridge, MA 02139, USA. + * + * Implementation of portable APIs for user-level. + * + */ + +/* Implementations of portable APIs for liblustre */ + +/* + * liblustre is single-threaded, so most "synchronization" APIs are trivial. 
+ */ + +#ifndef __KERNEL__ + +#include +#ifndef __CYGWIN__ +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include + +#include + +/* + * Sleep channel. No-op implementation. + */ + +void cfs_waitq_init(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitlink_init(struct cfs_waitlink *link) +{ + LASSERT(link != NULL); + (void)link; +} + +void cfs_waitq_add(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_add_exclusive(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_forward(struct cfs_waitlink *link, struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +void cfs_waitq_del(struct cfs_waitq *waitq, struct cfs_waitlink *link) +{ + LASSERT(waitq != NULL); + LASSERT(link != NULL); + (void)waitq; + (void)link; +} + +int cfs_waitq_active(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_signal(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_signal_nr(struct cfs_waitq *waitq, int nr) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_broadcast(struct cfs_waitq *waitq) +{ + LASSERT(waitq != NULL); + (void)waitq; +} + +void cfs_waitq_wait(struct cfs_waitlink *link) +{ + LASSERT(link != NULL); + (void)link; +} + +int64_t cfs_waitq_timedwait(struct cfs_waitlink *link, int64_t timeout) +{ + LASSERT(link != NULL); + (void)link; +} + +/* + * Allocator + */ + +cfs_page_t *cfs_alloc_pages(unsigned int flags, unsigned int order) +{ + cfs_page_t *pg = malloc(sizeof(*pg)); + + if (!pg) + return NULL; +#if 0 //#ifdef MAP_ANONYMOUS + pg->addr = mmap(0, PAGE_SIZE << order, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); +#else + pg->addr = 
malloc(PAGE_SIZE << order); +#endif + + if (!pg->addr) { + free(pg); + return NULL; + } + return pg; +} + +void cfs_free_pages(struct page *pg, int what) +{ +#if 0 //#ifdef MAP_ANONYMOUS + munmap(pg->addr, PAGE_SIZE); +#else + free(pg->addr); +#endif + free(pg); +} + +cfs_page_t *cfs_alloc_page(unsigned int flags) +{ + return cfs_alloc_pages(flags, 0); +} + +void cfs_free_page(cfs_page_t *pg, int what) +{ + cfs_free_page(pg, what); +} + +void *cfs_page_address(cfs_page_t *pg) +{ + return pg->addr; +} + +void *cfs_kmap(cfs_page_t *pg) +{ + return pg->addr; +} + +void cfs_kunmap(cfs_page_t *pg) +{ +} + +/* + * Memory allocator + */ +void *cfs_alloc(size_t nr_bytes, u_int32_t flags) +{ + void *result; + + result = malloc(nr_bytes); + if (result != NULL && (flags & CFS_ALLOC_ZERO)) + memset(result, 0, nr_bytes); +} + +void cfs_free(void *addr) +{ + free(addr); +} + +void *cfs_alloc_large(size_t nr_bytes) +{ + return cfs_alloc(nr_bytes, 0); +} + +void cfs_free_large(void *addr) +{ + return cfs_free(addr); +} + +/* + * SLAB allocator + */ + +cfs_mem_cache_t * +cfs_mem_cache_create(const char *, size_t, size_t, unsigned long, + void (*)(void *, cfs_mem_cache_t *, unsigned long), + void (*)(void *, cfs_mem_cache_t *, unsigned long)) +{ + cfs_mem_cache_t *c; + + c = malloc(sizeof(*c)); + if (!c) + return NULL; + c->size = objsize; + CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n", + name, c, (int)objsize); + return c; +} + +int cfs_mem_cache_destroy(cfs_mem_cache_t *c) +{ + CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", c, c->size); + free(c); + return 0; +} + +void *cfs_mem_cache_alloc(cfs_mem_cache_t *c, int gfp) +{ + return cfs_alloc(c, gfp); +} + +void cfs_mem_cache_free(cfs_mem_cache_t *c, void *addr) +{ + cfs_free(addr); +} + + +/* !__KERNEL__ */ +#endif + +/* + * Local variables: + * c-indentation-style: "K&R" + * c-basic-offset: 8 + * tab-width: 8 + * fill-column: 80 + * scroll-step: 1 + * End: + */ diff --git a/lnet/libcfs/watchdog.c 
b/lnet/libcfs/watchdog.c index 844845a..53b5903 100644 --- a/lnet/libcfs/watchdog.c +++ b/lnet/libcfs/watchdog.c @@ -22,9 +22,9 @@ #define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include +#include +#include +#include @@ -74,7 +74,7 @@ static unsigned long lcw_flags = 0; static __u32 lcw_refcount = 0; static DECLARE_MUTEX(lcw_refcount_sem); -/* +/* * List of timers that have fired that need their callbacks run by the * dispatcher. */ @@ -195,9 +195,9 @@ static int lcw_dispatch_main(void *data) CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid); if (lcw->lcw_state != LC_WATCHDOG_DISABLED) { - /* + /* * sanity check the task against our - * watchdog + * watchdog */ tsk = lcw_lookup_task(lcw); lcw->lcw_callback(lcw, tsk, lcw->lcw_data); @@ -254,7 +254,7 @@ static void lcw_dispatch_stop(void) EXIT; } -struct lc_watchdog *lc_watchdog_add(int time, +struct lc_watchdog *lc_watchdog_add(int time, void (*callback)(struct lc_watchdog *, struct task_struct *, void *), @@ -269,8 +269,8 @@ struct lc_watchdog *lc_watchdog_add(int time, RETURN(ERR_PTR(-ENOMEM)); } - lcw->lcw_task = current; - lcw->lcw_pid = current->pid; + lcw->lcw_task = cfs_current(); + lcw->lcw_pid = cfs_curproc_pid(); lcw->lcw_time = (time * HZ) / 1000; lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog; lcw->lcw_data = data; diff --git a/lnet/lnet/Info.plist b/lnet/lnet/Info.plist new file mode 100644 index 0000000..60c304b --- /dev/null +++ b/lnet/lnet/Info.plist @@ -0,0 +1,35 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + portals + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.portals + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? 
+ CFBundleVersion + 1.0.1 + OSBundleCompatibleVersion + 1.0.0 + OSBundleLibraries + + com.apple.kernel.bsd + 1.1 + com.apple.kernel.iokit + 1.0.0b1 + com.apple.kernel.mach + 1.0.0b1 + com.clusterfs.lustre.libcfs + 1.0.0 + + + diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c index 7e92256..1210686 100644 --- a/lnet/lnet/api-ni.c +++ b/lnet/lnet/api-ni.c @@ -36,16 +36,16 @@ int ptl_init; static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1]; #ifdef __KERNEL__ -DECLARE_MUTEX(ptl_mutex); +struct semaphore ptl_mutex; static void ptl_mutex_enter (void) { - down (&ptl_mutex); + mutex_down (&ptl_mutex); } static void ptl_mutex_exit (void) { - up (&ptl_mutex); + mutex_up (&ptl_mutex); } #else static void ptl_mutex_enter (void) diff --git a/lnet/lnet/api-wrap.c b/lnet/lnet/api-wrap.c index 37f6c0b..0b7832e 100644 --- a/lnet/lnet/api-wrap.c +++ b/lnet/lnet/api-wrap.c @@ -23,7 +23,7 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -# define DEBUG_SUBSYSTEM S_PORTALS +#define DEBUG_SUBSYSTEM S_PORTALS #include void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h) @@ -35,10 +35,10 @@ int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out) { if (!ptl_init) return PTL_NO_INIT; - + if (ptl_hndl2nal(&handle_in) == NULL) return PTL_HANDLE_INVALID; - + *ni_out = handle_in; return PTL_OK; } @@ -49,7 +49,7 @@ int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&ni_handle); if (nal == NULL) return PTL_NI_INVALID; @@ -63,7 +63,7 @@ int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid) if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&ni_handle); if (nal == NULL) return PTL_NI_INVALID; @@ -73,13 +73,13 @@ int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid) return PTL_OK; } -int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) +int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) { nal_t *nal; if (!ptl_init) 
return PTL_NO_INIT; - + nal = ptl_hndl2nal(&interface); if (nal == NULL) return PTL_NI_INVALID; @@ -94,7 +94,7 @@ int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&interface_in); if (nal == NULL) return PTL_NI_INVALID; @@ -109,7 +109,7 @@ int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&interface_in); if (nal == NULL) return PTL_NI_INVALID; @@ -126,12 +126,12 @@ int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&interface_in); if (nal == NULL) return PTL_NI_INVALID; - return nal->nal_me_attach(nal, index_in, match_id_in, + return nal->nal_me_attach(nal, index_in, match_id_in, match_bits_in, ignore_bits_in, unlink_in, pos_in, handle_out); } @@ -145,7 +145,7 @@ int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(¤t_in); if (nal == NULL) return PTL_ME_INVALID; @@ -161,7 +161,7 @@ int PtlMEUnlink(ptl_handle_me_t current_in) if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(¤t_in); if (nal == NULL) return PTL_ME_INVALID; @@ -176,7 +176,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&me_in); if (nal == NULL) return PTL_ME_INVALID; @@ -185,7 +185,7 @@ int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, ptl_hndl2nal(&md_in.eq_handle) != nal) return PTL_MD_ILLEGAL; - return (nal->nal_md_attach)(nal, &me_in, &md_in, + return (nal->nal_md_attach)(nal, &me_in, &md_in, unlink_in, handle_out); } @@ -196,7 +196,7 @@ int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&ni_in); if (nal == NULL) return PTL_NI_INVALID; @@ -212,10 +212,10 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, ptl_md_t *new_inout, 
ptl_handle_eq_t testq_in) { nal_t *nal; - + if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&md_in); if (nal == NULL) return PTL_MD_INVALID; @@ -224,21 +224,21 @@ int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, ptl_hndl2nal(&testq_in) != nal) return PTL_EQ_INVALID; - return (nal->nal_md_update)(nal, &md_in, + return (nal->nal_md_update)(nal, &md_in, old_inout, new_inout, &testq_in); } int PtlMDUnlink(ptl_handle_md_t md_in) { nal_t *nal; - + if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&md_in); if (nal == NULL) return PTL_MD_INVALID; - + return (nal->nal_md_unlink)(nal, &md_in); } @@ -247,10 +247,10 @@ int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, ptl_handle_eq_t *handle_out) { nal_t *nal; - + if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&interface); if (nal == NULL) return PTL_NI_INVALID; @@ -264,7 +264,7 @@ int PtlEQFree(ptl_handle_eq_t eventq) if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&eventq); if (nal == NULL) return PTL_EQ_INVALID; @@ -275,15 +275,15 @@ int PtlEQFree(ptl_handle_eq_t eventq) int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev) { int which; - + return (PtlEQPoll (&eventq, 1, 0, ev, &which)); } int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) { int which; - - return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, + + return (PtlEQPoll (&eventq_in, 1, PTL_TIME_FOREVER, event_out, &which)); } @@ -319,11 +319,11 @@ int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&ni_in); if (nal == NULL) return PTL_NI_INVALID; - + return (nal->nal_ace_entry)(nal, index_in, match_id_in, portal_in); } @@ -336,7 +336,7 @@ int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, if (!ptl_init) return PTL_NO_INIT; - + nal = ptl_hndl2nal(&md_in); if (nal == NULL) return PTL_MD_INVALID; @@ -359,7 +359,7 @@ int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, if (nal == NULL) return PTL_MD_INVALID; - 
return (nal->nal_get)(nal, &md_in, + return (nal->nal_get)(nal, &md_in, &target_in, portal_in, ac_in, match_bits_in, offset_in); } diff --git a/lnet/lnet/autoMakefile.am b/lnet/lnet/autoMakefile.am index 285f8fe..136f870 100644 --- a/lnet/lnet/autoMakefile.am +++ b/lnet/lnet/autoMakefile.am @@ -17,10 +17,33 @@ libportals_a_CFLAGS = $(LLCFLAGS) endif if MODULES + +if LINUX modulenet_DATA = portals$(KMODEXT) +endif # LINUX + +if DARWIN +macos_PROGRAMS := portals + +portals_SOURCES := api-errno.c api-ni.c api-wrap.c +portals_SOURCES += lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c +portals_SOURCES += lib-move.c lib-ni.c lib-pid.c module.c + +portals_CFLAGS := $(EXTRA_KCFLAGS) +portals_LDFLAGS := $(EXTRA_KLDFLAGS) +portals_LDADD := $(EXTRA_KLIBS) + +plist_DATA := Info.plist + +install-data-hook: fix-kext-ownership + +endif # DARWIN + endif # MODULES endif # CRAY_PORTALS -MOSTLYCLEANFILES = *.o *.ko *.mod.c +EXTRA_DIST := Info.plist + +MOSTLYCLEANFILES = *.o *.ko *.mod.c portals DIST_SOURCES = $(portals-objs:%.o=%.c) diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c index a886ff5..4992fce 100644 --- a/lnet/lnet/lib-eq.c +++ b/lnet/lnet/lib-eq.c @@ -186,8 +186,8 @@ lib_api_eq_poll (nal_t *apinal, int i; int rc; #ifdef __KERNEL__ - wait_queue_t wq; - unsigned long now; + cfs_waitlink_t wl; + cfs_time_t now; #else struct timeval then; struct timeval now; @@ -218,23 +218,27 @@ lib_api_eq_poll (nal_t *apinal, * in the same stack frame, means we can abstract the * locking here */ #ifdef __KERNEL__ - init_waitqueue_entry(&wq, current); + cfs_waitlink_init(&wl); set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&ni->ni_waitq, &wq); + cfs_waitq_add(&ni->ni_waitq, &wl); LIB_UNLOCK(nal, flags); if (timeout_ms < 0) { - schedule (); - } else { - now = jiffies; - schedule_timeout((timeout_ms * HZ)/1000); - timeout_ms -= ((jiffies - now) * 1000)/HZ; + cfs_waitq_wait (&wl); + } else { + struct timeval tv; + + now = cfs_time_current(); + cfs_waitq_timedwait(&wl, 
cfs_time_seconds(timeout_ms)/1000); + cfs_duration_usec(cfs_time_sub(cfs_time_current(), now), &tv); + timeout_ms -= tv.tv_sec * 1000 + tv.tv_usec / 1000; if (timeout_ms < 0) timeout_ms = 0; } LIB_LOCK(nal, flags); + cfs_waitq_del(&ni->ni_waitq, &wl); #else if (timeout_ms < 0) { pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex); diff --git a/lnet/lnet/lib-init.c b/lnet/lnet/lib-init.c index 813063a..b0c55f9 100644 --- a/lnet/lnet/lib-init.c +++ b/lnet/lnet/lib-init.c @@ -27,8 +27,7 @@ #include #ifdef __KERNEL__ -# include /* for memset() */ -# include +# include #else # include # include @@ -68,7 +67,7 @@ lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size) if (space == NULL) return (PTL_NO_SPACE); - INIT_LIST_HEAD (&fl->fl_list); + CFS_INIT_LIST_HEAD (&fl->fl_list); fl->fl_objs = space; fl->fl_nobjs = n; fl->fl_objsize = size; @@ -196,7 +195,7 @@ lib_setup_handle_hash (lib_nal_t *nal) return (PTL_NO_SPACE); for (i = 0; i < ni->ni_lh_hash_size; i++) - INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); + CFS_INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); ni->ni_next_object_cookie = PTL_COOKIE_TYPES; @@ -305,14 +304,14 @@ lib_init(lib_nal_t *libnal, nal_t *apinal, memset(&ni->ni_counters, 0, sizeof(lib_counters_t)); - INIT_LIST_HEAD (&ni->ni_active_msgs); - INIT_LIST_HEAD (&ni->ni_active_mds); - INIT_LIST_HEAD (&ni->ni_active_eqs); - INIT_LIST_HEAD (&ni->ni_test_peers); + CFS_INIT_LIST_HEAD (&ni->ni_active_msgs); + CFS_INIT_LIST_HEAD (&ni->ni_active_mds); + CFS_INIT_LIST_HEAD (&ni->ni_active_eqs); + CFS_INIT_LIST_HEAD (&ni->ni_test_peers); #ifdef __KERNEL__ spin_lock_init (&ni->ni_lock); - init_waitqueue_head (&ni->ni_waitq); + cfs_waitq_init (&ni->ni_waitq); #else pthread_mutex_init(&ni->ni_mutex, NULL); pthread_cond_init(&ni->ni_cond, NULL); @@ -340,7 +339,7 @@ lib_init(lib_nal_t *libnal, nal_t *apinal, } for (i = 0; i < ptl_size; i++) - INIT_LIST_HEAD(&(ni->ni_portals.tbl[i])); + CFS_INIT_LIST_HEAD(&(ni->ni_portals.tbl[i])); /* max_{mes,mds,eqs} set in 
kportal_descriptor_setup */ diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c index ce5cb0b..f188e2a 100644 --- a/lnet/lnet/lib-md.c +++ b/lnet/lnet/lib-md.c @@ -22,11 +22,12 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS + #ifndef __KERNEL__ # include #else -# define DEBUG_SUBSYSTEM S_PORTALS -# include +# include #endif #include @@ -62,13 +63,13 @@ lib_md_unlink(lib_nal_t *nal, lib_md_t *md) if ((md->options & PTL_MD_KIOV) != 0) { if (nal->libnal_unmap_pages != NULL) - nal->libnal_unmap_pages (nal, - md->md_niov, - md->md_iov.kiov, + nal->libnal_unmap_pages (nal, + md->md_niov, + md->md_iov.kiov, &md->md_addrkey); } else if (nal->libnal_unmap != NULL) { - nal->libnal_unmap (nal, - md->md_niov, md->md_iov.iov, + nal->libnal_unmap (nal, + md->md_niov, md->md_iov.iov, &md->md_addrkey); } @@ -123,7 +124,7 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) if ((umd->options & PTL_MD_IOVEC) != 0) { if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_MD_ILLEGAL; + return PTL_MD_ILLEGAL; lmd->md_niov = niov = umd->length; memcpy(lmd->md_iov.iov, umd->start, @@ -140,12 +141,12 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) lmd->length = total_length; if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || + (umd->max_size < 0 || umd->max_size > total_length)) // illegal max_size return PTL_MD_ILLEGAL; if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, + rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, &lmd->md_addrkey); if (rc != PTL_OK) return (rc); @@ -153,7 +154,7 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) } else if ((umd->options & PTL_MD_KIOV) != 0) { #ifndef __KERNEL__ return PTL_MD_ILLEGAL; -#else +#else /* Trap attempt to use paged I/O if unsupported early. 
*/ if (nal->libnal_send_pages == NULL || nal->libnal_recv_pages == NULL) @@ -165,7 +166,7 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) for (i = 0; i < niov; i++) { /* We take the page pointer on trust */ - if (lmd->md_iov.kiov[i].kiov_offset + + if (lmd->md_iov.kiov[i].kiov_offset + lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE ) return PTL_VAL_FAILED; /* invalid length */ @@ -175,12 +176,12 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) lmd->length = total_length; if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || + (umd->max_size < 0 || umd->max_size > total_length)) // illegal max_size return PTL_MD_ILLEGAL; if (nal->libnal_map_pages != NULL) { - rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, + rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, &lmd->md_addrkey); if (rc != PTL_OK) return (rc); @@ -193,17 +194,17 @@ lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink) lmd->md_iov.iov[0].iov_len = umd->length; if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */ - (umd->max_size < 0 || + (umd->max_size < 0 || umd->max_size > umd->length)) // illegal max_size return PTL_MD_ILLEGAL; if (nal->libnal_map != NULL) { - rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, + rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, &lmd->md_addrkey); if (rc != PTL_OK) return (rc); } - } + } if (eq != NULL) eq->eq_refcount++; @@ -234,9 +235,9 @@ lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd) ptl_eq2handle(&umd->eq_handle, nal, lmd->eq); } -int +int lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh, - ptl_md_t *umd, ptl_unlink_t unlink, + ptl_md_t *umd, ptl_unlink_t unlink, ptl_handle_md_t *handle) { lib_nal_t *nal = apinal->nal_data; @@ -280,7 +281,7 @@ lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh, } int -lib_api_md_bind(nal_t *apinal, +lib_api_md_bind(nal_t *apinal, ptl_md_t *umd, ptl_unlink_t unlink, ptl_handle_md_t 
*handle) { @@ -343,7 +344,7 @@ lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh) ev.unlinked = 1; lib_md_deconstruct(nal, md, &ev.md); ptl_md2handle(&ev.md_handle, nal, md); - + lib_enq_event_locked(nal, NULL, md->eq, &ev); } @@ -383,13 +384,13 @@ lib_api_md_update (nal_t *apinal, /* XXX fttb, the new MD must be the same "shape" wrt fragmentation, * since we simply overwrite the old lib-md */ - if ((((newumd->options ^ md->options) & + if ((((newumd->options ^ md->options) & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) || - ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && + ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && newumd->length != md->md_niov)) { rc = PTL_IOV_INVALID; goto out; - } + } if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) { test_eq = ptl_handle2eq(testqh, nal); @@ -410,7 +411,7 @@ lib_api_md_update (nal_t *apinal, int unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ? PTL_UNLINK : PTL_RETAIN; - // #warning this does not track eq refcounts properly + // #warning this does not track eq refcounts properly rc = lib_md_build(nal, md, newumd, unlink); md->me = me; diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c index 770e2d3..cbc7c53 100644 --- a/lnet/lnet/lib-me.c +++ b/lnet/lnet/lib-me.c @@ -22,11 +22,12 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#define DEBUG_SUBSYSTEM S_PORTALS + #ifndef __KERNEL__ # include #else -# define DEBUG_SUBSYSTEM S_PORTALS -# include +# include #endif #include @@ -34,8 +35,8 @@ int lib_api_me_attach(nal_t *apinal, ptl_pt_index_t portal, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, + ptl_process_id_t match_id, + ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, ptl_unlink_t unlink, ptl_ins_pos_t pos, ptl_handle_me_t *handle) @@ -78,8 +79,8 @@ lib_api_me_attach(nal_t *apinal, int lib_api_me_insert(nal_t *apinal, ptl_handle_me_t *current_meh, - ptl_process_id_t match_id, - ptl_match_bits_t match_bits, + ptl_process_id_t match_id, + ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits, ptl_unlink_t unlink, ptl_ins_pos_t pos, ptl_handle_me_t *handle) @@ -147,7 +148,7 @@ lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh) } /* call with state_lock please */ -void +void lib_me_unlink(lib_nal_t *nal, lib_me_t *me) { list_del (&me->me_list); @@ -162,10 +163,10 @@ lib_me_unlink(lib_nal_t *nal, lib_me_t *me) } #if 0 -static void +static void lib_me_dump(lib_nal_t *nal, lib_me_t * me) { - CWARN("Match Entry %p ("LPX64")\n", me, + CWARN("Match Entry %p ("LPX64")\n", me, me->me_lh.lh_cookie); CWARN("\tMatch/Ignore\t= %016lx / %016lx\n", diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c index ee0f0b4..e9ef015 100644 --- a/lnet/lnet/lib-move.c +++ b/lnet/lnet/lib-move.c @@ -22,11 +22,12 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#define DEBUG_SUBSYSTEM S_PORTALS + #ifndef __KERNEL__ # include #else -# define DEBUG_SUBSYSTEM S_PORTALS -# include +# include #endif #include #include @@ -35,8 +36,8 @@ static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg); static lib_md_t * -lib_match_md(lib_nal_t *nal, int index, int op_mask, - ptl_nid_t src_nid, ptl_pid_t src_pid, +lib_match_md(lib_nal_t *nal, int index, int op_mask, + ptl_nid_t src_nid, ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset, ptl_match_bits_t match_bits, lib_msg_t *msg, ptl_size_t *mlength_out, ptl_size_t *offset_out) @@ -81,7 +82,7 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, if (me->match_id.nid != PTL_NID_ANY && me->match_id.nid != src_nid) continue; - + CDEBUG(D_NET, "match_id.pid [%x], src_pid [%x]\n", me->match_id.pid, src_pid); @@ -119,9 +120,9 @@ lib_match_md(lib_nal_t *nal, int index, int op_mask, /* Commit to this ME/MD */ CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of " - "length %d/%d into md "LPX64" [%d] + %d\n", + "length %d/%d into md "LPX64" [%d] + %d\n", (op_mask == PTL_MD_OP_PUT) ? 
"put" : "get", - index, src_nid, src_pid, mlength, rlength, + index, src_nid, src_pid, mlength, rlength, md->md_lh.lh_cookie, md->md_niov, offset); lib_commit_md(nal, md, msg); @@ -168,30 +169,30 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) struct list_head *el; struct list_head *next; struct list_head cull; - + if (threshold != 0) { /* Adding a new entry */ PORTAL_ALLOC(tp, sizeof(*tp)); if (tp == NULL) return PTL_NO_SPACE; - + tp->tp_nid = nid; tp->tp_threshold = threshold; - + LIB_LOCK(nal, flags); list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers); LIB_UNLOCK(nal, flags); return PTL_OK; } - + /* removing entries */ - INIT_LIST_HEAD (&cull); - + CFS_INIT_LIST_HEAD (&cull); + LIB_LOCK(nal, flags); list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { tp = list_entry (el, lib_test_peer_t, tp_list); - + if (tp->tp_threshold == 0 || /* needs culling anyway */ nid == PTL_NID_ANY || /* removing all entries */ tp->tp_nid == nid) /* matched this one */ @@ -200,9 +201,9 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) list_add (&tp->tp_list, &cull); } } - + LIB_UNLOCK(nal, flags); - + while (!list_empty (&cull)) { tp = list_entry (cull.next, lib_test_peer_t, tp_list); @@ -213,7 +214,7 @@ int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold) } static int -fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) +fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) { lib_test_peer_t *tp; struct list_head *el; @@ -222,8 +223,8 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) struct list_head cull; int fail = 0; - INIT_LIST_HEAD (&cull); - + CFS_INIT_LIST_HEAD (&cull); + LIB_LOCK (nal, flags); list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) { @@ -240,11 +241,11 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) } continue; } - + if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */ nid == tp->tp_nid) { /* fail this peer */ fail = 1; - + if 
(tp->tp_threshold != PTL_MD_THRESH_INF) { tp->tp_threshold--; if (outgoing && @@ -257,13 +258,13 @@ fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) break; } } - + LIB_UNLOCK (nal, flags); while (!list_empty (&cull)) { tp = list_entry (cull.next, lib_test_peer_t, tp_list); list_del (&tp->tp_list); - + PORTAL_FREE(tp, sizeof (*tp)); } @@ -274,22 +275,22 @@ ptl_size_t lib_iov_nob (int niov, struct iovec *iov) { ptl_size_t nob = 0; - + while (niov-- > 0) nob += (iov++)->iov_len; - + return (nob); } void -lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, +lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t offset, ptl_size_t len) { ptl_size_t nob; if (len == 0) return; - + /* skip complete frags before 'offset' */ LASSERT (niov > 0); while (offset >= iov->iov_len) { @@ -298,7 +299,7 @@ lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, niov--; LASSERT (niov > 0); } - + do { LASSERT (niov > 0); nob = MIN (iov->iov_len - offset, len); @@ -313,7 +314,7 @@ lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, } void -lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, +lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, char *src, ptl_size_t len) { ptl_size_t nob; @@ -329,12 +330,12 @@ lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, niov--; LASSERT (niov > 0); } - + do { LASSERT (niov > 0); nob = MIN (iov->iov_len - offset, len); memcpy (iov->iov_base + offset, src, nob); - + len -= nob; src += nob; niov--; @@ -369,7 +370,7 @@ lib_extract_iov (int dst_niov, struct iovec *dst, for (;;) { LASSERT (src_niov > 0); LASSERT (niov <= dst_niov); - + frag_len = src->iov_len - offset; dst->iov_base = ((char *)src->iov_base) + offset; @@ -377,7 +378,7 @@ lib_extract_iov (int dst_niov, struct iovec *dst, dst->iov_len = len; return (niov); } - + dst->iov_len = frag_len; len -= frag_len; @@ -391,14 +392,14 @@ lib_extract_iov (int dst_niov, struct iovec *dst, #ifndef __KERNEL__ ptl_size_t 
-lib_kiov_nob (int niov, ptl_kiov_t *kiov) +lib_kiov_nob (int niov, ptl_kiov_t *kiov) { LASSERT (0); return (0); } void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len) { LASSERT (0); @@ -412,7 +413,7 @@ lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, } int -lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len) { @@ -422,7 +423,7 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, #else ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) +lib_kiov_nob (int niov, ptl_kiov_t *kiov) { ptl_size_t nob = 0; @@ -433,7 +434,7 @@ lib_kiov_nob (int niov, ptl_kiov_t *kiov) } void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, +lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t offset, ptl_size_t len) { ptl_size_t nob; @@ -441,7 +442,7 @@ lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, if (len == 0) return; - + LASSERT (!in_interrupt ()); LASSERT (niov > 0); @@ -451,15 +452,15 @@ lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, niov--; LASSERT (niov > 0); } - + do{ LASSERT (niov > 0); nob = MIN (kiov->kiov_len - offset, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; + + addr = ((char *)cfs_kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; memcpy (dest, addr, nob); - kunmap (kiov->kiov_page); - + cfs_kunmap (kiov->kiov_page); + len -= nob; dest += nob; niov--; @@ -487,15 +488,15 @@ lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, niov--; LASSERT (niov > 0); } - + do { LASSERT (niov > 0); nob = MIN (kiov->kiov_len - offset, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; + + addr = ((char *)cfs_kmap (kiov->kiov_page)) + kiov->kiov_offset + offset; memcpy (addr, src, nob); - kunmap (kiov->kiov_page); - + 
cfs_kunmap (kiov->kiov_page); + len -= nob; src += nob; niov--; @@ -505,7 +506,7 @@ lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset, } int -lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, +lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, int src_niov, ptl_kiov_t *src, ptl_size_t offset, ptl_size_t len) { @@ -530,7 +531,7 @@ lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, for (;;) { LASSERT (src_niov > 0); LASSERT (niov <= dst_niov); - + frag_len = src->kiov_len - offset; dst->kiov_page = src->kiov_page; dst->kiov_offset = src->kiov_offset + offset; @@ -565,10 +566,10 @@ lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, if ((md->options & PTL_MD_KIOV) == 0) return (nal->libnal_recv(nal, private, msg, - md->md_niov, md->md_iov.iov, + md->md_niov, md->md_iov.iov, offset, mlen, rlen)); - return (nal->libnal_recv_pages(nal, private, msg, + return (nal->libnal_recv_pages(nal, private, msg, md->md_niov, md->md_iov.kiov, offset, mlen, rlen)); } @@ -576,21 +577,21 @@ lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len) + lib_md_t *md, ptl_size_t offset, ptl_size_t len) { if (len == 0) return (nal->libnal_send(nal, private, msg, hdr, type, nid, pid, 0, NULL, offset, len)); - + if ((md->options & PTL_MD_KIOV) == 0) - return (nal->libnal_send(nal, private, msg, + return (nal->libnal_send(nal, private, msg, hdr, type, nid, pid, md->md_niov, md->md_iov.iov, offset, len)); - return (nal->libnal_send_pages(nal, private, msg, + return (nal->libnal_send_pages(nal, private, msg, hdr, type, nid, pid, md->md_niov, md->md_iov.kiov, offset, len)); @@ -606,7 +607,7 @@ lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg) * decrementing its threshold. Come what may, the network "owns" * the MD until a call to lib_finalize() signals completion. 
*/ msg->md = md; - + md->pending++; if (md->threshold != PTL_MD_THRESH_INF) { LASSERT (md->threshold > 0); @@ -628,7 +629,7 @@ lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr) /* CAVEAT EMPTOR: this only drops messages that we've not committed * to receive (init_msg() not called) and therefore can't cause an * event. */ - + LIB_LOCK(nal, flags); nal->libnal_ni.ni_counters.drop_count++; nal->libnal_ni.ni_counters.drop_length += hdr->payload_length; @@ -654,7 +655,7 @@ parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) ptl_err_t rc; lib_md_t *md; unsigned long flags; - + /* Convert put fields to host byte order */ hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits); hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index); @@ -744,7 +745,7 @@ parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming * message. Some NALs _require_ this to implement optimized GET */ - rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, + rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, hdr->src_nid, hdr->src_pid, md, offset, mlength); if (rc != PTL_OK) CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n", @@ -799,7 +800,7 @@ parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) } CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n", - hdr->src_nid, length, rlength, + hdr->src_nid, length, rlength, hdr->msg.reply.dst_wmd.wh_object_cookie); lib_commit_md(nal, md, msg); @@ -844,7 +845,7 @@ parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal); if (md == NULL || md->threshold == 0) { CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD " - LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, + LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, (md == NULL) ? 
"invalid" : "inactive", hdr->msg.ack.dst_wmd.wh_interface_cookie, hdr->msg.ack.dst_wmd.wh_object_cookie); @@ -854,7 +855,7 @@ parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) } CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n", - ni->ni_pid.nid, hdr->src_nid, + ni->ni_pid.nid, hdr->src_nid, hdr->msg.ack.dst_wmd.wh_object_cookie); lib_commit_md(nal, md, msg); @@ -871,14 +872,14 @@ parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg) ni->ni_counters.recv_count++; LIB_UNLOCK(nal, flags); - + /* We have received and matched up the ack OK, create the * completion event now... */ lib_finalize(nal, private, msg, PTL_OK); /* ...and now discard any junk after the hdr */ (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length); - + return (PTL_OK); } @@ -965,7 +966,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) /* NB we return PTL_OK if we manage to parse the header and believe * it looks OK. Anything that goes wrong with receiving the * message after that point is the responsibility of the NAL */ - + /* convert common fields to host byte order */ hdr->type = le32_to_cpu(hdr->type); hdr->src_nid = le64_to_cpu(hdr->src_nid); @@ -987,7 +988,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) mv->version_minor == PORTALS_PROTO_VERSION_MINOR) { CWARN (LPU64": Dropping unexpected HELLO message: " "magic %d, version %d.%d from "LPD64"\n", - nal->libnal_ni.ni_pid.nid, mv->magic, + nal->libnal_ni.ni_pid.nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); @@ -999,7 +1000,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) /* we got garbage */ CERROR (LPU64": Bad HELLO message: " "magic %d, version %d.%d from "LPD64"\n", - nal->libnal_ni.ni_pid.nid, mv->magic, + nal->libnal_ni.ni_pid.nid, mv->magic, mv->version_major, mv->version_minor, hdr->src_nid); return PTL_FAIL; @@ -1012,7 +1013,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) hdr->dest_nid = 
le64_to_cpu(hdr->dest_nid); if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) { CERROR(LPU64": BAD dest NID in %s message from" - LPU64" to "LPU64" (not me)\n", + LPU64" to "LPU64" (not me)\n", nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), hdr->src_nid, hdr->dest_nid); return PTL_FAIL; @@ -1033,7 +1034,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) { CERROR(LPU64": Dropping incoming %s from "LPU64 ": simulated failure\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), + nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), hdr->src_nid); lib_drop_message(nal, private, hdr); return PTL_OK; @@ -1043,7 +1044,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) if (msg == NULL) { CERROR(LPU64": Dropping incoming %s from "LPU64 ": can't allocate a lib_msg_t\n", - nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), + nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), hdr->src_nid); lib_drop_message(nal, private, hdr); return PTL_OK; @@ -1067,7 +1068,7 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) rc = PTL_FAIL; /* no compiler warning please */ break; } - + if (rc != PTL_OK) { if (msg->md != NULL) { /* committed... */ @@ -1085,11 +1086,11 @@ lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private) /* That's "OK I can parse it", not "OK I like it" :) */ } -int -lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, +int +lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, ptl_ack_req_t ack, ptl_process_id_t *id, ptl_pt_index_t portal, ptl_ac_index_t ac, - ptl_match_bits_t match_bits, + ptl_match_bits_t match_bits, ptl_size_t offset, ptl_hdr_data_t hdr_data) { lib_nal_t *nal = apinal->nal_data; @@ -1099,7 +1100,7 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, lib_md_t *md; unsigned long flags; int rc; - + if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ { @@ -1121,7 +1122,7 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, if (md == NULL || md->threshold == 0) { lib_msg_free(nal, msg); LIB_UNLOCK(nal, flags); - + return PTL_MD_INVALID; } @@ -1149,7 +1150,7 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, hdr.msg.put.hdr_data = hdr_data; lib_commit_md(nal, md, msg); - + msg->ev.type = PTL_EVENT_SEND_END; msg->ev.initiator.nid = ni->ni_pid.nid; msg->ev.initiator.pid = ni->ni_pid.pid; @@ -1167,7 +1168,7 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, ni->ni_counters.send_length += md->length; LIB_UNLOCK(nal, flags); - + rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT, id->nid, id->pid, md, 0, md->length); if (rc != PTL_OK) { @@ -1175,12 +1176,12 @@ lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, id->nid, rc); lib_finalize (nal, NULL, msg, rc); } - + /* completion will be signalled by an event */ return PTL_OK; } -lib_msg_t * +lib_msg_t * lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg) { /* The NAL can DMA direct to the GET md (i.e. no REPLY msg). This @@ -1244,7 +1245,7 @@ lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg) return NULL; } -int +int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id, ptl_pt_index_t portal, ptl_ac_index_t ac, ptl_match_bits_t match_bits, ptl_size_t offset) @@ -1256,7 +1257,7 @@ lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id, lib_md_t *md; unsigned long flags; int rc; - + if (!list_empty (&ni->ni_test_peers) && /* normally we don't */ fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ { @@ -1327,7 +1328,7 @@ lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id, ni->ni_pid.nid, id->nid, rc); lib_finalize (nal, NULL, msg, rc); } - + /* completion will be signalled by an event */ return PTL_OK; } diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c index 4d17c7d..38904c4 100644 --- a/lnet/lnet/lib-msg.c +++ b/lnet/lnet/lib-msg.c @@ -22,17 +22,18 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define DEBUG_SUBSYSTEM S_PORTALS + #ifndef __KERNEL__ # include #else -# define DEBUG_SUBSYSTEM S_PORTALS -# include +# include #endif #include void -lib_enq_event_locked (lib_nal_t *nal, void *private, +lib_enq_event_locked (lib_nal_t *nal, void *private, lib_eq_t *eq, ptl_event_t *ev) { ptl_event_t *eq_slot; @@ -46,7 +47,7 @@ lib_enq_event_locked (lib_nal_t *nal, void *private, /* We don't support different uid/jids yet */ ev->uid = 0; ev->jid = 0; - + /* size must be a power of 2 to handle sequence # overflow */ LASSERT (eq->eq_size != 0 && eq->eq_size == LOWEST_BIT_SET (eq->eq_size)); @@ -64,14 +65,14 @@ lib_enq_event_locked (lib_nal_t *nal, void *private, /* Wake anyone sleeping for an event (see lib-eq.c) */ #ifdef __KERNEL__ - if (waitqueue_active(&nal->libnal_ni.ni_waitq)) - wake_up_all(&nal->libnal_ni.ni_waitq); + if (cfs_waitq_active(&nal->libnal_ni.ni_waitq)) + cfs_waitq_broadcast(&nal->libnal_ni.ni_waitq); #else pthread_cond_broadcast(&nal->libnal_ni.ni_cond); #endif } -void +void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) { lib_md_t *md; @@ -102,11 +103,11 @@ lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status) ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength); rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK, - msg->ev.initiator.nid, msg->ev.initiator.pid, + msg->ev.initiator.nid, msg->ev.initiator.pid, NULL, 0, 0); if (rc != PTL_OK) { /* send failed: there's nothing else to clean up. 
*/ - CERROR("Error %d sending ACK to "LPX64"\n", + CERROR("Error %d sending ACK to "LPX64"\n", rc, msg->ev.initiator.nid); } } diff --git a/lnet/lnet/lib-pid.c b/lnet/lnet/lib-pid.c index 5197308..8a9fbc5 100644 --- a/lnet/lnet/lib-pid.c +++ b/lnet/lnet/lib-pid.c @@ -26,7 +26,6 @@ #define DEBUG_SUBSYSTEM S_PORTALS #if defined (__KERNEL__) -# include extern int getpid(void); #else # include diff --git a/lnet/lnet/module.c b/lnet/lnet/module.c index c1303b7..21b91a5 100644 --- a/lnet/lnet/module.c +++ b/lnet/lnet/module.c @@ -24,36 +24,15 @@ #endif #define DEBUG_SUBSYSTEM S_PORTALS -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - #include #include #include -#include -#include -#include +#include +#include extern void (kping_client)(struct portal_ioctl_data *); -static int kportal_ioctl(struct portal_ioctl_data *data, +static int kportal_ioctl(struct portal_ioctl_data *data, unsigned int cmd, unsigned long arg) { int err; @@ -130,12 +109,14 @@ static int kportal_ioctl(struct portal_ioctl_data *data, } DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl); +extern struct semaphore ptl_mutex; static int init_kportals_module(void) { int rc; ENTRY; + init_mutex(&ptl_mutex); rc = PtlInit(NULL); if (rc) { CERROR("PtlInit: error %d\n", rc); @@ -197,5 +178,5 @@ EXPORT_SYMBOL(lib_fini); MODULE_AUTHOR("Peter J. Braam "); MODULE_DESCRIPTION("Portals v3.1"); MODULE_LICENSE("GPL"); -module_init(init_kportals_module); -module_exit(exit_kportals_module); + +cfs_module(portals, "1.0.0", init_kportals_module, exit_kportals_module); diff --git a/lnet/lnet/portals.xcode/project.pbxproj b/lnet/lnet/portals.xcode/project.pbxproj new file mode 100644 index 0000000..1dc0146 --- /dev/null +++ b/lnet/lnet/portals.xcode/project.pbxproj @@ -0,0 +1,430 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = portals; + refType = 4; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 19A778270730EACD00846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = module.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778280730EACD00846375 = { + fileRef = 19A778270730EACD00846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7782B0730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "api-errno.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A7782C0730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + 
lastKnownFileType = sourcecode.c.c; + path = "api-ni.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A7782D0730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "api-wrap.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A7782E0730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-eq.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A7782F0730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-init.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778300730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-md.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778310730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-me.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778320730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-move.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778330730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-msg.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778340730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-ni.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778350730EB8400846375 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + path = "lib-pid.c"; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 19A778360730EB8400846375 = { + fileRef = 19A7782B0730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A778370730EB8400846375 = { + fileRef = 19A7782C0730EB8400846375; + isa = PBXBuildFile; + settings = { 
+ }; + }; + 19A778380730EB8400846375 = { + fileRef = 19A7782D0730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A778390730EB8400846375 = { + fileRef = 19A7782E0730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783A0730EB8400846375 = { + fileRef = 19A7782F0730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783B0730EB8400846375 = { + fileRef = 19A778300730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783C0730EB8400846375 = { + fileRef = 19A778310730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783D0730EB8400846375 = { + fileRef = 19A778320730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783E0730EB8400846375 = { + fileRef = 19A778330730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A7783F0730EB8400846375 = { + fileRef = 19A778340730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19A778400730EB8400846375 = { + fileRef = 19A778350730EB8400846375; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 247142CAFF3F8F9811CA285C = { + children = ( + 19A7782B0730EB8400846375, + 19A7782C0730EB8400846375, + 19A7782D0730EB8400846375, + 19A7782E0730EB8400846375, + 19A7782F0730EB8400846375, + 19A778300730EB8400846375, + 19A778310730EB8400846375, + 19A778320730EB8400846375, + 19A778330730EB8400846375, + 19A778340730EB8400846375, + 19A778350730EB8400846375, + 19A778270730EACD00846375, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType = 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 
32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = ../include; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.portals.portals; + MODULE_START = portals_start; + MODULE_STOP = portals_stop; + MODULE_VERSION = 1.0.1; + OTHER_CFLAGS = "-D__KERNEL__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = portals; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = portals; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = portals; + productReference = 32A4FEC40562C75800D090E7; + productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 19A778280730EACD00846375, + 19A778360730EB8400846375, + 19A778370730EB8400846375, + 19A778380730EB8400846375, + 19A778390730EB8400846375, + 19A7783A0730EB8400846375, + 19A7783B0730EB8400846375, + 19A7783C0730EB8400846375, + 19A7783D0730EB8400846375, + 19A7783E0730EB8400846375, + 19A7783F0730EB8400846375, + 19A778400730EB8400846375, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = portals.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/lnet/router/autoMakefile.am b/lnet/router/autoMakefile.am index fa11e8c..08db593 100644 --- a/lnet/router/autoMakefile.am +++ b/lnet/router/autoMakefile.am @@ -5,8 +5,12 @@ if MODULES if !CRAY_PORTALS + +if LINUX modulenet_DATA = kptlrouter$(KMODEXT) endif + +endif endif MOSTLYCLEANFILES = *.o *.ko *.mod.c diff --git a/lnet/router/router.h b/lnet/router/router.h index af3c966..44f307a 100644 --- a/lnet/router/router.h +++ b/lnet/router/router.h @@ -35,8 +35,8 @@ #define DEBUG_SUBSYSTEM S_PTLROUTER -#include -#include +#include +#include #include #include diff --git a/lnet/tests/arch-linux/ping.h b/lnet/tests/arch-linux/ping.h new file mode 100644 index 0000000..640100e --- /dev/null +++ b/lnet/tests/arch-linux/ping.h @@ -0,0 +1,22 @@ +#ifndef __LINUX_PING_H__ +#define __LINUX_PING_H__ + +#include +#include +#include +#include +#include +#include +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) +#include +#else +#include +#endif +#include +#include +#include + +#include +#include + +#endif diff --git a/lnet/tests/arch-xnu/ping.h b/lnet/tests/arch-xnu/ping.h new file mode 100644 index 0000000..bb13276 --- /dev/null +++ b/lnet/tests/arch-xnu/ping.h @@ -0,0 +1,8 @@ +#ifndef __XNU_PING_H__ +#define __XNU_PING_H__ + +#include +#include +#include + +#endif diff --git a/lnet/tests/autoMakefile.am b/lnet/tests/autoMakefile.am index 5f81b93..91be853 100644 --- a/lnet/tests/autoMakefile.am +++ b/lnet/tests/autoMakefile.am @@ -6,9 +6,13 @@ if MODULES if !CRAY_PORTALS if TESTS + +if LINUX noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT) 
noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT) endif + +endif endif endif diff --git a/lnet/tests/build-osx b/lnet/tests/build-osx new file mode 100644 index 0000000..5af66cb --- /dev/null +++ b/lnet/tests/build-osx @@ -0,0 +1,159 @@ +#! /bin/sh + +if false ;then + OPTVAL=`getopt -o cb:l:s:k:L:v -n 'build-all' -- "$@"` +else + # XNU/BSD getopt is special... + OPTVAL=$(getopt cb:l:s:k:L:v "$@") +fi + +if [ $? != 0 ] +then + echo 'Usage: see source...' + exit 2 +fi + +eval set -- "$OPTVAL" + +cd $(dirname $0) + +#set -x + +b=$PWD # base directory +l=$b/build.log # where to log operations +s=$b/build.seq # build sequence +k=$b/kext.stage # where to place kexts after build + +load='' # list of kexts to load +clean=0 +verbose=0 + +while true ;do + case "$1" in + -c) + clean=1 + shift 1 + ;; + -v) + verbose=$(($verbose + 1)) + shift 1 + ;; + -b) + b=$2 + shift 2 + ;; + -l) + l=$2 + shift 2 + ;; + -s) + s=$2 + shift 2 + ;; + -k) + k=$2 + shift 2 + ;; + -L) + load=$2 + shift 2 + ;; + --) + shift + break + ;; + *) + echo "Internal error!" + exit 1 + ;; + esac +done + +echo > $l + +function message () +{ + local msg + + msg="$1" + echo $msg + echo $msg >> $l +} + +function abort () +{ + local msg + + msg=$1 + + message "$1" + exit 1 +} + +function configure_xcode () +{ + local path + local pfile + local module + + path=$PWD + module=$(basename $path) + pfile=$path/$module.xcode/project.pbxproj + if [ -r $pfile.template ] ;then + cpp \ + -P \ + -include $b/build-config \ + $pfile.template | \ + tail +2 > $pfile + else + abort "missing $pfile.template" + fi +} + +if [ x$clean != x0 ] ;then + echo "Removing..." + find $b/ -type d -name build + rm -fr $(find $b/ -type d -name build) + find $b/ -print0 | xargs -0 touch +fi + +cat $s | while read ;do + d=$REPLY + if [ x$d = x ] ;then + : # empty line. Do nothing + elif [ ${d:0:1} = '#' ] ;then + : # comment. 
Skip + else + cd $d || abort "Cannot cd to $d" + message "________ Building in $d __________" + #configure_xcode + if [ $verbose -gt 0 ] ;then + xcodebuild 2>&1 | tee -a $l + else + xcodebuild >> $l 2>&1 || abort "Build failure in $d. See $l" + fi + # tail -2 $l + cd $b + fi +done + +# copy all built kexts into $k +# sudo is used, because extensions are later chowned to root. +sudo rm -f ../include/arch +ln -s ../include/arch-xnu ../include/arch +sudo rm -fr $k || abort "Cannot clean $k" +mkdir $k || abort "Cannot create $k" +cp -R $(find ../ -name \*.kext -type d) $k || abort "Cannot stage kexts" +cd $k || abort "Cannot chdir to $k" +sudo chown -R root:wheel * || abort "Cannot chown kexts to root:wheel" + +if [ x$load != x ] ;then + cd $k + sudo kextload -r $k $load +else + sudo chown -R root:wheel * +fi +cd $b + +sync;sync;sync + diff --git a/lnet/tests/build.seq b/lnet/tests/build.seq new file mode 100644 index 0000000..e6298b7 --- /dev/null +++ b/lnet/tests/build.seq @@ -0,0 +1,5 @@ +../libcfs +../portals +../knals/socknal +./ping_cli +./ping_srv diff --git a/lnet/tests/ping.h b/lnet/tests/ping.h index f07444b..ef937af 100644 --- a/lnet/tests/ping.h +++ b/lnet/tests/ping.h @@ -1,6 +1,7 @@ #ifndef _KPING_INCLUDED #define _KPING_INCLUDED +#include #include @@ -34,12 +35,6 @@ #if __KERNEL__ - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif struct pingsrv_data { ptl_handle_ni_t ni; @@ -53,7 +48,7 @@ struct pingsrv_data { ptl_handle_md_t mdin_h; ptl_handle_md_t mdout_h; ptl_event_t evnt; - struct task_struct *tsk; + cfs_task_t *tsk; }; /* struct pingsrv_data */ struct pingcli_data { @@ -71,7 +66,7 @@ struct pingcli_data { ptl_handle_md_t md_in_head_h; ptl_handle_md_t md_out_head_h; ptl_event_t ev; - struct task_struct *tsk; + cfs_task_t *tsk; }; /* struct pingcli_data */ diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c index e9a8481..2995b46 100644 --- a/lnet/tests/ping_cli.c +++ b/lnet/tests/ping_cli.c @@ -25,12 +25,8 
@@ #define DEBUG_SUBSYSTEM S_PINGER -#include +#include #include -#include -#include -#include -#include #include "ping.h" /* int portal_debug = D_PING_CLI; */ @@ -48,6 +44,7 @@ static int count = 0; static void pingcli_shutdown(ptl_handle_ni_t nih, int err) { + struct portal_ioctl_data *args = client->args; int rc; /* Yes, we are intentionally allowing us to fall through each @@ -74,6 +71,12 @@ pingcli_shutdown(ptl_handle_ni_t nih, int err) case 4: /* Free our buffers */ + if (client->outbuf != NULL) + PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size); + + if (client->inbuf != NULL) + PORTAL_FREE (client->inbuf, + (args->ioc_size + STDSIZE) * args->ioc_count); if (client != NULL) PORTAL_FREE (client, @@ -86,9 +89,10 @@ pingcli_shutdown(ptl_handle_ni_t nih, int err) static void pingcli_callback(ptl_event_t *ev) { - int i, magic; - i = *(int *)(ev->md.start + ev->offset + sizeof(unsigned)); - magic = *(int *)(ev->md.start + ev->offset); + int i; + unsigned magic; + i = __le32_to_cpu(*(int *)(ev->md.start + ev->offset + sizeof(unsigned))); + magic = __le32_to_cpu(*(int *)(ev->md.start + ev->offset)); if(magic != 0xcafebabe) { CERROR("Unexpected response %x\n", magic); @@ -105,13 +109,12 @@ static struct pingcli_data * pingcli_start(struct portal_ioctl_data *args) { ptl_handle_ni_t nih = PTL_INVALID_HANDLE; - unsigned ping_head_magic = PING_HEADER_MAGIC; - unsigned ping_bulk_magic = PING_BULK_MAGIC; + unsigned ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC); int rc; struct timeval tv1, tv2; char str[PTL_NALFMT_SIZE]; - client->tsk = current; + client->tsk = cfs_current(); client->args = args; CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s), \ nal %x, size %u, count: %u, timeout: %u\n", @@ -140,7 +143,7 @@ pingcli_start(struct portal_ioctl_data *args) /* Aquire and initialize the proper nal for portals. 
*/ rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih); - if (rc != PTL_OK || rc != PTL_IFACE_DUP) + if (rc != PTL_OK && rc != PTL_IFACE_DUP) { CERROR ("NAL %x not loaded\n", args->ioc_nal); pingcli_shutdown (nih, 4); @@ -206,7 +209,7 @@ pingcli_start(struct portal_ioctl_data *args) client->md_out_head.user_ptr = NULL; client->md_out_head.eq_handle = PTL_EQ_NONE; - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic)); + memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); count = 0; @@ -218,10 +221,13 @@ pingcli_start(struct portal_ioctl_data *args) return NULL; } while ((args->ioc_count - count)) { + unsigned __count; + __count = __cpu_to_le32(count); + memcpy (client->outbuf + sizeof(unsigned), - &(count), sizeof(unsigned)); + &(__count), sizeof(unsigned)); /* Put the ping packet */ - do_gettimeofday (&tv1); + cfs_fs_timeval (&tv1); memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1, sizeof(struct timeval)); @@ -232,14 +238,14 @@ pingcli_start(struct portal_ioctl_data *args) pingcli_shutdown (nih, 1); return NULL; } - CWARN ("Lustre: sent msg no %d", count); + CWARN ("Lustre: sent msg no %d.\n", count); set_current_state (TASK_INTERRUPTIBLE); - rc = schedule_timeout (20 * args->ioc_timeout); + rc = schedule_timeout (cfs_time_seconds(args->ioc_timeout)); if (rc == 0) { CERROR ("timeout .....\n"); } else { - do_gettimeofday (&tv2); + cfs_fs_timeval (&tv2); CWARN("Reply in %u usec\n", (unsigned)((tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec))); @@ -247,13 +253,6 @@ pingcli_start(struct portal_ioctl_data *args) count++; } - if (client->outbuf != NULL) - PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size); - - if (client->inbuf != NULL) - PORTAL_FREE (client->inbuf, - (args->ioc_size + STDSIZE) * args->ioc_count); - pingcli_shutdown (nih, 2); /* Success! 
*/ @@ -295,8 +294,7 @@ MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); MODULE_LICENSE("GPL"); -module_init(pingcli_init); -module_exit(pingcli_cleanup); +cfs_module(ping_cli, "1.0.0", pingcli_init, pingcli_cleanup); #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) EXPORT_SYMBOL (kping_client); diff --git a/lnet/tests/ping_cli/Info.plist b/lnet/tests/ping_cli/Info.plist new file mode 100644 index 0000000..2dfc37b --- /dev/null +++ b/lnet/tests/ping_cli/Info.plist @@ -0,0 +1,37 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + ping_cli + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.portals.tests.ping_cli + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.0d1 + OSBundleLibraries + + com.apple.kernel.bsd + 1.1 + com.apple.kernel.iokit + 1.0.0b1 + com.apple.kernel.mach + 1.0.0b1 + com.clusterfs.lustre.portals.libcfs + 1.0.0 + com.clusterfs.lustre.portals.portals + 1.0.0 + com.clusterfs.lustre.portals.knals.ksocknal + 1.0.0 + + + diff --git a/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj b/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj new file mode 100644 index 0000000..255220d --- /dev/null +++ b/lnet/tests/ping_cli/ping_cli.xcode/project.pbxproj @@ -0,0 +1,255 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = ping_cli; + refType = 4; + sourceTree = ""; + }; + 089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 1949BA72073A08F100E4167C = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = ping_cli.c; + path = ../ping_cli.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 1949BA73073A08F100E4167C = { + fileRef = 1949BA72073A08F100E4167C; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 
247142CAFF3F8F9811CA285C = { + children = ( + 1949BA72073A08F100E4167C, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType = 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = "../../include ../"; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_cli; + MODULE_START = ping_cli_start; + MODULE_STOP = ping_cli_stop; + MODULE_VERSION = 1.0.0d1; + OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = ping_cli; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = ping_cli; + productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = ping_cli; + productReference = 32A4FEC40562C75800D090E7; + productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 1949BA73073A08F100E4167C, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = ping_cli.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c index 49e82af..ae0d722 100644 --- a/lnet/tests/ping_srv.c +++ b/lnet/tests/ping_srv.c @@ -25,27 +25,10 @@ #define DEBUG_SUBSYSTEM S_PINGER -#include +#include #include #include "ping.h" -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif -#include -#include - -#include -#include - #define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) #define MAXSIZE (16*1024) @@ -104,10 +87,10 @@ int pingsrv_thread(void *arg) { int rc; unsigned long magic; - unsigned long ping_bulk_magic = 0xcafebabe; + unsigned long ping_bulk_magic = __cpu_to_le32(0xcafebabe); kportal_daemonize ("pingsrv"); - server->tsk = current; + server->tsk = cfs_current(); while (running) { set_current_state (TASK_INTERRUPTIBLE); @@ -116,12 +99,12 @@ int pingsrv_thread(void *arg) continue; } - magic = *((int *)(server->evnt.md.start - + server->evnt.offset)); + magic = __le32_to_cpu(*((int *)(server->evnt.md.start + + server->evnt.offset))); if(magic != 0xdeadbeef) { - CERROR("Unexpected Packet to the server\n"); + CERROR("Unexpected Packet to the server, magic: %lx %d\n", magic, server->evnt.offset); } memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic)); @@ -185,10 +168,10 @@ static void pingsrv_callback(ptl_event_t *ev) CWARN ("received ping from nid "LPX64" " "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n", ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->md.start + ev->offset)), - 
*((int *)(ev->md.start + ev->offset + sizeof(unsigned))), - *((int *)(ev->md.start + ev->offset + 2 * - sizeof(unsigned)))); + __le32_to_cpu(*((int *)(ev->md.start + ev->offset))), + __le32_to_cpu(*((int *)(ev->md.start + ev->offset + sizeof(unsigned)))), + __le32_to_cpu(*((int *)(ev->md.start + ev->offset + 2 * + sizeof(unsigned))))); packets_valid++; @@ -260,37 +243,35 @@ static struct pingsrv_data *pingsrv_setup(void) return server; } /* pingsrv_setup() */ -static int pingsrv_start(void) +static int pingsrv_start(void) { /* Setup our server */ if (!pingsrv_setup()) { CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); return -ENOMEM; } - kernel_thread (pingsrv_thread,NULL,0); + cfs_kernel_thread (pingsrv_thread,NULL,0); return 0; } /* pingsrv_start() */ - - static int __init pingsrv_init(void) { - ping_head_magic = PING_HEADER_MAGIC; - ping_bulk_magic = PING_BULK_MAGIC; + ping_head_magic = __cpu_to_le32(PING_HEADER_MAGIC); + ping_bulk_magic = __cpu_to_le32(PING_BULK_MAGIC); PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); + atomic_set(&pkt, 0); return pingsrv_start (); } /* pingsrv_init() */ - static void /*__exit*/ pingsrv_cleanup(void) { - remove_proc_entry ("net/pingsrv", NULL); + cfs_remove_proc_entry ("net/pingsrv", NULL); running = 0; wake_up_process (server->tsk); while (running != 1) { set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); + schedule_timeout (cfs_time_seconds(1)); } } /* pingsrv_cleanup() */ @@ -304,5 +285,4 @@ MODULE_AUTHOR("Brian Behlendorf (LLNL)"); MODULE_DESCRIPTION("A kernel space ping server for portals testing"); MODULE_LICENSE("GPL"); -module_init(pingsrv_init); -module_exit(pingsrv_cleanup); +cfs_module(ping_srv, "1.0.0", pingsrv_init, pingsrv_cleanup); diff --git a/lnet/tests/ping_srv/Info.plist b/lnet/tests/ping_srv/Info.plist new file mode 100644 index 0000000..21024f0 --- /dev/null +++ b/lnet/tests/ping_srv/Info.plist @@ -0,0 +1,37 @@ + + + + + CFBundleDevelopmentRegion + English + 
CFBundleExecutable + ping_srv + CFBundleIconFile + + CFBundleIdentifier + com.clusterfs.lustre.portals.tests.ping_srv + CFBundleInfoDictionaryVersion + 6.0 + CFBundlePackageType + KEXT + CFBundleSignature + ???? + CFBundleVersion + 1.0.0d1 + OSBundleLibraries + + com.apple.kernel.bsd + 1.1 + com.apple.kernel.iokit + 1.0.0b1 + com.apple.kernel.mach + 1.0.0b1 + com.clusterfs.lustre.portals.libcfs + 1.0.0 + com.clusterfs.lustre.portals.portals + 1.0.0 + com.clusterfs.lustre.portals.knals.ksocknal + 1.0.0 + + + diff --git a/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj b/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj new file mode 100644 index 0000000..0173417 --- /dev/null +++ b/lnet/tests/ping_srv/ping_srv.xcode/project.pbxproj @@ -0,0 +1,255 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 39; + objects = { + 06AA1262FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + OPTIMIZATION_CFLAGS = "-O0"; + ZERO_LINK = YES; + }; + isa = PBXBuildStyle; + name = Development; + }; + 06AA1263FFB20DD611CA28AA = { + buildRules = ( + ); + buildSettings = { + COPY_PHASE_STRIP = YES; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + ZERO_LINK = NO; + }; + isa = PBXBuildStyle; + name = Deployment; + }; +//060 +//061 +//062 +//063 +//064 +//080 +//081 +//082 +//083 +//084 + 089C1669FE841209C02AAC07 = { + buildSettings = { + }; + buildStyles = ( + 06AA1262FFB20DD611CA28AA, + 06AA1263FFB20DD611CA28AA, + ); + hasScannedForEncodings = 1; + isa = PBXProject; + mainGroup = 089C166AFE841209C02AAC07; + projectDirPath = ""; + targets = ( + 32A4FEB80562C75700D090E7, + ); + }; + 089C166AFE841209C02AAC07 = { + children = ( + 247142CAFF3F8F9811CA285C, + 089C167CFE841241C02AAC07, + 19C28FB6FE9D52B211CA2CBB, + ); + isa = PBXGroup; + name = ping_srv; + refType = 4; + sourceTree = ""; + }; + 
089C167CFE841241C02AAC07 = { + children = ( + 32A4FEC30562C75700D090E7, + ); + isa = PBXGroup; + name = Resources; + refType = 4; + sourceTree = ""; + }; +//080 +//081 +//082 +//083 +//084 +//190 +//191 +//192 +//193 +//194 + 1987212D0739090900338926 = { + fileEncoding = 30; + isa = PBXFileReference; + lastKnownFileType = sourcecode.c.c; + name = ping_srv.c; + path = ../ping_srv.c; + refType = 2; + sourceTree = SOURCE_ROOT; + }; + 1987212E0739090900338926 = { + fileRef = 1987212D0739090900338926; + isa = PBXBuildFile; + settings = { + }; + }; + 19C28FB6FE9D52B211CA2CBB = { + children = ( + 32A4FEC40562C75800D090E7, + ); + isa = PBXGroup; + name = Products; + refType = 4; + sourceTree = ""; + }; +//190 +//191 +//192 +//193 +//194 +//240 +//241 +//242 +//243 +//244 + 247142CAFF3F8F9811CA285C = { + children = ( + 1987212D0739090900338926, + ); + isa = PBXGroup; + name = Source; + path = ""; + refType = 4; + sourceTree = ""; + }; +//240 +//241 +//242 +//243 +//244 +//320 +//321 +//322 +//323 +//324 + 32A4FEB80562C75700D090E7 = { + buildPhases = ( + 32A4FEB90562C75700D090E7, + 32A4FEBA0562C75700D090E7, + 32A4FEBB0562C75700D090E7, + 32A4FEBD0562C75700D090E7, + 32A4FEBF0562C75700D090E7, + 32A4FEC00562C75700D090E7, + 32A4FEC10562C75700D090E7, + ); + buildRules = ( + ); + buildSettings = { + FRAMEWORK_SEARCH_PATHS = ""; + GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; + GCC_WARN_UNKNOWN_PRAGMAS = NO; + HEADER_SEARCH_PATHS = "../../include ../"; + INFOPLIST_FILE = Info.plist; + INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + LIBRARY_SEARCH_PATHS = ""; + MODULE_NAME = com.clusterfs.lustre.portals.tests.ping_srv; + MODULE_START = ping_srv_start; + MODULE_STOP = ping_srv_stop; + MODULE_VERSION = 1.0.0d1; + OTHER_CFLAGS = "-D__KERNEL__ -D__DARWIN__"; + OTHER_LDFLAGS = ""; + OTHER_REZFLAGS = ""; + PRODUCT_NAME = ping_srv; + SECTORDER_FLAGS = ""; + WARNING_CFLAGS = "-Wmost"; + WRAPPER_EXTENSION = kext; + }; + dependencies = ( + ); + isa = PBXNativeTarget; + name = ping_srv; + 
productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; + productName = ping_srv; + productReference = 32A4FEC40562C75800D090E7; + productType = "com.apple.product-type.kernel-extension"; + }; + 32A4FEB90562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; + }; + 32A4FEBA0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXHeadersBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBB0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXResourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBD0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + 1987212E0739090900338926, + ); + isa = PBXSourcesBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEBF0562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXFrameworksBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC00562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXRezBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + }; + 32A4FEC10562C75700D090E7 = { + buildActionMask = 2147483647; + files = ( + ); + isa = PBXShellScriptBuildPhase; + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . 
\"$script\"\nfi"; + }; + 32A4FEC30562C75700D090E7 = { + isa = PBXFileReference; + lastKnownFileType = text.plist.xml; + path = Info.plist; + refType = 4; + sourceTree = ""; + }; + 32A4FEC40562C75800D090E7 = { + explicitFileType = wrapper.cfbundle; + includeInIndex = 0; + isa = PBXFileReference; + path = ping_srv.kext; + refType = 3; + sourceTree = BUILT_PRODUCTS_DIR; + }; + }; + rootObject = 089C1669FE841209C02AAC07; +} diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c index d9970e7..71a2a98 100644 --- a/lnet/tests/sping_cli.c +++ b/lnet/tests/sping_cli.c @@ -31,7 +31,7 @@ #define DEBUG_SUBSYSTEM S_PINGER -#include +#include #include #include #include diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c index 069423d..30f158c 100644 --- a/lnet/tests/sping_srv.c +++ b/lnet/tests/sping_srv.c @@ -30,7 +30,7 @@ #define DEBUG_SUBSYSTEM S_PINGER -#include +#include #include #include "ping.h" diff --git a/lnet/ulnds/address.c b/lnet/ulnds/address.c index f329e2a..07b4249 100644 --- a/lnet/ulnds/address.c +++ b/lnet/ulnds/address.c @@ -25,6 +25,8 @@ * mapping of virtual nodes into the port range of an IP socket. 
*/ +#define DEBUG_SUBSYSTEM S_NAL + #include #include #include @@ -59,7 +61,7 @@ static unsigned int get_node_id(void) x = 0; return(ntohl(x)); } - else + else { if (host_envp[1] != 'x') { @@ -98,15 +100,15 @@ void set_address(bridge t,ptl_pid_t pidrequest) void set_address(bridge t,ptl_pid_t pidrequest) { - int virtnode, in_addr, port; + int virtnode, in_addr, port; ptl_pid_t pid; /* get and remember my node id*/ if (!getenv("PTL_VIRTNODE")) virtnode = 0; - else + else { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT + int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT >> PNAL_VNODE_SHIFT); virtnode = atoi(getenv("PTL_VIRTNODE")); if (virtnode > maxvnode) @@ -116,11 +118,11 @@ void set_address(bridge t,ptl_pid_t pidrequest) return; } } - + in_addr = get_node_id(); t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) + t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) << PNAL_VNODE_SHIFT) + virtnode; pid=pidrequest; @@ -128,7 +130,7 @@ void set_address(bridge t,ptl_pid_t pidrequest) #ifdef notyet if (pid==(unsigned short)PTL_PID_ANY) port = 0; #endif - if (pid==(unsigned short)PTL_PID_ANY) + if (pid==(unsigned short)PTL_PID_ANY) { fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); return; diff --git a/lnet/ulnds/connection.c b/lnet/ulnds/connection.c index b399fcf..ed68355 100644 --- a/lnet/ulnds/connection.c +++ b/lnet/ulnds/connection.c @@ -39,10 +39,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include diff --git a/lnet/ulnds/select.c b/lnet/ulnds/select.c index 09e1542..49c770f 100644 --- a/lnet/ulnds/select.c +++ b/lnet/ulnds/select.c @@ -25,6 +25,8 @@ * io events through the select system call. 
*/ +#define DEBUG_SUBSYSTEM S_NAL + #ifdef sun #include #else @@ -55,7 +57,7 @@ static io_handler io_handlers; when now() { struct timeval result; - + gettimeofday(&result,0); return((((unsigned long long)result.tv_sec)<<32)| (((unsigned long long)result.tv_usec)<<32)/1000000); @@ -91,7 +93,7 @@ io_handler register_io_handler(int fd, * Arguments: i: a pointer to the handler to stop servicing * * remove_io_handler() doesn't actually free the handler, due - * to reentrancy problems. it just marks the handler for + * to reentrancy problems. it just marks the handler for * later cleanup by the blocking function. */ void remove_io_handler (io_handler i) @@ -282,7 +284,7 @@ static struct timeval *choose_timeout(struct timeval *tv1, /* Function: select_timer_block * Arguments: until: an absolute time when the select should return - * + * * This function dispatches the various file descriptors' handler * functions, if the kernel indicates there is io available. */ @@ -379,7 +381,7 @@ again: /* Function: select_timer_block * Arguments: until: an absolute time when the select should return - * + * * This function dispatches the various file descriptors' handler * functions, if the kernel indicates there is io available. */ @@ -409,7 +411,7 @@ again: #endif /* ENABLE_SELECT_DISPATCH */ /* Function: init_unix_timer() - * is called to initialize the library + * is called to initialize the library */ void init_unix_timer() { diff --git a/lnet/ulnds/socklnd/address.c b/lnet/ulnds/socklnd/address.c index f329e2a..07b4249 100644 --- a/lnet/ulnds/socklnd/address.c +++ b/lnet/ulnds/socklnd/address.c @@ -25,6 +25,8 @@ * mapping of virtual nodes into the port range of an IP socket. 
*/ +#define DEBUG_SUBSYSTEM S_NAL + #include #include #include @@ -59,7 +61,7 @@ static unsigned int get_node_id(void) x = 0; return(ntohl(x)); } - else + else { if (host_envp[1] != 'x') { @@ -98,15 +100,15 @@ void set_address(bridge t,ptl_pid_t pidrequest) void set_address(bridge t,ptl_pid_t pidrequest) { - int virtnode, in_addr, port; + int virtnode, in_addr, port; ptl_pid_t pid; /* get and remember my node id*/ if (!getenv("PTL_VIRTNODE")) virtnode = 0; - else + else { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT + int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT >> PNAL_VNODE_SHIFT); virtnode = atoi(getenv("PTL_VIRTNODE")); if (virtnode > maxvnode) @@ -116,11 +118,11 @@ void set_address(bridge t,ptl_pid_t pidrequest) return; } } - + in_addr = get_node_id(); t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) + t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK) << PNAL_VNODE_SHIFT) + virtnode; pid=pidrequest; @@ -128,7 +130,7 @@ void set_address(bridge t,ptl_pid_t pidrequest) #ifdef notyet if (pid==(unsigned short)PTL_PID_ANY) port = 0; #endif - if (pid==(unsigned short)PTL_PID_ANY) + if (pid==(unsigned short)PTL_PID_ANY) { fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); return; diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c index b399fcf..ed68355 100644 --- a/lnet/ulnds/socklnd/connection.c +++ b/lnet/ulnds/socklnd/connection.c @@ -39,10 +39,9 @@ #include #include #include -#include #include #include -#include +#include #include #include #include diff --git a/lnet/ulnds/socklnd/select.c b/lnet/ulnds/socklnd/select.c index 09e1542..49c770f 100644 --- a/lnet/ulnds/socklnd/select.c +++ b/lnet/ulnds/socklnd/select.c @@ -25,6 +25,8 @@ * io events through the select system call. 
*/ +#define DEBUG_SUBSYSTEM S_NAL + #ifdef sun #include #else @@ -55,7 +57,7 @@ static io_handler io_handlers; when now() { struct timeval result; - + gettimeofday(&result,0); return((((unsigned long long)result.tv_sec)<<32)| (((unsigned long long)result.tv_usec)<<32)/1000000); @@ -91,7 +93,7 @@ io_handler register_io_handler(int fd, * Arguments: i: a pointer to the handler to stop servicing * * remove_io_handler() doesn't actually free the handler, due - * to reentrancy problems. it just marks the handler for + * to reentrancy problems. it just marks the handler for * later cleanup by the blocking function. */ void remove_io_handler (io_handler i) @@ -282,7 +284,7 @@ static struct timeval *choose_timeout(struct timeval *tv1, /* Function: select_timer_block * Arguments: until: an absolute time when the select should return - * + * * This function dispatches the various file descriptors' handler * functions, if the kernel indicates there is io available. */ @@ -379,7 +381,7 @@ again: /* Function: select_timer_block * Arguments: until: an absolute time when the select should return - * + * * This function dispatches the various file descriptors' handler * functions, if the kernel indicates there is io available. 
*/ @@ -409,7 +411,7 @@ again: #endif /* ENABLE_SELECT_DISPATCH */ /* Function: init_unix_timer() - * is called to initialize the library + * is called to initialize the library */ void init_unix_timer() { diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c index 524d128..e5bb46b 100644 --- a/lnet/utils/acceptor.c +++ b/lnet/utils/acceptor.c @@ -19,8 +19,8 @@ #include #endif +#include #include -#include #include #include diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 83147ae..2931321 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -26,21 +26,15 @@ #define __USE_FILE_OFFSET64 #define _GNU_SOURCE -#include - #include #ifdef HAVE_NETDB_H #include #endif #include #include -#include "ioctl.h" #include #include #include -#ifndef __CYGWIN__ -# include -#endif #include #include @@ -48,19 +42,11 @@ #include #include -#ifdef HAVE_LINUX_VERSION_H -#include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define BUG() /* workaround for module.h includes */ -#include -#endif -#endif /* !HAVE_LINUX_VERSION_H */ - #include #include #include +#include #include "parser.h" #include @@ -303,7 +289,7 @@ static int parse_buffer(FILE *in, FILE *out) unsigned long dropped = 0, kept = 0; struct list_head chunk_list; - INIT_LIST_HEAD(&chunk_list); + CFS_INIT_LIST_HEAD(&chunk_list); while (1) { rc = fread(buf, sizeof(hdr->ph_len), 1, in); diff --git a/lnet/utils/gmlndnid.c b/lnet/utils/gmlndnid.c index e45fae4..6a119c0 100644 --- a/lnet/utils/gmlndnid.c +++ b/lnet/utils/gmlndnid.c @@ -33,17 +33,10 @@ #include #include -#include #include #define GMNAL_IOC_GET_GNID 1 -int -roundup(int len) -{ - return((len+7) & (~0x7)); -} - int main(int argc, char **argv) { int rc, pfd; diff --git a/lnet/utils/l_ioctl.c b/lnet/utils/l_ioctl.c index 0671c24..01dccb1 100644 --- a/lnet/utils/l_ioctl.c +++ b/lnet/utils/l_ioctl.c @@ -35,13 +35,7 @@ #include #include - -#ifndef __CYGWIN__ - #include -#else - #include - #include -#endif +#include static ioc_handler_t do_ioctl; /* 
forward ref */ static ioc_handler_t *current_ioc_handler = &do_ioctl; diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index 4efca98..847fb00 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -32,13 +32,14 @@ #include #include #include -#include "ioctl.h" #include #include #include #include #include +#ifdef HAVE_ENDIAN_H #include +#endif #if CRAY_PORTALS #ifdef REDSTORM #define __QK__ @@ -46,15 +47,9 @@ #include #endif -#ifdef __CYGWIN__ - -#include - -#endif /* __CYGWIN__ */ - +#include #include #include -#include #include #include #include "parser.h" diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c index 6316290..986d081 100644 --- a/lnet/utils/wirecheck.c +++ b/lnet/utils/wirecheck.c @@ -5,10 +5,13 @@ #include #include #include -#include #include -extern size_t strnlen(const char *, size_t); +#include + +#ifndef HAVE_STRNLEN +#define strnlen(s, i) strlen(s) +#endif #define BLANK_LINE() \ do { \ -- 1.8.3.1