%patch
-Index: linux-2.6.0/fs/ext3/ialloc.c
+Index: linux-2.6.7/fs/ext3/ialloc.c
===================================================================
---- linux-2.6.0.orig/fs/ext3/ialloc.c 2004-01-14 18:54:11.000000000 +0300
-+++ linux-2.6.0/fs/ext3/ialloc.c 2004-01-14 18:54:12.000000000 +0300
-@@ -627,6 +627,11 @@
- inode->i_generation = EXT3_SB(sb)->s_next_generation++;
+--- linux-2.6.7.orig/fs/ext3/ialloc.c 2004-09-06 20:01:18.000000000 +0800
++++ linux-2.6.7/fs/ext3/ialloc.c 2004-09-06 20:04:42.000000000 +0800
+@@ -629,6 +629,11 @@
+ spin_unlock(&sbi->s_next_gen_lock);
ei->i_state = EXT3_STATE_NEW;
+ if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) {
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
-Index: linux-2.6.0/fs/ext3/inode.c
+Index: linux-2.6.7/fs/ext3/inode.c
===================================================================
---- linux-2.6.0.orig/fs/ext3/inode.c 2004-01-14 18:54:12.000000000 +0300
-+++ linux-2.6.0/fs/ext3/inode.c 2004-01-14 19:09:46.000000000 +0300
-@@ -2339,7 +2339,7 @@
+--- linux-2.6.7.orig/fs/ext3/inode.c 2004-09-06 20:01:20.000000000 +0800
++++ linux-2.6.7/fs/ext3/inode.c 2004-09-06 20:04:42.000000000 +0800
+@@ -2349,7 +2349,7 @@
* trying to determine the inode's location on-disk and no read need be
* performed.
*/
struct ext3_iloc *iloc, int in_mem)
{
unsigned long block;
-@@ -2547,6 +2547,11 @@
+@@ -2558,6 +2558,11 @@
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext3_file_inode_operations;
inode->i_fop = &ext3_file_operations;
-@@ -2682,6 +2687,9 @@
+@@ -2693,6 +2698,9 @@
} else for (block = 0; block < EXT3_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block];
BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
rc = ext3_journal_dirty_metadata(handle, bh);
if (!err)
-Index: linux-2.6.0/fs/ext3/xattr.c
+Index: linux-2.6.7/fs/ext3/xattr.c
===================================================================
---- linux-2.6.0.orig/fs/ext3/xattr.c 2003-12-30 08:33:13.000000000 +0300
-+++ linux-2.6.0/fs/ext3/xattr.c 2004-01-14 18:54:12.000000000 +0300
+--- linux-2.6.7.orig/fs/ext3/xattr.c 2004-06-16 13:19:36.000000000 +0800
++++ linux-2.6.7/fs/ext3/xattr.c 2004-09-06 20:05:40.000000000 +0800
@@ -246,17 +246,12 @@
}
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return -ENOENT;
+
-+ ret = ext3_get_inode_loc(inode, &iloc);
++ ret = ext3_get_inode_loc(inode, &iloc, 1);
+ if (ret)
+ return ret;
+ raw_inode = ext3_raw_inode(&iloc);
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return 0;
+
-+ ret = ext3_get_inode_loc(inode, &iloc);
++ ret = ext3_get_inode_loc(inode, &iloc, 1);
+ if (ret)
+ return ret;
+ raw_inode = ext3_raw_inode(&iloc);
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return ret;
+
-+ err = ext3_get_inode_loc(inode, &iloc);
++ err = ext3_get_inode_loc(inode, &iloc, 1);
+ if (err)
+ return -EIO;
+ raw_inode = ext3_raw_inode(&iloc);
+ if (EXT3_SB(inode->i_sb)->s_inode_size <= EXT3_GOOD_OLD_INODE_SIZE)
+ return -ENOSPC;
+
-+ err = ext3_get_inode_loc(inode, &iloc);
++ err = ext3_get_inode_loc(inode, &iloc, 1);
+ if (err)
+ return err;
+ raw_inode = ext3_raw_inode(&iloc);
const char *name, const void *value, size_t value_len,
int flags)
{
-@@ -492,22 +1078,7 @@
+@@ -492,22 +1079,7 @@
* towards the end of the block).
* end -- Points right after the block pointed to by header.
*/
if (EXT3_I(inode)->i_file_acl) {
/* The inode already has an extended attribute block. */
bh = sb_bread(sb, EXT3_I(inode)->i_file_acl);
-@@ -733,7 +1304,6 @@
+@@ -733,7 +1305,6 @@
brelse(bh);
if (!(bh && header == HDR(bh)))
kfree(header);
return error;
}
-Index: linux-2.6.0/fs/ext3/xattr.h
+Index: linux-2.6.7/fs/ext3/xattr.h
===================================================================
---- linux-2.6.0.orig/fs/ext3/xattr.h 2003-06-24 18:04:43.000000000 +0400
-+++ linux-2.6.0/fs/ext3/xattr.h 2004-01-14 18:54:12.000000000 +0300
+--- linux-2.6.7.orig/fs/ext3/xattr.h 2004-06-16 13:20:04.000000000 +0800
++++ linux-2.6.7/fs/ext3/xattr.h 2004-09-06 20:04:42.000000000 +0800
@@ -77,7 +77,8 @@
extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext3_xattr_list(struct inode *, char *, size_t);
extern void ext3_xattr_delete_inode(handle_t *, struct inode *);
extern void ext3_xattr_put_super(struct super_block *);
-Index: linux-2.6.0/include/linux/ext3_fs.h
+Index: linux-2.6.7/include/linux/ext3_fs.h
===================================================================
---- linux-2.6.0.orig/include/linux/ext3_fs.h 2004-01-14 18:54:11.000000000 +0300
-+++ linux-2.6.0/include/linux/ext3_fs.h 2004-01-14 18:54:12.000000000 +0300
+--- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-09-06 20:01:19.000000000 +0800
++++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-06 20:04:42.000000000 +0800
@@ -265,6 +265,8 @@
__u32 m_i_reserved2[2];
} masix2;
};
#define i_size_high i_dir_acl
-Index: linux-2.6.0/include/linux/ext3_fs_i.h
+@@ -725,6 +727,7 @@
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+
++extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *, int);
+ extern void ext3_read_inode (struct inode *);
+ extern void ext3_write_inode (struct inode *, int);
+ extern int ext3_setattr (struct dentry *, struct iattr *);
+Index: linux-2.6.7/include/linux/ext3_fs_i.h
===================================================================
---- linux-2.6.0.orig/include/linux/ext3_fs_i.h 2003-12-30 08:32:44.000000000 +0300
-+++ linux-2.6.0/include/linux/ext3_fs_i.h 2004-01-14 18:54:12.000000000 +0300
+--- linux-2.6.7.orig/include/linux/ext3_fs_i.h 2004-06-16 13:18:52.000000000 +0800
++++ linux-2.6.7/include/linux/ext3_fs_i.h 2004-09-06 20:04:42.000000000 +0800
@@ -96,6 +96,9 @@
*/
loff_t i_disksize;
%diffstat
fs/ext3/ialloc.c | 5
fs/ext3/inode.c | 10
- fs/ext3/xattr.c | 634 +++++++++++++++++++++++++++++++++++++++++++---
+ fs/ext3/xattr.c | 635 +++++++++++++++++++++++++++++++++++++++++++---
fs/ext3/xattr.h | 3
- include/linux/ext3_fs.h | 2
+ include/linux/ext3_fs.h | 3
include/linux/ext3_fs_i.h | 3
- 6 files changed, 623 insertions(+), 34 deletions(-)
+ 6 files changed, 625 insertions(+), 34 deletions(-)
ext3_headers := $(wildcard @LINUX@/fs/ext3/*.h)
linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h)
-
+new_linux_hearders := ext3_extents.h
ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c))
-new_sources := iopen.c iopen.h
+new_sources := iopen.c iopen.h extents.c extents-in-ea.c
ldiskfs_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources)
-
ldiskfs-objs := $(filter %.o,$(ldiskfs_sources:.c=.o))
EXTRA_PRE_CFLAGS := -I@LINUX@/fs -I@LUSTRE@ -I@LUSTRE@/ldiskfs
# FIXME: we need to grab the series in configure somehow
# (see bug 1679)
#
-series := @top_srcdir@/kernel_patches/series/ldiskfs-2.6-suse.series
+series := @top_srcdir@/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
+patches := @top_srcdir@/kernel_patches/patches
sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
mkdir -p linux-stage/fs/ext3 linux-stage/include/linux
- cd linux-stage && quilt setup -l ../$(series)
cp $(ext3_sources) $(ext3_headers) $(ext3_extra) linux-stage/fs/ext3
cp $(linux_headers) linux-stage/include/linux
cd linux-stage && quilt push -a -q
+if USE_QUILT
+ cd linux-stage && quilt setup -l ../$(series) -d ../$(patches)
+ cd linux-stage && quilt push -a -q
+else
+# Apply every patch named in the series file, in order, stopping at the
+# first failure.  The list is read with cat because the '<file' form of
+# command substitution is a bash/ksh extension; under a plain POSIX
+# /bin/sh it expands to nothing and no patch would be applied.
+ @cd linux-stage && for i in $$(cat ../$(series)) ; do \
+ echo "patch -p1 < ../$(patches)/$$i" ; \
+ patch -p1 < ../$(patches)/$$i || exit 1 ; \
+ done
+endif
+
mkdir linux
@echo -n "Replacing 'ext3' with 'ldiskfs':"
@for i in $(notdir $(ext3_headers) $(ext3_sources)) $(new_sources) ; do \
sed $(strip $(ldiskfs_sed_flags)) \
linux-stage/fs/ext3/$$i > $$i ; \
done
- @for i in $(subst ext3,,$(notdir $(linux_headers))) ; do \
+ @for i in $(subst ext3,,$(notdir $(linux_headers)) $(new_linux_hearders)) ; do \
echo -n " ext3$$i" ; \
sed $(strip $(ldiskfs_sed_flags)) \
linux-stage/include/linux/ext3$$i \
> linux/ldiskfs$$i ; \
done
+ @echo
touch sources
foo-check:
# -------- we can't build modules unless srcdir = builddir
if test x$enable_modules != xno ; then
-AC_CHECK_FILE([autoMakefile.am],[],
- [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != buildir.])])
+ AC_CHECK_FILE([autoMakefile.am],[],
+ [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != buildir.])])
fi
# -------- in kernel compilation? (2.5 only) -------------
[
if test "$with_cray_portals" != no; then
if test -r $with_cray_portals/include/portals/api.h ; then
+ CRAY_PORTALS_PATH=$with_cray_portals
CRAY_PORTALS_INCLUDE="-I$with_cray_portals/include"
AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals])
else
AC_MSG_ERROR([--with-cray-portals specified badly])
- fi
- fi
+ fi
+ fi
],[with_cray_portals=no])
+AC_SUBST(CRAY_PORTALS_PATH)
AC_MSG_RESULT([$with_cray_portals])
AM_CONDITIONAL(CRAY_PORTALS, test x$with_cray_portals != xno)
enable_utils=no
fi
-# -------- set linuxdir ------------
-AC_MSG_CHECKING([for Linux sources])
-AC_ARG_WITH([linux],
- AC_HELP_STRING([--with-linux=path],
- [set path to Linux source (default=/usr/src/linux)]),
- [LINUX=$with_linux],
- [LINUX=/usr/src/linux])
-AC_MSG_RESULT([$LINUX])
-AC_SUBST(LINUX)
-if test x$enable_inkernel = xyes ; then
- echo ln -s `pwd` $LINUX/fs/lustre
- rm $LINUX/fs/lustre
- ln -s `pwd` $LINUX/fs/lustre
-fi
-
-# -------- check for .confg --------
-AC_ARG_WITH([linux-config],
- [AC_HELP_STRING([--with-linux-config=path],
- [set path to Linux .conf (default=\$LINUX/.config)])],
- [LINUX_CONFIG=$with_linux_config],
- [LINUX_CONFIG=$LINUX/.config])
-AC_SUBST(LINUX_CONFIG)
-
-AC_CHECK_FILE([/boot/kernel.h],
- [KERNEL_SOURCE_HEADER='/boot/kernel.h'],
- [AC_CHECK_FILE([/var/adm/running-kernel.h]),
- [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h']])
-
-AC_ARG_WITH([kernel-source-header],
- AC_HELP_STRING([--with-kernel-source-header=path],
- [Use a different kernel version header. Consult README.kernel-source for details.]),
- [KERNEL_SOURCE_HEADER=$with_kernel_source_header])
-
-# --------------------
-ARCH_UM=
-UML_CFLAGS=
if test x$enable_modules != xno ; then
+ # -------- set linuxdir ------------
+ AC_MSG_CHECKING([for Linux sources])
+ AC_ARG_WITH([linux],
+ AC_HELP_STRING([--with-linux=path],
+ [set path to Linux source (default=/usr/src/linux)]),
+ [LINUX=$with_linux],
+ [LINUX=/usr/src/linux])
+ AC_MSG_RESULT([$LINUX])
+ AC_SUBST(LINUX)
+ if test x$enable_inkernel = xyes ; then
+ echo ln -s `pwd` $LINUX/fs/lustre
+ rm $LINUX/fs/lustre
+ ln -s `pwd` $LINUX/fs/lustre
+ fi
+
+ # -------- check for .config --------
+ AC_ARG_WITH([linux-config],
+ [AC_HELP_STRING([--with-linux-config=path],
+ [set path to Linux .conf (default=\$LINUX/.config)])],
+ [LINUX_CONFIG=$with_linux_config],
+ [LINUX_CONFIG=$LINUX/.config])
+ AC_SUBST(LINUX_CONFIG)
+
+ # Pick a default kernel version header: prefer /boot/kernel.h and fall
+ # back to /var/adm/running-kernel.h when the former does not exist.
+ AC_CHECK_FILE([/boot/kernel.h],
+ [KERNEL_SOURCE_HEADER='/boot/kernel.h'],
+ [AC_CHECK_FILE([/var/adm/running-kernel.h],
+ [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h'])])
+
+ AC_ARG_WITH([kernel-source-header],
+ AC_HELP_STRING([--with-kernel-source-header=path],
+ [Use a different kernel version header. Consult README.kernel-source for details.]),
+ [KERNEL_SOURCE_HEADER=$with_kernel_source_header])
+
+ # --------------------
+ ARCH_UM=
+ UML_CFLAGS=
+
AC_MSG_CHECKING([if you are running user mode linux for $host_cpu])
if test -e $LINUX/include/asm-um ; then
if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
else
AC_MSG_RESULT([no (asm-um missing)])
fi
-fi
-AC_SUBST(ARCH_UM)
-AC_SUBST(UML_CFLAGS)
-# --------- Linux 25 ------------------
-AC_CHECK_FILE([$LINUX/include/linux/namei.h],
- [
- linux25="yes"
- KMODEXT=".ko"
- ],[
- KMODEXT=".o"
- linux25="no"
- ])
-AC_MSG_CHECKING([if you are using Linux 2.6])
-AC_MSG_RESULT([$linux25])
+ AC_SUBST(ARCH_UM)
+ AC_SUBST(UML_CFLAGS)
+
+ # --------- Linux 25 ------------------
+ AC_CHECK_FILE([$LINUX/include/linux/namei.h],
+ [
+ linux25="yes"
+ KMODEXT=".ko"
+ enable_ldiskfs="yes"
+ BACKINGFS="ldiskfs"
+ ],[
+ KMODEXT=".o"
+ linux25="no"
+ ])
+ AC_MSG_CHECKING([if you are using Linux 2.6])
+ AC_MSG_RESULT([$linux25])
+
+ AC_SUBST(LINUX25)
+ AC_SUBST(KMODEXT)
+
+ AC_PATH_PROG(PATCH, patch, [no])
+ AC_PATH_PROG(QUILT, quilt, [no])
+
+ if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then
+ AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
+ fi
+fi
AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
-AC_SUBST(KMODEXT)
+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
# ------- Makeflags ------------------
])
# ------------ LINUXRELEASE and moduledir ------------------
- AC_MSG_CHECKING([for Linux release])
- rm -f kernel-tests/conftest.i
- LINUXRELEASE=
+ MODULE_TARGET="SUBDIRS"
if test $linux25 = 'yes' ; then
+ # ------------ external module support ---------------------
makerule="$PWD/kernel-tests"
+ AC_MSG_CHECKING([for external module build support])
+ rm -f kernel-tests/conftest.i
+ LUSTRE_MODULE_TRY_MAKE([],[],
+ [$makerule LUSTRE_KERNEL_TEST=conftest.i],
+ [test -s kernel-tests/conftest.i],
+ [
+ AC_MSG_RESULT([no])
+ ],[
+ AC_MSG_RESULT([yes])
+ makerule="_module_$makerule"
+ MODULE_TARGET="M"
+ ])
else
makerule="_dir_$PWD/kernel-tests"
fi
+ AC_SUBST(MODULE_TARGET)
+ LINUXRELEASE=
+ rm -f kernel-tests/conftest.i
+ AC_MSG_CHECKING([for Linux release])
LUSTRE_MODULE_TRY_MAKE(
[#include <linux/version.h>],
[char *LINUXRELEASE;
AC_MSG_RESULT([$LINUXRELEASE])
AC_SUBST(LINUXRELEASE)
- moduledir='$(libdir)/modules/'$LINUXRELEASE/kernel
- AC_SUBST(moduledir)
-
+ moduledir='/lib/modules/'$LINUXRELEASE/kernel
modulefsdir='$(moduledir)/fs/$(PACKAGE)'
+ modulenetdir='$(moduledir)/net/$(PACKAGE)'
+
+ AC_SUBST(moduledir)
AC_SUBST(modulefsdir)
+ AC_SUBST(modulenetdir)
# ------------ RELEASE --------------------------------
AC_MSG_CHECKING([for Lustre release])
RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`"
AC_MSG_RESULT($RELEASE)
AC_SUBST(RELEASE)
-fi
-# ---------- Portals flags --------------------
-
-#AC_PREFIX_DEFAULT([])
-#if test "x$prefix" = xNONE || test "x$prefix" = x; then
-# usrprefix=/usr
-#else
-# usrprefix='${prefix}'
-#fi
-#AC_SUBST(usrprefix)
-
-AC_MSG_CHECKING([for zero-copy TCP support])
-AC_ARG_ENABLE([zerocopy],
- AC_HELP_STRING([--disable-zerocopy],
- [disable socknal zerocopy]),
- [],[enable_zerocopy='yes'])
-if test x$enable_zerocopy = xno ; then
- AC_MSG_RESULT([no (by request)])
-else
- ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`"
- if test "$ZCCD" != 0 ; then
- AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP])
- AC_MSG_RESULT(yes)
+ # ---------- Portals flags --------------------
+
+ AC_MSG_CHECKING([for zero-copy TCP support])
+ AC_ARG_ENABLE([zerocopy],
+ AC_HELP_STRING([--disable-zerocopy],
+ [disable socknal zerocopy]),
+ [],[enable_zerocopy='yes'])
+ if test x$enable_zerocopy = xno ; then
+ AC_MSG_RESULT([no (by request)])
else
- AC_MSG_RESULT([no (no kernel support)])
+ ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`"
+ if test "$ZCCD" != 0 ; then
+ AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP])
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_RESULT([no (no kernel support)])
+ fi
fi
-fi
-AC_MSG_CHECKING([for CPU affinity support])
-AC_ARG_ENABLE([affinity],
- AC_HELP_STRING([--disable-affinity],
- [disable process/irq affinity]),
- [],[enable_affinity='yes'])
-if test x$enable_affinity = xno ; then
- AC_MSG_RESULT([no (by request)])
-else
- SET_CPUS_ALLOW="`grep -c set_cpus_allowed $LINUX/kernel/softirq.c`"
- if test "$SET_CPUS_ALLOW" != 0 ; then
- AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
- AC_MSG_RESULT([yes])
+ AC_ARG_ENABLE([affinity],
+ AC_HELP_STRING([--disable-affinity],
+ [disable process/irq affinity]),
+ [],[enable_affinity='yes'])
+
+ AC_MSG_CHECKING([for CPU affinity support])
+ if test x$enable_affinity = xno ; then
+ AC_MSG_RESULT([no (by request)])
else
- AC_MSG_RESULT([no (no kernel support)])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/sched.h>
+ ],[
+ struct task_struct t;
+ #ifdef CPU_ARRAY_SIZE
+ cpumask_t m;
+ #else
+ unsigned long m;
+ #endif
+ set_cpus_allowed(&t, m);
+ ],[
+ AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
+ AC_MSG_RESULT([yes])
+ ],[
+ AC_MSG_RESULT([no (no kernel support)])
+ ])
fi
-fi
-
-#####################################
+ #####################################
-AC_MSG_CHECKING([if quadrics kernel headers are present])
-if test -d $LINUX/drivers/net/qsnet ; then
- AC_MSG_RESULT([yes])
- QSWNAL="qswnal"
- AC_MSG_CHECKING([for multirail EKC])
- if test -f $LINUX/include/elan/epcomms.h; then
- AC_MSG_RESULT([supported])
- QSWCPPFLAGS="-DMULTIRAIL_EKC=1"
- else
- AC_MSG_RESULT([not supported])
- if test -d $LINUX/drivers/net/qsnet/include; then
- QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
+ AC_MSG_CHECKING([if quadrics kernel headers are present])
+ if test -d $LINUX/drivers/net/qsnet ; then
+ AC_MSG_RESULT([yes])
+ QSWNAL="qswnal"
+ AC_MSG_CHECKING([for multirail EKC])
+ if test -f $LINUX/include/elan/epcomms.h; then
+ AC_MSG_RESULT([supported])
+ QSWCPPFLAGS="-DMULTIRAIL_EKC=1"
else
- QSWCPPFLAGS="-I$LINUX/include/linux"
+ AC_MSG_RESULT([not supported])
+ if test -d $LINUX/drivers/net/qsnet/include; then
+ QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
+ else
+ QSWCPPFLAGS="-I$LINUX/include/linux"
+ fi
fi
+ else
+ AC_MSG_RESULT([no])
+ QSWNAL=""
+ QSWCPPFLAGS=""
fi
-else
- AC_MSG_RESULT([no])
- QSWNAL=""
- QSWCPPFLAGS=""
-fi
-AC_SUBST(QSWCPPFLAGS)
-AC_SUBST(QSWNAL)
-AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal")
+ AC_SUBST(QSWCPPFLAGS)
+ AC_SUBST(QSWNAL)
-AC_MSG_CHECKING([if gm support was requested])
-AC_ARG_WITH([gm],
- AC_HELP_STRING([--with-gm=path],
- [build gmnal against path]),
- [
- case $with_gm in
- yes)
- AC_MSG_RESULT([yes])
- GMCPPFLAGS="-I/usr/local/gm/include"
- GMNAL="gmnal"
- ;;
- no)
- AC_MSG_RESULT([no])
- GMCPPFLAGS=""
- GMNAL=""
- ;;
- *)
- AC_MSG_RESULT([yes])
- GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm"
- GMNAL="gmnal"
- ;;
- esac
- ],[
- AC_MSG_RESULT([no])
- GMCPPFLAGS=""
- GMNAL=""
- ])
-AC_SUBST(GMCPPFLAGS)
-AC_SUBST(GMNAL)
-AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
+ AC_MSG_CHECKING([if gm support was requested])
+ AC_ARG_WITH([gm],
+ AC_HELP_STRING([--with-gm=path],
+ [build gmnal against path]),
+ [
+ case $with_gm in
+ yes)
+ AC_MSG_RESULT([yes])
+ GMCPPFLAGS="-I/usr/local/gm/include"
+ GMNAL="gmnal"
+ ;;
+ no)
+ AC_MSG_RESULT([no])
+ GMCPPFLAGS=""
+ GMNAL=""
+ ;;
+ *)
+ AC_MSG_RESULT([yes])
+ GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm"
+ GMNAL="gmnal"
+ ;;
+ esac
+ ],[
+ AC_MSG_RESULT([no])
+ GMCPPFLAGS=""
+ GMNAL=""
+ ])
+ AC_SUBST(GMCPPFLAGS)
+ AC_SUBST(GMNAL)
+
+ #### OpenIB
+ AC_MSG_CHECKING([if OpenIB kernel headers are present])
+ OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD"
+ EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS"
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <ts_ib_core.h>
+ ],[
+ struct ib_device_properties props;
+ return 0;
+ ],[
+ AC_MSG_RESULT([yes])
+ OPENIBNAL="openibnal"
+ ],[
+ AC_MSG_RESULT([no])
+ OPENIBNAL=""
+ OPENIBCPPFLAGS=""
+ ])
+ EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
+ AC_SUBST(OPENIBCPPFLAGS)
+ AC_SUBST(OPENIBNAL)
-#fixme: where are the default IB includes?
-default_ib_include_dir=/usr/local/ib/include
-an_ib_include_file=vapi.h
+ # ---------- Red Hat 2.4.18 has iobuf->dovary --------------
+ # But other kernels don't
-AC_MSG_CHECKING([if ib nal support was requested])
-AC_ARG_WITH([ib],
- AC_HELP_STRING([--with-ib=yes/no/path],
- [Path to IB includes]),
- [
- case $with_ib in
- yes)
- AC_MSG_RESULT([yes])
- IBCPPFLAGS="-I/usr/local/ib/include"
- IBNAL="ibnal"
- ;;
- no)
- AC_MSG_RESULT([no])
- IBCPPFLAGS=""
- IBNAL=""
- ;;
- *)
- AC_MSG_RESULT([yes])
- IBCPPFLAGS="-I$with_ib"
- IBNAL=""
- ;;
- esac
- ],[
- AC_MSG_RESULT([no])
- IBFLAGS=""
- IBNAL=""
- ])
-AC_SUBST(IBNAL)
-AC_SUBST(IBCPPFLAGS)
-AM_CONDITIONAL(BUILD_IBNAL, test x$IBNAL = "xibnal")
+ AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/iobuf.h>
+ ],[
+ struct kiobuf iobuf;
+ iobuf.dovary = 1;
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+
+ # ----------- 2.6.4 no longer has page->list ---------------
+ AC_MSG_CHECKING([if struct page has a list field])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/mm.h>
+ ],[
+ struct page page;
+ &page.list;
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
-AC_SUBST(MOD_LINK)
-AC_SUBST(LINUX25)
+ # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
+ # This needs to run after we've defined the KCPPFLAGS
-# ---------- Red Hat 2.4.18 has iobuf->dovary --------------
-# But other kernels don't
+ AC_MSG_CHECKING([if task_struct has a sighand field])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/sched.h>
+ ],[
+ struct task_struct p;
+ p.sighand = NULL;
+ ],[
+ AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
+ AC_MSG_RESULT([yes])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
-AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/iobuf.h>
- ],[
- struct kiobuf iobuf;
- iobuf.dovary = 1;
- ],[
+ # ---------- 2.4.20 introduced cond_resched --------------
+
+ AC_MSG_CHECKING([if kernel offers cond_resched])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/sched.h>
+ ],[
+ cond_resched();
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+
+ # --------- zap_page_range(vma) --------------------------------
+ AC_MSG_CHECKING([if zap_pag_range with vma parameter])
+ ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
+ if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
+ AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
- ],[
+ else
AC_MSG_RESULT([no])
- ])
+ fi
-# ----------- 2.6.4 no longer has page->list ---------------
-AC_MSG_CHECKING([if struct page has a list field])
-LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/mm.h>
- ],[
- struct page page;
- &page.list;
- ],[
+ # ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
+
+ AC_MSG_CHECKING([if kernel defines PDE])
+ HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
+ if test "$HAVE_PDE" != 0 ; then
+ AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
- ],[
+ else
AC_MSG_RESULT([no])
- ])
+ fi
-# ---------- Red Hat 2.4.20 backports some 2.5 bits --------
-# This needs to run after we've defined the KCPPFLAGS
+ AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
+ HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
+ if test "$HAVE_DIO_FILE" != 0 ; then
+ AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
+ AC_MSG_RESULT(yes)
+ else
+ AC_MSG_RESULT(no)
+ fi
-AC_MSG_CHECKING([for kernel version])
-LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/sched.h>
- ],[
- struct task_struct p;
- p.sighand = NULL;
- ],[
- AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
- AC_MSG_RESULT([redhat-2.4.20])
- ],[
- AC_MSG_RESULT([$LINUXRELEASE])
- ])
-
-# ---------- 2.4.20 introduced cond_resched --------------
-
-AC_MSG_CHECKING([if kernel offers cond_resched])
-LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/sched.h>
- ],[
- cond_resched();
- ],[
- AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
- ],[
- AC_MSG_RESULT([no])
- ])
-# ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
-
-AC_MSG_CHECKING([if kernel defines PDE])
-HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
-if test "$HAVE_PDE" != 0 ; then
- AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
- AC_MSG_RESULT([yes])
-else
- AC_MSG_RESULT([no])
-fi
+ AC_MSG_CHECKING([if kernel defines cpu_online()])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/sched.h>
+ ],[
+ cpu_online(0);
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+ AC_MSG_CHECKING([if kernel defines cpumask_t])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/sched.h>
+ ],[
+ return sizeof (cpumask_t);
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+
+ # ---------- RHEL kernels define page_count in mm_inline.h
+ AC_MSG_CHECKING([if kernel has mm_inline.h header])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/mm_inline.h>
+ ],[
+ #ifndef page_count
+ #error mm_inline.h does not define page_count
+ #endif
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
+
+ # ---------- inode->i_alloc_sem --------------
+ AC_MSG_CHECKING([if struct inode has i_alloc_sem])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/fs.h>
+ #include <linux/version.h>
+ ],[
+ #if defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24))
+ #error "x86_64 down_read_trylock broken before 2.4.24"
+ #endif
+ struct inode i;
+ return (char *)&i.i_alloc_sem - (char *)&i;
+ ],[
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
+ ],[
+ AC_MSG_RESULT([no])
+ ])
-AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
-if test "$HAVE_DIO_FILE" != 0 ; then
- AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
- AC_MSG_RESULT(yes)
-else
- AC_MSG_RESULT(no)
-fi
-if test x$enable_modules != xno ; then
# ---------- modules? ------------------------
AC_MSG_CHECKING([for module support])
LUSTRE_MODULE_TRY_COMPILE(
AC_MSG_RESULT([no])
])
- if test $BACKINGFS = 'ext3' ; then
- # --- Check that ext3 and ext3 xattr are enabled in the kernel
- AC_MSG_CHECKING([that ext3 is enabled in the kernel])
- LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/config.h>
- ],[
- #ifndef CONFIG_EXT3_FS
- #ifndef CONFIG_EXT3_FS_MODULE
- #error CONFIG_EXT3_FS not #defined
- #endif
- #endif
- ],[
- AC_MSG_RESULT([yes])
- ],[
- AC_MSG_RESULT([no])
- AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)])
- ])
-
- AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel])
- LUSTRE_MODULE_TRY_COMPILE(
- [
- #include <linux/config.h>
- ],[
- #ifndef CONFIG_EXT3_FS_XATTR
- #error CONFIG_EXT3_FS_XATTR not #defined
- #endif
- ],[
- AC_MSG_RESULT([yes])
- ],[
- AC_MSG_RESULT([no])
- AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)])
- AC_MSG_WARN([This build may fail.])
- ])
- fi # BACKINGFS = ext3
+ case $BACKINGFS in
+ ext3)
+ # --- Check that ext3 and ext3 xattr are enabled in the kernel
+ AC_MSG_CHECKING([that ext3 is enabled in the kernel])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/config.h>
+ ],[
+ #ifndef CONFIG_EXT3_FS
+ #ifndef CONFIG_EXT3_FS_MODULE
+ #error CONFIG_EXT3_FS not #defined
+ #endif
+ #endif
+ ],[
+ AC_MSG_RESULT([yes])
+ ],[
+ AC_MSG_RESULT([no])
+ AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)])
+ ])
+
+ AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/config.h>
+ ],[
+ #ifndef CONFIG_EXT3_FS_XATTR
+ #error CONFIG_EXT3_FS_XATTR not #defined
+ #endif
+ ],[
+ AC_MSG_RESULT([yes])
+ ],[
+ AC_MSG_RESULT([no])
+ AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)])
+ AC_MSG_WARN([This build may fail.])
+ ])
+ ;;
+ ldiskfs)
+ AC_MSG_CHECKING([if fshooks are present])
+ LUSTRE_MODULE_TRY_COMPILE(
+ [
+ #include <linux/fshooks.h>
+ ],[],[
+ AC_MSG_RESULT([yes])
+ LDISKFS_SERIES="2.6-suse.series"
+ ],[
+ AC_MSG_RESULT([no])
+ LDISKFS_SERIES="2.6-vanilla.series"
+ ])
+ AC_SUBST(LDISKFS_SERIES)
+ # --- check which ldiskfs series we should use
+ ;;
+ esac # $BACKINGFS
fi
-# ---------- check ->lookup_raw() support --------
-
-AC_MSG_CHECKING([if kernel supports ->lookup_raw()])
-HAVE_LOOKUP_RAW="`grep -c 'lookup_raw.*struct inode' $LINUX/include/linux/fs.h`"
-if test "$HAVE_LOOKUP_RAW" != 0 ; then
- AC_DEFINE(HAVE_LOOKUP_RAW, 1, [the kernel supports ->lookup_raw()])
- AC_MSG_RESULT([yes])
-else
- AC_MSG_RESULT([no])
-fi
+AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal")
+AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
+AM_CONDITIONAL(BUILD_OPENIBNAL, test x$OPENIBNAL = "xopenibnal")
CPPFLAGS="-include \$(top_builddir)/include/config.h $CPPFLAGS"
EXTRA_KCFLAGS="-include $PWD/include/config.h $EXTRA_KCFLAGS"
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-EXTRA_DIST = archdep.m4 build.m4 include
+EXTRA_DIST = archdep.m4 build.m4
-SUBDIRS = portals libcfs knals unals router tests doc utils
+SUBDIRS = portals libcfs knals unals router tests doc utils include
fi
# directories for binaries
-ac_default_prefix=
-bindir='${exec_prefix}/usr/bin'
-sbindir='${exec_prefix}/usr/sbin'
-includedir='${prefix}/usr/include'
+ac_default_prefix=/usr
-rootsbindir='${exec_prefix}/sbin'
+# mount.lustre
+rootsbindir='/sbin'
AC_SUBST(rootsbindir)
-
+sysconfdir='/etc'
+AC_SUBST(sysconfdir)
# Directories for documentation and demos.
-docdir='${prefix}/usr/share/doc/$(PACKAGE)'
+docdir='${datadir}/doc/$(PACKAGE)'
AC_SUBST(docdir)
demodir='$(docdir)/demo'
AC_SUBST(demodir)
-pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples'
+pkgexampledir='${pkgdatadir}/examples'
AC_SUBST(pkgexampledir)
-pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre'
+pymoddir='${pkglibdir}/python/Lustre'
AC_SUBST(pymoddir)
-# for substitution in lconf
-PYMOD_DIR="/usr/lib/$PACKAGE/python"
-AC_SUBST(PYMOD_DIR)
-modulenetdir='$(moduledir)/net/$(PACKAGE)'
-AC_SUBST(modulenetdir)
-
# ---------- BAD gcc? ------------
AC_PROG_RANLIB
LIBEFENCE=""
fi
AC_SUBST(LIBEFENCE)
+
+# -------- enable acceptor libwrap (TCP wrappers) support? -------
+AC_MSG_CHECKING([if libwrap support is requested])
+AC_ARG_ENABLE([libwrap],
+ AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
+ [case "${enableval}" in
+ yes) enable_libwrap=yes ;;
+ no) enable_libwrap=no ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;;
+ esac],[enable_libwrap=no])
+AC_MSG_RESULT([$enable_libwrap])
+if test x$enable_libwrap = xyes ; then
+ LIBWRAP="-lwrap"
+ AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
+else
+ LIBWRAP=""
+fi
+AC_SUBST(LIBWRAP)
stamp-h
stamp-h1
stamp-h.in
+Makefile
+Makefile.in
--- /dev/null
+SUBDIRS = linux portals
+
+EXTRA_DIST = cygwin-ioctl.h
--- /dev/null
+linuxdir = $(includedir)/linux
+
+EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h \
+ portals_lib.h
const int line);
#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \
__FUNCTION__, __LINE__))
-/* it would be great to dump_stack() here, but some kernels
- * export it as show_stack() and I can't be bothered to
- * proprely engage in that dance right now */
#define LASSERTF(cond, fmt...) \
do { \
if (unlikely(!(cond))) { \
- portals_debug_msg(0, D_EMERG, __FILE__, __FUNCTION__,\
- __LINE__, CDEBUG_STACK, \
+ portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, __FILE__,\
+ __FUNCTION__,__LINE__, CDEBUG_STACK,\
"ASSERTION(" #cond ") failed:" fmt);\
LBUG(); \
} \
#define LBUG_WITH_LOC(file, func, line) \
do { \
CEMERG("LBUG\n"); \
+ CERROR("STACK: %s\n", portals_debug_dumpstack()); \
portals_debug_dumplog(); \
portals_run_lbug_upcall(file, func, line); \
set_task_state(current, TASK_UNINTERRUPTIBLE); \
* Support for temporary event tracing with minimal Heisenberg effect. */
#define LWT_SUPPORT 0
-#define LWT_MEMORY (64<<20)
-#define LWT_MAX_CPUS 4
+#define LWT_MEMORY (16<<20)
+#if !KLWT_SUPPORT
+/* kernel hasn't defined this? */
typedef struct {
- cycles_t lwte_when;
+ long long lwte_when;
char *lwte_where;
void *lwte_task;
long lwte_p1;
long lwte_p2;
long lwte_p3;
long lwte_p4;
-#if BITS_PER_LONG > 32
+# if BITS_PER_LONG > 32
long lwte_pad;
-#endif
+# endif
} lwt_event_t;
+#endif /* !KLWT_SUPPORT */
#if LWT_SUPPORT
-#ifdef __KERNEL__
-#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
+# ifdef __KERNEL__
+# if !KLWT_SUPPORT
typedef struct _lwt_page {
struct list_head lwtp_list;
extern int lwt_enabled;
extern lwt_cpu_t lwt_cpus[];
-extern int lwt_init (void);
-extern void lwt_fini (void);
-extern int lwt_lookup_string (int *size, char *knlptr,
- char *usrptr, int usrsize);
-extern int lwt_control (int enable, int clear);
-extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
- void *user_ptr, int user_size);
-
/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
* This stuff is meant for finding specific problems; it never stays in
* production code... */
#define LWTSTR(n) #n
#define LWTWHERE(f,l) f ":" LWTSTR(l)
+#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
#define LWT_EVENT(p1, p2, p3, p4) \
do { \
lwt_page_t *p; \
lwt_event_t *e; \
\
- local_irq_save (flags); \
- \
if (lwt_enabled) { \
+ local_irq_save (flags); \
+ \
cpu = &lwt_cpus[smp_processor_id()]; \
p = cpu->lwtc_current_page; \
e = &p->lwtp_events[cpu->lwtc_current_index++]; \
e->lwte_p2 = (long)(p2); \
e->lwte_p3 = (long)(p3); \
e->lwte_p4 = (long)(p4); \
- } \
\
- local_irq_restore (flags); \
+ local_irq_restore (flags); \
+ } \
} while (0)
-#else /* __KERNEL__ */
-#define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
-#endif /* __KERNEL__ */
+
+#endif /* !KLWT_SUPPORT */
+
+extern int lwt_init (void);
+extern void lwt_fini (void);
+extern int lwt_lookup_string (int *size, char *knlptr,
+ char *usrptr, int usrsize);
+extern int lwt_control (int enable, int clear);
+extern int lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
+ void *user_ptr, int user_size);
+# else /* __KERNEL__ */
+# define LWT_EVENT(p1,p2,p3,p4) /* no userland implementation yet */
+# endif /* __KERNEL__ */
#endif /* LWT_SUPPORT */
struct portals_device_userstate
#define IOC_PORTAL_MIN_NR 30
#define IOC_PORTAL_PING _IOWR('e', 30, long)
-#define IOC_PORTAL_GET_DEBUG _IOWR('e', 31, long)
+
#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long)
#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long)
#define IOC_PORTAL_PANIC _IOWR('e', 34, long)
#define IOC_PORTAL_NAL_CMD _IOWR('e', 35, long)
#define IOC_PORTAL_GET_NID _IOWR('e', 36, long)
#define IOC_PORTAL_FAIL_NID _IOWR('e', 37, long)
-#define IOC_PORTAL_SET_DAEMON _IOWR('e', 38, long)
+
#define IOC_PORTAL_LWT_CONTROL _IOWR('e', 39, long)
#define IOC_PORTAL_LWT_SNAPSHOT _IOWR('e', 40, long)
#define IOC_PORTAL_LWT_LOOKUP_STRING _IOWR('e', 41, long)
/* 4 unused */
TCPNAL = 5,
ROUTER = 6,
- IBNAL = 7,
+ OPENIBNAL = 7,
NAL_ENUM_END_MARKER
};
#define NAL_CMD_REGISTER_MYNID 102
#define NAL_CMD_PUSH_CONNECTION 103
#define NAL_CMD_GET_CONN 104
-#define NAL_CMD_DEL_AUTOCONN 105
-#define NAL_CMD_ADD_AUTOCONN 106
-#define NAL_CMD_GET_AUTOCONN 107
+#define NAL_CMD_DEL_PEER 105
+#define NAL_CMD_ADD_PEER 106
+#define NAL_CMD_GET_PEER 107
#define NAL_CMD_GET_TXDESC 108
#define NAL_CMD_ADD_ROUTE 109
#define NAL_CMD_DEL_ROUTE 110
#define NAL_CMD_GET_ROUTE 111
#define NAL_CMD_NOTIFY_ROUTER 112
+#define NAL_CMD_ADD_INTERFACE 113
+#define NAL_CMD_DEL_INTERFACE 114
+#define NAL_CMD_GET_INTERFACE 115
+
enum {
DEBUG_DAEMON_START = 1,
* vim:expandtab:shiftwidth=8:tabstop=8:
*/
#ifndef _LIBCFS_H
+#define _LIBCFS_H
-
-#define PORTAL_DEBUG
-
-/* I think this beast is just trying to get cycles_t and get_cycles().
- * this should be in its own header. */
-#ifdef __linux__
-# include <asm/types.h>
-# if defined(__powerpc__) && !defined(__KERNEL__)
-# define __KERNEL__
-# include <asm/timex.h>
-# undef __KERNEL__
-# else
-# if defined(__KERNEL__)
-# include <asm/timex.h>
-# else
-# include <sys/time.h>
-# define cycles_t unsigned long
-static inline cycles_t get_cycles(void)
-{
- struct timeval tv;
- gettimeofday(&tv, NULL);
- return (tv.tv_sec * 100000) + tv.tv_usec;
-}
-# endif
-# endif
-#else
-# include <sys/types.h>
-typedef u_int32_t __u32;
-typedef u_int64_t __u64;
-#endif
+#include <asm/types.h>
#ifdef __KERNEL__
# include <linux/time.h>
+# include <asm/timex.h>
#else
# include <sys/time.h>
# define do_gettimeofday(tv) gettimeofday(tv, NULL);
+typedef unsigned long long cycles_t;
#endif
+#define PORTAL_DEBUG
+
#ifndef offsetof
# define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
#endif
#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1))
+#ifndef __KERNEL__
+/* Userspace byte flipping */
+# include <endian.h>
+# include <byteswap.h>
+# define __swab16(x) bswap_16(x)
+# define __swab32(x) bswap_32(x)
+# define __swab64(x) bswap_64(x)
+# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
+# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
+# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define le16_to_cpu(x) (x)
+# define cpu_to_le16(x) (x)
+# define le32_to_cpu(x) (x)
+# define cpu_to_le32(x) (x)
+# define le64_to_cpu(x) (x)
+# define cpu_to_le64(x) (x)
+# else
+# if __BYTE_ORDER == __BIG_ENDIAN
+# define le16_to_cpu(x) bswap_16(x)
+# define cpu_to_le16(x) bswap_16(x)
+# define le32_to_cpu(x) bswap_32(x)
+# define cpu_to_le32(x) bswap_32(x)
+# define le64_to_cpu(x) bswap_64(x)
+# define cpu_to_le64(x) bswap_64(x)
+# else
+# error "Unknown byte order"
+# endif /* __BIG_ENDIAN */
+# endif /* __LITTLE_ENDIAN */
+#endif /* ! __KERNEL__ */
+
/*
* Debugging
*/
extern unsigned int portal_stack;
extern unsigned int portal_debug;
extern unsigned int portal_printk;
-extern unsigned int portal_cerror;
+
+#include <asm/types.h>
+struct ptldebug_header {
+ __u32 ph_len;
+ __u32 ph_flags;
+ __u32 ph_subsys;
+ __u32 ph_mask;
+ __u32 ph_cpu_id;
+ __u32 ph_sec;
+ __u64 ph_usec;
+ __u32 ph_stack;
+ __u32 ph_pid;
+ __u32 ph_extern_pid;
+ __u32 ph_line_num;
+} __attribute__((packed));
+
+#define PH_FLAG_FIRST_RECORD 1
+
/* Debugging subsystems (32 bits, non-overlapping) */
#define S_UNDEFINED 0x00000001
#define S_MDC 0x00000002
#define S_GMNAL 0x00080000
#define S_PTLROUTER 0x00100000
#define S_COBD 0x00200000
-#define S_IBNAL 0x00400000
+#define S_OPENIBNAL 0x00400000
#define S_SM 0x00800000
#define S_ASOBD 0x01000000
#define S_LMV 0x02000000
#define D_RPCTRACE 0x00100000 /* for distributed debugging */
#define D_VFSTRACE 0x00200000
#define D_READA 0x00400000 /* read-ahead */
-#define D_CONFIG 0x00800000
-
+#define D_MMAP 0x00800000
+#define D_CONFIG 0x01000000
#ifdef __KERNEL__
# include <linux/sched.h> /* THREAD_SIZE */
#else
#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-//#ifdef __KERNEL__
-#if 0
+#ifdef __KERNEL__
# ifdef __ia64__
# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_dwarf_cfa() & \
# define CDEBUG_STACK (THREAD_SIZE - \
((unsigned long)__builtin_frame_address(0) & \
(THREAD_SIZE - 1)))
-# endif
+# endif /* __ia64__ */
#define CHECK_STACK(stack) \
do { \
/*panic("LBUG");*/ \
} \
} while (0)
-#else /* __KERNEL__ */
+#else /* !__KERNEL__ */
#define CHECK_STACK(stack) do { } while(0)
#define CDEBUG_STACK (0L)
#endif /* __KERNEL__ */
#if 1
#define CDEBUG(mask, format, a...) \
do { \
- if (likely(portal_debug == 0)) \
- break; \
+ CHECK_STACK(CDEBUG_STACK); \
if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) || \
(portal_debug & (mask) && \
portal_subsystem_debug & DEBUG_SUBSYSTEM)) \
CDEBUG_STACK, format, ## a); \
} while (0)
-#define CWARN(format, a...) \
-do { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING, \
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, format, ## a); \
-} while (0)
-
-#define CERROR(format, a...) \
-do { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_ERROR, \
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, format, ## a); \
-} while (0)
-
-#define CEMERG(format, a...) \
-do { \
- portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG, \
- __FILE__, __FUNCTION__, __LINE__, \
- CDEBUG_STACK, format, ## a); \
-} while (0)
+#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
+#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
+#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
#define GOTO(label, rc) \
do { \
} while(0)
#else
#define CDEBUG(mask, format, a...) do { } while (0)
-#define CWARN(format, a...) printk("<4>" format, ## a)
-#define CERROR(format, a...) printk("<3>" format, ## a)
-#define CEMERG(format, a...) printk("<0>" format, ## a)
+#define CWARN(format, a...) printk(KERN_WARNING format, ## a)
+#define CERROR(format, a...) printk(KERN_ERR format, ## a)
+#define CEMERG(format, a...) printk(KERN_EMERG format, ## a)
#define GOTO(label, rc) do { (void)(rc); goto label; } while (0)
#define RETURN(rc) return (rc)
#define ENTRY do { } while (0)
#define PAGE_LIST(page) ((page)->lru)
#endif
+#ifndef HAVE_CPU_ONLINE
+/* Compat fallback for kernels without cpu_online(): test_bit() takes
+ * (bit number, bitmap address), so test this CPU's bit in cpu_online_map. */
+#define cpu_online(cpu) (test_bit(cpu, &(cpu_online_map)))
+#endif
+#ifndef HAVE_CPUMASK_T
+#define cpu_set(cpu, map) (set_bit(cpu, &(map)))
+typedef unsigned long cpumask_t;
+#endif
+
#endif /* _PORTALS_COMPAT_H */
return size_round(strlen(fset) + 1);
}
-#ifdef __KERNEL__
-static inline char *strdup(const char *str)
-{
- int len = strlen(str) + 1;
- char *tmp = kmalloc(len, GFP_KERNEL);
- if (tmp)
- memcpy(tmp, str, len);
-
- return tmp;
-}
-#endif
-
-#ifdef __KERNEL__
-# define NTOH__u16(var) le16_to_cpu(var)
-# define NTOH__u32(var) le32_to_cpu(var)
-# define NTOH__u64(var) le64_to_cpu(var)
-# define HTON__u16(var) cpu_to_le16(var)
-# define HTON__u32(var) cpu_to_le32(var)
-# define HTON__u64(var) cpu_to_le64(var)
-#else
-# define expansion_u64(var) \
- ({ __u64 ret; \
- switch (sizeof(var)) { \
- case 8: (ret) = (var); break; \
- case 4: (ret) = (__u32)(var); break; \
- case 2: (ret) = (__u16)(var); break; \
- case 1: (ret) = (__u8)(var); break; \
- }; \
- (ret); \
- })
-# define NTOH__u16(var) (var)
-# define NTOH__u32(var) (var)
-# define NTOH__u64(var) (expansion_u64(var))
-# define HTON__u16(var) (var)
-# define HTON__u32(var) (var)
-# define HTON__u64(var) (expansion_u64(var))
-#endif
-
-/*
- * copy sizeof(type) bytes from pointer to var and move ptr forward.
- * return EFAULT if pointer goes beyond end
- */
-#define UNLOGV(var,type,ptr,end) \
-do { \
- var = *(type *)ptr; \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
-} while (0)
-
-/* the following two macros convert to little endian */
-/* type MUST be __u32 or __u64 */
-#define LUNLOGV(var,type,ptr,end) \
-do { \
- var = NTOH##type(*(type *)ptr); \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
-} while (0)
-
-/* now log values */
-#define LOGV(var,type,ptr) \
-do { \
- *((type *)ptr) = var; \
- ptr += sizeof(type); \
-} while (0)
-
-/* and in network order */
-#define LLOGV(var,type,ptr) \
-do { \
- *((type *)ptr) = HTON##type(var); \
- ptr += sizeof(type); \
-} while (0)
-
-
-/*
- * set var to point at (type *)ptr, move ptr forward with sizeof(type)
- * return from function with EFAULT if ptr goes beyond end
- */
-#define UNLOGP(var,type,ptr,end) \
-do { \
- var = (type *)ptr; \
- ptr += sizeof(type); \
- if (ptr > end ) \
- return -EFAULT; \
-} while (0)
-
-#define LOGP(var,type,ptr) \
-do { \
- memcpy(ptr, var, sizeof(type)); \
- ptr += sizeof(type); \
-} while (0)
-
-/*
- * set var to point at (char *)ptr, move ptr forward by size_round(len);
- * return from function with EFAULT if ptr goes beyond end
- */
-#define UNLOGL(var,type,len,ptr,end) \
-do { \
- var = (type *)ptr; \
- ptr += size_round(len * sizeof(type)); \
- if (ptr > end ) \
- return -EFAULT; \
-} while (0)
-
-#define UNLOGL0(var,type,len,ptr,end) \
-do { \
- UNLOGL(var,type,len,ptr,end); \
- if ( *((char *)ptr - size_round(len) + len - 1) != '\0') \
- return -EFAULT; \
-} while (0)
-
#define LOGL(var,len,ptr) \
do { \
if (var) \
--- /dev/null
+portalsdir=$(includedir)/portals
+
+if UTILS
+portals_HEADERS = list.h
+endif
+
+EXTRA_DIST = api.h api-support.h build_check.h errno.h \
+ internal.h lib-p30.h lib-types.h list.h \
+ lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h \
+ socknal.h stringtab.h types.h
int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
+int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid);
+
/*
* Network interfaces
+++ /dev/null
-#include "build_check.h"
-/*
-**
-** This files contains definitions that are used throughout the cplant code.
-*/
-
-#ifndef CPLANT_H
-#define CPLANT_H
-
-#define TITLE(fname,zmig)
-
-
-/*
-** TRUE and FALSE
-*/
-#undef TRUE
-#define TRUE (1)
-#undef FALSE
-#define FALSE (0)
-
-
-/*
-** Return codes from functions
-*/
-#undef OK
-#define OK (0)
-#undef ERROR
-#define ERROR (-1)
-
-
-
-/*
-** The GCC macro for a safe max() that works on all types arithmetic types.
-*/
-#ifndef MAX
-#define MAX(a, b) (a) > (b) ? (a) : (b)
-#endif /* MAX */
-
-#ifndef MIN
-#define MIN(a, b) (a) < (b) ? (a) : (b)
-#endif /* MIN */
-
-/*
-** The rest is from the old qkdefs.h
-*/
-
-#ifndef __linux__
-#define __inline__
-#endif
-
-#ifndef NULL
-#define NULL ((void *)0)
-#endif
-
-#ifndef __osf__
-#define PRIVATE static
-#define PUBLIC
-#endif
-
-#ifndef __osf__
-typedef unsigned char uchar;
-#endif
-
-typedef char CHAR;
-typedef unsigned char UCHAR;
-typedef char INT8;
-typedef unsigned char UINT8;
-typedef short int INT16;
-typedef unsigned short int UINT16;
-typedef int INT32;
-typedef unsigned int UINT32;
-typedef long LONG32;
-typedef unsigned long ULONG32;
-
-/* long may be 32 or 64, so we can't really append the size to the definition */
-typedef long LONG;
-typedef unsigned long ULONG;
-
-#ifdef __alpha__
-typedef long int_t;
-#ifndef __osf__
-typedef unsigned long uint_t;
-#endif
-#endif
-
-#ifdef __i386__
-typedef int int_t;
-typedef unsigned int uint_t;
-#endif
-
-typedef float FLOAT32;
-typedef double FLOAT64;
-typedef void VOID;
-typedef INT32 BOOLEAN;
-typedef void (*FCN_PTR)(void);
-
-#ifndef off64_t
-
-#if defined (__alpha__) || defined (__ia64__)
-typedef long off64_t;
-#else
-typedef long long off64_t;
-#endif
-
-#endif
-
-/*
-** Process related typedefs
-*/
-typedef UINT16 PID_TYPE; /* Type of Local process ID */
-typedef UINT16 NID_TYPE; /* Type of Physical node ID */
-typedef UINT16 GID_TYPE; /* Type of Group ID */
-typedef UINT16 RANK_TYPE; /* Type of Logical rank/process within a group */
-
-
-
-#endif /* CPLANT_H */
#define PORTALS_PROTO_MAGIC 0xeebc0ded
-#define PORTALS_PROTO_VERSION_MAJOR 0
-#define PORTALS_PROTO_VERSION_MINOR 3
+#define PORTALS_PROTO_VERSION_MAJOR 1
+#define PORTALS_PROTO_VERSION_MINOR 0
typedef struct {
long recv_count, recv_length, send_count, send_length, drop_count,
} lib_counters_t;
/* temporary expedient: limit number of entries in discontiguous MDs */
-#define PTL_MTU (512<<10)
-#define PTL_MD_MAX_IOV 128
+#define PTL_MTU (1<<20)
+#define PTL_MD_MAX_IOV 256
struct lib_msg_t {
struct list_head msg_list;
int jt_ptl_print_autoconnects (int argc, char **argv);
int jt_ptl_add_autoconnect (int argc, char **argv);
int jt_ptl_del_autoconnect (int argc, char **argv);
+int jt_ptl_print_interfaces(int argc, char **argv);
+int jt_ptl_add_interface(int argc, char **argv);
+int jt_ptl_del_interface(int argc, char **argv);
+int jt_ptl_print_peers (int argc, char **argv);
+int jt_ptl_add_peer (int argc, char **argv);
+int jt_ptl_del_peer (int argc, char **argv);
int jt_ptl_print_connections (int argc, char **argv);
int jt_ptl_connect(int argc, char **argv);
int jt_ptl_disconnect(int argc, char **argv);
+++ /dev/null
-#ifndef _INCppidh_
-#define _INCppidh_
-
-#include "defines.h"
-// #include "idtypes.h"
-
-
-#define MAX_PPID 1000 /* this needs to fit into 16 bits so the
- maximum value is 65535. having it "large"
- can help w/ debugging process accounting
- but there are reasons for making it
- somewhat smaller than the maximum --
- requiring storage for arrays that index
- on the ppid, eg... */
-
-#define MAX_GID 1000 /* this needs to fit into 16 bits... */
-
-#define MAX_FIXED_PPID 100
-#define MAX_FIXED_GID 100
-#define PPID_FLOATING MAX_FIXED_PPID+1 /* Floating area starts here */
-#define GID_FLOATING MAX_FIXED_GID+1 /* Floating area starts here */
-#define NUM_PTL_TASKS MAX_FIXED_PPID+80 /* Maximum no. portals tasks */
-
-#define PPID_AUTO 0
-
-/* Minimum PPID is 1 */
-#define PPID_BEBOPD 1 /* bebopd */
-#define GID_BEBOPD 1 /* bebopd */
-
-#define PPID_PCT 2 /* pct */
-#define GID_PCT 2 /* pct */
-
-#define PPID_FYOD 3 /* fyod */
-#define GID_FYOD 3 /* fyod */
-
-#define PPID_GDBWRAP 11 /* portals proxy for gdb */
-#define GID_GDBWRAP 11 /* portals proxy for gdb */
-
-#define PPID_TEST 15 /* for portals tests */
-#define GID_TEST 15
-
-#define GID_YOD 5 /* yod */
-#define GID_PINGD 6 /* pingd */
-#define GID_BT 7 /* bt */
-#define GID_PTLTEST 8 /* ptltest */
-#define GID_CGDB 9 /* cgdb */
-#define GID_TVDSVR 10 /* start-tvdsvr */
-
-#endif /* _INCppidh_ */
int jt_ptl_print_autoconnects (int argc, char **argv);
int jt_ptl_add_autoconnect (int argc, char **argv);
int jt_ptl_del_autoconnect (int argc, char **argv);
+int jt_ptl_print_interfaces(int argc, char **argv);
+int jt_ptl_add_interface(int argc, char **argv);
+int jt_ptl_del_interface(int argc, char **argv);
+int jt_ptl_print_peers (int argc, char **argv);
+int jt_ptl_add_peer (int argc, char **argv);
+int jt_ptl_del_peer (int argc, char **argv);
int jt_ptl_print_connections (int argc, char **argv);
int jt_ptl_connect(int argc, char **argv);
int jt_ptl_disconnect(int argc, char **argv);
#define PTL_NI_OK PTL_OK
typedef ptl_err_t ptl_ni_fail_t;
+typedef __u32 ptl_uid_t;
+typedef __u32 ptl_jid_t;
typedef __u64 ptl_nid_t;
typedef __u32 ptl_pid_t;
typedef __u32 ptl_pt_index_t;
return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
}
+#define PTL_UID_ANY ((ptl_uid_t) -1)
+#define PTL_JID_ANY ((ptl_jid_t) -1)
#define PTL_NID_ANY ((ptl_nid_t) -1)
#define PTL_PID_ANY ((ptl_pid_t) -1)
int max_size;
unsigned int options;
void *user_ptr;
- ptl_handle_eq_t eventq;
+ ptl_handle_eq_t eq_handle;
} ptl_md_t;
/* Options for the MD structure */
typedef struct {
ptl_event_kind_t type;
ptl_process_id_t initiator;
- ptl_pt_index_t portal;
+ ptl_uid_t uid;
+ ptl_jid_t jid;
+ ptl_pt_index_t pt_index;
ptl_match_bits_t match_bits;
ptl_size_t rlength;
- ptl_size_t mlength;
- ptl_size_t offset;
- ptl_md_t mem_desc;
+ ptl_size_t mlength;
+ ptl_size_t offset;
+ ptl_handle_md_t md_handle;
+ ptl_md_t md;
ptl_hdr_data_t hdr_data;
- int unlinked;
- ptl_ni_fail_t ni_fail_type;
+ ptl_seq_t link;
+ ptl_ni_fail_t ni_fail_type;
+
+ int unlinked;
volatile ptl_seq_t sequence;
} ptl_event_t;
@BUILD_GMNAL_TRUE@subdir-m += gmnal
-@BUILD_IBNAL_TRUE@subdir-m += ibnal
+@BUILD_OPENIBNAL_TRUE@subdir-m += openibnal
@BUILD_QSWNAL_TRUE@subdir-m += qswnal
subdir-m += socknal
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
-SUBDIRS = gmnal ibnal qswnal socknal
+SUBDIRS = gmnal openibnal qswnal socknal
#include "linux/kp30.h"
#include "portals/p30.h"
-#include "portals/lib-nal.h"
+#include "portals/nal.h"
#include "portals/lib-p30.h"
#define GM_STRONG_TYPES 1
+#ifdef VERSION
+#undef VERSION
+#endif
#include "gm.h"
#include "gm_internal.h"
#define GMNAL_INIT_NAL(a) do { \
- a->startup = gmnal_api_startup; \
- a->forward = gmnal_api_forward; \
- a->shutdown = gmnal_api_shutdown; \
- a->yield = gmnal_api_yield; \
- a->lock = gmnal_api_lock; \
- a->unlock = gmnal_api_unlock; \
- a->timeout = NULL; \
- a->nal_data = NULL; \
+ (a)->nal_ni_init = gmnal_api_startup; \
+ (a)->nal_ni_fini = gmnal_api_shutdown; \
+ (a)->nal_data = NULL; \
} while (0)
* CB NAL
*/
-int gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t);
+ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
+ int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t);
-int gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
- int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t);
+ptl_err_t gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
+ int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t);
-int gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
- unsigned int, struct iovec *, size_t, size_t);
+ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *,
+ unsigned int, struct iovec *, size_t, size_t, size_t);
-int gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
- unsigned int, ptl_kiov_t *, size_t, size_t);
+ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *,
+ unsigned int, ptl_kiov_t *, size_t, size_t, size_t);
int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
* Small messages
*/
int gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t);
+ struct iovec *, size_t, size_t, size_t);
int gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t,
- unsigned int, struct iovec*, int);
+ unsigned int, struct iovec*, size_t, int);
void gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
* Large messages
*/
int gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int,
- struct iovec *, size_t, size_t);
+ struct iovec *, size_t, size_t, size_t);
int gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
int, ptl_nid_t, ptl_pid_t, unsigned int,
- struct iovec*, int);
+ struct iovec*, size_t, int);
void gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t);
gmnal_data_t *global_nal_data = NULL;
#define GLOBAL_NID_STR_LEN 16
char global_nid_str[GLOBAL_NID_STR_LEN] = {0};
+ptl_handle_ni_t kgmnal_ni;
+
+extern int gmnal_cmd(struct portals_cfg *pcfg, void *private);
/*
* Write the global nid /proc/sys/gmnal/globalnid
* nal_t nal our nal to shutdown
*/
void
-gmnal_api_shutdown(nal_t *nal, int interface)
+gmnal_api_shutdown(nal_t *nal)
{
gmnal_data_t *nal_data;
lib_nal_t *libnal;
if (nal->nal_refct != 0)
return;
- CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
LASSERT(nal == global_nal_data->nal);
libnal = (lib_nal_t *)nal->nal_data;
nal_data = (gmnal_data_t *)libnal->libnal_data;
LASSERT(nal_data == global_nal_data);
+ CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
/* Stop portals calling our ioctl handler */
libcfs_nal_cmd_unregister(GMNAL);
if (nal->nal_refct != 0) {
if (actual_limits != NULL) {
libnal = (lib_nal_t *)nal->nal_data;
- *actual_limits = nal->libnal_ni.ni_actual_limits;
+ *actual_limits = libnal->libnal_ni.ni_actual_limits;
+ }
return (PTL_OK);
}
}
- CDEBUG(D_NET, "Calling gm_open with interface [%d], port [%d], "
- "name [%s], version [%d]\n", interface, GMNAL_GM_PORT,
+ CDEBUG(D_NET, "Calling gm_open with port [%d], "
+ "name [%s], version [%d]\n", GMNAL_GM_PORT,
"gmnal", GM_API_VERSION);
GMNAL_GM_LOCK(nal_data);
PORTAL_FREE(libnal, sizeof(lib_nal_t));
return(PTL_FAIL);
}
+
nal_data->gm_local_nid = local_nid;
CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid);
+
GMNAL_GM_LOCK(nal_data);
gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid,
&global_nid);
}
- if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, nal->nal_data) != 0) {
+ if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) {
CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n");
/* XXX these cleanup cases should be restructured to
rc = ptl_register_nal(GMNAL, &the_gm_nal);
if (rc != PTL_OK)
CERROR("Can't register GMNAL: %d\n", rc);
+ rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni);
+ if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
+ ptl_unregister_nal(GMNAL);
+ return (-ENODEV);
+ }
return (rc);
}
CDEBUG(D_TRACE, "gmnal_fini\n");
LASSERT(global_nal_data == NULL);
+ PtlNIFini(kgmnal_ni);
ptl_unregister_nal(GMNAL);
}
#include "gmnal.h"
-int gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t mlen,
- size_t rlen)
+ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ unsigned int niov, struct iovec *iov, size_t offset,
+ size_t mlen, size_t rlen)
{
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
int status = PTL_OK;
CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
- "niov[%d], iov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- libnal, private, cookie, niov, iov, mlen, rlen);
+ "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
+ libnal, private, cookie, niov, iov, offset, mlen, rlen);
switch(srxd->type) {
case(GMNAL_SMALL_MESSAGE):
CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
status = gmnal_small_rx(libnal, private, cookie, niov,
- iov, mlen, rlen);
+ iov, offset, mlen, rlen);
break;
case(GMNAL_LARGE_MESSAGE_INIT):
CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
status = gmnal_large_rx(libnal, private, cookie, niov,
- iov, mlen, rlen);
+ iov, offset, mlen, rlen);
}
return(status);
}
-int gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int kniov, ptl_kiov_t *kiov, size_t mlen,
- size_t rlen)
+ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ unsigned int kniov, ptl_kiov_t *kiov, size_t offset,
+ size_t mlen, size_t rlen)
{
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
int status = PTL_OK;
CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
- "cookie[%p], kniov[%d], kiov [%p], mlen["LPSZ"], rlen["LPSZ"]\n",
- libnal, private, cookie, kniov, kiov, mlen, rlen);
+ "cookie[%p], kniov[%d], kiov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
+ libnal, private, cookie, kniov, kiov, offset, mlen, rlen);
if (srxd->type == GMNAL_SMALL_MESSAGE) {
PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
}
CDEBUG(D_INFO, "calling gmnal_small_rx\n");
status = gmnal_small_rx(libnal, private, cookie, kniov,
- iovec_dup, mlen, rlen);
+ iovec_dup, offset, mlen, rlen);
for (i=0; i<kniov; i++) {
kunmap(kiov_dup->kiov_page);
kiov_dup++;
}
-int gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, size_t len)
+ unsigned int niov, struct iovec *iov, size_t offset, size_t len)
{
gmnal_data_t *nal_data;
- CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] len["LPSZ"] nid["LPU64"]\n",
- niov, len, nid);
+ CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n",
+ niov, offset, len, nid);
nal_data = libnal->libnal_data;
if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
CDEBUG(D_INFO, "This is a small message send\n");
gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
- niov, iov, len);
+ niov, iov, offset, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported\n");
lib_finalize(libnal, private, cookie, PTL_FAIL);
return(PTL_FAIL);
gmnal_large_tx(libnal, private, cookie, hdr, type, nid, pid,
- niov, iov, len);
+ niov, iov, offset, len);
}
return(PTL_OK);
}
-int gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
+ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
- unsigned int kniov, ptl_kiov_t *kiov, size_t len)
+ unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len)
{
int i = 0;
struct iovec *iovec = NULL, *iovec_dup = NULL;
ptl_kiov_t *kiov_dup = kiov;
- CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] len["LPSZ"]\n", nid, kniov, len);
+ CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n",
+ nid, kniov, offset, len);
nal_data = libnal->libnal_data;
PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
iovec_dup = iovec;
kiov++;
}
gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
- pid, kniov, iovec_dup, len);
+ pid, kniov, iovec_dup, offset, len);
} else {
CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
return(PTL_FAIL);
kiov++;
}
gmnal_large_tx(libnal, private, cookie, hdr, type, nid,
- pid, kniov, iovec, len);
+ pid, kniov, iovec, offset, len);
}
for (i=0; i<kniov; i++) {
kunmap(kiov_dup->kiov_page);
*/
int
gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen)
+ unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen)
{
gmnal_srxd_t *srxd = NULL;
void *buffer = NULL;
- gmnal_data_t *nal_data = (gmnal_data_t*)libnal->nal_data;
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
buffer += sizeof(ptl_hdr_t);
while(niov--) {
- CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
- iov->iov_len);
- gm_bcopy(buffer, iov->iov_base, iov->iov_len);
- buffer += iov->iov_len;
- iov++;
+ /* Skip iovs that lie wholly before 'offset'; copy the tail of the
+ * iov that contains 'offset'; copy every later iov in full. */
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ } else if (offset > 0) {
+ CDEBUG(D_INFO, "processing [%p] base [%p] len "LPSZ", "
+ "offset "LPSZ", len ["LPSZ"]\n", iov,
+ iov->iov_base + offset, iov->iov_len, offset,
+ iov->iov_len - offset);
+ gm_bcopy(buffer, iov->iov_base + offset,
+ iov->iov_len - offset);
+ /* advance by the bytes actually copied BEFORE clearing offset */
+ buffer += iov->iov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
+ iov->iov_len);
+ gm_bcopy(buffer, iov->iov_base, iov->iov_len);
+ buffer += iov->iov_len;
+ }
+ iov++;
}
int
gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, int size)
+ unsigned int niov, struct iovec *iov, size_t offset, int size)
{
- gmnal_data_t *nal_data = (gmnal_data_t*)libnal->nal_data;
+ gmnal_data_t *nal_data = (gmnal_data_t*)libnal->libnal_data;
gmnal_stxd_t *stxd = NULL;
void *buffer = NULL;
gmnal_msghdr_t *msghdr = NULL;
buffer += sizeof(ptl_hdr_t);
while(niov--) {
- CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
- iov, iov->iov_len, buffer);
- gm_bcopy(iov->iov_base, buffer, iov->iov_len);
- buffer+= iov->iov_len;
- iov++;
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ } else if (offset > 0) {
+ CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n",
+ iov, iov->iov_base + offset, iov->iov_len - offset, buffer);
+ gm_bcopy(iov->iov_base + offset, buffer, iov->iov_len - offset);
+ buffer+= iov->iov_len - offset;
+ offset = 0;
+ } else {
+ CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n",
+ iov, iov->iov_len, buffer);
+ gm_bcopy(iov->iov_base, buffer, iov->iov_len);
+ buffer+= iov->iov_len;
+ }
+ iov++;
}
CDEBUG(D_INFO, "sending\n");
int
gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid,
- unsigned int niov, struct iovec *iov, int size)
+ unsigned int niov, struct iovec *iov, size_t offset, int size)
{
gmnal_data_t *nal_data;
global_nid, pid, niov, iov, size);
if (libnal)
- nal_data = (gmnal_data_t*)libnal->nal_data;
+ nal_data = (gmnal_data_t*)libnal->libnal_data;
else {
CDEBUG(D_ERROR, "no libnal.\n");
return(GMNAL_STATUS_FAIL);
mlen += sizeof(ptl_hdr_t);
CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
+ while (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ niov--;
+ iov++;
+ }
+
+ LASSERT(offset >= 0);
+ /*
+ * Store the iovs in the stxd for we can get
+ * them later if we need them
+ */
+ stxd->iov[0].iov_base = iov->iov_base + offset;
+ stxd->iov[0].iov_len = iov->iov_len - offset;
+ CDEBUG(D_NET, "Copying iov [%p] to [%p], niov=%d\n", iov, stxd->iov, niov);
+ if (niov > 1)
+ gm_bcopy(&iov[1], &stxd->iov[1], (niov-1)*sizeof(struct iovec));
+ stxd->niov = niov;
+
/*
* copy the iov to the buffer so target knows
* where to get the data from
*/
CDEBUG(D_INFO, "processing iov to [%p]\n", buffer);
- gm_bcopy(iov, buffer, niov*sizeof(struct iovec));
- mlen += niov*(sizeof(struct iovec));
+ gm_bcopy(stxd->iov, buffer, stxd->niov*sizeof(struct iovec));
+ mlen += stxd->niov*(sizeof(struct iovec));
CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
-
- /*
- * Store the iovs in the stxd for we can get
- * them later if we need them
- */
- CDEBUG(D_NET, "Copying iov [%p] to [%p]\n", iov, stxd->iov);
- gm_bcopy(iov, stxd->iov, niov*sizeof(struct iovec));
- stxd->niov = niov;
-
/*
* register the memory so the NIC can get hold of the data
* This is a slow process. it'd be good to overlap it
* with something else.
*/
+ iov = stxd->iov;
iov_dup = iov;
niov_dup = niov;
while(niov--) {
*/
int
gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie,
- unsigned int nriov, struct iovec *riov, size_t mlen,
- size_t rlen)
+ unsigned int nriov, struct iovec *riov, size_t offset,
+ size_t mlen, size_t rlen)
{
- gmnal_data_t *nal_data = libnal->nal_data;
+ gmnal_data_t *nal_data = libnal->libnal_data;
gmnal_srxd_t *srxd = (gmnal_srxd_t*)private;
void *buffer = NULL;
struct iovec *riov_dup;
* If the iovecs match, could interleave
* gm_registers and gm_gets for each element
*/
+ while (offset >= riov->iov_len) {
+ offset -= riov->iov_len;
+ riov++;
+ nriov--;
+ }
+ LASSERT (nriov >= 0);
+ LASSERT (offset >= 0);
+ /*
+ * do this so the final gm_get callback can deregister the memory
+ */
+ PORTAL_ALLOC(srxd->riov, nriov*(sizeof(struct iovec)));
+
+ srxd->riov[0].iov_base = riov->iov_base + offset;
+ srxd->riov[0].iov_len = riov->iov_len - offset;
+ if (nriov > 1)
+ gm_bcopy(&riov[1], &srxd->riov[1], (nriov-1)*(sizeof(struct iovec)));
+ srxd->nriov = nriov;
+
+ riov = srxd->riov;
nriov_dup = nriov;
riov_dup = riov;
while(nriov--) {
/*
* give back srxd and buffer. Send NACK to sender
*/
+ PORTAL_FREE(srxd->riov, nriov_dup*(sizeof(struct iovec)));
return(PTL_FAIL);
}
GMNAL_GM_UNLOCK(nal_data);
riov++;
}
- /*
- * do this so the final gm_get callback can deregister the memory
- */
- PORTAL_ALLOC(srxd->riov, nriov_dup*(sizeof(struct iovec)));
- gm_bcopy(riov_dup, srxd->riov, nriov_dup*(sizeof(struct iovec)));
- srxd->nriov = nriov_dup;
/*
* now do gm_get to get the data
copy_from_user(name, pcfg->pcfg_pbuf1, pcfg->pcfg_plen1);
GMNAL_GM_LOCK(nal_data);
- nid = gm_host_name_to_node_id(nal_data->gm_port, name);
+ //nid = gm_host_name_to_node_id(nal_data->gm_port, name);
+ gm_status = gm_host_name_to_node_id_ex (nal_data->gm_port, 0, name, &nid);
GMNAL_GM_UNLOCK(nal_data);
- CDEBUG(D_INFO, "Local node id is [%d]\n", nid);
+ if (gm_status != GM_SUCCESS) {
+ CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) failed[%d]\n",
+ name, gm_status);
+ return (-1);
+ } else
+ CDEBUG(D_INFO, "Local node %s id is [%d]\n", name, nid);
GMNAL_GM_LOCK(nal_data);
gm_status = gm_node_id_to_global_id(nal_data->gm_port,
nid, &gnid);
CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n");
-
CDEBUG(D_INFO, "Calling gmnal_init\n");
- statud = gmnal_init();
+ status = gmnal_init();
if (status == PTL_OK) {
CDEBUG(D_INFO, "Portals GMNAL initialised ok\n");
} else {
+++ /dev/null
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.*.cmd
-.tmp_versions
-.depend
+++ /dev/null
-MODULES := kibnal
-kibnal-objs := ibnal.o ibnal_cb.o
-
-EXTRA_PRE_CFLAGS := @IBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-if MODULES
-if !CRAY_PORTALS
-if BUILD_IBNAL
-modulenet_DATA = kibnal$(KMODEXT)
-endif
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(kibnal-objs:%.o=%.c) ibnal.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Based on ksocknal, qswnal, and gmnal
- *
- * Copyright (C) 2003 LANL
- * Author: HB Chen <hbchen@lanl.gov>
- * Los Alamos National Lab
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "ibnal.h"
-
-// portal handle ID for this IB-NAL
-ptl_handle_ni_t kibnal_ni;
-
-// message send buffer mutex
-spinlock_t MSBuf_mutex[NUM_MBUF];
-
-// message recv buffer mutex
-spinlock_t MRBuf_mutex[NUM_MBUF];
-
-// IB-NAL API information
-nal_t kibnal_api;
-
-// nal's private data
-kibnal_data_t kibnal_data;
-
-int ibnal_debug = 0;
-VAPI_pd_hndl_t Pd_hndl;
-unsigned int Num_posted_recv_buf;
-
-// registered send buffer list
-Memory_buffer_info MSbuf_list[NUM_MBUF];
-
-// registered recv buffer list
-Memory_buffer_info MRbuf_list[NUM_MBUF];
-
-//
-// for router
-// currently there is no need fo IBA
-//
-kpr_nal_interface_t kibnal_router_interface = {
- kprni_nalid: IBNAL,
- kprni_arg: &kibnal_data,
- kprni_fwd: kibnal_fwd_packet, // forward data to router
- // is router invloving the
- // data transmision
-};
-
-
-// Queue-pair list
-QP_info QP_list[NUM_QPS];
-
-// information associated with a HCA
-HCA_info Hca_data;
-
-// something about HCA
-VAPI_hca_hndl_t Hca_hndl; // assume we only use one HCA now
-VAPI_hca_vendor_t Hca_vendor;
-VAPI_hca_cap_t Hca_cap;
-VAPI_hca_port_t Hca_port_1_props;
-VAPI_hca_port_t Hca_port_2_props;
-VAPI_hca_attr_t Hca_attr;
-VAPI_hca_attr_mask_t Hca_attr_mask;
-VAPI_cq_hndl_t Cq_RQ_hndl; // CQ's handle
-VAPI_cq_hndl_t Cq_SQ_hndl; // CQ's handle
-VAPI_cq_hndl_t Cq_hndl; // CQ's handle
-Remote_QP_Info L_QP_data;
-Remote_QP_Info R_QP_data;
-
-
-//
-// forward API
-//
-int
-kibnal_forward(nal_t *nal,
- int id,
- void *args,
- size_t args_len,
- void *ret,
- size_t ret_len)
-{
- kibnal_data_t *knal_data = nal->nal_data;
- nal_cb_t *nal_cb = knal_data->kib_cb;
-
- // ASSERT checking
- LASSERT (nal == &kibnal_api);
- LASSERT (knal_data == &kibnal_data);
- LASSERT (nal_cb == &kibnal_lib);
-
- // dispatch forward API function
-
- CDEBUG(D_NET,"kibnal_forward: function id = %d\n", id);
-
- lib_dispatch(nal_cb, knal_data, id, args, ret);
-
- CDEBUG(D_TRACE,"IBNAL- Done kibnal_forward\n");
-
- return PTL_OK; // always return PTL_OK
-}
-
-//
-// lock API
-//
-void
-kibnal_lock(nal_t *nal, unsigned long *flags)
-{
- kibnal_data_t *knal_data = nal->nal_data;
- nal_cb_t *nal_cb = knal_data->kib_cb;
-
- // ASSERT checking
- LASSERT (nal == &kibnal_api);
- LASSERT (knal_data == &kibnal_data);
- LASSERT (nal_cb == &kibnal_lib);
-
- // disable logical interrrupt
- nal_cb->cb_cli(nal_cb,flags);
-
- CDEBUG(D_TRACE,"IBNAL-Done kibnal_lock\n");
-
-}
-
-//
-// unlock API
-//
-void
-kibnal_unlock(nal_t *nal, unsigned long *flags)
-{
- kibnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kib_cb;
-
- // ASSERT checking
- LASSERT (nal == &kibnal_api);
- LASSERT (k == &kibnal_data);
- LASSERT (nal_cb == &kibnal_lib);
-
- // enable logical interrupt
- nal_cb->cb_sti(nal_cb,flags);
-
- CDEBUG(D_TRACE,"IBNAL-Done kibnal_unlock");
-
-}
-
-//
-// shutdown API
-// showdown this network interface
-//
-int
-kibnal_shutdown(nal_t *nal, int ni)
-{
- VAPI_ret_t vstat;
- kibnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kib_cb;
-
- // assert checking
- LASSERT (nal == &kibnal_api);
- LASSERT (k == &kibnal_data);
- LASSERT (nal_cb == &kibnal_lib);
-
- // take down this IB network interface
- // there is not corresponding cb function to hande this
- // do we actually need this one
- // reference to IB network interface shutdown
- //
-
- vstat = IB_Close_HCA();
-
- if (vstat != VAPI_OK) {
- CERROR("Failed to close HCA - %s\n",VAPI_strerror(vstat));
- return (~PTL_OK);
- }
-
- CDEBUG(D_TRACE,"IBNAL- Done kibnal_shutdown\n");
-
- return PTL_OK;
-}
-
-//
-// yield
-// when do we call this yield function
-//
-void
-kibnal_yield( nal_t *nal, unsigned long *flags, int milliseconds )
-{
- kibnal_data_t *k = nal->nal_data;
- nal_cb_t *nal_cb = k->kib_cb;
-
- // assert checking
- LASSERT (nal == &kibnal_api);
- LASSERT (k == &kibnal_data);
- LASSERT (nal_cb == &kibnal_lib);
-
- if (milliseconds != 0) {
- CERROR("Blocking yeild not implemented yet\n");
- LBUG();
- }
-
- // check under what condition that we need to
- // call schedule()
- // who set this need_resched
- if (current->need_resched)
- schedule();
-
- CDEBUG(D_TRACE,"IBNAL-Done kibnal_yield");
-
- return;
-}
-
-//
-// ibnal init
-//
-nal_t *
-kibnal_init(int interface, // no use here
- ptl_pt_index_t ptl_size,
- ptl_ac_index_t ac_size,
- ptl_pid_t requested_pid // no use here
- )
-{
- nal_t *nal = NULL;
- nal_cb_t *nal_cb = NULL;
- kibnal_data_t *nal_data = NULL;
- int rc;
-
- CDEBUG(D_NET, "kibnal_init:calling lib_init with nid 0x%u\n",
- kibnal_data.kib_nid);
-
-
- CDEBUG(D_NET, "kibnal_init: interface [%d], ptl_size [%d], ac_size[%d]\n",
- interface, ptl_size, ac_size);
- CDEBUG(D_NET, "kibnal_init: &kibnal_lib 0x%X\n", &kibnal_lib);
- CDEBUG(D_NET, "kibnal_init: kibnal_data.kib_nid %d\n", kibnal_data.kib_nid);
-
- rc = lib_init(&kibnal_lib,
- kibnal_data.kib_nid,
- requested_pid , // process id is set as requested_pid instead of 0
- ptl_size,
- ac_size);
-
- if(rc != PTL_OK) {
- CERROR("kibnal_init: Failed lib_init with nid 0x%u, rc=%d\n",
- kibnal_data.kib_nid,rc);
- }
- else {
- CDEBUG(D_NET,"kibnal_init: DONE lib_init with nid 0x%x%x\n",
- kibnal_data.kib_nid);
- }
-
- return &kibnal_api;
-
-}
-
-
-//
-// called before remove ibnal kernel module
-//
-void __exit
-kibnal_finalize(void)
-{
- struct list_head *tmp;
-
- inter_module_unregister("kibnal_ni");
-
- // release resources allocated to this Infiniband network interface
- PtlNIFini(kibnal_ni);
-
- lib_fini(&kibnal_lib);
-
- IB_Close_HCA();
-
- // how much do we need to do here?
- list_for_each(tmp, &kibnal_data.kib_list) {
- kibnal_rx_t *conn;
- conn = list_entry(tmp, kibnal_rx_t, krx_item);
- CDEBUG(D_IOCTL, "freeing conn %p\n",conn);
- tmp = tmp->next;
- list_del(&conn->krx_item);
- PORTAL_FREE(conn, sizeof(*conn));
- }
-
- CDEBUG(D_MALLOC,"done kmem %d\n",atomic_read(&portal_kmemory));
- CDEBUG(D_TRACE,"IBNAL-Done kibnal_finalize\n");
-
- return;
-}
-
-
-//
-// * k_server_thread is a kernel thread
-// use a shared memory ro exchange HCA's data with a pthread in user
-// address space
-// * will be replaced when CM is used to handle communication management
-//
-
-void k_server_thread(Remote_QP_Info *hca_data)
-{
- int segment_id;
- const int shared_segment_size = sizeof(Remote_QP_Info);
- key_t key = HCA_EXCHANGE_SHM_KEY;
- unsigned long raddr;
- int exchanged_done = NO;
- int i;
-
- Remote_QP_Info *exchange_hca_data;
-
- long *n;
- long *uaddr;
- long ret = 0;
-
- // create a shared memory with pre-agreement key
- segment_id = sys_shmget(key,
- shared_segment_size,
- IPC_CREAT | 0666);
-
-
- // attached to shared memoru
- // raddr is pointed to an user address space
- // use this address to update shared menory content
- ret = sys_shmat(segment_id, 0 , SHM_RND, &raddr);
-
-#ifdef IBNAL_DEBUG
- if(ret >= 0) {
- CDEBUG(D_NET,"k_server_thread: Shared memory attach success ret = 0X%d,&raddr"
- " 0X%x (*(&raddr))=0x%x \n", ret, &raddr, (*(&raddr)));
- printk("k_server_thread: Shared memory attach success ret = 0X%d, &raddr"
- " 0X%x (*(&raddr))=0x%x \n", ret, &raddr, (*(&raddr)));
- }
- else {
- CERROR("k_server_thread: Shared memory attach failed ret = 0x%d \n", ret);
- printk("k_server_thread: Shared memory attach failed ret = 0x%d \n", ret);
- return;
- }
-#endif
-
- n = &raddr;
- uaddr = *n; // get the U-address
- /* cast uaddr to exchange_hca_data */
- exchange_hca_data = (Remote_QP_Info *) uaddr;
-
- /* copy data from local HCA to shared memory */
- exchange_hca_data->opcode = hca_data->opcode;
- exchange_hca_data->length = hca_data->length;
-
- for(i=0; i < NUM_QPS; i++) {
- exchange_hca_data->dlid[i] = hca_data->dlid[i];
- exchange_hca_data->rqp_num[i] = hca_data->rqp_num[i];
- }
-
- // periodically check shared memory until get updated
- // remote HCA's data from user mode pthread
- while(exchanged_done == NO) {
- if(exchange_hca_data->opcode == RECV_QP_INFO){
- exchanged_done = YES;
- /* copy data to local buffer from shared memory */
- hca_data->opcode = exchange_hca_data->opcode;
- hca_data->length = exchange_hca_data->length;
-
- for(i=0; i < NUM_QPS; i++) {
- hca_data->dlid[i] = exchange_hca_data->dlid[i];
- hca_data->rqp_num[i] = exchange_hca_data->rqp_num[i];
- }
- break;
- }
- else {
- schedule_timeout(1000);
- }
- }
-
- // detached shared memory
- sys_shmdt(uaddr);
-
- CDEBUG(D_NET, "Exit from kernel thread: k_server_thread \n");
- printk("Exit from kernel thread: k_server_thread \n");
-
- return;
-
-}
-
-//
-// create QP
-//
-VAPI_ret_t
-create_qp(QP_info *qp, int qp_index)
-{
-
- VAPI_ret_t vstat;
- VAPI_qp_init_attr_t qp_init_attr;
- VAPI_qp_prop_t qp_prop;
-
- qp->hca_hndl = Hca_hndl;
- qp->port = 1; // default
- qp->slid = Hca_port_1_props.lid;
- qp->hca_port = Hca_port_1_props;
-
-
- /* Queue Pair Creation Attributes */
- qp_init_attr.cap.max_oust_wr_rq = NUM_WQE;
- qp_init_attr.cap.max_oust_wr_sq = NUM_WQE;
- qp_init_attr.cap.max_sg_size_rq = NUM_SG;
- qp_init_attr.cap.max_sg_size_sq = NUM_SG;
- qp_init_attr.pd_hndl = qp->pd_hndl;
- qp_init_attr.rdd_hndl = 0;
- qp_init_attr.rq_cq_hndl = qp->rq_cq_hndl;
- /* we use here polling */
- //qp_init_attr.rq_sig_type = VAPI_SIGNAL_REQ_WR;
- qp_init_attr.rq_sig_type = VAPI_SIGNAL_ALL_WR;
- qp_init_attr.sq_cq_hndl = qp->sq_cq_hndl;
- /* we use here polling */
- //qp_init_attr.sq_sig_type = VAPI_SIGNAL_REQ_WR;
- qp_init_attr.sq_sig_type = VAPI_SIGNAL_ALL_WR;
- // transport servce - reliable connection
-
- qp_init_attr.ts_type = VAPI_TS_RC;
-
- vstat = VAPI_create_qp(qp->hca_hndl,
- &qp_init_attr,
- &qp->qp_hndl, &qp_prop);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed creating QP. Return Failed - %s\n",VAPI_strerror(vstat));
- return vstat;
- }
-
- qp->qp_num = qp_prop.qp_num; // the qp number
- qp->last_posted_send_id = 0; // user defined work request ID
- qp->last_posted_rcv_id = 0; // user defined work request ID
- qp->cur_send_outstanding = 0;
- qp->cur_posted_rcv_bufs = 0;
- qp->snd_rcv_balance = 0;
-
- CDEBUG(D_OTHER, "create_qp: qp_num = %d, slid = %d, qp_hndl = 0X%X",
- qp->qp_num, qp->slid, qp->qp_hndl);
-
- // initialize spin-lock mutex variables
- spin_lock_init(&(qp->snd_mutex));
- spin_lock_init(&(qp->rcv_mutex));
- spin_lock_init(&(qp->bl_mutex));
- spin_lock_init(&(qp->cln_mutex));
- // number of outstanding requests on the send Q
- qp->cur_send_outstanding = 0;
- // number of posted receive buffers
- qp->cur_posted_rcv_bufs = 0;
- qp->snd_rcv_balance = 0;
-
- return(VAPI_OK);
-
-}
-
-//
-// initialize a UD qp state to RTR and RTS
-//
-VAPI_ret_t
-init_qp_UD(QP_info *qp, int qp_index)
-{
- VAPI_qp_attr_t qp_attr;
- VAPI_qp_init_attr_t qp_init_attr;
- VAPI_qp_attr_mask_t qp_attr_mask;
- VAPI_qp_cap_t qp_cap;
- VAPI_ret_t vstat;
-
- /* Move from RST to INIT */
- /* Change QP to INIT */
-
- CDEBUG(D_OTHER, "Changing QP state to INIT qp-index = %d\n", qp_index);
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
-
- qp_attr.qp_state = VAPI_INIT;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.pkey_ix = 0;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PKEY_IX);
-
- CDEBUG(D_OTHER, "pkey_ix qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.port = qp->port;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PORT);
-
- CDEBUG(D_OTHER, "port qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.qkey = 0;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QKEY);
-
- CDEBUG(D_OTHER, "qkey qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- /* If I do not set this mask, I get an error from HH. QPM should catch it */
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from RST to INIT. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- CDEBUG(D_OTHER, "Modifying QP from RST to INIT.\n");
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Move from INIT to RTR */
- /* Change QP to RTR */
- CDEBUG(D_OTHER, "Changing QP state to RTR\n");
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
-
- qp_attr.qp_state = VAPI_RTR;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- CDEBUG(D_OTHER, "INIT to RTR- qp_state : qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from INIT to RTR. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- CDEBUG(D_OTHER, "Modifying QP from INIT to RTR.\n");
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* RTR to RTS - Change QP to RTS */
- CDEBUG(D_OTHER, "Changing QP state to RTS\n");
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
-
- qp_attr.qp_state = VAPI_RTS;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- qp_attr.sq_psn = START_SQ_PSN;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_SQ_PSN);
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from RTR to RTS. %s:%s\n",
- VAPI_strerror_sym(vstat),
- VAPI_strerror(vstat));
- return(vstat);
- }
-
- CDEBUG(D_OTHER, "Modifying QP from RTR to RTS. \n");
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- //
- // a QP is at RTS state NOW
- //
-
- CDEBUG(D_OTHER, "IBNAL- UD qp is at RTS NOW\n");
-
- return(vstat);
-
-}
-
-
-
-//
-// initialize a RC qp state to RTR and RTS
-// RC transport service
-//
-VAPI_ret_t
-init_qp_RC(QP_info *qp, int qp_index)
-{
- VAPI_qp_attr_t qp_attr;
- VAPI_qp_init_attr_t qp_init_attr;
- VAPI_qp_attr_mask_t qp_attr_mask;
- VAPI_qp_cap_t qp_cap;
- VAPI_ret_t vstat;
-
- /* Move from RST to INIT */
- /* Change QP to INIT */
-
- CDEBUG(D_OTHER, "Changing QP state to INIT qp-index = %d\n", qp_index);
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
-
- qp_attr.qp_state = VAPI_INIT;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.pkey_ix = 0;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PKEY_IX);
-
- CDEBUG(D_OTHER, "pkey_ix qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.port = qp->port;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PORT);
-
- CDEBUG(D_OTHER, "port qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.remote_atomic_flags = VAPI_EN_REM_WRITE | VAPI_EN_REM_READ;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_REMOTE_ATOMIC_FLAGS);
-
- CDEBUG(D_OTHER, "remote_atomic_flags qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- /* If I do not set this mask, I get an error from HH. QPM should catch it */
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from RST to INIT. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Move from INIT to RTR */
- /* Change QP to RTR */
- CDEBUG(D_OTHER, "Changing QP state to RTR qp_indexi %d\n", qp_index);
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
- qp_attr.qp_state = VAPI_RTR;
-
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.av.sl = 0;/* RESPONDER_SL */
- qp_attr.av.grh_flag = FALSE;
- qp_attr.av.dlid = qp->dlid;/*RESPONDER_LID;*/
- qp_attr.av.static_rate = 0;
- qp_attr.av.src_path_bits = 0;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_AV);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.path_mtu = MTU_2048;// default is MTU_2048
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PATH_MTU);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.rq_psn = START_RQ_PSN;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_RQ_PSN);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.qp_ous_rd_atom = NUM_WQE;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_OUS_RD_ATOM);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.pkey_ix = 0;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_PKEY_IX);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.min_rnr_timer = 10;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_MIN_RNR_TIMER);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- qp_attr.dest_qp_num = qp->rqp_num;
-
- CDEBUG(D_OTHER, "remore qp num %d\n", qp->rqp_num);
-
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_DEST_QP_NUM);
-
- CDEBUG(D_OTHER, "qp_state qp_attr_mask = 0X%x\n", qp_attr_mask);
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from INIT to RTR. qp_index %d - %s\n",
- qp_index, VAPI_strerror(vstat));
- return(vstat);
- }
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* RTR to RTS - Change QP to RTS */
- CDEBUG(D_OTHER, "Changing QP state to RTS\n");
-
- QP_ATTR_MASK_CLR_ALL(qp_attr_mask);
-
- qp_attr.qp_state = VAPI_RTS;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_QP_STATE);
-
- qp_attr.sq_psn = START_SQ_PSN;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_SQ_PSN);
-
- qp_attr.timeout = 0x18;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_TIMEOUT);
-
- qp_attr.retry_count = 10;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_RETRY_COUNT);
-
- qp_attr.rnr_retry = 14;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_RNR_RETRY);
-
- qp_attr.ous_dst_rd_atom = 100;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_OUS_DST_RD_ATOM);
-
- qp_attr.min_rnr_timer = 5;
- QP_ATTR_MASK_SET(qp_attr_mask,QP_ATTR_MIN_RNR_TIMER);
-
- vstat = VAPI_modify_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_cap);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed modifying QP from RTR to RTS. %s:%s\n",
- VAPI_strerror_sym(vstat), VAPI_strerror(vstat));
- return(vstat);
- }
-
- vstat= VAPI_query_qp(qp->hca_hndl,
- qp->qp_hndl,
- &qp_attr,
- &qp_attr_mask,
- &qp_init_attr);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed query QP. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- //
- // a QP is at RTS state NOW
- //
-
- CDEBUG(D_OTHER, "IBNAL- RC qp is at RTS NOW\n");
-
- return(vstat);
-}
-
-
-
-VAPI_ret_t
-IB_Open_HCA(kibnal_data_t *kib_data)
-{
-
- VAPI_ret_t vstat;
- VAPI_cqe_num_t cqe_active_num;
- QP_info *qp;
- int i;
- int Num_posted_recv_buf;
-
- /* Open HCA */
- CDEBUG(D_PORTALS, "Opening an HCA\n");
-
- vstat = VAPI_open_hca(HCA_ID, &Hca_hndl);
- vstat = EVAPI_get_hca_hndl(HCA_ID, &Hca_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed opening the HCA: %s. %s...\n",HCA_ID,VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Get HCA CAP */
- vstat = VAPI_query_hca_cap(Hca_hndl, &Hca_vendor, &Hca_cap);
- if (vstat != VAPI_OK) {
- CERROR("Failed query hca cap %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Get port 1 info */
- vstat = VAPI_query_hca_port_prop(Hca_hndl, HCA_PORT_1 , &Hca_port_1_props);
- if (vstat != VAPI_OK) {
- CERROR("Failed query port cap %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Get port 2 info */
- vstat = VAPI_query_hca_port_prop(Hca_hndl, HCA_PORT_2, &Hca_port_2_props);
- if (vstat != VAPI_OK) {
- CERROR("Failed query port cap %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- // Get a PD
- CDEBUG(D_PORTALS, "Allocating PD \n");
- vstat = VAPI_alloc_pd(Hca_hndl,&Pd_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed allocating a PD. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- vstat = createMemRegion(Hca_hndl, Pd_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed registering a memory region.%s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- /* Create CQ for RQ*/
- CDEBUG(D_PORTALS, "Creating a send completion queue\n");
-
- vstat = VAPI_create_cq(Hca_hndl,
- NUM_CQE,
- &Cq_hndl,
- &cqe_active_num);
-
- if (vstat != VAPI_OK) {
- CERROR("Failed creating a CQ. %s\n",VAPI_strerror(vstat));
- return(vstat);
- }
-
- if(NUM_CQE == cqe_active_num) {
- CERROR("VAPI_create_cq: NUM_CQE EQ cqe_active_num \n");
- }
- else {
- CDEBUG(D_NET, "VAPI_create_cq: NUM_CQE %d , actual cqe_active_num %d \n",
- NUM_CQE, cqe_active_num);
- }
-
- Cq_SQ_hndl = Cq_hndl;
- Cq_RQ_hndl = Cq_hndl;
-
- //
- // create QPs
- //
- for(i=0; i < NUM_QPS; i++) {
- QP_list[i].pd_hndl = Pd_hndl;
- QP_list[i].hca_hndl = Hca_hndl;
- // sq rq use the same Cq_hndl
- QP_list[i].sq_cq_hndl = Cq_hndl;
- QP_list[i].rq_cq_hndl = Cq_hndl;
- vstat = create_qp(&QP_list[i], i);
- if (vstat != VAPI_OK) {
- CERROR("Failed creating a QP %d %s\n",i, VAPI_strerror(vstat));
- return(vstat);
- }
- }
-
- //
- // record HCA data
- //
-
- Hca_data.hca_hndl = Hca_hndl; // HCA handle
- Hca_data.pd_hndl = Pd_hndl; // protection domain
- Hca_data.port = 1; // port number
- Hca_data.num_qp = NUM_QPS; // number of qp used
-
- for(i=0; i < NUM_QPS; i++) {
- Hca_data.qp_ptr[i] = &QP_list[i]; // point to QP_list
- }
-
- Hca_data.num_cq = NUM_CQ; // number of cq used
- Hca_data.cq_hndl = Cq_hndl; //
- Hca_data.sq_cq_hndl = Cq_SQ_hndl; //
- Hca_data.rq_cq_hndl = Cq_RQ_hndl; //
- Hca_data.kib_data = kib_data; //
- Hca_data.slid = QP_list[0].slid;//
-
- // prepare L_QP_data
-
-#ifdef USE_SHARED_MEMORY_AND_SOCKET
-
- /*
- * + use a shared-memory between a user thread and a kernel thread
- * for HCA's data exchange on the same node
- * + use socket in user mode to exhange HCA's data with a remote node
- */
-
-
- R_QP_data.opcode = SEND_QP_INFO;
- R_QP_data.length = sizeof(L_QP_data);
-
- for(i=0; i < NUM_QPS; i++) {
- // my slid will be used in a remote node as dlid
- R_QP_data.dlid[i] = QP_list[i].slid;
- // my qp_num will be used in remode node as remote_qp_number
- // RC is used here so we need dlid and rqp_num
- R_QP_data.rqp_num[i] = QP_list[i].qp_num ;
- }
-
- // create a kernel thread for exchanging HCA's data
- // R_QP_data will be exchanged with a remoe node
-
- kernel_thread(k_server_thread, &R_QP_data, 0); //
- // check if the HCA'data have been updated by kernel_thread
- // loop until the HCA's data is updated
- // make sure that uagent is running
-
- // QP info is exchanged with a remote node
- while (1) {
- schedule_timeout(1000);
- if(R_QP_data.opcode == RECV_QP_INFO) {
- CDEBUG(D_NET, "HCA's data is being updated\n");
- break;
- }
- }
-
-#endif
-
-#ifdef USE_SHARED_MEMORY_AND_MULTICAST
-
- /*
- * + use a shared-memory between a user thread and a kernel thread
- * for HCA's data exchange on the same node
- * + use Infinoband UR/multicast in user mode to exhange HCA's data with i
- * a remote node
- */
-
- // use CM, opemSM
-
-#endif
-
- //
- for(i=0; i < NUM_QPS; i++) {
- qp = (QP_info *) &QP_list[i];
- QP_list[i].rqp_num = R_QP_data.rqp_num[i]; // remoter qp number
- QP_list[i].dlid = R_QP_data.dlid[i]; // remote dlid
- }
-
- // already have remote_qp_num adn dlid information
- // initialize QP to RTR/RTS state
- //
- for(i=0; i < NUM_QPS; i++) {
- vstat = init_qp_RC(&QP_list[i], i);
- if (vstat != VAPI_OK) {
- CERROR("Failed change a QP %d to RTS state%s\n",
- i,VAPI_strerror(vstat));
- return(vstat);
- }
- }
-
- // post receiving buffer before any send happened
-
- Num_posted_recv_buf = post_recv_bufs( (VAPI_wr_id_t ) START_RECV_WRQ_ID);
-
- // for irregular completion event or some unexpected failure event
- vstat = IB_Set_Async_Event_Handler(Hca_data, &kibnal_data);
- if (vstat != VAPI_OK) {
- CERROR("IB_Set_Async_Event_Handler failed: %d\n", vstat);
- return vstat;
- }
-
-
- CDEBUG(D_PORTALS, "IBNAL- done with IB_Open_HCA\n");
-
- for(i=0; i < NUM_MBUF; i++) {
- spin_lock_init(&MSB_mutex[i]);
- }
-
- return(VAPI_OK);
-
-}
-
-
-/*
- Function: IB_Set_Event_Handler()
-
- IN Hca_info hca_data
- IN kibnal_data_t *kib_data -- private data
- OUT NONE
-
- return: VAPI_OK - success
- else - fail
-
-*/
-
-VAPI_ret_t
-IB_Set_Event_Handler(HCA_info hca_data, kibnal_data_t *kib_data)
-{
- VAPI_ret_t vstat;
- EVAPI_compl_handler_hndl_t comp_handler_hndl;
-
- // register CQE_Event_Hnadler
- // VAPI function
- vstat = VAPI_set_comp_event_handler(hca_data.hca_hndl,
- CQE_event_handler,
- &hca_data);
-
- /*
- or use extended VAPI function
- vstat = EVAPI_set_comp_eventh(hca_data.hca_hndl,
- hca_data.cq_hndl,
- CQE_event_handler,
- &hca_data,
- &comp_handler_hndl
- );
- */
-
- if (vstat != VAPI_OK) {
- CERROR("IB_Set_Event_Handler: failed EVAPI_set_comp_eventh for"
- " HCA ID = %s (%s).\n", HCA_ID, VAPI_strerror(vstat));
- return vstat;
- }
-
- // issue a request for completion ievent notification
- vstat = VAPI_req_comp_notif(hca_data.hca_hndl,
- hca_data.cq_hndl,
- VAPI_NEXT_COMP);
-
- if (vstat != VAPI_OK) {
- CERROR("IB_Set_Event_Handler: failed VAPI_req_comp_notif for HCA ID"
- " = %s (%s).\n", HCA_ID, VAPI_strerror(vstat));
- }
-
- return vstat;
-}
-
-
-
-/*
- Function: IB_Set_Async_Event_Handler()
-
- IN HCA_info hca_data
- IN kibnal_data_t *kib_data -- private data
- OUT NONE
-
- return: VAPI_OK - success
- else - fail
-
-*/
-
-
-VAPI_ret_t
-IB_Set_Async_Event_Handler(HCA_info hca_data, kibnal_data_t *kib_data)
-{
- VAPI_ret_t vstat;
-
- //
- // register an asynchronous event handler for this HCA
- //
-
- vstat= VAPI_set_async_event_handler(hca_data.hca_hndl,
- async_event_handler,
- kib_data);
-
- if (vstat != VAPI_OK) {
- CERROR("IB_Set_Async_Event_Handler: failed VAPI_set_async_comp_event_handler"
- " for HCA ID = %s (%s).\n", HCA_ID, VAPI_strerror(vstat));
- }
-
- return vstat;
-}
-
-//
-// IB_Close_HCA
-// close this Infiniband HCA interface
-// release allocated resources to system
-//
-VAPI_ret_t
-IB_Close_HCA(void )
-{
-
- VAPI_ret_t vstat;
- int ok = 1;
- int i;
-
- /* Destroy QP */
- CDEBUG(D_PORTALS, "Destroying QP\n");
-
- for(i=0; i < NUM_QPS; i++) {
- vstat = VAPI_destroy_qp(QP_list[i].hca_hndl, QP_list[i].qp_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed destroying QP %d. %s\n", i, VAPI_strerror(vstat));
- ok = 0;
- }
- }
-
- if (ok) {
- /* Destroy CQ */
- CDEBUG(D_PORTALS, "Destroying CQ\n");
- for(i=0; i < NUM_QPS; i++) {
- // send_cq adn receive_cq are shared the same CQ
- // so only destroy one of them
- vstat = VAPI_destroy_cq(QP_list[i].hca_hndl, QP_list[i].sq_cq_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed destroying CQ %d. %s\n", i, VAPI_strerror(vstat));
- ok = 0;
- }
- }
- }
-
- if (ok) {
- /* Destroy Memory Region */
- CDEBUG(D_PORTALS, "Deregistering MR\n");
- for(i=0; i < NUM_QPS; i++) {
- vstat = deleteMemRegion(&QP_list[i], i);
- if (vstat != VAPI_OK) {
- CERROR("Failed deregister mem reg %d. %s\n",i, VAPI_strerror(vstat));
- ok = 0;
- break;
- }
- }
- }
-
- if (ok) {
- // finally
- /* Close HCA */
- CDEBUG(D_PORTALS, "Closing HCA\n");
- vstat = VAPI_close_hca(Hca_hndl);
- if (vstat != VAPI_OK) {
- CERROR("Failed to close HCA. %s\n", VAPI_strerror(vstat));
- ok = 0;
- }
- }
-
- CDEBUG(D_PORTALS, "IBNAL- Done with closing HCA \n");
-
- return vstat;
-}
-
-
-VAPI_ret_t
-createMemRegion(VAPI_hca_hndl_t hca_hndl,
- VAPI_pd_hndl_t pd_hndl)
-{
- VAPI_ret_t vstat;
- VAPI_mrw_t mrw;
- VAPI_mrw_t rep_mr;
- VAPI_mr_hndl_t rep_mr_hndl;
- int buf_size;
- char *bufptr;
- int i;
-
- // send registered memory region
- for(i=0; i < NUM_ENTRY; i++) {
- MSbuf_list[i].buf_size = KB_32;
- PORTAL_ALLOC(bufptr, MSbuf_list[i].buf_size);
- if(bufptr == NULL) {
- CDEBUG(D_MALLOC,"Failed to malloc a block of send memory, qix %d size %d\n",
- i, MSbuf_list[i].buf_size);
- CERROR("Failed to malloc a block of send memory, qix %d size %d\n",
- i, MSbuf_list[i].buf_size);
- return(VAPI_ENOMEM);
- }
-
- mrw.type = VAPI_MR;
- mrw.pd_hndl= pd_hndl;
- mrw.start = MSbuf_list[i].buf_addr = (VAPI_virt_addr_t)(MT_virt_addr_t) bufptr;
- mrw.size = MSbuf_list[i].buf_size;
- mrw.acl = VAPI_EN_LOCAL_WRITE |
- VAPI_EN_REMOTE_WRITE |
- VAPI_EN_REMOTE_READ;
-
- // register send memory region
- vstat = VAPI_register_mr(hca_hndl,
- &mrw,
- &rep_mr_hndl,
- &rep_mr);
-
- // this memory region is going to be reused until deregister is called
- if(vstat != VAPI_OK) {
- CERROR("Failed registering a mem region qix %d Addr=%p, Len=%d. %s\n",
- i, mrw.start, mrw.size, VAPI_strerror(vstat));
- return(vstat);
- }
-
- MSbuf_list[i].mr = rep_mr;
- MSbuf_list[i].mr_hndl = rep_mr_hndl;
- MSbuf_list[i].bufptr = bufptr;
- MSbuf_list[i].buf_addr = rep_mr.start;
- MSbuf_list[i].status = BUF_REGISTERED;
- MSbuf_list[i].ref_count = 0;
- MSbuf_list[i].buf_type = REG_BUF;
- MSbuf_list[i].raddr = 0x0;
- MSbuf_list[i].rkey = 0x0;
- }
-
- // RDAM buffer is not reserved for RDAM WRITE/READ
-
- for(i=NUM_ENTRY; i< NUM_MBUF; i++) {
- MSbuf_list[i].status = BUF_UNREGISTERED;
- MSbuf_list[i].buf_type = RDMA_BUF;
- }
-
-
- // recv registered memory region
- for(i=0; i < NUM_ENTRY; i++) {
- MRbuf_list[i].buf_size = KB_32;
- PORTAL_ALLOC(bufptr, MRbuf_list[i].buf_size);
-
- if(bufptr == NULL) {
- CDEBUG(D_MALLOC, "Failed to malloc a block of send memory, qix %d size %d\n",
- i, MRbuf_list[i].buf_size);
- return(VAPI_ENOMEM);
- }
-
- mrw.type = VAPI_MR;
- mrw.pd_hndl= pd_hndl;
- mrw.start = (VAPI_virt_addr_t)(MT_virt_addr_t) bufptr;
- mrw.size = MRbuf_list[i].buf_size;
- mrw.acl = VAPI_EN_LOCAL_WRITE |
- VAPI_EN_REMOTE_WRITE |
- VAPI_EN_REMOTE_READ;
-
- // register send memory region
- vstat = VAPI_register_mr(hca_hndl,
- &mrw,
- &rep_mr_hndl,
- &rep_mr);
-
- // this memory region is going to be reused until deregister is called
- if(vstat != VAPI_OK) {
- CERROR("Failed registering a mem region qix %d Addr=%p, Len=%d. %s\n",
- i, mrw.start, mrw.size, VAPI_strerror(vstat));
- return(vstat);
- }
-
- MRbuf_list[i].mr = rep_mr;
- MRbuf_list[i].mr_hndl = rep_mr_hndl;
- MRbuf_list[i].bufptr = bufptr;
- MRbuf_list[i].buf_addr = rep_mr.start;
- MRbuf_list[i].status = BUF_REGISTERED;
- MRbuf_list[i].ref_count = 0;
- MRbuf_list[i].buf_type = REG_BUF;
- MRbuf_list[i].raddr = 0x0;
- MRbuf_list[i].rkey = rep_mr.r_key;
- MRbuf_list[i].lkey = rep_mr.l_key;
-
- }
-
- // keep extra information for a qp
- for(i=0; i < NUM_QPS; i++) {
- QP_list[i].mr_hndl = MSbuf_list[i].mr_hndl;
- QP_list[i].mr = MSbuf_list[i].mr;
- QP_list[i].bufptr = MSbuf_list[i].bufptr;
- QP_list[i].buf_addr = MSbuf_list[i].buf_addr;
- QP_list[i].buf_size = MSbuf_list[i].buf_size;
- QP_list[i].raddr = MSbuf_list[i].raddr;
- QP_list[i].rkey = MSbuf_list[i].rkey;
- QP_list[i].lkey = MSbuf_list[i].lkey;
- }
-
- CDEBUG(D_PORTALS, "IBNAL- done VAPI_ret_t createMemRegion \n");
-
- return vstat;
-
-} /* createMemRegion */
-
-
-
-VAPI_ret_t
-deleteMemRegion(QP_info *qp, int qix)
-{
- VAPI_ret_t vstat;
-
- //
- // free send memory assocaited with this memory region
- //
- PORTAL_FREE(MSbuf_list[qix].bufptr, MSbuf_list[qix].buf_size);
-
- // de-register it
- vstat = VAPI_deregister_mr(qp->hca_hndl, MSbuf_list[qix].mr_hndl);
-
- if(vstat != VAPI_OK) {
- CERROR("Failed deregistering a send mem region qix %d %s\n",
- qix, VAPI_strerror(vstat));
- return vstat;
- }
-
- //
- // free recv memory assocaited with this memory region
- //
- PORTAL_FREE(MRbuf_list[qix].bufptr, MRbuf_list[qix].buf_size);
-
- // de-register it
- vstat = VAPI_deregister_mr(qp->hca_hndl, MRbuf_list[qix].mr_hndl);
-
- if(vstat != VAPI_OK) {
- CERROR("Failed deregistering a recv mem region qix %d %s\n",
- qix, VAPI_strerror(vstat));
- return vstat;
- }
-
- return vstat;
-}
-
-
-//
-// polling based event handling
-// + a daemon process
-// + poll the CQ and check what is in the CQ
-// + process incoming CQ event
-// +
-//
-
-
-RDMA_Info_Exchange Rdma_info;
-int Cts_Message_arrived = NO;
-
-void k_recv_thread(HCA_info *hca_data)
-{
- VAPI_ret_t vstat;
- VAPI_wc_desc_t comp_desc;
- unsigned long polling_count = 0;
- u_int32_t timeout_usec;
- unsigned int priority = 100;
- unsigned int length;
- VAPI_wr_id_t wrq_id;
- u_int32_t transferred_data_length; /* Num. of bytes transferred */
- void *bufdata;
- VAPI_virt_addr_t bufaddr;
- unsigned long buf_size = 0;
- QP_info *qp; // point to QP_list
-
- kportal_daemonize("k_recv_thread"); // make it as a daemon process
-
- // tuning variable
- timeout_usec = 100; // how is the impact on the performance
-
- // send Q and receive Q are using the same CQ
- // so only poll one CQ for both operations
-
- CDEBUG(D_NET, "IBNAL- enter kibnal_recv_thread\n");
- CDEBUG(D_NET, "hca_hndl = 0X%x, cq_hndl=0X%x\n",
- hca_data->hca_hndl,hca_data->cq_hndl);
-
- qp = hca_data->qp_ptr;
- if(qp == NULL) {
- CDEBUG(D_NET, "in recv_thread qp is NULL\n");
- CDEBUG(D_NET, "Exit from recv_thread qp is NULL\n");
- return;
- }
- else {
- CDEBUG(D_NET, "in recv_thread qp is 0X%X\n", qp);
- }
-
- CDEBUG(D_NET, "kibnal_recv_thread - enter event driver polling loop\n");
-
- //
- // use event driver
- //
-
-
-
- while(1) {
- polling_count++;
-
- //
- // send Q and receive Q are using the same CQ
- // so only poll one CQ for both operations
- //
-
- vstat = VAPI_poll_cq(hca_data->hca_hndl,hca_data->cq_hndl, &comp_desc);
-
- if (vstat == VAPI_CQ_EMPTY) {
- // there is no event in CQE
- continue;
- }
- else {
- if (vstat != (VAPI_OK)) {
- CERROR("error while polling completion queuei vstat %d \n", vstat);
- return;
- }
- }
-
- // process the complete event
- switch(comp_desc.opcode) {
- case VAPI_CQE_SQ_SEND_DATA:
- // about the Send Q ,POST SEND completion
- // who needs this information
- // get wrq_id
- // mark MSbuf_list[wr_id].status = BUF_REGISTERED
-
- wrq_id = comp_desc.id;
-
- if(RDMA_OP_ID < wrq_id) {
- // this RDMA message id, adjust it to the right entry
- wrq_id = wrq_id - RDMA_OP_ID;
- vstat = VAPI_deregister_mr(qp->hca_hndl, Local_rdma_info.send_rdma_mr_hndl);
- }
-
- if(vstat != VAPI_OK) {
- CERROR("VAPI_CQE_SQ_SEND_DATA: Failed deregistering a RDMAi recv" " mem region %s\n", VAPI_strerror(vstat));
- }
-
- if((RDMA_CTS_ID <= wrq_id) && (RDMA_OP_ID < wrq_id)) {
- // RTS or CTS send complete, release send buffer
- if(wrq_id >= RDMA_RTS_ID)
- wrq_id = wrq_id - RDMA_RTS_ID;
- else
- wrq_id = wrq_id - RDMA_CTS_ID;
- }
-
- spin_lock(&MSB_mutex[(int) wrq_id]);
- MRbuf_list[wrq_id].status = BUF_REGISTERED;
- spin_unlock(&MSB_mutex[(int) wrq_id]);
-
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_SQ_SEND_DATA\n");
- break;
-
- case VAPI_CQE_SQ_RDMA_WRITE:
- // about the Send Q, RDMA write completion
- // who needs this information
- // data is successfully write from pource to destionation
-
- // get wr_id
- // mark MSbuf_list[wr_id].status = BUF_REGISTERED
- // de-register rdma buffer
- //
-
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_SQ_RDMA_WRITE\n");
- break;
-
- case VAPI_CQE_SQ_RDMA_READ:
- // about the Send Q
- // RDMA read completion
- // who needs this information
- // data is successfully read from destionation to source
- CDEBUG(D_NET, "CQE opcode- VAPI_CQE_SQ_RDMA_READ\n");
- break;
-
- case VAPI_CQE_SQ_COMP_SWAP:
- // about the Send Q
- // RDMA write completion
- // who needs this information
-
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_SQ_COMP_SWAP\n");
- break;
-
- case VAPI_CQE_SQ_FETCH_ADD:
- // about the Send Q
- // RDMA write completion
- // who needs this information
-
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_SQ_FETCH_ADD\n");
- break;
-
- case VAPI_CQE_SQ_BIND_MRW:
- // about the Send Q
- // RDMA write completion
- // who needs this information
-
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_SQ_BIND_MRW\n");
- break;
-
- case VAPI_CQE_RQ_SEND_DATA:
- // about the Receive Q
- // process the incoming data and
- // forward it to .....
- // a completion recevie event is arriving at CQ
- // issue a recevie to get this arriving data out from CQ
- // pass the receiving data for further processing
- CDEBUG(D_NET, "CQE opcode-VAPI_CQE_RQ_SEND_DATA\n");
- wrq_id = comp_desc.id ;
- transferred_data_length = comp_desc.byte_len;
-
- if((wrq_id >= RDMA_CTS_ID) && (wrq_id < RDMA_OP_ID)) {
- // this is RTS/CTS message
- // process it locally and don't pass it to portals layer
- // adjust wrq_id to get the right entry in MRbfu_list
-
- if(wrq_id >= RDMA_RTS_ID)
- wrq_id = wrq_id - RDMA_RTS_ID;
- else
- wrq_id = wrq_id - RDMA_CTS_ID;
-
- bufaddr = (VAPI_virt_addr_t)(MT_virt_addr_t) MRbuf_list[wrq_id].buf_addr;
- MRbuf_list[wrq_id].status = BUF_INUSE;
- memcpy(&Rdma_info, &bufaddr, sizeof(RDMA_Info_Exchange));
-
- if(Ready_To_send == Rdma_info.opcode)
- // an RTS request message from remote node
- // prepare local RDMA buffer and send local rdma info to
- // remote node
- CTS_handshaking_protocol(&Rdma_info);
- else
- if((Clear_To_send == Rdma_info.opcode) &&
- (RDMA_BUFFER_RESERVED == Rdma_info.flag))
- Cts_Message_arrived = YES;
- else
- if(RDMA_BUFFER_UNAVAILABLE == Rdma_info.flag)
- CERROR("RDMA operation abort-RDMA_BUFFER_UNAVAILABLE\n");
- }
- else {
- //
- // this is an incoming mesage for portals layer
- // move to PORTALS layer for further processing
- //
-
- bufaddr = (VAPI_virt_addr_t)(MT_virt_addr_t)
- MRbuf_list[wrq_id].buf_addr;
-
- MRbuf_list[wrq_id].status = BUF_INUSE;
- transferred_data_length = comp_desc.byte_len;
-
- kibnal_rx(hca_data->kib_data,
- bufaddr,
- transferred_data_length,
- MRbuf_list[wrq_id].buf_size,
- priority);
- }
-
- // repost this receiving buffer and makr it at BUF_REGISTERED
-
- vstat = repost_recv_buf(qp, wrq_id);
- if(vstat != (VAPI_OK)) {
- CERROR("error while polling completion queue\n");
- }
- else {
- MRbuf_list[wrq_id].status = BUF_REGISTERED;
- &n