])
#
-# LN_CONFIG_OPENIB
-#
-# check for OpenIB in the kernel
-AC_DEFUN([LN_CONFIG_OPENIB],[
-AC_MSG_CHECKING([whether to enable OpenIB support])
-# set default
-OPENIBPATH="$LINUX/drivers/infiniband"
-AC_ARG_WITH([openib],
- AC_HELP_STRING([--with-openib=path],
- [build openiblnd against path]),
- [
- case $with_openib in
- yes) ENABLEOPENIB=2
- ;;
- no) ENABLEOPENIB=0
- ;;
- *) OPENIBPATH="$with_openib"
- ENABLEOPENIB=3
- ;;
- esac
- ],[
- ENABLEOPENIB=1
- ])
-if test $ENABLEOPENIB -eq 0; then
- AC_MSG_RESULT([disabled])
-elif test ! \( -f ${OPENIBPATH}/include/ts_ib_core.h -a \
- -f ${OPENIBPATH}/include/ts_ib_cm.h -a \
- -f ${OPENIBPATH}/include/ts_ib_sa_client.h \); then
- AC_MSG_RESULT([no])
- case $ENABLEOPENIB in
- 1) ;;
- 2) AC_MSG_ERROR([kernel OpenIB headers not present]);;
- 3) AC_MSG_ERROR([bad --with-openib path]);;
- *) AC_MSG_ERROR([internal error]);;
- esac
-else
- case $ENABLEOPENIB in
- 1|2) OPENIBCPPFLAGS="-I$OPENIBPATH/include -DIN_TREE_BUILD";;
- 3) OPENIBCPPFLAGS="-I$OPENIBPATH/include";;
- *) AC_MSG_RESULT([no])
- AC_MSG_ERROR([internal error]);;
- esac
- OPENIBCPPFLAGS="$OPENIBCPPFLAGS -DIB_NTXRXPARAMS=4"
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS"
- LB_LINUX_TRY_COMPILE([
- #include <ts_ib_core.h>
- #include <ts_ib_cm.h>
- #include <ts_ib_sa_client.h>
- ],[
- struct ib_device_properties dev_props;
- struct ib_cm_active_param cm_active_params;
- tTS_IB_CLIENT_QUERY_TID tid;
- int enum1 = IB_QP_ATTRIBUTE_STATE;
- int enum2 = IB_ACCESS_LOCAL_WRITE;
- int enum3 = IB_CQ_CALLBACK_INTERRUPT;
- int enum4 = IB_CQ_PROVIDER_REARM;
- return 0;
- ],[
- AC_MSG_RESULT([yes])
- OPENIBLND="openiblnd"
- ],[
- AC_MSG_RESULT([no])
- case $ENABLEOPENIB in
- 1) ;;
- 2) AC_MSG_ERROR([can't compile with kernel OpenIB headers]);;
- 3) AC_MSG_ERROR([can't compile with OpenIB headers under $OPENIBPATH]);;
- *) AC_MSG_ERROR([internal error]);;
- esac
- OPENIBLND=""
- OPENIBCPPFLAGS=""
- ])
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-fi
-AC_SUBST(OPENIBCPPFLAGS)
-AC_SUBST(OPENIBLND)
-])
-
-#
-# LN_CONFIG_CIBLND
-#
-AC_DEFUN([LN_CONFIG_CIB],[
-AC_MSG_CHECKING([whether to enable Cisco/TopSpin IB support])
-# set default
-CIBPATH=""
-CIBLND=""
-AC_ARG_WITH([cib],
- AC_HELP_STRING([--with-cib=path],
- [build ciblnd against path]),
- [
- case $with_cib in
- no) AC_MSG_RESULT([no]);;
- *) CIBPATH="$with_cib"
- if test -d "$CIBPATH"; then
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- AC_MSG_ERROR([No directory $CIBPATH])
- fi;;
- esac
- ],[
- AC_MSG_RESULT([no])
- ])
-if test -n "$CIBPATH"; then
- CIBCPPFLAGS="-I${CIBPATH}/ib/ts_api_ng/include -I${CIBPATH}/all/kernel_services/include -DUSING_TSAPI"
- CIBCPPFLAGS="$CIBCPPFLAGS -DIB_NTXRXPARAMS=3"
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $CIBCPPFLAGS"
- LB_LINUX_TRY_COMPILE([
- #include <ts_ib_core.h>
- #include <ts_ib_cm.h>
- #include <ts_ib_sa_client.h>
- ],[
- struct ib_device_properties dev_props;
- struct ib_cm_active_param cm_active_params;
- tTS_IB_CLIENT_QUERY_TID tid;
- int enum1 = TS_IB_QP_ATTRIBUTE_STATE;
- int enum2 = TS_IB_ACCESS_LOCAL_WRITE;
- int enum3 = TS_IB_CQ_CALLBACK_INTERRUPT;
- int enum4 = TS_IB_CQ_PROVIDER_REARM;
- return 0;
- ],[
- CIBLND="ciblnd"
- ],[
- AC_MSG_ERROR([can't compile ciblnd with given path])
- CIBCPPFLAGS=""
- ])
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-fi
-AC_SUBST(CIBCPPFLAGS)
-AC_SUBST(CIBLND)
-])
-
-#
-# LN_CONFIG_IIB
-#
-# check for infinicon infiniband support
-#
-AC_DEFUN([LN_CONFIG_IIB],[
-AC_MSG_CHECKING([whether to enable Infinicon support])
-# set default
-IIBPATH="/usr/include"
-AC_ARG_WITH([iib],
- AC_HELP_STRING([--with-iib=path],
- [build iiblnd against path]),
- [
- case $with_iib in
- yes) ENABLEIIB=2
- ;;
- no) ENABLEIIB=0
- ;;
- *) IIBPATH="${with_iib}/include"
- ENABLEIIB=3
- ;;
- esac
- ],[
- ENABLEIIB=1
- ])
-if test $ENABLEIIB -eq 0; then
- AC_MSG_RESULT([disabled])
-elif test ! \( -f ${IIBPATH}/linux/iba/ibt.h \); then
- AC_MSG_RESULT([no])
- case $ENABLEIIB in
- 1) ;;
- 2) AC_MSG_ERROR([default Infinicon headers not present]);;
- 3) AC_MSG_ERROR([bad --with-iib path]);;
- *) AC_MSG_ERROR([internal error]);;
- esac
-else
- IIBCPPFLAGS="-I$IIBPATH"
- if test $IIBPATH != "/usr/include"; then
- # we need /usr/include come what may
- IIBCPPFLAGS="$IIBCPPFLAGS -I/usr/include"
- fi
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $IIBCPPFLAGS"
- LB_LINUX_TRY_COMPILE([
- #include <linux/iba/ibt.h>
- ],[
- IBT_INTERFACE_UNION interfaces;
- FSTATUS rc;
-
- rc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2,
- &interfaces);
-
- return rc == FSUCCESS ? 0 : 1;
- ],[
- AC_MSG_RESULT([yes])
- IIBLND="iiblnd"
- ],[
- AC_MSG_RESULT([no])
- case $ENABLEIIB in
- 1) ;;
- 2) AC_MSG_ERROR([can't compile with default Infinicon headers]);;
- 3) AC_MSG_ERROR([can't compile with Infinicon headers under $IIBPATH]);;
- *) AC_MSG_ERROR([internal error]);;
- esac
- IIBLND=""
- IIBCPPFLAGS=""
- ])
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-fi
-AC_SUBST(IIBCPPFLAGS)
-AC_SUBST(IIBLND)
-])
-
-#
-# LN_CONFIG_VIB
-#
-# check for Voltaire infiniband support
-#
-AC_DEFUN([LN_CONFIG_VIB],
-[AC_MSG_CHECKING([whether to enable Voltaire IB support])
-VIBPATH=""
-AC_ARG_WITH([vib],
- AC_HELP_STRING([--with-vib=path],
- [build viblnd against path]),
- [
- case $with_vib in
- no) AC_MSG_RESULT([no]);;
- *) VIBPATH="${with_vib}/src/nvigor/ib-code"
- if test -d "$with_vib" -a -d "$VIBPATH"; then
- AC_MSG_RESULT([yes])
- else
- AC_MSG_RESULT([no])
- AC_MSG_ERROR([No directory $VIBPATH])
- fi;;
- esac
- ],[
- AC_MSG_RESULT([no])
- ])
-if test -z "$VIBPATH"; then
- VIBLND=""
-else
- VIBCPPFLAGS="-I${VIBPATH}/include -I${VIBPATH}/cm"
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS"
- LB_LINUX_TRY_COMPILE([
- #include <linux/list.h>
- #include <asm/byteorder.h>
- #ifdef __BIG_ENDIAN
- # define CPU_BE 1
- # define CPU_LE 0
- #endif
- #ifdef __LITTLE_ENDIAN
- # define CPU_BE 0
- # define CPU_LE 1
- #endif
- #include <vverbs.h>
- #include <ib-cm.h>
- #include <ibat.h>
- ],[
- vv_hca_h_t kib_hca;
- vv_return_t vvrc;
- cm_cep_handle_t cep;
- ibat_arp_data_t arp_data;
- ibat_stat_t ibatrc;
-
- vvrc = vv_hca_open("ANY_HCA", NULL, &kib_hca);
- cep = cm_create_cep(cm_cep_transp_rc);
- ibatrc = ibat_get_ib_data((uint32_t)0, (uint32_t)0,
- ibat_paths_primary, &arp_data,
- (ibat_get_ib_data_reply_fn_t)NULL,
- NULL, 0);
- return 0;
- ],[
- VIBLND="viblnd"
- ],[
- AC_MSG_ERROR([can't compile viblnd with given path])
- ])
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-fi
-if test -n "$VIBLND"; then
- EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS $VIBCPPFLAGS"
- AC_MSG_CHECKING([if Voltaire still uses void * sg addresses])
- LB_LINUX_TRY_COMPILE([
- #include <linux/list.h>
- #include <asm/byteorder.h>
- #ifdef __BIG_ENDIAN
- # define CPU_BE 1
- # define CPU_LE 0
- #endif
- #ifdef __LITTLE_ENDIAN
- # define CPU_BE 0
- # define CPU_LE 1
- #endif
- #include <vverbs.h>
- #include <ib-cm.h>
- #include <ibat.h>
- ],[
- vv_scatgat_t sg;
-
- return &sg.v_address[3] == NULL;
- ],[
- AC_MSG_RESULT([yes])
- VIBCPPFLAGS="$VIBCPPFLAGS -DIBNAL_VOIDSTAR_SGADDR=1"
- ],[
- AC_MSG_RESULT([no])
- ])
- EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-fi
-AC_SUBST(VIBCPPFLAGS)
-AC_SUBST(VIBLND)
-])
-
-#
# LN_CONFIG_RALND
#
# check whether to use the RapidArray lnd
LN_CONFIG_AFFINITY
LN_CONFIG_BACKOFF
LN_CONFIG_QUADRICS
-LN_CONFIG_OPENIB
-LN_CONFIG_CIB
-LN_CONFIG_VIB
-LN_CONFIG_IIB
LN_CONFIG_O2IB
LN_CONFIG_RALND
LN_CONFIG_PTLLND
[AM_CONDITIONAL(BUILD_QSWLND, test x$QSWLND = "xqswlnd")
AM_CONDITIONAL(BUILD_MXLND, test x$MXLND = "xmxlnd")
AM_CONDITIONAL(BUILD_O2IBLND, test x$O2IBLND = "xo2iblnd")
-AM_CONDITIONAL(BUILD_OPENIBLND, test x$OPENIBLND = "xopeniblnd")
-AM_CONDITIONAL(BUILD_CIBLND, test x$CIBLND = "xciblnd")
-AM_CONDITIONAL(BUILD_IIBLND, test x$IIBLND = "xiiblnd")
-AM_CONDITIONAL(BUILD_VIBLND, test x$VIBLND = "xviblnd")
AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd")
AM_CONDITIONAL(BUILD_PTLLND, test x$PTLLND = "xptllnd")
AM_CONDITIONAL(BUILD_UPTLLND, test x$UPTLLND = "xptllnd")
lnet/klnds/autoMakefile
lnet/klnds/mxlnd/autoMakefile
lnet/klnds/mxlnd/Makefile
-lnet/klnds/openiblnd/Makefile
-lnet/klnds/openiblnd/autoMakefile
lnet/klnds/o2iblnd/Makefile
lnet/klnds/o2iblnd/autoMakefile
-lnet/klnds/ciblnd/Makefile
-lnet/klnds/ciblnd/autoMakefile
-lnet/klnds/iiblnd/Makefile
-lnet/klnds/iiblnd/autoMakefile
-lnet/klnds/viblnd/Makefile
-lnet/klnds/viblnd/autoMakefile
lnet/klnds/qswlnd/Makefile
lnet/klnds/qswlnd/autoMakefile
lnet/klnds/ralnd/Makefile
/* PROTO MAGIC for LNDs */
#define LNET_PROTO_IB_MAGIC 0x0be91b91
-#define LNET_PROTO_OPENIB_MAGIC LNET_PROTO_IB_MAGIC
-#define LNET_PROTO_IIB_MAGIC LNET_PROTO_IB_MAGIC
-#define LNET_PROTO_VIB_MAGIC LNET_PROTO_IB_MAGIC
#define LNET_PROTO_RA_MAGIC 0x0be91b92
#define LNET_PROTO_QSW_MAGIC 0x0be91b93
#define LNET_PROTO_TCP_MAGIC 0xeebc0ded
#ifndef HAVE_SYSCTL_UNNUMBERED
#define CTL_KRANAL 201
-#define CTL_KIBNAL 203
-#define CTL_IIBBLND 204
#define CTL_O2IBLND 205
#define CTL_PTLLND 206
#define CTL_QSWNAL 207
#define CTL_SOCKLND 208
-#define CTL_VIBLND 209
#define CTL_GNILND 210
#else
#define CTL_KRANAL CTL_UNNUMBERED
-#define CTL_KIBNAL CTL_UNNUMBERED
-#define CTL_IIBLND CTL_UNNUMBERED
#define CTL_O2IBLND CTL_UNNUMBERED
#define CTL_PTLLND CTL_UNNUMBERED
#define CTL_QSWNAL CTL_UNNUMBERED
#define CTL_SOCKLND CTL_UNNUMBERED
-#define CTL_VIBLND CTL_UNNUMBERED
#define CTL_GNILND CTL_UNNUMBERED
#endif /* sysctl id */
@BUILD_MXLND_TRUE@subdir-m += mxlnd
@BUILD_RALND_TRUE@subdir-m += ralnd
@BUILD_O2IBLND_TRUE@subdir-m += o2iblnd
-@BUILD_OPENIBLND_TRUE@subdir-m += openiblnd
-@BUILD_CIBLND_TRUE@subdir-m += ciblnd
-@BUILD_IIBLND_TRUE@subdir-m += iiblnd
-@BUILD_VIBLND_TRUE@subdir-m += viblnd
@BUILD_QSWLND_TRUE@subdir-m += qswlnd
@BUILD_PTLLND_TRUE@subdir-m += ptllnd
subdir-m += socklnd
# Lustre is a trademark of Sun Microsystems, Inc.
#
-SUBDIRS = socklnd qswlnd mxlnd openiblnd iiblnd viblnd ralnd ptllnd ciblnd o2iblnd
+SUBDIRS = socklnd qswlnd mxlnd ralnd ptllnd o2iblnd
+++ /dev/null
-/.deps
-/Makefile
-/.*.cmd
-/autoMakefile.in
-/autoMakefile
-/*.ko
-/*.mod.c
-/.*.flags
-/.tmp_versions
-/.depend
+++ /dev/null
-MODULES := kciblnd
-kciblnd-objs := ciblnd.o ciblnd_cb.o ciblnd_modparams.o
-
-default: all
-
-EXTRA_POST_CFLAGS := @CIBCPPFLAGS@ -I@LUSTRE@/../lnet/klnds/openiblnd
-
-@INCLUDE_RULES@
+++ /dev/null
-#
-# GPL HEADER START
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 only,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License version 2 for more details (a copy is included
-# in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU General Public License
-# version 2 along with this program; If not, see
-# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-#
-# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-# CA 95054 USA or visit www.sun.com if you need additional information or
-# have any questions.
-#
-# GPL HEADER END
-#
-
-#
-# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# This file is part of Lustre, http://www.lustre.org/
-# Lustre is a trademark of Sun Microsystems, Inc.
-#
-
-if MODULES
-if BUILD_CIBLND
-modulenet_DATA = kciblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kciblnd-objs:%.o=%.c)
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include "openiblnd.c"
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include "openiblnd_cb.c"
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- */
-
-#include "openiblnd_modparams.c"
+++ /dev/null
-/.deps
-/Makefile
-/.*.cmd
-/autoMakefile.in
-/autoMakefile
-/*.ko
-/*.mod.c
-/.*.flags
-/.tmp_versions
-/.depend
+++ /dev/null
-MODULES := kiiblnd
-kiiblnd-objs := iiblnd.o iiblnd_cb.o iiblnd_modparams.o
-
-EXTRA_POST_CFLAGS := @IIBCPPFLAGS@
-
-@INCLUDE_RULES@
+++ /dev/null
-# GPL HEADER START
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2 only,
-# as published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License version 2 for more details (a copy is included
-# in the LICENSE file that accompanied this code).
-#
-# You should have received a copy of the GNU General Public License
-# version 2 along with this program; If not, see
-# http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
-#
-# Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
-# CA 95054 USA or visit www.sun.com if you need additional information or
-# have any questions.
-#
-# GPL HEADER END
-#
-
-#
-# Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
-# Use is subject to license terms.
-#
-
-#
-# This file is part of Lustre, http://www.lustre.org/
-# Lustre is a trademark of Sun Microsystems, Inc.
-#
-
-if MODULES
-if BUILD_IIBLND
-modulenet_DATA = kiiblnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-DIST_SOURCES = $(kiiblnd-objs:%.o=%.c) iiblnd.h
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/iiblnd/iiblnd.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "iiblnd.h"
-
-lnd_t the_kiblnd = {
- .lnd_type = IIBLND,
- .lnd_startup = kibnal_startup,
- .lnd_shutdown = kibnal_shutdown,
- .lnd_ctl = kibnal_ctl,
- .lnd_send = kibnal_send,
- .lnd_recv = kibnal_recv,
- .lnd_eager_recv = kibnal_eager_recv,
-};
-
-kib_data_t kibnal_data;
-
-__u32
-kibnal_cksum (void *ptr, int nob)
-{
- char *c = ptr;
- __u32 sum = 0;
-
- while (nob-- > 0)
- sum = ((sum << 1) | (sum >> 31)) + *c++;
-
- /* ensure I don't return 0 (== no checksum) */
- return (sum == 0) ? 1 : sum;
-}
-
-void
-kibnal_init_msg(kib_msg_t *msg, int type, int body_nob)
-{
- msg->ibm_type = type;
- msg->ibm_nob = offsetof(kib_msg_t, ibm_u) + body_nob;
-}
-
-void
-kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq)
-{
- /* CAVEAT EMPTOR! all message fields not set here should have been
- * initialised previously. */
- msg->ibm_magic = IBNAL_MSG_MAGIC;
- msg->ibm_version = version;
- /* ibm_type */
- msg->ibm_credits = credits;
- /* ibm_nob */
- msg->ibm_cksum = 0;
- msg->ibm_srcnid = kibnal_data.kib_ni->ni_nid;
- msg->ibm_srcstamp = kibnal_data.kib_incarnation;
- msg->ibm_dstnid = dstnid;
- msg->ibm_dststamp = dststamp;
- msg->ibm_seq = seq;
-
- if (*kibnal_tunables.kib_cksum) {
- /* NB ibm_cksum zero while computing cksum */
- msg->ibm_cksum = kibnal_cksum(msg, msg->ibm_nob);
- }
-}
-
-void
-kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob,
- int type, lnet_nid_t dstnid, __u64 dststamp)
-{
- LASSERT (nob >= offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- memset(msg, 0, nob);
- kibnal_init_msg(msg, type, sizeof(kib_connparams_t));
-
- msg->ibm_u.connparams.ibcp_queue_depth = IBNAL_MSG_QUEUE_SIZE;
- msg->ibm_u.connparams.ibcp_max_msg_size = IBNAL_MSG_SIZE;
- msg->ibm_u.connparams.ibcp_max_frags = IBNAL_MAX_RDMA_FRAGS;
-
- kibnal_pack_msg(msg, version, 0, dstnid, dststamp, 0);
-}
-
-int
-kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob)
-{
- const int hdr_size = offsetof(kib_msg_t, ibm_u);
- __u32 msg_cksum;
- __u32 msg_version;
- int flip;
- int msg_nob;
-#if !IBNAL_USE_FMR
- int i;
- int n;
-#endif
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- /* Future protocol version compatibility support!
- * If the iiblnd-specific protocol changes, or when LNET unifies
- * protocols over all LNDs, the initial connection will negotiate a
- * protocol version. If I find this, I avoid any console errors. If
- * my is doing connection establishment, the reject will tell the peer
- * which version I'm running. */
-
- if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ibm_magic == __swab32(IBNAL_MSG_MAGIC)) {
- flip = 1;
- } else {
- if (msg->ibm_magic == LNET_PROTO_MAGIC ||
- msg->ibm_magic == __swab32(LNET_PROTO_MAGIC))
- return -EPROTO;
-
- /* Completely out to lunch */
- CERROR("Bad magic: %08x\n", msg->ibm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ibm_version) : msg->ibm_version;
- if (expected_version == 0) {
- if (msg_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- msg_version != IBNAL_MSG_VERSION)
- return -EPROTO;
- } else if (msg_version != expected_version) {
- CERROR("Bad version: %x(%x expected)\n",
- msg_version, expected_version);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: %d\n", nob);
- return -EPROTO;
- }
-
- msg_nob = flip ? __swab32(msg->ibm_nob) : msg->ibm_nob;
- if (msg_nob > nob) {
- CERROR("Short message: got %d, wanted %d\n", nob, msg_nob);
- return -EPROTO;
- }
-
- /* checksum must be computed with ibm_cksum zero and BEFORE anything
- * gets flipped */
- msg_cksum = flip ? __swab32(msg->ibm_cksum) : msg->ibm_cksum;
- msg->ibm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kibnal_cksum(msg, msg_nob)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
- msg->ibm_cksum = msg_cksum;
-
- if (flip) {
- /* leave magic unflipped as a clue to peer endianness */
- msg->ibm_version = msg_version;
- CLASSERT (sizeof(msg->ibm_type) == 1);
- CLASSERT (sizeof(msg->ibm_credits) == 1);
- msg->ibm_nob = msg_nob;
- __swab64s(&msg->ibm_srcnid);
- __swab64s(&msg->ibm_srcstamp);
- __swab64s(&msg->ibm_dstnid);
- __swab64s(&msg->ibm_dststamp);
- __swab64s(&msg->ibm_seq);
- }
-
- if (msg->ibm_srcnid == LNET_NID_ANY) {
- CERROR("Bad src nid: %s\n", libcfs_nid2str(msg->ibm_srcnid));
- return -EPROTO;
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Unknown message type %x\n", msg->ibm_type);
- return -EPROTO;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- if (msg_nob < offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0])) {
- CERROR("Short IMMEDIATE: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[0]));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putreq)) {
- CERROR("Short PUT_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putreq)));
- return -EPROTO;
- }
- break;
-
- case IBNAL_MSG_PUT_ACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.putack)) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.putack)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_addr);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nob);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_key);
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.putack.ibpam_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad PUT_ACK nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n])) {
- CERROR("Short PUT_ACK: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.putack.ibpam_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip) {
- for (i = 0; i < n; i++) {
- __swab32s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.putack.ibpam_rd.rd_frags[i].rf_addr);
- }
- }
-#endif
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.get)) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.get)));
- return -EPROTO;
- }
-#if IBNAL_USE_FMR
- if (flip) {
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_addr);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nob);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- }
-#else
- if (flip) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_key);
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_nfrag);
- }
-
- n = msg->ibm_u.get.ibgm_rd.rd_nfrag;
- if (n <= 0 || n > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Bad GET_REQ nfrags: %d, should be 0 < n <= %d\n",
- n, IBNAL_MAX_RDMA_FRAGS);
- return -EPROTO;
- }
-
- if (msg_nob < offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n])) {
- CERROR("Short GET_REQ: %d(%d)\n", msg_nob,
- (int)offsetof(kib_msg_t, ibm_u.get.ibgm_rd.rd_frags[n]));
- return -EPROTO;
- }
-
- if (flip)
- for (i = 0; i < msg->ibm_u.get.ibgm_rd.rd_nfrag; i++) {
- __swab32s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_nob);
- __swab64s(&msg->ibm_u.get.ibgm_rd.rd_frags[i].rf_addr);
- }
-#endif
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.completion)) {
- CERROR("Short RDMA completion: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.completion)));
- return -EPROTO;
- }
- if (flip)
- __swab32s(&msg->ibm_u.completion.ibcm_status);
- break;
-
- case IBNAL_MSG_CONNREQ:
- case IBNAL_MSG_CONNACK:
- if (msg_nob < hdr_size + sizeof(msg->ibm_u.connparams)) {
- CERROR("Short connreq/ack: %d(%d)\n", msg_nob,
- (int)(hdr_size + sizeof(msg->ibm_u.connparams)));
- return -EPROTO;
- }
- if (flip) {
- __swab32s(&msg->ibm_u.connparams.ibcp_queue_depth);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_msg_size);
- __swab32s(&msg->ibm_u.connparams.ibcp_max_frags);
- }
- break;
- }
- return 0;
-}
-
-IB_HANDLE
-kibnal_create_cep(lnet_nid_t nid)
-{
- FSTATUS frc;
- __u32 u32val;
- IB_HANDLE cep;
-
- cep = iba_cm_create_cep(CM_RC_TYPE);
- if (cep == NULL) {
- CERROR ("Can't create CEP for %s\n",
- (nid == LNET_NID_ANY) ? "listener" :
- libcfs_nid2str(nid));
- return NULL;
- }
-
- if (nid == LNET_NID_ANY) {
- u32val = 1;
- frc = iba_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set async_accept: %d\n", frc);
- goto failed;
- }
-
- u32val = 0; /* sets system max */
- frc = iba_cm_modify_cep(cep, CM_FLAG_LISTEN_BACKLOG,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set listen backlog: %d\n", frc);
- goto failed;
- }
- }
-
- u32val = 1;
- frc = iba_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
- (char *)&u32val, sizeof(u32val), 0);
- if (frc != FSUCCESS) {
- CERROR("Can't set timewait_callback for %s: %d\n",
- (nid == LNET_NID_ANY) ? "listener" :
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- return cep;
-
- failed:
- iba_cm_destroy_cep(cep);
- return NULL;
-}
-
-#define IBNAL_CHECK_ADVERT 1
-#if IBNAL_CHECK_ADVERT
-void
-kibnal_service_query_done (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qry_result)
-{
- int *rcp = arg;
- FSTATUS frc = qry_result->Status;
- SERVICE_RECORD_RESULTS *svc_rslt;
- IB_SERVICE_RECORD *svc;
- lnet_nid_t nid;
-
- if (frc != FSUCCESS || qry_result->ResultDataSize == 0) {
- CERROR("Error checking advert: status %d data size %d\n",
- frc, qry_result->ResultDataSize);
- *rcp = -EIO;
- goto out;
- }
-
- svc_rslt = (SERVICE_RECORD_RESULTS *)qry_result->QueryResult;
-
- if (svc_rslt->NumServiceRecords < 1) {
- CERROR("Check advert: %d records\n",
- svc_rslt->NumServiceRecords);
- *rcp = -ENOENT;
- goto out;
- }
-
- svc = &svc_rslt->ServiceRecords[0];
- nid = le64_to_cpu(*kibnal_service_nid_field(svc));
-
- CDEBUG(D_NET, "Check advert: %s "LPX64" "LPX64":%04x\n",
- libcfs_nid2str(nid), svc->RID.ServiceID,
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- svc->RID.ServiceP_Key);
-
- if (nid != kibnal_data.kib_ni->ni_nid) {
- CERROR("Check advert: Bad NID %s (%s expected)\n",
- libcfs_nid2str(nid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceID != *kibnal_tunables.kib_service_number) {
- CERROR("Check advert: Bad ServiceID "LPX64" (%x expected)\n",
- svc->RID.ServiceID,
- *kibnal_tunables.kib_service_number);
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceGID.Type.Global.InterfaceID !=
- kibnal_data.kib_port_guid) {
- CERROR("Check advert: Bad GUID "LPX64" ("LPX64" expected)\n",
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- kibnal_data.kib_port_guid);
- *rcp = -EINVAL;
- goto out;
- }
-
- if (svc->RID.ServiceP_Key != kibnal_data.kib_port_pkey) {
- CERROR("Check advert: Bad PKEY %04x (%04x expected)\n",
- svc->RID.ServiceP_Key, kibnal_data.kib_port_pkey);
- *rcp = -EINVAL;
- goto out;
- }
-
- CDEBUG(D_NET, "Check advert OK\n");
- *rcp = 0;
-
- out:
- up (&kibnal_data.kib_listener_signal);
-}
-
-int
-kibnal_check_advert (void)
-{
- /* single-threaded */
- static QUERY qry;
-
- FSTATUS frc;
- int rc;
-
- memset (&qry, 0, sizeof(qry));
- qry.InputType = InputTypeServiceRecord;
- qry.OutputType = OutputTypeServiceRecord;
- kibnal_set_service_keys(&qry.InputValue.ServiceRecordValue.ServiceRecord,
- kibnal_data.kib_ni->ni_nid);
- qry.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
-
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &qry,
- kibnal_service_query_done,
- &kibnal_data.kib_sdretry,
- &rc);
- if (frc != FPENDING) {
- CERROR ("Immediate error %d checking SM service\n", frc);
- return -EIO;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- if (rc != 0)
- CERROR ("Error %d checking SM service\n", rc);
- return rc;
-}
-#else
-int
-kibnal_check_advert(void)
-{
- return 0;
-}
-#endif
-
-void
-kibnal_fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
-{
- IB_SERVICE_RECORD *svc;
-
- memset (fod, 0, sizeof(*fod));
- fod->Type = type;
-
- svc = &fod->Value.ServiceRecordValue.ServiceRecord;
- svc->RID.ServiceID = *kibnal_tunables.kib_service_number;
- svc->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid;
- svc->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX;
- svc->RID.ServiceP_Key = kibnal_data.kib_port_pkey;
- svc->ServiceLease = 0xffffffff;
-
- kibnal_set_service_keys(svc, kibnal_data.kib_ni->ni_nid);
-}
-
-void
-kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
- FSTATUS frc, uint32 madrc)
-{
- *(FSTATUS *)arg = frc;
- up (&kibnal_data.kib_listener_signal);
-}
-
-int
-kibnal_advertise (void)
-{
- /* Single threaded here */
- static FABRIC_OPERATION_DATA fod;
-
- IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
- FSTATUS frc;
- FSTATUS frc2;
-
- if (strlen(*kibnal_tunables.kib_service_name) >=
- sizeof(svc->ServiceName)) {
- CERROR("Service name '%s' too long (%d chars max)\n",
- *kibnal_tunables.kib_service_name,
- (int)sizeof(svc->ServiceName) - 1);
- return -EINVAL;
- }
-
- kibnal_fill_fod(&fod, FabOpSetServiceRecord);
-
- CDEBUG(D_NET, "Advertising service id "LPX64" %s:%s\n",
- svc->RID.ServiceID, svc->ServiceName,
- libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
-
- frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &fod,
- kibnal_service_setunset_done,
- &kibnal_data.kib_sdretry,
- &frc2);
-
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d advertising NID %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return -EIO;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- frc = frc2;
- if (frc == FSUCCESS)
- return 0;
-
- CERROR ("Error %d advertising %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return -EIO;
-}
-
-void
-kibnal_unadvertise (int expect_success)
-{
- /* single threaded */
- static FABRIC_OPERATION_DATA fod;
-
- IB_SERVICE_RECORD *svc = &fod.Value.ServiceRecordValue.ServiceRecord;
- FSTATUS frc;
- FSTATUS frc2;
-
- LASSERT (kibnal_data.kib_ni->ni_nid != LNET_NID_ANY);
-
- kibnal_fill_fod(&fod, FabOpDeleteServiceRecord);
-
- CDEBUG(D_NET, "Unadvertising service %s:%s\n",
- svc->ServiceName,
- libcfs_nid2str(le64_to_cpu(*kibnal_service_nid_field(svc))));
-
- frc = iba_sd_port_fabric_operation(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- &fod,
- kibnal_service_setunset_done,
- &kibnal_data.kib_sdretry,
- &frc2);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("Immediate error %d unadvertising NID %s\n",
- frc, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- return;
- }
-
- down (&kibnal_data.kib_listener_signal);
-
- CDEBUG(D_NET, "Unadvertise rc: %d\n", frc2);
-
- if ((frc2 == FSUCCESS) == !!expect_success)
- return;
-
- if (expect_success)
- CERROR("Error %d unadvertising NID %s\n",
- frc2, libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- else
- CWARN("Removed conflicting NID %s\n",
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
-}
-
-void
-kibnal_stop_listener(int normal_shutdown)
-{
- /* NB this also disables peer creation and destroys all existing
- * peers */
- IB_HANDLE cep = kibnal_data.kib_listener_cep;
- unsigned long flags;
- FSTATUS frc;
-
- LASSERT (cep != NULL);
-
- kibnal_unadvertise(normal_shutdown);
-
- frc = iba_cm_cancel(cep);
- if (frc != FSUCCESS && frc != FPENDING)
- CERROR ("Error %d stopping listener\n", frc);
-
- down(&kibnal_data.kib_listener_signal);
-
- frc = iba_cm_destroy_cep(cep);
- if (frc != FSUCCESS)
- CERROR ("Error %d destroying listener CEP\n", frc);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* This assignment disables peer creation */
- kibnal_data.kib_listener_cep = NULL;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* Start to tear down any peers created while the listener was
- * running */
- kibnal_del_peer(LNET_NID_ANY);
-}
-
-int
-kibnal_start_listener(void)
-{
- /* NB this also enables peer creation */
-
- IB_HANDLE cep;
- CM_LISTEN_INFO info;
- unsigned long flags;
- int rc;
- FSTATUS frc;
-
- LASSERT (kibnal_data.kib_listener_cep == NULL);
- init_MUTEX_LOCKED (&kibnal_data.kib_listener_signal);
-
- cep = kibnal_create_cep(LNET_NID_ANY);
- if (cep == NULL)
- return -ENOMEM;
-
- memset (&info, 0, sizeof(info));
- info.ListenAddr.EndPt.SID = *kibnal_tunables.kib_service_number;
-
- frc = iba_cm_listen(cep, &info, kibnal_listen_callback, NULL);
- if (frc != FSUCCESS && frc != FPENDING) {
- CERROR ("iba_cm_listen error: %d\n", frc);
-
- iba_cm_destroy_cep(cep);
- return -EIO;
- }
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- /* This assignment enables peer creation */
- kibnal_data.kib_listener_cep = cep;
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- rc = kibnal_advertise();
- if (rc == 0)
- rc = kibnal_check_advert();
-
- if (rc == 0)
- return 0;
-
- kibnal_stop_listener(0);
- return rc;
-}
-
-int
-kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- unsigned long flags;
- int rc;
-
- LASSERT (nid != LNET_NID_ANY);
-
- LIBCFS_ALLOC (peer, sizeof (*peer));
- if (peer == NULL) {
- CERROR("Cannot allocate peer\n");
- return -ENOMEM;
- }
-
- memset(peer, 0, sizeof(*peer)); /* zero flags etc */
-
- peer->ibp_nid = nid;
- atomic_set (&peer->ibp_refcount, 1); /* 1 ref for caller */
-
- INIT_LIST_HEAD (&peer->ibp_list); /* not in the peer table yet */
- INIT_LIST_HEAD (&peer->ibp_conns);
- INIT_LIST_HEAD (&peer->ibp_tx_queue);
-
- peer->ibp_error = 0;
- peer->ibp_last_alive = cfs_time_current();
- peer->ibp_reconnect_interval = 0; /* OK to connect at any time */
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (atomic_read(&kibnal_data.kib_npeers) >=
- *kibnal_tunables.kib_concurrent_peers) {
- rc = -EOVERFLOW; /* !! but at least it distinguishes */
- } else if (kibnal_data.kib_listener_cep == NULL) {
- rc = -ESHUTDOWN; /* shutdown has started */
- } else {
- rc = 0;
- /* npeers only grows with the global lock held */
- atomic_inc(&kibnal_data.kib_npeers);
- }
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (rc != 0) {
- CERROR("Can't create peer: %s\n",
- (rc == -ESHUTDOWN) ? "shutting down" :
- "too many peers");
- LIBCFS_FREE(peer, sizeof(*peer));
- } else {
- *peerp = peer;
- }
-
- return rc;
-}
-
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
-
- LASSERT (atomic_read (&peer->ibp_refcount) == 0);
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (!kibnal_peer_active(peer));
- LASSERT (!kibnal_peer_connecting(peer));
- LASSERT (list_empty (&peer->ibp_conns));
- LASSERT (list_empty (&peer->ibp_tx_queue));
-
- LIBCFS_FREE (peer, sizeof (*peer));
-
- /* NB a peer's connections keep a reference on their peer until
- * they are destroyed, so we can be assured that _all_ state to do
- * with this peer has been cleaned up when its refcount drops to
- * zero. */
- atomic_dec (&kibnal_data.kib_npeers);
-}
-
-/* the caller is responsible for accounting for the additional reference
- * that this creates */
-kib_peer_t *
-kibnal_find_peer_locked (lnet_nid_t nid)
-{
- struct list_head *peer_list = kibnal_nid2peerlist (nid);
- struct list_head *tmp;
- kib_peer_t *peer;
-
- list_for_each (tmp, peer_list) {
-
- peer = list_entry (tmp, kib_peer_t, ibp_list);
-
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (peer->ibp_nid != nid)
- continue;
-
- CDEBUG(D_NET, "got peer %s (%d)\n",
- libcfs_nid2str(nid), atomic_read (&peer->ibp_refcount));
- return (peer);
- }
- return (NULL);
-}
-
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
- LASSERT (peer->ibp_persistence == 0);
- LASSERT (list_empty(&peer->ibp_conns));
-
- LASSERT (kibnal_peer_active(peer));
- list_del_init (&peer->ibp_list);
- /* lose peerlist's ref */
- kibnal_peer_decref(peer);
-}
-
-int
-kibnal_get_peer_info (int index, lnet_nid_t *nidp, int *persistencep)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (index-- > 0)
- continue;
-
- *nidp = peer->ibp_nid;
- *persistencep = peer->ibp_persistence;
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (0);
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (-ENOENT);
-}
-
-int
-kibnal_add_persistent_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- int rc;
-
- if (nid == LNET_NID_ANY)
- return (-EINVAL);
-
- rc = kibnal_create_peer(&peer, nid);
- if (rc != 0)
- return rc;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- /* I'm always called with a reference on kibnal_data.kib_ni
- * so shutdown can't have started */
- LASSERT (kibnal_data.kib_listener_cep != NULL);
-
- peer2 = kibnal_find_peer_locked (nid);
- if (peer2 != NULL) {
- kibnal_peer_decref (peer);
- peer = peer2;
- } else {
- /* peer table takes existing ref on peer */
- list_add_tail (&peer->ibp_list,
- kibnal_nid2peerlist (nid));
- }
-
- peer->ibp_persistence++;
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return (0);
-}
-
-void
-kibnal_del_peer_locked (kib_peer_t *peer)
-{
- struct list_head *ctmp;
- struct list_head *cnxt;
- kib_conn_t *conn;
-
- peer->ibp_persistence = 0;
-
- if (list_empty(&peer->ibp_conns)) {
- kibnal_unlink_peer_locked(peer);
- } else {
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry(ctmp, kib_conn_t, ibc_list);
-
- kibnal_close_conn_locked (conn, 0);
- }
- /* NB peer is no longer persistent; closing its last conn
- * unlinked it. */
- }
- /* NB peer now unlinked; might even be freed if the peer table had the
- * last ref on it. */
-}
-
-int
-kibnal_del_peer (lnet_nid_t nid)
-{
- unsigned long flags;
- CFS_LIST_HEAD (zombies);
- struct list_head *ptmp;
- struct list_head *pnxt;
- kib_peer_t *peer;
- int lo;
- int hi;
- int i;
- int rc = -ENOENT;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || peer->ibp_nid == nid))
- continue;
-
- if (!list_empty(&peer->ibp_tx_queue)) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- list_splice_init(&peer->ibp_tx_queue, &zombies);
- }
-
- kibnal_del_peer_locked (peer);
- rc = 0; /* matched something */
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_txlist_done(&zombies, -EIO);
-
- return (rc);
-}
-
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
- kib_peer_t *peer;
- struct list_head *ptmp;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
- int i;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- list_for_each (ptmp, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- list_for_each (ctmp, &peer->ibp_conns) {
- if (index-- > 0)
- continue;
-
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
- kibnal_conn_addref(conn);
- read_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- return (conn);
- }
- }
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
- return (NULL);
-}
-
-int
-kibnal_conn_rts(kib_conn_t *conn,
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn)
-{
- IB_PATH_RECORD *path = &conn->ibc_cvars->cv_path;
- IB_HANDLE qp = conn->ibc_qp;
- IB_QP_ATTRIBUTES_MODIFY modify_attr;
- FSTATUS frc;
- int rc;
-
- if (resp_res > kibnal_data.kib_hca_attrs.MaxQPResponderResources)
- resp_res = kibnal_data.kib_hca_attrs.MaxQPResponderResources;
-
- if (init_depth > kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth)
- init_depth = kibnal_data.kib_hca_attrs.MaxQPInitiatorDepth;
-
- modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateReadyToRecv,
- .RecvPSN = IBNAL_STARTING_PSN,
- .DestQPNumber = qpn,
- .ResponderResources = resp_res,
- .MinRnrTimer = UsecToRnrNakTimer(2000), /* 20 ms */
- .Attrs = (IB_QP_ATTR_RECVPSN |
- IB_QP_ATTR_DESTQPNUMBER |
- IB_QP_ATTR_RESPONDERRESOURCES |
- IB_QP_ATTR_DESTAV |
- IB_QP_ATTR_PATHMTU |
- IB_QP_ATTR_MINRNRTIMER),
- };
- GetAVFromPath(0, path, &modify_attr.PathMTU, NULL,
- &modify_attr.DestAV);
-
- frc = iba_modify_qp(qp, &modify_attr, NULL);
- if (frc != FSUCCESS) {
- CERROR("Can't set QP %s ready to receive: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- rc = kibnal_post_receives(conn);
- if (rc != 0) {
- CERROR("Can't post receives for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- return rc;
- }
-
- modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateReadyToSend,
- .FlowControl = TRUE,
- .InitiatorDepth = init_depth,
- .SendPSN = psn,
- .LocalAckTimeout = path->PktLifeTime + 2, /* 2 or 1? */
- .RetryCount = IBNAL_RETRY,
- .RnrRetryCount = IBNAL_RNR_RETRY,
- .Attrs = (IB_QP_ATTR_FLOWCONTROL |
- IB_QP_ATTR_INITIATORDEPTH |
- IB_QP_ATTR_SENDPSN |
- IB_QP_ATTR_LOCALACKTIMEOUT |
- IB_QP_ATTR_RETRYCOUNT |
- IB_QP_ATTR_RNRRETRYCOUNT),
- };
-
- frc = iba_modify_qp(qp, &modify_attr, NULL);
- if (frc != FSUCCESS) {
- CERROR("Can't set QP %s ready to send: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't query QP %s attributes: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- return -EIO;
- }
-
- return 0;
-}
-
-kib_conn_t *
-kibnal_create_conn (lnet_nid_t nid, int proto_version)
-{
- kib_conn_t *conn;
- int i;
- int page_offset;
- int ipage;
- int rc;
- FSTATUS frc;
- union {
- IB_QP_ATTRIBUTES_CREATE qp_create;
- IB_QP_ATTRIBUTES_MODIFY qp_attr;
- } params;
-
- LIBCFS_ALLOC (conn, sizeof (*conn));
- if (conn == NULL) {
- CERROR ("Can't allocate connection for %s\n",
- libcfs_nid2str(nid));
- return (NULL);
- }
-
- /* zero flags, NULL pointers etc... */
- memset (conn, 0, sizeof (*conn));
- conn->ibc_state = IBNAL_CONN_INIT_NOTHING;
- conn->ibc_version = proto_version;
-
- INIT_LIST_HEAD (&conn->ibc_early_rxs);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_nocred);
- INIT_LIST_HEAD (&conn->ibc_tx_queue);
- INIT_LIST_HEAD (&conn->ibc_tx_queue_rsrvd);
- INIT_LIST_HEAD (&conn->ibc_active_txs);
- spin_lock_init (&conn->ibc_lock);
-
- atomic_inc (&kibnal_data.kib_nconns);
- /* well not really, but I call destroy() on failure, which decrements */
-
- LIBCFS_ALLOC(conn->ibc_cvars, sizeof (*conn->ibc_cvars));
- if (conn->ibc_cvars == NULL) {
- CERROR ("Can't allocate connvars for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
- memset(conn->ibc_cvars, 0, sizeof (*conn->ibc_cvars));
-
- LIBCFS_ALLOC(conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
- if (conn->ibc_rxs == NULL) {
- CERROR("Cannot allocate RX descriptors for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
- memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES);
- if (rc != 0) {
- CERROR("Can't allocate RX buffers for %s\n",
- libcfs_nid2str(nid));
- goto failed;
- }
-
- for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
- struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
- kib_rx_t *rx = &conn->ibc_rxs[i];
-
- rx->rx_conn = conn;
- rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- rx->rx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
- lnet_page2phys(page) + page_offset;
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
- }
- }
-
- params.qp_create = (IB_QP_ATTRIBUTES_CREATE) {
- .Type = QPTypeReliableConnected,
- .SendQDepth = (1 + IBNAL_MAX_RDMA_FRAGS) *
- (*kibnal_tunables.kib_concurrent_sends),
- .RecvQDepth = IBNAL_RX_MSGS,
- .SendDSListDepth = 1,
- .RecvDSListDepth = 1,
- .SendCQHandle = kibnal_data.kib_cq,
- .RecvCQHandle = kibnal_data.kib_cq,
- .PDHandle = kibnal_data.kib_pd,
- .SendSignaledCompletions = TRUE,
- };
- frc = iba_create_qp(kibnal_data.kib_hca, ¶ms.qp_create, NULL,
- &conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs);
- if (frc != 0) {
- CERROR ("Can't create QP %s: %d\n", libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- /* Mark QP created */
- kibnal_set_conn_state(conn, IBNAL_CONN_INIT_QP);
-
- params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) {
- .RequestState = QPStateInit,
- .Attrs = (IB_QP_ATTR_PORTGUID |
- IB_QP_ATTR_PKEYINDEX |
- IB_QP_ATTR_ACCESSCONTROL),
- .PortGUID = kibnal_data.kib_port_guid,
- .PkeyIndex = 0,
- .AccessControl = {
- .s = {
- .RdmaWrite = 1,
- .RdmaRead = 1,
- },
- },
- };
- frc = iba_modify_qp(conn->ibc_qp, ¶ms.qp_attr, NULL);
- if (frc != 0) {
- CERROR ("Can't set QP %s state to INIT: %d\n",
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- frc = iba_query_qp(conn->ibc_qp, &conn->ibc_cvars->cv_qpattrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't query QP %s attributes: %d\n",
- libcfs_nid2str(nid), frc);
- goto failed;
- }
-
- /* 1 ref for caller */
- atomic_set (&conn->ibc_refcount, 1);
- CDEBUG(D_NET, "New conn %p\n", conn);
- return (conn);
-
- failed:
- kibnal_destroy_conn (conn);
- return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
- FSTATUS frc;
-
- LASSERT (!in_interrupt());
-
- CDEBUG (D_NET, "connection %s\n",
- (conn->ibc_peer) == NULL ? "<ANON>" :
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- LASSERT (atomic_read (&conn->ibc_refcount) == 0);
- LASSERT (list_empty(&conn->ibc_early_rxs));
- LASSERT (list_empty(&conn->ibc_tx_queue));
- LASSERT (list_empty(&conn->ibc_tx_queue_rsrvd));
- LASSERT (list_empty(&conn->ibc_tx_queue_nocred));
- LASSERT (list_empty(&conn->ibc_active_txs));
- LASSERT (conn->ibc_nsends_posted == 0);
-
- switch (conn->ibc_state) {
- case IBNAL_CONN_INIT_NOTHING:
- case IBNAL_CONN_INIT_QP:
- case IBNAL_CONN_DISCONNECTED:
- break;
-
- default:
- /* conn must either have never engaged with the CM, or have
- * completely disengaged from it */
- CERROR("Bad conn %s state %d\n",
- (conn->ibc_peer) == NULL ? "<anon>" :
- libcfs_nid2str(conn->ibc_peer->ibp_nid), conn->ibc_state);
- LBUG();
- }
-
- if (conn->ibc_cep != NULL) {
- frc = iba_cm_destroy_cep(conn->ibc_cep);
- if (frc != FSUCCESS)
- CERROR("Error destroying CEP %p: %d\n",
- conn->ibc_cep, frc);
- }
-
- if (conn->ibc_qp != NULL) {
- frc = iba_destroy_qp(conn->ibc_qp);
- if (frc != FSUCCESS)
- CERROR("Error destroying QP %p: %d\n",
- conn->ibc_qp, frc);
- }
-
- if (conn->ibc_rx_pages != NULL)
- kibnal_free_pages(conn->ibc_rx_pages);
-
- if (conn->ibc_rxs != NULL)
- LIBCFS_FREE(conn->ibc_rxs,
- IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
- if (conn->ibc_cvars != NULL)
- LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
-
- if (conn->ibc_peer != NULL)
- kibnal_peer_decref(conn->ibc_peer);
-
- LIBCFS_FREE(conn, sizeof (*conn));
-
- atomic_dec(&kibnal_data.kib_nconns);
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- count++;
- kibnal_close_conn_locked (conn, why);
- }
-
- return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
- kib_conn_t *conn;
- struct list_head *ctmp;
- struct list_head *cnxt;
- int count = 0;
-
- list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- if (conn->ibc_incarnation == incarnation)
- continue;
-
- CDEBUG(D_NET, "Closing stale conn nid:%s incarnation:"LPX64"("LPX64")\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_incarnation, incarnation);
-
- count++;
- kibnal_close_conn_locked (conn, -ESTALE);
- }
-
- return (count);
-}
-
-int
-kibnal_close_matching_conns (lnet_nid_t nid)
-{
- unsigned long flags;
- kib_peer_t *peer;
- struct list_head *ptmp;
- struct list_head *pnxt;
- int lo;
- int hi;
- int i;
- int count = 0;
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (nid != LNET_NID_ANY)
- lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
- else {
- lo = 0;
- hi = kibnal_data.kib_peer_hash_size - 1;
- }
-
- for (i = lo; i <= hi; i++) {
- list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
- LASSERT (peer->ibp_persistence != 0 ||
- kibnal_peer_connecting(peer) ||
- !list_empty (&peer->ibp_conns));
-
- if (!(nid == LNET_NID_ANY || nid == peer->ibp_nid))
- continue;
-
- count += kibnal_close_peer_conns_locked (peer, 0);
- }
- }
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- /* wildcards always succeed */
- if (nid == LNET_NID_ANY)
- return (0);
-
- return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
- struct libcfs_ioctl_data *data = arg;
- int rc = -EINVAL;
- ENTRY;
-
- LASSERT (ni == kibnal_data.kib_ni);
-
- switch(cmd) {
- case IOC_LIBCFS_GET_PEER: {
- lnet_nid_t nid = 0;
- int share_count = 0;
-
- rc = kibnal_get_peer_info(data->ioc_count,
- &nid, &share_count);
- data->ioc_nid = nid;
- data->ioc_count = share_count;
- break;
- }
- case IOC_LIBCFS_ADD_PEER: {
- rc = kibnal_add_persistent_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_DEL_PEER: {
- rc = kibnal_del_peer (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_GET_CONN: {
- kib_conn_t *conn = kibnal_get_conn_by_idx (data->ioc_count);
-
- if (conn == NULL)
- rc = -ENOENT;
- else {
- rc = 0;
- data->ioc_nid = conn->ibc_peer->ibp_nid;
- kibnal_conn_decref(conn);
- }
- break;
- }
- case IOC_LIBCFS_CLOSE_CONNECTION: {
- rc = kibnal_close_matching_conns (data->ioc_nid);
- break;
- }
- case IOC_LIBCFS_REGISTER_MYNID: {
- if (ni->ni_nid == data->ioc_nid) {
- rc = 0;
- } else {
- CERROR("obsolete IOC_LIBCFS_REGISTER_MYNID: %s(%s)\n",
- libcfs_nid2str(data->ioc_nid),
- libcfs_nid2str(ni->ni_nid));
- rc = -EINVAL;
- }
- break;
- }
- }
-
- RETURN(rc);
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
- int npages = p->ibp_npages;
- int i;
-
- for (i = 0; i < npages; i++)
- if (p->ibp_pages[i] != NULL)
- __free_page(p->ibp_pages[i]);
-
- LIBCFS_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages)
-{
- kib_pages_t *p;
- int i;
-
- LIBCFS_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
- if (p == NULL) {
- CERROR ("Can't allocate buffer %d\n", npages);
- return (-ENOMEM);
- }
-
- memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
- p->ibp_npages = npages;
-
- for (i = 0; i < npages; i++) {
- p->ibp_pages[i] = alloc_page (GFP_KERNEL);
- if (p->ibp_pages[i] == NULL) {
- CERROR ("Can't allocate page %d of %d\n", i, npages);
- kibnal_free_pages(p);
- return (-ENOMEM);
- }
- }
-
- *pp = p;
- return (0);
-}
-
-int
-kibnal_alloc_tx_descs (void)
-{
- int i;
-
- LIBCFS_ALLOC (kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
- if (kibnal_data.kib_tx_descs == NULL)
- return -ENOMEM;
-
- memset(kibnal_data.kib_tx_descs, 0,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- LIBCFS_ALLOC(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
- if (tx->tx_pages == NULL)
- return -ENOMEM;
-#else
- LIBCFS_ALLOC(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
- if (tx->tx_wrq == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
- if (tx->tx_gl == NULL)
- return -ENOMEM;
-
- LIBCFS_ALLOC(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
- if (tx->tx_rd == NULL)
- return -ENOMEM;
-#endif
- }
-
- return 0;
-}
-
-void
-kibnal_free_tx_descs (void)
-{
- int i;
-
- if (kibnal_data.kib_tx_descs == NULL)
- return;
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- kib_tx_t *tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- if (tx->tx_pages != NULL)
- LIBCFS_FREE(tx->tx_pages, LNET_MAX_IOV *
- sizeof(*tx->tx_pages));
-#else
- if (tx->tx_wrq != NULL)
- LIBCFS_FREE(tx->tx_wrq,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_wrq));
-
- if (tx->tx_gl != NULL)
- LIBCFS_FREE(tx->tx_gl,
- (1 + IBNAL_MAX_RDMA_FRAGS) *
- sizeof(*tx->tx_gl));
-
- if (tx->tx_rd != NULL)
- LIBCFS_FREE(tx->tx_rd,
- offsetof(kib_rdma_desc_t,
- rd_frags[IBNAL_MAX_RDMA_FRAGS]));
-#endif
- }
-
- LIBCFS_FREE(kibnal_data.kib_tx_descs,
- IBNAL_TX_MSGS() * sizeof(kib_tx_t));
-}
-
-int
-kibnal_setup_tx_descs (void)
-{
- int ipage = 0;
- int page_offset = 0;
- struct page *page;
- kib_tx_t *tx;
- int i;
- int rc;
-
- /* pre-mapped messages are not bigger than 1 page */
- CLASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
- /* No fancy arithmetic when we do the buffer calculations */
- CLASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
- rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
- IBNAL_TX_MSG_PAGES());
- if (rc != 0)
- return (rc);
-
- for (i = 0; i < IBNAL_TX_MSGS(); i++) {
- page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
- tx = &kibnal_data.kib_tx_descs[i];
-
-#if IBNAL_USE_FMR
- /* Allocate an FMR for this TX so it can map src/sink buffers
- * for large transfers */
-#endif
- tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) +
- page_offset);
-
- tx->tx_hca_msg = kibnal_data.kib_whole_mem.md_addr +
- lnet_page2phys(page) + page_offset;
-
- CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
- i, tx, tx->tx_msg, tx->tx_hca_msg);
-
- list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-
- page_offset += IBNAL_MSG_SIZE;
- LASSERT (page_offset <= PAGE_SIZE);
-
- if (page_offset == PAGE_SIZE) {
- page_offset = 0;
- ipage++;
- LASSERT (ipage <= IBNAL_TX_MSG_PAGES());
- }
- }
-
- return (0);
-}
-
-int
-kibnal_register_all_memory(void)
-{
- /* CAVEAT EMPTOR: this assumes all physical memory is in 1 contiguous
- * chunk starting at 0 */
- struct sysinfo si;
- __u64 total;
- __u64 total2;
- __u64 roundup = (128<<20); /* round up in big chunks */
- IB_MR_PHYS_BUFFER phys;
- IB_ACCESS_CONTROL access;
- FSTATUS frc;
-
- memset(&access, 0, sizeof(access));
- access.s.MWBindable = 1;
- access.s.LocalWrite = 1;
- access.s.RdmaRead = 1;
- access.s.RdmaWrite = 1;
-
- /* XXX we don't bother with first-gen cards */
- if (kibnal_data.kib_hca_attrs.VendorId == 0xd0b7 &&
- kibnal_data.kib_hca_attrs.DeviceId == 0x3101) {
- CERROR("Can't register all memory on first generation HCAs\n");
- return -EINVAL;
- }
-
- si_meminfo(&si);
-
- CDEBUG(D_NET, "si_meminfo: %lu/%u, num_physpages %lu/%lu\n",
- si.totalram, si.mem_unit, num_physpages, PAGE_SIZE);
-
- total = ((__u64)si.totalram) * si.mem_unit;
- total2 = num_physpages * PAGE_SIZE;
- if (total < total2)
- total = total2;
-
- if (total == 0) {
- CERROR("Can't determine memory size\n");
- return -ENOMEM;
- }
-
- roundup = (128<<20);
- total = (total + (roundup - 1)) & ~(roundup - 1);
-
- phys.PhysAddr = 0;
- phys.Length = total;
-
- frc = iba_register_contig_pmr(kibnal_data.kib_hca, 0, &phys, 1, 0,
- kibnal_data.kib_pd, access,
- &kibnal_data.kib_whole_mem.md_handle,
- &kibnal_data.kib_whole_mem.md_addr,
- &kibnal_data.kib_whole_mem.md_lkey,
- &kibnal_data.kib_whole_mem.md_rkey);
-
- if (frc != FSUCCESS) {
- CERROR("registering physical memory failed: %d\n", frc);
- return -EIO;
- }
-
- CDEBUG(D_WARNING, "registered phys mem from 0("LPX64") for "LPU64"("LPU64") -> "LPX64"\n",
- phys.PhysAddr, total, phys.Length, kibnal_data.kib_whole_mem.md_addr);
-
- return 0;
-}
-
-void
-kibnal_shutdown (lnet_ni_t *ni)
-{
- int i;
- int rc;
-
- LASSERT (ni == kibnal_data.kib_ni);
- LASSERT (ni->ni_data == &kibnal_data);
-
- CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- switch (kibnal_data.kib_init) {
- default:
- CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
- LBUG();
-
- case IBNAL_INIT_ALL:
- /* stop accepting connections, prevent new peers and start to
- * tear down all existing ones... */
- kibnal_stop_listener(1);
-
- /* Wait for all peer state to clean up */
- i = 2;
- while (atomic_read (&kibnal_data.kib_npeers) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "waiting for %d peers to disconnect\n",
- atomic_read (&kibnal_data.kib_npeers));
- set_current_state (TASK_UNINTERRUPTIBLE);
- schedule_timeout (HZ);
- }
- /* fall through */
-
- case IBNAL_INIT_CQ:
- rc = iba_destroy_cq(kibnal_data.kib_cq);
- if (rc != 0)
- CERROR ("Destroy CQ error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_TXD:
- kibnal_free_pages (kibnal_data.kib_tx_pages);
- /* fall through */
-
- case IBNAL_INIT_MD:
- rc = iba_deregister_mr(kibnal_data.kib_whole_mem.md_handle);
- if (rc != FSUCCESS)
- CERROR ("Deregister memory: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_PD:
- rc = iba_free_pd(kibnal_data.kib_pd);
- if (rc != 0)
- CERROR ("Destroy PD error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_SD:
- rc = iba_sd_deregister(kibnal_data.kib_sd);
- if (rc != 0)
- CERROR ("Deregister SD error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_PORTATTRS:
- LIBCFS_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
- kibnal_data.kib_hca_attrs.PortAttributesListSize);
- /* fall through */
-
- case IBNAL_INIT_HCA:
- rc = iba_close_ca(kibnal_data.kib_hca);
- if (rc != 0)
- CERROR ("Close HCA error: %d\n", rc);
- /* fall through */
-
- case IBNAL_INIT_DATA:
- LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
- LASSERT (kibnal_data.kib_peers != NULL);
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
- LASSERT (list_empty (&kibnal_data.kib_peers[i]));
- }
- LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
- LASSERT (list_empty (&kibnal_data.kib_connd_zombies));
- LASSERT (list_empty (&kibnal_data.kib_connd_conns));
- LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-
- /* flag threads to terminate; wake and wait for them to die */
- kibnal_data.kib_shutdown = 1;
- wake_up_all (&kibnal_data.kib_sched_waitq);
- wake_up_all (&kibnal_data.kib_connd_waitq);
-
- i = 2;
- while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- atomic_read (&kibnal_data.kib_nthreads));
- set_current_state (TASK_INTERRUPTIBLE);
- schedule_timeout (HZ);
- }
- /* fall through */
-
- case IBNAL_INIT_NOTHING:
- break;
- }
-
- kibnal_free_tx_descs();
-
- if (kibnal_data.kib_peers != NULL)
- LIBCFS_FREE (kibnal_data.kib_peers,
- sizeof (struct list_head) *
- kibnal_data.kib_peer_hash_size);
-
- CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
- atomic_read (&libcfs_kmemory));
-
- kibnal_data.kib_init = IBNAL_INIT_NOTHING;
- PORTAL_MODULE_UNUSE;
-}
-
-int
-kibnal_get_ipif_name(char *ifname, int ifname_size, int idx)
-{
- char *basename = *kibnal_tunables.kib_ipif_basename;
- int n = strlen(basename);
- int baseidx;
- int m;
-
- if (n == 0) { /* empty string */
- CERROR("Empty IP interface basename specified\n");
- return -EINVAL;
- }
-
- for (m = n; m > 0; m--) /* find max numeric postfix */
- if (sscanf(basename + m - 1, "%d", &baseidx) != 1)
- break;
-
- if (m == 0) /* just a number */
- m = n;
-
- if (m == n) /* no postfix */
- baseidx = 1; /* default to 1 */
-
- if (m >= ifname_size)
- m = ifname_size - 1;
-
- memcpy(ifname, basename, m); /* copy prefix name */
-
- snprintf(ifname + m, ifname_size - m, "%d", baseidx + idx);
-
- if (strlen(ifname) == ifname_size - 1) {
- CERROR("IP interface basename %s too long\n", basename);
- return -EINVAL;
- }
-
- return 0;
-}
-
-int
-kibnal_startup (lnet_ni_t *ni)
-{
- char ipif_name[32];
- __u32 ip;
- __u32 netmask;
- int up;
- int nob;
- struct timeval tv;
- IB_PORT_ATTRIBUTES *pattr;
- FSTATUS frc;
- int rc;
- __u32 n;
- int i;
-
- LASSERT (ni->ni_lnd == &the_kiblnd);
-
- /* Only 1 instance supported */
- if (kibnal_data.kib_init != IBNAL_INIT_NOTHING) {
- CERROR ("Only 1 instance supported\n");
- return -EPERM;
- }
-
- if (*kibnal_tunables.kib_credits > *kibnal_tunables.kib_ntx) {
- CERROR ("Can't set credits(%d) > ntx(%d)\n",
- *kibnal_tunables.kib_credits,
- *kibnal_tunables.kib_ntx);
- return -EINVAL;
- }
-
- ni->ni_maxtxcredits = *kibnal_tunables.kib_credits;
- ni->ni_peertxcredits = *kibnal_tunables.kib_peercredits;
-
- CLASSERT (LNET_MAX_INTERFACES > 1);
-
- if (ni->ni_interfaces[0] == NULL) {
- kibnal_data.kib_hca_idx = 0;
- } else {
- /* Use the HCA specified in 'networks=' */
- if (ni->ni_interfaces[1] != NULL) {
- CERROR("Multiple interfaces not supported\n");
- return -EPERM;
- }
-
- /* Parse <number> into kib_hca_idx */
- nob = strlen(ni->ni_interfaces[0]);
- if (sscanf(ni->ni_interfaces[0], "%d%n",
- &kibnal_data.kib_hca_idx, &nob) < 1 ||
- nob != strlen(ni->ni_interfaces[0])) {
- CERROR("Can't parse interface '%s'\n",
- ni->ni_interfaces[0]);
- return -EINVAL;
- }
- }
-
- rc = kibnal_get_ipif_name(ipif_name, sizeof(ipif_name),
- kibnal_data.kib_hca_idx);
- if (rc != 0)
- return rc;
-
- rc = libcfs_ipif_query(ipif_name, &up, &ip, &netmask);
- if (rc != 0) {
- CERROR("Can't query IPoIB interface %s: %d\n", ipif_name, rc);
- return -ENETDOWN;
- }
-
- if (!up) {
- CERROR("Can't query IPoIB interface %s: it's down\n", ipif_name);
- return -ENETDOWN;
- }
-
- ni->ni_nid = LNET_MKNID(LNET_NIDNET(ni->ni_nid), ip);
-
- ni->ni_data = &kibnal_data;
- kibnal_data.kib_ni = ni;
-
- do_gettimeofday(&tv);
- kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
- PORTAL_MODULE_USE;
-
- rwlock_init(&kibnal_data.kib_global_lock);
-
- kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
- LIBCFS_ALLOC (kibnal_data.kib_peers,
- sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
- if (kibnal_data.kib_peers == NULL) {
- goto failed;
- }
- for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
- INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
- spin_lock_init (&kibnal_data.kib_connd_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
- INIT_LIST_HEAD (&kibnal_data.kib_connd_zombies);
- init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
- spin_lock_init (&kibnal_data.kib_sched_lock);
- init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
- spin_lock_init (&kibnal_data.kib_tx_lock);
- INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-
- rc = kibnal_alloc_tx_descs();
- if (rc != 0) {
- CERROR("Can't allocate tx descs\n");
- goto failed;
- }
-
- /* lists/ptrs/locks initialised */
- kibnal_data.kib_init = IBNAL_INIT_DATA;
- /*****************************************************/
-
- kibnal_data.kib_sdretry.RetryCount = *kibnal_tunables.kib_sd_retries;
- kibnal_data.kib_sdretry.Timeout = (*kibnal_tunables.kib_timeout * 1000)/
- *kibnal_tunables.kib_sd_retries;
-
- for (i = 0; i < IBNAL_N_SCHED; i++) {
- rc = kibnal_thread_start (kibnal_scheduler,
- (void *)(unsigned long)i);
- if (rc != 0) {
- CERROR("Can't spawn iib scheduler[%d]: %d\n",
- i, rc);
- goto failed;
- }
- }
-
- rc = kibnal_thread_start (kibnal_connd, NULL);
- if (rc != 0) {
- CERROR ("Can't spawn iib connd: %d\n", rc);
- goto failed;
- }
-
- n = sizeof(kibnal_data.kib_hca_guids) /
- sizeof(kibnal_data.kib_hca_guids[0]);
- frc = iba_get_caguids(&n, kibnal_data.kib_hca_guids);
- if (frc != FSUCCESS) {
- CERROR ("Can't get HCA guids: %d\n", frc);
- goto failed;
- }
-
- if (n == 0) {
- CERROR ("No HCAs found\n");
- goto failed;
- }
-
- if (n <= kibnal_data.kib_hca_idx) {
- CERROR("Invalid HCA %d requested: (must be 0 - %d inclusive)\n",
- kibnal_data.kib_hca_idx, n - 1);
- goto failed;
- }
-
- /* Infinicon has per-HCA notification callbacks */
- frc = iba_open_ca(kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx],
- kibnal_hca_callback,
- kibnal_hca_async_callback,
- NULL,
- &kibnal_data.kib_hca);
- if (frc != FSUCCESS) {
- CERROR ("Can't open HCA[%d]: %d\n",
- kibnal_data.kib_hca_idx, frc);
- goto failed;
- }
-
- /* Channel Adapter opened */
- kibnal_data.kib_init = IBNAL_INIT_HCA;
- /*****************************************************/
-
- kibnal_data.kib_hca_attrs.PortAttributesList = NULL;
- kibnal_data.kib_hca_attrs.PortAttributesListSize = 0;
- frc = iba_query_ca(kibnal_data.kib_hca,
- &kibnal_data.kib_hca_attrs, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't size port attrs: %d\n", frc);
- goto failed;
- }
-
- LIBCFS_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
- kibnal_data.kib_hca_attrs.PortAttributesListSize);
- if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL)
- goto failed;
-
- /* Port attrs allocated */
- kibnal_data.kib_init = IBNAL_INIT_PORTATTRS;
- /*****************************************************/
-
- frc = iba_query_ca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
- NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't get port attrs for HCA %d: %d\n",
- kibnal_data.kib_hca_idx, frc);
- goto failed;
- }
-
- for (i = 0, pattr = kibnal_data.kib_hca_attrs.PortAttributesList;
- pattr != NULL;
- i++, pattr = pattr->Next) {
- switch (pattr->PortState) {
- default:
- CERROR("Unexpected port[%d] state %d\n",
- i, pattr->PortState);
- continue;
- case PortStateDown:
- CDEBUG(D_NET, "port[%d] Down\n", i);
- continue;
- case PortStateInit:
- CDEBUG(D_NET, "port[%d] Init\n", i);
- continue;
- case PortStateArmed:
- CDEBUG(D_NET, "port[%d] Armed\n", i);
- continue;
-
- case PortStateActive:
- CDEBUG(D_NET, "port[%d] Active\n", i);
- kibnal_data.kib_port = i;
- kibnal_data.kib_port_guid = pattr->GUID;
- kibnal_data.kib_port_pkey = pattr->PkeyTable[0];
- break;
- }
- break;
- }
-
- if (pattr == NULL) {
- CERROR ("Can't find an active port\n");
- goto failed;
- }
-
- CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid);
-
- frc = iba_sd_register(&kibnal_data.kib_sd, NULL);
- if (frc != FSUCCESS) {
- CERROR ("Can't register with SD: %d\n", frc);
- goto failed;
- }
-
- /* Registered with SD OK */
- kibnal_data.kib_init = IBNAL_INIT_SD;
- /*****************************************************/
-
- frc = iba_alloc_pd(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
- if (frc != FSUCCESS) {
- CERROR ("Can't create PD: %d\n", rc);
- goto failed;
- }
-
- /* flag PD initialised */
- kibnal_data.kib_init = IBNAL_INIT_PD;
- /*****************************************************/
-
- rc = kibnal_register_all_memory();
- if (rc != 0) {
- CERROR ("Can't register all memory\n");
- goto failed;
- }
-
- /* flag whole memory MD initialised */
- kibnal_data.kib_init = IBNAL_INIT_MD;
- /*****************************************************/
-
- rc = kibnal_setup_tx_descs();
- if (rc != 0) {
- CERROR ("Can't register tx descs: %d\n", rc);
- goto failed;
- }
-
- /* flag TX descs initialised */
- kibnal_data.kib_init = IBNAL_INIT_TXD;
- /*****************************************************/
-
- frc = iba_create_cq(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES(),
- &kibnal_data.kib_cq, &kibnal_data.kib_cq,
- &n);
- if (frc != FSUCCESS) {
- CERROR ("Can't create RX CQ: %d\n", frc);
- goto failed;
- }
-
- /* flag CQ initialised */
- kibnal_data.kib_init = IBNAL_INIT_CQ;
- /*****************************************************/
-
- if (n < IBNAL_CQ_ENTRIES()) {
- CERROR ("CQ only has %d entries: %d needed\n",
- n, IBNAL_CQ_ENTRIES());
- goto failed;
- }
-
- rc = iba_rearm_cq(kibnal_data.kib_cq, CQEventSelNextWC);
- if (rc != 0) {
- CERROR ("Failed to re-arm completion queue: %d\n", rc);
- goto failed;
- }
-
- rc = kibnal_start_listener();
- if (rc != 0) {
- CERROR("Can't start listener: %d\n", rc);
- goto failed;
- }
-
- /* flag everything initialised */
- kibnal_data.kib_init = IBNAL_INIT_ALL;
- /*****************************************************/
-
- return (0);
-
- failed:
- kibnal_shutdown (ni);
- return (-ENETDOWN);
-}
-
-void __exit
-kibnal_module_fini (void)
-{
- lnet_unregister_lnd(&the_kiblnd);
- kibnal_tunables_fini();
-}
-
-int __init
-kibnal_module_init (void)
-{
- int rc;
-
- rc = kibnal_tunables_init();
- if (rc != 0)
- return rc;
-
- lnet_register_lnd(&the_kiblnd);
-
- return 0;
-}
-
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Kernel Infinicon IB LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/iiblnd/iiblnd.h
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#ifndef AUTOCONF_INCLUDED
-#include <linux/config.h>
-#endif
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/lnet-sysctl.h>
-
-#include <linux/iba/ibt.h>
-
-#define GCC_VERSION (__GNUC__ * 10000 \
- + __GNUC_MINOR__ * 100 \
- + __GNUC_PATCHLEVEL__)
-
-/* Test for GCC > 3.2.2 */
-#if GCC_VERSION <= 30202
-/* GCC 3.2.2, and presumably several versions before it, will
- * miscompile this driver. See
- * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */
-#error Invalid GCC version. Must use GCC >= 3.2.3
-#endif
-
-#ifdef CONFIG_SMP
-# define IBNAL_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define IBNAL_N_SCHED 1 /* # schedulers */
-#endif
-
-#define IBNAL_USE_FMR 0 /* map on demand v. use whole mem mapping */
-#define KIBLND_DETAILED_DEBUG 0
-
-/* tunables fixed at compile time */
-#define IBNAL_PEER_HASH_SIZE 101 /* # peer lists */
-#define IBNAL_RESCHED 100 /* # scheduler loops before reschedule */
-#define IBNAL_MSG_QUEUE_SIZE 8 /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 7 /* when to eagerly return credits */
-#define IBNAL_MSG_SIZE (4<<10) /* max size of queued messages (inc hdr) */
-#define IBNAL_RDMA_BASE 0x0eeb0000
-#define IBNAL_STARTING_PSN 1
-
-/* QP tunables */
-/* 7 indicates infinite retry attempts, Infinicon recommended 5 */
-#define IBNAL_RETRY 5 /* # times to retry */
-#define IBNAL_RNR_RETRY 5 /* */
-#define IBNAL_CM_RETRY 5 /* # times to retry connection */
-#define IBNAL_FLOW_CONTROL 1
-#define IBNAL_ACK_TIMEOUT 20 /* supposedly 4 secs */
-#define IBNAL_EE_FLOW 1
-#define IBNAL_LOCAL_SUB 1
-#define IBNAL_FAILOVER_ACCEPTED 0
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS() (*kibnal_tunables.kib_ntx)
-#define IBNAL_TX_MSG_BYTES() (IBNAL_TX_MSGS() * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES() ((IBNAL_TX_MSG_BYTES() + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#if IBNAL_USE_FMR
-# define IBNAL_MAX_RDMA_FRAGS 1
-# define IBNAL_CONCURRENT_SENDS IBNAL_RX_MSGS
-#else
-# define IBNAL_MAX_RDMA_FRAGS LNET_MAX_IOV
-# define IBNAL_CONCURRENT_SENDS IBNAL_MSG_QUEUE_SIZE
-#endif
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS (IBNAL_MSG_QUEUE_SIZE * 2)
-#define IBNAL_RX_MSG_BYTES (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-#define IBNAL_CQ_ENTRIES() (IBNAL_TX_MSGS() * (1 + IBNAL_MAX_RDMA_FRAGS) + \
- (IBNAL_RX_MSGS * *kibnal_tunables.kib_concurrent_peers))
-
-typedef struct
-{
- char **kib_hca_basename; /* HCA base name */
- char **kib_ipif_basename; /* IPoIB interface base name */
- char **kib_service_name; /* global service name */
- unsigned int *kib_service_number; /* global service number */
- int *kib_min_reconnect_interval; /* min connect retry seconds... */
- int *kib_max_reconnect_interval; /* max connect retry seconds */
- int *kib_concurrent_peers; /* max # peers */
- int *kib_cksum; /* checksum kib_msg_t? */
- int *kib_timeout; /* comms timeout (seconds) */
- int *kib_keepalive; /* keepalive timeout (seconds) */
- int *kib_ntx; /* # tx descs */
- int *kib_credits; /* # concurrent sends */
- int *kib_peercredits; /* # concurrent sends to 1 peer */
- int *kib_sd_retries; /* # concurrent sends to 1 peer */
- int *kib_concurrent_sends; /* send work queue sizing */
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- cfs_sysctl_table_header_t *kib_sysctl; /* sysctl interface */
-#endif
-} kib_tunables_t;
-
-/* NB The Infinicon stack has specific typedefs for some things
- * (e.g. IB_{L,R}_KEY), that just map back to __u32 etc */
-typedef struct
-{
- int ibp_npages; /* # pages */
- struct page *ibp_pages[0];
-} kib_pages_t;
-
-typedef struct
-{
- IB_HANDLE md_handle;
- __u32 md_lkey;
- __u32 md_rkey;
- __u64 md_addr;
-} kib_md_t;
-
-typedef struct
-{
- int kib_init; /* initialisation state */
- __u64 kib_incarnation; /* which one am I */
- int kib_shutdown; /* shut down? */
- atomic_t kib_nthreads; /* # live threads */
- lnet_ni_t *kib_ni; /* _the_ iib instance */
-
- __u64 kib_port_guid; /* my GUID (lo 64 of GID)*/
- __u16 kib_port_pkey; /* my pkey, whatever that is */
- struct semaphore kib_listener_signal; /* signal completion */
- IB_HANDLE kib_listener_cep; /* connection end point */
-
- rwlock_t kib_global_lock; /* stabilize peer/conn ops */
- int kib_ready; /* CQ callback fired */
- int kib_checking_cq; /* a scheduler is checking the CQ */
-
- struct list_head *kib_peers; /* hash table of all my known peers */
- int kib_peer_hash_size; /* size of kib_peers */
- atomic_t kib_npeers; /* # peers extant */
- atomic_t kib_nconns; /* # connections extant */
-
- struct list_head kib_connd_zombies; /* connections to free */
- struct list_head kib_connd_conns; /* connections to progress */
- struct list_head kib_connd_peers; /* peers waiting for a connection */
- wait_queue_head_t kib_connd_waitq; /* connection daemon sleep here */
- spinlock_t kib_connd_lock; /* serialise */
-
- wait_queue_head_t kib_sched_waitq; /* schedulers sleep here */
- spinlock_t kib_sched_lock; /* serialise */
-
- struct kib_tx *kib_tx_descs; /* all the tx descriptors */
- kib_pages_t *kib_tx_pages; /* premapped tx msg pages */
-
- struct list_head kib_idle_txs; /* idle tx descriptors */
- __u64 kib_next_tx_cookie; /* RDMA completion cookie */
- spinlock_t kib_tx_lock; /* serialise */
-
- IB_HANDLE kib_hca; /* The HCA */
- int kib_port; /* port on the device */
- IB_HANDLE kib_pd; /* protection domain */
- IB_HANDLE kib_sd; /* SD handle */
- IB_HANDLE kib_cq; /* completion queue */
- kib_md_t kib_whole_mem; /* whole-mem registration */
-
- int kib_hca_idx; /* my HCA number */
- uint64 kib_hca_guids[8]; /* all the HCA guids */
- IB_CA_ATTRIBUTES kib_hca_attrs; /* where to get HCA attrs */
-
- COMMAND_CONTROL_PARAMETERS kib_sdretry; /* control SD query retries */
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING 0
-#define IBNAL_INIT_DATA 1
-#define IBNAL_INIT_LIB 2
-#define IBNAL_INIT_HCA 3
-#define IBNAL_INIT_PORTATTRS 4
-#define IBNAL_INIT_SD 5
-#define IBNAL_INIT_PD 6
-#define IBNAL_INIT_MD 7
-#define IBNAL_INIT_TXD 8
-#define IBNAL_INIT_CQ 9
-#define IBNAL_INIT_ALL 10
-
-/************************************************************************
- * Wire message structs.
- * These are sent in sender's byte order (i.e. receiver flips).
- * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD
- * private data and SM service info), is LE on the wire.
- */
-
-typedef struct kib_connparams
-{
- __u32 ibcp_queue_depth;
- __u32 ibcp_max_msg_size;
- __u32 ibcp_max_frags;
-} WIRE_ATTR kib_connparams_t;
-
-typedef struct
-{
- lnet_hdr_t ibim_hdr; /* portals header */
- char ibim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kib_immediate_msg_t;
-
-#if IBNAL_USE_FMR
-typedef struct
-{
- __u64 rd_addr; /* IO VMA address */
- __u32 rd_nob; /* # of bytes */
- __u32 rd_key; /* remote key */
-} WIRE_ATTR kib_rdma_desc_t;
-#else
-typedef struct
-{
- __u32 rf_nob; /* # of bytes */
- __u64 rf_addr; /* remote io vaddr */
-} WIRE_ATTR kib_rdma_frag_t;
-
-typedef struct
-{
- __u32 rd_key; /* local/remote key */
- __u32 rd_nfrag; /* # fragments */
- kib_rdma_frag_t rd_frags[0]; /* buffer frags */
-} WIRE_ATTR kib_rdma_desc_t;
-#endif
-
-typedef struct
-{
- lnet_hdr_t ibprm_hdr; /* LNET header */
- __u64 ibprm_cookie; /* opaque completion cookie */
-} WIRE_ATTR kib_putreq_msg_t;
-
-typedef struct
-{
- __u64 ibpam_src_cookie; /* reflected completion cookie */
- __u64 ibpam_dst_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibpam_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_putack_msg_t;
-
-typedef struct
-{
- lnet_hdr_t ibgm_hdr; /* LNET header */
- __u64 ibgm_cookie; /* opaque completion cookie */
- kib_rdma_desc_t ibgm_rd; /* sender's sink buffer */
-} WIRE_ATTR kib_get_msg_t;
-
-typedef struct
-{
- __u64 ibcm_cookie; /* opaque completion cookie */
- __u32 ibcm_status; /* completion status */
-} WIRE_ATTR kib_completion_msg_t;
-
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ibm_magic; /* I'm an openibnal message */
- __u16 ibm_version; /* this is my version number */
-
- __u8 ibm_type; /* msg type */
- __u8 ibm_credits; /* returned credits */
- __u32 ibm_nob; /* # bytes in whole message */
- __u32 ibm_cksum; /* checksum (0 == no checksum) */
- __u64 ibm_srcnid; /* sender's NID */
- __u64 ibm_srcstamp; /* sender's incarnation */
- __u64 ibm_dstnid; /* destination's NID */
- __u64 ibm_dststamp; /* destination's incarnation */
- __u64 ibm_seq; /* sequence number */
-
- union {
- kib_connparams_t connparams;
- kib_immediate_msg_t immediate;
- kib_putreq_msg_t putreq;
- kib_putack_msg_t putack;
- kib_get_msg_t get;
- kib_completion_msg_t completion;
- } WIRE_ATTR ibm_u;
-} WIRE_ATTR kib_msg_t;
-
-#define IBNAL_MSG_MAGIC LNET_PROTO_IIB_MAGIC /* unique magic */
-#define IBNAL_MSG_VERSION 2 /* current protocol version */
-#define IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD 1 /* previous version */
-
-#define IBNAL_MSG_CONNREQ 0xc0 /* connection request */
-#define IBNAL_MSG_CONNACK 0xc1 /* connection acknowledge */
-#define IBNAL_MSG_NOOP 0xd0 /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE 0xd1 /* immediate */
-#define IBNAL_MSG_PUT_REQ 0xd2 /* putreq (src->sink) */
-#define IBNAL_MSG_PUT_NAK 0xd3 /* completion (sink->src) */
-#define IBNAL_MSG_PUT_ACK 0xd4 /* putack (sink->src) */
-#define IBNAL_MSG_PUT_DONE 0xd5 /* completion (src->sink) */
-#define IBNAL_MSG_GET_REQ 0xd6 /* getreq (sink->src) */
-#define IBNAL_MSG_GET_DONE 0xd7 /* completion (src->sink: all OK) */
-
-/* connection rejection reasons */
-#define IBNAL_REJECT_CONN_RACE 0 /* You lost connection race */
-#define IBNAL_REJECT_NO_RESOURCES 1 /* Out of memory/conns etc */
-#define IBNAL_REJECT_FATAL 2 /* Anything else */
-
-/***********************************************************************/
-
-typedef struct kib_rx /* receive message */
-{
- struct list_head rx_list; /* queue for attention */
- struct kib_conn *rx_conn; /* owning conn */
- int rx_nob; /* # bytes received (-1 while posted) */
- __u64 rx_hca_msg; /* pre-mapped buffer (hca vaddr) */
- kib_msg_t *rx_msg; /* pre-mapped buffer (host vaddr) */
- IB_WORK_REQ2 rx_wrq;
- IB_LOCAL_DATASEGMENT rx_gl; /* and its memory */
-} kib_rx_t;
-
-typedef struct kib_tx /* transmit message */
-{
- struct list_head tx_list; /* queue on idle_txs ibc_tx_queue etc. */
- struct kib_conn *tx_conn; /* owning conn */
- int tx_mapped; /* mapped for RDMA? */
- int tx_sending; /* # tx callbacks outstanding */
- int tx_queued; /* queued for sending */
- int tx_waiting; /* waiting for peer */
- int tx_status; /* completion status */
- unsigned long tx_deadline; /* completion deadline */
- __u64 tx_cookie; /* completion cookie */
- lnet_msg_t *tx_lntmsg[2]; /* lnet msgs to finalize on completion */
- kib_msg_t *tx_msg; /* pre-mapped buffer (host vaddr) */
- __u64 tx_hca_msg; /* pre-mapped buffer (HCA vaddr) */
- int tx_nwrq; /* # send work items */
-#if IBNAL_USE_FMR
- IB_WORK_REQ2 tx_wrq[2]; /* send work items... */
- IB_LOCAL_DATASEGMENT tx_gl[2]; /* ...and their memory */
- kib_rdma_desc_t tx_rd[1]; /* rdma descriptor */
- kib_md_t tx_md; /* mapping */
- __u64 *tx_pages; /* page phys addrs */
-#else
- IB_WORK_REQ2 *tx_wrq; /* send work items... */
- IB_LOCAL_DATASEGMENT *tx_gl; /* ...and their memory */
- kib_rdma_desc_t *tx_rd; /* rdma descriptor (src buffers) */
-#endif
-} kib_tx_t;
-
-typedef struct
-{
- /* scratchpad during connection establishment */
- IB_QP_ATTRIBUTES_QUERY cv_qpattrs;
- QUERY cv_query;
- IB_SERVICE_RECORD cv_svcrec;
- IB_PATH_RECORD cv_path;
- CM_CONN_INFO cv_cmci;
-} kib_connvars_t;
-
-typedef struct kib_conn
-{
- struct kib_peer *ibc_peer; /* owning peer */
- struct list_head ibc_list; /* stash on peer's conn list */
- __u64 ibc_incarnation; /* which instance of the peer */
- __u64 ibc_txseq; /* tx sequence number */
- __u64 ibc_rxseq; /* rx sequence number */
- __u32 ibc_version; /* peer protocol version */
- atomic_t ibc_refcount; /* # users */
- int ibc_state; /* what's happening */
- int ibc_nsends_posted; /* # uncompleted sends */
- int ibc_credits; /* # credits I have */
- int ibc_outstanding_credits; /* # credits to return */
- int ibc_reserved_credits; /* # credits for ACK/DONE msgs */
- unsigned long ibc_last_send; /* time of last send */
- struct list_head ibc_early_rxs; /* rxs completed before ESTABLISHED */
- struct list_head ibc_tx_queue_nocred; /* sends that don't need a cred */
- struct list_head ibc_tx_queue_rsrvd; /* sends that need a reserved cred */
- struct list_head ibc_tx_queue; /* send queue */
- struct list_head ibc_active_txs; /* active tx awaiting completion */
- spinlock_t ibc_lock; /* serialise */
- kib_rx_t *ibc_rxs; /* the rx descs */
- kib_pages_t *ibc_rx_pages; /* premapped rx msg pages */
- IB_HANDLE ibc_qp; /* queue pair */
- IB_HANDLE ibc_cep; /* CM endpoint */
- kib_connvars_t *ibc_cvars; /* connection scratchpad */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING 0 /* initial state */
-#define IBNAL_CONN_INIT_QP 1 /* ibc_qp set up */
-#define IBNAL_CONN_CONNECTING 2 /* started to connect */
-#define IBNAL_CONN_ESTABLISHED 3 /* connection established */
-#define IBNAL_CONN_DISCONNECTING 4 /* to send disconnect req */
-#define IBNAL_CONN_DISCONNECTED 5 /* no more QP or CM traffic */
-
-/* types of connection */
-#define IBNAL_CONN_ACTIVE 0 /* active connect */
-#define IBNAL_CONN_PASSIVE 1 /* passive connect */
-#define IBNAL_CONN_WAITING 2 /* waiting for connect */
-
-typedef struct kib_peer
-{
- struct list_head ibp_list; /* stash on global peer list */
- struct list_head ibp_connd_list; /* schedule on kib_connd_peers */
- lnet_nid_t ibp_nid; /* who's on the other end(s) */
- atomic_t ibp_refcount; /* # users */
- int ibp_persistence; /* "known" peer refs */
- int ibp_version; /* protocol version */
- struct list_head ibp_conns; /* all active connections */
- struct list_head ibp_tx_queue; /* msgs waiting for a conn */
- int ibp_connecting; /* active connects in progress */
- int ibp_accepting; /* passive connects in progress */
- int ibp_passivewait; /* waiting for peer to connect */
- unsigned long ibp_passivewait_deadline; /* when passive wait must complete */
- unsigned long ibp_reconnect_time; /* when reconnect may be attempted */
- unsigned long ibp_reconnect_interval; /* exponential backoff */
- int ibp_error; /* errno on closing this peer */
- cfs_time_t ibp_last_alive; /* when (in jiffies) I was last alive */
-} kib_peer_t;
-
-
-extern kib_data_t kibnal_data;
-extern kib_tunables_t kibnal_tunables;
-
-/******************************************************************************/
-
-/* these are purposely avoiding using local vars so they don't increase
- * stack consumption. */
-
-#define kibnal_conn_addref(conn) \
-do { \
- CDEBUG(D_NET, "conn[%p] (%d)++\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- atomic_inc(&(conn)->ibc_refcount); \
-} while (0)
-
-#define kibnal_conn_decref(conn) \
-do { \
- unsigned long flags; \
- \
- CDEBUG(D_NET, "conn[%p] (%d)--\n", \
- (conn), atomic_read(&(conn)->ibc_refcount)); \
- LASSERT(atomic_read(&(conn)->ibc_refcount) > 0); \
- if (atomic_dec_and_test(&(conn)->ibc_refcount)) { \
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags); \
- list_add_tail(&(conn)->ibc_list, \
- &kibnal_data.kib_connd_zombies); \
- wake_up(&kibnal_data.kib_connd_waitq); \
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags); \
- } \
-} while (0)
-
-#define kibnal_peer_addref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)++\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- atomic_inc(&(peer)->ibp_refcount); \
-} while (0)
-
-#define kibnal_peer_decref(peer) \
-do { \
- CDEBUG(D_NET, "peer[%p] -> %s (%d)--\n", \
- (peer), libcfs_nid2str((peer)->ibp_nid), \
- atomic_read (&(peer)->ibp_refcount)); \
- LASSERT(atomic_read(&(peer)->ibp_refcount) > 0); \
- if (atomic_dec_and_test(&(peer)->ibp_refcount)) \
- kibnal_destroy_peer(peer); \
-} while (0)
-
-/******************************************************************************/
-
-static inline struct list_head *
-kibnal_nid2peerlist (lnet_nid_t nid)
-{
- unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-
- return (&kibnal_data.kib_peers [hash]);
-}
-
-static inline int
-kibnal_peer_active(kib_peer_t *peer)
-{
- /* Am I in the peer hash table? */
- return (!list_empty(&peer->ibp_list));
-}
-
-static inline int
-kibnal_peer_connecting(kib_peer_t *peer)
-{
- /* Am I expecting a connection to materialise? */
- return (peer->ibp_connecting != 0 ||
- peer->ibp_accepting != 0 ||
- peer->ibp_passivewait);
-}
-
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
- struct list_head *q;
-
- LASSERT (tx->tx_nwrq > 0); /* work items set up */
- LASSERT (!tx->tx_queued); /* not queued for sending already */
-
- tx->tx_queued = 1;
- tx->tx_deadline = jiffies + (*kibnal_tunables.kib_timeout * HZ);
-
- if (tx->tx_conn == NULL) {
- kibnal_conn_addref(conn);
- tx->tx_conn = conn;
- LASSERT (tx->tx_msg->ibm_type != IBNAL_MSG_PUT_DONE);
- } else {
- LASSERT (tx->tx_conn == conn);
- LASSERT (tx->tx_msg->ibm_type == IBNAL_MSG_PUT_DONE);
- }
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* All messages have simple credit control */
- q = &conn->ibc_tx_queue;
- } else {
- LASSERT (conn->ibc_version == IBNAL_MSG_VERSION);
-
- switch (tx->tx_msg->ibm_type) {
- case IBNAL_MSG_PUT_REQ:
- case IBNAL_MSG_GET_REQ:
- /* RDMA request: reserve a buffer for the RDMA reply
- * before sending */
- q = &conn->ibc_tx_queue_rsrvd;
- break;
-
- case IBNAL_MSG_PUT_NAK:
- case IBNAL_MSG_PUT_ACK:
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- /* RDMA reply/completion: no credits; peer has reserved
- * a reply buffer */
- q = &conn->ibc_tx_queue_nocred;
- break;
-
- case IBNAL_MSG_NOOP:
- case IBNAL_MSG_IMMEDIATE:
- /* Otherwise: consume a credit before sending */
- q = &conn->ibc_tx_queue;
- break;
-
- default:
- LBUG();
- q = NULL;
- }
- }
-
- list_add_tail(&tx->tx_list, q);
-}
-
-static inline int
-kibnal_send_keepalive(kib_conn_t *conn)
-{
- return (*kibnal_tunables.kib_keepalive > 0) &&
- time_after(jiffies, conn->ibc_last_send +
- *kibnal_tunables.kib_keepalive*HZ);
-}
-
-#define KIBNAL_SERVICE_KEY_MASK (IB_SERVICE_RECORD_COMP_SERVICENAME | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_1 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_2 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_3 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_4 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_5 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_6 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_7 | \
- IB_SERVICE_RECORD_COMP_SERVICEDATA8_8)
-
-static inline __u64*
-kibnal_service_nid_field(IB_SERVICE_RECORD *srv)
-{
- /* must be consistent with KIBNAL_SERVICE_KEY_MASK */
- return (__u64 *)srv->ServiceData8;
-}
-
-static inline void
-kibnal_set_service_keys(IB_SERVICE_RECORD *srv, lnet_nid_t nid)
-{
- char *svc_name = *kibnal_tunables.kib_service_name;
-
- LASSERT (strlen(svc_name) < sizeof(srv->ServiceName));
- memset (srv->ServiceName, 0, sizeof(srv->ServiceName));
- strcpy (srv->ServiceName, svc_name);
-
- *kibnal_service_nid_field(srv) = cpu_to_le64(nid);
-}
-
-/* CAVEAT EMPTOR: We rely on tx/rx descriptor alignment to allow us to use the
- * lowest 2 bits of the work request id to stash the work item type (the op
- * field is not valid when the wc completes in error). */
-
-#define IBNAL_WID_TX 0
-#define IBNAL_WID_RX 1
-#define IBNAL_WID_RDMA 2
-#define IBNAL_WID_MASK 3UL
-
-static inline __u64
-kibnal_ptr2wreqid (void *ptr, int type)
-{
- unsigned long lptr = (unsigned long)ptr;
-
- LASSERT ((lptr & IBNAL_WID_MASK) == 0);
- LASSERT ((type & ~IBNAL_WID_MASK) == 0);
- return (__u64)(lptr | type);
-}
-
-static inline void *
-kibnal_wreqid2ptr (__u64 wreqid)
-{
- return (void *)(((unsigned long)wreqid) & ~IBNAL_WID_MASK);
-}
-
-static inline int
-kibnal_wreqid2type (__u64 wreqid)
-{
- return (wreqid & IBNAL_WID_MASK);
-}
-
-static inline void
-kibnal_set_conn_state (kib_conn_t *conn, int state)
-{
- CDEBUG(D_NET,"%p state %d\n", conn, state);
- conn->ibc_state = state;
- mb();
-}
-
-#if IBNAL_USE_FMR
-
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- return rd->rd_nob;
-}
-
-#else
-static inline int
-kibnal_rd_size (kib_rdma_desc_t *rd)
-{
- int i;
- int size;
-
- for (i = size = 0; i < rd->rd_nfrag; i++)
- size += rd->rd_frags[i].rf_nob;
-
- return size;
-}
-#endif
-
-int kibnal_startup (lnet_ni_t *ni);
-void kibnal_shutdown (lnet_ni_t *ni);
-int kibnal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-int kibnal_send (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kibnal_eager_recv (lnet_ni_t *ni, void *private,
- lnet_msg_t *lntmsg, void **new_private);
-int kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *msg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-void kibnal_init_msg(kib_msg_t *msg, int type, int body_nob);
-void kibnal_pack_msg(kib_msg_t *msg, __u32 version, int credits,
- lnet_nid_t dstnid, __u64 dststamp, __u64 seq);
-void kibnal_pack_connmsg(kib_msg_t *msg, __u32 version, int nob, int type,
- lnet_nid_t dstnid, __u64 dststamp);
-int kibnal_unpack_msg(kib_msg_t *msg, __u32 expected_version, int nob);
-IB_HANDLE kibnal_create_cep(lnet_nid_t nid);
-int kibnal_create_peer (kib_peer_t **peerp, lnet_nid_t nid);
-void kibnal_destroy_peer (kib_peer_t *peer);
-kib_peer_t *kibnal_find_peer_locked (lnet_nid_t nid);
-int kibnal_del_peer (lnet_nid_t nid);
-void kibnal_peer_alive (kib_peer_t *peer);
-void kibnal_unlink_peer_locked (kib_peer_t *peer);
-int kibnal_add_persistent_peer (lnet_nid_t nid);
-int kibnal_close_stale_conns_locked (kib_peer_t *peer,
- __u64 incarnation);
-int kibnal_conn_rts(kib_conn_t *conn,
- __u32 qpn, __u8 resp_res, __u8 init_depth, __u32 psn);
-kib_conn_t *kibnal_create_conn (lnet_nid_t nid, int proto_version);
-void kibnal_destroy_conn (kib_conn_t *conn);
-void kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg);
-int kibnal_alloc_pages (kib_pages_t **pp, int npages);
-void kibnal_free_pages (kib_pages_t *p);
-void kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn);
-void kibnal_txlist_done (struct list_head *txlist, int status);
-int kibnal_post_receives (kib_conn_t *conn);
-int kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie);
-void kibnal_check_sends (kib_conn_t *conn);
-void kibnal_close_conn_locked (kib_conn_t *conn, int error);
-int kibnal_thread_start (int (*fn)(void *arg), void *arg);
-int kibnal_scheduler(void *arg);
-int kibnal_connd (void *arg);
-void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
-void kibnal_close_conn (kib_conn_t *conn, int why);
-void kibnal_start_active_rdma (int type, int status,
- kib_rx_t *rx, lnet_msg_t *lntmsg,
- unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int nob);
-void kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev);
-void kibnal_hca_callback (void *hca_arg, void *cq_arg);
-int kibnal_tunables_init (void);
-void kibnal_tunables_fini (void);
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/iiblnd/iiblnd_cb.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "iiblnd.h"
-
-/*
- * Debug helper that would print 'len' bytes starting at 'ptr' in hex via
- * printk(): 16 bytes per line, a space after every second byte, labelled
- * with 'string'.  The unconditional return at the top disables it, so
- * every call is currently a no-op.
- */
-void
-hexdump(char *string, void *ptr, int len)
-{
-        unsigned char *bytes = ptr;
-        int            n;
-
-        return;                         /* dumping is switched off */
-
-        if (len < 0 || len > 2048) {
-                printk("XXX what the hell? %d\n",len);
-                return;
-        }
-
-        printk("%d bytes of '%s' from 0x%p\n", len, string, ptr);
-
-        /* 'n' counts the bytes printed so far, including the current one */
-        for (n = 1; n <= len; n++) {
-                printk("%02x", bytes[n - 1]);
-                if ((n & 15) == 0)
-                        printk("\n");
-                else if ((n & 1) == 0)
-                        printk(" ");
-        }
-
-        /* terminate a partial last line */
-        if ((len & 15) != 0)
-                printk("\n");
-}
-
-/*
- * Retire a tx descriptor: drop its connection ref (if any), reset it,
- * put it back on kibnal_data.kib_idle_txs and then finalize the (up to
- * two) LNet messages it carried with the status the tx had on entry.
- * Asserts it is not called from interrupt context and that the tx is
- * neither queued, being sent, nor waiting for the peer.
- */
-void
-kibnal_tx_done (kib_tx_t *tx)
-{
-        lnet_msg_t *finished[2];
-        int         status = tx->tx_status;
-        int         n;
-
-        LASSERT (!in_interrupt());
-        LASSERT (!tx->tx_queued);       /* mustn't be queued for sending */
-        LASSERT (tx->tx_sending == 0);  /* mustn't be awaiting sent callback */
-        LASSERT (!tx->tx_waiting);      /* mustn't be awaiting peer response */
-
-#if IBNAL_USE_FMR
-        /* Handle unmapping if required */
-#endif
-        /* detach the lnet msgs (at most 2) so they can be finalized below */
-        for (n = 0; n < 2; n++) {
-                finished[n] = tx->tx_lntmsg[n];
-                tx->tx_lntmsg[n] = NULL;
-        }
-
-        if (tx->tx_conn != NULL) {
-                kibnal_conn_decref(tx->tx_conn);
-                tx->tx_conn = NULL;
-        }
-
-        /* restore the field values kibnal_get_idle_tx() asserts */
-        tx->tx_nwrq = 0;
-        tx->tx_status = 0;
-
-        spin_lock(&kibnal_data.kib_tx_lock);
-        list_add (&tx->tx_list, &kibnal_data.kib_idle_txs);
-        spin_unlock(&kibnal_data.kib_tx_lock);
-
-        /* delay finalize until my descs have been freed */
-        for (n = 0; n < 2; n++) {
-                if (finished[n] != NULL)
-                        lnet_finalize (kibnal_data.kib_ni, finished[n],
-                                       status);
-        }
-}
-
-/*
- * Take the first descriptor off kibnal_data.kib_idle_txs and give it a
- * fresh completion cookie.  Returns NULL when the idle list is empty.
- */
-kib_tx_t *
-kibnal_get_idle_tx (void)
-{
-        kib_tx_t *tx = NULL;
-
-        spin_lock(&kibnal_data.kib_tx_lock);
-
-        if (!list_empty (&kibnal_data.kib_idle_txs)) {
-                tx = list_entry (kibnal_data.kib_idle_txs.next,
-                                 kib_tx_t, tx_list);
-                list_del (&tx->tx_list);
-
-                /* The cookie might never be used, but the lock is held
-                 * anyway and the counter is unlikely to wrap. */
-                tx->tx_cookie = kibnal_data.kib_next_tx_cookie++;
-        }
-
-        spin_unlock(&kibnal_data.kib_tx_lock);
-
-        if (tx == NULL)
-                return NULL;
-
-        /* an idle descriptor must be completely reset */
-        LASSERT (tx->tx_nwrq == 0);
-        LASSERT (!tx->tx_queued);
-        LASSERT (tx->tx_sending == 0);
-        LASSERT (!tx->tx_waiting);
-        LASSERT (tx->tx_status == 0);
-        LASSERT (tx->tx_conn == NULL);
-        LASSERT (tx->tx_lntmsg[0] == NULL);
-        LASSERT (tx->tx_lntmsg[1] == NULL);
-
-        return tx;
-}
-
-/*
- * Post receive buffer 'rx' on its connection's QP.
- *
- * 'credit' / 'rsrvd_credit' say which credit counter (outstanding or
- * reserved) to bump once the post has succeeded.
- *
- * Returns 0 on success, and also when the connection state is already
- * past IBNAL_CONN_ESTABLISHED (nothing is posted then).  Returns -EIO if
- * the post fails, after closing the connection.  In both non-posting
- * cases the rx's ref on the connection is dropped.
- */
-int
-kibnal_post_rx (kib_rx_t *rx, int credit, int rsrvd_credit)
-{
-        kib_conn_t *conn = rx->rx_conn;
-        FSTATUS     frc;
-
-        LASSERT (!in_interrupt());
-        /* old peers don't reserve rxs for RDMA replies */
-        LASSERT (!rsrvd_credit ||
-                 conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
-
-        rx->rx_gl = (IB_LOCAL_DATASEGMENT) {
-                .Address = rx->rx_hca_msg,
-                .Lkey    = kibnal_data.kib_whole_mem.md_lkey,
-                .Length  = IBNAL_MSG_SIZE,
-        };
-
-        rx->rx_wrq = (IB_WORK_REQ2) {
-                .Next        = NULL,
-                .WorkReqId   = kibnal_ptr2wreqid(rx, IBNAL_WID_RX),
-                .MessageLen  = IBNAL_MSG_SIZE,
-                .DSList      = &rx->rx_gl,
-                .DSListDepth = 1,
-                .Operation   = WROpRecv,
-        };
-
-        LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING);
-        LASSERT (rx->rx_nob >= 0);              /* not posted */
-
-        CDEBUG(D_NET, "posting rx [%d %x "LPX64"]\n",
-               rx->rx_wrq.DSList->Length,
-               rx->rx_wrq.DSList->Lkey,
-               rx->rx_wrq.DSList->Address);
-
-        if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) {
-                /* No more posts for this rx; so lose its ref */
-                kibnal_conn_decref(conn);
-                return 0;
-        }
-
-        rx->rx_nob = -1;                        /* flag posted */
-        mb();
-
-        frc = iba_post_recv2(conn->ibc_qp, &rx->rx_wrq, NULL);
-        if (frc != FSUCCESS) {
-                CERROR ("post rx -> %s failed %d\n",
-                        libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
-                kibnal_close_conn(rx->rx_conn, -EIO);
-                /* No more posts for this rx; so lose its ref */
-                kibnal_conn_decref(conn);
-                return -EIO;
-        }
-
-        if (credit || rsrvd_credit) {
-                spin_lock(&conn->ibc_lock);
-
-                if (credit)
-                        conn->ibc_outstanding_credits++;
-                if (rsrvd_credit)
-                        conn->ibc_reserved_credits++;
-
-                spin_unlock(&conn->ibc_lock);
-
-                kibnal_check_sends(conn);
-        }
-
-        return 0;
-}
-
-/*
- * Post all IBNAL_RX_MSGS receive buffers of a connection that is in
- * state IBNAL_CONN_CONNECTING.  Stops at, and returns, the first non-zero
- * result of kibnal_post_rx(); returns 0 if every post succeeded.
- */
-int
-kibnal_post_receives (kib_conn_t *conn)
-{
-        int idx;
-
-        LASSERT (conn->ibc_state == IBNAL_CONN_CONNECTING);
-
-        for (idx = 0; idx < IBNAL_RX_MSGS; idx++) {
-                int rc;
-
-                /* +1 ref for rx desc.  This ref remains until
-                 * kibnal_post_rx fails (i.e. actual failure or we're
-                 * disconnecting) */
-                kibnal_conn_addref(conn);
-
-                rc = kibnal_post_rx (&conn->ibc_rxs[idx], 0, 0);
-                if (rc != 0)
-                        return rc;
-        }
-
-        return 0;
-}
-
-/*
- * Search conn->ibc_active_txs for the tx with the given cookie that is
- * waiting for a peer response and whose message type is 'txtype'.
- * A tx whose cookie matches but which is not waiting, or has a different
- * type, is reported with CWARN and the search continues.
- * Returns the tx, or NULL if none matched.
- */
-kib_tx_t *
-kibnal_find_waiting_tx_locked(kib_conn_t *conn, int txtype, __u64 cookie)
-{
-        struct list_head *pos;
-        kib_tx_t         *tx;
-
-        list_for_each(pos, &conn->ibc_active_txs) {
-                tx = list_entry(pos, kib_tx_t, tx_list);
-
-                LASSERT (!tx->tx_queued);
-                LASSERT (tx->tx_sending != 0 || tx->tx_waiting);
-
-                if (tx->tx_cookie == cookie) {
-                        if (tx->tx_waiting &&
-                            tx->tx_msg->ibm_type == txtype)
-                                return tx;
-
-                        CWARN("Bad completion: %swaiting, type %x (wanted %x)\n",
-                              tx->tx_waiting ? "" : "NOT ",
-                              tx->tx_msg->ibm_type, txtype);
-                }
-        }
-
-        return NULL;
-}
-
-/*
- * Handle a completion message from the peer for the waiting tx
- * identified by (txtype, cookie).  An unmatched completion closes the
- * connection with -EPROTO.  Otherwise the tx stops waiting, and if it is
- * neither queued nor still being sent it is unlinked and completed.
- */
-void
-kibnal_handle_completion(kib_conn_t *conn, int txtype, int status, __u64 cookie)
-{
-        kib_tx_t *tx;
-        int       finished;
-
-        spin_lock(&conn->ibc_lock);
-
-        tx = kibnal_find_waiting_tx_locked(conn, txtype, cookie);
-        if (tx == NULL) {
-                spin_unlock(&conn->ibc_lock);
-
-                CWARN("Unmatched completion type %x cookie "LPX64" from %s\n",
-                      txtype, cookie, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                kibnal_close_conn (conn, -EPROTO);
-                return;
-        }
-
-        /* only act on 'status' if the tx has not already failed */
-        if (tx->tx_status == 0) {
-                if (status < 0)
-                        tx->tx_status = status;
-                else if (txtype == IBNAL_MSG_GET_REQ)
-                        lnet_set_reply_msg_len(kibnal_data.kib_ni,
-                                               tx->tx_lntmsg[1], status);
-        }
-
-        tx->tx_waiting = 0;
-
-        finished = !tx->tx_queued && (tx->tx_sending == 0);
-        if (finished)
-                list_del(&tx->tx_list);
-
-        spin_unlock(&conn->ibc_lock);
-
-        /* complete outside the lock */
-        if (finished)
-                kibnal_tx_done(tx);
-}
-
-/*
- * Queue a completion message of the given type carrying 'status' and
- * 'cookie' on 'conn'.  If no idle tx is available the completion is not
- * sent; only an error is logged.
- */
-void
-kibnal_send_completion (kib_conn_t *conn, int type, int status, __u64 cookie)
-{
-        kib_tx_t *tx;
-
-        tx = kibnal_get_idle_tx();
-        if (tx != NULL) {
-                tx->tx_msg->ibm_u.completion.ibcm_status = status;
-                tx->tx_msg->ibm_u.completion.ibcm_cookie = cookie;
-                kibnal_init_tx_msg(tx, type, sizeof(kib_completion_msg_t));
-
-                kibnal_queue_tx(tx, conn);
-                return;
-        }
-
-        CERROR("Can't get tx for completion %x for %s\n",
-               type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-}
-
-void
-kibnal_handle_rx (kib_rx_t *rx)
-{ /*
- * Process one received message on an established (or later-state)
- * connection: add any credits it carries to ibc_credits, dispatch on
- * msg->ibm_type, close the connection if a handler left rc < 0, and
- * finally re-post the rx buffer unless 'repost' was cleared because
- * lnet_parse() returned a non-negative value.
- */
- kib_msg_t *msg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- int credits = msg->ibm_credits;
- kib_tx_t *tx;
- int rc = 0;
- int repost = 1; /* re-post the rx at the end unless cleared below */
- int rsrvd_credit = 0; /* set when the rx is re-posted against a reserved credit */
- int rc2;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- CDEBUG (D_NET, "Received %x[%d] from %s\n",
- msg->ibm_type, credits, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- if (credits != 0) {
- /* Have I received credits that will let me send? */
- spin_lock(&conn->ibc_lock);
- conn->ibc_credits += credits;
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
- }
-
- switch (msg->ibm_type) {
- default:
- CERROR("Bad IBNAL message type %x from %s\n",
- msg->ibm_type, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
-
- case IBNAL_MSG_NOOP:
- break;
-
- case IBNAL_MSG_IMMEDIATE:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.immediate.ibim_hdr,
- msg->ibm_srcnid, rx, 0);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.putreq.ibprm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_PUT_NAK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- CWARN ("PUT_NACK from %s\n", libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_PUT_ACK:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- spin_lock(&conn->ibc_lock);
- tx = kibnal_find_waiting_tx_locked(conn, IBNAL_MSG_PUT_REQ,
- msg->ibm_u.putack.ibpam_src_cookie);
- if (tx != NULL)
- list_del(&tx->tx_list); /* unlink from ibc_active_txs; re-queued below */
- spin_unlock(&conn->ibc_lock);
-
- if (tx == NULL) {
- CERROR("Unmatched PUT_ACK from %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- rc = -EPROTO;
- break;
- }
-
- LASSERT (tx->tx_waiting);
- /* CAVEAT EMPTOR: I could be racing with tx_complete, but...
- * (a) I can overwrite tx_msg since my peer has received it!
- * (b) tx_waiting set tells tx_complete() it's not done. */
-
- tx->tx_nwrq = 0; /* overwrite PUT_REQ */
-
- rc2 = kibnal_init_rdma(tx, IBNAL_MSG_PUT_DONE,
- kibnal_rd_size(&msg->ibm_u.putack.ibpam_rd),
- &msg->ibm_u.putack.ibpam_rd,
- msg->ibm_u.putack.ibpam_dst_cookie);
- if (rc2 < 0)
- CERROR("Can't setup rdma for PUT to %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc2);
-
- spin_lock(&conn->ibc_lock);
- if (tx->tx_status == 0 && rc2 < 0)
- tx->tx_status = rc2; /* record the setup failure; the tx is still queued */
- tx->tx_waiting = 0; /* clear waiting and queue atomically */
- kibnal_queue_tx_locked(tx, conn);
- spin_unlock(&conn->ibc_lock);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- /* This buffer was pre-reserved by not returning the credit
- * when the PUT_REQ's buffer was reposted, so I just return it
- * now */
- kibnal_handle_completion(conn, IBNAL_MSG_PUT_ACK,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
-
- case IBNAL_MSG_GET_REQ:
- rc = lnet_parse(kibnal_data.kib_ni, &msg->ibm_u.get.ibgm_hdr,
- msg->ibm_srcnid, rx, 1);
- repost = rc < 0; /* repost on error */
- break;
-
- case IBNAL_MSG_GET_DONE:
- rsrvd_credit = 1; /* rdma reply (was pre-reserved) */
-
- kibnal_handle_completion(conn, IBNAL_MSG_GET_REQ,
- msg->ibm_u.completion.ibcm_status,
- msg->ibm_u.completion.ibcm_cookie);
- break;
- }
-
- if (rc < 0) /* protocol error */
- kibnal_close_conn(conn, rc);
-
- if (repost) {
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- rsrvd_credit = 0; /* peer isn't pre-reserving */
-
- kibnal_post_rx(rx, !rsrvd_credit, rsrvd_credit); /* exactly one of the two credit flags is set */
- }
-}
-
-/*
- * Receive-completion handler for the rx encoded in wc->WorkReqId.
- *
- * Marks the rx as no longer posted, then validates the completion: work
- * request status, message unpacking, source/destination NIDs and
- * incarnation stamps, and the sequence number against 'rxseq'.  A valid
- * message is handed to kibnal_handle_rx(), or parked on
- * conn->ibc_early_rxs if the connection is not yet established.  Any
- * validation failure closes the connection; completions that arrive once
- * the connection state is past IBNAL_CONN_ESTABLISHED are ignored.  In
- * both of those cases the rx is not re-posted and its connection ref is
- * dropped.
- */
-void
-kibnal_rx_complete (IB_WORK_COMPLETION *wc, __u64 rxseq)
-{
-        kib_rx_t      *rx = (kib_rx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
-        kib_conn_t    *conn = rx->rx_conn;
-        kib_msg_t     *msg = rx->rx_msg;
-        int            nob = wc->Length;
-        int            err = -EIO;      /* reason passed to close_conn */
-        int            early;
-        int            rc;
-        unsigned long  flags;
-
-        LASSERT (rx->rx_nob < 0);               /* was posted */
-        rx->rx_nob = 0;                         /* isn't now */
-        mb();
-
-        /* receives complete with error in any case after we've started
-         * disconnecting */
-        if (conn->ibc_state > IBNAL_CONN_ESTABLISHED)
-                goto ignore;
-
-        if (wc->Status != WRStatusSuccess) {
-                CERROR("Rx from %s failed: %d\n",
-                       libcfs_nid2str(conn->ibc_peer->ibp_nid), wc->Status);
-                goto failed;
-        }
-
-        rc = kibnal_unpack_msg(msg, conn->ibc_version, nob);
-        if (rc != 0) {
-                CERROR ("Error %d unpacking rx from %s\n",
-                        rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                goto failed;
-        }
-
-        rx->rx_nob = nob;                       /* Now I know nob > 0 */
-        mb();
-
-        if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
-            msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid ||
-            msg->ibm_srcstamp != conn->ibc_incarnation ||
-            msg->ibm_dststamp != kibnal_data.kib_incarnation) {
-                CERROR ("Stale rx from %s\n",
-                        libcfs_nid2str(conn->ibc_peer->ibp_nid));
-                err = -ESTALE;
-                goto failed;
-        }
-
-        if (msg->ibm_seq != rxseq) {
-                CERROR ("Out-of-sequence rx from %s"
-                        ": got "LPD64" but expected "LPD64"\n",
-                        libcfs_nid2str(conn->ibc_peer->ibp_nid),
-                        msg->ibm_seq, rxseq);
-                goto failed;
-        }
-
-        /* set time last known alive */
-        kibnal_peer_alive(conn->ibc_peer);
-
-        /* racing with connection establishment/teardown! */
-        if (conn->ibc_state < IBNAL_CONN_ESTABLISHED) {
-                write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
-                /* must check holding global lock to eliminate race */
-                early = (conn->ibc_state < IBNAL_CONN_ESTABLISHED);
-                if (early)
-                        list_add_tail(&rx->rx_list, &conn->ibc_early_rxs);
-
-                write_unlock_irqrestore(&kibnal_data.kib_global_lock,
-                                        flags);
-                if (early)
-                        return;
-        }
-
-        kibnal_handle_rx(rx);
-        return;
-
- failed:
-        kibnal_close_conn(conn, err);
- ignore:
-        /* Don't re-post rx & drop its ref on conn */
-        kibnal_conn_decref(conn);
-}
-
-/*
- * Translate a kernel virtual address into its struct page.  Addresses in
- * [VMALLOC_START, VMALLOC_END) go through vmalloc_to_page(); everything
- * else goes through virt_to_page().  With CONFIG_HIGHMEM an address in
- * the pkmap window is treated as a bug (LBUG).  The result is asserted
- * to be non-NULL.
- */
-struct page *
-kibnal_kvaddr_to_page (unsigned long vaddr)
-{
-        struct page *page;
-
-        if (vaddr >= VMALLOC_START &&
-            vaddr < VMALLOC_END) {
-                page = vmalloc_to_page ((void *)vaddr);
-        } else {
-#ifdef CONFIG_HIGHMEM
-                if (vaddr >= PKMAP_BASE &&
-                    vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-                        /* No: highmem pages are only used for bulk (kiov)
-                         * I/O, so they should never reach this function */
-                        CERROR("find page for address in highmem\n");
-                        LBUG();
-                }
-#endif
-                page = virt_to_page (vaddr);
-        }
-
-        LASSERT (page != NULL);
-        return page;
-}
-
-#if !IBNAL_USE_FMR
-/*
- * Append one fragment (page + page_offset, len bytes) to RDMA descriptor
- * 'rd'.  When the first fragment is added the descriptor key is set to
- * the whole-memory lkey if 'active', otherwise to the rkey.
- * Returns 0, or -EMSGSIZE if 'rd' already holds IBNAL_MAX_RDMA_FRAGS
- * fragments.
- */
-int
-kibnal_append_rdfrag(kib_rdma_desc_t *rd, int active, struct page *page,
-                     unsigned long page_offset, unsigned long len)
-{
-        kib_rdma_frag_t *frag;
-
-        if (rd->rd_nfrag >= IBNAL_MAX_RDMA_FRAGS) {
-                CERROR ("Too many RDMA fragments\n");
-                return -EMSGSIZE;
-        }
-
-        if (rd->rd_nfrag == 0) {
-                if (active)
-                        rd->rd_key = kibnal_data.kib_whole_mem.md_lkey;
-                else
-                        rd->rd_key = kibnal_data.kib_whole_mem.md_rkey;
-        }
-
-        frag = &rd->rd_frags[rd->rd_nfrag];
-        frag->rf_nob  = len;
-        frag->rf_addr = kibnal_data.kib_whole_mem.md_addr +
-                        lnet_page2phys(page) + page_offset;
-
-        CDEBUG(D_NET,"map key %x frag [%d]["LPX64" for %d]\n",
-               rd->rd_key, rd->rd_nfrag, frag->rf_addr, frag->rf_nob);
-
-        rd->rd_nfrag++;
-        return 0;
-}
-
-/*
- * Non-FMR variant: fill 'rd' with fragments describing 'nob' bytes that
- * start 'offset' bytes into the iovec array 'iov'.  Each fragment stays
- * within one iovec entry and within one page.
- * Returns 0, -EFAULT if a page cannot be found, or the error from
- * kibnal_append_rdfrag().
- */
-int
-kibnal_setup_rd_iov(kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
-                    unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
-        unsigned long  vaddr;
-        struct page   *page;
-        int            page_offset;
-        int            chunk;
-        int            rc;
-
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-        LASSERT ((rd != tx->tx_rd) == !active);
-
-        /* skip the iovec entries that lie wholly before 'offset' */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                niov--;
-                iov++;
-                LASSERT (niov > 0);
-        }
-
-        rd->rd_nfrag = 0;
-        do {
-                LASSERT (niov > 0);
-
-                vaddr = ((unsigned long)iov->iov_base) + offset;
-                page_offset = vaddr & (PAGE_SIZE - 1);
-                page = kibnal_kvaddr_to_page(vaddr);
-                if (page == NULL) {
-                        CERROR ("Can't find page\n");
-                        return -EFAULT;
-                }
-
-                /* limited by: rest of this iovec, bytes wanted, end of page */
-                chunk = min((int)(iov->iov_len - offset), nob);
-                chunk = min(chunk, (int)PAGE_SIZE - page_offset);
-
-                rc = kibnal_append_rdfrag(rd, active, page,
-                                          page_offset, chunk);
-                if (rc != 0)
-                        return rc;
-
-                if (offset + chunk < iov->iov_len) {
-                        /* more to take from the same iovec entry */
-                        offset += chunk;
-                } else {
-                        /* entry consumed; move to the next one */
-                        offset = 0;
-                        iov++;
-                        niov--;
-                }
-                nob -= chunk;
-        } while (nob > 0);
-
-        return 0;
-}
-
-/*
- * Non-FMR variant: fill 'rd' with one fragment per page-iovec entry,
- * describing 'nob' bytes that start 'offset' bytes into 'kiov'.
- * Returns 0 or the error from kibnal_append_rdfrag().
- */
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
-                      int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
-        int chunk;
-        int rc;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT ((rd != tx->tx_rd) == !active);
-
-        /* skip the entries that lie wholly before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        rd->rd_nfrag = 0;
-        do {
-                LASSERT (nkiov > 0);
-
-                chunk = min((int)(kiov->kiov_len - offset), nob);
-
-                rc = kibnal_append_rdfrag(rd, active, kiov->kiov_page,
-                                          kiov->kiov_offset + offset,
-                                          chunk);
-                if (rc != 0)
-                        return rc;
-
-                /* only the first entry is entered part-way through */
-                offset = 0;
-                kiov++;
-                nkiov--;
-                nob -= chunk;
-        } while (nob > 0);
-
-        return 0;
-}
-#else
-/*
- * FMR variant: map the pages listed in tx->tx_pages for RDMA.
- * The registration call itself is commented out below, so after checking
- * its arguments this function always returns -EINVAL.
- */
-int
-kibnal_map_tx (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
-               int npages, unsigned long page_offset, int nob)
-{
-        IB_ACCESS_CONTROL access = {0,};
-        FSTATUS           frc;
-
-        LASSERT ((rd != tx->tx_rd) == !active);
-        LASSERT (!tx->tx_md.md_active);
-        LASSERT (tx->tx_md.md_fmrcount > 0);
-        LASSERT (page_offset < PAGE_SIZE);
-        LASSERT (npages >= (1 + ((page_offset + nob - 1)>>PAGE_SHIFT)));
-        LASSERT (npages <= LNET_MAX_IOV);
-
-        /* a passive (!active) mapping gets local and RDMA write access */
-        if (!active) {
-                // access.s.MWBindable = 1;
-                access.s.LocalWrite = 1;
-                access.s.RdmaWrite = 1;
-        }
-
-        /* Map the memory described by tx->tx_pages
-        frc = iibt_register_physical_memory(kibnal_data.kib_hca,
-                                            IBNAL_RDMA_BASE,
-                                            tx->tx_pages, npages,
-                                            page_offset,
-                                            kibnal_data.kib_pd,
-                                            access,
-                                            &tx->tx_md.md_handle,
-                                            &tx->tx_md.md_addr,
-                                            &tx->tx_md.md_lkey,
-                                            &tx->tx_md.md_rkey);
-        */
-        return -EINVAL;
-}
-
-/*
- * FMR variant: collect the physical pages backing 'nob' bytes that start
- * 'offset' bytes into 'iov' into tx->tx_pages, then hand them to
- * kibnal_map_tx().  The whole payload must lie within one iovec entry.
- * Returns -EMSGSIZE if it does not, -EFAULT if a page cannot be found,
- * otherwise the result of kibnal_map_tx().
- */
-int
-kibnal_setup_rd_iov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
-                     unsigned int niov, struct iovec *iov, int offset, int nob)
-
-{
-        unsigned long  vaddr;
-        unsigned long  page_offset;
-        struct page   *page;
-        int            npages = 0;
-        int            remaining;
-        int            chunk;
-
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-
-        /* skip the iovec entries that lie wholly before 'offset' */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                niov--;
-                iov++;
-                LASSERT (niov > 0);
-        }
-
-        if (nob > iov->iov_len - offset) {
-                CERROR ("Can't map multiple vaddr fragments\n");
-                return (-EMSGSIZE);
-        }
-
-        vaddr = ((unsigned long)iov->iov_base) + offset;
-
-        page_offset = vaddr & (PAGE_SIZE - 1);
-        remaining = nob;
-
-        do {
-                LASSERT (npages < LNET_MAX_IOV);
-
-                page = kibnal_kvaddr_to_page(vaddr);
-                if (page == NULL) {
-                        CERROR("Can't find page for %lu\n", vaddr);
-                        return -EFAULT;
-                }
-
-                tx->tx_pages[npages++] = lnet_page2phys(page);
-
-                /* advance to the start of the next page */
-                chunk = PAGE_SIZE - (vaddr & (PAGE_SIZE - 1));
-                vaddr += chunk;
-                remaining -= chunk;
-
-        } while (remaining > 0);
-
-        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-
-/*
- * FMR variant: collect the physical pages of 'kiov' covering 'nob' bytes
- * starting 'offset' bytes in into tx->tx_pages, then hand them to
- * kibnal_map_tx().  The pages must form a gap-free range: every entry
- * after the first must start at page offset 0, and every entry that is
- * followed by more data must end on a page boundary.
- * Returns -EINVAL if that does not hold, otherwise the result of
- * kibnal_map_tx().
- */
-int
-kibnal_setup_rd_kiov (kib_tx_t *tx, kib_rdma_desc_t *rd, int active,
-                      int nkiov, lnet_kiov_t *kiov, int offset, int nob)
-{
-        unsigned long page_offset;
-        int           npages = 0;
-        int           remaining;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT (nkiov <= LNET_MAX_IOV);
-        LASSERT (!tx->tx_md.md_active);
-        LASSERT ((rd != tx->tx_rd) == !active);
-
-        /* skip the entries that lie wholly before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        page_offset = kiov->kiov_offset + offset;
-
-        /* counted from the start of the first entry used, hence '+ offset' */
-        remaining = offset + nob;
-
-        do {
-                LASSERT (npages < LNET_MAX_IOV);
-                LASSERT (nkiov > 0);
-
-                if ((npages > 0 && kiov->kiov_offset != 0) ||
-                    (remaining > kiov->kiov_len &&
-                     (kiov->kiov_offset + kiov->kiov_len) != PAGE_SIZE)) {
-                        /* Can't have gaps */
-                        CERROR ("Can't make payload contiguous in I/O VM:"
-                                "page %d, offset %d, len %d \n",
-                                npages, kiov->kiov_offset, kiov->kiov_len);
-
-                        return -EINVAL;
-                }
-
-                tx->tx_pages[npages++] = lnet_page2phys(kiov->kiov_page);
-                remaining -= kiov->kiov_len;
-                kiov++;
-                nkiov--;
-        } while (remaining > 0);
-
-        return kibnal_map_tx(tx, rd, active, npages, page_offset, nob);
-}
-#endif
-
-/*
- * Return the first connection on peer->ibp_conns, or NULL if the peer
- * has no connections.
- */
-kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
-        if (list_empty(&peer->ibp_conns))
-                return (NULL);
-
-        /* just return the first connection */
-        return (list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list));
-}
-
-void
-kibnal_check_sends (kib_conn_t *conn)
-{ /*
- * Post as many queued transmits on 'conn' as credits and the
- * concurrent-send limit allow.  Working under conn->ibc_lock it:
- *  1. moves txs from ibc_tx_queue_rsrvd to ibc_tx_queue while reserved
- *     credits remain;
- *  2. queues a NOOP if both send queues are empty and either enough
- *     credits are owed to the peer or kibnal_send_keepalive() says so;
- *  3. loops taking txs from ibc_tx_queue_nocred first, then ibc_tx_queue,
- *     packing and posting each one until the queues are empty, the
- *     concurrent-send limit is reached, or credits run out.
- * If a post fails (or the connection is no longer established) the
- * accounting is rolled back, the connection is closed and the function
- * returns without looking at the remaining queued txs.
- */
- kib_tx_t *tx;
- FSTATUS frc;
- int rc;
- int consume_cred; /* 1 if the current tx uses up one of conn->ibc_credits */
- int done;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- spin_lock(&conn->ibc_lock);
-
- LASSERT (conn->ibc_nsends_posted <=
- *kibnal_tunables.kib_concurrent_sends);
- LASSERT (conn->ibc_reserved_credits >= 0);
-
- while (conn->ibc_reserved_credits > 0 &&
- !list_empty(&conn->ibc_tx_queue_rsrvd)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry(conn->ibc_tx_queue_rsrvd.next,
- kib_tx_t, tx_list);
- list_del(&tx->tx_list);
- list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
- conn->ibc_reserved_credits--; /* one reserved credit per tx moved */
- }
-
- if (list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- (conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER ||
- kibnal_send_keepalive(conn))) {
- spin_unlock(&conn->ibc_lock); /* lock is not held across kibnal_get_idle_tx() */
-
- tx = kibnal_get_idle_tx();
- if (tx != NULL)
- kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
- spin_lock(&conn->ibc_lock);
-
- if (tx != NULL)
- kibnal_queue_tx_locked(tx, conn);
- }
-
- for (;;) {
- if (!list_empty(&conn->ibc_tx_queue_nocred)) {
- LASSERT (conn->ibc_version !=
- IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD);
- tx = list_entry (conn->ibc_tx_queue_nocred.next,
- kib_tx_t, tx_list);
- consume_cred = 0;
- } else if (!list_empty (&conn->ibc_tx_queue)) {
- tx = list_entry (conn->ibc_tx_queue.next,
- kib_tx_t, tx_list);
- consume_cred = 1;
- } else {
- /* nothing waiting */
- break;
- }
-
- LASSERT (tx->tx_queued);
- /* We rely on this for QP sizing */
- LASSERT (tx->tx_nwrq > 0 && tx->tx_nwrq <= 1 + IBNAL_MAX_RDMA_FRAGS);
-
- LASSERT (conn->ibc_outstanding_credits >= 0);
- LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
- LASSERT (conn->ibc_credits >= 0);
- LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
- if (conn->ibc_nsends_posted ==
- *kibnal_tunables.kib_concurrent_sends) {
- /* We've got some tx completions outstanding... */
- CDEBUG(D_NET, "%s: posted enough\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (consume_cred) {
- if (conn->ibc_credits == 0) { /* no credits */
- CDEBUG(D_NET, "%s: no credits\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
-
- if (conn->ibc_credits == 1 && /* last credit reserved for */
- conn->ibc_outstanding_credits == 0) { /* giving back credits */
- CDEBUG(D_NET, "%s: not using last credit\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- break;
- }
- }
-
- list_del (&tx->tx_list);
- tx->tx_queued = 0;
-
- /* NB don't drop ibc_lock before bumping tx_sending */
-
- if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
- (!list_empty(&conn->ibc_tx_queue) ||
- !list_empty(&conn->ibc_tx_queue_nocred) ||
- (conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER &&
- !kibnal_send_keepalive(conn)))) {
- /* redundant NOOP */
- spin_unlock(&conn->ibc_lock);
- kibnal_tx_done(tx);
- spin_lock(&conn->ibc_lock);
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- continue;
- }
-
- kibnal_pack_msg(tx->tx_msg, conn->ibc_version,
- conn->ibc_outstanding_credits,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation,
- conn->ibc_txseq);
-
- conn->ibc_txseq++;
- conn->ibc_outstanding_credits = 0; /* all owed credits were just packed into this message */
- conn->ibc_nsends_posted++;
- if (consume_cred)
- conn->ibc_credits--;
-
- /* CAVEAT EMPTOR! This tx could be the PUT_DONE of an RDMA
- * PUT. If so, it was first queued here as a PUT_REQ, sent and
- * stashed on ibc_active_txs, matched by an incoming PUT_ACK,
- * and then re-queued here. It's (just) possible that
- * tx_sending is non-zero if we've not done the tx_complete() from
- * the first send; hence the ++ rather than = below. */
- tx->tx_sending++;
-
- list_add (&tx->tx_list, &conn->ibc_active_txs);
-
- LASSERT (tx->tx_nwrq > 0);
-
- rc = 0;
- frc = FSUCCESS;
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED) {
- rc = -ECONNABORTED; /* nothing is posted on a connection that is no longer established */
- } else {
- frc = iba_post_send2(conn->ibc_qp, tx->tx_wrq, NULL);
- if (frc != FSUCCESS)
- rc = -EIO;
- }
-
- conn->ibc_last_send = jiffies;
-
- if (rc != 0) {
- /* NB credits are transferred in the actual
- * message, which can only be the last work item */
- conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
- if (consume_cred)
- conn->ibc_credits++;
- conn->ibc_nsends_posted--;
-
- tx->tx_status = rc;
- tx->tx_waiting = 0;
- tx->tx_sending--;
-
- done = (tx->tx_sending == 0); /* no earlier send of this tx is still in flight */
- if (done)
- list_del (&tx->tx_list);
-
- spin_unlock(&conn->ibc_lock);
-
- if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
- CERROR ("Error %d posting transmit to %s\n",
- frc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- else
- CDEBUG (D_NET, "Error %d posting transmit to %s\n",
- rc, libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- kibnal_close_conn (conn, rc);
-
- if (done)
- kibnal_tx_done (tx);
- return;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-}
-
-void
-kibnal_tx_complete (IB_WORK_COMPLETION *wc)
-{ /*
- * Send-completion handler for the tx encoded in wc->WorkReqId.
- * Logs a failed completion (in detail when KIBLND_DETAILED_DEBUG is
- * set), then under conn->ibc_lock decrements tx_sending and
- * ibc_nsends_posted and, on failure, marks the tx failed with -EIO.
- * If that leaves the tx idle it is unlinked and completed.  Finally the
- * connection is closed on failure; otherwise the peer is marked alive
- * and kibnal_check_sends() is run.
- */
- kib_tx_t *tx = (kib_tx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
- kib_conn_t *conn = tx->tx_conn;
- int failed = wc->Status != WRStatusSuccess;
- int idle;
-
- CDEBUG(D_NET, "%s: sending %d nwrq %d status %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_sending, tx->tx_nwrq, wc->Status);
-
- LASSERT (tx->tx_sending > 0);
-
- if (failed &&
- tx->tx_status == 0 &&
- conn->ibc_state == IBNAL_CONN_ESTABLISHED) { /* log only the first failure on a live connection */
-#if KIBLND_DETAILED_DEBUG
- int i;
- IB_WORK_REQ2 *wrq = &tx->tx_wrq[0];
- IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[0];
- lnet_msg_t *lntmsg = tx->tx_lntmsg[0];
-#endif
- CDEBUG(D_NETERROR, "tx -> %s type %x cookie "LPX64
- " sending %d waiting %d failed %d nwrk %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting, wc->Status,
- tx->tx_nwrq);
-#if KIBLND_DETAILED_DEBUG
- for (i = 0; i < tx->tx_nwrq; i++, wrq++, gl++) { /* dump every work request of the tx */
- switch (wrq->Operation) {
- default:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p OP %d "
- "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
- i, wrq, wrq->Next, wrq->Operation,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey);
- break;
- case WROpSend:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p SEND "
- "DSList %p(%p)/%d: "LPX64"/%d K %x\n",
- i, wrq, wrq->Next,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey);
- break;
- case WROpRdmaWrite:
- CDEBUG(D_NETERROR, " [%3d] Addr %p Next %p DMA "
- "DSList: %p(%p)/%d "LPX64"/%d K %x -> "
- LPX64" K %x\n",
- i, wrq, wrq->Next,
- wrq->DSList, gl, wrq->DSListDepth,
- gl->Address, gl->Length, gl->Lkey,
- wrq->Req.SendRC.RemoteDS.Address,
- wrq->Req.SendRC.RemoteDS.Rkey);
- break;
- }
- }
-
- switch (tx->tx_msg->ibm_type) {
- default:
- CDEBUG(D_NETERROR, " msg type %x %p/%d, No RDMA\n",
- tx->tx_msg->ibm_type,
- tx->tx_msg, tx->tx_msg->ibm_nob);
- break;
-
- case IBNAL_MSG_PUT_DONE:
- case IBNAL_MSG_GET_DONE:
- CDEBUG(D_NETERROR, " msg type %x %p/%d, RDMA key %x frags %d...\n",
- tx->tx_msg->ibm_type,
- tx->tx_msg, tx->tx_msg->ibm_nob,
- tx->tx_rd->rd_key, tx->tx_rd->rd_nfrag);
- for (i = 0; i < tx->tx_rd->rd_nfrag; i++)
- CDEBUG(D_NETERROR, " [%d] "LPX64"/%d\n", i,
- tx->tx_rd->rd_frags[i].rf_addr,
- tx->tx_rd->rd_frags[i].rf_nob);
- if (lntmsg == NULL) {
- CDEBUG(D_NETERROR, " No lntmsg\n");
- } else if (lntmsg->msg_iov != NULL) {
- CDEBUG(D_NETERROR, " lntmsg in %d VIRT frags...\n",
- lntmsg->msg_niov);
- for (i = 0; i < lntmsg->msg_niov; i++)
- CDEBUG(D_NETERROR, " [%d] %p/%d\n", i,
- lntmsg->msg_iov[i].iov_base,
- lntmsg->msg_iov[i].iov_len);
- } else if (lntmsg->msg_kiov != NULL) {
- CDEBUG(D_NETERROR, " lntmsg in %d PAGE frags...\n",
- lntmsg->msg_niov);
- for (i = 0; i < lntmsg->msg_niov; i++)
- CDEBUG(D_NETERROR, " [%d] %p+%d/%d\n", i,
- lntmsg->msg_kiov[i].kiov_page,
- lntmsg->msg_kiov[i].kiov_offset,
- lntmsg->msg_kiov[i].kiov_len);
- } else {
- CDEBUG(D_NETERROR, " lntmsg in %d frags\n",
- lntmsg->msg_niov);
- }
-
- break;
- }
-#endif
- }
-
- spin_lock(&conn->ibc_lock);
-
- /* I could be racing with rdma completion. Whoever makes 'tx' idle
- * gets to free it, which also drops its ref on 'conn'. */
-
- tx->tx_sending--;
- conn->ibc_nsends_posted--;
-
- if (failed) {
- tx->tx_waiting = 0; /* stop waiting for the peer once the send has failed */
- tx->tx_status = -EIO;
- }
-
- idle = (tx->tx_sending == 0) && /* This is the final callback */
- !tx->tx_waiting && /* Not waiting for peer */
- !tx->tx_queued; /* Not re-queued (PUT_DONE) */
- if (idle)
- list_del(&tx->tx_list);
-
- kibnal_conn_addref(conn); /* 1 ref for me.... */
-
- spin_unlock(&conn->ibc_lock);
-
- if (idle)
- kibnal_tx_done (tx);
-
- if (failed) {
- kibnal_close_conn (conn, -EIO);
- } else {
- kibnal_peer_alive(conn->ibc_peer);
- kibnal_check_sends(conn);
- }
-
- kibnal_conn_decref(conn); /* ...until here */
-}
-
-/*
- * Initialise the message header of 'tx' and append the work request that
- * sends it.  The new request occupies slot tx->tx_nwrq of tx_wrq/tx_gl,
- * is marked as the last one in the chain (Next == NULL), and tx_nwrq is
- * then incremented.  'body_nob' is the size of the message body; the
- * header up to ibm_u is added to it to get the message length.
- */
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
-        IB_LOCAL_DATASEGMENT *seg = &tx->tx_gl[tx->tx_nwrq];
-        IB_WORK_REQ2         *req = &tx->tx_wrq[tx->tx_nwrq];
-        int                   msg_nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
-        LASSERT (tx->tx_nwrq >= 0 &&
-                 tx->tx_nwrq < (1 + IBNAL_MAX_RDMA_FRAGS));
-        LASSERT (msg_nob <= IBNAL_MSG_SIZE);
-
-        kibnal_init_msg(tx->tx_msg, type, body_nob);
-
-        /* NOTE(review): the segment Length is IBNAL_MSG_SIZE while the
-         * request's MessageLen below is msg_nob - confirm this is intended */
-        *seg = (IB_LOCAL_DATASEGMENT) {
-                .Address = tx->tx_hca_msg,
-                .Length  = IBNAL_MSG_SIZE,
-                .Lkey    = kibnal_data.kib_whole_mem.md_lkey,
-        };
-
-        req->Next = NULL;                       /* This is the last one */
-
-        req->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_TX);
-        req->Operation = WROpSend;
-        req->DSList = seg;
-        req->DSListDepth = 1;
-        req->MessageLen = msg_nob;
-        req->Req.SendRC.ImmediateData = 0;
-        req->Req.SendRC.Options.s.SolicitedEvent = 1;
-        req->Req.SendRC.Options.s.SignaledCompletion = 1;
-        req->Req.SendRC.Options.s.ImmediateData = 0;
-        /* fence only needed on RDMA reads */
-        req->Req.SendRC.Options.s.Fence = 0;
-
-        tx->tx_nwrq++;
-}
-
-int
-kibnal_init_rdma (kib_tx_t *tx, int type, int nob,
- kib_rdma_desc_t *dstrd, __u64 dstcookie)
-{
- kib_msg_t *ibmsg = tx->tx_msg;
- kib_rdma_desc_t *srcrd = tx->tx_rd;
- IB_LOCAL_DATASEGMENT *gl;
- IB_WORK_REQ2 *wrq;
- int rc;
-
-#if IBNAL_USE_FMR
- LASSERT (tx->tx_nwrq == 0);
-
- gl = &tx->tx_gl[0];
- gl->Length = nob;
- gl->Address = srcrd->rd_addr;
- gl->Lkey = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[0];
-
- wrq->Next = wrq + 1;
- wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->Operation = WROpRdmaWrite;
- wrq->DSList = gl;
- wrq->DSListDepth = 1;
- wrq->MessageLen = nob;
-
- wrq->Req.SendRC.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.SolicitedEvent = 0;
- wrq->Req.SendRC.Options.s.SignaledCompletion = 0;
- wrq->Req.SendRC.Options.s.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.Fence = 0;
-
- wrq->Req.SendRC.RemoteDS.Address = dstrd->rd_addr;
- wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key;
-
- tx->tx_nwrq = 1;
- rc = nob;
-#else
- /* CAVEAT EMPTOR: this 'consumes' the frags in 'dstrd' */
- int resid = nob;
- kib_rdma_frag_t *srcfrag;
- int srcidx;
- kib_rdma_frag_t *dstfrag;
- int dstidx;
- int wrknob;
-
- /* Called by scheduler */
- LASSERT (!in_interrupt());
-
- LASSERT (type == IBNAL_MSG_GET_DONE ||
- type == IBNAL_MSG_PUT_DONE);
-
- srcidx = dstidx = 0;
- srcfrag = &srcrd->rd_frags[0];
- dstfrag = &dstrd->rd_frags[0];
- rc = resid;
-
- while (resid > 0) {
- if (srcidx >= srcrd->rd_nfrag) {
- CERROR("Src buffer exhausted: %d frags\n", srcidx);
- rc = -EPROTO;
- break;
- }
-
- if (dstidx == dstrd->rd_nfrag) {
- CERROR("Dst buffer exhausted: %d frags\n", dstidx);
- rc = -EPROTO;
- break;
- }
-
- if (tx->tx_nwrq == IBNAL_MAX_RDMA_FRAGS) {
- CERROR("RDMA too fragmented: %d/%d src %d/%d dst frags\n",
- srcidx, srcrd->rd_nfrag,
- dstidx, dstrd->rd_nfrag);
- rc = -EMSGSIZE;
- break;
- }
-
- wrknob = MIN(MIN(srcfrag->rf_nob, dstfrag->rf_nob), resid);
-
- gl = &tx->tx_gl[tx->tx_nwrq];
- gl->Length = wrknob;
- gl->Address = srcfrag->rf_addr;
- gl->Lkey = srcrd->rd_key;
-
- wrq = &tx->tx_wrq[tx->tx_nwrq];
-
- wrq->Next = wrq + 1;
- wrq->WorkReqId = kibnal_ptr2wreqid(tx, IBNAL_WID_RDMA);
- wrq->Operation = WROpRdmaWrite;
- wrq->DSList = gl;
- wrq->DSListDepth = 1;
- wrq->MessageLen = nob;
-
- wrq->Req.SendRC.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.SolicitedEvent = 0;
- wrq->Req.SendRC.Options.s.SignaledCompletion = 0;
- wrq->Req.SendRC.Options.s.ImmediateData = 0;
- wrq->Req.SendRC.Options.s.Fence = 0;
-
- wrq->Req.SendRC.RemoteDS.Address = dstfrag->rf_addr;
- wrq->Req.SendRC.RemoteDS.Rkey = dstrd->rd_key;
-
- resid -= wrknob;
- if (wrknob < srcfrag->rf_nob) {
- srcfrag->rf_addr += wrknob;
- srcfrag->rf_nob -= wrknob;
- } else {
- srcfrag++;
- srcidx++;
- }
-
- if (wrknob < dstfrag->rf_nob) {
- dstfrag->rf_addr += wrknob;
- dstfrag->rf_nob -= wrknob;
- } else {
- dstfrag++;
- dstidx++;
- }
-
- tx->tx_nwrq++;
- }
-
- if (rc < 0) /* no RDMA if completing with failure */
- tx->tx_nwrq = 0;
-#endif
-
- ibmsg->ibm_u.completion.ibcm_status = rc;
- ibmsg->ibm_u.completion.ibcm_cookie = dstcookie;
- kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
- return rc;
-}
-
-void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
- spin_lock(&conn->ibc_lock);
- kibnal_queue_tx_locked (tx, conn);
- spin_unlock(&conn->ibc_lock);
-
- kibnal_check_sends(conn);
-}
-
-void
-kibnal_schedule_active_connect_locked (kib_peer_t *peer, int proto_version)
-{
- /* Called holding kib_global_lock exclusive with IRQs disabled */
-
- peer->ibp_version = proto_version; /* proto version for new conn */
- peer->ibp_connecting++; /* I'm connecting */
- kibnal_peer_addref(peer); /* extra ref for connd */
-
- spin_lock(&kibnal_data.kib_connd_lock);
-
- list_add_tail (&peer->ibp_connd_list, &kibnal_data.kib_connd_peers);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock(&kibnal_data.kib_connd_lock);
-}
-
-void
-kibnal_schedule_active_connect (kib_peer_t *peer, int proto_version)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- kibnal_schedule_active_connect_locked(peer, proto_version);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_launch_tx (kib_tx_t *tx, lnet_nid_t nid)
-{
- kib_peer_t *peer;
- kib_conn_t *conn;
- unsigned long flags;
- rwlock_t *g_lock = &kibnal_data.kib_global_lock;
- int retry;
- int rc;
-
- /* If I get here, I've committed to send, so I complete the tx with
- * failure on any problems */
-
- LASSERT (tx->tx_conn == NULL); /* only set when assigned a conn */
- LASSERT (tx->tx_nwrq > 0); /* work items have been set up */
-
- for (retry = 0; ; retry = 1) {
- read_lock_irqsave(g_lock, flags);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL) {
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- kibnal_conn_addref(conn); /* 1 ref for me... */
- read_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...to here */
- return;
- }
- }
-
- /* Making one or more connections; I'll need a write lock... */
- read_unlock(g_lock);
- write_lock(g_lock);
-
- peer = kibnal_find_peer_locked (nid);
- if (peer != NULL)
- break;
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (retry) {
- CERROR("Can't find peer %s\n", libcfs_nid2str(nid));
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- rc = kibnal_add_persistent_peer(nid);
- if (rc != 0) {
- CERROR("Can't add peer %s: %d\n",
- libcfs_nid2str(nid), rc);
-
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
- }
-
- conn = kibnal_find_conn_locked (peer);
- if (conn != NULL) {
- /* Connection exists; queue message on it */
- kibnal_conn_addref(conn); /* 1 ref for me... */
- write_unlock_irqrestore(g_lock, flags);
-
- kibnal_queue_tx (tx, conn);
- kibnal_conn_decref(conn); /* ...until here */
- return;
- }
-
- if (!kibnal_peer_connecting(peer)) {
- if (!(peer->ibp_reconnect_interval == 0 || /* first attempt */
- time_after_eq(jiffies, peer->ibp_reconnect_time))) {
- write_unlock_irqrestore(g_lock, flags);
- tx->tx_status = -EHOSTUNREACH;
- tx->tx_waiting = 0;
- kibnal_tx_done (tx);
- return;
- }
-
- kibnal_schedule_active_connect_locked(peer, IBNAL_MSG_VERSION);
- }
-
- /* A connection is being established; queue the message... */
- list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
- write_unlock_irqrestore(g_lock, flags);
-}
-
-void
-kibnal_txlist_done (struct list_head *txlist, int status)
-{
- kib_tx_t *tx;
-
- while (!list_empty (txlist)) {
- tx = list_entry (txlist->next, kib_tx_t, tx_list);
-
- list_del (&tx->tx_list);
- /* complete now */
- tx->tx_waiting = 0;
- tx->tx_status = status;
- kibnal_tx_done (tx);
- }
-}
-
-int
-kibnal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
- lnet_hdr_t *hdr = &lntmsg->msg_hdr;
- int type = lntmsg->msg_type;
- lnet_process_id_t target = lntmsg->msg_target;
- int target_is_router = lntmsg->msg_target_is_router;
- int routing = lntmsg->msg_routing;
- unsigned int payload_niov = lntmsg->msg_niov;
- struct iovec *payload_iov = lntmsg->msg_iov;
- lnet_kiov_t *payload_kiov = lntmsg->msg_kiov;
- unsigned int payload_offset = lntmsg->msg_offset;
- unsigned int payload_nob = lntmsg->msg_len;
- kib_msg_t *ibmsg;
- kib_tx_t *tx;
- int nob;
- int rc;
-
- /* NB 'private' is different depending on what we're sending.... */
-
- CDEBUG(D_NET, "sending %d bytes in %d frags to %s\n",
- payload_nob, payload_niov, libcfs_id2str(target));
-
- LASSERT (payload_nob == 0 || payload_niov > 0);
- LASSERT (payload_niov <= LNET_MAX_IOV);
-
- /* Thread context */
- LASSERT (!in_interrupt());
- /* payload is either all vaddrs or all pages */
- LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
- switch (type) {
- default:
- LBUG();
- return (-EIO);
-
- case LNET_MSG_ACK:
- LASSERT (payload_nob == 0);
- break;
-
- case LNET_MSG_GET:
- if (routing || target_is_router)
- break; /* send IMMEDIATE */
-
- /* is the REPLY message too small for RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[lntmsg->msg_md->md_length]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can allocate txd for GET to %s: \n",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.get.ibgm_hdr = *hdr;
- ibmsg->ibm_u.get.ibgm_cookie = tx->tx_cookie;
-
- if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
- rc = kibnal_setup_rd_iov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- 0,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.iov,
- 0, lntmsg->msg_md->md_length);
- else
- rc = kibnal_setup_rd_kiov(tx, &ibmsg->ibm_u.get.ibgm_rd,
- 0,
- lntmsg->msg_md->md_niov,
- lntmsg->msg_md->md_iov.kiov,
- 0, lntmsg->msg_md->md_length);
- if (rc != 0) {
- CERROR("Can't setup GET sink for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
-#if IBNAL_USE_FMR
- nob = sizeof(kib_get_msg_t);
-#else
- {
- int n = ibmsg->ibm_u.get.ibgm_rd.rd_nfrag;
-
- nob = offsetof(kib_get_msg_t, ibgm_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_GET_REQ, nob);
-
- tx->tx_lntmsg[1] = lnet_create_reply_msg(kibnal_data.kib_ni,
- lntmsg);
- if (tx->tx_lntmsg[1] == NULL) {
- CERROR("Can't create reply for GET -> %s\n",
- libcfs_nid2str(target.nid));
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg[0,1] on completion */
- tx->tx_waiting = 1; /* waiting for GET_DONE */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-
- case LNET_MSG_REPLY:
- case LNET_MSG_PUT:
- /* Is the payload small enough not to need RDMA? */
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
- if (nob <= IBNAL_MSG_SIZE)
- break; /* send IMMEDIATE */
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate %s txd for %s\n",
- type == LNET_MSG_PUT ? "PUT" : "REPLY",
- libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- if (payload_kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1,
- payload_niov, payload_iov,
- payload_offset, payload_nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1,
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- if (rc != 0) {
- CERROR("Can't setup PUT src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- kibnal_tx_done(tx);
- return -EIO;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.putreq.ibprm_hdr = *hdr;
- ibmsg->ibm_u.putreq.ibprm_cookie = tx->tx_cookie;
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_REQ, sizeof(kib_putreq_msg_t));
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_{ACK,NAK} */
- kibnal_launch_tx(tx, target.nid);
- return 0;
- }
-
- /* send IMMEDIATE */
-
- LASSERT (offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob])
- <= IBNAL_MSG_SIZE);
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR ("Can't send %d to %s: tx descs exhausted\n",
- type, libcfs_nid2str(target.nid));
- return -ENOMEM;
- }
-
- ibmsg = tx->tx_msg;
- ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
- if (payload_kiov != NULL)
- lnet_copy_kiov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_kiov,
- payload_offset, payload_nob);
- else
- lnet_copy_iov2flat(IBNAL_MSG_SIZE, ibmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- payload_niov, payload_iov,
- payload_offset, payload_nob);
-
- nob = offsetof(kib_immediate_msg_t, ibim_payload[payload_nob]);
- kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- kibnal_launch_tx(tx, target.nid);
- return 0;
-}
-
-void
-kibnal_reply(lnet_ni_t *ni, kib_rx_t *rx, lnet_msg_t *lntmsg)
-{
- lnet_process_id_t target = lntmsg->msg_target;
- unsigned int niov = lntmsg->msg_niov;
- struct iovec *iov = lntmsg->msg_iov;
- lnet_kiov_t *kiov = lntmsg->msg_kiov;
- unsigned int offset = lntmsg->msg_offset;
- unsigned int nob = lntmsg->msg_len;
- kib_tx_t *tx;
- int rc;
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't get tx for REPLY to %s\n",
- libcfs_nid2str(target.nid));
- goto failed_0;
- }
-
- if (nob == 0)
- rc = 0;
- else if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx, tx->tx_rd, 1,
- niov, iov, offset, nob);
- else
- rc = kibnal_setup_rd_kiov(tx, tx->tx_rd, 1,
- niov, kiov, offset, nob);
-
- if (rc != 0) {
- CERROR("Can't setup GET src for %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- rc = kibnal_init_rdma(tx, IBNAL_MSG_GET_DONE, nob,
- &rx->rx_msg->ibm_u.get.ibgm_rd,
- rx->rx_msg->ibm_u.get.ibgm_cookie);
- if (rc < 0) {
- CERROR("Can't setup rdma for GET from %s: %d\n",
- libcfs_nid2str(target.nid), rc);
- goto failed_1;
- }
-
- if (rc == 0) {
- /* No RDMA: local completion may happen now! */
- lnet_finalize(ni, lntmsg, 0);
- } else {
- /* RDMA: lnet_finalize(lntmsg) when it
- * completes */
- tx->tx_lntmsg[0] = lntmsg;
- }
-
- kibnal_queue_tx(tx, rx->rx_conn);
- return;
-
- failed_1:
- kibnal_tx_done(tx);
- failed_0:
- lnet_finalize(ni, lntmsg, -EIO);
-}
-
-int
-kibnal_eager_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- void **new_private)
-{
- kib_rx_t *rx = private;
- kib_conn_t *conn = rx->rx_conn;
-
- if (conn->ibc_version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD) {
- /* Can't block if RDMA completions need normal credits */
- LCONSOLE_ERROR_MSG(0x12d, "Dropping message from %s: no "
- "buffers free. %s is running an old version"
- " of LNET that may deadlock if messages "
- "wait for buffers)\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- return -EDEADLK;
- }
-
- *new_private = private;
- return 0;
-}
-
-int
-kibnal_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
- kib_rx_t *rx = private;
- kib_msg_t *rxmsg = rx->rx_msg;
- kib_conn_t *conn = rx->rx_conn;
- kib_tx_t *tx;
- kib_msg_t *txmsg;
- int nob;
- int post_cred = 1;
- int rc = 0;
-
- LASSERT (mlen <= rlen);
- LASSERT (!in_interrupt());
- /* Either all pages or all vaddrs */
- LASSERT (!(kiov != NULL && iov != NULL));
-
- switch (rxmsg->ibm_type) {
- default:
- LBUG();
-
- case IBNAL_MSG_IMMEDIATE:
- nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
- if (nob > rx->rx_nob) {
- CERROR ("Immediate message from %s too big: %d(%d)\n",
- libcfs_nid2str(rxmsg->ibm_u.immediate.ibim_hdr.src_nid),
- nob, rx->rx_nob);
- rc = -EPROTO;
- break;
- }
-
- if (kiov != NULL)
- lnet_copy_flat2kiov(niov, kiov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- else
- lnet_copy_flat2iov(niov, iov, offset,
- IBNAL_MSG_SIZE, rxmsg,
- offsetof(kib_msg_t, ibm_u.immediate.ibim_payload),
- mlen);
- lnet_finalize (ni, lntmsg, 0);
- break;
-
- case IBNAL_MSG_PUT_REQ:
- if (mlen == 0) {
- lnet_finalize(ni, lntmsg, 0);
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, 0,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- tx = kibnal_get_idle_tx();
- if (tx == NULL) {
- CERROR("Can't allocate tx for %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* Not replying will break the connection */
- rc = -ENOMEM;
- break;
- }
-
- txmsg = tx->tx_msg;
- if (kiov == NULL)
- rc = kibnal_setup_rd_iov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- 0,
- niov, iov, offset, mlen);
- else
- rc = kibnal_setup_rd_kiov(tx,
- &txmsg->ibm_u.putack.ibpam_rd,
- 0,
- niov, kiov, offset, mlen);
- if (rc != 0) {
- CERROR("Can't setup PUT sink for %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), rc);
- kibnal_tx_done(tx);
- /* tell peer it's over */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_PUT_NAK, rc,
- rxmsg->ibm_u.putreq.ibprm_cookie);
- break;
- }
-
- txmsg->ibm_u.putack.ibpam_src_cookie = rxmsg->ibm_u.putreq.ibprm_cookie;
- txmsg->ibm_u.putack.ibpam_dst_cookie = tx->tx_cookie;
-#if IBNAL_USE_FMR
- nob = sizeof(kib_putack_msg_t);
-#else
- {
- int n = tx->tx_msg->ibm_u.putack.ibpam_rd.rd_nfrag;
-
- nob = offsetof(kib_putack_msg_t, ibpam_rd.rd_frags[n]);
- }
-#endif
- kibnal_init_tx_msg(tx, IBNAL_MSG_PUT_ACK, nob);
-
- tx->tx_lntmsg[0] = lntmsg; /* finalise lntmsg on completion */
- tx->tx_waiting = 1; /* waiting for PUT_DONE */
- kibnal_queue_tx(tx, conn);
-
- if (conn->ibc_version != IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD)
- post_cred = 0; /* peer still owns 'rx' for sending PUT_DONE */
- break;
-
- case IBNAL_MSG_GET_REQ:
- if (lntmsg != NULL) {
- /* Optimized GET; RDMA lntmsg's payload */
- kibnal_reply(ni, rx, lntmsg);
- } else {
- /* GET didn't match anything */
- kibnal_send_completion(rx->rx_conn, IBNAL_MSG_GET_DONE,
- -ENODATA,
- rxmsg->ibm_u.get.ibgm_cookie);
- }
- break;
- }
-
- kibnal_post_rx(rx, post_cred, 0);
- return rc;
-}
-
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
- long pid = kernel_thread (fn, arg, 0);
-
- if (pid < 0)
- return ((int)pid);
-
- atomic_inc (&kibnal_data.kib_nthreads);
- return (0);
-}
-
-void
-kibnal_thread_fini (void)
-{
- atomic_dec (&kibnal_data.kib_nthreads);
-}
-
-void
-kibnal_peer_alive (kib_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->ibp_last_alive = cfs_time_current();
- mb();
-}
-
-void
-kibnal_peer_notify (kib_peer_t *peer)
-{
- time_t last_alive = 0;
- int error = 0;
- unsigned long flags;
-
- read_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- if (list_empty(&peer->ibp_conns) &&
- peer->ibp_accepting == 0 &&
- peer->ibp_connecting == 0 &&
- peer->ibp_error != 0) {
- error = peer->ibp_error;
- peer->ibp_error = 0;
- last_alive = cfs_time_current_sec() -
- cfs_duration_sec(cfs_time_current() -
- peer->ibp_last_alive);
- }
-
- read_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- if (error != 0)
- lnet_notify(kibnal_data.kib_ni, peer->ibp_nid, 0, last_alive);
-}
-
-void
-kibnal_schedule_conn (kib_conn_t *conn)
-{
- unsigned long flags;
-
- kibnal_conn_addref(conn); /* ++ref for connd */
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns);
- wake_up (&kibnal_data.kib_connd_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_connd_lock, flags);
-}
-
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
- /* This just does the immediate housekeeping to start shutdown of an
- * established connection. 'error' is zero for a normal shutdown.
- * Caller holds kib_global_lock exclusively in irq context */
- kib_peer_t *peer = conn->ibc_peer;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
- return; /* already being handled */
-
- /* NB Can't take ibc_lock here (could be in IRQ context), without
- * risking deadlock, so access to ibc_{tx_queue,active_txs} is racey */
-
- if (error == 0 &&
- list_empty(&conn->ibc_tx_queue) &&
- list_empty(&conn->ibc_tx_queue_rsrvd) &&
- list_empty(&conn->ibc_tx_queue_nocred) &&
- list_empty(&conn->ibc_active_txs)) {
- CDEBUG(D_NET, "closing conn to %s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid),
- conn->ibc_txseq, conn->ibc_rxseq);
- } else {
- CDEBUG(D_NETERROR, "Closing conn to %s: error %d%s%s%s%s"
- " rx# "LPD64" tx# "LPD64"\n",
- libcfs_nid2str(peer->ibp_nid), error,
- list_empty(&conn->ibc_tx_queue) ? "" : "(sending)",
- list_empty(&conn->ibc_tx_queue_rsrvd) ? "" : "(sending_rsrvd)",
- list_empty(&conn->ibc_tx_queue_nocred) ? "" : "(sending_nocred)",
- list_empty(&conn->ibc_active_txs) ? "" : "(waiting)",
- conn->ibc_txseq, conn->ibc_rxseq);
-#if 0
- /* can't skip down the queue without holding ibc_lock (see above) */
- list_for_each(tmp, &conn->ibc_tx_queue) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- CERROR(" queued tx type %x cookie "LPX64
- " sending %d waiting %d ticks %ld/%d\n",
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting,
- (long)(tx->tx_deadline - jiffies), HZ);
- }
-
- list_for_each(tmp, &conn->ibc_active_txs) {
- kib_tx_t *tx = list_entry(tmp, kib_tx_t, tx_list);
-
- CERROR(" active tx type %x cookie "LPX64
- " sending %d waiting %d ticks %ld/%d\n",
- tx->tx_msg->ibm_type, tx->tx_cookie,
- tx->tx_sending, tx->tx_waiting,
- (long)(tx->tx_deadline - jiffies), HZ);
- }
-#endif
- }
-
- list_del (&conn->ibc_list);
-
- if (list_empty (&peer->ibp_conns)) { /* no more conns */
- if (peer->ibp_persistence == 0 && /* non-persistent peer */
- kibnal_peer_active(peer)) /* still in peer table */
- kibnal_unlink_peer_locked (peer);
-
- peer->ibp_error = error; /* set/clear error on last conn */
- }
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTING);
-
- kibnal_schedule_conn(conn);
- kibnal_conn_decref(conn); /* lose ibc_list's ref */
-}
-
-void
-kibnal_close_conn (kib_conn_t *conn, int error)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- kibnal_close_conn_locked (conn, error);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_handle_early_rxs(kib_conn_t *conn)
-{
- unsigned long flags;
- kib_rx_t *rx;
-
- LASSERT (!in_interrupt());
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- while (!list_empty(&conn->ibc_early_rxs)) {
- rx = list_entry(conn->ibc_early_rxs.next,
- kib_rx_t, rx_list);
- list_del(&rx->rx_list);
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_handle_rx(rx);
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-}
-
-void
-kibnal_abort_txs(kib_conn_t *conn, struct list_head *txs)
-{
- LIST_HEAD (zombies);
- struct list_head *tmp;
- struct list_head *nxt;
- kib_tx_t *tx;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each_safe (tmp, nxt, txs) {
- tx = list_entry (tmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- tx->tx_status = -ECONNABORTED;
- tx->tx_queued = 0;
- tx->tx_waiting = 0;
-
- if (tx->tx_sending == 0) {
- list_del (&tx->tx_list);
- list_add (&tx->tx_list, &zombies);
- }
- }
-
- spin_unlock(&conn->ibc_lock);
-
- kibnal_txlist_done(&zombies, -ECONNABORTED);
-}
-
-void
-kibnal_conn_disconnected(kib_conn_t *conn)
-{
- static IB_QP_ATTRIBUTES_MODIFY qpam = {.RequestState = QPStateError};
-
- FSTATUS frc;
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP);
-
- kibnal_set_conn_state(conn, IBNAL_CONN_DISCONNECTED);
-
- /* move QP to error state to make posted work items complete */
- frc = iba_modify_qp(conn->ibc_qp, &qpam, NULL);
- if (frc != FSUCCESS)
- CERROR("can't move qp state to error: %d\n", frc);
-
- /* Complete all tx descs not waiting for sends to complete.
- * NB we should be safe from RDMA now that the QP has changed state */
-
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue_rsrvd);
- kibnal_abort_txs(conn, &conn->ibc_tx_queue);
- kibnal_abort_txs(conn, &conn->ibc_active_txs);
-
- kibnal_handle_early_rxs(conn);
-}
-
-void
-kibnal_peer_connect_failed (kib_peer_t *peer, int type, int error)
-{
- LIST_HEAD (zombies);
- unsigned long flags;
-
- LASSERT (error != 0);
- LASSERT (!in_interrupt());
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- LASSERT (kibnal_peer_connecting(peer));
-
- switch (type) {
- case IBNAL_CONN_ACTIVE:
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- break;
-
- case IBNAL_CONN_PASSIVE:
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- break;
-
- case IBNAL_CONN_WAITING:
- /* Can't assert; I might be racing with a successful connection
- * which clears passivewait */
- peer->ibp_passivewait = 0;
- break;
- default:
- LBUG();
- }
-
- if (kibnal_peer_connecting(peer) || /* another attempt underway */
- !list_empty(&peer->ibp_conns)) { /* got connected */
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
- return;
- }
-
- /* Say when active connection can be re-attempted */
- peer->ibp_reconnect_interval *= 2;
- peer->ibp_reconnect_interval =
- MAX(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_min_reconnect_interval);
- peer->ibp_reconnect_interval =
- MIN(peer->ibp_reconnect_interval,
- *kibnal_tunables.kib_max_reconnect_interval);
-
- peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval * HZ;
-
- /* Take peer's blocked transmits to complete with error */
- list_add(&zombies, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- if (kibnal_peer_active(peer) &&
- peer->ibp_persistence == 0) {
- /* failed connection attempt on non-persistent peer */
- kibnal_unlink_peer_locked (peer);
- }
-
- peer->ibp_error = error;
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_notify(peer);
-
- if (list_empty (&zombies))
- return;
-
- CDEBUG (D_NETERROR, "Deleting messages for %s: connection failed\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_txlist_done (&zombies, -EHOSTUNREACH);
-}
-
-void
-kibnal_connreq_done (kib_conn_t *conn, int type, int status)
-{
- kib_peer_t *peer = conn->ibc_peer;
- struct list_head txs;
- kib_tx_t *tx;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (type == IBNAL_CONN_ACTIVE || type == IBNAL_CONN_PASSIVE);
- LASSERT (conn->ibc_state >= IBNAL_CONN_INIT_QP);
- LASSERT (conn->ibc_state < IBNAL_CONN_ESTABLISHED);
- LASSERT (kibnal_peer_connecting(peer));
-
- LIBCFS_FREE(conn->ibc_cvars, sizeof(*conn->ibc_cvars));
- conn->ibc_cvars = NULL;
-
- if (status != 0) {
- /* failed to establish connection */
- kibnal_peer_connect_failed(conn->ibc_peer, type, status);
- kibnal_conn_disconnected(conn);
- kibnal_conn_decref(conn); /* Lose CM's ref */
- return;
- }
-
- /* connection established */
- LASSERT(conn->ibc_state == IBNAL_CONN_CONNECTING);
-
- conn->ibc_last_send = jiffies;
- kibnal_set_conn_state(conn, IBNAL_CONN_ESTABLISHED);
- kibnal_peer_alive(peer);
-
- CDEBUG(D_NET, "Connection %s ESTABLISHED\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- write_lock_irqsave(&kibnal_data.kib_global_lock, flags);
-
- peer->ibp_passivewait = 0; /* not waiting (got conn now) */
- kibnal_conn_addref(conn); /* +1 ref for ibc_list */
- list_add_tail(&conn->ibc_list, &peer->ibp_conns);
-
- if (!kibnal_peer_active(peer)) {
- /* peer has been deleted */
- kibnal_close_conn_locked(conn, -ECONNABORTED);
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_connect_failed(conn->ibc_peer, type, -ECONNABORTED);
- kibnal_conn_decref(conn); /* lose CM's ref */
- return;
- }
-
- switch (type) {
- case IBNAL_CONN_ACTIVE:
- LASSERT (peer->ibp_connecting > 0);
- peer->ibp_connecting--;
- break;
-
- case IBNAL_CONN_PASSIVE:
- LASSERT (peer->ibp_accepting > 0);
- peer->ibp_accepting--;
- break;
- default:
- LBUG();
- }
-
- peer->ibp_reconnect_interval = 0; /* OK to reconnect at any time */
-
- /* Nuke any dangling conns from a different peer instance... */
- kibnal_close_stale_conns_locked(peer, conn->ibc_incarnation);
-
- /* grab txs blocking for a conn */
- list_add(&txs, &peer->ibp_tx_queue);
- list_del_init(&peer->ibp_tx_queue);
-
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- /* Schedule blocked txs */
- spin_lock (&conn->ibc_lock);
- while (!list_empty (&txs)) {
- tx = list_entry (txs.next, kib_tx_t, tx_list);
- list_del (&tx->tx_list);
-
- kibnal_queue_tx_locked (tx, conn);
- }
- spin_unlock (&conn->ibc_lock);
- kibnal_check_sends (conn);
-}
-
-void
-kibnal_reject (lnet_nid_t nid, IB_HANDLE cep, int why)
-{
- static CM_REJECT_INFO msgs[3];
- CM_REJECT_INFO *msg = &msgs[why];
- FSTATUS frc;
-
- LASSERT (why >= 0 && why < sizeof(msgs)/sizeof(msgs[0]));
-
- /* If I wasn't so lazy, I'd initialise this only once; it's effectively
- * read-only... */
- msg->Reason = RC_USER_REJ;
- msg->PrivateData[0] = (IBNAL_MSG_MAGIC) & 0xff;
- msg->PrivateData[1] = (IBNAL_MSG_MAGIC >> 8) & 0xff;
- msg->PrivateData[2] = (IBNAL_MSG_MAGIC >> 16) & 0xff;
- msg->PrivateData[3] = (IBNAL_MSG_MAGIC >> 24) & 0xff;
- msg->PrivateData[4] = (IBNAL_MSG_VERSION) & 0xff;
- msg->PrivateData[5] = (IBNAL_MSG_VERSION >> 8) & 0xff;
- msg->PrivateData[6] = why;
-
- frc = iba_cm_reject(cep, msg);
- if (frc != FSUCCESS)
- CERROR("Error %d rejecting %s\n", frc, libcfs_nid2str(nid));
-}
-
-void
-kibnal_check_connreject(kib_conn_t *conn, int type, CM_REJECT_INFO *rej)
-{
- kib_peer_t *peer = conn->ibc_peer;
- unsigned long flags;
- int magic;
- int version;
- int why;
-
- LASSERT (type == IBNAL_CONN_ACTIVE ||
- type == IBNAL_CONN_PASSIVE);
-
- CDEBUG(D_NET, "%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), rej->Reason);
-
- switch (rej->Reason) {
- case RC_STALE_CONN:
- if (type == IBNAL_CONN_PASSIVE) {
- CERROR("Connection to %s rejected (stale QP)\n",
- libcfs_nid2str(peer->ibp_nid));
- } else {
- CWARN("Connection from %s rejected (stale QP): "
- "retrying...\n", libcfs_nid2str(peer->ibp_nid));
-
- /* retry from scratch to allocate a new conn
- * which will use a different QP */
- kibnal_schedule_active_connect(peer, peer->ibp_version);
- }
-
- /* An FCM_DISCONNECTED callback is still outstanding: give it a
- * ref since kibnal_connreq_done() drops the CM's ref on conn
- * on failure */
- kibnal_conn_addref(conn);
- break;
-
- case RC_USER_REJ:
- magic = (rej->PrivateData[0]) |
- (rej->PrivateData[1] << 8) |
- (rej->PrivateData[2] << 16) |
- (rej->PrivateData[3] << 24);
- version = (rej->PrivateData[4]) |
- (rej->PrivateData[5] << 8);
- why = (rej->PrivateData[6]);
-
- /* retry with old proto version */
- if (magic == IBNAL_MSG_MAGIC &&
- version == IBNAL_MSG_VERSION_RDMAREPLYNOTRSRVD &&
- conn->ibc_version == IBNAL_MSG_VERSION &&
- type != IBNAL_CONN_PASSIVE) {
- /* retry with a new conn */
- CWARN ("Connection to %s refused: "
- "retrying with old protocol version 0x%x\n",
- libcfs_nid2str(peer->ibp_nid), version);
- kibnal_schedule_active_connect(peer, version);
- break;
- }
-
- if (magic != IBNAL_MSG_MAGIC ||
- version != IBNAL_MSG_VERSION) {
- CERROR("%s connection with %s rejected "
- "(magic/ver %08x/%d why %d): "
- "incompatible protocol\n",
- (type == IBNAL_CONN_ACTIVE) ?
- "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid),
- magic, version, why);
- break;
- }
-
- if (type == IBNAL_CONN_ACTIVE &&
- why == IBNAL_REJECT_CONN_RACE) {
- /* lost connection race */
- CWARN("Connection to %s rejected: "
- "lost connection race\n",
- libcfs_nid2str(peer->ibp_nid));
-
- write_lock_irqsave(&kibnal_data.kib_global_lock,
- flags);
-
- if (list_empty(&peer->ibp_conns)) {
- peer->ibp_passivewait = 1;
- peer->ibp_passivewait_deadline =
- jiffies +
- (*kibnal_tunables.kib_timeout * HZ);
- }
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- break;
- }
-
- CERROR("%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), why);
- break;
-
- default:
- CERROR("%s connection with %s rejected: %d\n",
- (type == IBNAL_CONN_ACTIVE) ? "Active" : "Passive",
- libcfs_nid2str(peer->ibp_nid), rej->Reason);
- }
-
- kibnal_connreq_done(conn, type, -ECONNREFUSED);
-}
-
-void
-kibnal_cm_disconnect_callback(kib_conn_t *conn, CM_CONN_INFO *info)
-{
- CDEBUG(D_NET, "%s: state %d, status 0x%x\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- conn->ibc_state, info->Status);
-
- LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-
- switch (info->Status) {
- default:
- LBUG();
- break;
-
- case FCM_DISCONNECT_REQUEST:
- /* Schedule conn to iba_cm_disconnect() if it wasn't already */
- kibnal_close_conn (conn, 0);
- break;
-
- case FCM_DISCONNECT_REPLY: /* peer acks my disconnect req */
- case FCM_DISCONNECTED: /* end of TIME_WAIT */
- CDEBUG(D_NET, "Connection %s disconnected.\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_conn_decref(conn); /* Lose CM's ref */
- break;
- }
-}
-
-void
-kibnal_cm_passive_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
- kib_conn_t *conn = arg;
-
- CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
- /* Established Connection Notifier */
- switch (info->Status) {
- default:
- CERROR("Unexpected status %d on Connection %s\n",
- info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- LBUG();
- break;
-
- case FCM_CONNECT_TIMEOUT:
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ETIMEDOUT);
- break;
-
- case FCM_CONNECT_REJECT:
- kibnal_check_connreject(conn, IBNAL_CONN_PASSIVE,
- &info->Info.Reject);
- break;
-
- case FCM_CONNECT_ESTABLISHED:
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, 0);
- break;
-
- case FCM_DISCONNECT_REQUEST:
- case FCM_DISCONNECT_REPLY:
- case FCM_DISCONNECTED:
- kibnal_cm_disconnect_callback(conn, info);
- break;
- }
-}
-
-int
-kibnal_accept (kib_conn_t **connp, IB_HANDLE cep, kib_msg_t *msg, int nob)
-{
- lnet_nid_t nid;
- kib_conn_t *conn;
- kib_peer_t *peer;
- kib_peer_t *peer2;
- unsigned long flags;
- int rc;
-
- rc = kibnal_unpack_msg(msg, 0, nob);
- if (rc != 0) {
- /* SILENT! kibnal_unpack_msg() complains if required */
- kibnal_reject(LNET_NID_ANY, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- nid = msg->ibm_srcnid;
-
- if (msg->ibm_version != IBNAL_MSG_VERSION)
- CWARN("Connection from %s: old protocol version 0x%x\n",
- libcfs_nid2str(nid), msg->ibm_version);
-
- if (msg->ibm_type != IBNAL_MSG_CONNREQ) {
- CERROR("Can't accept %s: bad request type %d (%d expected)\n",
- libcfs_nid2str(nid), msg->ibm_type, IBNAL_MSG_CONNREQ);
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- if (msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid) {
- CERROR("Can't accept %s: bad dst NID %s (%s expected)\n",
- libcfs_nid2str(nid),
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid));
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE ||
- msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE ||
- msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n",
- libcfs_nid2str(nid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- msg->ibm_u.connparams.ibcp_max_msg_size,
- msg->ibm_u.connparams.ibcp_max_frags,
- IBNAL_MSG_QUEUE_SIZE,
- IBNAL_MSG_SIZE,
- IBNAL_MAX_RDMA_FRAGS);
- kibnal_reject(nid, cep, IBNAL_REJECT_FATAL);
- return -EPROTO;
- }
-
- conn = kibnal_create_conn(nid, msg->ibm_version);
- if (conn == NULL) {
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ENOMEM;
- }
-
- /* assume 'nid' is a new peer */
- rc = kibnal_create_peer(&peer, nid);
- if (rc != 0) {
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ENOMEM;
- }
-
- write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
- if (kibnal_data.kib_listener_cep == NULL) { /* shutdown started */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock, flags);
-
- kibnal_peer_decref(peer);
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_NO_RESOURCES);
- return -ESHUTDOWN;
- }
-
- peer2 = kibnal_find_peer_locked(nid);
- if (peer2 == NULL) {
- /* peer table takes my ref on peer */
- list_add_tail (&peer->ibp_list, kibnal_nid2peerlist(nid));
- LASSERT (peer->ibp_connecting == 0);
- } else {
- kibnal_peer_decref(peer);
- peer = peer2;
-
- if (peer->ibp_connecting != 0 &&
- peer->ibp_nid < kibnal_data.kib_ni->ni_nid) {
- /* Resolve concurrent connection attempts in favour of
- * the higher NID */
- write_unlock_irqrestore(&kibnal_data.kib_global_lock,
- flags);
- kibnal_conn_decref(conn);
- kibnal_reject(nid, cep, IBNAL_REJECT_CONN_RACE);
- return -EALREADY;
- }
- }
-
- kibnal_peer_addref(peer); /* +1 ref for conn */
- peer->ibp_accepting++;
-
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
- conn->ibc_peer = peer;
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
- *connp = conn;
- return 0;
-}
-
-void
-kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
-
- CM_REQUEST_INFO *req = &info->Info.Request;
- CM_REPLY_INFO *rep;
- kib_conn_t *conn;
- FSTATUS frc;
- int rc;
-
- LASSERT(arg == NULL); /* no conn yet for passive */
-
- CDEBUG(D_NET, "%x\n", info->Status);
-
- if (info->Status == FCM_CONNECT_CANCEL) {
- up(&kibnal_data.kib_listener_signal);
- return;
- }
-
- LASSERT (info->Status == FCM_CONNECT_REQUEST);
-
- rc = kibnal_accept(&conn, cep, (kib_msg_t *)req->PrivateData,
- CM_REQUEST_INFO_USER_LEN);
- if (rc != 0) /* kibnal_accept has rejected */
- return;
-
- conn->ibc_cvars->cv_path = req->PathInfo.Path;
-
- rc = kibnal_conn_rts(conn,
- req->CEPInfo.QPN,
- req->CEPInfo.OfferedInitiatorDepth,
- req->CEPInfo.OfferedResponderResources,
- req->CEPInfo.StartingPSN);
- if (rc != 0) {
- kibnal_reject(conn->ibc_peer->ibp_nid, cep,
- IBNAL_REJECT_NO_RESOURCES);
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED);
- return;
- }
-
- memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci));
- rep = &conn->ibc_cvars->cv_cmci.Info.Reply;
-
- rep->QPN = conn->ibc_cvars->cv_qpattrs.QPNumber;
- rep->QKey = conn->ibc_cvars->cv_qpattrs.Qkey;
- rep->StartingPSN = conn->ibc_cvars->cv_qpattrs.RecvPSN;
- rep->EndToEndFlowControl = conn->ibc_cvars->cv_qpattrs.FlowControl;
- rep->ArbInitiatorDepth = conn->ibc_cvars->cv_qpattrs.InitiatorDepth;
- rep->ArbResponderResources = conn->ibc_cvars->cv_qpattrs.ResponderResources;
- rep->TargetAckDelay = kibnal_data.kib_hca_attrs.LocalCaAckDelay;
- rep->FailoverAccepted = IBNAL_FAILOVER_ACCEPTED;
- rep->RnRRetryCount = req->CEPInfo.RnrRetryCount;
-
- CLASSERT (CM_REPLY_INFO_USER_LEN >=
- offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- kibnal_pack_connmsg((kib_msg_t *)rep->PrivateData,
- conn->ibc_version,
- CM_REPLY_INFO_USER_LEN,
- IBNAL_MSG_CONNACK,
- conn->ibc_peer->ibp_nid, conn->ibc_incarnation);
-
- LASSERT (conn->ibc_cep == NULL);
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
-
- frc = iba_cm_accept(cep,
- &conn->ibc_cvars->cv_cmci,
- NULL,
- kibnal_cm_passive_callback, conn,
- &conn->ibc_cep);
-
- if (frc == FSUCCESS || frc == FPENDING)
- return;
-
- CERROR("iba_cm_accept(%s) failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_PASSIVE, -ECONNABORTED);
-}
-
-void
-kibnal_check_connreply(kib_conn_t *conn, CM_REPLY_INFO *rep)
-{
- kib_msg_t *msg = (kib_msg_t *)rep->PrivateData;
- lnet_nid_t nid = conn->ibc_peer->ibp_nid;
- FSTATUS frc;
- int rc;
-
- rc = kibnal_unpack_msg(msg, conn->ibc_version, CM_REPLY_INFO_USER_LEN);
- if (rc != 0) {
- CERROR ("Error %d unpacking connack from %s\n",
- rc, libcfs_nid2str(nid));
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- if (msg->ibm_type != IBNAL_MSG_CONNACK) {
- CERROR("Bad connack request type %d (%d expected) from %s\n",
- msg->ibm_type, IBNAL_MSG_CONNREQ,
- libcfs_nid2str(msg->ibm_srcnid));
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- if (msg->ibm_srcnid != conn->ibc_peer->ibp_nid ||
- msg->ibm_dstnid != kibnal_data.kib_ni->ni_nid ||
- msg->ibm_dststamp != kibnal_data.kib_incarnation) {
- CERROR("Stale connack from %s(%s): %s(%s), "LPX64"("LPX64")\n",
- libcfs_nid2str(msg->ibm_srcnid),
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- libcfs_nid2str(msg->ibm_dstnid),
- libcfs_nid2str(kibnal_data.kib_ni->ni_nid),
- msg->ibm_dststamp, kibnal_data.kib_incarnation);
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ESTALE);
- return;
- }
-
- if (msg->ibm_u.connparams.ibcp_queue_depth != IBNAL_MSG_QUEUE_SIZE ||
- msg->ibm_u.connparams.ibcp_max_msg_size > IBNAL_MSG_SIZE ||
- msg->ibm_u.connparams.ibcp_max_frags > IBNAL_MAX_RDMA_FRAGS) {
- CERROR("Reject %s: q %d sz %d frag %d, (%d %d %d expected)\n",
- libcfs_nid2str(msg->ibm_srcnid),
- msg->ibm_u.connparams.ibcp_queue_depth,
- msg->ibm_u.connparams.ibcp_max_msg_size,
- msg->ibm_u.connparams.ibcp_max_frags,
- IBNAL_MSG_QUEUE_SIZE,
- IBNAL_MSG_SIZE,
- IBNAL_MAX_RDMA_FRAGS);
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_FATAL);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EPROTO);
- return;
- }
-
- CDEBUG(D_NET, "Connection %s REP_RECEIVED.\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
-
- conn->ibc_incarnation = msg->ibm_srcstamp;
- conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
- conn->ibc_reserved_credits = IBNAL_MSG_QUEUE_SIZE;
- LASSERT (conn->ibc_credits + conn->ibc_reserved_credits
- <= IBNAL_RX_MSGS);
-
- rc = kibnal_conn_rts(conn,
- rep->QPN,
- rep->ArbInitiatorDepth,
- rep->ArbResponderResources,
- rep->StartingPSN);
- if (rc != 0) {
- kibnal_reject(nid, conn->ibc_cep, IBNAL_REJECT_NO_RESOURCES);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EIO);
- return;
- }
-
- memset(&conn->ibc_cvars->cv_cmci, 0, sizeof(conn->ibc_cvars->cv_cmci));
-
- frc = iba_cm_accept(conn->ibc_cep,
- &conn->ibc_cvars->cv_cmci,
- NULL, NULL, NULL, NULL);
-
- if (frc == FCM_CONNECT_ESTABLISHED) {
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, 0);
- return;
- }
-
- CERROR("Connection %s CMAccept failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ECONNABORTED);
-}
-
-void
-kibnal_cm_active_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
- kib_conn_t *conn = arg;
-
- CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
- switch (info->Status) {
- default:
- CERROR("unknown status %d on Connection %s\n",
- info->Status, libcfs_nid2str(conn->ibc_peer->ibp_nid));
- LBUG();
- break;
-
- case FCM_CONNECT_TIMEOUT:
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ETIMEDOUT);
- break;
-
- case FCM_CONNECT_REJECT:
- kibnal_check_connreject(conn, IBNAL_CONN_ACTIVE,
- &info->Info.Reject);
- break;
-
- case FCM_CONNECT_REPLY:
- kibnal_check_connreply(conn, &info->Info.Reply);
- break;
-
- case FCM_DISCONNECT_REQUEST:
- case FCM_DISCONNECT_REPLY:
- case FCM_DISCONNECTED:
- kibnal_cm_disconnect_callback(conn, info);
- break;
- }
-}
-
-void
-dump_path_records(PATH_RESULTS *results)
-{
- IB_PATH_RECORD *path;
- int i;
-
- for (i = 0; i < results->NumPathRecords; i++) {
- path = &results->PathRecords[i];
- CDEBUG(D_NET, "%d: sgid "LPX64":"LPX64" dgid "
- LPX64":"LPX64" pkey %x\n",
- i,
- path->SGID.Type.Global.SubnetPrefix,
- path->SGID.Type.Global.InterfaceID,
- path->DGID.Type.Global.SubnetPrefix,
- path->DGID.Type.Global.InterfaceID,
- path->P_Key);
- }
-}
-
-void
-kibnal_pathreq_callback (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qrslt)
-{
- IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs;
- kib_conn_t *conn = arg;
- CM_REQUEST_INFO *req = &conn->ibc_cvars->cv_cmci.Info.Request;
- PATH_RESULTS *path = (PATH_RESULTS *)qrslt->QueryResult;
- FSTATUS frc;
-
- if (qrslt->Status != FSUCCESS ||
- qrslt->ResultDataSize < sizeof(*path)) {
- CDEBUG (D_NETERROR, "pathreq %s failed: status %d data size %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- qrslt->Status, qrslt->ResultDataSize);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- if (path->NumPathRecords < 1) {
- CDEBUG (D_NETERROR, "pathreq %s failed: no path records\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- //dump_path_records(path);
- conn->ibc_cvars->cv_path = path->PathRecords[0];
-
- LASSERT (conn->ibc_cep == NULL);
-
- conn->ibc_cep = kibnal_create_cep(conn->ibc_peer->ibp_nid);
- if (conn->ibc_cep == NULL) {
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -ENOMEM);
- return;
- }
-
- memset(req, 0, sizeof(*req));
- req->SID = conn->ibc_cvars->cv_svcrec.RID.ServiceID;
- req->CEPInfo.CaGUID = kibnal_data.kib_hca_guids[kibnal_data.kib_hca_idx];
- req->CEPInfo.EndToEndFlowControl = IBNAL_EE_FLOW;
- req->CEPInfo.PortGUID = conn->ibc_cvars->cv_path.SGID.Type.Global.InterfaceID;
- req->CEPInfo.RetryCount = IBNAL_RETRY;
- req->CEPInfo.RnrRetryCount = IBNAL_RNR_RETRY;
- req->CEPInfo.AckTimeout = IBNAL_ACK_TIMEOUT;
- req->CEPInfo.StartingPSN = IBNAL_STARTING_PSN;
- req->CEPInfo.QPN = conn->ibc_cvars->cv_qpattrs.QPNumber;
- req->CEPInfo.QKey = conn->ibc_cvars->cv_qpattrs.Qkey;
- req->CEPInfo.OfferedResponderResources = ca_attr->MaxQPResponderResources;
- req->CEPInfo.OfferedInitiatorDepth = ca_attr->MaxQPInitiatorDepth;
- req->PathInfo.bSubnetLocal = IBNAL_LOCAL_SUB;
- req->PathInfo.Path = conn->ibc_cvars->cv_path;
-
- CLASSERT (CM_REQUEST_INFO_USER_LEN >=
- offsetof(kib_msg_t, ibm_u) + sizeof(kib_connparams_t));
-
- kibnal_pack_connmsg((kib_msg_t *)req->PrivateData,
- conn->ibc_version,
- CM_REQUEST_INFO_USER_LEN,
- IBNAL_MSG_CONNREQ,
- conn->ibc_peer->ibp_nid, 0);
-
- if (the_lnet.ln_testprotocompat != 0) {
- /* single-shot proto test */
- LNET_LOCK();
- if ((the_lnet.ln_testprotocompat & 1) != 0) {
- ((kib_msg_t *)req->PrivateData)->ibm_version++;
- the_lnet.ln_testprotocompat &= ~1;
- }
- if ((the_lnet.ln_testprotocompat & 2) != 0) {
- ((kib_msg_t *)req->PrivateData)->ibm_magic =
- LNET_PROTO_MAGIC;
- the_lnet.ln_testprotocompat &= ~2;
- }
- LNET_UNLOCK();
- }
-
- /* Flag I'm getting involved with the CM... */
- kibnal_set_conn_state(conn, IBNAL_CONN_CONNECTING);
-
- /* cm callback gets my conn ref */
- frc = iba_cm_connect(conn->ibc_cep, req,
- kibnal_cm_active_callback, conn);
- if (frc == FPENDING || frc == FSUCCESS)
- return;
-
- CERROR ("Connect %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-void
-kibnal_dump_service_records(SERVICE_RECORD_RESULTS *results)
-{
- IB_SERVICE_RECORD *svc;
- int i;
-
- for (i = 0; i < results->NumServiceRecords; i++) {
- svc = &results->ServiceRecords[i];
- CDEBUG(D_NET, "%d: sid "LPX64" gid "LPX64":"LPX64" pkey %x\n",
- i,
- svc->RID.ServiceID,
- svc->RID.ServiceGID.Type.Global.SubnetPrefix,
- svc->RID.ServiceGID.Type.Global.InterfaceID,
- svc->RID.ServiceP_Key);
- }
-}
-
-void
-kibnal_service_get_callback (void *arg, QUERY *qry,
- QUERY_RESULT_VALUES *qrslt)
-{
- kib_conn_t *conn = arg;
- SERVICE_RECORD_RESULTS *svc;
- FSTATUS frc;
-
- if (qrslt->Status != FSUCCESS ||
- qrslt->ResultDataSize < sizeof(*svc)) {
- CDEBUG (D_NETERROR, "Lookup %s failed: status %d data size %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid),
- qrslt->Status, qrslt->ResultDataSize);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- svc = (SERVICE_RECORD_RESULTS *)qrslt->QueryResult;
- if (svc->NumServiceRecords < 1) {
- CDEBUG (D_NETERROR, "lookup %s failed: no service records\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
- return;
- }
-
- //kibnal_dump_service_records(svc);
- conn->ibc_cvars->cv_svcrec = svc->ServiceRecords[0];
-
- qry = &conn->ibc_cvars->cv_query;
- memset(qry, 0, sizeof(*qry));
-
- qry->OutputType = OutputTypePathRecord;
- qry->InputType = InputTypePortGuidPair;
-
- qry->InputValue.PortGuidPair.SourcePortGuid =
- kibnal_data.kib_port_guid;
- qry->InputValue.PortGuidPair.DestPortGuid =
- conn->ibc_cvars->cv_svcrec.RID.ServiceGID.Type.Global.InterfaceID;
-
- /* kibnal_pathreq_callback gets my conn ref */
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- qry,
- kibnal_pathreq_callback,
- &kibnal_data.kib_sdretry,
- conn);
- if (frc == FPENDING)
- return;
-
- CERROR ("pathreq %s failed: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-void
-kibnal_connect_peer (kib_peer_t *peer)
-{
- QUERY *qry;
- FSTATUS frc;
- kib_conn_t *conn;
-
- LASSERT (peer->ibp_connecting != 0);
-
- conn = kibnal_create_conn(peer->ibp_nid, peer->ibp_version);
- if (conn == NULL) {
- CERROR ("Can't allocate conn\n");
- kibnal_peer_connect_failed(peer, IBNAL_CONN_ACTIVE, -ENOMEM);
- return;
- }
-
- conn->ibc_peer = peer;
- kibnal_peer_addref(peer);
-
- qry = &conn->ibc_cvars->cv_query;
- memset(qry, 0, sizeof(*qry));
-
- qry->OutputType = OutputTypeServiceRecord;
- qry->InputType = InputTypeServiceRecord;
-
- qry->InputValue.ServiceRecordValue.ComponentMask =
- KIBNAL_SERVICE_KEY_MASK;
- kibnal_set_service_keys(
- &qry->InputValue.ServiceRecordValue.ServiceRecord,
- peer->ibp_nid);
-
- /* kibnal_service_get_callback gets my conn ref */
- frc = iba_sd_query_port_fabric_info(kibnal_data.kib_sd,
- kibnal_data.kib_port_guid,
- qry,
- kibnal_service_get_callback,
- &kibnal_data.kib_sdretry,
- conn);
- if (frc == FPENDING)
- return;
-
- CERROR("Lookup %s failed: %d\n", libcfs_nid2str(peer->ibp_nid), frc);
- kibnal_connreq_done(conn, IBNAL_CONN_ACTIVE, -EHOSTUNREACH);
-}
-
-int
-kibnal_check_txs (kib_conn_t *conn, struct list_head *txs)
-{
- kib_tx_t *tx;
- struct list_head *ttmp;
- int timed_out = 0;
-
- spin_lock(&conn->ibc_lock);
-
- list_for_each (ttmp, txs) {
- tx = list_entry (ttmp, kib_tx_t, tx_list);
-
- if (txs == &conn->ibc_active_txs) {
- LASSERT (!tx->tx_queued);
- LASSERT (tx->tx_waiting || tx->tx_sending != 0);
- } else {
- LASSERT (tx->tx_queued);
- }
-
- if (time_after_eq (jiffies, tx->tx_deadline)) {
- timed_out = 1;
- break;
- }
- }
-
- spin_unlock(&conn->ibc_lock);
- return timed_out;
-}
-
-int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
- return kibnal_check_txs(conn, &conn->ibc_tx_queue) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_rsrvd) ||
- kibnal_check_txs(conn, &conn->ibc_tx_queue_nocred) ||
- kibnal_check_txs(conn, &conn->ibc_active_txs);
-}
-
-void
-kibnal_check_peers (int idx)
-{
- rwlock_t *rwlock = &kibnal_data.kib_global_lock;
- struct list_head *peers = &kibnal_data.kib_peers[idx];
- struct list_head *ptmp;
- kib_peer_t *peer;
- kib_conn_t *conn;
- struct list_head *ctmp;
- unsigned long flags;
-
- again:
- /* NB. We expect to have a look at all the peers and not find any
- * rdmas to time out, so we just use a shared lock while we
- * take a look... */
- read_lock_irqsave(rwlock, flags);
-
- list_for_each (ptmp, peers) {
- peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
- if (peer->ibp_passivewait) {
- LASSERT (list_empty(&peer->ibp_conns));
-
- if (!time_after_eq(jiffies,
- peer->ibp_passivewait_deadline))
- continue;
-
- kibnal_peer_addref(peer); /* ++ ref for me... */
- read_unlock_irqrestore(rwlock, flags);
-
- kibnal_peer_connect_failed(peer, IBNAL_CONN_WAITING,
- -ETIMEDOUT);
- kibnal_peer_decref(peer); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
-
- list_for_each (ctmp, &peer->ibp_conns) {
- conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
- LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
- /* In case we have enough credits to return via a
- * NOOP, but there were no non-blocking tx descs
- * free to do it last time... */
- kibnal_check_sends(conn);
-
- if (!kibnal_conn_timed_out(conn))
- continue;
-
- /* Handle timeout by closing the whole connection. We
- * can only be sure RDMA activity has ceased once the
- * QP has been modified. */
-
- kibnal_conn_addref(conn); /* 1 ref for me... */
-
- read_unlock_irqrestore(rwlock, flags);
-
- CERROR("Timed out RDMA with %s\n",
- libcfs_nid2str(peer->ibp_nid));
-
- kibnal_close_conn (conn, -ETIMEDOUT);
- kibnal_conn_decref(conn); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(rwlock, flags);
-}
-
-void
-kibnal_disconnect_conn (kib_conn_t *conn)
-{
- FSTATUS frc;
-
- LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTING);
-
- kibnal_conn_disconnected(conn);
-
- frc = iba_cm_disconnect(conn->ibc_cep, NULL, NULL);
- switch (frc) {
- case FSUCCESS:
- break;
-
- case FINSUFFICIENT_RESOURCES:
- CERROR("ENOMEM disconnecting %s\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid));
- /* This might cause the module to become unloadable since the
- * FCM_DISCONNECTED callback is still outstanding */
- break;
-
- default:
- CERROR("Unexpected error disconnecting %s: %d\n",
- libcfs_nid2str(conn->ibc_peer->ibp_nid), frc);
- LBUG();
- }
-
- kibnal_peer_notify(conn->ibc_peer);
-}
-
-int
-kibnal_connd (void *arg)
-{
- wait_queue_t wait;
- unsigned long flags;
- kib_conn_t *conn;
- kib_peer_t *peer;
- int timeout;
- int i;
- int did_something;
- int peer_index = 0;
- unsigned long deadline = jiffies;
-
- cfs_daemonize ("kibnal_connd");
- cfs_block_allsigs ();
-
- init_waitqueue_entry (&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_connd_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- did_something = 0;
-
- if (!list_empty (&kibnal_data.kib_connd_zombies)) {
- conn = list_entry (kibnal_data.kib_connd_zombies.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_destroy_conn(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_conns)) {
- conn = list_entry (kibnal_data.kib_connd_conns.next,
- kib_conn_t, ibc_list);
- list_del (&conn->ibc_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_disconnect_conn(conn);
- kibnal_conn_decref(conn);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- if (!list_empty (&kibnal_data.kib_connd_peers)) {
- peer = list_entry (kibnal_data.kib_connd_peers.next,
- kib_peer_t, ibp_connd_list);
-
- list_del_init (&peer->ibp_connd_list);
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
-
- kibnal_connect_peer (peer);
- kibnal_peer_decref (peer);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- /* careful with the jiffy wrap... */
- while ((timeout = (int)(deadline - jiffies)) <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kibnal_data.kib_peer_hash_size;
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if (*kibnal_tunables.kib_timeout > n * p)
- chunk = (chunk * n * p) /
- *kibnal_tunables.kib_timeout;
- if (chunk == 0)
- chunk = 1;
-
- for (i = 0; i < chunk; i++) {
- kibnal_check_peers (peer_index);
- peer_index = (peer_index + 1) %
- kibnal_data.kib_peer_hash_size;
- }
-
- deadline += p * HZ;
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- did_something = 1;
- }
-
- if (did_something)
- continue;
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- set_current_state (TASK_INTERRUPTIBLE);
- add_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
- if (!kibnal_data.kib_shutdown &&
- list_empty (&kibnal_data.kib_connd_conns) &&
- list_empty (&kibnal_data.kib_connd_peers))
- schedule_timeout (timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
- spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
- }
-
- spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
- kibnal_thread_fini ();
- return (0);
-}
-
-
-void
-kibnal_hca_async_callback (void *hca_arg, IB_EVENT_RECORD *ev)
-{
- /* XXX flesh out. this seems largely for async errors */
- CERROR("type: %d code: %u\n", ev->EventType, ev->EventCode);
-}
-
-void
-kibnal_hca_callback (void *hca_arg, void *cq_arg)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-}
-
-int
-kibnal_scheduler(void *arg)
-{
- long id = (long)arg;
- wait_queue_t wait;
- char name[16];
- FSTATUS frc;
- FSTATUS frc2;
- IB_WORK_COMPLETION wc;
- kib_rx_t *rx;
- unsigned long flags;
- __u64 rxseq = 0;
- int busy_loops = 0;
-
- snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
- cfs_daemonize(name);
- cfs_block_allsigs();
-
- init_waitqueue_entry(&wait, current);
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
- while (!kibnal_data.kib_shutdown) {
- if (busy_loops++ >= IBNAL_RESCHED) {
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- cfs_cond_resched();
- busy_loops = 0;
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- if (kibnal_data.kib_ready &&
- !kibnal_data.kib_checking_cq) {
- /* take ownership of completion polling */
- kibnal_data.kib_checking_cq = 1;
- /* Assume I'll exhaust the CQ */
- kibnal_data.kib_ready = 0;
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- frc = iba_poll_cq(kibnal_data.kib_cq, &wc);
- if (frc == FNOT_DONE) {
- /* CQ empty */
- frc2 = iba_rearm_cq(kibnal_data.kib_cq,
- CQEventSelNextWC);
- LASSERT (frc2 == FSUCCESS);
- }
-
- if (frc == FSUCCESS &&
- kibnal_wreqid2type(wc.WorkReqId) == IBNAL_WID_RX) {
- rx = (kib_rx_t *)kibnal_wreqid2ptr(wc.WorkReqId);
-
- /* Grab the RX sequence number NOW before
- * anyone else can get an RX completion */
- rxseq = rx->rx_conn->ibc_rxseq++;
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- /* give up ownership of completion polling */
- kibnal_data.kib_checking_cq = 0;
-
- if (frc == FNOT_DONE)
- continue;
-
- LASSERT (frc == FSUCCESS);
- /* Assume there's more: get another scheduler to check
- * while I handle this completion... */
-
- kibnal_data.kib_ready = 1;
- wake_up(&kibnal_data.kib_sched_waitq);
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- switch (kibnal_wreqid2type(wc.WorkReqId)) {
- case IBNAL_WID_RX:
- kibnal_rx_complete(&wc, rxseq);
- break;
-
- case IBNAL_WID_TX:
- kibnal_tx_complete(&wc);
- break;
-
- case IBNAL_WID_RDMA:
- /* We only get RDMA completion notification if
- * it fails. So we just ignore them completely
- * because...
- *
- * 1) If an RDMA fails, all subsequent work
- * items, including the final SEND will fail
- * too, so I'm still guaranteed to notice that
- * this connection is hosed.
- *
- * 2) It's positively dangerous to look inside
- * the tx descriptor obtained from an RDMA work
- * item. As soon as I drop the kib_sched_lock,
- * I give a scheduler on another CPU a chance
- * to get the final SEND completion, so the tx
- * descriptor can get freed as I inspect it. */
- CERROR ("RDMA failed: %d\n", wc.Status);
- break;
-
- default:
- LBUG();
- }
-
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- continue;
- }
-
- /* Nothing to do; sleep... */
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kibnal_data.kib_sched_waitq, &wait);
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
- flags);
-
- schedule();
-
- remove_wait_queue(&kibnal_data.kib_sched_waitq, &wait);
- set_current_state(TASK_RUNNING);
- spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
- }
-
- spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
- kibnal_thread_fini();
- return (0);
-}
+++ /dev/null
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/iiblnd/iiblnd_modparams.c
- *
- * Author: Eric Barton <eric@bartonsoftware.com>
- */
-
-#include "iiblnd.h"
-
-static char *ipif_basename = "ib";
-CFS_MODULE_PARM(ipif_basename, "s", charp, 0444,
- "IPoIB interface base name");
-
-static char *service_name = "iiblnd";
-CFS_MODULE_PARM(service_name, "s", charp, 0444,
- "IB service name");
-
-static int service_number = 0x11b9a2;
-CFS_MODULE_PARM(service_number, "i", int, 0444,
- "IB service number");
-
-static int min_reconnect_interval = 1;
-CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
- "minimum connection retry interval (seconds)");
-
-static int max_reconnect_interval = 60;
-CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
- "maximum connection retry interval (seconds)");
-
-static int concurrent_peers = 1152;
-CFS_MODULE_PARM(concurrent_peers, "i", int, 0444,
- "maximum number of peers that may connect");
-
-static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of message descriptors");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "# concurrent sends");
-
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
- "# concurrent sends to 1 peer");
-
-static int sd_retries = 8;
-CFS_MODULE_PARM(sd_retries, "i", int, 0444,
- "# times to retry SD queries");
-
-static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
- "Idle time in seconds before sending a keepalive");
-
-static int concurrent_sends = IBNAL_RX_MSGS;
-CFS_MODULE_PARM(concurrent_sends, "i", int, 0644,
- "Send work queue sizing");
-
-kib_tunables_t kibnal_tunables = {
- .kib_ipif_basename = &ipif_basename,
- .kib_service_name = &service_name,
- .kib_service_number = &service_number,
- .kib_min_reconnect_interval = &min_reconnect_interval,
- .kib_max_reconnect_interval = &max_reconnect_interval,
- .kib_concurrent_peers = &concurrent_peers,
- .kib_cksum = &cksum,
- .kib_timeout = &timeout,
- .kib_keepalive = &keepalive,
- .kib_ntx = &ntx,
- .kib_credits = &credits,
- .kib_peercredits = &peer_credits,
- .kib_sd_retries = &sd_retries,
- .kib_concurrent_sends = &concurrent_sends,
-};
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-/* NB max_size specified for proc_dostring entries only needs to be big enough
- * not to truncate the printout; it only needs to be the actual size of the
- * string buffer if we allow writes (and we don't) */
-
-#ifdef HAVE_SYSCTL_UNNUMBERED
-
-enum {
- IIBLND_IPIF_BASENAME = 1,
- IIBLND_SERVICE_NAME,
- IIBLND_SERVICE_NUMBER,
- IIBLND_RECONNECT_MIN,
- IIBLND_RECONNECT_MAX,
- IIBLND_CONCURRENT_PEERS,
- IIBLND_CKSUM,
- IIBLND_TIMEOUT,
- IIBLND_NTX,
- IIBLND_CREDITS,
- IIBLND_PEER_CREDITS,
- IIBLND_SD_RETRIES,
- IIBLND_KEEPALIVE,
- IIBLND_CONCURRENT_SENDS
-};
-
-#else
-
-#define IIBLND_IPIF_BASENAME CTL_UNNUMBERED
-#define IIBLND_SERVICE_NAME CTL_UNNUMBERED
-#define IIBLND_SERVICE_NUMBER CTL_UNNUMBERED
-#define IIBLND_RECONNECT_MIN CTL_UNNUMBERED
-#define IIBLND_RECONNECT_MAX CTL_UNNUMBERED
-#define IIBLND_CONCURRENT_PEERS CTL_UNNUMBERED
-#define IIBLND_CKSUM CTL_UNNUMBERED
-#define IIBLND_TIMEOUT CTL_UNNUMBERED
-#define IIBLND_NTX CTL_UNNUMBERED
-#define IIBLND_CREDITS CTL_UNNUMBERED
-#define IIBLND_PEER_CREDITS CTL_UNNUMBERED
-#define IIBLND_SD_RETRIES CTL_UNNUMBERED
-#define IIBLND_KEEPALIVE CTL_UNNUMBERED
-#define IIBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-
-#endif
-
-static cfs_sysctl_table_t kibnal_ctl_table[] = {
- {
- .ctl_name = IBBLND_IPIF_BASENAME,
- .procname = "ipif_basename",
- .data = &ipif_basename,
- .maxlen = 1024,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = IIBLND_SERVICE_NAME,
- .procname = "service_name",
- .data = &service_name,
- .maxlen = 1024,
- .mode = 0444,
- .proc_handler = &proc_dostring
- },
- {
- .ctl_name = IIBLND_SERVICE_NUMBER,
- .procname = "service_number",
- .data = &service_number,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_RECONNECT_MIN,
- .procname = "min_reconnect_interval",
- .data = &min_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_RECONNECT_MAX,
- .procname = "max_reconnect_interval",
- .data = &max_reconnect_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_CONCURRENT_PEERS,
- .procname = "concurrent_peers",
- .data = &concurrent_peers,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_CKSUM,
- .procname = "cksum",
- .data = &cksum,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_TIMEOUT,
- .procname = "timeout",
- .data = &timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_NTX,
- .procname = "ntx",
- .data = &ntx,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_CREDITS,
- .procname = "credits",
- .data = &credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_PEER_CREDITS,
- .procname = "peer_credits",
- .data = &peer_credits,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_SD_RETRIES,
- .procname = "sd_retries",
- .data = &sd_retries,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_KEEPALIVE,
- .procname = "keepalive",
- .data = &keepalive,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- {
- .ctl_name = IIBLND_CONCURRENT_SENDS,
- .procname = "concurrent_sends",
- .data = &concurrent_sends,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec
- },
- {0}
-};
-
-static cfs_sysctl_table_t kibnal_top_ctl_table[] = {
- {
- .ctl_name = CTL_IIBLND,
- .procname = "iibnal",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kibnal_ctl_table
- },
- {0}
-};
-
-int
-kibnal_tunables_init ()
-{
- kibnal_tunables.kib_sysctl =
- cfs_register_sysctl_table(kibnal_top_ctl_table, 0);
-
- if (kibnal_tunables.kib_sysctl == NULL)
- CWARN("Can't setup /proc tunables\n");
-
- if (*kibnal_tunables.kib_concurrent_sends > IBNAL_RX_MSGS)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_RX_MSGS;
- if (*kibnal_tunables.kib_concurrent_sends < IBNAL_MSG_QUEUE_SIZE)
- *kibnal_tunables.kib_concurrent_sends = IBNAL_MSG_QUEUE_SIZE;
-
- return 0;