])
#
-# LN_CONFIG_PORTALS
-#
-# configure support for Portals
-#
-# Handles the --with-portals=path option.  ENABLEPORTALS is set to 0 when
-# the option is absent or given as "no"; any other value is stored in
-# PORTALS and ENABLEPORTALS is set to 1.  When enabled, the path must
-# contain include/portals/p30.h, otherwise configure stops with an error.
-# On success PORTALS is canonicalized with readlink and PTLLNDCPPFLAGS
-# becomes "-I<PORTALS>/include"; in the disabled case PTLLNDCPPFLAGS stays
-# empty.  PTLLNDCPPFLAGS is always substituted.
-#
-AC_DEFUN([LN_CONFIG_PORTALS],
-[AC_MSG_CHECKING([for portals])
-AC_ARG_WITH([portals],
- AC_HELP_STRING([--with-portals=path],
- [set path to portals]),
- [
- case $with_portals in
- no) ENABLEPORTALS=0
- ;;
- *) PORTALS="${with_portals}"
- ENABLEPORTALS=1
- ;;
- esac
- ], [
- ENABLEPORTALS=0
- ])
-PTLLNDCPPFLAGS=""
-if test $ENABLEPORTALS -eq 0; then
- AC_MSG_RESULT([no])
-elif test ! \( -f ${PORTALS}/include/portals/p30.h \); then
- AC_MSG_RESULT([no])
- AC_MSG_ERROR([bad --with-portals path])
-else
- PORTALS=$(readlink --canonicalize $PORTALS)
- AC_MSG_RESULT([$PORTALS])
- PTLLNDCPPFLAGS="-I${PORTALS}/include"
-fi
-AC_SUBST(PTLLNDCPPFLAGS)
-])
-
-#
# LN_CONFIG_BACKOFF
#
# check if tunable tcp backoff is available/wanted
fi
])
-
-#
-# LN_CONFIG_PTLLND
-#
-# configure support for Portals LND
-#
-# Runs LN_CONFIG_PORTALS first when ENABLEPORTALS is still unset, then
-# substitutes PTLLND: "ptllnd" when ENABLEPORTALS is non-zero, the empty
-# string otherwise.
-#
-AC_DEFUN([LN_CONFIG_PTLLND],
-[
-if test -z "$ENABLEPORTALS"; then
- LN_CONFIG_PORTALS
-fi
-
-AC_MSG_CHECKING([whether to build the kernel portals LND])
-
-PTLLND=""
-if test $ENABLEPORTALS -ne 0; then
- AC_MSG_RESULT([yes])
- PTLLND="ptllnd"
-else
- AC_MSG_RESULT([no])
-fi
-AC_SUBST(PTLLND)
-])
-
-#
-# LN_CONFIG_UPTLLND
-#
-# configure support for the userspace Portals LND
-#
-# Runs LN_CONFIG_PORTALS first when ENABLEPORTALS is still unset, then
-# substitutes UPTLLND: "ptllnd" when ENABLEPORTALS is non-zero, the empty
-# string otherwise.
-# NOTE(review): the enabled value is "ptllnd", not "uptllnd" - confirm this
-# matches what the consumers of UPTLLND compare against.
-#
-AC_DEFUN([LN_CONFIG_UPTLLND],
-[
-if test -z "$ENABLEPORTALS"; then
- LN_CONFIG_PORTALS
-fi
-
-AC_MSG_CHECKING([whether to build the userspace portals LND])
-
-UPTLLND=""
-if test $ENABLEPORTALS -ne 0; then
- AC_MSG_RESULT([yes])
- UPTLLND="ptllnd"
-else
- AC_MSG_RESULT([no])
-fi
-AC_SUBST(UPTLLND)
-])
-
#
# LN_CONFIG_USOCKLND
#
LN_CONFIG_O2IB
LN_CONFIG_RALND
LN_CONFIG_GNILND
-LN_CONFIG_PTLLND
LN_CONFIG_MX
# 2.6.36
LN_CONFIG_TCP_SENDPAGE
fi
LN_CONFIG_MAX_PAYLOAD
-LN_CONFIG_UPTLLND
LN_CONFIG_USOCKLND
])
AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd")
AM_CONDITIONAL(BUILD_GNILND, test x$GNILND = "xgnilnd")
AM_CONDITIONAL(BUILD_GNILND_RCA, test x$GNILNDRCA = "xgnilndrca")
-AM_CONDITIONAL(BUILD_PTLLND, test x$PTLLND = "xptllnd")
AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd")
])
lnet/klnds/gnilnd/autoMakefile
lnet/klnds/socklnd/Makefile
lnet/klnds/socklnd/autoMakefile
-lnet/klnds/ptllnd/Makefile
-lnet/klnds/ptllnd/autoMakefile
lnet/lnet/Makefile
lnet/lnet/autoMakefile
lnet/selftest/Makefile
EXTRA_DIST = api.h api-support.h \
lib-lnet.h lib-types.h lnet.h lnetctl.h types.h \
- socklnd.h ptllnd.h ptllnd_wire.h lnetst.h
+ socklnd.h lnetst.h
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/include/lnet/ptllnd.h
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-/*
- * The PTLLND was designed to support Portals with
- * Lustre and non-lustre UNLINK semantics.
- * However for now the two targets are Cray Portals
- * on the XT3 and Lustre Portals (for testing) both
- * have Lustre UNLINK semantics, so this is defined
- * by default.
- */
-#define LUSTRE_PORTALS_UNLINK_SEMANTICS
-
-
-#ifdef _USING_LUSTRE_PORTALS_
-
-/* NIDs are 64-bits on Lustre Portals */
-#define FMT_NID LPU64
-#define FMT_PID "%d"
-
-/* When using Lustre Portals, Lustre completion semantics are implicit */
-#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0
-
-#else /* _USING_CRAY_PORTALS_ */
-
-/* NIDs are integers on Cray Portals */
-#define FMT_NID "%u"
-#define FMT_PID "%d"
-
-/* When using Cray Portals this is defined in the Cray Portals Header*/
-/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */
-
-/* Can compare handles directly on Cray Portals */
-#define PtlHandleIsEqual(a,b) ((a) == (b))
-
-/* Different error types on Cray Portals */
-#define ptl_err_t ptl_ni_fail_t
-
-/*
- * The Cray Portals has no maximum number of IOVs. The
- * maximum is limited only by memory and size of the
- * int parameters (2^31-1).
- * Lustre only really requires the underlying
- * implementation to support at least LNET_MAX_IOV,
- * so for Cray portals we can safely just use that
- * value here.
- *
- */
-#define PTL_MD_MAX_IOV LNET_MAX_IOV
-
-#endif
-
-#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID
-
-/* Align incoming small request messages to an 8 byte boundary if this is
- * supported to avoid alignment issues on some architectures */
-#ifndef PTL_MD_LOCAL_ALIGN8
-# define PTL_MD_LOCAL_ALIGN8 0
-#endif
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/include/lnet/ptllnd_wire.h
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-/* Minimum buffer size that any peer will post to receive ptllnd messages */
-#define PTLLND_MIN_BUFFER_SIZE 256
-
-/************************************************************************
- * Tunable defaults that {u,k}lnds/ptllnd should have in common.
- */
-
-#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */
-#define PTLLND_PID 9 /* The Portals PID */
-#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */
-
-/* Default buffer size for kernel ptllnds (guaranteed eager) */
-#define PTLLND_MAX_KLND_MSG_SIZE 512
-
-/* Default buffer size for catamount ptllnds (not guaranteed eager) - large
- * enough to avoid RDMA for anything sent while control is not in liblustre */
-#define PTLLND_MAX_ULND_MSG_SIZE 512
-
-
-/************************************************************************
- * Portals LND Wire message format.
- * These are sent in sender's byte order (i.e. receiver flips).
- */
-
-#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved
- * above is for bulk data transfer */
-#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */
-/* Body of an immediate message: the LNet header followed directly by the
- * payload bytes (zero-length trailing array). */
-typedef struct
-{
- lnet_hdr_t kptlim_hdr; /* portals header */
- char kptlim_payload[0]; /* piggy-backed payload */
-} WIRE_ATTR kptl_immediate_msg_t;
-/* Body of a PUT/GET request: the LNet header plus the matchbits carried
- * in kptlrm_matchbits. */
-typedef struct
-{
- lnet_hdr_t kptlrm_hdr; /* portals header */
- __u64 kptlrm_matchbits; /* matchbits */
-} WIRE_ATTR kptl_rdma_msg_t;
-/* Body of a HELLO message. */
-typedef struct
-{
- __u64 kptlhm_matchbits; /* matchbits */
- __u32 kptlhm_max_msg_size; /* max message size */
-} WIRE_ATTR kptl_hello_msg_t;
-/* A complete ptllnd message: a fixed header followed by a body selected by
- * ptlm_type.  The wirecheck assertions in ptllnd.c pin the body at offset 56
- * and the whole structure at 136 bytes. */
-typedef struct
-{
- /* First 2 fields fixed FOR ALL TIME */
- __u32 ptlm_magic; /* I'm a Portals LND message */
- __u16 ptlm_version; /* this is my version number */
- __u8 ptlm_type; /* the message type */
- __u8 ptlm_credits; /* returned credits */
- __u32 ptlm_nob; /* # bytes in whole message */
- __u32 ptlm_cksum; /* checksum (0 == no checksum) */
- __u64 ptlm_srcnid; /* sender's NID */
- __u64 ptlm_srcstamp; /* sender's incarnation */
- __u64 ptlm_dstnid; /* destination's NID */
- __u64 ptlm_dststamp; /* destination's incarnation */
- __u32 ptlm_srcpid; /* sender's PID */
- __u32 ptlm_dstpid; /* destination's PID */
-
- union {
- kptl_immediate_msg_t immediate;
- kptl_rdma_msg_t rdma;
- kptl_hello_msg_t hello;
- } WIRE_ATTR ptlm_u;
-
-} kptl_msg_t;
-
-/* kptl_msg_t::ptlm_credits is only a __u8 */
-#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1)
-
-#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC
-#define PTLLND_MSG_VERSION 0x04
-
-#define PTLLND_RDMA_OK 0x00
-#define PTLLND_RDMA_FAIL 0x01
-
-#define PTLLND_MSG_TYPE_INVALID 0x00
-#define PTLLND_MSG_TYPE_PUT 0x01
-#define PTLLND_MSG_TYPE_GET 0x02
-#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/
-#define PTLLND_MSG_TYPE_NOOP 0x04
-#define PTLLND_MSG_TYPE_HELLO 0x05
-#define PTLLND_MSG_TYPE_NAK 0x06
@BUILD_GNILND_TRUE@subdir-m += gnilnd
@BUILD_O2IBLND_TRUE@subdir-m += o2iblnd
@BUILD_QSWLND_TRUE@subdir-m += qswlnd
-@BUILD_PTLLND_TRUE@subdir-m += ptllnd
subdir-m += socklnd
@INCLUDE_RULES@
# Lustre is a trademark of Sun Microsystems, Inc.
#
-SUBDIRS = socklnd qswlnd mxlnd ralnd gnilnd ptllnd o2iblnd
+SUBDIRS = socklnd qswlnd mxlnd ralnd gnilnd o2iblnd
+++ /dev/null
-/wirecheck
+++ /dev/null
-MODULES := kptllnd
-
-EXTRA_POST_CFLAGS := @PTLLNDCPPFLAGS@
-
-kptllnd-objs := ptllnd.o \
- ptllnd_cb.o \
- ptllnd_modparams.o \
- ptllnd_peer.o \
- ptllnd_rx_buf.o \
- ptllnd_tx.o
-
-@INCLUDE_RULES@
+++ /dev/null
-1. This version of the Portals LND is intended to work on the Cray XT3 using
- Cray Portals as a network transport.
-
-2. To enable the building of the Portals LND (ptllnd.ko) configure with the
- following option:
- ./configure --with-portals=<path-to-portals-headers>
-
-3. The following configuration options are supported
-
- ntx:
- The total number of message descriptors
-
- concurrent_peers:
- The maximum number of concurrent peers. Peers attempting
- to connect beyond the maximum will not be allowed.
-
- peer_hash_table_size:
- The number of hash table slots for the peers. This number
- should scale with concurrent_peers.
-
- cksum:
- Set to non-zero to enable message (not RDMA) checksums for
- outgoing packets. Incoming packets will always be checksummed
- if necessary, independent of this value.
-
- timeout:
- The amount of time a request can linger in a peer's active
- queue before the peer is considered dead. Units: seconds.
-
- portal:
- The portal ID to use for the ptllnd traffic.
-
- rxb_npages:
- The number of pages in a RX Buffer.
-
- credits:
- The maximum total number of concurrent sends that are
- outstanding at any given instant.
-
- peercredits:
- The maximum number of concurrent sends that are
- outstanding to a single peer at any given instant.
-
- max_msg_size:
- The maximum immediate message size. This MUST be
- the same on all nodes in a cluster. A peer connecting
- with a different max_msg_size will be rejected.
+++ /dev/null
-if MODULES
-if BUILD_PTLLND
-modulenet_DATA = kptllnd$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = @MOSTLYCLEANFILES@
-EXTRA_DIST = $(kptllnd-objs:%.o=%.c) ptllnd.h
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-#include "ptllnd.h"
-/* LND descriptor for the PTLLND network type: fills each lnd_t callback
- * slot with the matching kptllnd_* implementation. */
-lnd_t kptllnd_lnd = {
- .lnd_type = PTLLND,
- .lnd_startup = kptllnd_startup,
- .lnd_shutdown = kptllnd_shutdown,
- .lnd_ctl = kptllnd_ctl,
- .lnd_query = kptllnd_query,
- .lnd_send = kptllnd_send,
- .lnd_recv = kptllnd_recv,
- .lnd_eager_recv = kptllnd_eager_recv,
-};
-
-kptl_data_t kptllnd_data;
-
-/*
- * Format a Portals process id as text using FMT_PTLID.
- *
- * The result is written into one of 64 static 32-byte slots that are handed
- * out round-robin, so a returned string is overwritten once 64 further calls
- * have been made.  kptl_ptlid2str_lock protects only the slot selection; the
- * formatting itself runs after the lock is dropped.
- */
-char *
-kptllnd_ptlid2str(ptl_process_id_t id)
-{
-        static char     slots[64][32];
-        static int      next_slot = 0;
-
-        unsigned long   irqflags;
-        char           *buf;
-
-        spin_lock_irqsave(&kptllnd_data.kptl_ptlid2str_lock, irqflags);
-        buf = slots[next_slot];
-        next_slot++;
-        if (next_slot >= sizeof(slots) / sizeof(slots[0]))
-                next_slot = 0;          /* wrap around to the first slot */
-        spin_unlock_irqrestore(&kptllnd_data.kptl_ptlid2str_lock, irqflags);
-
-        snprintf(buf, sizeof(slots[0]), FMT_PTLID, id.pid, id.nid);
-        return buf;
-}
-/*
- * Verification of the ptllnd wire protocol.  The body consists solely of
- * CLASSERT() checks (presumably compile-time assertions) that pin each
- * protocol constant, and the size and offset of each wire-structure member,
- * to the values recorded by 'wirecheck'.
- */
-void
-kptllnd_assert_wire_constants (void)
-{
- /* Wire protocol assertions generated by 'wirecheck'
- * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU
- * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */
-
-
- /* Constants... */
- CLASSERT (PTL_RESERVED_MATCHBITS == 0x100);
- CLASSERT (LNET_MSG_MATCHBITS == 0);
- CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E);
- CLASSERT (PTLLND_MSG_VERSION == 0x04);
- CLASSERT (PTLLND_RDMA_OK == 0x00);
- CLASSERT (PTLLND_RDMA_FAIL == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00);
- CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01);
- CLASSERT (PTLLND_MSG_TYPE_GET == 0x02);
- CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03);
- CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04);
- CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05);
- CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06);
-
- /* Checks for struct kptl_msg_t */
- CLASSERT ((int)sizeof(kptl_msg_t) == 136);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80);
- CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56);
- CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12);
-
- /* Checks for struct kptl_immediate_msg_t
- * (element 13 of the zero-length payload array is used as a sample
- * to pin the payload's start offset and element size) */
- CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85);
- CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1);
-
- /* Checks for struct kptl_rdma_msg_t */
- CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72);
- CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72);
- CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8);
-
- /* Checks for struct kptl_hello_msg_t */
- CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8);
- CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8);
- CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4);
-}
-
-/*
- * Return the symbolic name of a Portals event type, or
- * "<unknown event type>" when the value is not one of the events listed.
- */
-const char *kptllnd_evtype2str(int type)
-{
-/* expands to a case label that returns the constant's own spelling */
-#define EVTYPE_NAME(ev) case ev: return #ev
-        switch (type) {
-        EVTYPE_NAME(PTL_EVENT_GET_START);
-        EVTYPE_NAME(PTL_EVENT_GET_END);
-        EVTYPE_NAME(PTL_EVENT_PUT_START);
-        EVTYPE_NAME(PTL_EVENT_PUT_END);
-        EVTYPE_NAME(PTL_EVENT_REPLY_START);
-        EVTYPE_NAME(PTL_EVENT_REPLY_END);
-        EVTYPE_NAME(PTL_EVENT_ACK);
-        EVTYPE_NAME(PTL_EVENT_SEND_START);
-        EVTYPE_NAME(PTL_EVENT_SEND_END);
-        EVTYPE_NAME(PTL_EVENT_UNLINK);
-        default:
-                break;
-        }
-#undef EVTYPE_NAME
-        return "<unknown event type>";
-}
-
-/*
- * Return the symbolic name of a ptllnd wire message type, or
- * "<unknown msg type>" when the value is not a known PTLLND_MSG_TYPE_*.
- */
-const char *kptllnd_msgtype2str(int type)
-{
-/* expands to a case label that returns the constant's own spelling */
-#define MSGTYPE_NAME(mt) case mt: return #mt
-        switch (type) {
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_INVALID);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_PUT);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_GET);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_IMMEDIATE);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_HELLO);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_NOOP);
-        MSGTYPE_NAME(PTLLND_MSG_TYPE_NAK);
-        default:
-                break;
-        }
-#undef MSGTYPE_NAME
-        return "<unknown msg type>";
-}
-
-/*
- * Return the symbolic name of a Portals return code (PTL_OK, PTL_FAIL, ...)
- * for use in log messages.
- *
- * Values that are not listed yield "<unknown error type>".  The fallback
- * used to read "<unknown event type>", copied from kptllnd_evtype2str(),
- * which made logs of unrecognised return codes misleading.
- */
-const char *kptllnd_errtype2str(int type)
-{
-#define DO_TYPE(x) case x: return #x;
-        switch(type)
-        {
-                DO_TYPE(PTL_OK);
-                DO_TYPE(PTL_SEGV);
-                DO_TYPE(PTL_NO_SPACE);
-                DO_TYPE(PTL_ME_IN_USE);
-                DO_TYPE(PTL_NAL_FAILED);
-                DO_TYPE(PTL_NO_INIT);
-                DO_TYPE(PTL_IFACE_DUP);
-                DO_TYPE(PTL_IFACE_INVALID);
-                DO_TYPE(PTL_HANDLE_INVALID);
-                DO_TYPE(PTL_MD_INVALID);
-                DO_TYPE(PTL_ME_INVALID);
-                DO_TYPE(PTL_PROCESS_INVALID);
-                DO_TYPE(PTL_PT_INDEX_INVALID);
-                DO_TYPE(PTL_SR_INDEX_INVALID);
-                DO_TYPE(PTL_EQ_INVALID);
-                DO_TYPE(PTL_EQ_DROPPED);
-                DO_TYPE(PTL_EQ_EMPTY);
-                DO_TYPE(PTL_MD_NO_UPDATE);
-                DO_TYPE(PTL_FAIL);
-                DO_TYPE(PTL_AC_INDEX_INVALID);
-                DO_TYPE(PTL_MD_ILLEGAL);
-                DO_TYPE(PTL_ME_LIST_TOO_LONG);
-                DO_TYPE(PTL_MD_IN_USE);
-                DO_TYPE(PTL_NI_INVALID);
-                DO_TYPE(PTL_PID_INVALID);
-                DO_TYPE(PTL_PT_FULL);
-                DO_TYPE(PTL_VAL_FAILED);
-                DO_TYPE(PTL_NOT_IMPLEMENTED);
-                DO_TYPE(PTL_NO_ACK);
-                DO_TYPE(PTL_EQ_IN_USE);
-                DO_TYPE(PTL_PID_IN_USE);
-                DO_TYPE(PTL_INV_EQ_SIZE);
-                DO_TYPE(PTL_AGAIN);
-        default:
-                return "<unknown error type>";
-        }
-#undef DO_TYPE
-}
-
-/*
- * Rotate-and-add checksum over the nob bytes starting at ptr: for each byte
- * the running sum is rotated left by one bit and the byte is added.
- *
- * Never returns 0, because 0 in ptlm_cksum means "no checksum"; a computed
- * sum of 0 is reported as 1.  A non-positive nob therefore also yields 1.
- *
- * NOTE(review): bytes are read through plain 'char', whose signedness is
- * implementation-defined - confirm all peers compute the same value.
- */
-__u32
-kptllnd_cksum (void *ptr, int nob)
-{
-        char   *bytes = ptr;
-        __u32   csum = 0;
-        int     i;
-
-        for (i = 0; i < nob; i++)
-                csum = ((csum << 1) | (csum >> 31)) + bytes[i];
-
-        /* 0 is reserved for "no checksum" */
-        if (csum == 0)
-                return 1;
-
-        return csum;
-}
-
-/*
- * Fill in the per-message header fields: type, total size and the
- * source/destination ids.  The remaining header fields are set later by
- * kptllnd_msg_pack().
- *
- * ptlm_nob is the header size (offset of ptlm_u) plus body_nob, rounded up
- * to a multiple of 8; it is asserted not to exceed the max_msg_size tunable.
- */
-void
-kptllnd_init_msg(kptl_msg_t *msg, int type,
-                 lnet_process_id_t target, int body_nob)
-{
-        const size_t hdr_nob = offsetof(kptl_msg_t, ptlm_u);
-
-        msg->ptlm_type   = type;
-        msg->ptlm_nob    = (hdr_nob + body_nob + 7) & ~7;  /* round up to 8 */
-
-        /* destination is the caller-supplied target */
-        msg->ptlm_dstpid = target.pid;
-        msg->ptlm_dstnid = target.nid;
-
-        /* source is this node's LNet pid and its NID as derived from target */
-        msg->ptlm_srcpid = the_lnet.ln_pid;
-        msg->ptlm_srcnid = kptllnd_ptl2lnetnid(target.nid,
-                                               kptllnd_data.kptl_portals_id.nid);
-
-        LASSERT(msg->ptlm_nob <= *kptllnd_tunables.kptl_max_msg_size);
-}
-
-/*
- * Complete the header of a message already prepared by kptllnd_init_msg():
- * magic, version, returned credits and the two incarnation stamps come from
- * here; type, size and the src/dst ids are left untouched.
- *
- * When the checksum tunable is non-zero, ptlm_cksum is set to the checksum
- * of the header (everything before ptlm_u), computed while ptlm_cksum itself
- * is zero.  Otherwise ptlm_cksum stays 0, meaning "no checksum".
- */
-void
-kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer)
-{
-        msg->ptlm_magic    = PTLLND_MSG_MAGIC;
-        msg->ptlm_version  = PTLLND_MSG_VERSION;
-        msg->ptlm_credits  = peer->peer_outstanding_credits;
-        msg->ptlm_srcstamp = peer->peer_myincarnation;
-        msg->ptlm_dststamp = peer->peer_incarnation;
-
-        /* must be zero while the checksum is being computed */
-        msg->ptlm_cksum    = 0;
-
-        if (!*kptllnd_tunables.kptl_checksum)
-                return;
-
-        msg->ptlm_cksum = kptllnd_cksum(msg, offsetof(kptl_msg_t, ptlm_u));
-}
-/*
- * Validate a received message of nob bytes in place and convert its
- * multi-byte fields to host byte order when the sender's byte order differs.
- *
- * Returns 0 on success, or -EPROTO when the message is too short, has a bad
- * magic, version or checksum, claims a length other than nob, carries
- * reserved matchbits in a PUT/GET, or has an unknown type.
- *
- * NB on a checksum mismatch the function returns with ptlm_cksum still
- * zeroed; the buffer is not restored.
- */
-int
-kptllnd_msg_unpack(kptl_msg_t *msg, int nob)
-{
- const int hdr_size = offsetof(kptl_msg_t, ptlm_u);
- __u32 msg_cksum;
- __u16 msg_version;
- int flip;
-
- /* 6 bytes are enough to have received magic + version */
- if (nob < 6) {
- CERROR("Very Short message: %d\n", nob);
- return -EPROTO;
- }
-
- /*
- * Determine if we need to flip: the magic reads correctly when the
- * sender shares our byte order, and byte-swapped when it does not.
- */
- if (msg->ptlm_magic == PTLLND_MSG_MAGIC) {
- flip = 0;
- } else if (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC)) {
- flip = 1;
- } else {
- CERROR("Bad magic: %08x\n", msg->ptlm_magic);
- return -EPROTO;
- }
-
- msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version;
-
- if (msg_version != PTLLND_MSG_VERSION) {
- CERROR("Bad version: got %04x expected %04x\n",
- (__u32)msg_version, PTLLND_MSG_VERSION);
- return -EPROTO;
- }
-
- if (nob < hdr_size) {
- CERROR("Short message: got %d, wanted at least %d\n",
- nob, hdr_size);
- return -EPROTO;
- }
-
- /* checksum must be computed with
- * 1) ptlm_cksum zero and
- * 2) BEFORE anything gets modified/flipped
- * A received checksum of 0 means the sender did not checksum.
- */
- msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum;
- msg->ptlm_cksum = 0;
- if (msg_cksum != 0 &&
- msg_cksum != kptllnd_cksum(msg, hdr_size)) {
- CERROR("Bad checksum\n");
- return -EPROTO;
- }
-
- msg->ptlm_version = msg_version; /* store host-order values back */
- msg->ptlm_cksum = msg_cksum;
-
- if (flip) {
- /* These two are 1 byte long so we don't swap them
- But check this assumption */
- CLASSERT (sizeof(msg->ptlm_type) == 1);
- CLASSERT (sizeof(msg->ptlm_credits) == 1);
- /* src & dst stamps are opaque cookies */
- __swab32s(&msg->ptlm_nob);
- __swab64s(&msg->ptlm_srcnid);
- __swab64s(&msg->ptlm_dstnid);
- __swab32s(&msg->ptlm_srcpid);
- __swab32s(&msg->ptlm_dstpid);
- }
-
- if (msg->ptlm_nob != nob) {
- CERROR("msg_nob corrupt: got 0x%08x, wanted %08x\n",
- msg->ptlm_nob, nob);
- return -EPROTO;
- }
-
- switch(msg->ptlm_type)
- {
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- if (nob < hdr_size + sizeof(kptl_rdma_msg_t)) {
- CERROR("Short rdma request: got %d, want %d\n",
- nob, hdr_size + (int)sizeof(kptl_rdma_msg_t));
- return -EPROTO;
- }
-
- if (flip)
- __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits);
-
- if (msg->ptlm_u.rdma.kptlrm_matchbits < PTL_RESERVED_MATCHBITS) {
- CERROR("Bad matchbits "LPX64"\n",
- msg->ptlm_u.rdma.kptlrm_matchbits);
- return -EPROTO;
- }
- break;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- if (nob < offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload)) {
- CERROR("Short immediate: got %d, want %d\n", nob,
- (int)offsetof(kptl_msg_t,
- ptlm_u.immediate.kptlim_payload));
- return -EPROTO;
- }
- /* Do nothing: no body field is byte-swapped here */
- break;
-
- case PTLLND_MSG_TYPE_NOOP:
- case PTLLND_MSG_TYPE_NAK:
- /* Do nothing: no body is examined for these types */
- break;
-
- case PTLLND_MSG_TYPE_HELLO:
- if (nob < hdr_size + sizeof(kptl_hello_msg_t)) {
- CERROR("Short hello: got %d want %d\n",
- nob, hdr_size + (int)sizeof(kptl_hello_msg_t));
- return -EPROTO;
- }
- if (flip) {
- __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits);
- __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size);
- }
- break;
-
- default:
- CERROR("Bad message type: 0x%02x\n", (__u32)msg->ptlm_type);
- return -EPROTO;
- }
-
- return 0;
-}
-
-/*
- * ioctl-style control entry point for a ptllnd network interface.
- *
- * IOC_LIBCFS_DEL_PEER: delete the peer identified by ioc_nid / ioc_u32[1].
- * IOC_LIBCFS_GET_PEER: look up the peer at index ioc_count and pack its
- *                      state into the ioctl block.  The block is filled in
- *                      whatever kptllnd_get_peer_info() returns.
- * Any other command returns -EINVAL.
- */
-int
-kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg)
-{
-        struct libcfs_ioctl_data *ioc = arg;
-        kptl_net_t               *net = ni->ni_data;
-        int                       rc;
-
-        CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n", cmd, arg);
-
-        /* the private data block must point back at this interface */
-        LASSERT (ni == net->net_ni);
-
-        if (cmd == IOC_LIBCFS_DEL_PEER) {
-                lnet_process_id_t victim;
-
-                victim.nid = ioc->ioc_nid;
-                victim.pid = ioc->ioc_u32[1];
-
-                rc = kptllnd_peer_del(victim);
-
-        } else if (cmd == IOC_LIBCFS_GET_PEER) {
-                lnet_process_id_t peer_id = {.nid = LNET_NID_ANY,
-                                             .pid = LNET_PID_ANY};
-                __u64 incarn       = 0;
-                __u64 next_mb      = 0;
-                __u64 last_mb_seen = 0;
-                int   peer_state   = 0;
-                int   hello_sent   = 0;
-                int   nrefs        = 0;
-                int   n_sendq      = 0;
-                int   n_activeq    = 0;
-                int   ncredits     = 0;
-                int   nout_credits = 0;
-
-                rc = kptllnd_get_peer_info(ioc->ioc_count, &peer_id,
-                                           &peer_state, &hello_sent,
-                                           &nrefs, &incarn,
-                                           &next_mb, &last_mb_seen,
-                                           &n_sendq, &n_activeq,
-                                           &ncredits, &nout_credits);
-
-                /* squeeze the results into the generic ioctl fields */
-                ioc->ioc_nid    = peer_id.nid;
-                ioc->ioc_net    = peer_state;
-                ioc->ioc_flags  = hello_sent;
-                ioc->ioc_count  = nrefs;
-                ioc->ioc_u64[0] = incarn;
-                /* each 64-bit matchbits value travels as low word, high word */
-                ioc->ioc_u32[0] = (__u32)next_mb;
-                ioc->ioc_u32[1] = (__u32)(next_mb >> 32);
-                ioc->ioc_u32[2] = (__u32)last_mb_seen;
-                ioc->ioc_u32[3] = (__u32)(last_mb_seen >> 32);
-                ioc->ioc_u32[4] = peer_id.pid;
-                /* two counters per word: high 16 bits | low 16 bits */
-                ioc->ioc_u32[5] = (n_sendq << 16) | n_activeq;
-                ioc->ioc_u32[6] = (ncredits << 16) | nout_credits;
-
-        } else {
-                rc = -EINVAL;
-        }
-
-        CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n", rc);
-        return rc;
-}
-
-/*
- * Report when the peer with the given NID (at LUSTRE_SRV_LNET_PID) was last
- * known to be alive.
- *
- * *when is written only if the peer can be found and its peer_last_alive is
- * non-zero; otherwise it is left untouched.  The peer reference obtained
- * from kptllnd_find_target() is dropped before returning.
- */
-void
-kptllnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when)
-{
-        lnet_process_id_t  target = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID};
-        kptl_net_t        *net = ni->ni_data;
-        kptl_peer_t       *found = NULL;
-        unsigned long      irqflags;
-
-        /* NB: kptllnd_find_target connects to peer if necessary */
-        if (kptllnd_find_target(net, target, &found) == 0) {
-                spin_lock_irqsave(&found->peer_lock, irqflags);
-                if (found->peer_last_alive != 0)
-                        *when = found->peer_last_alive;
-                spin_unlock_irqrestore(&found->peer_lock, irqflags);
-
-                kptllnd_peer_decref(found);
-        }
-}
-/*
- * Tear down the module-wide ptllnd state held in kptllnd_data.
- *
- * The list of nets must already be empty (asserted).  For the
- * PTLLND_INIT_ALL / PTLLND_INIT_DATA states the teardown runs in order:
- * stop receiving, mark shutdown phase 1 and delete all peers, wait for the
- * peer count to reach zero, mark phase 2 and wait for all threads to exit,
- * then free the tx descriptors.  In every state the Portals EQ and NI are
- * released if valid, the remaining allocations are freed, kptllnd_data is
- * zeroed and the module reference taken in kptllnd_base_startup() is
- * dropped.
- */
-void
-kptllnd_base_shutdown (void)
-{
- int i;
- ptl_err_t prc;
- unsigned long flags;
- lnet_process_id_t process_id;
-
- read_lock(&kptllnd_data.kptl_net_rw_lock);
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_nets));
- read_unlock(&kptllnd_data.kptl_net_rw_lock);
-
- switch (kptllnd_data.kptl_init) {
- default: /* unrecognised init state */
- LBUG();
-
- case PTLLND_INIT_ALL:
- case PTLLND_INIT_DATA:
- /* stop receiving */
- kptllnd_rx_buffer_pool_fini(&kptllnd_data.kptl_rx_buffer_pool);
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_sched_rxq));
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_sched_rxbq));
-
- /* lock to interleave cleanly with peer birth/death */
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- LASSERT (kptllnd_data.kptl_shutdown == 0);
- kptllnd_data.kptl_shutdown = 1; /* phase 1 == destroy peers */
- /* no new peers possible now */
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- /* nuke all existing peers (wildcard NID and PID) */
- process_id.nid = LNET_NID_ANY;
- process_id.pid = LNET_PID_ANY;
- kptllnd_peer_del(process_id);
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- LASSERT (kptllnd_data.kptl_n_active_peers == 0);
-
- i = 2; /* counter for the power-of-2 warning throttle below */
- while (kptllnd_data.kptl_npeers != 0) {
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d peers to terminate\n",
- kptllnd_data.kptl_npeers);
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- cfs_pause(cfs_time_seconds(1)); /* lock is dropped while pausing */
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock,
- flags);
- }
-
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_closing_peers));
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_zombie_peers));
- LASSERT (kptllnd_data.kptl_peers != NULL);
- for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++)
- LASSERT (cfs_list_empty (&kptllnd_data.kptl_peers[i]));
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
- CDEBUG(D_NET, "All peers deleted\n");
-
- /* Shutdown phase 2: kill the daemons... */
- kptllnd_data.kptl_shutdown = 2;
- smp_mb();
-
- i = 2;
- while (cfs_atomic_read (&kptllnd_data.kptl_nthreads) != 0) {
- /* Wake up all threads*/
- wake_up_all(&kptllnd_data.kptl_sched_waitq);
- wake_up_all(&kptllnd_data.kptl_watchdog_waitq);
-
- i++;
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d threads to terminate\n",
- cfs_atomic_read(&kptllnd_data.kptl_nthreads));
- cfs_pause(cfs_time_seconds(1));
- }
-
- CDEBUG(D_NET, "All Threads stopped\n");
- LASSERT(cfs_list_empty(&kptllnd_data.kptl_sched_txq));
-
- kptllnd_cleanup_tx_descs();
-
- /* Nothing here now, but libcfs might soon require
- * us to explicitly destroy wait queues and semaphores
- * that would be done here */
-
- /* fall through */
-
- case PTLLND_INIT_NOTHING:
- CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n");
- break;
- }
-
- if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) {
- prc = PtlEQFree(kptllnd_data.kptl_eqh);
- if (prc != PTL_OK) /* logged only; teardown continues */
- CERROR("Error %s(%d) freeing portals EQ\n",
- kptllnd_errtype2str(prc), prc);
- }
-
- if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) {
- prc = PtlNIFini(kptllnd_data.kptl_nih);
- if (prc != PTL_OK) /* logged only; teardown continues */
- CERROR("Error %s(%d) finalizing portals NI\n",
- kptllnd_errtype2str(prc), prc);
- }
-
- LASSERT (cfs_atomic_read(&kptllnd_data.kptl_ntx) == 0);
- LASSERT (cfs_list_empty(&kptllnd_data.kptl_idle_txs));
-
- if (kptllnd_data.kptl_rx_cache != NULL)
- kmem_cache_destroy(kptllnd_data.kptl_rx_cache);
-
- if (kptllnd_data.kptl_peers != NULL)
- LIBCFS_FREE(kptllnd_data.kptl_peers,
- sizeof (cfs_list_t) *
- kptllnd_data.kptl_peer_hash_size);
-
- if (kptllnd_data.kptl_nak_msg != NULL)
- LIBCFS_FREE(kptllnd_data.kptl_nak_msg,
- offsetof(kptl_msg_t, ptlm_u));
-
- memset(&kptllnd_data, 0, sizeof(kptllnd_data));
- module_put(THIS_MODULE); /* pairs with try_module_get() in kptllnd_base_startup() */
- return;
-}
-
-/*
- * Bring up the state shared by all ptllnd network interfaces.
- *
- * Validates the tunables, resets kptllnd_data, allocates the common NAK
- * message, initialises locks/lists, opens the Portals NI and event queue,
- * checks the Portals PID, picks the incarnation stamp, builds the peer
- * hash table and rx slab, pre-allocates tx descriptors, starts the
- * scheduler and watchdog threads and finally reserves rx buffers.
- *
- * Returns 0 on success or a negative errno.  Failures detected after the
- * module reference has been taken go through kptllnd_base_shutdown().
- */
-int
-kptllnd_base_startup (void)
-{
-	int			i;
-	int			rc;
-	int			spares;
-	struct timeval		tv;
-	lnet_process_id_t	target;
-	ptl_err_t		ptl_rc;
-	char			name[16];
-
-	if (*kptllnd_tunables.kptl_max_procs_per_node < 1) {
-		CERROR("max_procs_per_node must be >= 1\n");
-		return -EINVAL;
-	}
-
-	if (*kptllnd_tunables.kptl_peertxcredits > PTLLND_MSG_MAX_CREDITS) {
-		CERROR("peercredits must be <= %d\n", PTLLND_MSG_MAX_CREDITS);
-		return -EINVAL;
-	}
-
-	/* Round the message size down to a multiple of 8, then enforce the
-	 * minimum buffer size */
-	*kptllnd_tunables.kptl_max_msg_size &= ~7;
-	if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE)
-		*kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE;
-
-	CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0);
-	CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE);
-
-	/* Zero pointers, flags etc; put everything into a known state. */
-	memset (&kptllnd_data, 0, sizeof (kptllnd_data));
-
-	/* Only the header part of kptl_msg_t (up to ptlm_u) is needed for
-	 * the shared NAK message */
-	LIBCFS_ALLOC(kptllnd_data.kptl_nak_msg, offsetof(kptl_msg_t, ptlm_u));
-	if (kptllnd_data.kptl_nak_msg == NULL) {
-		CERROR("Can't allocate NAK msg\n");
-		return -ENOMEM;
-	}
-	memset(kptllnd_data.kptl_nak_msg, 0, offsetof(kptl_msg_t, ptlm_u));
-
-	kptllnd_data.kptl_eqh = PTL_INVALID_HANDLE;
-	kptllnd_data.kptl_nih = PTL_INVALID_HANDLE;
-
-	rwlock_init(&kptllnd_data.kptl_net_rw_lock);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_nets);
-
-	/* Setup the sched locks/lists/waitq */
-	spin_lock_init(&kptllnd_data.kptl_sched_lock);
-	init_waitqueue_head(&kptllnd_data.kptl_sched_waitq);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_txq);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxq);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxbq);
-
-	/* Init kptl_ptlid2str_lock before any call to kptllnd_ptlid2str */
-	spin_lock_init(&kptllnd_data.kptl_ptlid2str_lock);
-
-	/* Setup the tx locks/lists */
-	spin_lock_init(&kptllnd_data.kptl_tx_lock);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_idle_txs);
-	cfs_atomic_set(&kptllnd_data.kptl_ntx, 0);
-
-	/* Uptick the module reference count.
-	 * NOTE(review): the result of try_module_get() is not checked. */
-	try_module_get(THIS_MODULE);
-
-	kptllnd_data.kptl_expected_peers =
-		*kptllnd_tunables.kptl_max_nodes *
-		*kptllnd_tunables.kptl_max_procs_per_node;
-
-	/*
-	 * Initialize the Network interface instance
-	 * We use the default because we don't have any
-	 * way to choose a better interface.
-	 * Requested and actual limits are ignored.
-	 */
-	ptl_rc = PtlNIInit(
-#ifdef _USING_LUSTRE_PORTALS_
-		PTL_IFACE_DEFAULT,
-#else
-		CRAY_KERN_NAL,
-#endif
-		*kptllnd_tunables.kptl_pid, NULL, NULL,
-		&kptllnd_data.kptl_nih);
-
-	/*
-	 * Note: PTL_IFACE_DUP simply means that the requested
-	 * interface was already inited and that we're sharing it.
-	 * Which is ok.
-	 */
-	if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) {
-		CERROR ("PtlNIInit: error %s(%d)\n",
-			kptllnd_errtype2str(ptl_rc), ptl_rc);
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/* NB eq size irrelevant if using a callback */
-	ptl_rc = PtlEQAlloc(kptllnd_data.kptl_nih,
-			    8,				/* size */
-			    kptllnd_eq_callback,	/* handler callback */
-			    &kptllnd_data.kptl_eqh);	/* output handle */
-	if (ptl_rc != PTL_OK) {
-		CERROR("PtlEQAlloc failed %s(%d)\n",
-		       kptllnd_errtype2str(ptl_rc), ptl_rc);
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	/* Fetch the lower NID */
-	ptl_rc = PtlGetId(kptllnd_data.kptl_nih,
-			  &kptllnd_data.kptl_portals_id);
-	if (ptl_rc != PTL_OK) {
-		CERROR ("PtlGetID: error %s(%d)\n",
-			kptllnd_errtype2str(ptl_rc), ptl_rc);
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	if (kptllnd_data.kptl_portals_id.pid != *kptllnd_tunables.kptl_pid) {
-		/* The kernel ptllnd must have the expected PID */
-		CERROR("Unexpected PID: %u (%u expected)\n",
-		       kptllnd_data.kptl_portals_id.pid,
-		       *kptllnd_tunables.kptl_pid);
-		rc = -EINVAL;
-		goto failed;
-	}
-
-	/* Initialise the incarnation - it must be for-all-time unique, even
-	 * accounting for the fact that we increment it when we disconnect a
-	 * peer that's using it */
-	do_gettimeofday(&tv);
-	kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) +
-					tv.tv_usec;
-	CDEBUG(D_NET, "Incarnation="LPX64"\n", kptllnd_data.kptl_incarnation);
-
-	target.nid = LNET_NID_ANY;
-	target.pid = LNET_PID_ANY; /* NB target for NAK doesn't matter */
-	kptllnd_init_msg(kptllnd_data.kptl_nak_msg, PTLLND_MSG_TYPE_NAK, target, 0);
-	kptllnd_data.kptl_nak_msg->ptlm_magic    = PTLLND_MSG_MAGIC;
-	kptllnd_data.kptl_nak_msg->ptlm_version  = PTLLND_MSG_VERSION;
-	kptllnd_data.kptl_nak_msg->ptlm_srcpid   = the_lnet.ln_pid;
-	kptllnd_data.kptl_nak_msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation;
-
-	rwlock_init(&kptllnd_data.kptl_peer_rw_lock);
-	init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers);
-	CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers);
-
-	/* Allocate and setup the peer hash table */
-	kptllnd_data.kptl_peer_hash_size =
-		*kptllnd_tunables.kptl_peer_hash_table_size;
-	LIBCFS_ALLOC(kptllnd_data.kptl_peers,
-		     sizeof(cfs_list_t) *
-		     kptllnd_data.kptl_peer_hash_size);
-	if (kptllnd_data.kptl_peers == NULL) {
-		CERROR("Failed to allocate space for peer hash table size=%d\n",
-		       kptllnd_data.kptl_peer_hash_size);
-		rc = -ENOMEM;
-		goto failed;
-	}
-	for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++)
-		CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_peers[i]);
-
-	kptllnd_rx_buffer_pool_init(&kptllnd_data.kptl_rx_buffer_pool);
-
-	/* Each rx descriptor is followed by room for one max-size message */
-	kptllnd_data.kptl_rx_cache =
-		kmem_cache_create("ptllnd_rx",
-				  sizeof(kptl_rx_t) +
-				  *kptllnd_tunables.kptl_max_msg_size,
-				  0,	/* offset */
-				  0);	/* flags */
-	if (kptllnd_data.kptl_rx_cache == NULL) {
-		CERROR("Can't create slab for RX descriptors\n");
-		rc = -ENOMEM;
-		goto failed;
-	}
-
-	/* lists/ptrs/locks initialised */
-	kptllnd_data.kptl_init = PTLLND_INIT_DATA;
-
-	/*****************************************************/
-
-	rc = kptllnd_setup_tx_descs();
-	if (rc != 0) {
-		CERROR("Can't pre-allocate %d TX descriptors: %d\n",
-		       *kptllnd_tunables.kptl_ntx, rc);
-		goto failed;
-	}
-
-	/* Start the scheduler threads for handling incoming requests.  No need
-	 * to advance the state because this will be automatically cleaned up
-	 * now that PTLLND_INIT_DATA state has been entered */
-	CDEBUG(D_NET, "starting %d scheduler threads\n", PTLLND_N_SCHED);
-	for (i = 0; i < PTLLND_N_SCHED; i++) {
-		snprintf(name, sizeof(name), "kptllnd_sd_%02d", i);
-		/* Hand over the name formatted above, exactly as the
-		 * watchdog start below does; it was previously built and
-		 * then dropped */
-		rc = kptllnd_thread_start(kptllnd_scheduler,
-					  (void *)((long)i), name);
-		if (rc != 0) {
-			CERROR("Can't spawn scheduler[%d]: %d\n", i, rc);
-			goto failed;
-		}
-	}
-
-	/* NB 'i' equals PTLLND_N_SCHED here, so that is the watchdog suffix */
-	snprintf(name, sizeof(name), "kptllnd_wd_%02d", i);
-	rc = kptllnd_thread_start(kptllnd_watchdog, NULL, name);
-	if (rc != 0) {
-		CERROR("Can't spawn watchdog: %d\n", rc);
-		goto failed;
-	}
-
-	/* Ensure that 'rxb_nspare' buffers can be off the net (being emptied)
-	 * and we will still have enough buffers posted for all our peers */
-	spares = *kptllnd_tunables.kptl_rxb_nspare *
-		 ((*kptllnd_tunables.kptl_rxb_npages * PAGE_SIZE)/
-		  *kptllnd_tunables.kptl_max_msg_size);
-
-	/* reserve and post the buffers */
-	rc = kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool,
-					    kptllnd_data.kptl_expected_peers +
-					    spares);
-	if (rc != 0) {
-		CERROR("Can't reserve RX Buffer pool: %d\n", rc);
-		goto failed;
-	}
-
-	/* flag everything initialised */
-	kptllnd_data.kptl_init = PTLLND_INIT_ALL;
-
-	/*****************************************************/
-
-	if (*kptllnd_tunables.kptl_checksum)
-		CWARN("Checksumming enabled\n");
-
-	CDEBUG(D_NET, "<<< kptllnd_base_startup SUCCESS\n");
-	return 0;
-
- failed:
-	CERROR("kptllnd_base_startup failed: %d\n", rc);
-	kptllnd_base_shutdown();
-	return rc;
-}
-
-/*
- * Start one ptllnd network interface.
- *
- * Runs kptllnd_base_startup() first if the shared state has not been
- * initialised yet, then allocates the per-NI kptl_net_t, fills in the
- * NI's credits and NID and links the net onto kptllnd_data.kptl_nets.
- *
- * Returns 0 on success or a negative errno.
- */
-int
-kptllnd_startup (lnet_ni_t *ni)
-{
-	kptl_net_t	*net;
-	int		 rc;
-
-	LASSERT (ni->ni_lnd == &kptllnd_lnd);
-
-	/* The first NI to start brings up the shared state */
-	if (kptllnd_data.kptl_init == PTLLND_INIT_NOTHING) {
-		rc = kptllnd_base_startup();
-		if (rc != 0)
-			return rc;
-	}
-
-	LIBCFS_ALLOC(net, sizeof(*net));
-	/* Stored before the NULL check: kptllnd_shutdown() reads
-	 * ni->ni_data to decide whether there is a net to tear down */
-	ni->ni_data = net;
-	if (net == NULL) {
-		CERROR("Can't allocate kptl_net_t\n");
-		kptllnd_shutdown(ni);
-		return -ENOMEM;
-	}
-
-	memset(net, 0, sizeof(*net));
-	net->net_ni = ni;
-	cfs_atomic_set(&net->net_refcount, 1);
-
-	ni->ni_maxtxcredits   = *kptllnd_tunables.kptl_credits;
-	ni->ni_peertxcredits  = *kptllnd_tunables.kptl_peertxcredits;
-	ni->ni_peerrtrcredits = *kptllnd_tunables.kptl_peerrtrcredits;
-	ni->ni_nid = kptllnd_ptl2lnetnid(ni->ni_nid,
-					 kptllnd_data.kptl_portals_id.nid);
-	CDEBUG(D_NET, "ptl id=%s, lnet id=%s\n",
-	       kptllnd_ptlid2str(kptllnd_data.kptl_portals_id),
-	       libcfs_nid2str(ni->ni_nid));
-
-	/* NB LNET_NIDNET(ptlm_srcnid) of NAK doesn't matter in case of
-	 * multiple NIs */
-	kptllnd_data.kptl_nak_msg->ptlm_srcnid = ni->ni_nid;
-
-	/* Publish the fully initialised net */
-	write_lock(&kptllnd_data.kptl_net_rw_lock);
-	cfs_list_add_tail(&net->net_list, &kptllnd_data.kptl_nets);
-	write_unlock(&kptllnd_data.kptl_net_rw_lock);
-
-	return 0;
-}
-
-/*
- * Shut down one ptllnd network interface.
- *
- * If the NI has a kptl_net_t it is detached from the NI, unlinked from
- * kptllnd_data.kptl_nets, flagged as shutting down and freed once its
- * reference count has dropped to zero.  When no nets remain the shared
- * state is torn down with kptllnd_base_shutdown().
- */
-void
-kptllnd_shutdown (lnet_ni_t *ni)
-{
-	kptl_net_t	*net = ni->ni_data;
-	unsigned long	 flags;
-	int		 nwaits;
-
-	LASSERT (kptllnd_data.kptl_init == PTLLND_INIT_ALL);
-
-	CDEBUG(D_MALLOC, "before LND cleanup: kmem %d\n",
-	       cfs_atomic_read (&libcfs_kmemory));
-
-	if (net != NULL) {
-		LASSERT (ni == net->net_ni);
-		LASSERT (!net->net_shutdown);
-		LASSERT (!cfs_list_empty(&net->net_list));
-		LASSERT (cfs_atomic_read(&net->net_refcount) != 0);
-		ni->ni_data = NULL;
-		net->net_ni = NULL;
-
-		/* Drop the reference taken at startup and unlink the net */
-		write_lock(&kptllnd_data.kptl_net_rw_lock);
-		kptllnd_net_decref(net);
-		cfs_list_del_init(&net->net_list);
-		write_unlock(&kptllnd_data.kptl_net_rw_lock);
-
-		/* Can't nuke peers here - they are shared among all NIs */
-		write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-		net->net_shutdown = 1;	/* Order with peer creation */
-		write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
-		/* Poll once a second; the message is raised to D_WARNING
-		 * whenever the counter is a power of two */
-		for (nwaits = 3;
-		     cfs_atomic_read(&net->net_refcount) != 0;
-		     nwaits++) {
-			CDEBUG(((nwaits & (-nwaits)) == nwaits) ?
-			       D_WARNING : D_NET,
-			       "Waiting for %d references to drop\n",
-			       cfs_atomic_read(&net->net_refcount));
-
-			cfs_pause(cfs_time_seconds(1));
-		}
-
-		LIBCFS_FREE(net, sizeof(*net));
-	}
-
-	/* NB no locking since I don't race with writers */
-	if (cfs_list_empty(&kptllnd_data.kptl_nets))
-		kptllnd_base_shutdown();
-
-	CDEBUG(D_MALLOC, "after LND cleanup: kmem %d\n",
-	       cfs_atomic_read (&libcfs_kmemory));
-}
-
-/*
- * Module load: check the wire constants, set up the tunables and, if that
- * succeeded, initialise ptltrace and register the LND with LNET.
- * Returns 0, or the error from kptllnd_tunables_init().
- */
-int __init
-kptllnd_module_init (void)
-{
-	int status;
-
-	kptllnd_assert_wire_constants();
-
-	status = kptllnd_tunables_init();
-	if (status == 0) {
-		kptllnd_init_ptltrace();
-		lnet_register_lnd(&kptllnd_lnd);
-	}
-
-	return status;
-}
-
-void __exit /* module unload: undoes kptllnd_module_init() in reverse order */
-kptllnd_module_fini (void)
-{
- lnet_unregister_lnd(&kptllnd_lnd); /* registered by kptllnd_module_init() */
- kptllnd_tunables_fini(); /* pairs with kptllnd_tunables_init() */
-}
-
-MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
-MODULE_DESCRIPTION("Kernel Portals LND v1.00");
-MODULE_LICENSE("GPL");
-
-module_init(kptllnd_module_init); /* load-time entry point */
-module_exit(kptllnd_module_fini); /* unload-time entry point */
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd.h
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <linux/random.h>
-
-#include <net/sock.h>
-#include <linux/in.h>
-
-
-#define DEBUG_SUBSYSTEM S_LND
-
-#include <libcfs/libcfs.h>
-#include <lnet/lnet.h>
-#include <lnet/lib-lnet.h>
-#include <lnet/lnet-sysctl.h>
-#include <portals/p30.h>
-#include <lnet/ptllnd.h> /* Depends on portals/p30.h */
-
-/*
- * Define this to enable console debug logging
- * and simulation
- */
-//#define PJK_DEBUGGING
-
-#ifdef CONFIG_SMP
-# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */
-#else
-# define PTLLND_N_SCHED 1 /* # schedulers */
-#endif
-
-#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1)
- /* when to eagerly return credits */
-
-typedef struct /* tunables; each field points at its value and is read as *kptllnd_tunables.<field> */
-{
- int *kptl_ntx; /* # tx descs to pre-allocate */
- int *kptl_max_nodes; /* max # nodes all talking to me */
- int *kptl_max_procs_per_node; /* max # processes per node (startup requires >= 1) */
- int *kptl_checksum; /* checksum kptl_msg_t? */
- int *kptl_timeout; /* comms timeout (seconds) */
- int *kptl_portal; /* portal number */
- int *kptl_pid; /* portals PID (self + kernel peers) */
- int *kptl_rxb_npages; /* number of pages for rx buffer */
- int *kptl_rxb_nspare; /* number of spare rx buffers */
- int *kptl_credits; /* number of credits */
- int *kptl_peertxcredits; /* number of peer tx credits (startup requires <= PTLLND_MSG_MAX_CREDITS) */
- int *kptl_peerrtrcredits; /* number of peer router credits */
- int *kptl_max_msg_size; /* max immediate message size; startup rounds it down to a multiple of 8 and raises it to at least PTLLND_MIN_BUFFER_SIZE */
- int *kptl_peer_hash_table_size; /* # slots in peer hash table */
- int *kptl_reschedule_loops; /* scheduler yield loops */
- int *kptl_ack_puts; /* make portals ack PUTs */
-#ifdef PJK_DEBUGGING
- int *kptl_simulation_bitmap;/* simulation bitmap; tested by IS_SIMULATION_ENABLED() */
-#endif
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
- struct ctl_table_header *kptl_sysctl; /* sysctl interface */
-#endif
-} kptl_tunables_t;
-
-#include "lnet/ptllnd_wire.h"
-
-/***********************************************************************/
-
-typedef struct kptl_data kptl_data_t;
-typedef struct kptl_net kptl_net_t;
-typedef struct kptl_rx_buffer kptl_rx_buffer_t;
-typedef struct kptl_peer kptl_peer_t;
-
-typedef struct { /* tag embedded in rx buffers and tx descriptors; kptllnd_eventarg2obj() maps it back to its container */
- char eva_type; /* one of PTLLND_EVENTARG_TYPE_* */
-} kptl_eventarg_t;
-
-#define PTLLND_EVENTARG_TYPE_MSG 0x1 /* embedded as kptl_tx_t::tx_msg_eventarg */
-#define PTLLND_EVENTARG_TYPE_RDMA 0x2 /* embedded as kptl_tx_t::tx_rdma_eventarg */
-#define PTLLND_EVENTARG_TYPE_BUF 0x3 /* embedded as kptl_rx_buffer_t::rxb_eventarg */
-
-typedef struct kptl_rx /* receive message */
-{
- cfs_list_t rx_list; /* queue for attention */
- kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */
- kptl_msg_t *rx_msg; /* received message */
- int rx_nob; /* received message size */
- unsigned long rx_treceived; /* time received */
- ptl_process_id_t rx_initiator; /* sender's address */
- kptl_peer_t *rx_peer; /* pointer to peer */
- char rx_space[0]; /* copy of incoming request; the "ptllnd_rx" slab adds *kptl_max_msg_size bytes after the struct for it */
-} kptl_rx_t;
-
-#define PTLLND_POSTRX_DONT_POST 0 /* don't post */
-#define PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */
-#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */
-
-typedef struct kptl_rx_buffer_pool /* bookkeeping for the set of rx buffers */
-{
- spinlock_t rxbp_lock; /* taken irqsave around rxb_refcount updates in kptllnd_rx_buffer_addref()/_decref() */
- cfs_list_t rxbp_list; /* all allocated buffers */
- int rxbp_count; /* # allocated buffers */
- int rxbp_reserved; /* # requests to buffer */
- int rxbp_shutdown; /* shutdown flag */
-} kptl_rx_buffer_pool_t;
-
-/*
- * One receive buffer belonging to a kptl_rx_buffer_pool_t.
- *
- * rxb_refcount is updated under rxb_pool->rxbp_lock; when it reaches
- * zero the buffer is either reposted directly or queued on
- * kptllnd_data.kptl_sched_rxbq via rxb_repost_list.
- */
-struct kptl_rx_buffer
-{
-	kptl_rx_buffer_pool_t	*rxb_pool;	/* owning pool */
-	cfs_list_t		 rxb_list;	/* for the rxb_pool list */
-	cfs_list_t		 rxb_repost_list;/* for the kptl_sched_rxbq list */
-	/* One-bit flags are unsigned: a signed 1-bit field can only hold
-	 * 0 and -1, so storing 1 would not read back as 1 */
-	unsigned int		 rxb_posted:1;	/* on the net */
-	unsigned int		 rxb_idle:1;	/* all done */
-	kptl_eventarg_t		 rxb_eventarg;	/* event->md.user_ptr */
-	int			 rxb_refcount;	/* reference count */
-	ptl_handle_md_t		 rxb_mdh;	/* the portals memory descriptor (MD) handle */
-	char			*rxb_buffer;	/* the buffer */
-
-};
-
-enum kptl_tx_type /* kind of transmit; selects the MD threshold/options in kptllnd_init_rdma_md() */
-{
- TX_TYPE_RESERVED = 0,
- TX_TYPE_SMALL_MESSAGE = 1,
- TX_TYPE_PUT_REQUEST = 2, /* passive: peer gets */
- TX_TYPE_GET_REQUEST = 3, /* passive: peer puts */
- TX_TYPE_PUT_RESPONSE = 4, /* active: I get */
- TX_TYPE_GET_RESPONSE = 5, /* active: I put */
-};
-
-typedef union { /* fragment array for an RDMA MD; filled with up to PTL_MD_MAX_IOV entries by kptllnd_init_rdma_md() */
-#ifdef _USING_LUSTRE_PORTALS_
- struct iovec iov[PTL_MD_MAX_IOV]; /* filled by lnet_extract_iov() */
- lnet_kiov_t kiov[PTL_MD_MAX_IOV]; /* filled by lnet_extract_kiov() */
-#else
- ptl_md_iovec_t iov[PTL_MD_MAX_IOV]; /* filled by kptllnd_extract_iov() or kptllnd_extract_phys() */
-#endif
-} kptl_fragvec_t;
-
-/*
- * Transmit descriptor.
- *
- * Reference counted through kptllnd_tx_addref()/kptllnd_tx_decref();
- * the final decref calls kptllnd_tx_fini().  tx_msg_eventarg and
- * tx_rdma_eventarg are the handles kptllnd_eventarg2obj() uses to get
- * back to this structure.
- */
-typedef struct kptl_tx				/* transmit message */
-{
-	cfs_list_t		 tx_list;	/* queue on idle_txs etc */
-	cfs_atomic_t		 tx_refcount;	/* reference count*/
-	enum kptl_tx_type	 tx_type;	/* small msg/{put,get}{req,resp} */
-	/* One-bit flags are unsigned: a signed 1-bit field can only hold
-	 * 0 and -1, so storing 1 would not read back as 1 */
-	unsigned int		 tx_active:1;	/* queued on the peer */
-	unsigned int		 tx_idle:1;	/* on the free list */
-	unsigned int		 tx_acked:1;	/* portals ACK wanted (for debug only) */
-	kptl_eventarg_t		 tx_msg_eventarg;  /* event->md.user_ptr */
-	kptl_eventarg_t		 tx_rdma_eventarg; /* event->md.user_ptr */
-	int			 tx_status;	/* the status of this tx descriptor */
-	ptl_handle_md_t		 tx_rdma_mdh;	/* RDMA buffer */
-	ptl_handle_md_t		 tx_msg_mdh;	/* the portals MD handle for the initial message */
-	lnet_msg_t		*tx_lnet_msg;	/* LNET message to finalize */
-	lnet_msg_t		*tx_lnet_replymsg; /* LNET reply message to finalize */
-	kptl_msg_t		*tx_msg;	/* the message data */
-	kptl_peer_t		*tx_peer;	/* the peer this is waiting on */
-	unsigned long		 tx_deadline;	/* deadline */
-	unsigned long		 tx_tposted;	/* time posted */
-	ptl_md_t		 tx_rdma_md;	/* rdma descriptor */
-	kptl_fragvec_t		*tx_frags;	/* buffer fragments */
-} kptl_tx_t;
-
-enum kptllnd_peer_state /* values of kptl_peer::peer_state */
-{
- PEER_STATE_UNINITIALIZED = 0,
- PEER_STATE_ALLOCATED = 1,
- PEER_STATE_WAITING_HELLO = 2,
- PEER_STATE_ACTIVE = 3,
- PEER_STATE_CLOSING = 4, /* presumably on kptl_closing_peers -- TODO confirm */
- PEER_STATE_ZOMBIE = 5, /* presumably on kptl_zombie_peers -- TODO confirm */
-};
-
-struct kptl_peer /* per-peer state; freed by kptllnd_peer_destroy() on the last kptllnd_peer_decref() */
-{
- cfs_list_t peer_list; /* list linkage -- presumably a kptl_peers hash bucket or the closing/zombie list; TODO confirm */
- cfs_atomic_t peer_refcount; /* The current references */
- enum kptllnd_peer_state peer_state; /* one of PEER_STATE_* */
- spinlock_t peer_lock; /* serialize */
- cfs_list_t peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */
- cfs_list_t peer_sendq; /* txs waiting for mh handles */
- cfs_list_t peer_activeq; /* txs awaiting completion */
- lnet_process_id_t peer_id; /* Peer's LNET id */
- ptl_process_id_t peer_ptlid; /* Peer's portals id */
- __u64 peer_incarnation; /* peer's incarnation */
- __u64 peer_myincarnation; /* my incarnation at HELLO */
- int peer_sent_hello; /* have I sent HELLO? */
- int peer_credits; /* number of send credits */
- int peer_outstanding_credits;/* number of peer credits to return */
- int peer_sent_credits; /* #msg buffers posted for peer */
- int peer_max_msg_size; /* peer's rx buffer size */
- int peer_error; /* errno on closing this peer */
- int peer_retry_noop; /* need to retry returning credits */
- int peer_check_stamp; /* watchdog check stamp */
- cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */
- __u64 peer_next_matchbits; /* Next value to register RDMA from peer */
- __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */
-};
-
-struct kptl_data /* state shared by all ptllnd NIs; zeroed and set up in kptllnd_base_startup() */
-{
- int kptl_init; /* initialisation state (PTLLND_INIT_*) */
- volatile int kptl_shutdown; /* shut down? */
- cfs_atomic_t kptl_nthreads; /* # live threads */
- ptl_handle_ni_t kptl_nih; /* network interface handle */
- ptl_process_id_t kptl_portals_id; /* Portals ID of interface */
- __u64 kptl_incarnation; /* which one am I */
- ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */
-
- rwlock_t kptl_net_rw_lock; /* serialise... */
- cfs_list_t kptl_nets; /* kptl_net instance*/
-
- spinlock_t kptl_sched_lock; /* serialise... */
- wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */
- cfs_list_t kptl_sched_txq; /* tx requiring attention */
- cfs_list_t kptl_sched_rxq; /* rx requiring attention */
- cfs_list_t kptl_sched_rxbq; /* rxb requiring reposting */
-
- wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */
-
- kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */
- struct kmem_cache *kptl_rx_cache; /* rx descriptor cache */
-
- cfs_atomic_t kptl_ntx; /* # tx descs allocated */
- spinlock_t kptl_tx_lock; /* serialise idle tx list*/
- cfs_list_t kptl_idle_txs; /* idle tx descriptors */
-
- rwlock_t kptl_peer_rw_lock; /* lock for peer table */
- cfs_list_t *kptl_peers; /* hash table of all my known peers */
- cfs_list_t kptl_closing_peers; /* peers being closed */
- cfs_list_t kptl_zombie_peers; /* peers waiting for refs to drain */
- int kptl_peer_hash_size; /* size of kptl_peers */
- int kptl_npeers; /* # peers extant */
- int kptl_n_active_peers; /* # active peers */
- int kptl_expected_peers; /* # peers I can buffer HELLOs from (max_nodes * max_procs_per_node) */
-
- kptl_msg_t *kptl_nak_msg; /* common NAK message */
- spinlock_t kptl_ptlid2str_lock; /* serialise str ops */
-};
-
-struct kptl_net /* per-NI state; allocated in kptllnd_startup(), freed in kptllnd_shutdown() */
-{
- cfs_list_t net_list; /* chain on kptl_data:: kptl_nets */
- lnet_ni_t *net_ni; /* the NI this net belongs to; cleared by kptllnd_shutdown() */
- cfs_atomic_t net_refcount; /* # current references */
- int net_shutdown; /* lnd_shutdown called */
-};
-
-enum /* values of kptllnd_data.kptl_init */
-{
- PTLLND_INIT_NOTHING = 0, /* kptllnd_startup() runs kptllnd_base_startup() in this state */
- PTLLND_INIT_DATA, /* lists/ptrs/locks initialised */
- PTLLND_INIT_ALL, /* everything initialised */
-};
-
-extern kptl_tunables_t kptllnd_tunables;
-extern kptl_data_t kptllnd_data;
-
-/*
- * Build an LNET NID on the network of 'ni_nid' from a Portals NID.
- * With Lustre portals only the address part of 'ptl_nid' is used;
- * otherwise 'ptl_nid' is used as the address as-is.
- */
-static inline lnet_nid_t
-kptllnd_ptl2lnetnid(lnet_nid_t ni_nid, ptl_nid_t ptl_nid)
-{
-#ifndef _USING_LUSTRE_PORTALS_
-	return LNET_MKNID(LNET_NIDNET(ni_nid), ptl_nid);
-#else
-	return LNET_MKNID(LNET_NIDNET(ni_nid), LNET_NIDADDR(ptl_nid));
-#endif
-}
-
-/*
- * Convert an LNET NID to a Portals NID.
- * With Lustre portals the result combines the network part of our own
- * Portals NID with the address part of 'lnet_nid'; otherwise it is just
- * the address part of 'lnet_nid'.
- */
-static inline ptl_nid_t
-kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid)
-{
-#ifndef _USING_LUSTRE_PORTALS_
-	return LNET_NIDADDR(lnet_nid);
-#else
-	return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_portals_id.nid),
-			  LNET_NIDADDR(lnet_nid));
-#endif
-}
-
-int kptllnd_startup(lnet_ni_t *ni);
-void kptllnd_shutdown(lnet_ni_t *ni);
-int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg);
-void kptllnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when);
-int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg);
-int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg,
- int delayed, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int mlen, unsigned int rlen);
-int kptllnd_eager_recv(struct lnet_ni *ni, void *private,
- lnet_msg_t *msg, void **new_privatep);
-void kptllnd_eq_callback(ptl_event_t *evp);
-int kptllnd_scheduler(void *arg);
-int kptllnd_watchdog(void *arg);
-int kptllnd_thread_start(int (*fn)(void *arg), void *arg); /* NOTE(review): kptllnd_base_startup() passes a third 'name' argument when starting the watchdog -- confirm this prototype against the definition */
-int kptllnd_tunables_init(void);
-void kptllnd_tunables_fini(void);
-
-const char *kptllnd_evtype2str(int evtype);
-const char *kptllnd_msgtype2str(int msgtype);
-const char *kptllnd_errtype2str(int errtype);
-
-/*
- * Map an event argument back to the object that embeds it: a kptl_tx_t
- * for MSG and RDMA arguments, a kptl_rx_buffer_t for BUF arguments.
- * Any other type is a bug (LBUG).
- */
-static inline void *
-kptllnd_eventarg2obj (kptl_eventarg_t *eva)
-{
-	switch (eva->eva_type) {
-	case PTLLND_EVENTARG_TYPE_MSG:
-		return cfs_list_entry(eva, kptl_tx_t, tx_msg_eventarg);
-	case PTLLND_EVENTARG_TYPE_RDMA:
-		return cfs_list_entry(eva, kptl_tx_t, tx_rdma_eventarg);
-	case PTLLND_EVENTARG_TYPE_BUF:
-		break;
-	default:
-		LBUG();
-	}
-
-	/* BUF, and also where an unknown type lands should LBUG() return */
-	return cfs_list_entry(eva, kptl_rx_buffer_t, rxb_eventarg);
-}
-
-/*
- * RX BUFFER SUPPORT FUNCTIONS
- */
-void kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp);
-void kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp);
-int kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count);
-void kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, int count);
-void kptllnd_rx_buffer_callback(ptl_event_t *ev);
-void kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb);
-
-/* Size in bytes of one rx buffer: the rxb_npages tunable times PAGE_SIZE */
-static inline int
-kptllnd_rx_buffer_size(void)
-{
-	int npages = *kptllnd_tunables.kptl_rxb_npages;
-
-	return npages * PAGE_SIZE;
-}
-
-/* Take a reference on 'rxb' under its pool's lock (irq-safe) */
-static inline void
-kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb)
-{
-	kptl_rx_buffer_pool_t	*pool = rxb->rxb_pool;
-	unsigned long		 irqflags;
-
-	spin_lock_irqsave(&pool->rxbp_lock, irqflags);
-	rxb->rxb_refcount += 1;
-	spin_unlock_irqrestore(&pool->rxbp_lock, irqflags);
-}
-
-/*
- * Drop a reference on 'rxb' without taking the pool lock here.  When
- * the count reaches zero the buffer is queued on kptl_sched_rxbq and
- * the schedulers are woken so it gets reposted.
- */
-static inline void
-kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb)
-{
-	rxb->rxb_refcount--;
-	if (rxb->rxb_refcount != 0)
-		return;
-
-	spin_lock(&kptllnd_data.kptl_sched_lock);
-
-	cfs_list_add_tail(&rxb->rxb_repost_list,
-			  &kptllnd_data.kptl_sched_rxbq);
-	wake_up(&kptllnd_data.kptl_sched_waitq);
-
-	spin_unlock(&kptllnd_data.kptl_sched_lock);
-}
-
-/*
- * Drop a reference on 'rxb' under its pool's lock and, if that was the
- * last one, repost the buffer directly (after the lock is released).
- */
-static inline void
-kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb)
-{
-	unsigned long	irqflags;
-	int		remaining;
-
-	spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, irqflags);
-	rxb->rxb_refcount--;
-	remaining = rxb->rxb_refcount;
-	spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, irqflags);
-
-	if (remaining != 0)
-		return;
-
-	kptllnd_rx_buffer_post(rxb);
-}
-
-/*
- * RX SUPPORT FUNCTIONS
- */
-void kptllnd_rx_parse(kptl_rx_t *rx);
-void kptllnd_rx_done(kptl_rx_t *rx, int post_credit);
-
-/*
- * PEER SUPPORT FUNCTIONS
- */
-int kptllnd_get_peer_info(int index,
- lnet_process_id_t *id,
- int *state, int *sent_hello,
- int *refcount, __u64 *incarnation,
- __u64 *next_matchbits, __u64 *last_matchbits_seen,
- int *nsendq, int *nactiveq,
- int *credits, int *outstanding_credits);
-void kptllnd_peer_destroy(kptl_peer_t *peer);
-int kptllnd_peer_del(lnet_process_id_t id);
-void kptllnd_peer_close_locked(kptl_peer_t *peer, int why);
-void kptllnd_peer_close(kptl_peer_t *peer, int why);
-void kptllnd_handle_closing_peers(void);
-int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid);
-void kptllnd_peer_check_sends(kptl_peer_t *peer);
-void kptllnd_peer_check_bucket(int idx, int stamp);
-void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag);
-int kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target,
- kptl_peer_t **peerp);
-kptl_peer_t *kptllnd_peer_handle_hello(kptl_net_t *net,
- ptl_process_id_t initiator,
- kptl_msg_t *msg);
-kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id);
-void kptllnd_peer_alive(kptl_peer_t *peer);
-
-static inline void /* take one reference on 'peer' */
-kptllnd_peer_addref (kptl_peer_t *peer)
-{
- cfs_atomic_inc(&peer->peer_refcount);
-}
-
-/* Drop one reference on 'peer'; the last reference destroys it */
-static inline void
-kptllnd_peer_decref (kptl_peer_t *peer)
-{
-	if (!cfs_atomic_dec_and_test(&peer->peer_refcount))
-		return;
-
-	kptllnd_peer_destroy(peer);
-}
-
-static inline void /* take one reference on 'net'; the caller must already hold one */
-kptllnd_net_addref (kptl_net_t *net)
-{
- LASSERT (cfs_atomic_read(&net->net_refcount) > 0);
- cfs_atomic_inc(&net->net_refcount);
-}
-
-static inline void /* drop one reference on 'net'; nothing is freed here -- kptllnd_shutdown() waits for zero and frees the net */
-kptllnd_net_decref (kptl_net_t *net)
-{
- LASSERT (cfs_atomic_read(&net->net_refcount) > 0);
- cfs_atomic_dec(&net->net_refcount);
-}
-
-static inline void /* bind 'tx' to 'peer'; the tx must not have a peer yet */
-kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer)
-{
- LASSERT (tx->tx_peer == NULL);
-
- kptllnd_peer_addref(peer); /* reference held through tx->tx_peer */
- tx->tx_peer = peer;
-}
-
-/*
- * Return the peer hash bucket for 'nid'.
- * Only one copy of peer state exists for all logical peers, so the net
- * part of the NID is ignored; e.g. A@ptl0 and A@ptl2 share peer state.
- */
-static inline cfs_list_t *
-kptllnd_nid2peerlist(lnet_nid_t nid)
-{
-	unsigned int addr = (unsigned int)LNET_NIDADDR(nid);
-	unsigned int slot = addr % kptllnd_data.kptl_peer_hash_size;
-
-	return kptllnd_data.kptl_peers + slot;
-}
-
-/*
- * Look up the peer for 'id': kptllnd_id2peer_locked() called with the
- * peer table read lock held (irq-safe).  Returns its result unchanged.
- */
-static inline kptl_peer_t *
-kptllnd_id2peer(lnet_process_id_t id)
-{
-	unsigned long	 irqflags;
-	kptl_peer_t	*found;
-
-	read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, irqflags);
-	found = kptllnd_id2peer_locked(id);
-	read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, irqflags);
-
-	return found;
-}
-
-/* Reserve 'n' rx buffers from the global rx buffer pool */
-static inline int
-kptllnd_reserve_buffers(int n)
-{
-	kptl_rx_buffer_pool_t *pool = &kptllnd_data.kptl_rx_buffer_pool;
-
-	return kptllnd_rx_buffer_pool_reserve(pool, n);
-}
-
-/* Reserve one peer's worth of rx buffers: peertxcredits of them */
-static inline int
-kptllnd_peer_reserve_buffers(void)
-{
-	int ncredits = *kptllnd_tunables.kptl_peertxcredits;
-
-	return kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool,
-					      ncredits);
-}
-
-/* Give back one peer's worth of rx buffers: peertxcredits of them */
-static inline void
-kptllnd_peer_unreserve_buffers(void)
-{
-	kptl_rx_buffer_pool_t	*pool = &kptllnd_data.kptl_rx_buffer_pool;
-	int			 ncredits = *kptllnd_tunables.kptl_peertxcredits;
-
-	kptllnd_rx_buffer_pool_unreserve(pool, ncredits);
-}
-
-/*
- * TX SUPPORT FUNCTIONS
- */
-int kptllnd_setup_tx_descs(void);
-void kptllnd_cleanup_tx_descs(void);
-void kptllnd_tx_fini(kptl_tx_t *tx);
-void kptllnd_cancel_txlist(cfs_list_t *peerq, cfs_list_t *txs);
-void kptllnd_restart_txs(kptl_net_t *net, lnet_process_id_t id,
- cfs_list_t *restarts);
-kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose);
-void kptllnd_tx_callback(ptl_event_t *ev);
-const char *kptllnd_tx_typestr(int type);
-
-static inline void /* take one reference on 'tx' */
-kptllnd_tx_addref(kptl_tx_t *tx)
-{
- cfs_atomic_inc(&tx->tx_refcount);
-}
-
-/*
- * Drop one reference on 'tx'; the last reference finalises it with
- * kptllnd_tx_fini().  Must not be called from interrupt context.
- */
-static inline void
-kptllnd_tx_decref(kptl_tx_t *tx)
-{
-	LASSERT (!in_interrupt());	/* Thread context only */
-
-	if (!cfs_atomic_dec_and_test(&tx->tx_refcount))
-		return;
-
-	kptllnd_tx_fini(tx);
-}
-
-/*
- * MESSAGE SUPPORT FUNCTIONS
- */
-void kptllnd_init_msg(kptl_msg_t *msg, int type,
- lnet_process_id_t target, int body_nob);
-void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer);
-int kptllnd_msg_unpack(kptl_msg_t *msg, int nob);
-
-/*
- * MISC SUPPORT FUNCTIONS
- */
-void kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov,
- struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, unsigned int nob);
-char *kptllnd_ptlid2str(ptl_process_id_t id);
-
-void kptllnd_init_ptltrace(void);
-void kptllnd_dump_ptltrace(void);
-
-#ifdef PJK_DEBUGGING /* fault simulation: each SIMULATION_* value is a bit index into *kptl_simulation_bitmap */
-#define SIMULATION_FAIL_TX_PUT_ALLOC 0 /* 0x00000001 */
-#define SIMULATION_FAIL_TX_GET_ALLOC 1 /* 0x00000002 */
-#define SIMULATION_FAIL_TX 2 /* 0x00000004 */
-#define SIMULATION_FAIL_RX_ALLOC 3 /* 0x00000008 */
-
-#define IS_SIMULATION_ENABLED(x) \
- (((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0)
-#else
-#define IS_SIMULATION_ENABLED(x) 0 /* always false when PJK_DEBUGGING is not defined */
-#endif
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd_cb.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-#include "ptllnd.h"
-
-#ifndef _USING_LUSTRE_PORTALS_
-/*
- * Fill 'dst' with the part of 'src' that starts 'offset' bytes in and
- * is exactly 'len' bytes long.  Returns the number of 'dst' entries
- * used (0 when 'len' is 0).  'src' itself is not modified.
- */
-int
-kptllnd_extract_iov (int dst_niov, ptl_md_iovec_t *dst,
-                     int src_niov, struct iovec *src,
-                     unsigned int offset, unsigned int len)
-{
-	unsigned int	nfrags;
-	unsigned int	avail;
-
-	if (len == 0)			/* no data => */
-		return (0);		/* no frags */
-
-	/* Skip the source fragments that lie wholly before 'offset' */
-	LASSERT (src_niov > 0);
-	for (; offset >= src->iov_len; src++) {
-		offset -= src->iov_len;
-		src_niov--;
-		LASSERT (src_niov > 0);
-	}
-
-	/* Copy whole fragments until the one that satisfies 'len' */
-	for (nfrags = 1; ; nfrags++, dst++, src++, src_niov--, offset = 0) {
-		LASSERT (src_niov > 0);
-		LASSERT (nfrags <= dst_niov);
-
-		avail = src->iov_len - offset;
-		dst->iov_base = ((char *)src->iov_base) + offset;
-
-		if (len <= avail)
-			break;
-
-		dst->iov_len = avail;
-		len -= avail;
-	}
-
-	/* The final fragment carries whatever is left */
-	dst->iov_len = len;
-	return (nfrags);
-}
-
-/*
- * Fill 'dst' with the physical addresses of the part of 'src' that
- * starts 'offset' bytes in and is exactly 'len' bytes long.  Returns
- * the number of 'dst' entries used (0 when 'len' is 0).  'src' itself
- * is not modified.
- */
-int
-kptllnd_extract_phys (int dst_niov, ptl_md_iovec_t *dst,
-                      int src_niov, lnet_kiov_t *src,
-                      unsigned int offset, unsigned int len)
-{
-	unsigned int	nfrags;
-	unsigned int	chunk;
-	__u64		paddr;
-
-	if (len == 0)			/* no data => */
-		return (0);		/* no frags */
-
-	/* Skip the source pages that lie wholly before 'offset' */
-	LASSERT (src_niov > 0);
-	for (; offset >= src->kiov_len; src++) {
-		offset -= src->kiov_len;
-		src_niov--;
-		LASSERT (src_niov > 0);
-	}
-
-	for (nfrags = 1; ; nfrags++, dst++, src++, src_niov--, offset = 0) {
-		LASSERT (src_niov > 0);
-		LASSERT (nfrags <= dst_niov);
-
-		chunk = min(src->kiov_len - offset, len);
-		paddr = (__u64)lnet_page2phys(src->kiov_page) +
-			src->kiov_offset + offset;
-
-		/* With 32-bit pointers the whole fragment must sit below 4GB */
-		LASSERT (sizeof(void *) > 4 ||
-			 (paddr <= 0xffffffffULL &&
-			  paddr + (chunk - 1) <= 0xffffffffULL));
-
-		dst->iov_base = (void *)((unsigned long)paddr);
-		dst->iov_len = chunk;
-
-		if (chunk == len)
-			return nfrags;
-
-		len -= chunk;
-	}
-}
-#endif
-
-/*
- * Set up tx->tx_rdma_md for an RDMA described by either 'iov' or 'kiov'
- * (never both), covering 'nob' bytes starting at 'offset'.
- *
- * The event threshold and PUT/GET options follow tx->tx_type.  The
- * fragments are extracted into tx->tx_frags and md.length receives the
- * fragment count returned by the extract helper (0 when 'nob' is 0).
- */
-void
-kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov,
-                     struct iovec *iov, lnet_kiov_t *kiov,
-                     unsigned int offset, unsigned int nob)
-{
-	ptl_md_t *md = &tx->tx_rdma_md;
-
-	LASSERT (iov == NULL || kiov == NULL);
-
-	memset(md, 0, sizeof(*md));
-
-	md->start     = tx->tx_frags;
-	md->user_ptr  = &tx->tx_rdma_eventarg;
-	md->eq_handle = kptllnd_data.kptl_eqh;
-	md->options   = PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
-			PTL_MD_EVENT_START_DISABLE;
-
-	switch (tx->tx_type) {
-	default:
-		LBUG();
-		/* deliberately continues into the next case */
-
-	case TX_TYPE_PUT_REQUEST:		/* passive: peer gets */
-		md->threshold = 1;		/* GET event */
-		md->options |= PTL_MD_OP_GET;
-		break;
-
-	case TX_TYPE_GET_REQUEST:		/* passive: peer puts */
-		md->threshold = 1;		/* PUT event */
-		md->options |= PTL_MD_OP_PUT;
-		break;
-
-	case TX_TYPE_PUT_RESPONSE:		/* active: I get */
-		md->threshold = 2;		/* SEND + REPLY */
-		break;
-
-	case TX_TYPE_GET_RESPONSE:		/* active: I put */
-		md->threshold = tx->tx_acked ? 2 : 1; /* SEND + ACK? */
-		break;
-	}
-
-	/* Nothing to transfer: leave the fragment vector untouched */
-	if (nob == 0) {
-		md->length = 0;
-		return;
-	}
-
-#ifdef _USING_LUSTRE_PORTALS_
-	if (iov != NULL) {
-		md->options |= PTL_MD_IOVEC;
-		md->length =
-			lnet_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov,
-					 niov, iov, offset, nob);
-		return;
-	}
-
-	/* Cheating OK since ptl_kiov_t == lnet_kiov_t */
-	CLASSERT(sizeof(ptl_kiov_t) == sizeof(lnet_kiov_t));
-	CLASSERT(offsetof(ptl_kiov_t, kiov_offset) ==
-		 offsetof(lnet_kiov_t, kiov_offset));
-	CLASSERT(offsetof(ptl_kiov_t, kiov_page) ==
-		 offsetof(lnet_kiov_t, kiov_page));
-	CLASSERT(offsetof(ptl_kiov_t, kiov_len) ==
-		 offsetof(lnet_kiov_t, kiov_len));
-
-	md->options |= PTL_MD_KIOV;
-	md->length =
-		lnet_extract_kiov(PTL_MD_MAX_IOV, tx->tx_frags->kiov,
-				  niov, kiov, offset, nob);
-#else
-	if (iov != NULL) {
-		md->options |= PTL_MD_IOVEC;
-		md->length =
-			kptllnd_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov,
-					    niov, iov, offset, nob);
-		return;
-	}
-
-	/* Page fragments are handed over as physical addresses */
-	md->options |= PTL_MD_IOVEC | PTL_MD_PHYS;
-	md->length =
-		kptllnd_extract_phys(PTL_MD_MAX_IOV, tx->tx_frags->iov,
-				     niov, kiov, offset, nob);
-#endif
-}
-
-/*
- * Start the active side of an RDMA in response to the request held in 'rx'.
- *
- * 'type' must be TX_TYPE_PUT_RESPONSE (a PtlGet is issued) or
- * TX_TYPE_GET_RESPONSE (a PtlPut is issued). The local buffer is described
- * by niov/iov/kiov/offset/nob exactly as for kptllnd_init_rdma_md().
- *
- * Returns -ENOMEM if no tx descriptor is available and -EIO if the memory
- * descriptor cannot be bound. Once the tx has been queued on the peer the
- * function returns 0 even if the PtlPut/PtlGet itself fails; in that case
- * the peer is closed with -EIO.
- */
-int
-kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
- unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
- unsigned int offset, int nob)
-{
- kptl_tx_t *tx;
- ptl_err_t ptlrc;
- kptl_msg_t *rxmsg = rx->rx_msg;
- kptl_peer_t *peer = rx->rx_peer;
- unsigned long flags;
- ptl_handle_md_t mdh;
-
- LASSERT (type == TX_TYPE_PUT_RESPONSE ||
- type == TX_TYPE_GET_RESPONSE);
-
- tx = kptllnd_get_idle_tx(type);
- if (tx == NULL) {
- CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
- type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
- libcfs_id2str(peer->peer_id));
- return -ENOMEM;
- }
-
- kptllnd_set_tx_peer(tx, peer);
- kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);
-
- ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md,
- PTL_UNLINK, &mdh);
- if (ptlrc != PTL_OK) {
- CERROR("PtlMDBind(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(ptlrc), ptlrc);
- /* tx_lnet_msg has not been set yet, so the error is reported
- * to the caller through the return value */
- tx->tx_status = -EIO;
- kptllnd_tx_decref(tx);
- return -EIO;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- tx->tx_lnet_msg = lntmsg;
- /* lnet_finalize() will be called when tx is torn down, so I must
- * return success from here on... */
-
- tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
- tx->tx_rdma_mdh = mdh;
- tx->tx_active = 1;
- cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- /* peer has now got my ref on 'tx' */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- tx->tx_tposted = jiffies;
-
- /* GET_RESPONSE pushes my data to the peer; PUT_RESPONSE pulls the
- * peer's data. Both target the match bits carried in the request. */
- if (type == TX_TYPE_GET_RESPONSE)
- ptlrc = PtlPut(mdh,
- tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
- rx->rx_initiator,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- rxmsg->ptlm_u.rdma.kptlrm_matchbits,
- 0, /* offset */
- (lntmsg != NULL) ? /* header data */
- PTLLND_RDMA_OK :
- PTLLND_RDMA_FAIL);
- else
- ptlrc = PtlGet(mdh,
- rx->rx_initiator,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- rxmsg->ptlm_u.rdma.kptlrm_matchbits,
- 0); /* offset */
-
- if (ptlrc != PTL_OK) {
- CERROR("Ptl%s failed: %s(%d)\n",
- (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
- kptllnd_errtype2str(ptlrc), ptlrc);
-
- kptllnd_peer_close(peer, -EIO);
- /* Everything (including this RDMA) queued on the peer will
- * be completed with failure */
- }
-
- return 0;
-}
-
-/*
- * LND send entry point: transmit 'lntmsg' over 'ni'.
- *
- * PUT/REPLY payloads that are page-based (kiov) or too large for the peer's
- * message buffers, and non-routed GETs whose reply would be too large, are
- * sent as a small request that sets up a passive RDMA.  Everything else
- * (including ACKs) is sent as a single immediate message with the payload
- * copied inline.
- *
- * Returns 0 once a tx has been launched, the error from
- * kptllnd_find_target() if the peer cannot be found, or -ENOMEM if a
- * descriptor or reply message cannot be allocated.  Every exit goes through
- * 'out' so the peer ref and the memory-pressure flag are always restored.
- */
-int
-kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg)
-{
-        lnet_hdr_t       *hdr = &lntmsg->msg_hdr;
-        int               type = lntmsg->msg_type;
-        lnet_process_id_t target = lntmsg->msg_target;
-        int               target_is_router = lntmsg->msg_target_is_router;
-        int               routing = lntmsg->msg_routing;
-        unsigned int      payload_niov = lntmsg->msg_niov;
-        struct iovec     *payload_iov = lntmsg->msg_iov;
-        lnet_kiov_t      *payload_kiov = lntmsg->msg_kiov;
-        unsigned int      payload_offset = lntmsg->msg_offset;
-        unsigned int      payload_nob = lntmsg->msg_len;
-        kptl_net_t       *net = ni->ni_data;
-        kptl_peer_t      *peer = NULL;
-        int               mpflag = 0;
-        kptl_tx_t        *tx;
-        int               nob;
-        int               nfrag;
-        int               rc;
-
-        LASSERT (net->net_ni == ni);
-        LASSERT (!net->net_shutdown);
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= LNET_MAX_IOV);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV); /* !!! */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-        LASSERT (!in_interrupt());
-
-        if (lntmsg->msg_vmflush)
-                mpflag = cfs_memory_pressure_get_and_set();
-
-        rc = kptllnd_find_target(net, target, &peer);
-        if (rc != 0)
-                goto out;
-
-        /* NB peer->peer_id does NOT always equal target, be careful with
-         * which one to use */
-        switch (type) {
-        default:
-                LBUG();
-                /* leave via 'out' so the peer ref and memory-pressure flag
-                 * are released even if LBUG() ever returns */
-                rc = -EINVAL;
-                goto out;
-
-        case LNET_MSG_REPLY:
-        case LNET_MSG_PUT:
-                /* Should the payload avoid RDMA? */
-                nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[payload_nob]);
-                if (payload_kiov == NULL &&
-                    nob <= peer->peer_max_msg_size)
-                        break;
-
-                tx = kptllnd_get_idle_tx(TX_TYPE_PUT_REQUEST);
-                if (tx == NULL) {
-                        CERROR("Can't send %s to %s: can't allocate descriptor\n",
-                               lnet_msgtyp2str(type),
-                               libcfs_id2str(target));
-                        rc = -ENOMEM;
-                        goto out;
-                }
-
-                kptllnd_init_rdma_md(tx, payload_niov,
-                                     payload_iov, payload_kiov,
-                                     payload_offset, payload_nob);
-
-                tx->tx_lnet_msg = lntmsg;
-                tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr;
-                kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_PUT,
-                                  target, sizeof(kptl_rdma_msg_t));
-
-                CDEBUG(D_NETTRACE, "%s: passive PUT p %d %p\n",
-                       libcfs_id2str(target),
-                       le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx);
-
-                kptllnd_tx_launch(peer, tx, 0);
-                goto out;
-
-        case LNET_MSG_GET:
-                /* routed gets don't RDMA */
-                if (target_is_router || routing)
-                        break;
-
-                /* Is the payload small enough not to need RDMA? */
-                nob = lntmsg->msg_md->md_length;
-                nob = offsetof(kptl_msg_t,
-                               ptlm_u.immediate.kptlim_payload[nob]);
-                if (nob <= peer->peer_max_msg_size)
-                        break;
-
-                tx = kptllnd_get_idle_tx(TX_TYPE_GET_REQUEST);
-                if (tx == NULL) {
-                        CERROR("Can't send GET to %s: can't allocate descriptor\n",
-                               libcfs_id2str(target));
-                        rc = -ENOMEM;
-                        goto out;
-                }
-
-                tx->tx_lnet_replymsg = lnet_create_reply_msg(ni, lntmsg);
-                if (tx->tx_lnet_replymsg == NULL) {
-                        CERROR("Failed to allocate LNET reply for %s\n",
-                               libcfs_id2str(target));
-                        kptllnd_tx_decref(tx);
-                        rc = -ENOMEM;
-                        goto out;
-                }
-
-                /* the RDMA sink is the GET's own MD, in whichever form
-                 * (iov or kiov) it was set up with */
-                if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0)
-                        kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov,
-                                             lntmsg->msg_md->md_iov.iov, NULL,
-                                             0, lntmsg->msg_md->md_length);
-                else
-                        kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov,
-                                             NULL, lntmsg->msg_md->md_iov.kiov,
-                                             0, lntmsg->msg_md->md_length);
-
-                tx->tx_lnet_msg = lntmsg;
-                tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr;
-                kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET,
-                                  target, sizeof(kptl_rdma_msg_t));
-
-                /* this is a GET, so trace the GET header's portal index
-                 * (not the PUT member of the header union) */
-                CDEBUG(D_NETTRACE, "%s: passive GET p %d %p\n",
-                       libcfs_id2str(target),
-                       le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index), tx);
-
-                kptllnd_tx_launch(peer, tx, 0);
-                goto out;
-
-        case LNET_MSG_ACK:
-                CDEBUG(D_NET, "LNET_MSG_ACK\n");
-                LASSERT (payload_nob == 0);
-                break;
-        }
-
-        /* I don't have to handle kiovs */
-        LASSERT (payload_nob == 0 || payload_iov != NULL);
-
-        tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
-        if (tx == NULL) {
-                CERROR("Can't send %s to %s: can't allocate descriptor\n",
-                       lnet_msgtyp2str(type), libcfs_id2str(target));
-                rc = -ENOMEM;
-                goto out;
-        }
-
-        tx->tx_lnet_msg = lntmsg;
-        tx->tx_msg->ptlm_u.immediate.kptlim_hdr = *hdr;
-
-        if (payload_nob == 0) {
-                nfrag = 0;
-        } else {
-                /* fragment 0 is the message header; the payload fragments
-                 * follow it */
-                tx->tx_frags->iov[0].iov_base = tx->tx_msg;
-                tx->tx_frags->iov[0].iov_len = offsetof(kptl_msg_t,
-                                                        ptlm_u.immediate.kptlim_payload);
-
-                /* NB relying on lustre not asking for PTL_MD_MAX_IOV
-                 * fragments!! */
-#ifdef _USING_LUSTRE_PORTALS_
-                nfrag = 1 + lnet_extract_iov(PTL_MD_MAX_IOV - 1,
-                                             &tx->tx_frags->iov[1],
-                                             payload_niov, payload_iov,
-                                             payload_offset, payload_nob);
-#else
-                nfrag = 1 + kptllnd_extract_iov(PTL_MD_MAX_IOV - 1,
-                                                &tx->tx_frags->iov[1],
-                                                payload_niov, payload_iov,
-                                                payload_offset, payload_nob);
-#endif
-        }
-
-        nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]);
-        kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, target, nob);
-
-        /* 'type' is the same host-order value the switch above dispatched
-         * on, so compare it directly; only the wire header fields need
-         * le32_to_cpu() */
-        CDEBUG(D_NETTRACE, "%s: immediate %s p %d %p\n",
-               libcfs_id2str(target),
-               lnet_msgtyp2str(lntmsg->msg_type),
-               (type == LNET_MSG_PUT) ?
-               le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index) :
-               (type == LNET_MSG_GET) ?
-               le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index) : -1,
-               tx);
-
-        kptllnd_tx_launch(peer, tx, nfrag);
-
- out:
-        if (lntmsg->msg_vmflush)
-                cfs_memory_pressure_restore(mpflag);
-        if (peer)
-                kptllnd_peer_decref(peer);
-        return rc;
-}
-
-/*
- * Copy the message held by the rx descriptor passed in 'private' out of its
- * receive buffer into rx->rx_space, and drop the rx's reference on that
- * buffer. Always returns 0; 'ni', 'msg' and 'new_privatep' are not used.
- */
-int
-kptllnd_eager_recv(struct lnet_ni *ni, void *private,
- lnet_msg_t *msg, void **new_privatep)
-{
- kptl_rx_t *rx = private;
-
- CDEBUG(D_NET, "Eager RX=%p RXB=%p\n", rx, rx->rx_rxb);
-
- /* I have to release my ref on rxb (if I have one) to ensure I'm an
- * eager receiver, so I copy the incoming request from the buffer it
- * landed in, into space reserved in the descriptor... */
-
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- if (rx->rx_rxb == NULL) /* already copied */
- return 0; /* to fix alignment */
-#else
- LASSERT(rx->rx_rxb != NULL);
-#endif
- LASSERT(rx->rx_nob <= *kptllnd_tunables.kptl_max_msg_size);
-
- /* from here on rx_msg points at the private copy */
- memcpy(rx->rx_space, rx->rx_msg, rx->rx_nob);
- rx->rx_msg = (kptl_msg_t *)rx->rx_space;
-
- kptllnd_rx_buffer_decref(rx->rx_rxb);
- rx->rx_rxb = NULL;
-
- return 0;
-}
-
-
-/*
- * LND receive entry point: deliver the message held by the rx descriptor in
- * 'private' into the buffer described by niov/iov/kiov/offset/mlen.
- *
- * Immediate messages are copied out of the rx and finalized here; GET and
- * PUT requests start the active RDMA via kptllnd_active_rdma().  The rx is
- * always completed before returning.  Returns 0, -EINVAL for an oversized
- * immediate message, or the result of kptllnd_active_rdma().
- */
-int
-kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed,
-              unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
-              unsigned int offset, unsigned int mlen, unsigned int rlen)
-{
-        kptl_rx_t  *rx = private;
-        kptl_msg_t *rxmsg = rx->rx_msg;
-        int         nob;
-        int         rc;
-
-        CDEBUG(D_NET, "%s niov=%d offset=%d mlen=%d rlen=%d\n",
-               kptllnd_msgtype2str(rxmsg->ptlm_type),
-               niov, offset, mlen, rlen);
-
-        LASSERT (mlen <= rlen);
-        LASSERT (mlen >= 0);
-        LASSERT (!in_interrupt());
-        LASSERT (!(kiov != NULL && iov != NULL)); /* never both */
-        LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */
-
-        switch (rxmsg->ptlm_type) {
-        case PTLLND_MSG_TYPE_IMMEDIATE:
-                CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE %d,%d\n", mlen, rlen);
-
-                /* the sender's claimed payload must fit in what arrived */
-                nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]);
-                if (nob > rx->rx_nob) {
-                        CERROR ("Immediate message from %s too big: %d(%d)\n",
-                                libcfs_id2str(rx->rx_peer->peer_id), nob,
-                                rx->rx_nob);
-                        rc = -EINVAL;
-                        break;
-                }
-
-                if (kiov == NULL)
-                        lnet_copy_flat2iov(
-                                niov, iov, offset,
-                                *kptllnd_tunables.kptl_max_msg_size,
-                                rxmsg->ptlm_u.immediate.kptlim_payload,
-                                0,
-                                mlen);
-                else
-                        lnet_copy_flat2kiov(
-                                niov, kiov, offset,
-                                *kptllnd_tunables.kptl_max_msg_size,
-                                rxmsg->ptlm_u.immediate.kptlim_payload,
-                                0,
-                                mlen);
-
-                lnet_finalize (ni, lntmsg, 0);
-                rc = 0;
-                break;
-
-        case PTLLND_MSG_TYPE_GET:
-                CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET %d,%d\n", mlen, rlen);
-
-                /* The RDMA is issued even when nothing matched so that the
-                 * peer can complete; the outcome travels in the portals
-                 * 'hdr_data'. */
-                if (lntmsg != NULL)
-                        rc = kptllnd_active_rdma(rx, lntmsg,
-                                                 TX_TYPE_GET_RESPONSE,
-                                                 lntmsg->msg_niov,
-                                                 lntmsg->msg_iov,
-                                                 lntmsg->msg_kiov,
-                                                 lntmsg->msg_offset,
-                                                 lntmsg->msg_len);
-                else
-                        rc = kptllnd_active_rdma(rx, NULL,
-                                                 TX_TYPE_GET_RESPONSE,
-                                                 0, NULL, NULL, 0, 0);
-                break;
-
-        case PTLLND_MSG_TYPE_PUT:
-                CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT %d,%d\n", mlen, rlen);
-
-                /* The RDMA is issued even when nothing matched
-                 * (lntmsg == NULL), in which case it moves 0 bytes.  The
-                 * peer cannot be told about the mismatch, but it only needs
-                 * to learn when the network has finished with its buffer. */
-                rc = kptllnd_active_rdma(rx, lntmsg, TX_TYPE_PUT_RESPONSE,
-                                         niov, iov, kiov, offset, mlen);
-                break;
-
-        default:
-                LBUG();
-                rc = -EINVAL;
-                break;
-        }
-
-        /* the rx is finished with whatever happened above */
-        kptllnd_rx_done(rx, PTLLND_POSTRX_PEER_CREDIT);
-        return rc;
-}
-
-/*
- * Event queue callback: route 'ev' to the tx or rx-buffer handler according
- * to the type of the event argument stored in the MD's user pointer.
- */
-void
-kptllnd_eq_callback(ptl_event_t *ev)
-{
-        kptl_eventarg_t *eva = ev->md.user_ptr;
-
-        if (eva->eva_type == PTLLND_EVENTARG_TYPE_BUF) {
-                kptllnd_rx_buffer_callback(ev);
-                return;
-        }
-
-        /* anything that is neither a buffer, message nor RDMA event is a
-         * bug */
-        if (eva->eva_type != PTLLND_EVENTARG_TYPE_MSG &&
-            eva->eva_type != PTLLND_EVENTARG_TYPE_RDMA)
-                LBUG();
-
-        kptllnd_tx_callback(ev);
-}
-
-/* Drop one from the running-thread count (kptllnd_data.kptl_nthreads);
- * the counterpart of the increment done in kptllnd_thread_start(). */
-void
-kptllnd_thread_fini (void)
-{
- cfs_atomic_dec(&kptllnd_data.kptl_nthreads);
-}
-
-/*
- * Start a kernel thread running fn(arg), named 'name', and account for it
- * in kptllnd_data.kptl_nthreads.
- *
- * Returns 0 on success or the negative errno from kthread_run() on failure,
- * in which case the thread count is restored.
- */
-int
-kptllnd_thread_start(int (*fn)(void *arg), void *arg, char *name)
-{
-        struct task_struct *task;
-        int                 rc;
-
-        /* count the thread before it can possibly run and exit */
-        cfs_atomic_inc(&kptllnd_data.kptl_nthreads);
-
-        /* pass 'name' as data, not as the format string */
-        task = kthread_run(fn, arg, "%s", name);
-        if (!IS_ERR(task))
-                return 0;
-
-        /* PTR_ERR() is only meaningful for an error pointer; returning it
-         * unconditionally would hand callers a truncated task pointer
-         * instead of 0 on success */
-        rc = PTR_ERR(task);
-        CERROR("Failed to start thread: error %ld\n", PTR_ERR(task));
-        kptllnd_thread_fini();
-        return rc;
-}
-
-/*
- * Watchdog thread body. Until kptllnd_data.kptl_shutdown reaches 2 it
- * alternates between checking a slice of the peer hash table via
- * kptllnd_peer_check_bucket() each time 'deadline' passes, and handling
- * closing peers before sleeping on kptl_watchdog_waitq until the next
- * deadline. Returns 0 on exit.
- */
-int
-kptllnd_watchdog(void *arg)
-{
- int id = (long)arg; /* NOTE(review): not referenced again in this function */
- wait_queue_t waitlink;
- int stamp = 0;
- int peer_index = 0;
- unsigned long deadline = jiffies;
- int timeout;
- int i;
-
- cfs_block_allsigs();
-
- init_waitqueue_entry_current(&waitlink);
-
- /* threads shut down in phase 2 after all peers have been destroyed */
- while (kptllnd_data.kptl_shutdown < 2) {
-
- /* ticks remaining until the next check is due */
- timeout = (int)(deadline - jiffies);
- if (timeout <= 0) {
- const int n = 4;
- const int p = 1;
- int chunk = kptllnd_data.kptl_peer_hash_size;
-
-
- /* Time to check for RDMA timeouts on a few more
- * peers: I do checks every 'p' seconds on a
- * proportion of the peer table and I need to check
- * every connection 'n' times within a timeout
- * interval, to ensure I detect a timeout on any
- * connection within (n+1)/n times the timeout
- * interval. */
-
- if ((*kptllnd_tunables.kptl_timeout) > n * p)
- chunk = (chunk * n * p) /
- (*kptllnd_tunables.kptl_timeout);
- if (chunk == 0)
- chunk = 1;
-
- /* peer_index persists across passes and wraps round the table */
- for (i = 0; i < chunk; i++) {
- kptllnd_peer_check_bucket(peer_index, stamp);
- peer_index = (peer_index + 1) %
- kptllnd_data.kptl_peer_hash_size;
- }
-
- deadline += p * HZ;
- stamp++;
- continue;
- }
-
- kptllnd_handle_closing_peers();
-
- /* sleep until woken or until the deadline computed above */
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kptllnd_data.kptl_watchdog_waitq,
- &waitlink);
-
- waitq_timedwait(&waitlink, TASK_INTERRUPTIBLE, timeout);
-
- set_current_state (TASK_RUNNING);
- remove_wait_queue(&kptllnd_data.kptl_watchdog_waitq, &waitlink);
- }
-
- kptllnd_thread_fini();
- CDEBUG(D_NET, "<<<\n");
- return (0);
-};
-
-/*
- * Scheduler thread body. Until kptllnd_data.kptl_shutdown reaches 2 it
- * takes at most one entry per pass from each of the three scheduler queues
- * (received messages to parse, rx buffers to repost, txs to finalize),
- * dropping kptl_sched_lock around each handler call. It sleeps on
- * kptl_sched_waitq when a pass found nothing to do. Returns 0 on exit.
- */
-int
-kptllnd_scheduler (void *arg)
-{
- int id = (long)arg; /* NOTE(review): not referenced again in this function */
- wait_queue_t waitlink;
- unsigned long flags;
- int did_something;
- int counter = 0;
- kptl_rx_t *rx;
- kptl_rx_buffer_t *rxb;
- kptl_tx_t *tx;
-
- cfs_block_allsigs();
-
- init_waitqueue_entry_current(&waitlink);
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- /* threads shut down in phase 2 after all peers have been destroyed */
- while (kptllnd_data.kptl_shutdown < 2) {
-
- did_something = 0;
-
- /* received message waiting to be parsed */
- if (!cfs_list_empty(&kptllnd_data.kptl_sched_rxq)) {
- rx = cfs_list_entry (kptllnd_data.kptl_sched_rxq.next,
- kptl_rx_t, rx_list);
- cfs_list_del(&rx->rx_list);
-
- spin_unlock_irqrestore(&kptllnd_data. \
- kptl_sched_lock,
- flags);
-
- kptllnd_rx_parse(rx);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock,
- flags);
- }
-
- /* rx buffer waiting to be (re)posted */
- if (!cfs_list_empty(&kptllnd_data.kptl_sched_rxbq)) {
- rxb = cfs_list_entry (kptllnd_data.kptl_sched_rxbq.next,
- kptl_rx_buffer_t,
- rxb_repost_list);
- cfs_list_del(&rxb->rxb_repost_list);
-
- spin_unlock_irqrestore(&kptllnd_data. \
- kptl_sched_lock,
- flags);
-
- kptllnd_rx_buffer_post(rxb);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock,
- flags);
- }
-
- /* tx waiting to be finalized */
- if (!cfs_list_empty(&kptllnd_data.kptl_sched_txq)) {
- tx = cfs_list_entry (kptllnd_data.kptl_sched_txq.next,
- kptl_tx_t, tx_list);
- cfs_list_del_init(&tx->tx_list);
-
- spin_unlock_irqrestore(&kptllnd_data. \
- kptl_sched_lock, flags);
-
- kptllnd_tx_fini(tx);
- did_something = 1;
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock,
- flags);
- }
-
- /* keep going while there is work, until 'counter' reaches the
- * reschedule_loops tunable */
- if (did_something) {
- if (++counter != *kptllnd_tunables.kptl_reschedule_loops)
- continue;
- }
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&kptllnd_data.kptl_sched_waitq,
- &waitlink);
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock,
- flags);
-
- /* idle: sleep until woken; busy: just offer the CPU to others */
- if (!did_something)
- waitq_wait(&waitlink, TASK_INTERRUPTIBLE);
- else
- cond_resched();
-
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&kptllnd_data.kptl_sched_waitq, &waitlink);
-
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- counter = 0;
- }
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags);
-
- kptllnd_thread_fini();
- return 0;
-}
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd_modparams.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
-
-#include "ptllnd.h"
-
-/*
- * Module parameters. Each is a file-local int with a compiled-in default,
- * exported through CFS_MODULE_PARM with the permission mode shown (0444 or
- * 0644), and reached by the rest of the LND through the pointers collected
- * in kptllnd_tunables.
- */
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
- "# of TX descriptors");
-
-static int max_nodes = 1152;
-CFS_MODULE_PARM(max_nodes, "i", int, 0444,
- "maximum number of peer nodes");
-
-static int max_procs_per_node = 2;
-CFS_MODULE_PARM(max_procs_per_node, "i", int, 0444,
- "maximum number of processes per peer node to cache");
-
-static int checksum = 0;
-CFS_MODULE_PARM(checksum, "i", int, 0644,
- "set non-zero to enable message (not RDMA) checksums");
-
-/* NB 250 is the Cray Portals wire timeout */
-static int timeout = 250;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
- "timeout (seconds)");
-
-static int portal = PTLLND_PORTAL; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(portal, "i", int, 0444,
- "portal id");
-
-static int pid = PTLLND_PID; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(pid, "i", int, 0444,
- "portals pid");
-
-static int rxb_npages = 1;
-CFS_MODULE_PARM(rxb_npages, "i", int, 0444,
- "# of pages per rx buffer");
-
-static int rxb_nspare = 8;
-CFS_MODULE_PARM(rxb_nspare, "i", int, 0444,
- "# of spare rx buffers");
-
-static int credits = 128;
-CFS_MODULE_PARM(credits, "i", int, 0444,
- "concurrent sends");
-
-static int peercredits = PTLLND_PEERCREDITS; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(peercredits, "i", int, 0444,
- "concurrent sends to 1 peer");
-
-static int peer_buffer_credits = 0;
-CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
- "# per-peer router buffer credits");
-
-static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* <lnet/ptllnd_wire.h> */
-CFS_MODULE_PARM(max_msg_size, "i", int, 0444,
- "max size of immediate message");
-
-static int peer_hash_table_size = 101;
-CFS_MODULE_PARM(peer_hash_table_size, "i", int, 0444,
- "# of slots in the peer hash table");
-
-static int reschedule_loops = 100;
-CFS_MODULE_PARM(reschedule_loops, "i", int, 0644,
- "# of loops before scheduler does cond_resched()");
-
-static int ack_puts = 0;
-CFS_MODULE_PARM(ack_puts, "i", int, 0644,
- "get portals to ack all PUTs");
-
-/* only built when PJK_DEBUGGING is defined */
-#ifdef PJK_DEBUGGING
-static int simulation_bitmap = 0;
-CFS_MODULE_PARM(simulation_bitmap, "i", int, 0444,
- "simulation bitmap");
-#endif
-
-
-/*
- * The tunables table: one pointer per module parameter declared in this
- * file, so other code reads the current value through the pointer (e.g.
- * *kptllnd_tunables.kptl_timeout).
- */
-kptl_tunables_t kptllnd_tunables = {
- .kptl_ntx = &ntx,
- .kptl_max_nodes = &max_nodes,
- .kptl_max_procs_per_node = &max_procs_per_node,
- .kptl_checksum = &checksum,
- .kptl_portal = &portal,
- .kptl_pid = &pid,
- .kptl_timeout = &timeout,
- .kptl_rxb_npages = &rxb_npages,
- .kptl_rxb_nspare = &rxb_nspare,
- .kptl_credits = &credits,
- .kptl_peertxcredits = &peercredits,
- .kptl_peerrtrcredits = &peer_buffer_credits,
- .kptl_max_msg_size = &max_msg_size,
- .kptl_peer_hash_table_size = &peer_hash_table_size,
- .kptl_reschedule_loops = &reschedule_loops,
- .kptl_ack_puts = &ack_puts,
-#ifdef PJK_DEBUGGING
- .kptl_simulation_bitmap = &simulation_bitmap,
-#endif
-};
-
-
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-/*
- * Identifiers used for the .ctl_name field of the sysctl entries below.
- * Without HAVE_SYSCTL_UNNUMBERED each entry gets its own number starting
- * at 1; with it, every identifier is defined as CTL_UNNUMBERED.
- * NOTE(review): the TRACETIMEOUT, TRACEFAIL and TRACEBASENAME identifiers
- * are not used by any table entry visible in this file.
- */
-#ifndef HAVE_SYSCTL_UNNUMBERED
-
-enum {
- KPTLLND_NTX = 1,
- KPTLLND_MAX_NODES,
- KPTLLND_MAX_PROC_PER_NODE,
- KPTLLND_CHECKSUM,
- KPTLLND_TIMEOUT,
- KPTLLND_PORTAL,
- KPTLLND_PID,
- KPTLLND_RXB_PAGES,
- KPTLLND_CREDITS,
- KPTLLND_PEERTXCREDITS,
- KPTLLND_PEERRTRCREDITS,
- KPTLLND_MAX_MSG_SIZE,
- KPTLLND_PEER_HASH_SIZE,
- KPTLLND_RESHEDULE_LOOPS,
- KPTLLND_ACK_PUTS,
- KPTLLND_TRACETIMEOUT,
- KPTLLND_TRACEFAIL,
- KPTLLND_TRACEBASENAME,
- KPTLLND_SIMULATION_BITMAP
-};
-#else
-
-#define KPTLLND_NTX CTL_UNNUMBERED
-#define KPTLLND_MAX_NODES CTL_UNNUMBERED
-#define KPTLLND_MAX_PROC_PER_NODE CTL_UNNUMBERED
-#define KPTLLND_CHECKSUM CTL_UNNUMBERED
-#define KPTLLND_TIMEOUT CTL_UNNUMBERED
-#define KPTLLND_PORTAL CTL_UNNUMBERED
-#define KPTLLND_PID CTL_UNNUMBERED
-#define KPTLLND_RXB_PAGES CTL_UNNUMBERED
-#define KPTLLND_CREDITS CTL_UNNUMBERED
-#define KPTLLND_PEERTXCREDITS CTL_UNNUMBERED
-#define KPTLLND_PEERRTRCREDITS CTL_UNNUMBERED
-#define KPTLLND_MAX_MSG_SIZE CTL_UNNUMBERED
-#define KPTLLND_PEER_HASH_SIZE CTL_UNNUMBERED
-#define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED
-#define KPTLLND_ACK_PUTS CTL_UNNUMBERED
-#define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED
-#define KPTLLND_TRACEFAIL CTL_UNNUMBERED
-#define KPTLLND_TRACEBASENAME CTL_UNNUMBERED
-#define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED
-#endif
-
-/*
- * sysctl view of the module parameters: one integer entry per tunable,
- * each backed by the same variable as the matching CFS_MODULE_PARM and
- * carrying the same permission mode.  The table is terminated by an
- * all-zero entry.
- * NOTE(review): rxb_nspare has a module parameter but no entry here.
- */
-static struct ctl_table kptllnd_ctl_table[] = {
-        {
-                .ctl_name     = KPTLLND_NTX,
-                .procname     = "ntx",
-                .data         = &ntx,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_MAX_NODES,
-                .procname     = "max_nodes",
-                .data         = &max_nodes,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_MAX_PROC_PER_NODE,
-                .procname     = "max_procs_per_node",
-                .data         = &max_procs_per_node,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_CHECKSUM,
-                .procname     = "checksum",
-                .data         = &checksum,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_TIMEOUT,
-                .procname     = "timeout",
-                .data         = &timeout,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_PORTAL,
-                .procname     = "portal",
-                .data         = &portal,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_PID,
-                .procname     = "pid",
-                .data         = &pid,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_RXB_PAGES,
-                .procname     = "rxb_npages",
-                .data         = &rxb_npages,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_CREDITS,
-                .procname     = "credits",
-                .data         = &credits,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_PEERTXCREDITS,
-                .procname     = "peercredits",
-                .data         = &peercredits,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_PEERRTRCREDITS,
-                .procname     = "peer_buffer_credits",
-                .data         = &peer_buffer_credits,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_MAX_MSG_SIZE,
-                .procname     = "max_msg_size",
-                .data         = &max_msg_size,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_PEER_HASH_SIZE,
-                .procname     = "peer_hash_table_size",
-                .data         = &peer_hash_table_size,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                /* writable, like the reschedule_loops module parameter
-                 * (0644); the scheduler re-reads the value on every pass */
-                .ctl_name     = KPTLLND_RESHEDULE_LOOPS,
-                .procname     = "reschedule_loops",
-                .data         = &reschedule_loops,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = KPTLLND_ACK_PUTS,
-                .procname     = "ack_puts",
-                .data         = &ack_puts,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .proc_handler = &proc_dointvec
-        },
-#ifdef PJK_DEBUGGING
-        {
-                .ctl_name     = KPTLLND_SIMULATION_BITMAP,
-                .procname     = "simulation_bitmap",
-                .data         = &simulation_bitmap,
-                .maxlen       = sizeof(int),
-                .mode         = 0444,
-                .proc_handler = &proc_dointvec
-        },
-#endif
-
-        {0}
-};
-
-/* Top-level sysctl table: a single "ptllnd" directory entry (mode 0555)
- * whose children are the tunables in kptllnd_ctl_table. */
-static struct ctl_table kptllnd_top_ctl_table[] = {
- {
- .ctl_name = CTL_PTLLND,
- .procname = "ptllnd",
- .data = NULL,
- .maxlen = 0,
- .mode = 0555,
- .child = kptllnd_ctl_table
- },
- {0}
-};
-
-/*
- * Register the ptllnd sysctl tree and remember its handle in
- * kptllnd_tunables.kptl_sysctl.  A failed registration only produces a
- * warning; the function returns 0 either way.
- */
-int
-kptllnd_tunables_init ()
-{
-        kptllnd_tunables.kptl_sysctl =
-                register_sysctl_table(kptllnd_top_ctl_table, 0);
-
-        if (!kptllnd_tunables.kptl_sysctl)
-                CWARN("Can't setup /proc tunables\n");
-
-        return 0;
-}
-
-/* Unregister the sysctl tree, if kptllnd_tunables_init() managed to
- * register one. */
-void
-kptllnd_tunables_fini ()
-{
-        if (kptllnd_tunables.kptl_sysctl == NULL)
-                return;
-
-        unregister_sysctl_table(kptllnd_tunables.kptl_sysctl);
-}
-
-#else
-
-/* Variant built when the sysctl table above is compiled out: there is
- * nothing to register, so just report success. */
-int
-kptllnd_tunables_init ()
-{
- return 0;
-}
-
-/* Variant built when the sysctl table above is compiled out: nothing was
- * registered, so there is nothing to undo. */
-void
-kptllnd_tunables_fini ()
-{
-}
-
-#endif
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd_peer.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- * Author: E Barton <eeb@bartonsoftware.com>
- */
-
-#include "ptllnd.h"
-#include <libcfs/list.h>
-
-/* Return the number of entries on list 'q' by walking it once. */
-static int
-kptllnd_count_queue(cfs_list_t *q)
-{
-        cfs_list_t *pos;
-        int         len = 0;
-
-        cfs_list_for_each(pos, q)
-                len++;
-
-        return len;
-}
-
-/*
- * Report the state of the 'index'-th peer, counting from 0 in hash-table
- * walk order, through the output pointers.
- *
- * Returns 0 when such a peer exists and -ENOENT when 'index' is beyond the
- * last peer, in which case no output is written.
- */
-int
-kptllnd_get_peer_info(int index,
-                      lnet_process_id_t *id,
-                      int *state, int *sent_hello,
-                      int *refcount, __u64 *incarnation,
-                      __u64 *next_matchbits, __u64 *last_matchbits_seen,
-                      int *nsendq, int *nactiveq,
-                      int *credits, int *outstanding_credits)
-{
-        unsigned long  flags;
-        cfs_list_t    *pos;
-        kptl_peer_t   *peer;
-        int            bucket;
-        int            rc = -ENOENT;
-
-        read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
-        /* 'rc' turns 0 as soon as the wanted peer has been reported */
-        for (bucket = 0;
-             rc != 0 && bucket < kptllnd_data.kptl_peer_hash_size;
-             bucket++) {
-                cfs_list_for_each (pos, &kptllnd_data.kptl_peers[bucket]) {
-                        if (index-- > 0)        /* not there yet */
-                                continue;
-
-                        peer = cfs_list_entry(pos, kptl_peer_t, peer_list);
-
-                        /* fields read without taking the peer's own lock */
-                        *id          = peer->peer_id;
-                        *state       = peer->peer_state;
-                        *sent_hello  = peer->peer_sent_hello;
-                        *refcount    = cfs_atomic_read(&peer->peer_refcount);
-                        *incarnation = peer->peer_incarnation;
-
-                        /* fields read under peer_lock */
-                        spin_lock(&peer->peer_lock);
-
-                        *next_matchbits      = peer->peer_next_matchbits;
-                        *last_matchbits_seen = peer->peer_last_matchbits_seen;
-                        *credits             = peer->peer_credits;
-                        *outstanding_credits = peer->peer_outstanding_credits;
-
-                        *nsendq   = kptllnd_count_queue(&peer->peer_sendq);
-                        *nactiveq = kptllnd_count_queue(&peer->peer_activeq);
-
-                        spin_unlock(&peer->peer_lock);
-
-                        rc = 0;
-                        break;
-                }
-        }
-
-        read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-        return rc;
-}
-
-/*
- * Insert 'peer' into the peer hash table, give the table its own reference
- * on the peer and count it as active.  The peer must be waiting for a HELLO
- * or already active, and the table must still have room.
- */
-void
-kptllnd_peer_add_peertable_locked (kptl_peer_t *peer)
-{
-        LASSERT (kptllnd_data.kptl_n_active_peers <
-                 kptllnd_data.kptl_expected_peers);
-
-        LASSERT (peer->peer_state == PEER_STATE_WAITING_HELLO ||
-                 peer->peer_state == PEER_STATE_ACTIVE);
-
-        /* Insertion at the HEAD keeps each bucket in most-recently-used
-         * order, which kptllnd_cull_peertable relies on. */
-        cfs_list_add(&peer->peer_list, kptllnd_nid2peerlist(peer->peer_id.nid));
-
-        /* the list holds a reference of its own */
-        cfs_atomic_inc(&peer->peer_refcount);
-        kptllnd_data.kptl_n_active_peers++;
-}
-
-/*
- * Make room for a new peer with id 'pid': of the existing peers that share
- * its NID address, keep the (max_procs_per_node - 1) most recently added
- * and close the rest.  No existing peer may already have this exact pid.
- */
-void
-kptllnd_cull_peertable_locked (lnet_process_id_t pid)
-{
-        cfs_list_t   *peers = kptllnd_nid2peerlist(pid.nid);
-        int           cull_count = *kptllnd_tunables.kptl_max_procs_per_node;
-        int           count = 0;
-        cfs_list_t   *pos;
-        cfs_list_t   *next;
-        kptl_peer_t  *peer;
-
-        /* The bucket is in MRU order (see
-         * kptllnd_peer_add_peertable_locked), so the peers seen first are
-         * the ones to keep. */
-        cfs_list_for_each_safe (pos, next, peers) {
-                peer = cfs_list_entry(pos, kptl_peer_t, peer_list);
-
-                if (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(pid.nid)) {
-                        LASSERT (peer->peer_id.pid != pid.pid);
-
-                        if (++count >= cull_count) {    /* too old: cull */
-                                CDEBUG(D_NET, "Cull %s(%s)\n",
-                                       libcfs_id2str(peer->peer_id),
-                                       kptllnd_ptlid2str(peer->peer_ptlid));
-
-                                kptllnd_peer_close_locked(peer, 0);
-                        }
-                }
-        }
-}
-
-/*
- * Allocate and initialise a peer for LNet id 'lpid' / portals id 'ppid'.
- *
- * The new peer starts in PEER_STATE_ALLOCATED with one reference, owned by
- * the caller.  Returns NULL if memory cannot be allocated or if 'net' is
- * already shutting down.
- */
-kptl_peer_t *
-kptllnd_peer_allocate (kptl_net_t *net, lnet_process_id_t lpid, ptl_process_id_t ppid)
-{
-        unsigned long  flags;
-        kptl_peer_t   *peer;
-        int            dying;
-
-        LIBCFS_ALLOC(peer, sizeof (*peer));
-        if (peer == NULL) {
-                CERROR("Can't create peer %s (%s)\n",
-                       libcfs_id2str(lpid),
-                       kptllnd_ptlid2str(ppid));
-                return NULL;
-        }
-
-        memset(peer, 0, sizeof(*peer));         /* zero flags etc */
-
-        spin_lock_init(&peer->peer_lock);
-        CFS_INIT_LIST_HEAD (&peer->peer_noops);
-        CFS_INIT_LIST_HEAD (&peer->peer_sendq);
-        CFS_INIT_LIST_HEAD (&peer->peer_activeq);
-
-        /* identity */
-        peer->peer_id = lpid;
-        peer->peer_ptlid = ppid;
-
-        /* state */
-        peer->peer_state = PEER_STATE_ALLOCATED;
-        peer->peer_error = 0;
-        peer->peer_last_alive = 0;
-        peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS;
-        peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */
-
-        /* credits */
-        peer->peer_credits = 1;                 /* enough for HELLO */
-        peer->peer_sent_credits = 1;            /* HELLO credit is implicit */
-        peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peertxcredits - 1;
-
-        cfs_atomic_set(&peer->peer_refcount, 1); /* 1 ref for caller */
-
-        write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
-        peer->peer_myincarnation = kptllnd_data.kptl_incarnation;
-
-        /* The peer count only grows under the lock, and never once the net
-         * has started shutting down. */
-        dying = (net->net_shutdown != 0);
-        if (!dying)
-                kptllnd_data.kptl_npeers++;
-
-        write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
-        if (dying) {
-                LIBCFS_FREE(peer, sizeof(*peer));
-                return NULL;
-        }
-
-        return peer;
-}
-
-void
-kptllnd_peer_destroy (kptl_peer_t *peer)
-{
- unsigned long flags;
-
- CDEBUG(D_NET, "Peer=%p\n", peer);
-
- LASSERT (!in_interrupt());
- LASSERT (cfs_atomic_read(&peer->peer_refcount) == 0);
- LASSERT (peer->peer_state == PEER_STATE_ALLOCATED ||
- peer->peer_state == PEER_STATE_ZOMBIE);
- LASSERT (cfs_list_empty(&peer->peer_noops));
- LASSERT (cfs_list_empty(&peer->peer_sendq));
- LASSERT (cfs_list_empty(&peer->peer_activeq));
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- if (peer->peer_state == PEER_STATE_ZOMBIE)
- cfs_list_del(&peer->peer_list);
-
- kptllnd_data.kptl_npeers--;
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- LIBCFS_FREE (peer, sizeof (*peer));
-}
-
-void
-kptllnd_cancel_txlist (cfs_list_t *peerq, cfs_list_t *txs)
-{
- cfs_list_t *tmp;
- cfs_list_t *nxt;
- kptl_tx_t *tx;
-
- cfs_list_for_each_safe (tmp, nxt, peerq) {
- tx = cfs_list_entry(tmp, kptl_tx_t, tx_list);
-
- cfs_list_del(&tx->tx_list);
- cfs_list_add_tail(&tx->tx_list, txs);
-
- tx->tx_status = -EIO;
- tx->tx_active = 0;
- }
-}
-
-void
-kptllnd_peer_cancel_txs(kptl_peer_t *peer, cfs_list_t *txs)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- kptllnd_cancel_txlist(&peer->peer_noops, txs);
- kptllnd_cancel_txlist(&peer->peer_sendq, txs);
- kptllnd_cancel_txlist(&peer->peer_activeq, txs);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-}
-
-void
-kptllnd_peer_alive (kptl_peer_t *peer)
-{
- /* This is racy, but everyone's only writing cfs_time_current() */
- peer->peer_last_alive = cfs_time_current();
- smp_mb();
-}
-
-void
-kptllnd_peer_notify (kptl_peer_t *peer)
-{
- unsigned long flags;
- kptl_net_t *net;
- kptl_net_t **nets;
- int i = 0;
- int nnets = 0;
- int error = 0;
- cfs_time_t last_alive = 0;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (peer->peer_error != 0) {
- error = peer->peer_error;
- peer->peer_error = 0;
- last_alive = peer->peer_last_alive;
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (error == 0)
- return;
-
- read_lock(&kptllnd_data.kptl_net_rw_lock);
- cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list)
- nnets++;
- read_unlock(&kptllnd_data.kptl_net_rw_lock);
-
- if (nnets == 0) /* shutdown in progress */
- return;
-
- LIBCFS_ALLOC(nets, nnets * sizeof(*nets));
- if (nets == NULL) {
- CERROR("Failed to allocate nets[%d]\n", nnets);
- return;
- }
- memset(nets, 0, nnets * sizeof(*nets));
-
- read_lock(&kptllnd_data.kptl_net_rw_lock);
- i = 0;
- cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list) {
- LASSERT (i < nnets);
- nets[i] = net;
- kptllnd_net_addref(net);
- i++;
- }
- read_unlock(&kptllnd_data.kptl_net_rw_lock);
-
- for (i = 0; i < nnets; i++) {
- lnet_nid_t peer_nid;
-
- net = nets[i];
- if (net == NULL)
- break;
-
- if (!net->net_shutdown) {
- peer_nid = kptllnd_ptl2lnetnid(net->net_ni->ni_nid,
- peer->peer_ptlid.nid);
- lnet_notify(net->net_ni, peer_nid, 0, last_alive);
- }
-
- kptllnd_net_decref(net);
- }
-
- LIBCFS_FREE(nets, nnets * sizeof(*nets));
-}
-
-void
-kptllnd_handle_closing_peers ()
-{
- unsigned long flags;
- cfs_list_t txs;
- kptl_peer_t *peer;
- cfs_list_t *tmp;
- cfs_list_t *nxt;
- kptl_tx_t *tx;
- int idle;
-
- /* Check with a read lock first to avoid blocking anyone */
-
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- idle = cfs_list_empty(&kptllnd_data.kptl_closing_peers) &&
- cfs_list_empty(&kptllnd_data.kptl_zombie_peers);
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- if (idle)
- return;
-
- CFS_INIT_LIST_HEAD(&txs);
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- /* Cancel txs on all zombie peers. NB anyone dropping the last peer
- * ref removes it from this list, so I musn't drop the lock while
- * scanning it. */
- cfs_list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) {
- peer = cfs_list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT (peer->peer_state == PEER_STATE_ZOMBIE);
-
- kptllnd_peer_cancel_txs(peer, &txs);
- }
-
- /* Notify LNET and cancel txs on closing (i.e. newly closed) peers. NB
- * I'm the only one removing from this list, but peers can be added on
- * the end any time I drop the lock. */
-
- cfs_list_for_each_safe (tmp, nxt, &kptllnd_data.kptl_closing_peers) {
- peer = cfs_list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT (peer->peer_state == PEER_STATE_CLOSING);
-
- cfs_list_del(&peer->peer_list);
- cfs_list_add_tail(&peer->peer_list,
- &kptllnd_data.kptl_zombie_peers);
- peer->peer_state = PEER_STATE_ZOMBIE;
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- kptllnd_peer_notify(peer);
- kptllnd_peer_cancel_txs(peer, &txs);
- kptllnd_peer_decref(peer);
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- }
-
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- /* Drop peer's ref on all cancelled txs. This will get
- * kptllnd_tx_fini() to abort outstanding comms if necessary. */
-
- cfs_list_for_each_safe (tmp, nxt, &txs) {
- tx = cfs_list_entry(tmp, kptl_tx_t, tx_list);
- cfs_list_del(&tx->tx_list);
- kptllnd_tx_decref(tx);
- }
-}
-
-void
-kptllnd_peer_close_locked(kptl_peer_t *peer, int why)
-{
- switch (peer->peer_state) {
- default:
- LBUG();
-
- case PEER_STATE_WAITING_HELLO:
- case PEER_STATE_ACTIVE:
- /* Ensure new peers see a new incarnation of me */
- LASSERT(peer->peer_myincarnation <= kptllnd_data.kptl_incarnation);
- if (peer->peer_myincarnation == kptllnd_data.kptl_incarnation)
- kptllnd_data.kptl_incarnation++;
-
- /* Removing from peer table */
- kptllnd_data.kptl_n_active_peers--;
- LASSERT (kptllnd_data.kptl_n_active_peers >= 0);
-
- cfs_list_del(&peer->peer_list);
- kptllnd_peer_unreserve_buffers();
-
- peer->peer_error = why; /* stash 'why' only on first close */
- peer->peer_state = PEER_STATE_CLOSING;
-
- /* Schedule for immediate attention, taking peer table's ref */
- cfs_list_add_tail(&peer->peer_list,
- &kptllnd_data.kptl_closing_peers);
- wake_up(&kptllnd_data.kptl_watchdog_waitq);
- break;
-
- case PEER_STATE_ZOMBIE:
- case PEER_STATE_CLOSING:
- break;
- }
-}
-
-void
-kptllnd_peer_close(kptl_peer_t *peer, int why)
-{
- unsigned long flags;
-
- write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
- kptllnd_peer_close_locked(peer, why);
- write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-}
-
-int
-kptllnd_peer_del(lnet_process_id_t id)
-{
- cfs_list_t *ptmp;
- cfs_list_t *pnxt;
- kptl_peer_t *peer;
- int lo;
- int hi;
- int i;
- unsigned long flags;
- int rc = -ENOENT;
-
- /*
- * Find the single bucket we are supposed to look at or if nid is a
- * wildcard (LNET_NID_ANY) then look at all of the buckets
- */
- if (id.nid != LNET_NID_ANY) {
- cfs_list_t *l = kptllnd_nid2peerlist(id.nid);
-
- lo = hi = l - kptllnd_data.kptl_peers;
- } else {
- if (id.pid != LNET_PID_ANY)
- return -EINVAL;
-
- lo = 0;
- hi = kptllnd_data.kptl_peer_hash_size - 1;
- }
-
-again:
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- for (i = lo; i <= hi; i++) {
- cfs_list_for_each_safe (ptmp, pnxt,
- &kptllnd_data.kptl_peers[i]) {
- peer = cfs_list_entry (ptmp, kptl_peer_t, peer_list);
-
- if (!(id.nid == LNET_NID_ANY ||
- (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(id.nid) &&
- (id.pid == LNET_PID_ANY ||
- peer->peer_id.pid == id.pid))))
- continue;
-
- kptllnd_peer_addref(peer); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kptllnd_data. \
- kptl_peer_rw_lock,
- flags);
-
- kptllnd_peer_close(peer, 0);
- kptllnd_peer_decref(peer); /* ...until here */
-
- rc = 0; /* matched something */
-
- /* start again now I've dropped the lock */
- goto again;
- }
- }
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- return (rc);
-}
-
-void
-kptllnd_queue_tx(kptl_peer_t *peer, kptl_tx_t *tx)
-{
- /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
- unsigned long flags;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- /* Ensure HELLO is sent first */
- if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP)
- cfs_list_add(&tx->tx_list, &peer->peer_noops);
- else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO)
- cfs_list_add(&tx->tx_list, &peer->peer_sendq);
- else
- cfs_list_add_tail(&tx->tx_list, &peer->peer_sendq);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-}
-
-
-void
-kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
-{
- /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */
- ptl_handle_md_t msg_mdh;
- ptl_md_t md;
- ptl_err_t prc;
-
- LASSERT (!tx->tx_idle);
- LASSERT (!tx->tx_active);
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE ||
- tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST);
-
- kptllnd_set_tx_peer(tx, peer);
-
- memset(&md, 0, sizeof(md));
-
- md.threshold = tx->tx_acked ? 2 : 1; /* SEND END + ACK? */
- md.options = PTL_MD_OP_PUT |
- PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
- PTL_MD_EVENT_START_DISABLE;
- md.user_ptr = &tx->tx_msg_eventarg;
- md.eq_handle = kptllnd_data.kptl_eqh;
-
- if (nfrag == 0) {
- md.start = tx->tx_msg;
- md.length = tx->tx_msg->ptlm_nob;
- } else {
- LASSERT (nfrag > 1);
- LASSERT (tx->tx_frags->iov[0].iov_base == (void *)tx->tx_msg);
-
- md.start = tx->tx_frags;
- md.length = nfrag;
- md.options |= PTL_MD_IOVEC;
- }
-
- prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh);
- if (prc != PTL_OK) {
- CERROR("PtlMDBind(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(prc), prc);
- tx->tx_status = -EIO;
- kptllnd_tx_decref(tx);
- return;
- }
-
-
- tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ);
- tx->tx_active = 1;
- tx->tx_msg_mdh = msg_mdh;
- kptllnd_queue_tx(peer, tx);
-}
-
-/* NB "restarts" comes from peer_sendq of a single peer */
-void
-kptllnd_restart_txs (kptl_net_t *net, lnet_process_id_t target,
- cfs_list_t *restarts)
-{
- kptl_tx_t *tx;
- kptl_tx_t *tmp;
- kptl_peer_t *peer;
-
- LASSERT (!cfs_list_empty(restarts));
-
- if (kptllnd_find_target(net, target, &peer) != 0)
- peer = NULL;
-
- cfs_list_for_each_entry_safe (tx, tmp, restarts, tx_list) {
- LASSERT (tx->tx_peer != NULL);
- LASSERT (tx->tx_type == TX_TYPE_GET_REQUEST ||
- tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_SMALL_MESSAGE);
-
- cfs_list_del_init(&tx->tx_list);
-
- if (peer == NULL ||
- tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
- kptllnd_tx_decref(tx);
- continue;
- }
-
- LASSERT (tx->tx_msg->ptlm_type != PTLLND_MSG_TYPE_NOOP);
- tx->tx_status = 0;
- tx->tx_active = 1;
- kptllnd_peer_decref(tx->tx_peer);
- tx->tx_peer = NULL;
- kptllnd_set_tx_peer(tx, peer);
- kptllnd_queue_tx(peer, tx); /* takes over my ref on tx */
- }
-
- if (peer == NULL)
- return;
-
- kptllnd_peer_check_sends(peer);
- kptllnd_peer_decref(peer);
-}
-
-static inline int
-kptllnd_peer_send_noop (kptl_peer_t *peer)
-{
- if (!peer->peer_sent_hello ||
- peer->peer_credits == 0 ||
- !cfs_list_empty(&peer->peer_noops) ||
- peer->peer_outstanding_credits < PTLLND_CREDIT_HIGHWATER)
- return 0;
-
- /* No tx to piggyback NOOP onto or no credit to send a tx */
- return (cfs_list_empty(&peer->peer_sendq) || peer->peer_credits == 1);
-}
-
-void
-kptllnd_peer_check_sends (kptl_peer_t *peer)
-{
- ptl_handle_me_t meh;
- kptl_tx_t *tx;
- int rc;
- int msg_type;
- unsigned long flags;
-
- LASSERT(!in_interrupt());
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- peer->peer_retry_noop = 0;
-
- if (kptllnd_peer_send_noop(peer)) {
- /* post a NOOP to return credits */
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (tx == NULL) {
- CERROR("Can't return credits to %s: can't allocate descriptor\n",
- libcfs_id2str(peer->peer_id));
- } else {
- kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP,
- peer->peer_id, 0);
- kptllnd_post_tx(peer, tx, 0);
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- peer->peer_retry_noop = (tx == NULL);
- }
-
- for (;;) {
- if (!cfs_list_empty(&peer->peer_noops)) {
- LASSERT (peer->peer_sent_hello);
- tx = cfs_list_entry(peer->peer_noops.next,
- kptl_tx_t, tx_list);
- } else if (!cfs_list_empty(&peer->peer_sendq)) {
- tx = cfs_list_entry(peer->peer_sendq.next,
- kptl_tx_t, tx_list);
- } else {
- /* nothing to send right now */
- break;
- }
-
- LASSERT (tx->tx_active);
- LASSERT (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
-
- LASSERT (peer->peer_outstanding_credits >= 0);
- LASSERT (peer->peer_sent_credits >= 0);
- LASSERT (peer->peer_sent_credits +
- peer->peer_outstanding_credits <=
- *kptllnd_tunables.kptl_peertxcredits);
- LASSERT (peer->peer_credits >= 0);
-
- msg_type = tx->tx_msg->ptlm_type;
-
- /* Ensure HELLO is sent first */
- if (!peer->peer_sent_hello) {
- LASSERT (cfs_list_empty(&peer->peer_noops));
- if (msg_type != PTLLND_MSG_TYPE_HELLO)
- break;
- peer->peer_sent_hello = 1;
- }
-
- if (peer->peer_credits == 0) {
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: no credits for %s[%p]\n",
- libcfs_id2str(peer->peer_id),
- peer->peer_credits,
- peer->peer_outstanding_credits,
- peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx);
- break;
- }
-
- /* Last/Initial credit reserved for NOOP/HELLO */
- if (peer->peer_credits == 1 &&
- msg_type != PTLLND_MSG_TYPE_HELLO &&
- msg_type != PTLLND_MSG_TYPE_NOOP) {
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: "
- "not using last credit for %s[%p]\n",
- libcfs_id2str(peer->peer_id),
- peer->peer_credits,
- peer->peer_outstanding_credits,
- peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx);
- break;
- }
-
- cfs_list_del(&tx->tx_list);
-
- /* Discard any NOOP I queued if I'm not at the high-water mark
- * any more or more messages have been queued */
- if (msg_type == PTLLND_MSG_TYPE_NOOP &&
- !kptllnd_peer_send_noop(peer)) {
- tx->tx_active = 0;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CDEBUG(D_NET, "%s: redundant noop\n",
- libcfs_id2str(peer->peer_id));
- kptllnd_tx_decref(tx);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- continue;
- }
-
- /* fill last-minute msg fields */
- kptllnd_msg_pack(tx->tx_msg, peer);
-
- if (tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST) {
- /* peer_next_matchbits must be known good */
- LASSERT (peer->peer_state >= PEER_STATE_ACTIVE);
- /* Assume 64-bit matchbits can't wrap */
- LASSERT (peer->peer_next_matchbits >= PTL_RESERVED_MATCHBITS);
- tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits =
- peer->peer_next_matchbits++;
- }
-
- peer->peer_sent_credits += peer->peer_outstanding_credits;
- peer->peer_outstanding_credits = 0;
- peer->peer_credits--;
-
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s tx=%p nob=%d cred=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- kptllnd_msgtype2str(msg_type), tx, tx->tx_msg->ptlm_nob,
- tx->tx_msg->ptlm_credits);
-
- cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- kptllnd_tx_addref(tx); /* 1 ref for me... */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (tx->tx_type == TX_TYPE_PUT_REQUEST ||
- tx->tx_type == TX_TYPE_GET_REQUEST) {
- /* Post bulk now we have safe matchbits */
- rc = PtlMEAttach(kptllnd_data.kptl_nih,
- *kptllnd_tunables.kptl_portal,
- peer->peer_ptlid,
- tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits,
- 0, /* ignore bits */
- PTL_UNLINK,
- PTL_INS_BEFORE,
- &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMEAttach(%s) failed: %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK,
- &tx->tx_rdma_mdh);
- if (rc != PTL_OK) {
- CERROR("PtlMDAttach(%s) failed: %s(%d)\n",
- libcfs_id2str(tx->tx_peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- rc = PtlMEUnlink(meh);
- LASSERT(rc == PTL_OK);
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
- goto failed;
- }
- /* I'm not racing with the event callback here. It's a
- * bug if there's an event on the MD I just attached
- * before I actually send the RDMA request message -
- * probably matchbits re-used in error. */
- }
-
- tx->tx_tposted = jiffies; /* going on the wire */
-
- rc = PtlPut (tx->tx_msg_mdh,
- tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
- peer->peer_ptlid,
- *kptllnd_tunables.kptl_portal,
- 0, /* acl cookie */
- LNET_MSG_MATCHBITS,
- 0, /* offset */
- 0); /* header data */
- if (rc != PTL_OK) {
- CERROR("PtlPut %s error %s(%d)\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- kptllnd_tx_decref(tx); /* drop my ref */
-
- spin_lock_irqsave(&peer->peer_lock, flags);
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return;
-
- failed:
- /* Nuke everything (including tx we were trying) */
- kptllnd_peer_close(peer, -EIO);
- kptllnd_tx_decref(tx);
-}
-
-kptl_tx_t *
-kptllnd_find_timed_out_tx(kptl_peer_t *peer)
-{
- kptl_tx_t *tx;
- cfs_list_t *ele;
-
- cfs_list_for_each(ele, &peer->peer_sendq) {
- tx = cfs_list_entry(ele, kptl_tx_t, tx_list);
-
- if (cfs_time_aftereq(jiffies, tx->tx_deadline)) {
- kptllnd_tx_addref(tx);
- return tx;
- }
- }
-
- cfs_list_for_each(ele, &peer->peer_activeq) {
- tx = cfs_list_entry(ele, kptl_tx_t, tx_list);
-
- if (cfs_time_aftereq(jiffies, tx->tx_deadline)) {
- kptllnd_tx_addref(tx);
- return tx;
- }
- }
-
- return NULL;
-}
-
-
-void
-kptllnd_peer_check_bucket (int idx, int stamp)
-{
- cfs_list_t *peers = &kptllnd_data.kptl_peers[idx];
- kptl_peer_t *peer;
- unsigned long flags;
-
- CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp);
-
- again:
- /* NB. Shared lock while I just look */
- read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags);
-
- cfs_list_for_each_entry (peer, peers, peer_list) {
- kptl_tx_t *tx;
- int check_sends;
- int c = -1, oc = -1, sc = -1;
- int nsend = -1, nactive = -1;
- int sent_hello = -1, state = -1;
-
- CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits);
-
- spin_lock(&peer->peer_lock);
-
- if (peer->peer_check_stamp == stamp) {
- /* checked already this pass */
- spin_unlock(&peer->peer_lock);
- continue;
- }
-
- peer->peer_check_stamp = stamp;
- tx = kptllnd_find_timed_out_tx(peer);
- check_sends = peer->peer_retry_noop;
-
- if (tx != NULL) {
- c = peer->peer_credits;
- sc = peer->peer_sent_credits;
- oc = peer->peer_outstanding_credits;
- state = peer->peer_state;
- sent_hello = peer->peer_sent_hello;
- nsend = kptllnd_count_queue(&peer->peer_sendq);
- nactive = kptllnd_count_queue(&peer->peer_activeq);
- }
-
- spin_unlock(&peer->peer_lock);
-
- if (tx == NULL && !check_sends)
- continue;
-
- kptllnd_peer_addref(peer); /* 1 ref for me... */
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock,
- flags);
-
- if (tx == NULL) { /* nothing timed out */
- kptllnd_peer_check_sends(peer);
- kptllnd_peer_decref(peer); /* ...until here or... */
-
- /* rescan after dropping the lock */
- goto again;
- }
-
- LCONSOLE_ERROR_MSG(0x126, "Timing out %s: %s\n",
- libcfs_id2str(peer->peer_id),
- (tx->tx_tposted == 0) ?
- "no free peer buffers" :
- "please check Portals");
-
- if (tx->tx_tposted) {
- CERROR("Could not send to %s after %ds (sent %lds ago); "
- "check Portals for possible issues\n",
- libcfs_id2str(peer->peer_id),
- *kptllnd_tunables.kptl_timeout,
- cfs_duration_sec(jiffies - tx->tx_tposted));
- } else if (state < PEER_STATE_ACTIVE) {
- CERROR("Could not connect %s (%d) after %ds; "
- "peer might be down\n",
- libcfs_id2str(peer->peer_id), state,
- *kptllnd_tunables.kptl_timeout);
- } else {
- CERROR("Could not get credits for %s after %ds; "
- "possible Lustre networking issues\n",
- libcfs_id2str(peer->peer_id),
- *kptllnd_tunables.kptl_timeout);
- }
-
- CERROR("%s timed out: cred %d outstanding %d, sent %d, "
- "state %d, sent_hello %d, sendq %d, activeq %d "
- "Tx %p %s %s (%s%s%s) status %d %sposted %lu T/O %ds\n",
- libcfs_id2str(peer->peer_id), c, oc, sc,
- state, sent_hello, nsend, nactive,
- tx, kptllnd_tx_typestr(tx->tx_type),
- kptllnd_msgtype2str(tx->tx_msg->ptlm_type),
- tx->tx_active ? "A" : "",
- PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ?
- "" : "M",
- PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ?
- "" : "D",
- tx->tx_status,
- (tx->tx_tposted == 0) ? "not " : "",
- (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted),
- *kptllnd_tunables.kptl_timeout);
-
- kptllnd_tx_decref(tx);
-
- kptllnd_peer_close(peer, -ETIMEDOUT);
- kptllnd_peer_decref(peer); /* ...until here */
-
- /* start again now I've dropped the lock */
- goto again;
- }
-
- read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags);
-}
-
-kptl_peer_t *
-kptllnd_id2peer_locked (lnet_process_id_t id)
-{
- cfs_list_t *peers = kptllnd_nid2peerlist(id.nid);
- cfs_list_t *tmp;
- kptl_peer_t *peer;
-
- cfs_list_for_each (tmp, peers) {
- peer = cfs_list_entry (tmp, kptl_peer_t, peer_list);
-
- LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO ||
- peer->peer_state == PEER_STATE_ACTIVE);
-
- /* NB logical LNet peers share one kptl_peer_t */
- if (peer->peer_id.pid != id.pid ||
- LNET_NIDADDR(id.nid) != LNET_NIDADDR(peer->peer_id.nid))
- continue;
-
- kptllnd_peer_addref(peer);
-
- CDEBUG(D_NET, "%s -> %s (%d)\n",
- libcfs_id2str(id),
- kptllnd_ptlid2str(peer->peer_ptlid),
- cfs_atomic_read (&peer->peer_refcount));
- return peer;
- }
-
- return NULL;
-}
-
-void
-kptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id)
-{
- LCONSOLE_ERROR_MSG(0x127, "%s %s overflows the peer table[%d]: "
- "messages may be dropped\n",
- str, libcfs_id2str(id),
- kptllnd_data.kptl_n_active_peers);
- LCONSOLE_ERROR_MSG(0x128, "Please correct by increasing "
- "'max_nodes' or 'max_procs_per_node'\n");
-}
-
-__u64
-kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid)
-{
- kptl_peer_t *peer;
- cfs_list_t *tmp;
-
- /* Find the last matchbits I saw this new peer using. Note..
- A. This peer cannot be in the peer table - she's new!
- B. If I can't find the peer in the closing/zombie peers, all
- matchbits are safe because all refs to the (old) peer have gone
- so all txs have completed so there's no risk of matchbit
- collision!
- */
-
- LASSERT(kptllnd_id2peer_locked(lpid) == NULL);
-
- /* peer's last matchbits can't change after it comes out of the peer
- * table, so first match is fine */
-
- cfs_list_for_each (tmp, &kptllnd_data.kptl_closing_peers) {
- peer = cfs_list_entry (tmp, kptl_peer_t, peer_list);
-
- if (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(lpid.nid) &&
- peer->peer_id.pid == lpid.pid)
- return peer->peer_last_matchbits_seen;
- }
-
- cfs_list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) {
- peer = cfs_list_entry (tmp, kptl_peer_t, peer_list);
-
- if (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(lpid.nid) &&
- peer->peer_id.pid == lpid.pid)
- return peer->peer_last_matchbits_seen;
- }
-
- return PTL_RESERVED_MATCHBITS;
-}
-
-kptl_peer_t *
-kptllnd_peer_handle_hello (kptl_net_t *net,
- ptl_process_id_t initiator, kptl_msg_t *msg)
-{
- rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock;
- kptl_peer_t *peer;
- kptl_peer_t *new_peer;
- lnet_process_id_t lpid;
- unsigned long flags;
- kptl_tx_t *hello_tx;
- int rc;
- __u64 safe_matchbits;
- __u64 last_matchbits_seen;
-
- lpid.nid = msg->ptlm_srcnid;
- lpid.pid = msg->ptlm_srcpid;
-
- CDEBUG(D_NET, "hello from %s(%s)\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator));
-
- if (initiator.pid != kptllnd_data.kptl_portals_id.pid &&
- (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) {
- /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be
- * userspace. Refuse the connection if she hasn't set the
- * correct flag in her PID... */
- CERROR("Userflag not set in hello from %s (%s)\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator));
- return NULL;
- }
-
- /* kptlhm_matchbits are the highest matchbits my peer may have used to
- * RDMA to me. I ensure I never register buffers for RDMA that could
- * match any she used */
- safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1;
-
- if (safe_matchbits < PTL_RESERVED_MATCHBITS) {
- CERROR("Illegal matchbits "LPX64" in HELLO from %s\n",
- safe_matchbits, libcfs_id2str(lpid));
- return NULL;
- }
-
- if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) {
- CERROR("%s: max message size %d < MIN %d",
- libcfs_id2str(lpid),
- msg->ptlm_u.hello.kptlhm_max_msg_size,
- PTLLND_MIN_BUFFER_SIZE);
- return NULL;
- }
-
- if (msg->ptlm_credits <= 1) {
- CERROR("Need more than 1+%d credits from %s\n",
- msg->ptlm_credits, libcfs_id2str(lpid));
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- peer = kptllnd_id2peer_locked(lpid);
- if (peer != NULL) {
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- /* Completing HELLO handshake */
- LASSERT(peer->peer_incarnation == 0);
-
- if (msg->ptlm_dststamp != 0 &&
- msg->ptlm_dststamp != peer->peer_myincarnation) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR("Ignoring HELLO from %s: unexpected "
- "dststamp "LPX64" ("LPX64" wanted)\n",
- libcfs_id2str(lpid),
- msg->ptlm_dststamp,
- peer->peer_myincarnation);
- kptllnd_peer_decref(peer);
- return NULL;
- }
-
- /* Concurrent initiation or response to my HELLO */
- peer->peer_state = PEER_STATE_ACTIVE;
- peer->peer_incarnation = msg->ptlm_srcstamp;
- peer->peer_next_matchbits = safe_matchbits;
- peer->peer_max_msg_size =
- msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- write_unlock_irqrestore(g_lock, flags);
- return peer;
- }
-
- if (msg->ptlm_dststamp != 0 &&
- msg->ptlm_dststamp <= peer->peer_myincarnation) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR("Ignoring stale HELLO from %s: "
- "dststamp "LPX64" (current "LPX64")\n",
- libcfs_id2str(lpid),
- msg->ptlm_dststamp,
- peer->peer_myincarnation);
- kptllnd_peer_decref(peer);
- return NULL;
- }
-
- /* Brand new connection attempt: remove old incarnation */
- kptllnd_peer_close_locked(peer, 0);
- }
-
- kptllnd_cull_peertable_locked(lpid);
-
- write_unlock_irqrestore(g_lock, flags);
-
- if (peer != NULL) {
- CDEBUG(D_NET, "Peer %s (%s) reconnecting:"
- " stamp "LPX64"("LPX64")\n",
- libcfs_id2str(lpid), kptllnd_ptlid2str(initiator),
- msg->ptlm_srcstamp, peer->peer_incarnation);
-
- kptllnd_peer_decref(peer);
- peer = NULL;
- }
-
- hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (hello_tx == NULL) {
- CERROR("Unable to allocate HELLO message for %s\n",
- libcfs_id2str(lpid));
- return NULL;
- }
-
- kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
- lpid, sizeof(kptl_hello_msg_t));
-
- new_peer = kptllnd_peer_allocate(net, lpid, initiator);
- if (new_peer == NULL) {
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- rc = kptllnd_peer_reserve_buffers();
- if (rc != 0) {
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
-
- CERROR("Failed to reserve buffers for %s\n",
- libcfs_id2str(lpid));
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
-
- again:
- if (net->net_shutdown) {
- write_unlock_irqrestore(g_lock, flags);
-
- CERROR ("Shutdown started, refusing connection from %s\n",
- libcfs_id2str(lpid));
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- peer = kptllnd_id2peer_locked(lpid);
- if (peer != NULL) {
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- /* An outgoing message instantiated 'peer' for me */
- LASSERT(peer->peer_incarnation == 0);
-
- peer->peer_state = PEER_STATE_ACTIVE;
- peer->peer_incarnation = msg->ptlm_srcstamp;
- peer->peer_next_matchbits = safe_matchbits;
- peer->peer_max_msg_size =
- msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- write_unlock_irqrestore(g_lock, flags);
-
- CWARN("Outgoing instantiated peer %s\n",
- libcfs_id2str(lpid));
- } else {
- LASSERT (peer->peer_state == PEER_STATE_ACTIVE);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* WOW! Somehow this peer completed the HELLO
- * handshake while I slept. I guess I could have slept
- * while it rebooted and sent a new HELLO, so I'll fail
- * this one... */
- CWARN("Wow! peer %s\n", libcfs_id2str(lpid));
- kptllnd_peer_decref(peer);
- peer = NULL;
- }
-
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return peer;
- }
-
- if (kptllnd_data.kptl_n_active_peers ==
- kptllnd_data.kptl_expected_peers) {
- /* peer table full */
- write_unlock_irqrestore(g_lock, flags);
-
- kptllnd_peertable_overflow_msg("Connection from ", lpid);
-
- rc = kptllnd_reserve_buffers(1); /* HELLO headroom */
- if (rc != 0) {
- CERROR("Refusing connection from %s\n",
- libcfs_id2str(lpid));
- kptllnd_peer_unreserve_buffers();
- kptllnd_peer_decref(new_peer);
- kptllnd_tx_decref(hello_tx);
- return NULL;
- }
-
- write_lock_irqsave(g_lock, flags);
- kptllnd_data.kptl_expected_peers++;
- goto again;
- }
-
- last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(lpid);
-
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen;
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
- *kptllnd_tunables.kptl_max_msg_size;
-
- new_peer->peer_state = PEER_STATE_ACTIVE;
- new_peer->peer_incarnation = msg->ptlm_srcstamp;
- new_peer->peer_next_matchbits = safe_matchbits;
- new_peer->peer_last_matchbits_seen = last_matchbits_seen;
- new_peer->peer_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size;
-
- LASSERT (!net->net_shutdown);
- kptllnd_peer_add_peertable_locked(new_peer);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* NB someone else could get in now and post a message before I post
- * the HELLO, but post_tx/check_sends take care of that! */
-
- CDEBUG(D_NETTRACE, "%s: post response hello %p\n",
- libcfs_id2str(new_peer->peer_id), hello_tx);
-
- kptllnd_post_tx(new_peer, hello_tx, 0);
- kptllnd_peer_check_sends(new_peer);
-
- return new_peer;
-}
-
-void
-kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag)
-{
- kptllnd_post_tx(peer, tx, nfrag);
- kptllnd_peer_check_sends(peer);
-}
-
-int
-kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target,
- kptl_peer_t **peerp)
-{
- rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock;
- ptl_process_id_t ptl_id;
- kptl_peer_t *new_peer;
- kptl_tx_t *hello_tx;
- unsigned long flags;
- int rc;
- __u64 last_matchbits_seen;
-
- /* I expect to find the peer, so I only take a read lock... */
- read_lock_irqsave(g_lock, flags);
- *peerp = kptllnd_id2peer_locked(target);
- read_unlock_irqrestore(g_lock, flags);
-
- if (*peerp != NULL)
- return 0;
-
- if ((target.pid & LNET_PID_USERFLAG) != 0) {
- CWARN("Refusing to create a new connection to %s "
- "(non-kernel peer)\n", libcfs_id2str(target));
- return -EHOSTUNREACH;
- }
-
- /* The new peer is a kernel ptllnd, and kernel ptllnds all have the
- * same portals PID, which has nothing to do with LUSTRE_SRV_LNET_PID */
- ptl_id.nid = kptllnd_lnet2ptlnid(target.nid);
- ptl_id.pid = kptllnd_data.kptl_portals_id.pid;
-
- hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE);
- if (hello_tx == NULL) {
- CERROR("Unable to allocate connect message for %s\n",
- libcfs_id2str(target));
- return -ENOMEM;
- }
-
- hello_tx->tx_acked = 1;
- kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO,
- target, sizeof(kptl_hello_msg_t));
-
- new_peer = kptllnd_peer_allocate(net, target, ptl_id);
- if (new_peer == NULL) {
- rc = -ENOMEM;
- goto unwind_0;
- }
-
- rc = kptllnd_peer_reserve_buffers();
- if (rc != 0)
- goto unwind_1;
-
- write_lock_irqsave(g_lock, flags);
- again:
- /* Called only in lnd_send which can't happen after lnd_shutdown */
- LASSERT (!net->net_shutdown);
-
- *peerp = kptllnd_id2peer_locked(target);
- if (*peerp != NULL) {
- write_unlock_irqrestore(g_lock, flags);
- goto unwind_2;
- }
-
- kptllnd_cull_peertable_locked(target);
-
- if (kptllnd_data.kptl_n_active_peers ==
- kptllnd_data.kptl_expected_peers) {
- /* peer table full */
- write_unlock_irqrestore(g_lock, flags);
-
- kptllnd_peertable_overflow_msg("Connection to ", target);
-
- rc = kptllnd_reserve_buffers(1); /* HELLO headroom */
- if (rc != 0) {
- CERROR("Can't create connection to %s\n",
- libcfs_id2str(target));
- rc = -ENOMEM;
- goto unwind_2;
- }
- write_lock_irqsave(g_lock, flags);
- kptllnd_data.kptl_expected_peers++;
- goto again;
- }
-
- last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(target);
-
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen;
- hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size =
- *kptllnd_tunables.kptl_max_msg_size;
-
- new_peer->peer_state = PEER_STATE_WAITING_HELLO;
- new_peer->peer_last_matchbits_seen = last_matchbits_seen;
-
- kptllnd_peer_add_peertable_locked(new_peer);
-
- write_unlock_irqrestore(g_lock, flags);
-
- /* NB someone else could get in now and post a message before I post
- * the HELLO, but post_tx/check_sends take care of that! */
-
- CDEBUG(D_NETTRACE, "%s: post initial hello %p\n",
- libcfs_id2str(new_peer->peer_id), hello_tx);
-
- kptllnd_post_tx(new_peer, hello_tx, 0);
- kptllnd_peer_check_sends(new_peer);
-
- *peerp = new_peer;
- return 0;
-
- unwind_2:
- kptllnd_peer_unreserve_buffers();
- unwind_1:
- kptllnd_peer_decref(new_peer);
- unwind_0:
- kptllnd_tx_decref(hello_tx);
-
- return rc;
-}
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd_rx_buf.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
- #include "ptllnd.h"
-
-void
-kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp)
-{
- memset(rxbp, 0, sizeof(*rxbp));
- spin_lock_init(&rxbp->rxbp_lock);
- CFS_INIT_LIST_HEAD(&rxbp->rxbp_list);
-}
-
-void
-kptllnd_rx_buffer_destroy(kptl_rx_buffer_t *rxb)
-{
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
-
- LASSERT(rxb->rxb_refcount == 0);
- LASSERT(PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE));
- LASSERT(!rxb->rxb_posted);
- LASSERT(rxb->rxb_idle);
-
- cfs_list_del(&rxb->rxb_list);
- rxbp->rxbp_count--;
-
- LIBCFS_FREE(rxb->rxb_buffer, kptllnd_rx_buffer_size());
- LIBCFS_FREE(rxb, sizeof(*rxb));
-}
-
-int
-kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count)
-{
- int bufsize;
- int msgs_per_buffer;
- int rc;
- kptl_rx_buffer_t *rxb;
- char *buffer;
- unsigned long flags;
-
- bufsize = kptllnd_rx_buffer_size();
- msgs_per_buffer = bufsize / (*kptllnd_tunables.kptl_max_msg_size);
-
- CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n", count);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- for (;;) {
- if (rxbp->rxbp_shutdown) {
- rc = -ESHUTDOWN;
- break;
- }
-
- if (rxbp->rxbp_reserved + count <=
- rxbp->rxbp_count * msgs_per_buffer) {
- rc = 0;
- break;
- }
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- LIBCFS_ALLOC(rxb, sizeof(*rxb));
- LIBCFS_ALLOC(buffer, bufsize);
-
- if (rxb == NULL || buffer == NULL) {
- CERROR("Failed to allocate rx buffer\n");
-
- if (rxb != NULL)
- LIBCFS_FREE(rxb, sizeof(*rxb));
- if (buffer != NULL)
- LIBCFS_FREE(buffer, bufsize);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rc = -ENOMEM;
- break;
- }
-
- memset(rxb, 0, sizeof(*rxb));
-
- rxb->rxb_eventarg.eva_type = PTLLND_EVENTARG_TYPE_BUF;
- rxb->rxb_refcount = 0;
- rxb->rxb_pool = rxbp;
- rxb->rxb_idle = 0;
- rxb->rxb_posted = 0;
- rxb->rxb_buffer = buffer;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- if (rxbp->rxbp_shutdown) {
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- LIBCFS_FREE(rxb, sizeof(*rxb));
- LIBCFS_FREE(buffer, bufsize);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rc = -ESHUTDOWN;
- break;
- }
-
- cfs_list_add_tail(&rxb->rxb_list, &rxbp->rxbp_list);
- rxbp->rxbp_count++;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- kptllnd_rx_buffer_post(rxb);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- }
-
- if (rc == 0)
- rxbp->rxbp_reserved += count;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- return rc;
-}
-
-void
-kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp,
- int count)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n", count);
- rxbp->rxbp_reserved -= count;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-void
-kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp)
-{
- kptl_rx_buffer_t *rxb;
- int rc;
- int i;
- unsigned long flags;
- cfs_list_t *tmp;
- cfs_list_t *nxt;
- ptl_handle_md_t mdh;
-
- /* CAVEAT EMPTOR: I'm racing with everything here!!!
- *
- * Buffers can still be posted after I set rxbp_shutdown because I
- * can't hold rxbp_lock while I'm posting them.
- *
- * Calling PtlMDUnlink() here races with auto-unlinks; i.e. a buffer's
- * MD handle could become invalid under me. I am vulnerable to portals
- * re-using handles (i.e. make the same handle valid again, but for a
- * different MD) from when the MD is actually unlinked, to when the
- * event callback tells me it has been unlinked. */
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- rxbp->rxbp_shutdown = 1;
-
- for (i = 9;; i++) {
- cfs_list_for_each_safe(tmp, nxt, &rxbp->rxbp_list) {
- rxb = cfs_list_entry (tmp, kptl_rx_buffer_t, rxb_list);
-
- if (rxb->rxb_idle) {
- spin_unlock_irqrestore(&rxbp->rxbp_lock,
- flags);
- kptllnd_rx_buffer_destroy(rxb);
- spin_lock_irqsave(&rxbp->rxbp_lock,
- flags);
- continue;
- }
-
- mdh = rxb->rxb_mdh;
- if (PtlHandleIsEqual(mdh, PTL_INVALID_HANDLE))
- continue;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- rc = PtlMDUnlink(mdh);
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- /* callback clears rxb_mdh and drops net's ref
- * (which causes repost, but since I set
- * shutdown, it will just set the buffer
- * idle) */
-#else
- if (rc == PTL_OK) {
- rxb->rxb_posted = 0;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
- kptllnd_rx_buffer_decref_locked(rxb);
- }
-#endif
- }
-
- if (cfs_list_empty(&rxbp->rxbp_list))
- break;
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- /* Wait a bit for references to be dropped */
- CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
- "Waiting for %d Busy RX Buffers\n",
- rxbp->rxbp_count);
-
- cfs_pause(cfs_time_seconds(1));
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- }
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-void
-kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb)
-{
- int rc;
- ptl_md_t md;
- ptl_handle_me_t meh;
- ptl_handle_md_t mdh;
- ptl_process_id_t any;
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
- unsigned long flags;
-
- LASSERT (!in_interrupt());
- LASSERT (rxb->rxb_refcount == 0);
- LASSERT (!rxb->rxb_idle);
- LASSERT (!rxb->rxb_posted);
- LASSERT (PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE));
-
- any.nid = PTL_NID_ANY;
- any.pid = PTL_PID_ANY;
-
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- if (rxbp->rxbp_shutdown) {
- rxb->rxb_idle = 1;
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- return;
- }
-
- rxb->rxb_refcount = 1; /* net's ref */
- rxb->rxb_posted = 1; /* I'm posting */
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-
- rc = PtlMEAttach(kptllnd_data.kptl_nih,
- *kptllnd_tunables.kptl_portal,
- any,
- LNET_MSG_MATCHBITS,
- 0, /* all matchbits are valid - ignore none */
- PTL_UNLINK,
- PTL_INS_AFTER,
- &meh);
- if (rc != PTL_OK) {
- CERROR("PtlMeAttach rxb failed %s(%d)\n",
- kptllnd_errtype2str(rc), rc);
- goto failed;
- }
-
- /*
- * Setup MD
- */
- md.start = rxb->rxb_buffer;
- md.length = kptllnd_rx_buffer_size();
- md.threshold = PTL_MD_THRESH_INF;
- md.options = PTL_MD_OP_PUT |
- PTL_MD_LUSTRE_COMPLETION_SEMANTICS |
- PTL_MD_EVENT_START_DISABLE |
- PTL_MD_MAX_SIZE |
- PTL_MD_LOCAL_ALIGN8;
- md.user_ptr = &rxb->rxb_eventarg;
- md.max_size = *kptllnd_tunables.kptl_max_msg_size;
- md.eq_handle = kptllnd_data.kptl_eqh;
-
- rc = PtlMDAttach(meh, md, PTL_UNLINK, &mdh);
- if (rc == PTL_OK) {
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- if (rxb->rxb_posted) /* Not auto-unlinked yet!!! */
- rxb->rxb_mdh = mdh;
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- return;
- }
-
- CERROR("PtlMDAttach rxb failed %s(%d)\n",
- kptllnd_errtype2str(rc), rc);
- rc = PtlMEUnlink(meh);
- LASSERT(rc == PTL_OK);
-
- failed:
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
- rxb->rxb_posted = 0;
- /* XXX this will just try again immediately */
- kptllnd_rx_buffer_decref_locked(rxb);
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
-}
-
-kptl_rx_t *
-kptllnd_rx_alloc(void)
-{
- kptl_rx_t* rx;
-
- if (IS_SIMULATION_ENABLED(FAIL_RX_ALLOC)) {
- CERROR ("FAIL_RX_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- rx = kmem_cache_alloc(kptllnd_data.kptl_rx_cache, GFP_ATOMIC);
- if (rx == NULL) {
- CERROR("Failed to allocate rx\n");
- return NULL;
- }
-
- memset(rx, 0, sizeof(*rx));
- return rx;
-}
-
-void
-kptllnd_rx_done(kptl_rx_t *rx, int post_credit)
-{
- kptl_rx_buffer_t *rxb = rx->rx_rxb;
- kptl_peer_t *peer = rx->rx_peer;
- unsigned long flags;
-
- LASSERT (post_credit == PTLLND_POSTRX_NO_CREDIT ||
- post_credit == PTLLND_POSTRX_PEER_CREDIT);
-
- CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer);
-
- if (rxb != NULL)
- kptllnd_rx_buffer_decref(rxb);
-
- if (peer != NULL) {
- /* Update credits (after I've decref-ed the buffer) */
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (post_credit == PTLLND_POSTRX_PEER_CREDIT)
- peer->peer_outstanding_credits++;
-
- LASSERT (peer->peer_outstanding_credits +
- peer->peer_sent_credits <=
- *kptllnd_tunables.kptl_peertxcredits);
-
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- rx);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* I might have to send back credits */
- kptllnd_peer_check_sends(peer);
- kptllnd_peer_decref(peer);
- }
-
- kmem_cache_free(kptllnd_data.kptl_rx_cache, rx);
-}
-
-void
-kptllnd_rx_buffer_callback (ptl_event_t *ev)
-{
- kptl_eventarg_t *eva = ev->md.user_ptr;
- kptl_rx_buffer_t *rxb = kptllnd_eventarg2obj(eva);
- kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool;
- kptl_rx_t *rx;
- int unlinked;
- unsigned long flags;
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- unlinked = ev->unlinked;
-#else
- unlinked = ev->type == PTL_EVENT_UNLINK;
-#endif
-
- CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n",
- kptllnd_ptlid2str(ev->initiator),
- kptllnd_evtype2str(ev->type), ev->type, rxb,
- kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type,
- unlinked);
-
- LASSERT (!rxb->rxb_idle);
- LASSERT (ev->md.start == rxb->rxb_buffer);
- LASSERT (ev->offset + ev->mlength <=
- PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages);
- LASSERT (ev->type == PTL_EVENT_PUT_END ||
- ev->type == PTL_EVENT_UNLINK);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->match_bits == LNET_MSG_MATCHBITS);
-
- if (ev->ni_fail_type != PTL_NI_OK) {
- CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn",
- kptllnd_ptlid2str(ev->initiator),
- kptllnd_evtype2str(ev->type), ev->type, rxb,
- kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
- } else if (ev->type == PTL_EVENT_PUT_END &&
- !rxbp->rxbp_shutdown) {
-
- /* rxbp_shutdown sampled without locking! I only treat it as a
- * hint since shutdown can start while rx's are queued on
- * kptl_sched_rxq. */
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- /* Portals can't force message alignment - someone sending an
- * odd-length message will misalign subsequent messages and
- * force the fixup below... */
- if ((ev->mlength & 7) != 0)
- CWARN("Message from %s has odd length "LPU64": "
- "probable version incompatibility\n",
- kptllnd_ptlid2str(ev->initiator),
- (__u64)ev->mlength);
-#endif
- rx = kptllnd_rx_alloc();
- if (rx == NULL) {
- CERROR("Message from %s dropped: ENOMEM",
- kptllnd_ptlid2str(ev->initiator));
- } else {
- if ((ev->offset & 7) == 0) {
- kptllnd_rx_buffer_addref(rxb);
- rx->rx_rxb = rxb;
- rx->rx_nob = ev->mlength;
- rx->rx_msg = (kptl_msg_t *)
- (rxb->rxb_buffer + ev->offset);
- } else {
-#if (PTL_MD_LOCAL_ALIGN8 == 0)
- /* Portals can't force alignment - copy into
- * rx_space (avoiding overflow) to fix */
- int maxlen = *kptllnd_tunables.kptl_max_msg_size;
-
- rx->rx_rxb = NULL;
- rx->rx_nob = MIN(maxlen, ev->mlength);
- rx->rx_msg = (kptl_msg_t *)rx->rx_space;
- memcpy(rx->rx_msg, rxb->rxb_buffer + ev->offset,
- rx->rx_nob);
-#else
- /* Portals should have forced the alignment */
- LBUG();
-#endif
- }
-
- rx->rx_initiator = ev->initiator;
- rx->rx_treceived = jiffies;
- /* Queue for attention */
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock,
- flags);
-
- cfs_list_add_tail(&rx->rx_list,
- &kptllnd_data.kptl_sched_rxq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock_irqrestore(&kptllnd_data. \
- kptl_sched_lock, flags);
- }
- }
-
- if (unlinked) {
- spin_lock_irqsave(&rxbp->rxbp_lock, flags);
-
- rxb->rxb_posted = 0;
- rxb->rxb_mdh = PTL_INVALID_HANDLE;
- kptllnd_rx_buffer_decref_locked(rxb);
-
- spin_unlock_irqrestore(&rxbp->rxbp_lock, flags);
- }
-}
-
-void
-kptllnd_nak (ptl_process_id_t dest)
-{
- /* Fire-and-forget a stub message that will let the peer know my
- * protocol magic/version and make her drop/refresh any peer state she
- * might have with me. */
- ptl_md_t md = {
- .start = kptllnd_data.kptl_nak_msg,
- .length = kptllnd_data.kptl_nak_msg->ptlm_nob,
- .threshold = 1,
- .options = 0,
- .user_ptr = NULL,
- .eq_handle = PTL_EQ_NONE};
- ptl_handle_md_t mdh;
- int rc;
-
- rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh);
- if (rc != PTL_OK) {
- CWARN("Can't NAK %s: bind failed %s(%d)\n",
- kptllnd_ptlid2str(dest), kptllnd_errtype2str(rc), rc);
- return;
- }
-
- rc = PtlPut(mdh, PTL_NOACK_REQ, dest,
- *kptllnd_tunables.kptl_portal, 0,
- LNET_MSG_MATCHBITS, 0, 0);
- if (rc != PTL_OK) {
- CWARN("Can't NAK %s: put failed %s(%d)\n",
- kptllnd_ptlid2str(dest), kptllnd_errtype2str(rc), rc);
- }
-}
-
-kptl_net_t *
-kptllnd_find_net (lnet_nid_t nid)
-{
- kptl_net_t *net;
-
- read_lock(&kptllnd_data.kptl_net_rw_lock);
- cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list) {
- LASSERT (!net->net_shutdown);
-
- if (net->net_ni->ni_nid == nid) {
- kptllnd_net_addref(net);
- read_unlock(&kptllnd_data.kptl_net_rw_lock);
- return net;
- }
- }
- read_unlock(&kptllnd_data.kptl_net_rw_lock);
-
- return NULL;
-}
-
-void
-kptllnd_rx_parse(kptl_rx_t *rx)
-{
- kptl_msg_t *msg = rx->rx_msg;
- int rc = 0;
- int post_credit = PTLLND_POSTRX_PEER_CREDIT;
- kptl_net_t *net = NULL;
- kptl_peer_t *peer;
- cfs_list_t txs;
- unsigned long flags;
- lnet_process_id_t srcid;
-
- LASSERT (!in_interrupt());
- LASSERT (rx->rx_peer == NULL);
-
- CFS_INIT_LIST_HEAD(&txs);
-
- if ((rx->rx_nob >= 4 &&
- (msg->ptlm_magic == LNET_PROTO_MAGIC ||
- msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) ||
- (rx->rx_nob >= 6 &&
- ((msg->ptlm_magic == PTLLND_MSG_MAGIC &&
- msg->ptlm_version != PTLLND_MSG_VERSION) ||
- (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC) &&
- msg->ptlm_version != __swab16(PTLLND_MSG_VERSION))))) {
- /* NAK incompatible versions
- * See other LNDs for how to handle this if/when ptllnd begins
- * to allow different versions to co-exist */
- CERROR("Bad version: got %04x expected %04x from %s\n",
- (__u32)(msg->ptlm_magic == PTLLND_MSG_MAGIC ?
- msg->ptlm_version : __swab16(msg->ptlm_version)),
- PTLLND_MSG_VERSION, kptllnd_ptlid2str(rx->rx_initiator));
- /* NB backward compatibility */
- kptllnd_nak(rx->rx_initiator);
- goto rx_done;
- }
-
- rc = kptllnd_msg_unpack(msg, rx->rx_nob);
- if (rc != 0) {
- CERROR ("Error %d unpacking rx from %s\n",
- rc, kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- srcid.nid = msg->ptlm_srcnid;
- srcid.pid = msg->ptlm_srcpid;
-
- CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n",
- libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type),
- msg->ptlm_credits, rx, rx->rx_rxb,
- jiffies - rx->rx_treceived,
- cfs_duration_sec(jiffies - rx->rx_treceived));
-
- if (kptllnd_lnet2ptlnid(srcid.nid) != rx->rx_initiator.nid) {
- CERROR("Bad source nid %s from %s\n",
- libcfs_id2str(srcid),
- kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) {
- peer = kptllnd_id2peer(srcid);
- if (peer == NULL)
- goto rx_done;
-
- CWARN("NAK from %s (%d:%s)\n",
- libcfs_id2str(srcid), peer->peer_state,
- kptllnd_ptlid2str(rx->rx_initiator));
-
- /* NB can't nuke new peer - bug 17546 comment 31 */
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- CDEBUG(D_NET, "Stale NAK from %s(%s): WAITING_HELLO\n",
- libcfs_id2str(srcid),
- kptllnd_ptlid2str(rx->rx_initiator));
- kptllnd_peer_decref(peer);
- goto rx_done;
- }
-
- rc = -EPROTO;
- goto failed;
- }
-
- net = kptllnd_find_net(msg->ptlm_dstnid);
- if (net == NULL || msg->ptlm_dstpid != the_lnet.ln_pid) {
- CERROR("Bad dstid %s from %s\n",
- libcfs_id2str((lnet_process_id_t) {
- .nid = msg->ptlm_dstnid,
- .pid = msg->ptlm_dstpid}),
- kptllnd_ptlid2str(rx->rx_initiator));
- goto rx_done;
- }
-
- if (LNET_NIDNET(srcid.nid) != LNET_NIDNET(net->net_ni->ni_nid)) {
- lnet_nid_t nid = LNET_MKNID(LNET_NIDNET(net->net_ni->ni_nid),
- LNET_NIDADDR(srcid.nid));
- CERROR("Bad source nid %s from %s, %s expected.\n",
- libcfs_id2str(srcid),
- kptllnd_ptlid2str(rx->rx_initiator),
- libcfs_nid2str(nid));
- goto rx_done;
- }
-
- if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) {
- peer = kptllnd_peer_handle_hello(net, rx->rx_initiator, msg);
- if (peer == NULL)
- goto rx_done;
- } else {
- peer = kptllnd_id2peer(srcid);
- if (peer == NULL) {
- CWARN("NAK %s: no connection, %s must reconnect\n",
- kptllnd_msgtype2str(msg->ptlm_type),
- libcfs_id2str(srcid));
- /* NAK to make the peer reconnect */
- kptllnd_nak(rx->rx_initiator);
- goto rx_done;
- }
-
- /* Ignore any messages for a previous incarnation of me */
- if (msg->ptlm_dststamp < peer->peer_myincarnation) {
- kptllnd_peer_decref(peer);
- goto rx_done;
- }
-
- if (msg->ptlm_dststamp != peer->peer_myincarnation) {
- CERROR("%s: Unexpected dststamp "LPX64" "
- "("LPX64" expected)\n",
- libcfs_id2str(peer->peer_id), msg->ptlm_dststamp,
- peer->peer_myincarnation);
- rc = -EPROTO;
- goto failed;
- }
-
- if (peer->peer_state == PEER_STATE_WAITING_HELLO) {
- /* recoverable error - restart txs */
- spin_lock_irqsave(&peer->peer_lock, flags);
- kptllnd_cancel_txlist(&peer->peer_sendq, &txs);
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CWARN("NAK %s: Unexpected %s message\n",
- libcfs_id2str(srcid),
- kptllnd_msgtype2str(msg->ptlm_type));
- kptllnd_nak(rx->rx_initiator);
- rc = -EPROTO;
- goto failed;
- }
-
- if (msg->ptlm_srcstamp != peer->peer_incarnation) {
- CERROR("%s: Unexpected srcstamp "LPX64" "
- "("LPX64" expected)\n",
- libcfs_id2str(srcid),
- msg->ptlm_srcstamp,
- peer->peer_incarnation);
- rc = -EPROTO;
- goto failed;
- }
- }
-
- LASSERTF (LNET_NIDADDR(msg->ptlm_srcnid) ==
- LNET_NIDADDR(peer->peer_id.nid), "m %s p %s\n",
- libcfs_nid2str(msg->ptlm_srcnid),
- libcfs_nid2str(peer->peer_id.nid));
- LASSERTF (msg->ptlm_srcpid == peer->peer_id.pid, "m %u p %u\n",
- msg->ptlm_srcpid, peer->peer_id.pid);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- /* Check peer only sends when I've sent her credits */
- if (peer->peer_sent_credits == 0) {
- int c = peer->peer_credits;
- int oc = peer->peer_outstanding_credits;
- int sc = peer->peer_sent_credits;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- CERROR("%s: buffer overrun [%d/%d+%d]\n",
- libcfs_id2str(peer->peer_id), c, sc, oc);
- rc = -EPROTO;
- goto failed;
- }
- peer->peer_sent_credits--;
-
- /* No check for credit overflow - the peer may post new
- * buffers after the startup handshake. */
- peer->peer_credits += msg->ptlm_credits;
-
- /* This ensures the credit taken by NOOP can be returned */
- if (msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) {
- peer->peer_outstanding_credits++;
- post_credit = PTLLND_POSTRX_NO_CREDIT;
- }
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* See if something can go out now that credits have come in */
- if (msg->ptlm_credits != 0)
- kptllnd_peer_check_sends(peer);
-
- /* ptllnd-level protocol correct - rx takes my ref on peer and increments
- * peer_outstanding_credits when it completes */
- rx->rx_peer = peer;
- kptllnd_peer_alive(peer);
-
- switch (msg->ptlm_type) {
- default:
- /* already checked by kptllnd_msg_unpack() */
- LBUG();
-
- case PTLLND_MSG_TYPE_HELLO:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n");
- goto rx_done;
-
- case PTLLND_MSG_TYPE_NOOP:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n");
- goto rx_done;
-
- case PTLLND_MSG_TYPE_IMMEDIATE:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n");
- rc = lnet_parse(net->net_ni,
- &msg->ptlm_u.immediate.kptlim_hdr,
- msg->ptlm_srcnid,
- rx, 0);
- if (rc >= 0) { /* kptllnd_recv owns 'rx' now */
- kptllnd_net_decref(net);
- return;
- }
- goto failed;
-
- case PTLLND_MSG_TYPE_PUT:
- case PTLLND_MSG_TYPE_GET:
- CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n",
- msg->ptlm_type == PTLLND_MSG_TYPE_PUT ?
- "PUT" : "GET");
-
- /* checked in kptllnd_msg_unpack() */
- LASSERT (msg->ptlm_u.rdma.kptlrm_matchbits >=
- PTL_RESERVED_MATCHBITS);
-
- /* Update last match bits seen */
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (msg->ptlm_u.rdma.kptlrm_matchbits >
- rx->rx_peer->peer_last_matchbits_seen)
- rx->rx_peer->peer_last_matchbits_seen =
- msg->ptlm_u.rdma.kptlrm_matchbits;
-
- spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags);
-
- rc = lnet_parse(net->net_ni,
- &msg->ptlm_u.rdma.kptlrm_hdr,
- msg->ptlm_srcnid,
- rx, 1);
- if (rc >= 0) { /* kptllnd_recv owns 'rx' now */
- kptllnd_net_decref(net);
- return;
- }
- goto failed;
- }
-
- failed:
- LASSERT (rc != 0);
- kptllnd_peer_close(peer, rc);
- if (rx->rx_peer == NULL) /* drop ref on peer */
- kptllnd_peer_decref(peer); /* unless rx_done will */
- if (!cfs_list_empty(&txs)) {
- LASSERT (net != NULL);
- kptllnd_restart_txs(net, srcid, &txs);
- }
- rx_done:
- if (net != NULL)
- kptllnd_net_decref(net);
- kptllnd_rx_done(rx, post_credit);
-}
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- *
- * Copyright (c) 2012, Intel Corporation.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/ptllnd_tx.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-
- #include "ptllnd.h"
-
-void
-kptllnd_free_tx(kptl_tx_t *tx)
-{
- if (tx->tx_msg != NULL)
- LIBCFS_FREE(tx->tx_msg, sizeof(*tx->tx_msg));
-
- if (tx->tx_frags != NULL)
- LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags));
-
- LIBCFS_FREE(tx, sizeof(*tx));
-
- cfs_atomic_dec(&kptllnd_data.kptl_ntx);
-
- /* Keep the tunable in step for visibility */
- *kptllnd_tunables.kptl_ntx = cfs_atomic_read(&kptllnd_data.kptl_ntx);
-}
-
-kptl_tx_t *
-kptllnd_alloc_tx(void)
-{
- kptl_tx_t *tx;
-
- LIBCFS_ALLOC(tx, sizeof(*tx));
- if (tx == NULL) {
- CERROR("Failed to allocate TX\n");
- return NULL;
- }
-
- cfs_atomic_inc(&kptllnd_data.kptl_ntx);
-
- /* Keep the tunable in step for visibility */
- *kptllnd_tunables.kptl_ntx = cfs_atomic_read(&kptllnd_data.kptl_ntx);
-
- tx->tx_idle = 1;
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
- tx->tx_msg_mdh = PTL_INVALID_HANDLE;
- tx->tx_rdma_eventarg.eva_type = PTLLND_EVENTARG_TYPE_RDMA;
- tx->tx_msg_eventarg.eva_type = PTLLND_EVENTARG_TYPE_MSG;
- tx->tx_msg = NULL;
- tx->tx_peer = NULL;
- tx->tx_frags = NULL;
-
- LIBCFS_ALLOC(tx->tx_msg, sizeof(*tx->tx_msg));
- if (tx->tx_msg == NULL) {
- CERROR("Failed to allocate TX payload\n");
- goto failed;
- }
-
- LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags));
- if (tx->tx_frags == NULL) {
- CERROR("Failed to allocate TX frags\n");
- goto failed;
- }
-
- return tx;
-
- failed:
- kptllnd_free_tx(tx);
- return NULL;
-}
-
-int
-kptllnd_setup_tx_descs()
-{
- int n = *kptllnd_tunables.kptl_ntx;
- int i;
-
- for (i = 0; i < n; i++) {
- kptl_tx_t *tx = kptllnd_alloc_tx();
- if (tx == NULL)
- return -ENOMEM;
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
- cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs);
- spin_unlock(&kptllnd_data.kptl_tx_lock);
- }
-
- return 0;
-}
-
-void
-kptllnd_cleanup_tx_descs()
-{
- kptl_tx_t *tx;
-
- /* No locking; single threaded now */
- LASSERT (kptllnd_data.kptl_shutdown == 2);
-
- while (!cfs_list_empty(&kptllnd_data.kptl_idle_txs)) {
- tx = cfs_list_entry(kptllnd_data.kptl_idle_txs.next,
- kptl_tx_t, tx_list);
-
- cfs_list_del(&tx->tx_list);
- kptllnd_free_tx(tx);
- }
-
- LASSERT (cfs_atomic_read(&kptllnd_data.kptl_ntx) == 0);
-}
-
-kptl_tx_t *
-kptllnd_get_idle_tx(enum kptl_tx_type type)
-{
- kptl_tx_t *tx = NULL;
-
- if (IS_SIMULATION_ENABLED(FAIL_TX_PUT_ALLOC) &&
- type == TX_TYPE_PUT_REQUEST) {
- CERROR("FAIL_TX_PUT_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- if (IS_SIMULATION_ENABLED(FAIL_TX_GET_ALLOC) &&
- type == TX_TYPE_GET_REQUEST) {
- CERROR ("FAIL_TX_GET_ALLOC SIMULATION triggered\n");
- return NULL;
- }
-
- if (IS_SIMULATION_ENABLED(FAIL_TX)) {
- CERROR ("FAIL_TX SIMULATION triggered\n");
- return NULL;
- }
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
-
- if (cfs_list_empty (&kptllnd_data.kptl_idle_txs)) {
- spin_unlock(&kptllnd_data.kptl_tx_lock);
-
- tx = kptllnd_alloc_tx();
- if (tx == NULL)
- return NULL;
- } else {
- tx = cfs_list_entry(kptllnd_data.kptl_idle_txs.next,
- kptl_tx_t, tx_list);
- cfs_list_del(&tx->tx_list);
-
- spin_unlock(&kptllnd_data.kptl_tx_lock);
- }
-
- LASSERT (cfs_atomic_read(&tx->tx_refcount)== 0);
- LASSERT (tx->tx_idle);
- LASSERT (!tx->tx_active);
- LASSERT (tx->tx_lnet_msg == NULL);
- LASSERT (tx->tx_lnet_replymsg == NULL);
- LASSERT (tx->tx_peer == NULL);
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
-
- tx->tx_type = type;
- cfs_atomic_set(&tx->tx_refcount, 1);
- tx->tx_status = 0;
- tx->tx_idle = 0;
- tx->tx_tposted = 0;
- tx->tx_acked = *kptllnd_tunables.kptl_ack_puts;
-
- CDEBUG(D_NET, "tx=%p\n", tx);
- return tx;
-}
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
-int
-kptllnd_tx_abort_netio(kptl_tx_t *tx)
-{
- kptl_peer_t *peer = tx->tx_peer;
- ptl_handle_md_t msg_mdh;
- ptl_handle_md_t rdma_mdh;
- unsigned long flags;
-
- LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_active);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- msg_mdh = tx->tx_msg_mdh;
- rdma_mdh = tx->tx_rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* Uncompleted comms: there must have been some error and it must be
- * propagated to LNET... */
- LASSERT (tx->tx_status != 0 ||
- (tx->tx_lnet_msg == NULL &&
- tx->tx_lnet_replymsg == NULL));
-
- /* stash the tx on its peer until it completes */
- cfs_atomic_set(&tx->tx_refcount, 1);
- tx->tx_active = 1;
- cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* These unlinks will ensure completion events (normal or unlink) will
- * happen ASAP */
-
- if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE))
- PtlMDUnlink(msg_mdh);
-
- if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE))
- PtlMDUnlink(rdma_mdh);
-
- return -EAGAIN;
-}
-#else
-int
-kptllnd_tx_abort_netio(kptl_tx_t *tx)
-{
- ptl_peer_t *peer = tx->tx_peer;
- ptl_handle_md_t msg_mdh;
- ptl_handle_md_t rdma_mdh;
- unsigned long flags;
- ptl_err_t prc;
-
- LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_active);
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- msg_mdh = tx->tx_msg_mdh;
- rdma_mdh = tx->tx_rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* Uncompleted comms: there must have been some error and it must be
- * propagated to LNET... */
- LASSERT (tx->tx_status != 0 ||
- (tx->tx_lnet_msg == NULL &&
- tx->tx_replymsg == NULL));
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) {
- prc = PtlMDUnlink(msg_mdh);
- if (prc == PTL_OK)
- msg_mdh = PTL_INVALID_HANDLE;
- }
-
- if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- prc = PtlMDUnlink(rdma_mdh);
- if (prc == PTL_OK)
- rdma_mdh = PTL_INVALID_HANDLE;
- }
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- /* update tx_???_mdh if callback hasn't fired */
- if (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE))
- msg_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_msg_mdh = msg_mdh;
-
- if (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE))
- rdma_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_rdma_mdh = rdma_mdh;
-
- if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) &&
- PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return 0;
- }
-
- /* stash the tx on its peer until it completes */
- cfs_atomic_set(&tx->tx_refcount, 1);
- tx->tx_active = 1;
- cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);
-
- kptllnd_peer_addref(peer); /* extra ref for me... */
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* This will get the watchdog thread to try aborting all the peer's
- * comms again. NB, this deems it fair that 1 failing tx which can't
- * be aborted immediately (i.e. its MDs are still busy) is valid cause
- * to nuke everything to the same peer! */
- kptllnd_peer_close(peer, tx->tx_status);
-
- kptllnd_peer_decref(peer);
-
- return -EAGAIN;
-}
-#endif
-
-void
-kptllnd_tx_fini (kptl_tx_t *tx)
-{
- lnet_msg_t *replymsg = tx->tx_lnet_replymsg;
- lnet_msg_t *msg = tx->tx_lnet_msg;
- kptl_peer_t *peer = tx->tx_peer;
- int status = tx->tx_status;
- int rc;
-
- LASSERT (!in_interrupt());
- LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0);
- LASSERT (!tx->tx_idle);
- LASSERT (!tx->tx_active);
-
- /* TX has completed or failed */
-
- if (peer != NULL) {
- rc = kptllnd_tx_abort_netio(tx);
- if (rc != 0)
- return;
- }
-
- LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
- LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
-
- tx->tx_lnet_msg = tx->tx_lnet_replymsg = NULL;
- tx->tx_peer = NULL;
- tx->tx_idle = 1;
-
- spin_lock(&kptllnd_data.kptl_tx_lock);
- cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs);
- spin_unlock(&kptllnd_data.kptl_tx_lock);
-
- /* Must finalize AFTER freeing 'tx' */
- if (msg != NULL)
- lnet_finalize(NULL, msg, (replymsg == NULL) ? status : 0);
-
- if (replymsg != NULL)
- lnet_finalize(NULL, replymsg, status);
-
- if (peer != NULL)
- kptllnd_peer_decref(peer);
-}
-
-const char *
-kptllnd_tx_typestr(int type)
-{
- switch (type) {
- default:
- return "<TYPE UNKNOWN>";
-
- case TX_TYPE_SMALL_MESSAGE:
- return "msg";
-
- case TX_TYPE_PUT_REQUEST:
- return "put_req";
-
- case TX_TYPE_GET_REQUEST:
- return "get_req";
- break;
-
- case TX_TYPE_PUT_RESPONSE:
- return "put_rsp";
- break;
-
- case TX_TYPE_GET_RESPONSE:
- return "get_rsp";
- }
-}
-
-void
-kptllnd_tx_callback(ptl_event_t *ev)
-{
- kptl_eventarg_t *eva = ev->md.user_ptr;
- int ismsg = (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG);
- kptl_tx_t *tx = kptllnd_eventarg2obj(eva);
- kptl_peer_t *peer = tx->tx_peer;
- int ok = (ev->ni_fail_type == PTL_OK);
- int unlinked;
- unsigned long flags;
-
- LASSERT (peer != NULL);
- LASSERT (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG ||
- eva->eva_type == PTLLND_EVENTARG_TYPE_RDMA);
- LASSERT (!ismsg || !PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE));
- LASSERT (ismsg || !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE));
-
-#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS
- unlinked = ev->unlinked;
-#else
- unlinked = (ev->type == PTL_EVENT_UNLINK);
-#endif
- CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n",
- libcfs_id2str(peer->peer_id), peer->peer_credits,
- peer->peer_outstanding_credits, peer->peer_sent_credits,
- kptllnd_evtype2str(ev->type), ev->type,
- tx, kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
-
- switch (tx->tx_type) {
- default:
- LBUG();
-
- case TX_TYPE_SMALL_MESSAGE:
- LASSERT (ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- (ev->type == PTL_EVENT_ACK && tx->tx_acked));
- break;
-
- case TX_TYPE_PUT_REQUEST:
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- (ismsg && ev->type == PTL_EVENT_SEND_END) ||
- (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) ||
- (!ismsg && ev->type == PTL_EVENT_GET_END));
- break;
-
- case TX_TYPE_GET_REQUEST:
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- (ismsg && ev->type == PTL_EVENT_SEND_END) ||
- (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) ||
- (!ismsg && ev->type == PTL_EVENT_PUT_END));
-
- if (!ismsg && ok && ev->type == PTL_EVENT_PUT_END) {
- if (ev->hdr_data == PTLLND_RDMA_OK) {
- lnet_set_reply_msg_len(NULL,
- tx->tx_lnet_replymsg,
- ev->mlength);
- } else {
- /* no match at peer */
- tx->tx_status = -EIO;
- }
- }
- break;
-
- case TX_TYPE_PUT_RESPONSE:
- LASSERT (!ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- ev->type == PTL_EVENT_REPLY_END);
- break;
-
- case TX_TYPE_GET_RESPONSE:
- LASSERT (!ismsg);
- LASSERT (ev->type == PTL_EVENT_UNLINK ||
- ev->type == PTL_EVENT_SEND_END ||
- (ev->type == PTL_EVENT_ACK && tx->tx_acked));
- break;
- }
-
- if (ok) {
- kptllnd_peer_alive(peer);
- } else {
- CERROR("Portals error to %s: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n",
- libcfs_id2str(peer->peer_id),
- kptllnd_evtype2str(ev->type), ev->type,
- tx, kptllnd_errtype2str(ev->ni_fail_type),
- ev->ni_fail_type, unlinked);
- tx->tx_status = -EIO;
- kptllnd_peer_close(peer, -EIO);
- }
-
- if (!unlinked)
- return;
-
- spin_lock_irqsave(&peer->peer_lock, flags);
-
- if (ismsg)
- tx->tx_msg_mdh = PTL_INVALID_HANDLE;
- else
- tx->tx_rdma_mdh = PTL_INVALID_HANDLE;
-
- if (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ||
- !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ||
- !tx->tx_active) {
- spin_unlock_irqrestore(&peer->peer_lock, flags);
- return;
- }
-
- cfs_list_del(&tx->tx_list);
- tx->tx_active = 0;
-
- spin_unlock_irqrestore(&peer->peer_lock, flags);
-
- /* drop peer's ref, but if it was the last one... */
- if (cfs_atomic_dec_and_test(&tx->tx_refcount)) {
- /* ...finalize it in thread context! */
- spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags);
-
- cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_sched_txq);
- wake_up(&kptllnd_data.kptl_sched_waitq);
-
- spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock,
- flags);
- }
-}
+++ /dev/null
-/*
- * GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
- *
- * GPL HEADER END
- */
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
- *
- * lnet/klnds/ptllnd/wirecheck.c
- *
- * Author: PJ Kirner <pjkirner@clusterfs.com>
- */
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-
-#include <config.h>
-
-#include <lnet/api-support.h>
-
-/* This ghastly hack to allows me to include lib-types.h It doesn't affect any
- * assertions generated here (but fails-safe if it ever does) */
-typedef struct {
- int counter;
-} cfs_atomic_t;
-
-#include <lnet/lib-types.h>
-#include <lnet/ptllnd_wire.h>
-
-#ifndef HAVE_STRNLEN
-#define strnlen(s, i) strlen(s)
-#endif
-
-#define BLANK_LINE() \
-do { \
- printf ("\n"); \
-} while (0)
-
-#define COMMENT(c) \
-do { \
- printf (" /* "c" */\n"); \
-} while (0)
-
-#undef STRINGIFY
-#define STRINGIFY(a) #a
-
-#define CHECK_DEFINE(a) \
-do { \
- printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \
-} while (0)
-
-#define CHECK_VALUE(a) \
-do { \
- printf (" CLASSERT ("#a" == %d);\n", a); \
-} while (0)
-
-#define CHECK_MEMBER_OFFSET(s,m) \
-do { \
- CHECK_VALUE((int)offsetof(s, m)); \
-} while (0)
-
-#define CHECK_MEMBER_SIZEOF(s,m) \
-do { \
- CHECK_VALUE((int)sizeof(((s *)0)->m)); \
-} while (0)
-
-#define CHECK_MEMBER(s,m) \
-do { \
- CHECK_MEMBER_OFFSET(s, m); \
- CHECK_MEMBER_SIZEOF(s, m); \
-} while (0)
-
-#define CHECK_STRUCT(s) \
-do { \
- BLANK_LINE (); \
- COMMENT ("Checks for struct "#s); \
- CHECK_VALUE((int)sizeof(s)); \
-} while (0)
-
-void
-system_string (char *cmdline, char *str, int len)
-{
- int fds[2];
- int rc;
- pid_t pid;
-
- rc = pipe (fds);
- if (rc != 0)
- abort ();
-
- pid = fork ();
- if (pid == 0) {
- /* child */
- int fd = fileno(stdout);
-
- rc = dup2(fds[1], fd);
- if (rc != fd)
- abort();
-
- exit(system(cmdline));
- /* notreached */
- } else if ((int)pid < 0) {
- abort();
- } else {
- FILE *f = fdopen (fds[0], "r");
-
- if (f == NULL)
- abort();
-
- close(fds[1]);
-
- if (fgets(str, len, f) == NULL)
- abort();
-
- if (waitpid(pid, &rc, 0) != pid)
- abort();
-
- if (!WIFEXITED(rc) ||
- WEXITSTATUS(rc) != 0)
- abort();
-
- if (strnlen(str, len) == len)
- str[len - 1] = 0;
-
- if (str[strlen(str) - 1] == '\n')
- str[strlen(str) - 1] = 0;
-
- fclose(f);
- }
-}
-
-int
-main (int argc, char **argv)
-{
- char unameinfo[80];
- char gccinfo[80];
-
- system_string("uname -a", unameinfo, sizeof(unameinfo));
- system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
-
- printf ("void kptllnd_assert_wire_constants (void)\n"
- "{\n"
- " /* Wire protocol assertions generated by 'wirecheck'\n"
- " * running on %s\n"
- " * with %s */\n"
- "\n", unameinfo, gccinfo);
-
- BLANK_LINE ();
-
- COMMENT ("Constants...");
- CHECK_DEFINE (PTL_RESERVED_MATCHBITS);
- CHECK_DEFINE (LNET_MSG_MATCHBITS);
-
- CHECK_DEFINE (PTLLND_MSG_MAGIC);
- CHECK_DEFINE (PTLLND_MSG_VERSION);
-
- CHECK_DEFINE (PTLLND_RDMA_OK);
- CHECK_DEFINE (PTLLND_RDMA_FAIL);
-
- CHECK_DEFINE (PTLLND_MSG_TYPE_INVALID);
- CHECK_DEFINE (PTLLND_MSG_TYPE_PUT);
- CHECK_DEFINE (PTLLND_MSG_TYPE_GET);
- CHECK_DEFINE (PTLLND_MSG_TYPE_IMMEDIATE);
- CHECK_DEFINE (PTLLND_MSG_TYPE_NOOP);
- CHECK_DEFINE (PTLLND_MSG_TYPE_HELLO);
- CHECK_DEFINE (PTLLND_MSG_TYPE_NAK);
-
- CHECK_STRUCT (kptl_msg_t);
- CHECK_MEMBER (kptl_msg_t, ptlm_magic);
- CHECK_MEMBER (kptl_msg_t, ptlm_version);
- CHECK_MEMBER (kptl_msg_t, ptlm_type);
- CHECK_MEMBER (kptl_msg_t, ptlm_credits);
- CHECK_MEMBER (kptl_msg_t, ptlm_nob);
- CHECK_MEMBER (kptl_msg_t, ptlm_cksum);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcnid);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcstamp);
- CHECK_MEMBER (kptl_msg_t, ptlm_dstnid);
- CHECK_MEMBER (kptl_msg_t, ptlm_dststamp);
- CHECK_MEMBER (kptl_msg_t, ptlm_srcpid);
- CHECK_MEMBER (kptl_msg_t, ptlm_dstpid);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.immediate);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.rdma);
- CHECK_MEMBER (kptl_msg_t, ptlm_u.hello);
-
- CHECK_STRUCT (kptl_immediate_msg_t);
- CHECK_MEMBER (kptl_immediate_msg_t, kptlim_hdr);
- CHECK_MEMBER (kptl_immediate_msg_t, kptlim_payload[13]);
-
- CHECK_STRUCT (kptl_rdma_msg_t);
- CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_hdr);
- CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_matchbits);
-
- CHECK_STRUCT (kptl_hello_msg_t);
- CHECK_MEMBER (kptl_hello_msg_t, kptlhm_matchbits);
- CHECK_MEMBER (kptl_hello_msg_t, kptlhm_max_msg_size);
-
- printf ("}\n\n");
-
- return (0);
-}
+++ /dev/null
-// !$*UTF8*$!
-{
- archiveVersion = 1;
- classes = {
- };
- objectVersion = 39;
- objects = {
- 06AA1262FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = NO;
- GCC_DYNAMIC_NO_PIC = NO;
- GCC_ENABLE_FIX_AND_CONTINUE = YES;
- GCC_GENERATE_DEBUGGING_SYMBOLS = YES;
- GCC_OPTIMIZATION_LEVEL = 0;
- OPTIMIZATION_CFLAGS = "-O0";
- ZERO_LINK = YES;
- };
- isa = PBXBuildStyle;
- name = Development;
- };
- 06AA1263FFB20DD611CA28AA = {
- buildRules = (
- );
- buildSettings = {
- COPY_PHASE_STRIP = YES;
- GCC_ENABLE_FIX_AND_CONTINUE = NO;
- ZERO_LINK = NO;
- };
- isa = PBXBuildStyle;
- name = Deployment;
- };
-//060
-//061
-//062
-//063
-//064
-//080
-//081
-//082
-//083
-//084
- 089C1669FE841209C02AAC07 = {
- buildSettings = {
- };
- buildStyles = (
- 06AA1262FFB20DD611CA28AA,
- 06AA1263FFB20DD611CA28AA,
- );
- hasScannedForEncodings = 1;
- isa = PBXProject;
- mainGroup = 089C166AFE841209C02AAC07;
- projectDirPath = "";
- targets = (
- 32A4FEB80562C75700D090E7,
- );
- };
- 089C166AFE841209C02AAC07 = {
- children = (
- 247142CAFF3F8F9811CA285C,
- 089C167CFE841241C02AAC07,
- 19C28FB6FE9D52B211CA2CBB,
- );
- isa = PBXGroup;
- name = portals;
- refType = 4;
- sourceTree = "<group>";
- };
- 089C167CFE841241C02AAC07 = {
- children = (
- 32A4FEC30562C75700D090E7,
- );
- isa = PBXGroup;
- name = Resources;
- refType = 4;
- sourceTree = "<group>";
- };
-//080
-//081
-//082
-//083
-//084
-//190
-//191
-//192
-//193
-//194
- 19A778270730EACD00846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = module.c;
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778280730EACD00846375 = {
- fileRef = 19A778270730EACD00846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7782B0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-errno.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782C0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-ni.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782D0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "api-wrap.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782E0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-eq.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A7782F0730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-init.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778300730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-md.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778310730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-me.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778320730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-move.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778330730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-msg.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778340730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-ni.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778350730EB8400846375 = {
- fileEncoding = 30;
- isa = PBXFileReference;
- lastKnownFileType = sourcecode.c.c;
- path = "lib-pid.c";
- refType = 2;
- sourceTree = SOURCE_ROOT;
- };
- 19A778360730EB8400846375 = {
- fileRef = 19A7782B0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778370730EB8400846375 = {
- fileRef = 19A7782C0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778380730EB8400846375 = {
- fileRef = 19A7782D0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778390730EB8400846375 = {
- fileRef = 19A7782E0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783A0730EB8400846375 = {
- fileRef = 19A7782F0730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783B0730EB8400846375 = {
- fileRef = 19A778300730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783C0730EB8400846375 = {
- fileRef = 19A778310730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783D0730EB8400846375 = {
- fileRef = 19A778320730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783E0730EB8400846375 = {
- fileRef = 19A778330730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A7783F0730EB8400846375 = {
- fileRef = 19A778340730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19A778400730EB8400846375 = {
- fileRef = 19A778350730EB8400846375;
- isa = PBXBuildFile;
- settings = {
- };
- };
- 19C28FB6FE9D52B211CA2CBB = {
- children = (
- 32A4FEC40562C75800D090E7,
- );
- isa = PBXGroup;
- name = Products;
- refType = 4;
- sourceTree = "<group>";
- };
-//190
-//191
-//192
-//193
-//194
-//240
-//241
-//242
-//243
-//244
- 247142CAFF3F8F9811CA285C = {
- children = (
- 19A7782B0730EB8400846375,
- 19A7782C0730EB8400846375,
- 19A7782D0730EB8400846375,
- 19A7782E0730EB8400846375,
- 19A7782F0730EB8400846375,
- 19A778300730EB8400846375,
- 19A778310730EB8400846375,
- 19A778320730EB8400846375,
- 19A778330730EB8400846375,
- 19A778340730EB8400846375,
- 19A778350730EB8400846375,
- 19A778270730EACD00846375,
- );
- isa = PBXGroup;
- name = Source;
- path = "";
- refType = 4;
- sourceTree = "<group>";
- };
-//240
-//241
-//242
-//243
-//244
-//320
-//321
-//322
-//323
-//324
- 32A4FEB80562C75700D090E7 = {
- buildPhases = (
- 32A4FEB90562C75700D090E7,
- 32A4FEBA0562C75700D090E7,
- 32A4FEBB0562C75700D090E7,
- 32A4FEBD0562C75700D090E7,
- 32A4FEBF0562C75700D090E7,
- 32A4FEC00562C75700D090E7,
- 32A4FEC10562C75700D090E7,
- );
- buildRules = (
- );
- buildSettings = {
- FRAMEWORK_SEARCH_PATHS = "";
- GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO;
- GCC_WARN_UNKNOWN_PRAGMAS = NO;
- HEADER_SEARCH_PATHS = ../include;
- INFOPLIST_FILE = Info.plist;
- INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- LIBRARY_SEARCH_PATHS = "";
- MODULE_NAME = com.clusterfs.lustre.portals.portals.portals;
- MODULE_START = portals_start;
- MODULE_STOP = portals_stop;
- MODULE_VERSION = 1.0.1;
- OTHER_CFLAGS = "-D__KERNEL__";
- OTHER_LDFLAGS = "";
- OTHER_REZFLAGS = "";
- PRODUCT_NAME = portals;
- SECTORDER_FLAGS = "";
- WARNING_CFLAGS = "-Wmost";
- WRAPPER_EXTENSION = kext;
- };
- dependencies = (
- );
- isa = PBXNativeTarget;
- name = portals;
- productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions";
- productName = portals;
- productReference = 32A4FEC40562C75800D090E7;
- productType = "com.apple.product-type.kernel-extension";
- };
- 32A4FEB90562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEBA0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXHeadersBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBB0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXResourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBD0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- 19A778280730EACD00846375,
- 19A778360730EB8400846375,
- 19A778370730EB8400846375,
- 19A778380730EB8400846375,
- 19A778390730EB8400846375,
- 19A7783A0730EB8400846375,
- 19A7783B0730EB8400846375,
- 19A7783C0730EB8400846375,
- 19A7783D0730EB8400846375,
- 19A7783E0730EB8400846375,
- 19A7783F0730EB8400846375,
- 19A778400730EB8400846375,
- );
- isa = PBXSourcesBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEBF0562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXFrameworksBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC00562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXRezBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- };
- 32A4FEC10562C75700D090E7 = {
- buildActionMask = 2147483647;
- files = (
- );
- isa = PBXShellScriptBuildPhase;
- runOnlyForDeploymentPostprocessing = 0;
- shellPath = /bin/sh;
- shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi";
- };
- 32A4FEC30562C75700D090E7 = {
- isa = PBXFileReference;
- lastKnownFileType = text.plist.xml;
- path = Info.plist;
- refType = 4;
- sourceTree = "<group>";
- };
- 32A4FEC40562C75800D090E7 = {
- explicitFileType = wrapper.cfbundle;
- includeInIndex = 0;
- isa = PBXFileReference;
- path = portals.kext;
- refType = 3;
- sourceTree = BUILT_PRODUCTS_DIR;
- };
- };
- rootObject = 089C1669FE841209C02AAC07;
-}
return; /* can't carry NI status info */
cfs_list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) {
- int ptl_status = LNET_NI_STATUS_INVALID;
int down = 0;
int up = 0;
int i;
continue;
if (stat->ns_status == LNET_NI_STATUS_DOWN) {
- if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND)
- down++;
- else if (ptl_status != LNET_NI_STATUS_UP)
- ptl_status = LNET_NI_STATUS_DOWN;
+ down++;
continue;
}
up = 1;
break;
}
- /* ptl NIs are considered down only when
- * they're all down */
- if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND)
- ptl_status = LNET_NI_STATUS_UP;
continue;
}
rtr->lr_downis = 0;
continue;
}
- rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN);
+ rtr->lr_downis = down;
}
}
{ "lnet", "lnet/lnet" },
{ "kmxlnd", "lnet/klnds/mxlnd" },
{ "ko2iblnd", "lnet/klnds/o2iblnd" },
- { "kptllnd", "lnet/klnds/ptllnd" },
{ "kgnilnd", "lnet/klnds/gnilnd"},
{ "kqswlnd", "lnet/klnds/qswlnd" },
{ "kralnd", "lnet/klnds/ralnd" },
int index;
int rc;
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND,
+ if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND,
O2IBLND, GNILND, 0))
return -1;
sizeof(buffer[1]), 1),
data.ioc_u32[1], /* peer port */
data.ioc_u32[3]); /* conn_count */
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- id.nid = data.ioc_nid;
- id.pid = data.ioc_u32[4];
- printf ("%-20s s %d%s [%d] "LPD64".%06d"
- " m "LPD64"/"LPD64" q %d/%d c %d/%d\n",
- libcfs_id2str(id),
- data.ioc_net, /* state */
- data.ioc_flags ? "" : " ~!h", /* sent_hello */
- data.ioc_count, /* refcount */
- data.ioc_u64[0]/1000000, /* incarnation secs */
- (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */
- (((__u64)data.ioc_u32[1])<<32) |
- ((__u64)data.ioc_u32[0]), /* next_matchbits */
- (((__u64)data.ioc_u32[3])<<32) |
- ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */
- data.ioc_u32[5] >> 16, /* nsendq */
- data.ioc_u32[5] & 0xffff, /* nactiveq */
- data.ioc_u32[6] >> 16, /* credits */
- data.ioc_u32[6] & 0xffff); /* outstanding_credits */
} else if (g_net_is_compatible(NULL, RALND, 0)) {
printf ("%-20s [%d]@%s:%d\n",
libcfs_nid2str(data.ioc_nid), /* peer nid */
lnet_nid_t nid = LNET_NID_ANY;
lnet_pid_t pid = LNET_PID_ANY;
__u32 ip = 0;
- char *end;
int rc;
- if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND,
+ if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND,
O2IBLND, GNILND, 0))
return -1;
argv[0]);
return 0;
}
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- if (argc > 3) {
- fprintf (stderr, "usage: %s [nid] [pid]\n",
- argv[0]);
- return 0;
- }
} else if (argc > 2) {
fprintf (stderr, "usage: %s [nid]\n", argv[0]);
return 0;
argv[2]);
return -1;
}
- } else if (g_net_is_compatible(NULL, PTLLND, 0)) {
- if (argc > 2) {
- pid = strtol(argv[2], &end, 0);
- if (end == argv[2] || *end == 0) {
- fprintf(stderr, "Can't parse pid %s\n",
- argv[2]);
- return -1;
- }
- }
}
LIBCFS_IOC_INIT(data);