From 61bcfb2ff55edf6df2a3735b08bbeb479b598833 Mon Sep 17 00:00:00 2001 From: James Simmons Date: Tue, 31 Dec 2013 10:59:38 -0500 Subject: [PATCH] LU-1422 lnet: eliminate obsolete Cray SeaStar support Remove the bulk of code for the no longer supported SeaStar interconnect found on older Cray systems. Signed-off-by: James Simmons Change-Id: I29d07df9e7a5d33a700f7c9a14a49a9b3bf61dbe Reviewed-on: http://review.whamcloud.com/7469 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Liang Zhen Reviewed-by: Doug Oucharek Reviewed-by: Chuck Fossen Reviewed-by: Oleg Drokin --- lnet/autoconf/lustre-lnet.m4 | 87 -- lnet/include/lnet/Makefile.am | 2 +- lnet/include/lnet/ptllnd.h | 94 -- lnet/include/lnet/ptllnd_wire.h | 124 --- lnet/klnds/Makefile.in | 1 - lnet/klnds/autoMakefile.am | 2 +- lnet/klnds/ptllnd/.gitignore | 1 - lnet/klnds/ptllnd/Makefile.in | 12 - lnet/klnds/ptllnd/README | 47 - lnet/klnds/ptllnd/autoMakefile.am | 8 - lnet/klnds/ptllnd/ptllnd.c | 1005 --------------------- lnet/klnds/ptllnd/ptllnd.h | 592 ------------- lnet/klnds/ptllnd/ptllnd_cb.c | 834 ------------------ lnet/klnds/ptllnd/ptllnd_modparams.c | 364 -------- lnet/klnds/ptllnd/ptllnd_peer.c | 1463 ------------------------------- lnet/klnds/ptllnd/ptllnd_rx_buf.c | 826 ----------------- lnet/klnds/ptllnd/ptllnd_tx.c | 526 ----------- lnet/klnds/ptllnd/wirecheck.c | 226 ----- lnet/lnet/portals.xcode/project.pbxproj | 430 --------- lnet/lnet/router.c | 12 +- lnet/utils/debug.c | 1 - lnet/utils/portals.c | 39 +- 22 files changed, 6 insertions(+), 6690 deletions(-) delete mode 100644 lnet/include/lnet/ptllnd.h delete mode 100644 lnet/include/lnet/ptllnd_wire.h delete mode 100644 lnet/klnds/ptllnd/.gitignore delete mode 100755 lnet/klnds/ptllnd/Makefile.in delete mode 100644 lnet/klnds/ptllnd/README delete mode 100755 lnet/klnds/ptllnd/autoMakefile.am delete mode 100644 lnet/klnds/ptllnd/ptllnd.c delete mode 100644 lnet/klnds/ptllnd/ptllnd.h delete mode 100644 lnet/klnds/ptllnd/ptllnd_cb.c delete mode 100644 lnet/klnds/ptllnd/ptllnd_modparams.c delete mode 100644 lnet/klnds/ptllnd/ptllnd_peer.c delete mode 100644 lnet/klnds/ptllnd/ptllnd_rx_buf.c delete mode 100644 lnet/klnds/ptllnd/ptllnd_tx.c delete mode 100644 lnet/klnds/ptllnd/wirecheck.c delete mode 100644 lnet/lnet/portals.xcode/project.pbxproj diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4 index f593139..6ca08ff 100644 --- a/lnet/autoconf/lustre-lnet.m4 +++ b/lnet/autoconf/lustre-lnet.m4 @@ -93,41 +93,6 @@ fi ]) # -# LN_CONFIG_PORTALS -# -# configure support for Portals -# -AC_DEFUN([LN_CONFIG_PORTALS], -[AC_MSG_CHECKING([for portals]) -AC_ARG_WITH([portals], - AC_HELP_STRING([--with-portals=path], - [set path to portals]), - [ - case $with_portals in - no) ENABLEPORTALS=0 - ;; - *) PORTALS="${with_portals}" - ENABLEPORTALS=1 - ;; - esac - ], [ - ENABLEPORTALS=0 - ]) -PTLLNDCPPFLAGS="" -if test $ENABLEPORTALS -eq 0; then - AC_MSG_RESULT([no]) -elif test ! \( -f ${PORTALS}/include/portals/p30.h \); then - AC_MSG_RESULT([no]) - AC_MSG_ERROR([bad --with-portals path]) -else - PORTALS=$(readlink --canonicalize $PORTALS) - AC_MSG_RESULT([$PORTALS]) - PTLLNDCPPFLAGS="-I${PORTALS}/include" -fi -AC_SUBST(PTLLNDCPPFLAGS) -]) - -# # LN_CONFIG_BACKOFF # # check if tunable tcp backoff is available/wanted @@ -154,53 +119,6 @@ else fi ]) - -# -# LN_CONFIG_PTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_PTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the kernel portals LND]) - -PTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - PTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(PTLLND) -]) - -# -# LN_CONFIG_UPTLLND -# -# configure support for Portals LND -# -AC_DEFUN([LN_CONFIG_UPTLLND], -[ -if test -z "$ENABLEPORTALS"; then - LN_CONFIG_PORTALS -fi - -AC_MSG_CHECKING([whether to build the userspace portals LND]) - -UPTLLND="" -if test $ENABLEPORTALS -ne 0; then - AC_MSG_RESULT([yes]) - UPTLLND="ptllnd" -else - AC_MSG_RESULT([no]) -fi -AC_SUBST(UPTLLND) -]) - # # LN_CONFIG_USOCKLND # @@ -683,7 +601,6 @@ LN_CONFIG_QUADRICS LN_CONFIG_O2IB LN_CONFIG_RALND LN_CONFIG_GNILND -LN_CONFIG_PTLLND LN_CONFIG_MX # 2.6.36 LN_CONFIG_TCP_SENDPAGE @@ -809,7 +726,6 @@ if test x$enable_liblustre = xyes ; then fi LN_CONFIG_MAX_PAYLOAD -LN_CONFIG_UPTLLND LN_CONFIG_USOCKLND ]) @@ -825,7 +741,6 @@ AM_CONDITIONAL(BUILD_O2IBLND, test x$O2IBLND = "xo2iblnd") AM_CONDITIONAL(BUILD_RALND, test x$RALND = "xralnd") AM_CONDITIONAL(BUILD_GNILND, test x$GNILND = "xgnilnd") AM_CONDITIONAL(BUILD_GNILND_RCA, test x$GNILNDRCA = "xgnilndrca") -AM_CONDITIONAL(BUILD_PTLLND, test x$PTLLND = "xptllnd") AM_CONDITIONAL(BUILD_USOCKLND, test x$USOCKLND = "xusocklnd") ]) @@ -858,8 +773,6 @@ lnet/klnds/gnilnd/Makefile lnet/klnds/gnilnd/autoMakefile lnet/klnds/socklnd/Makefile lnet/klnds/socklnd/autoMakefile -lnet/klnds/ptllnd/Makefile -lnet/klnds/ptllnd/autoMakefile lnet/lnet/Makefile lnet/lnet/autoMakefile lnet/selftest/Makefile diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am index 0f4825b..4a7ac95 100644 --- a/lnet/include/lnet/Makefile.am +++ b/lnet/include/lnet/Makefile.am @@ -8,4 +8,4 @@ DIST_SUBDIRS = linux darwin EXTRA_DIST = api.h api-support.h \ lib-lnet.h lib-types.h lnet.h lnetctl.h types.h \ - socklnd.h ptllnd.h ptllnd_wire.h lnetst.h + socklnd.h lnetst.h diff --git a/lnet/include/lnet/ptllnd.h b/lnet/include/lnet/ptllnd.h deleted file mode 100644 index 915b654..0000000 --- a/lnet/include/lnet/ptllnd.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/include/lnet/ptllnd.h - * - * Author: PJ Kirner - */ - -/* - * The PTLLND was designed to support Portals with - * Lustre and non-lustre UNLINK semantics. - * However for now the two targets are Cray Portals - * on the XT3 and Lustre Portals (for testing) both - * have Lustre UNLINK semantics, so this is defined - * by default. - */ -#define LUSTRE_PORTALS_UNLINK_SEMANTICS - - -#ifdef _USING_LUSTRE_PORTALS_ - -/* NIDs are 64-bits on Lustre Portals */ -#define FMT_NID LPU64 -#define FMT_PID "%d" - -/* When using Lustre Portals Lustre completion semantics are imlicit*/ -#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS 0 - -#else /* _USING_CRAY_PORTALS_ */ - -/* NIDs are integers on Cray Portals */ -#define FMT_NID "%u" -#define FMT_PID "%d" - -/* When using Cray Portals this is defined in the Cray Portals Header*/ -/*#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS */ - -/* Can compare handles directly on Cray Portals */ -#define PtlHandleIsEqual(a,b) ((a) == (b)) - -/* Diffrent error types on Cray Portals*/ -#define ptl_err_t ptl_ni_fail_t - -/* - * The Cray Portals has no maximum number of IOVs. The - * maximum is limited only by memory and size of the - * int parameters (2^31-1). - * Lustre only really require that the underyling - * implemenation to support at least LNET_MAX_IOV, - * so for Cray portals we can safely just use that - * value here. - * - */ -#define PTL_MD_MAX_IOV LNET_MAX_IOV - -#endif - -#define FMT_PTLID "ptlid:"FMT_PID"-"FMT_NID - -/* Align incoming small request messages to an 8 byte boundary if this is - * supported to avoid alignment issues on some architectures */ -#ifndef PTL_MD_LOCAL_ALIGN8 -# define PTL_MD_LOCAL_ALIGN8 0 -#endif diff --git a/lnet/include/lnet/ptllnd_wire.h b/lnet/include/lnet/ptllnd_wire.h deleted file mode 100644 index 845a532..0000000 --- a/lnet/include/lnet/ptllnd_wire.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/include/lnet/ptllnd_wire.h - * - * Author: PJ Kirner - */ - -/* Minimum buffer size that any peer will post to receive ptllnd messages */ -#define PTLLND_MIN_BUFFER_SIZE 256 - -/************************************************************************ - * Tunable defaults that {u,k}lnds/ptllnd should have in common. - */ - -#define PTLLND_PORTAL 9 /* The same portal PTLPRC used when talking to cray portals */ -#define PTLLND_PID 9 /* The Portals PID */ -#define PTLLND_PEERCREDITS 8 /* concurrent sends to 1 peer */ - -/* Default buffer size for kernel ptllnds (guaranteed eager) */ -#define PTLLND_MAX_KLND_MSG_SIZE 512 - -/* Default buffer size for catamount ptllnds (not guaranteed eager) - large - * enough to avoid RDMA for anything sent while control is not in liblustre */ -#define PTLLND_MAX_ULND_MSG_SIZE 512 - - -/************************************************************************ - * Portals LND Wire message format. - * These are sent in sender's byte order (i.e. receiver flips). - */ - -#define PTL_RESERVED_MATCHBITS 0x100 /* below this value is reserved - * above is for bulk data transfer */ -#define LNET_MSG_MATCHBITS 0 /* the value for the message channel */ - -typedef struct -{ - lnet_hdr_t kptlim_hdr; /* portals header */ - char kptlim_payload[0]; /* piggy-backed payload */ -} WIRE_ATTR kptl_immediate_msg_t; - -typedef struct -{ - lnet_hdr_t kptlrm_hdr; /* portals header */ - __u64 kptlrm_matchbits; /* matchbits */ -} WIRE_ATTR kptl_rdma_msg_t; - -typedef struct -{ - __u64 kptlhm_matchbits; /* matchbits */ - __u32 kptlhm_max_msg_size; /* max message size */ -} WIRE_ATTR kptl_hello_msg_t; - -typedef struct -{ - /* First 2 fields fixed FOR ALL TIME */ - __u32 ptlm_magic; /* I'm a Portals LND message */ - __u16 ptlm_version; /* this is my version number */ - __u8 ptlm_type; /* the message type */ - __u8 ptlm_credits; /* returned credits */ - __u32 ptlm_nob; /* # bytes in whole message */ - __u32 ptlm_cksum; /* checksum (0 == no checksum) */ - __u64 ptlm_srcnid; /* sender's NID */ - __u64 ptlm_srcstamp; /* sender's incarnation */ - __u64 ptlm_dstnid; /* destination's NID */ - __u64 ptlm_dststamp; /* destination's incarnation */ - __u32 ptlm_srcpid; /* sender's PID */ - __u32 ptlm_dstpid; /* destination's PID */ - - union { - kptl_immediate_msg_t immediate; - kptl_rdma_msg_t rdma; - kptl_hello_msg_t hello; - } WIRE_ATTR ptlm_u; - -} kptl_msg_t; - -/* kptl_msg_t::ptlm_credits is only a __u8 */ -#define PTLLND_MSG_MAX_CREDITS ((typeof(((kptl_msg_t*) 0)->ptlm_credits)) -1) - -#define PTLLND_MSG_MAGIC LNET_PROTO_PTL_MAGIC -#define PTLLND_MSG_VERSION 0x04 - -#define PTLLND_RDMA_OK 0x00 -#define PTLLND_RDMA_FAIL 0x01 - -#define PTLLND_MSG_TYPE_INVALID 0x00 -#define PTLLND_MSG_TYPE_PUT 0x01 -#define PTLLND_MSG_TYPE_GET 0x02 -#define PTLLND_MSG_TYPE_IMMEDIATE 0x03 /* No bulk data xfer*/ -#define PTLLND_MSG_TYPE_NOOP 0x04 -#define PTLLND_MSG_TYPE_HELLO 0x05 -#define PTLLND_MSG_TYPE_NAK 0x06 diff --git a/lnet/klnds/Makefile.in b/lnet/klnds/Makefile.in index 0d99a87..ad17897 100644 --- a/lnet/klnds/Makefile.in +++ b/lnet/klnds/Makefile.in @@ -3,7 +3,6 @@ @BUILD_GNILND_TRUE@subdir-m += gnilnd @BUILD_O2IBLND_TRUE@subdir-m += o2iblnd @BUILD_QSWLND_TRUE@subdir-m += qswlnd -@BUILD_PTLLND_TRUE@subdir-m += ptllnd subdir-m += socklnd @INCLUDE_RULES@ diff --git a/lnet/klnds/autoMakefile.am b/lnet/klnds/autoMakefile.am index 1591d87..78eb985 100644 --- a/lnet/klnds/autoMakefile.am +++ b/lnet/klnds/autoMakefile.am @@ -34,4 +34,4 @@ # Lustre is a trademark of Sun Microsystems, Inc. # -SUBDIRS = socklnd qswlnd mxlnd ralnd gnilnd ptllnd o2iblnd +SUBDIRS = socklnd qswlnd mxlnd ralnd gnilnd o2iblnd diff --git a/lnet/klnds/ptllnd/.gitignore b/lnet/klnds/ptllnd/.gitignore deleted file mode 100644 index c123ea7..0000000 --- a/lnet/klnds/ptllnd/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/wirecheck diff --git a/lnet/klnds/ptllnd/Makefile.in b/lnet/klnds/ptllnd/Makefile.in deleted file mode 100755 index 1532ab6..0000000 --- a/lnet/klnds/ptllnd/Makefile.in +++ /dev/null @@ -1,12 +0,0 @@ -MODULES := kptllnd - -EXTRA_POST_CFLAGS := @PTLLNDCPPFLAGS@ - -kptllnd-objs := ptllnd.o \ - ptllnd_cb.o \ - ptllnd_modparams.o \ - ptllnd_peer.o \ - ptllnd_rx_buf.o \ - ptllnd_tx.o - -@INCLUDE_RULES@ diff --git a/lnet/klnds/ptllnd/README b/lnet/klnds/ptllnd/README deleted file mode 100644 index 5cb6cfc..0000000 --- a/lnet/klnds/ptllnd/README +++ /dev/null @@ -1,47 +0,0 @@ -1. This version of the Portals LND is intended to work on the Cray XT3 using - Cray Portals as a network transport. - -2. To enable the building of the Portals LND (ptllnd.ko) configure with the - following option: - ./configure --with-portals= - -3. The following configuration options are supported - - ntx: - The total number of message descritprs - - concurrent_peers: - The maximum number of conncurent peers. Peers attemting - to connect beyond the maximum will not be allowd. - - peer_hash_table_size: - The number of hash table slots for the peers. This number - should scale with concurrent_peers. - - cksum: - Set to non-zero to enable message (not RDMA) checksums for - outgoing packets. Incoming packets will always be checksumed - if necssary, independnt of this value. - - timeout: - The amount of time a request can linger in a peers active - queue, before the peer is considered dead. Units: seconds. - - portal: - The portal ID to use for the ptllnd traffic. - - rxb_npages: - The number of pages in a RX Buffer. - - credits: - The maximum total number of concurrent sends that are - outstanding at any given instant. - - peercredits: - The maximum number of concurrent sends that are - outstanding to a single piere at any given instant. - - max_msg_size: - The maximum immedate message size. This MUST be - the same on all nodes in a cluster. A peer connecting - with a diffrent max_msg_size will be rejected. diff --git a/lnet/klnds/ptllnd/autoMakefile.am b/lnet/klnds/ptllnd/autoMakefile.am deleted file mode 100755 index 1001efd..0000000 --- a/lnet/klnds/ptllnd/autoMakefile.am +++ /dev/null @@ -1,8 +0,0 @@ -if MODULES -if BUILD_PTLLND -modulenet_DATA = kptllnd$(KMODEXT) -endif -endif - -MOSTLYCLEANFILES = @MOSTLYCLEANFILES@ -EXTRA_DIST = $(kptllnd-objs:%.o=%.c) ptllnd.h diff --git a/lnet/klnds/ptllnd/ptllnd.c b/lnet/klnds/ptllnd/ptllnd.c deleted file mode 100644 index 5650ed0..0000000 --- a/lnet/klnds/ptllnd/ptllnd.c +++ /dev/null @@ -1,1005 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd.c - * - * Author: PJ Kirner - */ - -#include "ptllnd.h" - -lnd_t kptllnd_lnd = { - .lnd_type = PTLLND, - .lnd_startup = kptllnd_startup, - .lnd_shutdown = kptllnd_shutdown, - .lnd_ctl = kptllnd_ctl, - .lnd_query = kptllnd_query, - .lnd_send = kptllnd_send, - .lnd_recv = kptllnd_recv, - .lnd_eager_recv = kptllnd_eager_recv, -}; - -kptl_data_t kptllnd_data; - -char * -kptllnd_ptlid2str(ptl_process_id_t id) -{ - static char strs[64][32]; - static int idx = 0; - - unsigned long flags; - char *str; - - spin_lock_irqsave(&kptllnd_data.kptl_ptlid2str_lock, flags); - str = strs[idx++]; - if (idx >= sizeof(strs)/sizeof(strs[0])) - idx = 0; - spin_unlock_irqrestore(&kptllnd_data.kptl_ptlid2str_lock, flags); - - snprintf(str, sizeof(strs[0]), FMT_PTLID, id.pid, id.nid); - return str; -} - -void -kptllnd_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' - * running on Linux fedora 2.6.11-co-0.6.4 #1 Mon Jun 19 05:36:13 UTC 2006 i686 i686 i386 GNU - * with gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) */ - - - /* Constants... */ - CLASSERT (PTL_RESERVED_MATCHBITS == 0x100); - CLASSERT (LNET_MSG_MATCHBITS == 0); - CLASSERT (PTLLND_MSG_MAGIC == 0x50746C4E); - CLASSERT (PTLLND_MSG_VERSION == 0x04); - CLASSERT (PTLLND_RDMA_OK == 0x00); - CLASSERT (PTLLND_RDMA_FAIL == 0x01); - CLASSERT (PTLLND_MSG_TYPE_INVALID == 0x00); - CLASSERT (PTLLND_MSG_TYPE_PUT == 0x01); - CLASSERT (PTLLND_MSG_TYPE_GET == 0x02); - CLASSERT (PTLLND_MSG_TYPE_IMMEDIATE == 0x03); - CLASSERT (PTLLND_MSG_TYPE_NOOP == 0x04); - CLASSERT (PTLLND_MSG_TYPE_HELLO == 0x05); - CLASSERT (PTLLND_MSG_TYPE_NAK == 0x06); - - /* Checks for struct kptl_msg_t */ - CLASSERT ((int)sizeof(kptl_msg_t) == 136); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_magic) == 0); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_magic) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_version) == 4); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_version) == 2); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_type) == 6); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_type) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_credits) == 7); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_credits) == 1); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_nob) == 8); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_nob) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_cksum) == 12); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_cksum) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcnid) == 16); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcstamp) == 24); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcstamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstnid) == 32); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstnid) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dststamp) == 40); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dststamp) == 8); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_srcpid) == 48); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_srcpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_dstpid) == 52); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_dstpid) == 4); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.immediate) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.immediate) == 72); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.rdma) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.rdma) == 80); - CLASSERT ((int)offsetof(kptl_msg_t, ptlm_u.hello) == 56); - CLASSERT ((int)sizeof(((kptl_msg_t *)0)->ptlm_u.hello) == 12); - - /* Checks for struct kptl_immediate_msg_t */ - CLASSERT ((int)sizeof(kptl_immediate_msg_t) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_hdr) == 72); - CLASSERT ((int)offsetof(kptl_immediate_msg_t, kptlim_payload[13]) == 85); - CLASSERT ((int)sizeof(((kptl_immediate_msg_t *)0)->kptlim_payload[13]) == 1); - - /* Checks for struct kptl_rdma_msg_t */ - CLASSERT ((int)sizeof(kptl_rdma_msg_t) == 80); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_hdr) == 0); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_hdr) == 72); - CLASSERT ((int)offsetof(kptl_rdma_msg_t, kptlrm_matchbits) == 72); - CLASSERT ((int)sizeof(((kptl_rdma_msg_t *)0)->kptlrm_matchbits) == 8); - - /* Checks for struct kptl_hello_msg_t */ - CLASSERT ((int)sizeof(kptl_hello_msg_t) == 12); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_matchbits) == 0); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_matchbits) == 8); - CLASSERT ((int)offsetof(kptl_hello_msg_t, kptlhm_max_msg_size) == 8); - CLASSERT ((int)sizeof(((kptl_hello_msg_t *)0)->kptlhm_max_msg_size) == 4); -} - -const char *kptllnd_evtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_EVENT_GET_START); - DO_TYPE(PTL_EVENT_GET_END); - DO_TYPE(PTL_EVENT_PUT_START); - DO_TYPE(PTL_EVENT_PUT_END); - DO_TYPE(PTL_EVENT_REPLY_START); - DO_TYPE(PTL_EVENT_REPLY_END); - DO_TYPE(PTL_EVENT_ACK); - DO_TYPE(PTL_EVENT_SEND_START); - DO_TYPE(PTL_EVENT_SEND_END); - DO_TYPE(PTL_EVENT_UNLINK); - default: - return ""; - } -#undef DO_TYPE -} - -const char *kptllnd_msgtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTLLND_MSG_TYPE_INVALID); - DO_TYPE(PTLLND_MSG_TYPE_PUT); - DO_TYPE(PTLLND_MSG_TYPE_GET); - DO_TYPE(PTLLND_MSG_TYPE_IMMEDIATE); - DO_TYPE(PTLLND_MSG_TYPE_HELLO); - DO_TYPE(PTLLND_MSG_TYPE_NOOP); - DO_TYPE(PTLLND_MSG_TYPE_NAK); - default: - return ""; - } -#undef DO_TYPE -} - -const char *kptllnd_errtype2str(int type) -{ -#define DO_TYPE(x) case x: return #x; - switch(type) - { - DO_TYPE(PTL_OK); - DO_TYPE(PTL_SEGV); - DO_TYPE(PTL_NO_SPACE); - DO_TYPE(PTL_ME_IN_USE); - DO_TYPE(PTL_NAL_FAILED); - DO_TYPE(PTL_NO_INIT); - DO_TYPE(PTL_IFACE_DUP); - DO_TYPE(PTL_IFACE_INVALID); - DO_TYPE(PTL_HANDLE_INVALID); - DO_TYPE(PTL_MD_INVALID); - DO_TYPE(PTL_ME_INVALID); - DO_TYPE(PTL_PROCESS_INVALID); - DO_TYPE(PTL_PT_INDEX_INVALID); - DO_TYPE(PTL_SR_INDEX_INVALID); - DO_TYPE(PTL_EQ_INVALID); - DO_TYPE(PTL_EQ_DROPPED); - DO_TYPE(PTL_EQ_EMPTY); - DO_TYPE(PTL_MD_NO_UPDATE); - DO_TYPE(PTL_FAIL); - DO_TYPE(PTL_AC_INDEX_INVALID); - DO_TYPE(PTL_MD_ILLEGAL); - DO_TYPE(PTL_ME_LIST_TOO_LONG); - DO_TYPE(PTL_MD_IN_USE); - DO_TYPE(PTL_NI_INVALID); - DO_TYPE(PTL_PID_INVALID); - DO_TYPE(PTL_PT_FULL); - DO_TYPE(PTL_VAL_FAILED); - DO_TYPE(PTL_NOT_IMPLEMENTED); - DO_TYPE(PTL_NO_ACK); - DO_TYPE(PTL_EQ_IN_USE); - DO_TYPE(PTL_PID_IN_USE); - DO_TYPE(PTL_INV_EQ_SIZE); - DO_TYPE(PTL_AGAIN); - default: - return ""; - } -#undef DO_TYPE -} - -__u32 -kptllnd_cksum (void *ptr, int nob) -{ - char *c = ptr; - __u32 sum = 0; - - while (nob-- > 0) - sum = ((sum << 1) | (sum >> 31)) + *c++; - - /* ensure I don't return 0 (== no checksum) */ - return (sum == 0) ? 1 : sum; -} - -void -kptllnd_init_msg(kptl_msg_t *msg, int type, - lnet_process_id_t target, int body_nob) -{ - msg->ptlm_type = type; - msg->ptlm_nob = (offsetof(kptl_msg_t, ptlm_u) + body_nob + 7) & ~7; - msg->ptlm_dstpid = target.pid; - msg->ptlm_dstnid = target.nid; - msg->ptlm_srcpid = the_lnet.ln_pid; - msg->ptlm_srcnid = kptllnd_ptl2lnetnid(target.nid, - kptllnd_data.kptl_portals_id.nid); - - LASSERT(msg->ptlm_nob <= *kptllnd_tunables.kptl_max_msg_size); -} - -void -kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer) -{ - msg->ptlm_magic = PTLLND_MSG_MAGIC; - msg->ptlm_version = PTLLND_MSG_VERSION; - /* msg->ptlm_type Filled in kptllnd_init_msg() */ - msg->ptlm_credits = peer->peer_outstanding_credits; - /* msg->ptlm_nob Filled in kptllnd_init_msg() */ - msg->ptlm_cksum = 0; - /* msg->ptlm_{src|dst}[pn]id Filled in kptllnd_init_msg */ - msg->ptlm_srcstamp = peer->peer_myincarnation; - msg->ptlm_dststamp = peer->peer_incarnation; - - if (*kptllnd_tunables.kptl_checksum) { - /* NB ptlm_cksum zero while computing cksum */ - msg->ptlm_cksum = kptllnd_cksum(msg, - offsetof(kptl_msg_t, ptlm_u)); - } -} - -int -kptllnd_msg_unpack(kptl_msg_t *msg, int nob) -{ - const int hdr_size = offsetof(kptl_msg_t, ptlm_u); - __u32 msg_cksum; - __u16 msg_version; - int flip; - - /* 6 bytes are enough to have received magic + version */ - if (nob < 6) { - CERROR("Very Short message: %d\n", nob); - return -EPROTO; - } - - /* - * Determine if we need to flip - */ - if (msg->ptlm_magic == PTLLND_MSG_MAGIC) { - flip = 0; - } else if (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC)) { - flip = 1; - } else { - CERROR("Bad magic: %08x\n", msg->ptlm_magic); - return -EPROTO; - } - - msg_version = flip ? __swab16(msg->ptlm_version) : msg->ptlm_version; - - if (msg_version != PTLLND_MSG_VERSION) { - CERROR("Bad version: got %04x expected %04x\n", - (__u32)msg_version, PTLLND_MSG_VERSION); - return -EPROTO; - } - - if (nob < hdr_size) { - CERROR("Short message: got %d, wanted at least %d\n", - nob, hdr_size); - return -EPROTO; - } - - /* checksum must be computed with - * 1) ptlm_cksum zero and - * 2) BEFORE anything gets modified/flipped - */ - msg_cksum = flip ? __swab32(msg->ptlm_cksum) : msg->ptlm_cksum; - msg->ptlm_cksum = 0; - if (msg_cksum != 0 && - msg_cksum != kptllnd_cksum(msg, hdr_size)) { - CERROR("Bad checksum\n"); - return -EPROTO; - } - - msg->ptlm_version = msg_version; - msg->ptlm_cksum = msg_cksum; - - if (flip) { - /* These two are 1 byte long so we don't swap them - But check this assumtion*/ - CLASSERT (sizeof(msg->ptlm_type) == 1); - CLASSERT (sizeof(msg->ptlm_credits) == 1); - /* src & dst stamps are opaque cookies */ - __swab32s(&msg->ptlm_nob); - __swab64s(&msg->ptlm_srcnid); - __swab64s(&msg->ptlm_dstnid); - __swab32s(&msg->ptlm_srcpid); - __swab32s(&msg->ptlm_dstpid); - } - - if (msg->ptlm_nob != nob) { - CERROR("msg_nob corrupt: got 0x%08x, wanted %08x\n", - msg->ptlm_nob, nob); - return -EPROTO; - } - - switch(msg->ptlm_type) - { - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - if (nob < hdr_size + sizeof(kptl_rdma_msg_t)) { - CERROR("Short rdma request: got %d, want %d\n", - nob, hdr_size + (int)sizeof(kptl_rdma_msg_t)); - return -EPROTO; - } - - if (flip) - __swab64s(&msg->ptlm_u.rdma.kptlrm_matchbits); - - if (msg->ptlm_u.rdma.kptlrm_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Bad matchbits "LPX64"\n", - msg->ptlm_u.rdma.kptlrm_matchbits); - return -EPROTO; - } - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - if (nob < offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)) { - CERROR("Short immediate: got %d, want %d\n", nob, - (int)offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload)); - return -EPROTO; - } - /* Do nothing */ - break; - - case PTLLND_MSG_TYPE_NOOP: - case PTLLND_MSG_TYPE_NAK: - /* Do nothing */ - break; - - case PTLLND_MSG_TYPE_HELLO: - if (nob < hdr_size + sizeof(kptl_hello_msg_t)) { - CERROR("Short hello: got %d want %d\n", - nob, hdr_size + (int)sizeof(kptl_hello_msg_t)); - return -EPROTO; - } - if (flip) { - __swab64s(&msg->ptlm_u.hello.kptlhm_matchbits); - __swab32s(&msg->ptlm_u.hello.kptlhm_max_msg_size); - } - break; - - default: - CERROR("Bad message type: 0x%02x\n", (__u32)msg->ptlm_type); - return -EPROTO; - } - - return 0; -} - -int -kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg) -{ - kptl_net_t *net = ni->ni_data; - struct libcfs_ioctl_data *data = arg; - int rc = -EINVAL; - - CDEBUG(D_NET, ">>> kptllnd_ctl cmd=%u arg=%p\n", cmd, arg); - - /* - * Validate that the context block is actually - * pointing to this interface - */ - LASSERT (ni == net->net_ni); - - switch(cmd) { - case IOC_LIBCFS_DEL_PEER: { - lnet_process_id_t id; - - id.nid = data->ioc_nid; - id.pid = data->ioc_u32[1]; - - rc = kptllnd_peer_del(id); - break; - } - - case IOC_LIBCFS_GET_PEER: { - lnet_process_id_t id = {.nid = LNET_NID_ANY, - .pid = LNET_PID_ANY}; - __u64 incarnation = 0; - __u64 next_matchbits = 0; - __u64 last_matchbits_seen = 0; - int state = 0; - int sent_hello = 0; - int refcount = 0; - int nsendq = 0; - int nactiveq = 0; - int credits = 0; - int outstanding_credits = 0; - - rc = kptllnd_get_peer_info(data->ioc_count, &id, - &state, &sent_hello, - &refcount, &incarnation, - &next_matchbits, &last_matchbits_seen, - &nsendq, &nactiveq, - &credits, &outstanding_credits); - /* wince... */ - data->ioc_nid = id.nid; - data->ioc_net = state; - data->ioc_flags = sent_hello; - data->ioc_count = refcount; - data->ioc_u64[0] = incarnation; - data->ioc_u32[0] = (__u32)next_matchbits; - data->ioc_u32[1] = (__u32)(next_matchbits >> 32); - data->ioc_u32[2] = (__u32)last_matchbits_seen; - data->ioc_u32[3] = (__u32)(last_matchbits_seen >> 32); - data->ioc_u32[4] = id.pid; - data->ioc_u32[5] = (nsendq << 16) | nactiveq; - data->ioc_u32[6] = (credits << 16) | outstanding_credits; - break; - } - - default: - rc=-EINVAL; - break; - } - CDEBUG(D_NET, "<<< kptllnd_ctl rc=%d\n", rc); - return rc; -} - -void -kptllnd_query (lnet_ni_t *ni, lnet_nid_t nid, cfs_time_t *when) -{ - kptl_net_t *net = ni->ni_data; - kptl_peer_t *peer = NULL; - lnet_process_id_t id = {.nid = nid, .pid = LUSTRE_SRV_LNET_PID}; - unsigned long flags; - - /* NB: kptllnd_find_target connects to peer if necessary */ - if (kptllnd_find_target(net, id, &peer) != 0) - return; - - spin_lock_irqsave(&peer->peer_lock, flags); - if (peer->peer_last_alive != 0) - *when = peer->peer_last_alive; - spin_unlock_irqrestore(&peer->peer_lock, flags); - kptllnd_peer_decref(peer); - return; -} - -void -kptllnd_base_shutdown (void) -{ - int i; - ptl_err_t prc; - unsigned long flags; - lnet_process_id_t process_id; - - read_lock(&kptllnd_data.kptl_net_rw_lock); - LASSERT (cfs_list_empty(&kptllnd_data.kptl_nets)); - read_unlock(&kptllnd_data.kptl_net_rw_lock); - - switch (kptllnd_data.kptl_init) { - default: - LBUG(); - - case PTLLND_INIT_ALL: - case PTLLND_INIT_DATA: - /* stop receiving */ - kptllnd_rx_buffer_pool_fini(&kptllnd_data.kptl_rx_buffer_pool); - LASSERT (cfs_list_empty(&kptllnd_data.kptl_sched_rxq)); - LASSERT (cfs_list_empty(&kptllnd_data.kptl_sched_rxbq)); - - /* lock to interleave cleanly with peer birth/death */ - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - LASSERT (kptllnd_data.kptl_shutdown == 0); - kptllnd_data.kptl_shutdown = 1; /* phase 1 == destroy peers */ - /* no new peers possible now */ - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - /* nuke all existing peers */ - process_id.nid = LNET_NID_ANY; - process_id.pid = LNET_PID_ANY; - kptllnd_peer_del(process_id); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - LASSERT (kptllnd_data.kptl_n_active_peers == 0); - - i = 2; - while (kptllnd_data.kptl_npeers != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for %d peers to terminate\n", - kptllnd_data.kptl_npeers); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - cfs_pause(cfs_time_seconds(1)); - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, - flags); - } - - LASSERT (cfs_list_empty(&kptllnd_data.kptl_closing_peers)); - LASSERT (cfs_list_empty(&kptllnd_data.kptl_zombie_peers)); - LASSERT (kptllnd_data.kptl_peers != NULL); - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - LASSERT (cfs_list_empty (&kptllnd_data.kptl_peers[i])); - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - CDEBUG(D_NET, "All peers deleted\n"); - - /* Shutdown phase 2: kill the daemons... */ - kptllnd_data.kptl_shutdown = 2; - smp_mb(); - - i = 2; - while (cfs_atomic_read (&kptllnd_data.kptl_nthreads) != 0) { - /* Wake up all threads*/ - wake_up_all(&kptllnd_data.kptl_sched_waitq); - wake_up_all(&kptllnd_data.kptl_watchdog_waitq); - - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d threads to terminate\n", - cfs_atomic_read(&kptllnd_data.kptl_nthreads)); - cfs_pause(cfs_time_seconds(1)); - } - - CDEBUG(D_NET, "All Threads stopped\n"); - LASSERT(cfs_list_empty(&kptllnd_data.kptl_sched_txq)); - - kptllnd_cleanup_tx_descs(); - - /* Nothing here now, but libcfs might soon require - * us to explicitly destroy wait queues and semaphores - * that would be done here */ - - /* fall through */ - - case PTLLND_INIT_NOTHING: - CDEBUG(D_NET, "PTLLND_INIT_NOTHING\n"); - break; - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_eqh, PTL_INVALID_HANDLE)) { - prc = PtlEQFree(kptllnd_data.kptl_eqh); - if (prc != PTL_OK) - CERROR("Error %s(%d) freeing portals EQ\n", - kptllnd_errtype2str(prc), prc); - } - - if (!PtlHandleIsEqual(kptllnd_data.kptl_nih, PTL_INVALID_HANDLE)) { - prc = PtlNIFini(kptllnd_data.kptl_nih); - if (prc != PTL_OK) - CERROR("Error %s(%d) finalizing portals NI\n", - kptllnd_errtype2str(prc), prc); - } - - LASSERT (cfs_atomic_read(&kptllnd_data.kptl_ntx) == 0); - LASSERT (cfs_list_empty(&kptllnd_data.kptl_idle_txs)); - - if (kptllnd_data.kptl_rx_cache != NULL) - kmem_cache_destroy(kptllnd_data.kptl_rx_cache); - - if (kptllnd_data.kptl_peers != NULL) - LIBCFS_FREE(kptllnd_data.kptl_peers, - sizeof (cfs_list_t) * - kptllnd_data.kptl_peer_hash_size); - - if (kptllnd_data.kptl_nak_msg != NULL) - LIBCFS_FREE(kptllnd_data.kptl_nak_msg, - offsetof(kptl_msg_t, ptlm_u)); - - memset(&kptllnd_data, 0, sizeof(kptllnd_data)); - module_put(THIS_MODULE); - return; -} - -int -kptllnd_base_startup (void) -{ - int i; - int rc; - int spares; - struct timeval tv; - lnet_process_id_t target; - ptl_err_t ptl_rc; - char name[16]; - - if (*kptllnd_tunables.kptl_max_procs_per_node < 1) { - CERROR("max_procs_per_node must be >= 1\n"); - return -EINVAL; - } - - if (*kptllnd_tunables.kptl_peertxcredits > PTLLND_MSG_MAX_CREDITS) { - CERROR("peercredits must be <= %d\n", PTLLND_MSG_MAX_CREDITS); - return -EINVAL; - } - - *kptllnd_tunables.kptl_max_msg_size &= ~7; - if (*kptllnd_tunables.kptl_max_msg_size < PTLLND_MIN_BUFFER_SIZE) - *kptllnd_tunables.kptl_max_msg_size = PTLLND_MIN_BUFFER_SIZE; - - CLASSERT ((PTLLND_MIN_BUFFER_SIZE & 7) == 0); - CLASSERT (sizeof(kptl_msg_t) <= PTLLND_MIN_BUFFER_SIZE); - - /* Zero pointers, flags etc; put everything into a known state. */ - memset (&kptllnd_data, 0, sizeof (kptllnd_data)); - - LIBCFS_ALLOC(kptllnd_data.kptl_nak_msg, offsetof(kptl_msg_t, ptlm_u)); - if (kptllnd_data.kptl_nak_msg == NULL) { - CERROR("Can't allocate NAK msg\n"); - return -ENOMEM; - } - memset(kptllnd_data.kptl_nak_msg, 0, offsetof(kptl_msg_t, ptlm_u)); - - kptllnd_data.kptl_eqh = PTL_INVALID_HANDLE; - kptllnd_data.kptl_nih = PTL_INVALID_HANDLE; - - rwlock_init(&kptllnd_data.kptl_net_rw_lock); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_nets); - - /* Setup the sched locks/lists/waitq */ - spin_lock_init(&kptllnd_data.kptl_sched_lock); - init_waitqueue_head(&kptllnd_data.kptl_sched_waitq); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_txq); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxq); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_sched_rxbq); - - /* Init kptl_ptlid2str_lock before any call to kptllnd_ptlid2str */ - spin_lock_init(&kptllnd_data.kptl_ptlid2str_lock); - - /* Setup the tx locks/lists */ - spin_lock_init(&kptllnd_data.kptl_tx_lock); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_idle_txs); - cfs_atomic_set(&kptllnd_data.kptl_ntx, 0); - - /* Uptick the module reference count */ - try_module_get(THIS_MODULE); - - kptllnd_data.kptl_expected_peers = - *kptllnd_tunables.kptl_max_nodes * - *kptllnd_tunables.kptl_max_procs_per_node; - - /* - * Initialize the Network interface instance - * We use the default because we don't have any - * way to choose a better interface. - * Requested and actual limits are ignored. - */ - ptl_rc = PtlNIInit( -#ifdef _USING_LUSTRE_PORTALS_ - PTL_IFACE_DEFAULT, -#else - CRAY_KERN_NAL, -#endif - *kptllnd_tunables.kptl_pid, NULL, NULL, - &kptllnd_data.kptl_nih); - - /* - * Note: PTL_IFACE_DUP simply means that the requested - * interface was already inited and that we're sharing it. - * Which is ok. - */ - if (ptl_rc != PTL_OK && ptl_rc != PTL_IFACE_DUP) { - CERROR ("PtlNIInit: error %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -EINVAL; - goto failed; - } - - /* NB eq size irrelevant if using a callback */ - ptl_rc = PtlEQAlloc(kptllnd_data.kptl_nih, - 8, /* size */ - kptllnd_eq_callback, /* handler callback */ - &kptllnd_data.kptl_eqh); /* output handle */ - if (ptl_rc != PTL_OK) { - CERROR("PtlEQAlloc failed %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -ENOMEM; - goto failed; - } - - /* Fetch the lower NID */ - ptl_rc = PtlGetId(kptllnd_data.kptl_nih, - &kptllnd_data.kptl_portals_id); - if (ptl_rc != PTL_OK) { - CERROR ("PtlGetID: error %s(%d)\n", - kptllnd_errtype2str(ptl_rc), ptl_rc); - rc = -EINVAL; - goto failed; - } - - if (kptllnd_data.kptl_portals_id.pid != *kptllnd_tunables.kptl_pid) { - /* The kernel ptllnd must have the expected PID */ - CERROR("Unexpected PID: %u (%u expected)\n", - kptllnd_data.kptl_portals_id.pid, - *kptllnd_tunables.kptl_pid); - rc = -EINVAL; - goto failed; - } - - /* Initialized the incarnation - it must be for-all-time unique, even - * accounting for the fact that we increment it when we disconnect a - * peer that's using it */ - do_gettimeofday(&tv); - kptllnd_data.kptl_incarnation = (((__u64)tv.tv_sec) * 1000000) + - tv.tv_usec; - CDEBUG(D_NET, "Incarnation="LPX64"\n", kptllnd_data.kptl_incarnation); - - target.nid = LNET_NID_ANY; - target.pid = LNET_PID_ANY; /* NB target for NAK doesn't matter */ - kptllnd_init_msg(kptllnd_data.kptl_nak_msg, PTLLND_MSG_TYPE_NAK, target, 0); - kptllnd_data.kptl_nak_msg->ptlm_magic = PTLLND_MSG_MAGIC; - kptllnd_data.kptl_nak_msg->ptlm_version = PTLLND_MSG_VERSION; - kptllnd_data.kptl_nak_msg->ptlm_srcpid = the_lnet.ln_pid; - kptllnd_data.kptl_nak_msg->ptlm_srcstamp = kptllnd_data.kptl_incarnation; - - rwlock_init(&kptllnd_data.kptl_peer_rw_lock); - init_waitqueue_head(&kptllnd_data.kptl_watchdog_waitq); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_closing_peers); - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_zombie_peers); - - /* Allocate and setup the peer hash table */ - kptllnd_data.kptl_peer_hash_size = - *kptllnd_tunables.kptl_peer_hash_table_size; - LIBCFS_ALLOC(kptllnd_data.kptl_peers, - sizeof(cfs_list_t) * - kptllnd_data.kptl_peer_hash_size); - if (kptllnd_data.kptl_peers == NULL) { - CERROR("Failed to allocate space for peer hash table size=%d\n", - kptllnd_data.kptl_peer_hash_size); - rc = -ENOMEM; - goto failed; - } - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) - CFS_INIT_LIST_HEAD(&kptllnd_data.kptl_peers[i]); - - kptllnd_rx_buffer_pool_init(&kptllnd_data.kptl_rx_buffer_pool); - - kptllnd_data.kptl_rx_cache = - kmem_cache_create("ptllnd_rx", - sizeof(kptl_rx_t) + - *kptllnd_tunables.kptl_max_msg_size, - 0, /* offset */ - 0); /* flags */ - if (kptllnd_data.kptl_rx_cache == NULL) { - CERROR("Can't create slab for RX descriptors\n"); - rc = -ENOMEM; - goto failed; - } - - /* lists/ptrs/locks initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_DATA; - - /*****************************************************/ - - rc = kptllnd_setup_tx_descs(); - if (rc != 0) { - CERROR("Can't pre-allocate %d TX descriptors: %d\n", - *kptllnd_tunables.kptl_ntx, rc); - goto failed; - } - - /* Start the scheduler threads for handling incoming requests. No need - * to advance the state because this will be automatically cleaned up - * now that PTLLND_INIT_DATA state has been entered */ - CDEBUG(D_NET, "starting %d scheduler threads\n", PTLLND_N_SCHED); - for (i = 0; i < PTLLND_N_SCHED; i++) { - snprintf(name, sizeof(name), "kptllnd_sd_%02d", i); - rc = kptllnd_thread_start(kptllnd_scheduler, (void *)((long)i)); - if (rc != 0) { - CERROR("Can't spawn scheduler[%d]: %d\n", i, rc); - goto failed; - } - } - - snprintf(name, sizeof(name), "kptllnd_wd_%02d", i); - rc = kptllnd_thread_start(kptllnd_watchdog, NULL, name); - if (rc != 0) { - CERROR("Can't spawn watchdog: %d\n", rc); - goto failed; - } - - /* Ensure that 'rxb_nspare' buffers can be off the net (being emptied) - * and we will still have enough buffers posted for all our peers */ - spares = *kptllnd_tunables.kptl_rxb_nspare * - ((*kptllnd_tunables.kptl_rxb_npages * PAGE_SIZE)/ - *kptllnd_tunables.kptl_max_msg_size); - - /* reserve and post the buffers */ - rc = kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - kptllnd_data.kptl_expected_peers + - spares); - if (rc != 0) { - CERROR("Can't reserve RX Buffer pool: %d\n", rc); - goto failed; - } - - /* flag everything initialised */ - kptllnd_data.kptl_init = PTLLND_INIT_ALL; - - /*****************************************************/ - - if (*kptllnd_tunables.kptl_checksum) - CWARN("Checksumming enabled\n"); - - CDEBUG(D_NET, "<<< kptllnd_base_startup SUCCESS\n"); - return 0; - - failed: - CERROR("kptllnd_base_startup failed: %d\n", rc); - kptllnd_base_shutdown(); - return rc; -} - -int -kptllnd_startup (lnet_ni_t *ni) -{ - int rc; - kptl_net_t *net; - - LASSERT (ni->ni_lnd == &kptllnd_lnd); - - if (kptllnd_data.kptl_init == PTLLND_INIT_NOTHING) { - rc = kptllnd_base_startup(); - if (rc != 0) - return rc; - } - - LIBCFS_ALLOC(net, sizeof(*net)); - ni->ni_data = net; - if (net == NULL) { - CERROR("Can't allocate kptl_net_t\n"); - rc = -ENOMEM; - goto failed; - } - memset(net, 0, sizeof(*net)); - net->net_ni = ni; - - ni->ni_maxtxcredits = *kptllnd_tunables.kptl_credits; - ni->ni_peertxcredits = *kptllnd_tunables.kptl_peertxcredits; - ni->ni_peerrtrcredits = *kptllnd_tunables.kptl_peerrtrcredits; - ni->ni_nid = kptllnd_ptl2lnetnid(ni->ni_nid, - kptllnd_data.kptl_portals_id.nid); - CDEBUG(D_NET, "ptl id=%s, lnet id=%s\n", - kptllnd_ptlid2str(kptllnd_data.kptl_portals_id), - libcfs_nid2str(ni->ni_nid)); - - /* NB LNET_NIDNET(ptlm_srcnid) of NAK doesn't matter in case of - * multiple NIs */ - kptllnd_data.kptl_nak_msg->ptlm_srcnid = ni->ni_nid; - - cfs_atomic_set(&net->net_refcount, 1); - write_lock(&kptllnd_data.kptl_net_rw_lock); - cfs_list_add_tail(&net->net_list, &kptllnd_data.kptl_nets); - write_unlock(&kptllnd_data.kptl_net_rw_lock); - return 0; - - failed: - kptllnd_shutdown(ni); - return rc; -} - -void -kptllnd_shutdown (lnet_ni_t *ni) -{ - kptl_net_t *net = ni->ni_data; - int i; - unsigned long flags; - - LASSERT (kptllnd_data.kptl_init == PTLLND_INIT_ALL); - - CDEBUG(D_MALLOC, "before LND cleanup: kmem %d\n", - cfs_atomic_read (&libcfs_kmemory)); - - if (net == NULL) - goto out; - - LASSERT (ni == net->net_ni); - LASSERT (!net->net_shutdown); - LASSERT (!cfs_list_empty(&net->net_list)); - LASSERT (cfs_atomic_read(&net->net_refcount) != 0); - ni->ni_data = NULL; - net->net_ni = NULL; - - write_lock(&kptllnd_data.kptl_net_rw_lock); - kptllnd_net_decref(net); - cfs_list_del_init(&net->net_list); - write_unlock(&kptllnd_data.kptl_net_rw_lock); - - /* Can't nuke peers here - they are shared among all NIs */ - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - net->net_shutdown = 1; /* Order with peer creation */ - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - i = 2; - while (cfs_atomic_read(&net->net_refcount) != 0) { - i++; - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, - "Waiting for %d references to drop\n", - cfs_atomic_read(&net->net_refcount)); - - cfs_pause(cfs_time_seconds(1)); - } - - LIBCFS_FREE(net, sizeof(*net)); -out: - /* NB no locking since I don't race with writers */ - if (cfs_list_empty(&kptllnd_data.kptl_nets)) - kptllnd_base_shutdown(); - CDEBUG(D_MALLOC, "after LND cleanup: kmem %d\n", - cfs_atomic_read (&libcfs_kmemory)); - return; -} - -int __init -kptllnd_module_init (void) -{ - int rc; - - kptllnd_assert_wire_constants(); - - rc = kptllnd_tunables_init(); - if (rc != 0) - return rc; - - kptllnd_init_ptltrace(); - - lnet_register_lnd(&kptllnd_lnd); - - return 0; -} - -void __exit -kptllnd_module_fini (void) -{ - lnet_unregister_lnd(&kptllnd_lnd); - kptllnd_tunables_fini(); -} - -MODULE_AUTHOR("Sun Microsystems, Inc. "); -MODULE_DESCRIPTION("Kernel Portals LND v1.00"); -MODULE_LICENSE("GPL"); - -module_init(kptllnd_module_init); -module_exit(kptllnd_module_fini); diff --git a/lnet/klnds/ptllnd/ptllnd.h b/lnet/klnds/ptllnd/ptllnd.h deleted file mode 100644 index 203fe06..0000000 --- a/lnet/klnds/ptllnd/ptllnd.h +++ /dev/null @@ -1,592 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd.h - * - * Author: PJ Kirner - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -#define DEBUG_SUBSYSTEM S_LND - -#include -#include -#include -#include -#include -#include /* Depends on portals/p30.h */ - -/* - * Define this to enable console debug logging - * and simulation - */ -//#define PJK_DEBUGGING - -#ifdef CONFIG_SMP -# define PTLLND_N_SCHED num_online_cpus() /* # schedulers */ -#else -# define PTLLND_N_SCHED 1 /* # schedulers */ -#endif - -#define PTLLND_CREDIT_HIGHWATER ((*kptllnd_tunables.kptl_peertxcredits)-1) - /* when eagerly to return credits */ - -typedef struct -{ - int *kptl_ntx; /* # tx descs to pre-allocate */ - int *kptl_max_nodes; /* max # nodes all talking to me */ - int *kptl_max_procs_per_node; /* max # processes per node */ - int *kptl_checksum; /* checksum kptl_msg_t? */ - int *kptl_timeout; /* comms timeout (seconds) */ - int *kptl_portal; /* portal number */ - int *kptl_pid; /* portals PID (self + kernel peers) */ - int *kptl_rxb_npages; /* number of pages for rx buffer */ - int *kptl_rxb_nspare; /* number of spare rx buffers */ - int *kptl_credits; /* number of credits */ - int *kptl_peertxcredits; /* number of peer tx credits */ - int *kptl_peerrtrcredits; /* number of peer router credits */ - int *kptl_max_msg_size; /* max immd message size*/ - int *kptl_peer_hash_table_size; /* # slots in peer hash table */ - int *kptl_reschedule_loops; /* scheduler yield loops */ - int *kptl_ack_puts; /* make portals ack PUTs */ -#ifdef PJK_DEBUGGING - int *kptl_simulation_bitmap;/* simulation bitmap */ -#endif - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - struct ctl_table_header *kptl_sysctl; /* sysctl interface */ -#endif -} kptl_tunables_t; - -#include "lnet/ptllnd_wire.h" - -/***********************************************************************/ - -typedef struct kptl_data kptl_data_t; -typedef struct kptl_net kptl_net_t; -typedef struct kptl_rx_buffer kptl_rx_buffer_t; -typedef struct kptl_peer kptl_peer_t; - -typedef struct { - char eva_type; -} kptl_eventarg_t; - -#define PTLLND_EVENTARG_TYPE_MSG 0x1 -#define PTLLND_EVENTARG_TYPE_RDMA 0x2 -#define PTLLND_EVENTARG_TYPE_BUF 0x3 - -typedef struct kptl_rx /* receive message */ -{ - cfs_list_t rx_list; /* queue for attention */ - kptl_rx_buffer_t *rx_rxb; /* the rx buffer pointer */ - kptl_msg_t *rx_msg; /* received message */ - int rx_nob; /* received message size */ - unsigned long rx_treceived; /* time received */ - ptl_process_id_t rx_initiator; /* sender's address */ - kptl_peer_t *rx_peer; /* pointer to peer */ - char rx_space[0]; /* copy of incoming request */ -} kptl_rx_t; - -#define PTLLND_POSTRX_DONT_POST 0 /* don't post */ -#define PTLLND_POSTRX_NO_CREDIT 1 /* post: no credits */ -#define PTLLND_POSTRX_PEER_CREDIT 2 /* post: give peer back 1 credit */ - -typedef struct kptl_rx_buffer_pool -{ - spinlock_t rxbp_lock; - cfs_list_t rxbp_list; /* all allocated buffers */ - int rxbp_count; /* # allocated buffers */ - int rxbp_reserved; /* # requests to buffer */ - int rxbp_shutdown; /* shutdown flag */ -} kptl_rx_buffer_pool_t; - -struct kptl_rx_buffer -{ - kptl_rx_buffer_pool_t *rxb_pool; - cfs_list_t rxb_list; /* for the rxb_pool list */ - cfs_list_t rxb_repost_list;/* for the kptl_sched_rxbq list */ - int rxb_posted:1; /* on the net */ - int rxb_idle:1; /* all done */ - kptl_eventarg_t rxb_eventarg; /* event->md.user_ptr */ - int rxb_refcount; /* reference count */ - ptl_handle_md_t rxb_mdh; /* the portals memory descriptor (MD) handle */ - char *rxb_buffer; /* the buffer */ - -}; - -enum kptl_tx_type -{ - TX_TYPE_RESERVED = 0, - TX_TYPE_SMALL_MESSAGE = 1, - TX_TYPE_PUT_REQUEST = 2, - TX_TYPE_GET_REQUEST = 3, - TX_TYPE_PUT_RESPONSE = 4, - TX_TYPE_GET_RESPONSE = 5, -}; - -typedef union { -#ifdef _USING_LUSTRE_PORTALS_ - struct iovec iov[PTL_MD_MAX_IOV]; - lnet_kiov_t kiov[PTL_MD_MAX_IOV]; -#else - ptl_md_iovec_t iov[PTL_MD_MAX_IOV]; -#endif -} kptl_fragvec_t; - -typedef struct kptl_tx /* transmit message */ -{ - cfs_list_t tx_list; /* queue on idle_txs etc */ - cfs_atomic_t tx_refcount; /* reference count*/ - enum kptl_tx_type tx_type; /* small msg/{put,get}{req,resp} */ - int tx_active:1; /* queued on the peer */ - int tx_idle:1; /* on the free list */ - int tx_acked:1; /* portals ACK wanted (for debug only) */ - kptl_eventarg_t tx_msg_eventarg; /* event->md.user_ptr */ - kptl_eventarg_t tx_rdma_eventarg; /* event->md.user_ptr */ - int tx_status; /* the status of this tx descriptor */ - ptl_handle_md_t tx_rdma_mdh; /* RDMA buffer */ - ptl_handle_md_t tx_msg_mdh; /* the portals MD handle for the initial message */ - lnet_msg_t *tx_lnet_msg; /* LNET message to finalize */ - lnet_msg_t *tx_lnet_replymsg; /* LNET reply message to finalize */ - kptl_msg_t *tx_msg; /* the message data */ - kptl_peer_t *tx_peer; /* the peer this is waiting on */ - unsigned long tx_deadline; /* deadline */ - unsigned long tx_tposted; /* time posted */ - ptl_md_t tx_rdma_md; /* rdma descriptor */ - kptl_fragvec_t *tx_frags; /* buffer fragments */ -} kptl_tx_t; - -enum kptllnd_peer_state -{ - PEER_STATE_UNINITIALIZED = 0, - PEER_STATE_ALLOCATED = 1, - PEER_STATE_WAITING_HELLO = 2, - PEER_STATE_ACTIVE = 3, - PEER_STATE_CLOSING = 4, - PEER_STATE_ZOMBIE = 5, -}; - -struct kptl_peer -{ - cfs_list_t peer_list; - cfs_atomic_t peer_refcount; /* The current references */ - enum kptllnd_peer_state peer_state; - spinlock_t peer_lock; /* serialize */ - cfs_list_t peer_noops; /* PTLLND_MSG_TYPE_NOOP txs */ - cfs_list_t peer_sendq; /* txs waiting for mh handles */ - cfs_list_t peer_activeq; /* txs awaiting completion */ - lnet_process_id_t peer_id; /* Peer's LNET id */ - ptl_process_id_t peer_ptlid; /* Peer's portals id */ - __u64 peer_incarnation; /* peer's incarnation */ - __u64 peer_myincarnation; /* my incarnation at HELLO */ - int peer_sent_hello; /* have I sent HELLO? */ - int peer_credits; /* number of send credits */ - int peer_outstanding_credits;/* number of peer credits to return */ - int peer_sent_credits; /* #msg buffers posted for peer */ - int peer_max_msg_size; /* peer's rx buffer size */ - int peer_error; /* errno on closing this peer */ - int peer_retry_noop; /* need to retry returning credits */ - int peer_check_stamp; /* watchdog check stamp */ - cfs_time_t peer_last_alive; /* when (in jiffies) I was last alive */ - __u64 peer_next_matchbits; /* Next value to register RDMA from peer */ - __u64 peer_last_matchbits_seen; /* last matchbits used to RDMA to peer */ -}; - -struct kptl_data -{ - int kptl_init; /* initialisation state */ - volatile int kptl_shutdown; /* shut down? */ - cfs_atomic_t kptl_nthreads; /* # live threads */ - ptl_handle_ni_t kptl_nih; /* network inteface handle */ - ptl_process_id_t kptl_portals_id; /* Portals ID of interface */ - __u64 kptl_incarnation; /* which one am I */ - ptl_handle_eq_t kptl_eqh; /* Event Queue (EQ) */ - - rwlock_t kptl_net_rw_lock; /* serialise... */ - cfs_list_t kptl_nets; /* kptl_net instance*/ - - spinlock_t kptl_sched_lock; /* serialise... */ - wait_queue_head_t kptl_sched_waitq; /* schedulers sleep here */ - cfs_list_t kptl_sched_txq; /* tx requiring attention */ - cfs_list_t kptl_sched_rxq; /* rx requiring attention */ - cfs_list_t kptl_sched_rxbq; /* rxb requiring reposting */ - - wait_queue_head_t kptl_watchdog_waitq; /* watchdog sleeps here */ - - kptl_rx_buffer_pool_t kptl_rx_buffer_pool; /* rx buffer pool */ - struct kmem_cache *kptl_rx_cache; /* rx descripter cache */ - - cfs_atomic_t kptl_ntx; /* # tx descs allocated */ - spinlock_t kptl_tx_lock; /* serialise idle tx list*/ - cfs_list_t kptl_idle_txs; /* idle tx descriptors */ - - rwlock_t kptl_peer_rw_lock; /* lock for peer table */ - cfs_list_t *kptl_peers; /* hash table of all my known peers */ - cfs_list_t kptl_closing_peers; /* peers being closed */ - cfs_list_t kptl_zombie_peers; /* peers waiting for refs to drain */ - int kptl_peer_hash_size; /* size of kptl_peers */ - int kptl_npeers; /* # peers extant */ - int kptl_n_active_peers; /* # active peers */ - int kptl_expected_peers; /* # peers I can buffer HELLOs from */ - - kptl_msg_t *kptl_nak_msg; /* common NAK message */ - spinlock_t kptl_ptlid2str_lock; /* serialise str ops */ -}; - -struct kptl_net -{ - cfs_list_t net_list; /* chain on kptl_data:: kptl_nets */ - lnet_ni_t *net_ni; - cfs_atomic_t net_refcount; /* # current references */ - int net_shutdown; /* lnd_shutdown called */ -}; - -enum -{ - PTLLND_INIT_NOTHING = 0, - PTLLND_INIT_DATA, - PTLLND_INIT_ALL, -}; - -extern kptl_tunables_t kptllnd_tunables; -extern kptl_data_t kptllnd_data; - -static inline lnet_nid_t -kptllnd_ptl2lnetnid(lnet_nid_t ni_nid, ptl_nid_t ptl_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(ni_nid), LNET_NIDADDR(ptl_nid)); -#else - return LNET_MKNID(LNET_NIDNET(ni_nid), ptl_nid); -#endif -} - -static inline ptl_nid_t -kptllnd_lnet2ptlnid(lnet_nid_t lnet_nid) -{ -#ifdef _USING_LUSTRE_PORTALS_ - return LNET_MKNID(LNET_NIDNET(kptllnd_data.kptl_portals_id.nid), - LNET_NIDADDR(lnet_nid)); -#else - return LNET_NIDADDR(lnet_nid); -#endif -} - -int kptllnd_startup(lnet_ni_t *ni); -void kptllnd_shutdown(lnet_ni_t *ni); -int kptllnd_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); -void kptllnd_query (struct lnet_ni *ni, lnet_nid_t nid, cfs_time_t *when); -int kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); -int kptllnd_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, - int delayed, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen); -int kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep); -void kptllnd_eq_callback(ptl_event_t *evp); -int kptllnd_scheduler(void *arg); -int kptllnd_watchdog(void *arg); -int kptllnd_thread_start(int (*fn)(void *arg), void *arg); -int kptllnd_tunables_init(void); -void kptllnd_tunables_fini(void); - -const char *kptllnd_evtype2str(int evtype); -const char *kptllnd_msgtype2str(int msgtype); -const char *kptllnd_errtype2str(int errtype); - -static inline void * -kptllnd_eventarg2obj (kptl_eventarg_t *eva) -{ - switch (eva->eva_type) { - default: - LBUG(); - case PTLLND_EVENTARG_TYPE_BUF: - return cfs_list_entry(eva, kptl_rx_buffer_t, rxb_eventarg); - case PTLLND_EVENTARG_TYPE_RDMA: - return cfs_list_entry(eva, kptl_tx_t, tx_rdma_eventarg); - case PTLLND_EVENTARG_TYPE_MSG: - return cfs_list_entry(eva, kptl_tx_t, tx_msg_eventarg); - } -} - -/* - * RX BUFFER SUPPORT FUNCTIONS - */ -void kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp); -void kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp); -int kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, int count); -void kptllnd_rx_buffer_callback(ptl_event_t *ev); -void kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb); - -static inline int -kptllnd_rx_buffer_size(void) -{ - return PAGE_SIZE * (*kptllnd_tunables.kptl_rxb_npages); -} - -static inline void -kptllnd_rx_buffer_addref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - rxb->rxb_refcount++; - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); -} - -static inline void -kptllnd_rx_buffer_decref_locked(kptl_rx_buffer_t *rxb) -{ - if (--(rxb->rxb_refcount) == 0) { - spin_lock(&kptllnd_data.kptl_sched_lock); - - cfs_list_add_tail(&rxb->rxb_repost_list, - &kptllnd_data.kptl_sched_rxbq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock(&kptllnd_data.kptl_sched_lock); - } -} - -static inline void -kptllnd_rx_buffer_decref(kptl_rx_buffer_t *rxb) -{ - unsigned long flags; - int count; - - spin_lock_irqsave(&rxb->rxb_pool->rxbp_lock, flags); - count = --(rxb->rxb_refcount); - spin_unlock_irqrestore(&rxb->rxb_pool->rxbp_lock, flags); - - if (count == 0) - kptllnd_rx_buffer_post(rxb); -} - -/* - * RX SUPPORT FUNCTIONS - */ -void kptllnd_rx_parse(kptl_rx_t *rx); -void kptllnd_rx_done(kptl_rx_t *rx, int post_credit); - -/* - * PEER SUPPORT FUNCTIONS - */ -int kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits); -void kptllnd_peer_destroy(kptl_peer_t *peer); -int kptllnd_peer_del(lnet_process_id_t id); -void kptllnd_peer_close_locked(kptl_peer_t *peer, int why); -void kptllnd_peer_close(kptl_peer_t *peer, int why); -void kptllnd_handle_closing_peers(void); -int kptllnd_peer_connect(kptl_tx_t *tx, lnet_nid_t nid); -void kptllnd_peer_check_sends(kptl_peer_t *peer); -void kptllnd_peer_check_bucket(int idx, int stamp); -void kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag); -int kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target, - kptl_peer_t **peerp); -kptl_peer_t *kptllnd_peer_handle_hello(kptl_net_t *net, - ptl_process_id_t initiator, - kptl_msg_t *msg); -kptl_peer_t *kptllnd_id2peer_locked(lnet_process_id_t id); -void kptllnd_peer_alive(kptl_peer_t *peer); - -static inline void -kptllnd_peer_addref (kptl_peer_t *peer) -{ - cfs_atomic_inc(&peer->peer_refcount); -} - -static inline void -kptllnd_peer_decref (kptl_peer_t *peer) -{ - if (cfs_atomic_dec_and_test(&peer->peer_refcount)) - kptllnd_peer_destroy(peer); -} - -static inline void -kptllnd_net_addref (kptl_net_t *net) -{ - LASSERT (cfs_atomic_read(&net->net_refcount) > 0); - cfs_atomic_inc(&net->net_refcount); -} - -static inline void -kptllnd_net_decref (kptl_net_t *net) -{ - LASSERT (cfs_atomic_read(&net->net_refcount) > 0); - cfs_atomic_dec(&net->net_refcount); -} - -static inline void -kptllnd_set_tx_peer(kptl_tx_t *tx, kptl_peer_t *peer) -{ - LASSERT (tx->tx_peer == NULL); - - kptllnd_peer_addref(peer); - tx->tx_peer = peer; -} - -static inline cfs_list_t * -kptllnd_nid2peerlist(lnet_nid_t nid) -{ - /* Only one copy of peer state for all logical peers, so the net part - * of NIDs is ignored; e.g. A@ptl0 and A@ptl2 share peer state */ - unsigned int hash = ((unsigned int)LNET_NIDADDR(nid)) % - kptllnd_data.kptl_peer_hash_size; - - return &kptllnd_data.kptl_peers[hash]; -} - -static inline kptl_peer_t * -kptllnd_id2peer(lnet_process_id_t id) -{ - kptl_peer_t *peer; - unsigned long flags; - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - peer = kptllnd_id2peer_locked(id); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return peer; -} - -static inline int -kptllnd_reserve_buffers(int n) -{ - return kptllnd_rx_buffer_pool_reserve(&kptllnd_data.kptl_rx_buffer_pool, - n); -} - -static inline int -kptllnd_peer_reserve_buffers(void) -{ - return kptllnd_reserve_buffers(*kptllnd_tunables.kptl_peertxcredits); -} - -static inline void -kptllnd_peer_unreserve_buffers(void) -{ - kptllnd_rx_buffer_pool_unreserve(&kptllnd_data.kptl_rx_buffer_pool, - *kptllnd_tunables.kptl_peertxcredits); -} - -/* - * TX SUPPORT FUNCTIONS - */ -int kptllnd_setup_tx_descs(void); -void kptllnd_cleanup_tx_descs(void); -void kptllnd_tx_fini(kptl_tx_t *tx); -void kptllnd_cancel_txlist(cfs_list_t *peerq, cfs_list_t *txs); -void kptllnd_restart_txs(kptl_net_t *net, lnet_process_id_t id, - cfs_list_t *restarts); -kptl_tx_t *kptllnd_get_idle_tx(enum kptl_tx_type purpose); -void kptllnd_tx_callback(ptl_event_t *ev); -const char *kptllnd_tx_typestr(int type); - -static inline void -kptllnd_tx_addref(kptl_tx_t *tx) -{ - cfs_atomic_inc(&tx->tx_refcount); -} - -static inline void -kptllnd_tx_decref(kptl_tx_t *tx) -{ - LASSERT (!in_interrupt()); /* Thread context only */ - - if (cfs_atomic_dec_and_test(&tx->tx_refcount)) - kptllnd_tx_fini(tx); -} - -/* - * MESSAGE SUPPORT FUNCTIONS - */ -void kptllnd_init_msg(kptl_msg_t *msg, int type, - lnet_process_id_t target, int body_nob); -void kptllnd_msg_pack(kptl_msg_t *msg, kptl_peer_t *peer); -int kptllnd_msg_unpack(kptl_msg_t *msg, int nob); - -/* - * MISC SUPPORT FUNCTIONS - */ -void kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob); -char *kptllnd_ptlid2str(ptl_process_id_t id); - -void kptllnd_init_ptltrace(void); -void kptllnd_dump_ptltrace(void); - -#ifdef PJK_DEBUGGING -#define SIMULATION_FAIL_TX_PUT_ALLOC 0 /* 0x00000001 */ -#define SIMULATION_FAIL_TX_GET_ALLOC 1 /* 0x00000002 */ -#define SIMULATION_FAIL_TX 2 /* 0x00000004 */ -#define SIMULATION_FAIL_RX_ALLOC 3 /* 0x00000008 */ - -#define IS_SIMULATION_ENABLED(x) \ - (((*kptllnd_tunables.kptl_simulation_bitmap) & 1<< SIMULATION_##x) != 0) -#else -#define IS_SIMULATION_ENABLED(x) 0 -#endif diff --git a/lnet/klnds/ptllnd/ptllnd_cb.c b/lnet/klnds/ptllnd/ptllnd_cb.c deleted file mode 100644 index c2ea55b..0000000 --- a/lnet/klnds/ptllnd/ptllnd_cb.c +++ /dev/null @@ -1,834 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd_cb.c - * - * Author: PJ Kirner - */ - -#include "ptllnd.h" - -#ifndef _USING_LUSTRE_PORTALS_ -int -kptllnd_extract_iov (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, struct iovec *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} - -int -kptllnd_extract_phys (int dst_niov, ptl_md_iovec_t *dst, - int src_niov, lnet_kiov_t *src, - unsigned int offset, unsigned int len) -{ - /* Initialise 'dst' to the physical addresses of the subset of 'src' - * starting at 'offset', for exactly 'len' bytes, and return the number - * of entries. NB not destructive to 'src' */ - unsigned int frag_len; - unsigned int niov; - __u64 phys_page; - __u64 phys; - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (niov <= dst_niov); - - frag_len = min(src->kiov_len - offset, len); - phys_page = lnet_page2phys(src->kiov_page); - phys = phys_page + src->kiov_offset + offset; - - LASSERT (sizeof(void *) > 4 || - (phys <= 0xffffffffULL && - phys + (frag_len - 1) <= 0xffffffffULL)); - - dst->iov_base = (void *)((unsigned long)phys); - dst->iov_len = frag_len; - - if (frag_len == len) - return niov; - - len -= frag_len; - dst++; - src++; - niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -kptllnd_init_rdma_md(kptl_tx_t *tx, unsigned int niov, - struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int nob) -{ - LASSERT (iov == NULL || kiov == NULL); - - memset(&tx->tx_rdma_md, 0, sizeof(tx->tx_rdma_md)); - - tx->tx_rdma_md.start = tx->tx_frags; - tx->tx_rdma_md.user_ptr = &tx->tx_rdma_eventarg; - tx->tx_rdma_md.eq_handle = kptllnd_data.kptl_eqh; - tx->tx_rdma_md.options = PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_PUT_REQUEST: /* passive: peer gets */ - tx->tx_rdma_md.threshold = 1; /* GET event */ - tx->tx_rdma_md.options |= PTL_MD_OP_GET; - break; - - case TX_TYPE_GET_REQUEST: /* passive: peer puts */ - tx->tx_rdma_md.threshold = 1; /* PUT event */ - tx->tx_rdma_md.options |= PTL_MD_OP_PUT; - break; - - case TX_TYPE_PUT_RESPONSE: /* active: I get */ - tx->tx_rdma_md.threshold = 2; /* SEND + REPLY */ - break; - - case TX_TYPE_GET_RESPONSE: /* active: I put */ - tx->tx_rdma_md.threshold = tx->tx_acked ? 2 : 1; /* SEND + ACK? */ - break; - } - - if (nob == 0) { - tx->tx_rdma_md.length = 0; - return; - } - -#ifdef _USING_LUSTRE_PORTALS_ - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - lnet_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - /* Cheating OK since ptl_kiov_t == lnet_kiov_t */ - CLASSERT(sizeof(ptl_kiov_t) == sizeof(lnet_kiov_t)); - CLASSERT(offsetof(ptl_kiov_t, kiov_offset) == - offsetof(lnet_kiov_t, kiov_offset)); - CLASSERT(offsetof(ptl_kiov_t, kiov_page) == - offsetof(lnet_kiov_t, kiov_page)); - CLASSERT(offsetof(ptl_kiov_t, kiov_len) == - offsetof(lnet_kiov_t, kiov_len)); - - tx->tx_rdma_md.options |= PTL_MD_KIOV; - tx->tx_rdma_md.length = - lnet_extract_kiov(PTL_MD_MAX_IOV, tx->tx_frags->kiov, - niov, kiov, offset, nob); -#else - if (iov != NULL) { - tx->tx_rdma_md.options |= PTL_MD_IOVEC; - tx->tx_rdma_md.length = - kptllnd_extract_iov(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, iov, offset, nob); - return; - } - - tx->tx_rdma_md.options |= PTL_MD_IOVEC | PTL_MD_PHYS; - tx->tx_rdma_md.length = - kptllnd_extract_phys(PTL_MD_MAX_IOV, tx->tx_frags->iov, - niov, kiov, offset, nob); -#endif -} - -int -kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, int nob) -{ - kptl_tx_t *tx; - ptl_err_t ptlrc; - kptl_msg_t *rxmsg = rx->rx_msg; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - ptl_handle_md_t mdh; - - LASSERT (type == TX_TYPE_PUT_RESPONSE || - type == TX_TYPE_GET_RESPONSE); - - tx = kptllnd_get_idle_tx(type); - if (tx == NULL) { - CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n", - type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT", - libcfs_id2str(peer->peer_id)); - return -ENOMEM; - } - - kptllnd_set_tx_peer(tx, peer); - kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob); - - ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, - PTL_UNLINK, &mdh); - if (ptlrc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(ptlrc), ptlrc); - tx->tx_status = -EIO; - kptllnd_tx_decref(tx); - return -EIO; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - tx->tx_lnet_msg = lntmsg; - /* lnet_finalize() will be called when tx is torn down, so I must - * return success from here on... */ - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_rdma_mdh = mdh; - tx->tx_active = 1; - cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq); - - /* peer has now got my ref on 'tx' */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx->tx_tposted = jiffies; - - if (type == TX_TYPE_GET_RESPONSE) - ptlrc = PtlPut(mdh, - tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* offset */ - (lntmsg != NULL) ? /* header data */ - PTLLND_RDMA_OK : - PTLLND_RDMA_FAIL); - else - ptlrc = PtlGet(mdh, - rx->rx_initiator, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - rxmsg->ptlm_u.rdma.kptlrm_matchbits, - 0); /* offset */ - - if (ptlrc != PTL_OK) { - CERROR("Ptl%s failed: %s(%d)\n", - (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get", - kptllnd_errtype2str(ptlrc), ptlrc); - - kptllnd_peer_close(peer, -EIO); - /* Everything (including this RDMA) queued on the peer will - * be completed with failure */ - } - - return 0; -} - -int -kptllnd_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg) -{ - lnet_hdr_t *hdr = &lntmsg->msg_hdr; - int type = lntmsg->msg_type; - lnet_process_id_t target = lntmsg->msg_target; - int target_is_router = lntmsg->msg_target_is_router; - int routing = lntmsg->msg_routing; - unsigned int payload_niov = lntmsg->msg_niov; - struct iovec *payload_iov = lntmsg->msg_iov; - lnet_kiov_t *payload_kiov = lntmsg->msg_kiov; - unsigned int payload_offset = lntmsg->msg_offset; - unsigned int payload_nob = lntmsg->msg_len; - kptl_net_t *net = ni->ni_data; - kptl_peer_t *peer = NULL; - int mpflag = 0; - kptl_tx_t *tx; - int nob; - int nfrag; - int rc; - - LASSERT (net->net_ni == ni); - LASSERT (!net->net_shutdown); - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= LNET_MAX_IOV); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); /* !!! */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - LASSERT (!in_interrupt()); - - if (lntmsg->msg_vmflush) - mpflag = cfs_memory_pressure_get_and_set(); - - rc = kptllnd_find_target(net, target, &peer); - if (rc != 0) - goto out; - - /* NB peer->peer_id does NOT always equal target, be careful with - * which one to use */ - switch (type) { - default: - LBUG(); - return -EINVAL; - - case LNET_MSG_REPLY: - case LNET_MSG_PUT: - /* Should the payload avoid RDMA? */ - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[payload_nob]); - if (payload_kiov == NULL && - nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_PUT_REQUEST); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - kptllnd_init_rdma_md(tx, payload_niov, - payload_iov, payload_kiov, - payload_offset, payload_nob); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_PUT, - target, sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive PUT p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_GET: - /* routed gets don't RDMA */ - if (target_is_router || routing) - break; - - /* Is the payload small enough not to need RDMA? */ - nob = lntmsg->msg_md->md_length; - nob = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload[nob]); - if (nob <= peer->peer_max_msg_size) - break; - - tx = kptllnd_get_idle_tx(TX_TYPE_GET_REQUEST); - if (tx == NULL) { - CERROR("Can't send GET to %s: can't allocate descriptor\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_replymsg = lnet_create_reply_msg(ni, lntmsg); - if (tx->tx_lnet_replymsg == NULL) { - CERROR("Failed to allocate LNET reply for %s\n", - libcfs_id2str(target)); - kptllnd_tx_decref(tx); - rc = -ENOMEM; - goto out; - } - - if ((lntmsg->msg_md->md_options & LNET_MD_KIOV) == 0) - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - lntmsg->msg_md->md_iov.iov, NULL, - 0, lntmsg->msg_md->md_length); - else - kptllnd_init_rdma_md(tx, lntmsg->msg_md->md_niov, - NULL, lntmsg->msg_md->md_iov.kiov, - 0, lntmsg->msg_md->md_length); - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.rdma.kptlrm_hdr = *hdr; - kptllnd_init_msg (tx->tx_msg, PTLLND_MSG_TYPE_GET, - target, sizeof(kptl_rdma_msg_t)); - - CDEBUG(D_NETTRACE, "%s: passive GET p %d %p\n", - libcfs_id2str(target), - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index), tx); - - kptllnd_tx_launch(peer, tx, 0); - goto out; - - case LNET_MSG_ACK: - CDEBUG(D_NET, "LNET_MSG_ACK\n"); - LASSERT (payload_nob == 0); - break; - } - - /* I don't have to handle kiovs */ - LASSERT (payload_nob == 0 || payload_iov != NULL); - - tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't send %s to %s: can't allocate descriptor\n", - lnet_msgtyp2str(type), libcfs_id2str(target)); - rc = -ENOMEM; - goto out; - } - - tx->tx_lnet_msg = lntmsg; - tx->tx_msg->ptlm_u.immediate.kptlim_hdr = *hdr; - - if (payload_nob == 0) { - nfrag = 0; - } else { - tx->tx_frags->iov[0].iov_base = tx->tx_msg; - tx->tx_frags->iov[0].iov_len = offsetof(kptl_msg_t, - ptlm_u.immediate.kptlim_payload); - - /* NB relying on lustre not asking for PTL_MD_MAX_IOV - * fragments!! */ -#ifdef _USING_LUSTRE_PORTALS_ - nfrag = 1 + lnet_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#else - nfrag = 1 + kptllnd_extract_iov(PTL_MD_MAX_IOV - 1, - &tx->tx_frags->iov[1], - payload_niov, payload_iov, - payload_offset, payload_nob); -#endif - } - - nob = offsetof(kptl_immediate_msg_t, kptlim_payload[payload_nob]); - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_IMMEDIATE, target, nob); - - CDEBUG(D_NETTRACE, "%s: immediate %s p %d %p\n", - libcfs_id2str(target), - lnet_msgtyp2str(lntmsg->msg_type), - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_PUT) ? - le32_to_cpu(lntmsg->msg_hdr.msg.put.ptl_index) : - (le32_to_cpu(lntmsg->msg_type) == LNET_MSG_GET) ? - le32_to_cpu(lntmsg->msg_hdr.msg.get.ptl_index) : -1, - tx); - - kptllnd_tx_launch(peer, tx, nfrag); - - out: - if (lntmsg->msg_vmflush) - cfs_memory_pressure_restore(mpflag); - if (peer) - kptllnd_peer_decref(peer); - return rc; -} - -int -kptllnd_eager_recv(struct lnet_ni *ni, void *private, - lnet_msg_t *msg, void **new_privatep) -{ - kptl_rx_t *rx = private; - - CDEBUG(D_NET, "Eager RX=%p RXB=%p\n", rx, rx->rx_rxb); - - /* I have to release my ref on rxb (if I have one) to ensure I'm an - * eager receiver, so I copy the incoming request from the buffer it - * landed in, into space reserved in the descriptor... */ - -#if (PTL_MD_LOCAL_ALIGN8 == 0) - if (rx->rx_rxb == NULL) /* already copied */ - return 0; /* to fix alignment */ -#else - LASSERT(rx->rx_rxb != NULL); -#endif - LASSERT(rx->rx_nob <= *kptllnd_tunables.kptl_max_msg_size); - - memcpy(rx->rx_space, rx->rx_msg, rx->rx_nob); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - - kptllnd_rx_buffer_decref(rx->rx_rxb); - rx->rx_rxb = NULL; - - return 0; -} - - -int -kptllnd_recv (lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, int delayed, - unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov, - unsigned int offset, unsigned int mlen, unsigned int rlen) -{ - kptl_rx_t *rx = private; - kptl_msg_t *rxmsg = rx->rx_msg; - int nob; - int rc; - - CDEBUG(D_NET, "%s niov=%d offset=%d mlen=%d rlen=%d\n", - kptllnd_msgtype2str(rxmsg->ptlm_type), - niov, offset, mlen, rlen); - - LASSERT (mlen <= rlen); - LASSERT (mlen >= 0); - LASSERT (!in_interrupt()); - LASSERT (!(kiov != NULL && iov != NULL)); /* never both */ - LASSERT (niov <= PTL_MD_MAX_IOV); /* !!! */ - - switch(rxmsg->ptlm_type) - { - default: - LBUG(); - rc = -EINVAL; - break; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE %d,%d\n", mlen, rlen); - - nob = offsetof(kptl_msg_t, ptlm_u.immediate.kptlim_payload[rlen]); - if (nob > rx->rx_nob) { - CERROR ("Immediate message from %s too big: %d(%d)\n", - libcfs_id2str(rx->rx_peer->peer_id), nob, - rx->rx_nob); - rc = -EINVAL; - break; - } - - if (kiov != NULL) - lnet_copy_flat2kiov( - niov, kiov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - else - lnet_copy_flat2iov( - niov, iov, offset, - *kptllnd_tunables.kptl_max_msg_size, - rxmsg->ptlm_u.immediate.kptlim_payload, - 0, - mlen); - - lnet_finalize (ni, lntmsg, 0); - rc = 0; - break; - - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_GET %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete. I send - * success/failure in the portals 'hdr_data' */ - - if (lntmsg == NULL) - rc = kptllnd_active_rdma(rx, NULL, - TX_TYPE_GET_RESPONSE, - 0, NULL, NULL, 0, 0); - else - rc = kptllnd_active_rdma(rx, lntmsg, - TX_TYPE_GET_RESPONSE, - lntmsg->msg_niov, - lntmsg->msg_iov, - lntmsg->msg_kiov, - lntmsg->msg_offset, - lntmsg->msg_len); - break; - - case PTLLND_MSG_TYPE_PUT: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_PUT %d,%d\n", mlen, rlen); - - /* NB always send RDMA so the peer can complete; it'll be 0 - * bytes if there was no match (lntmsg == NULL). I have no way - * to let my peer know this, but she's only interested in when - * the net has stopped accessing her buffer in any case. */ - - rc = kptllnd_active_rdma(rx, lntmsg, TX_TYPE_PUT_RESPONSE, - niov, iov, kiov, offset, mlen); - break; - } - - /* - * We're done with the RX - */ - kptllnd_rx_done(rx, PTLLND_POSTRX_PEER_CREDIT); - return rc; -} - -void -kptllnd_eq_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - - switch (eva->eva_type) { - default: - LBUG(); - - case PTLLND_EVENTARG_TYPE_MSG: - case PTLLND_EVENTARG_TYPE_RDMA: - kptllnd_tx_callback(ev); - break; - - case PTLLND_EVENTARG_TYPE_BUF: - kptllnd_rx_buffer_callback(ev); - break; - } -} - -void -kptllnd_thread_fini (void) -{ - cfs_atomic_dec(&kptllnd_data.kptl_nthreads); -} - -int -kptllnd_thread_start(int (*fn)(void *arg), void *arg, char *name) -{ - struct task_struct *task; - - cfs_atomic_inc(&kptllnd_data.kptl_nthreads); - - task = kthread_run(fn, arg, name); - if (IS_ERR(task)) { - CERROR("Failed to start thread: error %ld\n", PTR_ERR(task)); - kptllnd_thread_fini(); - } - return PTR_ERR(task); -} - -int -kptllnd_watchdog(void *arg) -{ - int id = (long)arg; - wait_queue_t waitlink; - int stamp = 0; - int peer_index = 0; - unsigned long deadline = jiffies; - int timeout; - int i; - - cfs_block_allsigs(); - - init_waitqueue_entry_current(&waitlink); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - timeout = (int)(deadline - jiffies); - if (timeout <= 0) { - const int n = 4; - const int p = 1; - int chunk = kptllnd_data.kptl_peer_hash_size; - - - /* Time to check for RDMA timeouts on a few more - * peers: I do checks every 'p' seconds on a - * proportion of the peer table and I need to check - * every connection 'n' times within a timeout - * interval, to ensure I detect a timeout on any - * connection within (n+1)/n times the timeout - * interval. */ - - if ((*kptllnd_tunables.kptl_timeout) > n * p) - chunk = (chunk * n * p) / - (*kptllnd_tunables.kptl_timeout); - if (chunk == 0) - chunk = 1; - - for (i = 0; i < chunk; i++) { - kptllnd_peer_check_bucket(peer_index, stamp); - peer_index = (peer_index + 1) % - kptllnd_data.kptl_peer_hash_size; - } - - deadline += p * HZ; - stamp++; - continue; - } - - kptllnd_handle_closing_peers(); - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_watchdog_waitq, - &waitlink); - - waitq_timedwait(&waitlink, TASK_INTERRUPTIBLE, timeout); - - set_current_state (TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_watchdog_waitq, &waitlink); - } - - kptllnd_thread_fini(); - CDEBUG(D_NET, "<<<\n"); - return (0); -}; - -int -kptllnd_scheduler (void *arg) -{ - int id = (long)arg; - wait_queue_t waitlink; - unsigned long flags; - int did_something; - int counter = 0; - kptl_rx_t *rx; - kptl_rx_buffer_t *rxb; - kptl_tx_t *tx; - - cfs_block_allsigs(); - - init_waitqueue_entry_current(&waitlink); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - /* threads shut down in phase 2 after all peers have been destroyed */ - while (kptllnd_data.kptl_shutdown < 2) { - - did_something = 0; - - if (!cfs_list_empty(&kptllnd_data.kptl_sched_rxq)) { - rx = cfs_list_entry (kptllnd_data.kptl_sched_rxq.next, - kptl_rx_t, rx_list); - cfs_list_del(&rx->rx_list); - - spin_unlock_irqrestore(&kptllnd_data. \ - kptl_sched_lock, - flags); - - kptllnd_rx_parse(rx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - } - - if (!cfs_list_empty(&kptllnd_data.kptl_sched_rxbq)) { - rxb = cfs_list_entry (kptllnd_data.kptl_sched_rxbq.next, - kptl_rx_buffer_t, - rxb_repost_list); - cfs_list_del(&rxb->rxb_repost_list); - - spin_unlock_irqrestore(&kptllnd_data. \ - kptl_sched_lock, - flags); - - kptllnd_rx_buffer_post(rxb); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - } - - if (!cfs_list_empty(&kptllnd_data.kptl_sched_txq)) { - tx = cfs_list_entry (kptllnd_data.kptl_sched_txq.next, - kptl_tx_t, tx_list); - cfs_list_del_init(&tx->tx_list); - - spin_unlock_irqrestore(&kptllnd_data. \ - kptl_sched_lock, flags); - - kptllnd_tx_fini(tx); - did_something = 1; - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - } - - if (did_something) { - if (++counter != *kptllnd_tunables.kptl_reschedule_loops) - continue; - } - - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue_exclusive(&kptllnd_data.kptl_sched_waitq, - &waitlink); - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - - if (!did_something) - waitq_wait(&waitlink, TASK_INTERRUPTIBLE); - else - cond_resched(); - - set_current_state(TASK_RUNNING); - remove_wait_queue(&kptllnd_data.kptl_sched_waitq, &waitlink); - - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - counter = 0; - } - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, flags); - - kptllnd_thread_fini(); - return 0; -} diff --git a/lnet/klnds/ptllnd/ptllnd_modparams.c b/lnet/klnds/ptllnd/ptllnd_modparams.c deleted file mode 100644 index ed4f13b..0000000 --- a/lnet/klnds/ptllnd/ptllnd_modparams.c +++ /dev/null @@ -1,364 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd_modparams.c - * - * Author: PJ Kirner - */ - - -#include "ptllnd.h" - -static int ntx = 256; -CFS_MODULE_PARM(ntx, "i", int, 0444, - "# of TX descriptors"); - -static int max_nodes = 1152; -CFS_MODULE_PARM(max_nodes, "i", int, 0444, - "maximum number of peer nodes"); - -static int max_procs_per_node = 2; -CFS_MODULE_PARM(max_procs_per_node, "i", int, 0444, - "maximum number of processes per peer node to cache"); - -static int checksum = 0; -CFS_MODULE_PARM(checksum, "i", int, 0644, - "set non-zero to enable message (not RDMA) checksums"); - -/* NB 250 is the Cray Portals wire timeout */ -static int timeout = 250; -CFS_MODULE_PARM(timeout, "i", int, 0644, - "timeout (seconds)"); - -static int portal = PTLLND_PORTAL; /* */ -CFS_MODULE_PARM(portal, "i", int, 0444, - "portal id"); - -static int pid = PTLLND_PID; /* */ -CFS_MODULE_PARM(pid, "i", int, 0444, - "portals pid"); - -static int rxb_npages = 1; -CFS_MODULE_PARM(rxb_npages, "i", int, 0444, - "# of pages per rx buffer"); - -static int rxb_nspare = 8; -CFS_MODULE_PARM(rxb_nspare, "i", int, 0444, - "# of spare rx buffers"); - -static int credits = 128; -CFS_MODULE_PARM(credits, "i", int, 0444, - "concurrent sends"); - -static int peercredits = PTLLND_PEERCREDITS; /* */ -CFS_MODULE_PARM(peercredits, "i", int, 0444, - "concurrent sends to 1 peer"); - -static int peer_buffer_credits = 0; -CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444, - "# per-peer router buffer credits"); - -static int max_msg_size = PTLLND_MAX_KLND_MSG_SIZE; /* */ -CFS_MODULE_PARM(max_msg_size, "i", int, 0444, - "max size of immediate message"); - -static int peer_hash_table_size = 101; -CFS_MODULE_PARM(peer_hash_table_size, "i", int, 0444, - "# of slots in the peer hash table"); - -static int reschedule_loops = 100; -CFS_MODULE_PARM(reschedule_loops, "i", int, 0644, - "# of loops before scheduler does cond_resched()"); - -static int ack_puts = 0; -CFS_MODULE_PARM(ack_puts, "i", int, 0644, - "get portals to ack all PUTs"); - -#ifdef PJK_DEBUGGING -static int simulation_bitmap = 0; -CFS_MODULE_PARM(simulation_bitmap, "i", int, 0444, - "simulation bitmap"); -#endif - - -kptl_tunables_t kptllnd_tunables = { - .kptl_ntx = &ntx, - .kptl_max_nodes = &max_nodes, - .kptl_max_procs_per_node = &max_procs_per_node, - .kptl_checksum = &checksum, - .kptl_portal = &portal, - .kptl_pid = &pid, - .kptl_timeout = &timeout, - .kptl_rxb_npages = &rxb_npages, - .kptl_rxb_nspare = &rxb_nspare, - .kptl_credits = &credits, - .kptl_peertxcredits = &peercredits, - .kptl_peerrtrcredits = &peer_buffer_credits, - .kptl_max_msg_size = &max_msg_size, - .kptl_peer_hash_table_size = &peer_hash_table_size, - .kptl_reschedule_loops = &reschedule_loops, - .kptl_ack_puts = &ack_puts, -#ifdef PJK_DEBUGGING - .kptl_simulation_bitmap = &simulation_bitmap, -#endif -}; - - -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - -#ifndef HAVE_SYSCTL_UNNUMBERED - -enum { - KPTLLND_NTX = 1, - KPTLLND_MAX_NODES, - KPTLLND_MAX_PROC_PER_NODE, - KPTLLND_CHECKSUM, - KPTLLND_TIMEOUT, - KPTLLND_PORTAL, - KPTLLND_PID, - KPTLLND_RXB_PAGES, - KPTLLND_CREDITS, - KPTLLND_PEERTXCREDITS, - KPTLLND_PEERRTRCREDITS, - KPTLLND_MAX_MSG_SIZE, - KPTLLND_PEER_HASH_SIZE, - KPTLLND_RESHEDULE_LOOPS, - KPTLLND_ACK_PUTS, - KPTLLND_TRACETIMEOUT, - KPTLLND_TRACEFAIL, - KPTLLND_TRACEBASENAME, - KPTLLND_SIMULATION_BITMAP -}; -#else - -#define KPTLLND_NTX CTL_UNNUMBERED -#define KPTLLND_MAX_NODES CTL_UNNUMBERED -#define KPTLLND_MAX_PROC_PER_NODE CTL_UNNUMBERED -#define KPTLLND_CHECKSUM CTL_UNNUMBERED -#define KPTLLND_TIMEOUT CTL_UNNUMBERED -#define KPTLLND_PORTAL CTL_UNNUMBERED -#define KPTLLND_PID CTL_UNNUMBERED -#define KPTLLND_RXB_PAGES CTL_UNNUMBERED -#define KPTLLND_CREDITS CTL_UNNUMBERED -#define KPTLLND_PEERTXCREDITS CTL_UNNUMBERED -#define KPTLLND_PEERRTRCREDITS CTL_UNNUMBERED -#define KPTLLND_MAX_MSG_SIZE CTL_UNNUMBERED -#define KPTLLND_PEER_HASH_SIZE CTL_UNNUMBERED -#define KPTLLND_RESHEDULE_LOOPS CTL_UNNUMBERED -#define KPTLLND_ACK_PUTS CTL_UNNUMBERED -#define KPTLLND_TRACETIMEOUT CTL_UNNUMBERED -#define KPTLLND_TRACEFAIL CTL_UNNUMBERED -#define KPTLLND_TRACEBASENAME CTL_UNNUMBERED -#define KPTLLND_SIMULATION_BITMAP CTL_UNNUMBERED -#endif - -static struct ctl_table kptllnd_ctl_table[] = { - { - .ctl_name = KPTLLND_NTX, - .procname = "ntx", - .data = &ntx, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_MAX_NODES, - .procname = "max_nodes", - .data = &max_nodes, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_MAX_PROC_PER_NODE, - .procname = "max_procs_per_node", - .data = &max_procs_per_node, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_CHECKSUM, - .procname = "checksum", - .data = &checksum, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_TIMEOUT, - .procname = "timeout", - .data = &timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_PORTAL, - .procname = "portal", - .data = &portal, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_PID, - .procname = "pid", - .data = &pid, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_RXB_PAGES, - .procname = "rxb_npages", - .data = &rxb_npages, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_CREDITS, - .procname = "credits", - .data = &credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_PEERTXCREDITS, - .procname = "peercredits", - .data = &peercredits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_PEERRTRCREDITS, - .procname = "peer_buffer_credits", - .data = &peer_buffer_credits, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_MAX_MSG_SIZE, - .procname = "max_msg_size", - .data = &max_msg_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_PEER_HASH_SIZE, - .procname = "peer_hash_table_size", - .data = &peer_hash_table_size, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_RESHEDULE_LOOPS, - .procname = "reschedule_loops", - .data = &reschedule_loops, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, - { - .ctl_name = KPTLLND_ACK_PUTS, - .procname = "ack_puts", - .data = &ack_puts, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, -#ifdef PJK_DEBUGGING - { - .ctl_name = KPTLLND_SIMULATION_BITMAP, - .procname = "simulation_bitmap", - .data = &simulation_bitmap, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec - }, -#endif - - {0} -}; - -static struct ctl_table kptllnd_top_ctl_table[] = { - { - .ctl_name = CTL_PTLLND, - .procname = "ptllnd", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = kptllnd_ctl_table - }, - {0} -}; - -int -kptllnd_tunables_init () -{ - kptllnd_tunables.kptl_sysctl = - register_sysctl_table(kptllnd_top_ctl_table, 0); - - if (kptllnd_tunables.kptl_sysctl == NULL) - CWARN("Can't setup /proc tunables\n"); - - return 0; -} - -void -kptllnd_tunables_fini () -{ - if (kptllnd_tunables.kptl_sysctl != NULL) - unregister_sysctl_table(kptllnd_tunables.kptl_sysctl); -} - -#else - -int -kptllnd_tunables_init () -{ - return 0; -} - -void -kptllnd_tunables_fini () -{ -} - -#endif diff --git a/lnet/klnds/ptllnd/ptllnd_peer.c b/lnet/klnds/ptllnd/ptllnd_peer.c deleted file mode 100644 index ae10ef7..0000000 --- a/lnet/klnds/ptllnd/ptllnd_peer.c +++ /dev/null @@ -1,1463 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd_peer.c - * - * Author: PJ Kirner - * Author: E Barton - */ - -#include "ptllnd.h" -#include - -static int -kptllnd_count_queue(cfs_list_t *q) -{ - cfs_list_t *e; - int n = 0; - - cfs_list_for_each(e, q) { - n++; - } - - return n; -} - -int -kptllnd_get_peer_info(int index, - lnet_process_id_t *id, - int *state, int *sent_hello, - int *refcount, __u64 *incarnation, - __u64 *next_matchbits, __u64 *last_matchbits_seen, - int *nsendq, int *nactiveq, - int *credits, int *outstanding_credits) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - unsigned long flags; - cfs_list_t *ptmp; - kptl_peer_t *peer; - int i; - int rc = -ENOENT; - - read_lock_irqsave(g_lock, flags); - - for (i = 0; i < kptllnd_data.kptl_peer_hash_size; i++) { - cfs_list_for_each (ptmp, &kptllnd_data.kptl_peers[i]) { - peer = cfs_list_entry(ptmp, kptl_peer_t, peer_list); - - if (index-- > 0) - continue; - - *id = peer->peer_id; - *state = peer->peer_state; - *sent_hello = peer->peer_sent_hello; - *refcount = cfs_atomic_read(&peer->peer_refcount); - *incarnation = peer->peer_incarnation; - - spin_lock(&peer->peer_lock); - - *next_matchbits = peer->peer_next_matchbits; - *last_matchbits_seen = peer->peer_last_matchbits_seen; - *credits = peer->peer_credits; - *outstanding_credits = peer->peer_outstanding_credits; - - *nsendq = kptllnd_count_queue(&peer->peer_sendq); - *nactiveq = kptllnd_count_queue(&peer->peer_activeq); - - spin_unlock(&peer->peer_lock); - - rc = 0; - goto out; - } - } - - out: - read_unlock_irqrestore(g_lock, flags); - return rc; -} - -void -kptllnd_peer_add_peertable_locked (kptl_peer_t *peer) -{ - LASSERT (kptllnd_data.kptl_n_active_peers < - kptllnd_data.kptl_expected_peers); - - LASSERT (peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - kptllnd_data.kptl_n_active_peers++; - cfs_atomic_inc(&peer->peer_refcount); /* +1 ref for the list */ - - /* NB add to HEAD of peer list for MRU order! - * (see kptllnd_cull_peertable) */ - cfs_list_add(&peer->peer_list, kptllnd_nid2peerlist(peer->peer_id.nid)); -} - -void -kptllnd_cull_peertable_locked (lnet_process_id_t pid) -{ - /* I'm about to add a new peer with this portals ID to the peer table, - * so (a) this peer should not exist already and (b) I want to leave at - * most (max_procs_per_nid - 1) peers with this NID in the table. */ - cfs_list_t *peers = kptllnd_nid2peerlist(pid.nid); - int cull_count = *kptllnd_tunables.kptl_max_procs_per_node; - int count; - cfs_list_t *tmp; - cfs_list_t *nxt; - kptl_peer_t *peer; - - count = 0; - cfs_list_for_each_safe (tmp, nxt, peers) { - /* NB I rely on kptllnd_peer_add_peertable_locked to add peers - * in MRU order */ - peer = cfs_list_entry(tmp, kptl_peer_t, peer_list); - - if (LNET_NIDADDR(peer->peer_id.nid) != LNET_NIDADDR(pid.nid)) - continue; - - LASSERT (peer->peer_id.pid != pid.pid); - - count++; - - if (count < cull_count) /* recent (don't cull) */ - continue; - - CDEBUG(D_NET, "Cull %s(%s)\n", - libcfs_id2str(peer->peer_id), - kptllnd_ptlid2str(peer->peer_ptlid)); - - kptllnd_peer_close_locked(peer, 0); - } -} - -kptl_peer_t * -kptllnd_peer_allocate (kptl_net_t *net, lnet_process_id_t lpid, ptl_process_id_t ppid) -{ - unsigned long flags; - kptl_peer_t *peer; - - LIBCFS_ALLOC(peer, sizeof (*peer)); - if (peer == NULL) { - CERROR("Can't create peer %s (%s)\n", - libcfs_id2str(lpid), - kptllnd_ptlid2str(ppid)); - return NULL; - } - - memset(peer, 0, sizeof(*peer)); /* zero flags etc */ - - CFS_INIT_LIST_HEAD (&peer->peer_noops); - CFS_INIT_LIST_HEAD (&peer->peer_sendq); - CFS_INIT_LIST_HEAD (&peer->peer_activeq); - spin_lock_init(&peer->peer_lock); - - peer->peer_state = PEER_STATE_ALLOCATED; - peer->peer_error = 0; - peer->peer_last_alive = 0; - peer->peer_id = lpid; - peer->peer_ptlid = ppid; - peer->peer_credits = 1; /* enough for HELLO */ - peer->peer_next_matchbits = PTL_RESERVED_MATCHBITS; - peer->peer_outstanding_credits = *kptllnd_tunables.kptl_peertxcredits - 1; - peer->peer_sent_credits = 1; /* HELLO credit is implicit */ - peer->peer_max_msg_size = PTLLND_MIN_BUFFER_SIZE; /* until we know better */ - - cfs_atomic_set(&peer->peer_refcount, 1); /* 1 ref for caller */ - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - peer->peer_myincarnation = kptllnd_data.kptl_incarnation; - - /* Only increase # peers under lock, to guarantee we dont grow it - * during shutdown */ - if (net->net_shutdown) { - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - LIBCFS_FREE(peer, sizeof(*peer)); - return NULL; - } - - kptllnd_data.kptl_npeers++; - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - return peer; -} - -void -kptllnd_peer_destroy (kptl_peer_t *peer) -{ - unsigned long flags; - - CDEBUG(D_NET, "Peer=%p\n", peer); - - LASSERT (!in_interrupt()); - LASSERT (cfs_atomic_read(&peer->peer_refcount) == 0); - LASSERT (peer->peer_state == PEER_STATE_ALLOCATED || - peer->peer_state == PEER_STATE_ZOMBIE); - LASSERT (cfs_list_empty(&peer->peer_noops)); - LASSERT (cfs_list_empty(&peer->peer_sendq)); - LASSERT (cfs_list_empty(&peer->peer_activeq)); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (peer->peer_state == PEER_STATE_ZOMBIE) - cfs_list_del(&peer->peer_list); - - kptllnd_data.kptl_npeers--; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - LIBCFS_FREE (peer, sizeof (*peer)); -} - -void -kptllnd_cancel_txlist (cfs_list_t *peerq, cfs_list_t *txs) -{ - cfs_list_t *tmp; - cfs_list_t *nxt; - kptl_tx_t *tx; - - cfs_list_for_each_safe (tmp, nxt, peerq) { - tx = cfs_list_entry(tmp, kptl_tx_t, tx_list); - - cfs_list_del(&tx->tx_list); - cfs_list_add_tail(&tx->tx_list, txs); - - tx->tx_status = -EIO; - tx->tx_active = 0; - } -} - -void -kptllnd_peer_cancel_txs(kptl_peer_t *peer, cfs_list_t *txs) -{ - unsigned long flags; - - spin_lock_irqsave(&peer->peer_lock, flags); - - kptllnd_cancel_txlist(&peer->peer_noops, txs); - kptllnd_cancel_txlist(&peer->peer_sendq, txs); - kptllnd_cancel_txlist(&peer->peer_activeq, txs); - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - -void -kptllnd_peer_alive (kptl_peer_t *peer) -{ - /* This is racy, but everyone's only writing cfs_time_current() */ - peer->peer_last_alive = cfs_time_current(); - smp_mb(); -} - -void -kptllnd_peer_notify (kptl_peer_t *peer) -{ - unsigned long flags; - kptl_net_t *net; - kptl_net_t **nets; - int i = 0; - int nnets = 0; - int error = 0; - cfs_time_t last_alive = 0; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (peer->peer_error != 0) { - error = peer->peer_error; - peer->peer_error = 0; - last_alive = peer->peer_last_alive; - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (error == 0) - return; - - read_lock(&kptllnd_data.kptl_net_rw_lock); - cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list) - nnets++; - read_unlock(&kptllnd_data.kptl_net_rw_lock); - - if (nnets == 0) /* shutdown in progress */ - return; - - LIBCFS_ALLOC(nets, nnets * sizeof(*nets)); - if (nets == NULL) { - CERROR("Failed to allocate nets[%d]\n", nnets); - return; - } - memset(nets, 0, nnets * sizeof(*nets)); - - read_lock(&kptllnd_data.kptl_net_rw_lock); - i = 0; - cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list) { - LASSERT (i < nnets); - nets[i] = net; - kptllnd_net_addref(net); - i++; - } - read_unlock(&kptllnd_data.kptl_net_rw_lock); - - for (i = 0; i < nnets; i++) { - lnet_nid_t peer_nid; - - net = nets[i]; - if (net == NULL) - break; - - if (!net->net_shutdown) { - peer_nid = kptllnd_ptl2lnetnid(net->net_ni->ni_nid, - peer->peer_ptlid.nid); - lnet_notify(net->net_ni, peer_nid, 0, last_alive); - } - - kptllnd_net_decref(net); - } - - LIBCFS_FREE(nets, nnets * sizeof(*nets)); -} - -void -kptllnd_handle_closing_peers () -{ - unsigned long flags; - cfs_list_t txs; - kptl_peer_t *peer; - cfs_list_t *tmp; - cfs_list_t *nxt; - kptl_tx_t *tx; - int idle; - - /* Check with a read lock first to avoid blocking anyone */ - - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - idle = cfs_list_empty(&kptllnd_data.kptl_closing_peers) && - cfs_list_empty(&kptllnd_data.kptl_zombie_peers); - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - if (idle) - return; - - CFS_INIT_LIST_HEAD(&txs); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Cancel txs on all zombie peers. NB anyone dropping the last peer - * ref removes it from this list, so I musn't drop the lock while - * scanning it. */ - cfs_list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = cfs_list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_ZOMBIE); - - kptllnd_peer_cancel_txs(peer, &txs); - } - - /* Notify LNET and cancel txs on closing (i.e. newly closed) peers. NB - * I'm the only one removing from this list, but peers can be added on - * the end any time I drop the lock. */ - - cfs_list_for_each_safe (tmp, nxt, &kptllnd_data.kptl_closing_peers) { - peer = cfs_list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT (peer->peer_state == PEER_STATE_CLOSING); - - cfs_list_del(&peer->peer_list); - cfs_list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_zombie_peers); - peer->peer_state = PEER_STATE_ZOMBIE; - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - kptllnd_peer_notify(peer); - kptllnd_peer_cancel_txs(peer, &txs); - kptllnd_peer_decref(peer); - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - } - - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - /* Drop peer's ref on all cancelled txs. This will get - * kptllnd_tx_fini() to abort outstanding comms if necessary. */ - - cfs_list_for_each_safe (tmp, nxt, &txs) { - tx = cfs_list_entry(tmp, kptl_tx_t, tx_list); - cfs_list_del(&tx->tx_list); - kptllnd_tx_decref(tx); - } -} - -void -kptllnd_peer_close_locked(kptl_peer_t *peer, int why) -{ - switch (peer->peer_state) { - default: - LBUG(); - - case PEER_STATE_WAITING_HELLO: - case PEER_STATE_ACTIVE: - /* Ensure new peers see a new incarnation of me */ - LASSERT(peer->peer_myincarnation <= kptllnd_data.kptl_incarnation); - if (peer->peer_myincarnation == kptllnd_data.kptl_incarnation) - kptllnd_data.kptl_incarnation++; - - /* Removing from peer table */ - kptllnd_data.kptl_n_active_peers--; - LASSERT (kptllnd_data.kptl_n_active_peers >= 0); - - cfs_list_del(&peer->peer_list); - kptllnd_peer_unreserve_buffers(); - - peer->peer_error = why; /* stash 'why' only on first close */ - peer->peer_state = PEER_STATE_CLOSING; - - /* Schedule for immediate attention, taking peer table's ref */ - cfs_list_add_tail(&peer->peer_list, - &kptllnd_data.kptl_closing_peers); - wake_up(&kptllnd_data.kptl_watchdog_waitq); - break; - - case PEER_STATE_ZOMBIE: - case PEER_STATE_CLOSING: - break; - } -} - -void -kptllnd_peer_close(kptl_peer_t *peer, int why) -{ - unsigned long flags; - - write_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - kptllnd_peer_close_locked(peer, why); - write_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -int -kptllnd_peer_del(lnet_process_id_t id) -{ - cfs_list_t *ptmp; - cfs_list_t *pnxt; - kptl_peer_t *peer; - int lo; - int hi; - int i; - unsigned long flags; - int rc = -ENOENT; - - /* - * Find the single bucket we are supposed to look at or if nid is a - * wildcard (LNET_NID_ANY) then look at all of the buckets - */ - if (id.nid != LNET_NID_ANY) { - cfs_list_t *l = kptllnd_nid2peerlist(id.nid); - - lo = hi = l - kptllnd_data.kptl_peers; - } else { - if (id.pid != LNET_PID_ANY) - return -EINVAL; - - lo = 0; - hi = kptllnd_data.kptl_peer_hash_size - 1; - } - -again: - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - for (i = lo; i <= hi; i++) { - cfs_list_for_each_safe (ptmp, pnxt, - &kptllnd_data.kptl_peers[i]) { - peer = cfs_list_entry (ptmp, kptl_peer_t, peer_list); - - if (!(id.nid == LNET_NID_ANY || - (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(id.nid) && - (id.pid == LNET_PID_ANY || - peer->peer_id.pid == id.pid)))) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... */ - - read_unlock_irqrestore(&kptllnd_data. \ - kptl_peer_rw_lock, - flags); - - kptllnd_peer_close(peer, 0); - kptllnd_peer_decref(peer); /* ...until here */ - - rc = 0; /* matched something */ - - /* start again now I've dropped the lock */ - goto again; - } - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); - - return (rc); -} - -void -kptllnd_queue_tx(kptl_peer_t *peer, kptl_tx_t *tx) -{ - /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */ - unsigned long flags; - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* Ensure HELLO is sent first */ - if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) - cfs_list_add(&tx->tx_list, &peer->peer_noops); - else if (tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) - cfs_list_add(&tx->tx_list, &peer->peer_sendq); - else - cfs_list_add_tail(&tx->tx_list, &peer->peer_sendq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); -} - - -void -kptllnd_post_tx(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - /* CAVEAT EMPTOR: I take over caller's ref on 'tx' */ - ptl_handle_md_t msg_mdh; - ptl_md_t md; - ptl_err_t prc; - - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (tx->tx_type == TX_TYPE_SMALL_MESSAGE || - tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST); - - kptllnd_set_tx_peer(tx, peer); - - memset(&md, 0, sizeof(md)); - - md.threshold = tx->tx_acked ? 2 : 1; /* SEND END + ACK? */ - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE; - md.user_ptr = &tx->tx_msg_eventarg; - md.eq_handle = kptllnd_data.kptl_eqh; - - if (nfrag == 0) { - md.start = tx->tx_msg; - md.length = tx->tx_msg->ptlm_nob; - } else { - LASSERT (nfrag > 1); - LASSERT (tx->tx_frags->iov[0].iov_base == (void *)tx->tx_msg); - - md.start = tx->tx_frags; - md.length = nfrag; - md.options |= PTL_MD_IOVEC; - } - - prc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &msg_mdh); - if (prc != PTL_OK) { - CERROR("PtlMDBind(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(prc), prc); - tx->tx_status = -EIO; - kptllnd_tx_decref(tx); - return; - } - - - tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * HZ); - tx->tx_active = 1; - tx->tx_msg_mdh = msg_mdh; - kptllnd_queue_tx(peer, tx); -} - -/* NB "restarts" comes from peer_sendq of a single peer */ -void -kptllnd_restart_txs (kptl_net_t *net, lnet_process_id_t target, - cfs_list_t *restarts) -{ - kptl_tx_t *tx; - kptl_tx_t *tmp; - kptl_peer_t *peer; - - LASSERT (!cfs_list_empty(restarts)); - - if (kptllnd_find_target(net, target, &peer) != 0) - peer = NULL; - - cfs_list_for_each_entry_safe (tx, tmp, restarts, tx_list) { - LASSERT (tx->tx_peer != NULL); - LASSERT (tx->tx_type == TX_TYPE_GET_REQUEST || - tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_SMALL_MESSAGE); - - cfs_list_del_init(&tx->tx_list); - - if (peer == NULL || - tx->tx_msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - kptllnd_tx_decref(tx); - continue; - } - - LASSERT (tx->tx_msg->ptlm_type != PTLLND_MSG_TYPE_NOOP); - tx->tx_status = 0; - tx->tx_active = 1; - kptllnd_peer_decref(tx->tx_peer); - tx->tx_peer = NULL; - kptllnd_set_tx_peer(tx, peer); - kptllnd_queue_tx(peer, tx); /* takes over my ref on tx */ - } - - if (peer == NULL) - return; - - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); -} - -static inline int -kptllnd_peer_send_noop (kptl_peer_t *peer) -{ - if (!peer->peer_sent_hello || - peer->peer_credits == 0 || - !cfs_list_empty(&peer->peer_noops) || - peer->peer_outstanding_credits < PTLLND_CREDIT_HIGHWATER) - return 0; - - /* No tx to piggyback NOOP onto or no credit to send a tx */ - return (cfs_list_empty(&peer->peer_sendq) || peer->peer_credits == 1); -} - -void -kptllnd_peer_check_sends (kptl_peer_t *peer) -{ - ptl_handle_me_t meh; - kptl_tx_t *tx; - int rc; - int msg_type; - unsigned long flags; - - LASSERT(!in_interrupt()); - - spin_lock_irqsave(&peer->peer_lock, flags); - - peer->peer_retry_noop = 0; - - if (kptllnd_peer_send_noop(peer)) { - /* post a NOOP to return credits */ - spin_unlock_irqrestore(&peer->peer_lock, flags); - - tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (tx == NULL) { - CERROR("Can't return credits to %s: can't allocate descriptor\n", - libcfs_id2str(peer->peer_id)); - } else { - kptllnd_init_msg(tx->tx_msg, PTLLND_MSG_TYPE_NOOP, - peer->peer_id, 0); - kptllnd_post_tx(peer, tx, 0); - } - - spin_lock_irqsave(&peer->peer_lock, flags); - peer->peer_retry_noop = (tx == NULL); - } - - for (;;) { - if (!cfs_list_empty(&peer->peer_noops)) { - LASSERT (peer->peer_sent_hello); - tx = cfs_list_entry(peer->peer_noops.next, - kptl_tx_t, tx_list); - } else if (!cfs_list_empty(&peer->peer_sendq)) { - tx = cfs_list_entry(peer->peer_sendq.next, - kptl_tx_t, tx_list); - } else { - /* nothing to send right now */ - break; - } - - LASSERT (tx->tx_active); - LASSERT (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - - LASSERT (peer->peer_outstanding_credits >= 0); - LASSERT (peer->peer_sent_credits >= 0); - LASSERT (peer->peer_sent_credits + - peer->peer_outstanding_credits <= - *kptllnd_tunables.kptl_peertxcredits); - LASSERT (peer->peer_credits >= 0); - - msg_type = tx->tx_msg->ptlm_type; - - /* Ensure HELLO is sent first */ - if (!peer->peer_sent_hello) { - LASSERT (cfs_list_empty(&peer->peer_noops)); - if (msg_type != PTLLND_MSG_TYPE_HELLO) - break; - peer->peer_sent_hello = 1; - } - - if (peer->peer_credits == 0) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: no credits for %s[%p]\n", - libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx); - break; - } - - /* Last/Initial credit reserved for NOOP/HELLO */ - if (peer->peer_credits == 1 && - msg_type != PTLLND_MSG_TYPE_HELLO && - msg_type != PTLLND_MSG_TYPE_NOOP) { - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: " - "not using last credit for %s[%p]\n", - libcfs_id2str(peer->peer_id), - peer->peer_credits, - peer->peer_outstanding_credits, - peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx); - break; - } - - cfs_list_del(&tx->tx_list); - - /* Discard any NOOP I queued if I'm not at the high-water mark - * any more or more messages have been queued */ - if (msg_type == PTLLND_MSG_TYPE_NOOP && - !kptllnd_peer_send_noop(peer)) { - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CDEBUG(D_NET, "%s: redundant noop\n", - libcfs_id2str(peer->peer_id)); - kptllnd_tx_decref(tx); - - spin_lock_irqsave(&peer->peer_lock, flags); - continue; - } - - /* fill last-minute msg fields */ - kptllnd_msg_pack(tx->tx_msg, peer); - - if (tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST) { - /* peer_next_matchbits must be known good */ - LASSERT (peer->peer_state >= PEER_STATE_ACTIVE); - /* Assume 64-bit matchbits can't wrap */ - LASSERT (peer->peer_next_matchbits >= PTL_RESERVED_MATCHBITS); - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits = - peer->peer_next_matchbits++; - } - - peer->peer_sent_credits += peer->peer_outstanding_credits; - peer->peer_outstanding_credits = 0; - peer->peer_credits--; - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s tx=%p nob=%d cred=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_msgtype2str(msg_type), tx, tx->tx_msg->ptlm_nob, - tx->tx_msg->ptlm_credits); - - cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_tx_addref(tx); /* 1 ref for me... */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (tx->tx_type == TX_TYPE_PUT_REQUEST || - tx->tx_type == TX_TYPE_GET_REQUEST) { - /* Post bulk now we have safe matchbits */ - rc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - peer->peer_ptlid, - tx->tx_msg->ptlm_u.rdma.kptlrm_matchbits, - 0, /* ignore bits */ - PTL_UNLINK, - PTL_INS_BEFORE, - &meh); - if (rc != PTL_OK) { - CERROR("PtlMEAttach(%s) failed: %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(rc), rc); - goto failed; - } - - rc = PtlMDAttach(meh, tx->tx_rdma_md, PTL_UNLINK, - &tx->tx_rdma_mdh); - if (rc != PTL_OK) { - CERROR("PtlMDAttach(%s) failed: %s(%d)\n", - libcfs_id2str(tx->tx_peer->peer_id), - kptllnd_errtype2str(rc), rc); - rc = PtlMEUnlink(meh); - LASSERT(rc == PTL_OK); - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - goto failed; - } - /* I'm not racing with the event callback here. It's a - * bug if there's an event on the MD I just attached - * before I actually send the RDMA request message - - * probably matchbits re-used in error. */ - } - - tx->tx_tposted = jiffies; /* going on the wire */ - - rc = PtlPut (tx->tx_msg_mdh, - tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ, - peer->peer_ptlid, - *kptllnd_tunables.kptl_portal, - 0, /* acl cookie */ - LNET_MSG_MATCHBITS, - 0, /* offset */ - 0); /* header data */ - if (rc != PTL_OK) { - CERROR("PtlPut %s error %s(%d)\n", - libcfs_id2str(peer->peer_id), - kptllnd_errtype2str(rc), rc); - goto failed; - } - - kptllnd_tx_decref(tx); /* drop my ref */ - - spin_lock_irqsave(&peer->peer_lock, flags); - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - - failed: - /* Nuke everything (including tx we were trying) */ - kptllnd_peer_close(peer, -EIO); - kptllnd_tx_decref(tx); -} - -kptl_tx_t * -kptllnd_find_timed_out_tx(kptl_peer_t *peer) -{ - kptl_tx_t *tx; - cfs_list_t *ele; - - cfs_list_for_each(ele, &peer->peer_sendq) { - tx = cfs_list_entry(ele, kptl_tx_t, tx_list); - - if (cfs_time_aftereq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - cfs_list_for_each(ele, &peer->peer_activeq) { - tx = cfs_list_entry(ele, kptl_tx_t, tx_list); - - if (cfs_time_aftereq(jiffies, tx->tx_deadline)) { - kptllnd_tx_addref(tx); - return tx; - } - } - - return NULL; -} - - -void -kptllnd_peer_check_bucket (int idx, int stamp) -{ - cfs_list_t *peers = &kptllnd_data.kptl_peers[idx]; - kptl_peer_t *peer; - unsigned long flags; - - CDEBUG(D_NET, "Bucket=%d, stamp=%d\n", idx, stamp); - - again: - /* NB. Shared lock while I just look */ - read_lock_irqsave(&kptllnd_data.kptl_peer_rw_lock, flags); - - cfs_list_for_each_entry (peer, peers, peer_list) { - kptl_tx_t *tx; - int check_sends; - int c = -1, oc = -1, sc = -1; - int nsend = -1, nactive = -1; - int sent_hello = -1, state = -1; - - CDEBUG(D_NET, "Peer=%s Credits=%d Outstanding=%d Send=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits); - - spin_lock(&peer->peer_lock); - - if (peer->peer_check_stamp == stamp) { - /* checked already this pass */ - spin_unlock(&peer->peer_lock); - continue; - } - - peer->peer_check_stamp = stamp; - tx = kptllnd_find_timed_out_tx(peer); - check_sends = peer->peer_retry_noop; - - if (tx != NULL) { - c = peer->peer_credits; - sc = peer->peer_sent_credits; - oc = peer->peer_outstanding_credits; - state = peer->peer_state; - sent_hello = peer->peer_sent_hello; - nsend = kptllnd_count_queue(&peer->peer_sendq); - nactive = kptllnd_count_queue(&peer->peer_activeq); - } - - spin_unlock(&peer->peer_lock); - - if (tx == NULL && !check_sends) - continue; - - kptllnd_peer_addref(peer); /* 1 ref for me... */ - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, - flags); - - if (tx == NULL) { /* nothing timed out */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); /* ...until here or... */ - - /* rescan after dropping the lock */ - goto again; - } - - LCONSOLE_ERROR_MSG(0x126, "Timing out %s: %s\n", - libcfs_id2str(peer->peer_id), - (tx->tx_tposted == 0) ? - "no free peer buffers" : - "please check Portals"); - - if (tx->tx_tposted) { - CERROR("Could not send to %s after %ds (sent %lds ago); " - "check Portals for possible issues\n", - libcfs_id2str(peer->peer_id), - *kptllnd_tunables.kptl_timeout, - cfs_duration_sec(jiffies - tx->tx_tposted)); - } else if (state < PEER_STATE_ACTIVE) { - CERROR("Could not connect %s (%d) after %ds; " - "peer might be down\n", - libcfs_id2str(peer->peer_id), state, - *kptllnd_tunables.kptl_timeout); - } else { - CERROR("Could not get credits for %s after %ds; " - "possible Lustre networking issues\n", - libcfs_id2str(peer->peer_id), - *kptllnd_tunables.kptl_timeout); - } - - CERROR("%s timed out: cred %d outstanding %d, sent %d, " - "state %d, sent_hello %d, sendq %d, activeq %d " - "Tx %p %s %s (%s%s%s) status %d %sposted %lu T/O %ds\n", - libcfs_id2str(peer->peer_id), c, oc, sc, - state, sent_hello, nsend, nactive, - tx, kptllnd_tx_typestr(tx->tx_type), - kptllnd_msgtype2str(tx->tx_msg->ptlm_type), - tx->tx_active ? "A" : "", - PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) ? - "" : "M", - PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) ? - "" : "D", - tx->tx_status, - (tx->tx_tposted == 0) ? "not " : "", - (tx->tx_tposted == 0) ? 0UL : (jiffies - tx->tx_tposted), - *kptllnd_tunables.kptl_timeout); - - kptllnd_tx_decref(tx); - - kptllnd_peer_close(peer, -ETIMEDOUT); - kptllnd_peer_decref(peer); /* ...until here */ - - /* start again now I've dropped the lock */ - goto again; - } - - read_unlock_irqrestore(&kptllnd_data.kptl_peer_rw_lock, flags); -} - -kptl_peer_t * -kptllnd_id2peer_locked (lnet_process_id_t id) -{ - cfs_list_t *peers = kptllnd_nid2peerlist(id.nid); - cfs_list_t *tmp; - kptl_peer_t *peer; - - cfs_list_for_each (tmp, peers) { - peer = cfs_list_entry (tmp, kptl_peer_t, peer_list); - - LASSERT(peer->peer_state == PEER_STATE_WAITING_HELLO || - peer->peer_state == PEER_STATE_ACTIVE); - - /* NB logical LNet peers share one kptl_peer_t */ - if (peer->peer_id.pid != id.pid || - LNET_NIDADDR(id.nid) != LNET_NIDADDR(peer->peer_id.nid)) - continue; - - kptllnd_peer_addref(peer); - - CDEBUG(D_NET, "%s -> %s (%d)\n", - libcfs_id2str(id), - kptllnd_ptlid2str(peer->peer_ptlid), - cfs_atomic_read (&peer->peer_refcount)); - return peer; - } - - return NULL; -} - -void -kptllnd_peertable_overflow_msg(char *str, lnet_process_id_t id) -{ - LCONSOLE_ERROR_MSG(0x127, "%s %s overflows the peer table[%d]: " - "messages may be dropped\n", - str, libcfs_id2str(id), - kptllnd_data.kptl_n_active_peers); - LCONSOLE_ERROR_MSG(0x128, "Please correct by increasing " - "'max_nodes' or 'max_procs_per_node'\n"); -} - -__u64 -kptllnd_get_last_seen_matchbits_locked(lnet_process_id_t lpid) -{ - kptl_peer_t *peer; - cfs_list_t *tmp; - - /* Find the last matchbits I saw this new peer using. Note.. - A. This peer cannot be in the peer table - she's new! - B. If I can't find the peer in the closing/zombie peers, all - matchbits are safe because all refs to the (old) peer have gone - so all txs have completed so there's no risk of matchbit - collision! - */ - - LASSERT(kptllnd_id2peer_locked(lpid) == NULL); - - /* peer's last matchbits can't change after it comes out of the peer - * table, so first match is fine */ - - cfs_list_for_each (tmp, &kptllnd_data.kptl_closing_peers) { - peer = cfs_list_entry (tmp, kptl_peer_t, peer_list); - - if (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(lpid.nid) && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - cfs_list_for_each (tmp, &kptllnd_data.kptl_zombie_peers) { - peer = cfs_list_entry (tmp, kptl_peer_t, peer_list); - - if (LNET_NIDADDR(peer->peer_id.nid) == LNET_NIDADDR(lpid.nid) && - peer->peer_id.pid == lpid.pid) - return peer->peer_last_matchbits_seen; - } - - return PTL_RESERVED_MATCHBITS; -} - -kptl_peer_t * -kptllnd_peer_handle_hello (kptl_net_t *net, - ptl_process_id_t initiator, kptl_msg_t *msg) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - kptl_peer_t *peer; - kptl_peer_t *new_peer; - lnet_process_id_t lpid; - unsigned long flags; - kptl_tx_t *hello_tx; - int rc; - __u64 safe_matchbits; - __u64 last_matchbits_seen; - - lpid.nid = msg->ptlm_srcnid; - lpid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NET, "hello from %s(%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - - if (initiator.pid != kptllnd_data.kptl_portals_id.pid && - (msg->ptlm_srcpid & LNET_PID_USERFLAG) == 0) { - /* If the peer's PID isn't _the_ ptllnd kernel pid, she must be - * userspace. Refuse the connection if she hasn't set the - * correct flag in her PID... */ - CERROR("Userflag not set in hello from %s (%s)\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator)); - return NULL; - } - - /* kptlhm_matchbits are the highest matchbits my peer may have used to - * RDMA to me. I ensure I never register buffers for RDMA that could - * match any she used */ - safe_matchbits = msg->ptlm_u.hello.kptlhm_matchbits + 1; - - if (safe_matchbits < PTL_RESERVED_MATCHBITS) { - CERROR("Illegal matchbits "LPX64" in HELLO from %s\n", - safe_matchbits, libcfs_id2str(lpid)); - return NULL; - } - - if (msg->ptlm_u.hello.kptlhm_max_msg_size < PTLLND_MIN_BUFFER_SIZE) { - CERROR("%s: max message size %d < MIN %d", - libcfs_id2str(lpid), - msg->ptlm_u.hello.kptlhm_max_msg_size, - PTLLND_MIN_BUFFER_SIZE); - return NULL; - } - - if (msg->ptlm_credits <= 1) { - CERROR("Need more than 1+%d credits from %s\n", - msg->ptlm_credits, libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* Completing HELLO handshake */ - LASSERT(peer->peer_incarnation == 0); - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp != peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring HELLO from %s: unexpected " - "dststamp "LPX64" ("LPX64" wanted)\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Concurrent initiation or response to my HELLO */ - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - return peer; - } - - if (msg->ptlm_dststamp != 0 && - msg->ptlm_dststamp <= peer->peer_myincarnation) { - write_unlock_irqrestore(g_lock, flags); - - CERROR("Ignoring stale HELLO from %s: " - "dststamp "LPX64" (current "LPX64")\n", - libcfs_id2str(lpid), - msg->ptlm_dststamp, - peer->peer_myincarnation); - kptllnd_peer_decref(peer); - return NULL; - } - - /* Brand new connection attempt: remove old incarnation */ - kptllnd_peer_close_locked(peer, 0); - } - - kptllnd_cull_peertable_locked(lpid); - - write_unlock_irqrestore(g_lock, flags); - - if (peer != NULL) { - CDEBUG(D_NET, "Peer %s (%s) reconnecting:" - " stamp "LPX64"("LPX64")\n", - libcfs_id2str(lpid), kptllnd_ptlid2str(initiator), - msg->ptlm_srcstamp, peer->peer_incarnation); - - kptllnd_peer_decref(peer); - peer = NULL; - } - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate HELLO message for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - lpid, sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(net, lpid, initiator); - if (new_peer == NULL) { - kptllnd_tx_decref(hello_tx); - return NULL; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) { - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - - CERROR("Failed to reserve buffers for %s\n", - libcfs_id2str(lpid)); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - - again: - if (net->net_shutdown) { - write_unlock_irqrestore(g_lock, flags); - - CERROR ("Shutdown started, refusing connection from %s\n", - libcfs_id2str(lpid)); - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return NULL; - } - - peer = kptllnd_id2peer_locked(lpid); - if (peer != NULL) { - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* An outgoing message instantiated 'peer' for me */ - LASSERT(peer->peer_incarnation == 0); - - peer->peer_state = PEER_STATE_ACTIVE; - peer->peer_incarnation = msg->ptlm_srcstamp; - peer->peer_next_matchbits = safe_matchbits; - peer->peer_max_msg_size = - msg->ptlm_u.hello.kptlhm_max_msg_size; - - write_unlock_irqrestore(g_lock, flags); - - CWARN("Outgoing instantiated peer %s\n", - libcfs_id2str(lpid)); - } else { - LASSERT (peer->peer_state == PEER_STATE_ACTIVE); - - write_unlock_irqrestore(g_lock, flags); - - /* WOW! Somehow this peer completed the HELLO - * handshake while I slept. I guess I could have slept - * while it rebooted and sent a new HELLO, so I'll fail - * this one... */ - CWARN("Wow! peer %s\n", libcfs_id2str(lpid)); - kptllnd_peer_decref(peer); - peer = NULL; - } - - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return peer; - } - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - /* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection from ", lpid); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Refusing connection from %s\n", - libcfs_id2str(lpid)); - kptllnd_peer_unreserve_buffers(); - kptllnd_peer_decref(new_peer); - kptllnd_tx_decref(hello_tx); - return NULL; - } - - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(lpid); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_ACTIVE; - new_peer->peer_incarnation = msg->ptlm_srcstamp; - new_peer->peer_next_matchbits = safe_matchbits; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - new_peer->peer_max_msg_size = msg->ptlm_u.hello.kptlhm_max_msg_size; - - LASSERT (!net->net_shutdown); - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! */ - - CDEBUG(D_NETTRACE, "%s: post response hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - return new_peer; -} - -void -kptllnd_tx_launch(kptl_peer_t *peer, kptl_tx_t *tx, int nfrag) -{ - kptllnd_post_tx(peer, tx, nfrag); - kptllnd_peer_check_sends(peer); -} - -int -kptllnd_find_target(kptl_net_t *net, lnet_process_id_t target, - kptl_peer_t **peerp) -{ - rwlock_t *g_lock = &kptllnd_data.kptl_peer_rw_lock; - ptl_process_id_t ptl_id; - kptl_peer_t *new_peer; - kptl_tx_t *hello_tx; - unsigned long flags; - int rc; - __u64 last_matchbits_seen; - - /* I expect to find the peer, so I only take a read lock... */ - read_lock_irqsave(g_lock, flags); - *peerp = kptllnd_id2peer_locked(target); - read_unlock_irqrestore(g_lock, flags); - - if (*peerp != NULL) - return 0; - - if ((target.pid & LNET_PID_USERFLAG) != 0) { - CWARN("Refusing to create a new connection to %s " - "(non-kernel peer)\n", libcfs_id2str(target)); - return -EHOSTUNREACH; - } - - /* The new peer is a kernel ptllnd, and kernel ptllnds all have the - * same portals PID, which has nothing to do with LUSTRE_SRV_LNET_PID */ - ptl_id.nid = kptllnd_lnet2ptlnid(target.nid); - ptl_id.pid = kptllnd_data.kptl_portals_id.pid; - - hello_tx = kptllnd_get_idle_tx(TX_TYPE_SMALL_MESSAGE); - if (hello_tx == NULL) { - CERROR("Unable to allocate connect message for %s\n", - libcfs_id2str(target)); - return -ENOMEM; - } - - hello_tx->tx_acked = 1; - kptllnd_init_msg(hello_tx->tx_msg, PTLLND_MSG_TYPE_HELLO, - target, sizeof(kptl_hello_msg_t)); - - new_peer = kptllnd_peer_allocate(net, target, ptl_id); - if (new_peer == NULL) { - rc = -ENOMEM; - goto unwind_0; - } - - rc = kptllnd_peer_reserve_buffers(); - if (rc != 0) - goto unwind_1; - - write_lock_irqsave(g_lock, flags); - again: - /* Called only in lnd_send which can't happen after lnd_shutdown */ - LASSERT (!net->net_shutdown); - - *peerp = kptllnd_id2peer_locked(target); - if (*peerp != NULL) { - write_unlock_irqrestore(g_lock, flags); - goto unwind_2; - } - - kptllnd_cull_peertable_locked(target); - - if (kptllnd_data.kptl_n_active_peers == - kptllnd_data.kptl_expected_peers) { - /* peer table full */ - write_unlock_irqrestore(g_lock, flags); - - kptllnd_peertable_overflow_msg("Connection to ", target); - - rc = kptllnd_reserve_buffers(1); /* HELLO headroom */ - if (rc != 0) { - CERROR("Can't create connection to %s\n", - libcfs_id2str(target)); - rc = -ENOMEM; - goto unwind_2; - } - write_lock_irqsave(g_lock, flags); - kptllnd_data.kptl_expected_peers++; - goto again; - } - - last_matchbits_seen = kptllnd_get_last_seen_matchbits_locked(target); - - hello_tx->tx_msg->ptlm_u.hello.kptlhm_matchbits = last_matchbits_seen; - hello_tx->tx_msg->ptlm_u.hello.kptlhm_max_msg_size = - *kptllnd_tunables.kptl_max_msg_size; - - new_peer->peer_state = PEER_STATE_WAITING_HELLO; - new_peer->peer_last_matchbits_seen = last_matchbits_seen; - - kptllnd_peer_add_peertable_locked(new_peer); - - write_unlock_irqrestore(g_lock, flags); - - /* NB someone else could get in now and post a message before I post - * the HELLO, but post_tx/check_sends take care of that! */ - - CDEBUG(D_NETTRACE, "%s: post initial hello %p\n", - libcfs_id2str(new_peer->peer_id), hello_tx); - - kptllnd_post_tx(new_peer, hello_tx, 0); - kptllnd_peer_check_sends(new_peer); - - *peerp = new_peer; - return 0; - - unwind_2: - kptllnd_peer_unreserve_buffers(); - unwind_1: - kptllnd_peer_decref(new_peer); - unwind_0: - kptllnd_tx_decref(hello_tx); - - return rc; -} diff --git a/lnet/klnds/ptllnd/ptllnd_rx_buf.c b/lnet/klnds/ptllnd/ptllnd_rx_buf.c deleted file mode 100644 index 1b63ebe..0000000 --- a/lnet/klnds/ptllnd/ptllnd_rx_buf.c +++ /dev/null @@ -1,826 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd_rx_buf.c - * - * Author: PJ Kirner - */ - - #include "ptllnd.h" - -void -kptllnd_rx_buffer_pool_init(kptl_rx_buffer_pool_t *rxbp) -{ - memset(rxbp, 0, sizeof(*rxbp)); - spin_lock_init(&rxbp->rxbp_lock); - CFS_INIT_LIST_HEAD(&rxbp->rxbp_list); -} - -void -kptllnd_rx_buffer_destroy(kptl_rx_buffer_t *rxb) -{ - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - - LASSERT(rxb->rxb_refcount == 0); - LASSERT(PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - LASSERT(!rxb->rxb_posted); - LASSERT(rxb->rxb_idle); - - cfs_list_del(&rxb->rxb_list); - rxbp->rxbp_count--; - - LIBCFS_FREE(rxb->rxb_buffer, kptllnd_rx_buffer_size()); - LIBCFS_FREE(rxb, sizeof(*rxb)); -} - -int -kptllnd_rx_buffer_pool_reserve(kptl_rx_buffer_pool_t *rxbp, int count) -{ - int bufsize; - int msgs_per_buffer; - int rc; - kptl_rx_buffer_t *rxb; - char *buffer; - unsigned long flags; - - bufsize = kptllnd_rx_buffer_size(); - msgs_per_buffer = bufsize / (*kptllnd_tunables.kptl_max_msg_size); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_reserve(%d)\n", count); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - for (;;) { - if (rxbp->rxbp_shutdown) { - rc = -ESHUTDOWN; - break; - } - - if (rxbp->rxbp_reserved + count <= - rxbp->rxbp_count * msgs_per_buffer) { - rc = 0; - break; - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - LIBCFS_ALLOC(rxb, sizeof(*rxb)); - LIBCFS_ALLOC(buffer, bufsize); - - if (rxb == NULL || buffer == NULL) { - CERROR("Failed to allocate rx buffer\n"); - - if (rxb != NULL) - LIBCFS_FREE(rxb, sizeof(*rxb)); - if (buffer != NULL) - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ENOMEM; - break; - } - - memset(rxb, 0, sizeof(*rxb)); - - rxb->rxb_eventarg.eva_type = PTLLND_EVENTARG_TYPE_BUF; - rxb->rxb_refcount = 0; - rxb->rxb_pool = rxbp; - rxb->rxb_idle = 0; - rxb->rxb_posted = 0; - rxb->rxb_buffer = buffer; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - LIBCFS_FREE(rxb, sizeof(*rxb)); - LIBCFS_FREE(buffer, bufsize); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rc = -ESHUTDOWN; - break; - } - - cfs_list_add_tail(&rxb->rxb_list, &rxbp->rxbp_list); - rxbp->rxbp_count++; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - kptllnd_rx_buffer_post(rxb); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - if (rc == 0) - rxbp->rxbp_reserved += count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - return rc; -} - -void -kptllnd_rx_buffer_pool_unreserve(kptl_rx_buffer_pool_t *rxbp, - int count) -{ - unsigned long flags; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - CDEBUG(D_NET, "kptllnd_rx_buffer_pool_unreserve(%d)\n", count); - rxbp->rxbp_reserved -= count; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_pool_fini(kptl_rx_buffer_pool_t *rxbp) -{ - kptl_rx_buffer_t *rxb; - int rc; - int i; - unsigned long flags; - cfs_list_t *tmp; - cfs_list_t *nxt; - ptl_handle_md_t mdh; - - /* CAVEAT EMPTOR: I'm racing with everything here!!! - * - * Buffers can still be posted after I set rxbp_shutdown because I - * can't hold rxbp_lock while I'm posting them. - * - * Calling PtlMDUnlink() here races with auto-unlinks; i.e. a buffer's - * MD handle could become invalid under me. I am vulnerable to portals - * re-using handles (i.e. make the same handle valid again, but for a - * different MD) from when the MD is actually unlinked, to when the - * event callback tells me it has been unlinked. */ - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxbp->rxbp_shutdown = 1; - - for (i = 9;; i++) { - cfs_list_for_each_safe(tmp, nxt, &rxbp->rxbp_list) { - rxb = cfs_list_entry (tmp, kptl_rx_buffer_t, rxb_list); - - if (rxb->rxb_idle) { - spin_unlock_irqrestore(&rxbp->rxbp_lock, - flags); - kptllnd_rx_buffer_destroy(rxb); - spin_lock_irqsave(&rxbp->rxbp_lock, - flags); - continue; - } - - mdh = rxb->rxb_mdh; - if (PtlHandleIsEqual(mdh, PTL_INVALID_HANDLE)) - continue; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMDUnlink(mdh); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - /* callback clears rxb_mdh and drops net's ref - * (which causes repost, but since I set - * shutdown, it will just set the buffer - * idle) */ -#else - if (rc == PTL_OK) { - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - } -#endif - } - - if (cfs_list_empty(&rxbp->rxbp_list)) - break; - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - /* Wait a bit for references to be dropped */ - CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */ - "Waiting for %d Busy RX Buffers\n", - rxbp->rxbp_count); - - cfs_pause(cfs_time_seconds(1)); - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - } - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -void -kptllnd_rx_buffer_post(kptl_rx_buffer_t *rxb) -{ - int rc; - ptl_md_t md; - ptl_handle_me_t meh; - ptl_handle_md_t mdh; - ptl_process_id_t any; - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - unsigned long flags; - - LASSERT (!in_interrupt()); - LASSERT (rxb->rxb_refcount == 0); - LASSERT (!rxb->rxb_idle); - LASSERT (!rxb->rxb_posted); - LASSERT (PtlHandleIsEqual(rxb->rxb_mdh, PTL_INVALID_HANDLE)); - - any.nid = PTL_NID_ANY; - any.pid = PTL_PID_ANY; - - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - if (rxbp->rxbp_shutdown) { - rxb->rxb_idle = 1; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - rxb->rxb_refcount = 1; /* net's ref */ - rxb->rxb_posted = 1; /* I'm posting */ - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - - rc = PtlMEAttach(kptllnd_data.kptl_nih, - *kptllnd_tunables.kptl_portal, - any, - LNET_MSG_MATCHBITS, - 0, /* all matchbits are valid - ignore none */ - PTL_UNLINK, - PTL_INS_AFTER, - &meh); - if (rc != PTL_OK) { - CERROR("PtlMeAttach rxb failed %s(%d)\n", - kptllnd_errtype2str(rc), rc); - goto failed; - } - - /* - * Setup MD - */ - md.start = rxb->rxb_buffer; - md.length = kptllnd_rx_buffer_size(); - md.threshold = PTL_MD_THRESH_INF; - md.options = PTL_MD_OP_PUT | - PTL_MD_LUSTRE_COMPLETION_SEMANTICS | - PTL_MD_EVENT_START_DISABLE | - PTL_MD_MAX_SIZE | - PTL_MD_LOCAL_ALIGN8; - md.user_ptr = &rxb->rxb_eventarg; - md.max_size = *kptllnd_tunables.kptl_max_msg_size; - md.eq_handle = kptllnd_data.kptl_eqh; - - rc = PtlMDAttach(meh, md, PTL_UNLINK, &mdh); - if (rc == PTL_OK) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - if (rxb->rxb_posted) /* Not auto-unlinked yet!!! */ - rxb->rxb_mdh = mdh; - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - return; - } - - CERROR("PtlMDAttach rxb failed %s(%d)\n", - kptllnd_errtype2str(rc), rc); - rc = PtlMEUnlink(meh); - LASSERT(rc == PTL_OK); - - failed: - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - rxb->rxb_posted = 0; - /* XXX this will just try again immediately */ - kptllnd_rx_buffer_decref_locked(rxb); - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); -} - -kptl_rx_t * -kptllnd_rx_alloc(void) -{ - kptl_rx_t* rx; - - if (IS_SIMULATION_ENABLED(FAIL_RX_ALLOC)) { - CERROR ("FAIL_RX_ALLOC SIMULATION triggered\n"); - return NULL; - } - - rx = kmem_cache_alloc(kptllnd_data.kptl_rx_cache, GFP_ATOMIC); - if (rx == NULL) { - CERROR("Failed to allocate rx\n"); - return NULL; - } - - memset(rx, 0, sizeof(*rx)); - return rx; -} - -void -kptllnd_rx_done(kptl_rx_t *rx, int post_credit) -{ - kptl_rx_buffer_t *rxb = rx->rx_rxb; - kptl_peer_t *peer = rx->rx_peer; - unsigned long flags; - - LASSERT (post_credit == PTLLND_POSTRX_NO_CREDIT || - post_credit == PTLLND_POSTRX_PEER_CREDIT); - - CDEBUG(D_NET, "rx=%p rxb %p peer %p\n", rx, rxb, peer); - - if (rxb != NULL) - kptllnd_rx_buffer_decref(rxb); - - if (peer != NULL) { - /* Update credits (after I've decref-ed the buffer) */ - spin_lock_irqsave(&peer->peer_lock, flags); - - if (post_credit == PTLLND_POSTRX_PEER_CREDIT) - peer->peer_outstanding_credits++; - - LASSERT (peer->peer_outstanding_credits + - peer->peer_sent_credits <= - *kptllnd_tunables.kptl_peertxcredits); - - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: rx %p done\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - rx); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* I might have to send back credits */ - kptllnd_peer_check_sends(peer); - kptllnd_peer_decref(peer); - } - - kmem_cache_free(kptllnd_data.kptl_rx_cache, rx); -} - -void -kptllnd_rx_buffer_callback (ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - kptl_rx_buffer_t *rxb = kptllnd_eventarg2obj(eva); - kptl_rx_buffer_pool_t *rxbp = rxb->rxb_pool; - kptl_rx_t *rx; - int unlinked; - unsigned long flags; - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = ev->type == PTL_EVENT_UNLINK; -#endif - - CDEBUG(D_NET, "%s: %s(%d) rxb=%p fail=%s(%d) unlink=%d\n", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), ev->ni_fail_type, - unlinked); - - LASSERT (!rxb->rxb_idle); - LASSERT (ev->md.start == rxb->rxb_buffer); - LASSERT (ev->offset + ev->mlength <= - PAGE_SIZE * *kptllnd_tunables.kptl_rxb_npages); - LASSERT (ev->type == PTL_EVENT_PUT_END || - ev->type == PTL_EVENT_UNLINK); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->match_bits == LNET_MSG_MATCHBITS); - - if (ev->ni_fail_type != PTL_NI_OK) { - CERROR("Portals error from %s: %s(%d) rxb=%p fail=%s(%d) unlink=%dn", - kptllnd_ptlid2str(ev->initiator), - kptllnd_evtype2str(ev->type), ev->type, rxb, - kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - } else if (ev->type == PTL_EVENT_PUT_END && - !rxbp->rxbp_shutdown) { - - /* rxbp_shutdown sampled without locking! I only treat it as a - * hint since shutdown can start while rx's are queued on - * kptl_sched_rxq. */ -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force message alignment - someone sending an - * odd-length message will misalign subsequent messages and - * force the fixup below... */ - if ((ev->mlength & 7) != 0) - CWARN("Message from %s has odd length "LPU64": " - "probable version incompatibility\n", - kptllnd_ptlid2str(ev->initiator), - (__u64)ev->mlength); -#endif - rx = kptllnd_rx_alloc(); - if (rx == NULL) { - CERROR("Message from %s dropped: ENOMEM", - kptllnd_ptlid2str(ev->initiator)); - } else { - if ((ev->offset & 7) == 0) { - kptllnd_rx_buffer_addref(rxb); - rx->rx_rxb = rxb; - rx->rx_nob = ev->mlength; - rx->rx_msg = (kptl_msg_t *) - (rxb->rxb_buffer + ev->offset); - } else { -#if (PTL_MD_LOCAL_ALIGN8 == 0) - /* Portals can't force alignment - copy into - * rx_space (avoiding overflow) to fix */ - int maxlen = *kptllnd_tunables.kptl_max_msg_size; - - rx->rx_rxb = NULL; - rx->rx_nob = MIN(maxlen, ev->mlength); - rx->rx_msg = (kptl_msg_t *)rx->rx_space; - memcpy(rx->rx_msg, rxb->rxb_buffer + ev->offset, - rx->rx_nob); -#else - /* Portals should have forced the alignment */ - LBUG(); -#endif - } - - rx->rx_initiator = ev->initiator; - rx->rx_treceived = jiffies; - /* Queue for attention */ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, - flags); - - cfs_list_add_tail(&rx->rx_list, - &kptllnd_data.kptl_sched_rxq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data. \ - kptl_sched_lock, flags); - } - } - - if (unlinked) { - spin_lock_irqsave(&rxbp->rxbp_lock, flags); - - rxb->rxb_posted = 0; - rxb->rxb_mdh = PTL_INVALID_HANDLE; - kptllnd_rx_buffer_decref_locked(rxb); - - spin_unlock_irqrestore(&rxbp->rxbp_lock, flags); - } -} - -void -kptllnd_nak (ptl_process_id_t dest) -{ - /* Fire-and-forget a stub message that will let the peer know my - * protocol magic/version and make her drop/refresh any peer state she - * might have with me. */ - ptl_md_t md = { - .start = kptllnd_data.kptl_nak_msg, - .length = kptllnd_data.kptl_nak_msg->ptlm_nob, - .threshold = 1, - .options = 0, - .user_ptr = NULL, - .eq_handle = PTL_EQ_NONE}; - ptl_handle_md_t mdh; - int rc; - - rc = PtlMDBind(kptllnd_data.kptl_nih, md, PTL_UNLINK, &mdh); - if (rc != PTL_OK) { - CWARN("Can't NAK %s: bind failed %s(%d)\n", - kptllnd_ptlid2str(dest), kptllnd_errtype2str(rc), rc); - return; - } - - rc = PtlPut(mdh, PTL_NOACK_REQ, dest, - *kptllnd_tunables.kptl_portal, 0, - LNET_MSG_MATCHBITS, 0, 0); - if (rc != PTL_OK) { - CWARN("Can't NAK %s: put failed %s(%d)\n", - kptllnd_ptlid2str(dest), kptllnd_errtype2str(rc), rc); - } -} - -kptl_net_t * -kptllnd_find_net (lnet_nid_t nid) -{ - kptl_net_t *net; - - read_lock(&kptllnd_data.kptl_net_rw_lock); - cfs_list_for_each_entry (net, &kptllnd_data.kptl_nets, net_list) { - LASSERT (!net->net_shutdown); - - if (net->net_ni->ni_nid == nid) { - kptllnd_net_addref(net); - read_unlock(&kptllnd_data.kptl_net_rw_lock); - return net; - } - } - read_unlock(&kptllnd_data.kptl_net_rw_lock); - - return NULL; -} - -void -kptllnd_rx_parse(kptl_rx_t *rx) -{ - kptl_msg_t *msg = rx->rx_msg; - int rc = 0; - int post_credit = PTLLND_POSTRX_PEER_CREDIT; - kptl_net_t *net = NULL; - kptl_peer_t *peer; - cfs_list_t txs; - unsigned long flags; - lnet_process_id_t srcid; - - LASSERT (!in_interrupt()); - LASSERT (rx->rx_peer == NULL); - - CFS_INIT_LIST_HEAD(&txs); - - if ((rx->rx_nob >= 4 && - (msg->ptlm_magic == LNET_PROTO_MAGIC || - msg->ptlm_magic == __swab32(LNET_PROTO_MAGIC))) || - (rx->rx_nob >= 6 && - ((msg->ptlm_magic == PTLLND_MSG_MAGIC && - msg->ptlm_version != PTLLND_MSG_VERSION) || - (msg->ptlm_magic == __swab32(PTLLND_MSG_MAGIC) && - msg->ptlm_version != __swab16(PTLLND_MSG_VERSION))))) { - /* NAK incompatible versions - * See other LNDs for how to handle this if/when ptllnd begins - * to allow different versions to co-exist */ - CERROR("Bad version: got %04x expected %04x from %s\n", - (__u32)(msg->ptlm_magic == PTLLND_MSG_MAGIC ? - msg->ptlm_version : __swab16(msg->ptlm_version)), - PTLLND_MSG_VERSION, kptllnd_ptlid2str(rx->rx_initiator)); - /* NB backward compatibility */ - kptllnd_nak(rx->rx_initiator); - goto rx_done; - } - - rc = kptllnd_msg_unpack(msg, rx->rx_nob); - if (rc != 0) { - CERROR ("Error %d unpacking rx from %s\n", - rc, kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - srcid.nid = msg->ptlm_srcnid; - srcid.pid = msg->ptlm_srcpid; - - CDEBUG(D_NETTRACE, "%s: RX %s c %d %p rxb %p queued %lu ticks (%ld s)\n", - libcfs_id2str(srcid), kptllnd_msgtype2str(msg->ptlm_type), - msg->ptlm_credits, rx, rx->rx_rxb, - jiffies - rx->rx_treceived, - cfs_duration_sec(jiffies - rx->rx_treceived)); - - if (kptllnd_lnet2ptlnid(srcid.nid) != rx->rx_initiator.nid) { - CERROR("Bad source nid %s from %s\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_NAK) { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) - goto rx_done; - - CWARN("NAK from %s (%d:%s)\n", - libcfs_id2str(srcid), peer->peer_state, - kptllnd_ptlid2str(rx->rx_initiator)); - - /* NB can't nuke new peer - bug 17546 comment 31 */ - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - CDEBUG(D_NET, "Stale NAK from %s(%s): WAITING_HELLO\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator)); - kptllnd_peer_decref(peer); - goto rx_done; - } - - rc = -EPROTO; - goto failed; - } - - net = kptllnd_find_net(msg->ptlm_dstnid); - if (net == NULL || msg->ptlm_dstpid != the_lnet.ln_pid) { - CERROR("Bad dstid %s from %s\n", - libcfs_id2str((lnet_process_id_t) { - .nid = msg->ptlm_dstnid, - .pid = msg->ptlm_dstpid}), - kptllnd_ptlid2str(rx->rx_initiator)); - goto rx_done; - } - - if (LNET_NIDNET(srcid.nid) != LNET_NIDNET(net->net_ni->ni_nid)) { - lnet_nid_t nid = LNET_MKNID(LNET_NIDNET(net->net_ni->ni_nid), - LNET_NIDADDR(srcid.nid)); - CERROR("Bad source nid %s from %s, %s expected.\n", - libcfs_id2str(srcid), - kptllnd_ptlid2str(rx->rx_initiator), - libcfs_nid2str(nid)); - goto rx_done; - } - - if (msg->ptlm_type == PTLLND_MSG_TYPE_HELLO) { - peer = kptllnd_peer_handle_hello(net, rx->rx_initiator, msg); - if (peer == NULL) - goto rx_done; - } else { - peer = kptllnd_id2peer(srcid); - if (peer == NULL) { - CWARN("NAK %s: no connection, %s must reconnect\n", - kptllnd_msgtype2str(msg->ptlm_type), - libcfs_id2str(srcid)); - /* NAK to make the peer reconnect */ - kptllnd_nak(rx->rx_initiator); - goto rx_done; - } - - /* Ignore any messages for a previous incarnation of me */ - if (msg->ptlm_dststamp < peer->peer_myincarnation) { - kptllnd_peer_decref(peer); - goto rx_done; - } - - if (msg->ptlm_dststamp != peer->peer_myincarnation) { - CERROR("%s: Unexpected dststamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(peer->peer_id), msg->ptlm_dststamp, - peer->peer_myincarnation); - rc = -EPROTO; - goto failed; - } - - if (peer->peer_state == PEER_STATE_WAITING_HELLO) { - /* recoverable error - restart txs */ - spin_lock_irqsave(&peer->peer_lock, flags); - kptllnd_cancel_txlist(&peer->peer_sendq, &txs); - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CWARN("NAK %s: Unexpected %s message\n", - libcfs_id2str(srcid), - kptllnd_msgtype2str(msg->ptlm_type)); - kptllnd_nak(rx->rx_initiator); - rc = -EPROTO; - goto failed; - } - - if (msg->ptlm_srcstamp != peer->peer_incarnation) { - CERROR("%s: Unexpected srcstamp "LPX64" " - "("LPX64" expected)\n", - libcfs_id2str(srcid), - msg->ptlm_srcstamp, - peer->peer_incarnation); - rc = -EPROTO; - goto failed; - } - } - - LASSERTF (LNET_NIDADDR(msg->ptlm_srcnid) == - LNET_NIDADDR(peer->peer_id.nid), "m %s p %s\n", - libcfs_nid2str(msg->ptlm_srcnid), - libcfs_nid2str(peer->peer_id.nid)); - LASSERTF (msg->ptlm_srcpid == peer->peer_id.pid, "m %u p %u\n", - msg->ptlm_srcpid, peer->peer_id.pid); - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* Check peer only sends when I've sent her credits */ - if (peer->peer_sent_credits == 0) { - int c = peer->peer_credits; - int oc = peer->peer_outstanding_credits; - int sc = peer->peer_sent_credits; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - CERROR("%s: buffer overrun [%d/%d+%d]\n", - libcfs_id2str(peer->peer_id), c, sc, oc); - rc = -EPROTO; - goto failed; - } - peer->peer_sent_credits--; - - /* No check for credit overflow - the peer may post new - * buffers after the startup handshake. */ - peer->peer_credits += msg->ptlm_credits; - - /* This ensures the credit taken by NOOP can be returned */ - if (msg->ptlm_type == PTLLND_MSG_TYPE_NOOP) { - peer->peer_outstanding_credits++; - post_credit = PTLLND_POSTRX_NO_CREDIT; - } - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* See if something can go out now that credits have come in */ - if (msg->ptlm_credits != 0) - kptllnd_peer_check_sends(peer); - - /* ptllnd-level protocol correct - rx takes my ref on peer and increments - * peer_outstanding_credits when it completes */ - rx->rx_peer = peer; - kptllnd_peer_alive(peer); - - switch (msg->ptlm_type) { - default: - /* already checked by kptllnd_msg_unpack() */ - LBUG(); - - case PTLLND_MSG_TYPE_HELLO: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_HELLO\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_NOOP: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_NOOP\n"); - goto rx_done; - - case PTLLND_MSG_TYPE_IMMEDIATE: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_IMMEDIATE\n"); - rc = lnet_parse(net->net_ni, - &msg->ptlm_u.immediate.kptlim_hdr, - msg->ptlm_srcnid, - rx, 0); - if (rc >= 0) { /* kptllnd_recv owns 'rx' now */ - kptllnd_net_decref(net); - return; - } - goto failed; - - case PTLLND_MSG_TYPE_PUT: - case PTLLND_MSG_TYPE_GET: - CDEBUG(D_NET, "PTLLND_MSG_TYPE_%s\n", - msg->ptlm_type == PTLLND_MSG_TYPE_PUT ? - "PUT" : "GET"); - - /* checked in kptllnd_msg_unpack() */ - LASSERT (msg->ptlm_u.rdma.kptlrm_matchbits >= - PTL_RESERVED_MATCHBITS); - - /* Update last match bits seen */ - spin_lock_irqsave(&peer->peer_lock, flags); - - if (msg->ptlm_u.rdma.kptlrm_matchbits > - rx->rx_peer->peer_last_matchbits_seen) - rx->rx_peer->peer_last_matchbits_seen = - msg->ptlm_u.rdma.kptlrm_matchbits; - - spin_unlock_irqrestore(&rx->rx_peer->peer_lock, flags); - - rc = lnet_parse(net->net_ni, - &msg->ptlm_u.rdma.kptlrm_hdr, - msg->ptlm_srcnid, - rx, 1); - if (rc >= 0) { /* kptllnd_recv owns 'rx' now */ - kptllnd_net_decref(net); - return; - } - goto failed; - } - - failed: - LASSERT (rc != 0); - kptllnd_peer_close(peer, rc); - if (rx->rx_peer == NULL) /* drop ref on peer */ - kptllnd_peer_decref(peer); /* unless rx_done will */ - if (!cfs_list_empty(&txs)) { - LASSERT (net != NULL); - kptllnd_restart_txs(net, srcid, &txs); - } - rx_done: - if (net != NULL) - kptllnd_net_decref(net); - kptllnd_rx_done(rx, post_credit); -} diff --git a/lnet/klnds/ptllnd/ptllnd_tx.c b/lnet/klnds/ptllnd/ptllnd_tx.c deleted file mode 100644 index 1fa8d05..0000000 --- a/lnet/klnds/ptllnd/ptllnd_tx.c +++ /dev/null @@ -1,526 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - * - * Copyright (c) 2012, Intel Corporation. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/ptllnd_tx.c - * - * Author: PJ Kirner - */ - - #include "ptllnd.h" - -void -kptllnd_free_tx(kptl_tx_t *tx) -{ - if (tx->tx_msg != NULL) - LIBCFS_FREE(tx->tx_msg, sizeof(*tx->tx_msg)); - - if (tx->tx_frags != NULL) - LIBCFS_FREE(tx->tx_frags, sizeof(*tx->tx_frags)); - - LIBCFS_FREE(tx, sizeof(*tx)); - - cfs_atomic_dec(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = cfs_atomic_read(&kptllnd_data.kptl_ntx); -} - -kptl_tx_t * -kptllnd_alloc_tx(void) -{ - kptl_tx_t *tx; - - LIBCFS_ALLOC(tx, sizeof(*tx)); - if (tx == NULL) { - CERROR("Failed to allocate TX\n"); - return NULL; - } - - cfs_atomic_inc(&kptllnd_data.kptl_ntx); - - /* Keep the tunable in step for visibility */ - *kptllnd_tunables.kptl_ntx = cfs_atomic_read(&kptllnd_data.kptl_ntx); - - tx->tx_idle = 1; - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - tx->tx_rdma_eventarg.eva_type = PTLLND_EVENTARG_TYPE_RDMA; - tx->tx_msg_eventarg.eva_type = PTLLND_EVENTARG_TYPE_MSG; - tx->tx_msg = NULL; - tx->tx_peer = NULL; - tx->tx_frags = NULL; - - LIBCFS_ALLOC(tx->tx_msg, sizeof(*tx->tx_msg)); - if (tx->tx_msg == NULL) { - CERROR("Failed to allocate TX payload\n"); - goto failed; - } - - LIBCFS_ALLOC(tx->tx_frags, sizeof(*tx->tx_frags)); - if (tx->tx_frags == NULL) { - CERROR("Failed to allocate TX frags\n"); - goto failed; - } - - return tx; - - failed: - kptllnd_free_tx(tx); - return NULL; -} - -int -kptllnd_setup_tx_descs() -{ - int n = *kptllnd_tunables.kptl_ntx; - int i; - - for (i = 0; i < n; i++) { - kptl_tx_t *tx = kptllnd_alloc_tx(); - if (tx == NULL) - return -ENOMEM; - - spin_lock(&kptllnd_data.kptl_tx_lock); - cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - return 0; -} - -void -kptllnd_cleanup_tx_descs() -{ - kptl_tx_t *tx; - - /* No locking; single threaded now */ - LASSERT (kptllnd_data.kptl_shutdown == 2); - - while (!cfs_list_empty(&kptllnd_data.kptl_idle_txs)) { - tx = cfs_list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, tx_list); - - cfs_list_del(&tx->tx_list); - kptllnd_free_tx(tx); - } - - LASSERT (cfs_atomic_read(&kptllnd_data.kptl_ntx) == 0); -} - -kptl_tx_t * -kptllnd_get_idle_tx(enum kptl_tx_type type) -{ - kptl_tx_t *tx = NULL; - - if (IS_SIMULATION_ENABLED(FAIL_TX_PUT_ALLOC) && - type == TX_TYPE_PUT_REQUEST) { - CERROR("FAIL_TX_PUT_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX_GET_ALLOC) && - type == TX_TYPE_GET_REQUEST) { - CERROR ("FAIL_TX_GET_ALLOC SIMULATION triggered\n"); - return NULL; - } - - if (IS_SIMULATION_ENABLED(FAIL_TX)) { - CERROR ("FAIL_TX SIMULATION triggered\n"); - return NULL; - } - - spin_lock(&kptllnd_data.kptl_tx_lock); - - if (cfs_list_empty (&kptllnd_data.kptl_idle_txs)) { - spin_unlock(&kptllnd_data.kptl_tx_lock); - - tx = kptllnd_alloc_tx(); - if (tx == NULL) - return NULL; - } else { - tx = cfs_list_entry(kptllnd_data.kptl_idle_txs.next, - kptl_tx_t, tx_list); - cfs_list_del(&tx->tx_list); - - spin_unlock(&kptllnd_data.kptl_tx_lock); - } - - LASSERT (cfs_atomic_read(&tx->tx_refcount)== 0); - LASSERT (tx->tx_idle); - LASSERT (!tx->tx_active); - LASSERT (tx->tx_lnet_msg == NULL); - LASSERT (tx->tx_lnet_replymsg == NULL); - LASSERT (tx->tx_peer == NULL); - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_type = type; - cfs_atomic_set(&tx->tx_refcount, 1); - tx->tx_status = 0; - tx->tx_idle = 0; - tx->tx_tposted = 0; - tx->tx_acked = *kptllnd_tunables.kptl_ack_puts; - - CDEBUG(D_NET, "tx=%p\n", tx); - return tx; -} - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - kptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - - LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... */ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_lnet_replymsg == NULL)); - - /* stash the tx on its peer until it completes */ - cfs_atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* These unlinks will ensure completion events (normal or unlink) will - * happen ASAP */ - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(msg_mdh); - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) - PtlMDUnlink(rdma_mdh); - - return -EAGAIN; -} -#else -int -kptllnd_tx_abort_netio(kptl_tx_t *tx) -{ - ptl_peer_t *peer = tx->tx_peer; - ptl_handle_md_t msg_mdh; - ptl_handle_md_t rdma_mdh; - unsigned long flags; - ptl_err_t prc; - - LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_active); - - spin_lock_irqsave(&peer->peer_lock, flags); - - msg_mdh = tx->tx_msg_mdh; - rdma_mdh = tx->tx_rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* Uncompleted comms: there must have been some error and it must be - * propagated to LNET... */ - LASSERT (tx->tx_status != 0 || - (tx->tx_lnet_msg == NULL && - tx->tx_replymsg == NULL)); - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - if (!PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(msg_mdh); - if (prc == PTL_OK) - msg_mdh = PTL_INVALID_HANDLE; - } - - if (!PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - prc = PtlMDUnlink(rdma_mdh); - if (prc == PTL_OK) - rdma_mdh = PTL_INVALID_HANDLE; - } - - spin_lock_irqsave(&peer->peer_lock, flags); - - /* update tx_???_mdh if callback hasn't fired */ - if (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)) - msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_msg_mdh = msg_mdh; - - if (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)) - rdma_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = rdma_mdh; - - if (PtlHandleIsEqual(msg_mdh, PTL_INVALID_HANDLE) && - PtlHandleIsEqual(rdma_mdh, PTL_INVALID_HANDLE)) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return 0; - } - - /* stash the tx on its peer until it completes */ - cfs_atomic_set(&tx->tx_refcount, 1); - tx->tx_active = 1; - cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq); - - kptllnd_peer_addref(peer); /* extra ref for me... */ - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* This will get the watchdog thread to try aborting all the peer's - * comms again. NB, this deems it fair that 1 failing tx which can't - * be aborted immediately (i.e. its MDs are still busy) is valid cause - * to nuke everything to the same peer! */ - kptllnd_peer_close(peer, tx->tx_status); - - kptllnd_peer_decref(peer); - - return -EAGAIN; -} -#endif - -void -kptllnd_tx_fini (kptl_tx_t *tx) -{ - lnet_msg_t *replymsg = tx->tx_lnet_replymsg; - lnet_msg_t *msg = tx->tx_lnet_msg; - kptl_peer_t *peer = tx->tx_peer; - int status = tx->tx_status; - int rc; - - LASSERT (!in_interrupt()); - LASSERT (cfs_atomic_read(&tx->tx_refcount) == 0); - LASSERT (!tx->tx_idle); - LASSERT (!tx->tx_active); - - /* TX has completed or failed */ - - if (peer != NULL) { - rc = kptllnd_tx_abort_netio(tx); - if (rc != 0) - return; - } - - LASSERT (PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - LASSERT (PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - - tx->tx_lnet_msg = tx->tx_lnet_replymsg = NULL; - tx->tx_peer = NULL; - tx->tx_idle = 1; - - spin_lock(&kptllnd_data.kptl_tx_lock); - cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_idle_txs); - spin_unlock(&kptllnd_data.kptl_tx_lock); - - /* Must finalize AFTER freeing 'tx' */ - if (msg != NULL) - lnet_finalize(NULL, msg, (replymsg == NULL) ? status : 0); - - if (replymsg != NULL) - lnet_finalize(NULL, replymsg, status); - - if (peer != NULL) - kptllnd_peer_decref(peer); -} - -const char * -kptllnd_tx_typestr(int type) -{ - switch (type) { - default: - return ""; - - case TX_TYPE_SMALL_MESSAGE: - return "msg"; - - case TX_TYPE_PUT_REQUEST: - return "put_req"; - - case TX_TYPE_GET_REQUEST: - return "get_req"; - break; - - case TX_TYPE_PUT_RESPONSE: - return "put_rsp"; - break; - - case TX_TYPE_GET_RESPONSE: - return "get_rsp"; - } -} - -void -kptllnd_tx_callback(ptl_event_t *ev) -{ - kptl_eventarg_t *eva = ev->md.user_ptr; - int ismsg = (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG); - kptl_tx_t *tx = kptllnd_eventarg2obj(eva); - kptl_peer_t *peer = tx->tx_peer; - int ok = (ev->ni_fail_type == PTL_OK); - int unlinked; - unsigned long flags; - - LASSERT (peer != NULL); - LASSERT (eva->eva_type == PTLLND_EVENTARG_TYPE_MSG || - eva->eva_type == PTLLND_EVENTARG_TYPE_RDMA); - LASSERT (!ismsg || !PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE)); - LASSERT (ismsg || !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE)); - -#ifdef LUSTRE_PORTALS_UNLINK_SEMANTICS - unlinked = ev->unlinked; -#else - unlinked = (ev->type == PTL_EVENT_UNLINK); -#endif - CDEBUG(D_NETTRACE, "%s[%d/%d+%d]: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), peer->peer_credits, - peer->peer_outstanding_credits, peer->peer_sent_credits, - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - - switch (tx->tx_type) { - default: - LBUG(); - - case TX_TYPE_SMALL_MESSAGE: - LASSERT (ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - - case TX_TYPE_PUT_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == PTL_EVENT_GET_END)); - break; - - case TX_TYPE_GET_REQUEST: - LASSERT (ev->type == PTL_EVENT_UNLINK || - (ismsg && ev->type == PTL_EVENT_SEND_END) || - (ismsg && ev->type == PTL_EVENT_ACK && tx->tx_acked) || - (!ismsg && ev->type == PTL_EVENT_PUT_END)); - - if (!ismsg && ok && ev->type == PTL_EVENT_PUT_END) { - if (ev->hdr_data == PTLLND_RDMA_OK) { - lnet_set_reply_msg_len(NULL, - tx->tx_lnet_replymsg, - ev->mlength); - } else { - /* no match at peer */ - tx->tx_status = -EIO; - } - } - break; - - case TX_TYPE_PUT_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - ev->type == PTL_EVENT_REPLY_END); - break; - - case TX_TYPE_GET_RESPONSE: - LASSERT (!ismsg); - LASSERT (ev->type == PTL_EVENT_UNLINK || - ev->type == PTL_EVENT_SEND_END || - (ev->type == PTL_EVENT_ACK && tx->tx_acked)); - break; - } - - if (ok) { - kptllnd_peer_alive(peer); - } else { - CERROR("Portals error to %s: %s(%d) tx=%p fail=%s(%d) unlinked=%d\n", - libcfs_id2str(peer->peer_id), - kptllnd_evtype2str(ev->type), ev->type, - tx, kptllnd_errtype2str(ev->ni_fail_type), - ev->ni_fail_type, unlinked); - tx->tx_status = -EIO; - kptllnd_peer_close(peer, -EIO); - } - - if (!unlinked) - return; - - spin_lock_irqsave(&peer->peer_lock, flags); - - if (ismsg) - tx->tx_msg_mdh = PTL_INVALID_HANDLE; - else - tx->tx_rdma_mdh = PTL_INVALID_HANDLE; - - if (!PtlHandleIsEqual(tx->tx_msg_mdh, PTL_INVALID_HANDLE) || - !PtlHandleIsEqual(tx->tx_rdma_mdh, PTL_INVALID_HANDLE) || - !tx->tx_active) { - spin_unlock_irqrestore(&peer->peer_lock, flags); - return; - } - - cfs_list_del(&tx->tx_list); - tx->tx_active = 0; - - spin_unlock_irqrestore(&peer->peer_lock, flags); - - /* drop peer's ref, but if it was the last one... */ - if (cfs_atomic_dec_and_test(&tx->tx_refcount)) { - /* ...finalize it in thread context! */ - spin_lock_irqsave(&kptllnd_data.kptl_sched_lock, flags); - - cfs_list_add_tail(&tx->tx_list, &kptllnd_data.kptl_sched_txq); - wake_up(&kptllnd_data.kptl_sched_waitq); - - spin_unlock_irqrestore(&kptllnd_data.kptl_sched_lock, - flags); - } -} diff --git a/lnet/klnds/ptllnd/wirecheck.c b/lnet/klnds/ptllnd/wirecheck.c deleted file mode 100644 index 30f0e8e..0000000 --- a/lnet/klnds/ptllnd/wirecheck.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. - * - * GPL HEADER END - */ -/* - * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. - * Use is subject to license terms. - */ -/* - * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. - * - * lnet/klnds/ptllnd/wirecheck.c - * - * Author: PJ Kirner - */ -#include -#include -#include -#include - -#include - -#include - -/* This ghastly hack to allows me to include lib-types.h It doesn't affect any - * assertions generated here (but fails-safe if it ever does) */ -typedef struct { - int counter; -} cfs_atomic_t; - -#include -#include - -#ifndef HAVE_STRNLEN -#define strnlen(s, i) strlen(s) -#endif - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#undef STRINGIFY -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" CLASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" CLASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE((int)offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -system_string (char *cmdline, char *str, int len) -{ - int fds[2]; - int rc; - pid_t pid; - - rc = pipe (fds); - if (rc != 0) - abort (); - - pid = fork (); - if (pid == 0) { - /* child */ - int fd = fileno(stdout); - - rc = dup2(fds[1], fd); - if (rc != fd) - abort(); - - exit(system(cmdline)); - /* notreached */ - } else if ((int)pid < 0) { - abort(); - } else { - FILE *f = fdopen (fds[0], "r"); - - if (f == NULL) - abort(); - - close(fds[1]); - - if (fgets(str, len, f) == NULL) - abort(); - - if (waitpid(pid, &rc, 0) != pid) - abort(); - - if (!WIFEXITED(rc) || - WEXITSTATUS(rc) != 0) - abort(); - - if (strnlen(str, len) == len) - str[len - 1] = 0; - - if (str[strlen(str) - 1] == '\n') - str[strlen(str) - 1] = 0; - - fclose(f); - } -} - -int -main (int argc, char **argv) -{ - char unameinfo[80]; - char gccinfo[80]; - - system_string("uname -a", unameinfo, sizeof(unameinfo)); - system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo)); - - printf ("void kptllnd_assert_wire_constants (void)\n" - "{\n" - " /* Wire protocol assertions generated by 'wirecheck'\n" - " * running on %s\n" - " * with %s */\n" - "\n", unameinfo, gccinfo); - - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PTL_RESERVED_MATCHBITS); - CHECK_DEFINE (LNET_MSG_MATCHBITS); - - CHECK_DEFINE (PTLLND_MSG_MAGIC); - CHECK_DEFINE (PTLLND_MSG_VERSION); - - CHECK_DEFINE (PTLLND_RDMA_OK); - CHECK_DEFINE (PTLLND_RDMA_FAIL); - - CHECK_DEFINE (PTLLND_MSG_TYPE_INVALID); - CHECK_DEFINE (PTLLND_MSG_TYPE_PUT); - CHECK_DEFINE (PTLLND_MSG_TYPE_GET); - CHECK_DEFINE (PTLLND_MSG_TYPE_IMMEDIATE); - CHECK_DEFINE (PTLLND_MSG_TYPE_NOOP); - CHECK_DEFINE (PTLLND_MSG_TYPE_HELLO); - CHECK_DEFINE (PTLLND_MSG_TYPE_NAK); - - CHECK_STRUCT (kptl_msg_t); - CHECK_MEMBER (kptl_msg_t, ptlm_magic); - CHECK_MEMBER (kptl_msg_t, ptlm_version); - CHECK_MEMBER (kptl_msg_t, ptlm_type); - CHECK_MEMBER (kptl_msg_t, ptlm_credits); - CHECK_MEMBER (kptl_msg_t, ptlm_nob); - CHECK_MEMBER (kptl_msg_t, ptlm_cksum); - CHECK_MEMBER (kptl_msg_t, ptlm_srcnid); - CHECK_MEMBER (kptl_msg_t, ptlm_srcstamp); - CHECK_MEMBER (kptl_msg_t, ptlm_dstnid); - CHECK_MEMBER (kptl_msg_t, ptlm_dststamp); - CHECK_MEMBER (kptl_msg_t, ptlm_srcpid); - CHECK_MEMBER (kptl_msg_t, ptlm_dstpid); - CHECK_MEMBER (kptl_msg_t, ptlm_u.immediate); - CHECK_MEMBER (kptl_msg_t, ptlm_u.rdma); - CHECK_MEMBER (kptl_msg_t, ptlm_u.hello); - - CHECK_STRUCT (kptl_immediate_msg_t); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_hdr); - CHECK_MEMBER (kptl_immediate_msg_t, kptlim_payload[13]); - - CHECK_STRUCT (kptl_rdma_msg_t); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_hdr); - CHECK_MEMBER (kptl_rdma_msg_t, kptlrm_matchbits); - - CHECK_STRUCT (kptl_hello_msg_t); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_matchbits); - CHECK_MEMBER (kptl_hello_msg_t, kptlhm_max_msg_size); - - printf ("}\n\n"); - - return (0); -} diff --git a/lnet/lnet/portals.xcode/project.pbxproj b/lnet/lnet/portals.xcode/project.pbxproj deleted file mode 100644 index 1dc0146..0000000 --- a/lnet/lnet/portals.xcode/project.pbxproj +++ /dev/null @@ -1,430 +0,0 @@ -// !$*UTF8*$! -{ - archiveVersion = 1; - classes = { - }; - objectVersion = 39; - objects = { - 06AA1262FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = NO; - GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; - GCC_GENERATE_DEBUGGING_SYMBOLS = YES; - GCC_OPTIMIZATION_LEVEL = 0; - OPTIMIZATION_CFLAGS = "-O0"; - ZERO_LINK = YES; - }; - isa = PBXBuildStyle; - name = Development; - }; - 06AA1263FFB20DD611CA28AA = { - buildRules = ( - ); - buildSettings = { - COPY_PHASE_STRIP = YES; - GCC_ENABLE_FIX_AND_CONTINUE = NO; - ZERO_LINK = NO; - }; - isa = PBXBuildStyle; - name = Deployment; - }; -//060 -//061 -//062 -//063 -//064 -//080 -//081 -//082 -//083 -//084 - 089C1669FE841209C02AAC07 = { - buildSettings = { - }; - buildStyles = ( - 06AA1262FFB20DD611CA28AA, - 06AA1263FFB20DD611CA28AA, - ); - hasScannedForEncodings = 1; - isa = PBXProject; - mainGroup = 089C166AFE841209C02AAC07; - projectDirPath = ""; - targets = ( - 32A4FEB80562C75700D090E7, - ); - }; - 089C166AFE841209C02AAC07 = { - children = ( - 247142CAFF3F8F9811CA285C, - 089C167CFE841241C02AAC07, - 19C28FB6FE9D52B211CA2CBB, - ); - isa = PBXGroup; - name = portals; - refType = 4; - sourceTree = ""; - }; - 089C167CFE841241C02AAC07 = { - children = ( - 32A4FEC30562C75700D090E7, - ); - isa = PBXGroup; - name = Resources; - refType = 4; - sourceTree = ""; - }; -//080 -//081 -//082 -//083 -//084 -//190 -//191 -//192 -//193 -//194 - 19A778270730EACD00846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = module.c; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778280730EACD00846375 = { - fileRef = 19A778270730EACD00846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7782B0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-errno.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782C0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782D0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "api-wrap.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782E0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-eq.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A7782F0730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-init.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778300730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-md.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778310730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-me.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778320730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-move.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778330730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-msg.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778340730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-ni.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778350730EB8400846375 = { - fileEncoding = 30; - isa = PBXFileReference; - lastKnownFileType = sourcecode.c.c; - path = "lib-pid.c"; - refType = 2; - sourceTree = SOURCE_ROOT; - }; - 19A778360730EB8400846375 = { - fileRef = 19A7782B0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778370730EB8400846375 = { - fileRef = 19A7782C0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778380730EB8400846375 = { - fileRef = 19A7782D0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778390730EB8400846375 = { - fileRef = 19A7782E0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783A0730EB8400846375 = { - fileRef = 19A7782F0730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783B0730EB8400846375 = { - fileRef = 19A778300730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783C0730EB8400846375 = { - fileRef = 19A778310730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783D0730EB8400846375 = { - fileRef = 19A778320730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783E0730EB8400846375 = { - fileRef = 19A778330730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A7783F0730EB8400846375 = { - fileRef = 19A778340730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19A778400730EB8400846375 = { - fileRef = 19A778350730EB8400846375; - isa = PBXBuildFile; - settings = { - }; - }; - 19C28FB6FE9D52B211CA2CBB = { - children = ( - 32A4FEC40562C75800D090E7, - ); - isa = PBXGroup; - name = Products; - refType = 4; - sourceTree = ""; - }; -//190 -//191 -//192 -//193 -//194 -//240 -//241 -//242 -//243 -//244 - 247142CAFF3F8F9811CA285C = { - children = ( - 19A7782B0730EB8400846375, - 19A7782C0730EB8400846375, - 19A7782D0730EB8400846375, - 19A7782E0730EB8400846375, - 19A7782F0730EB8400846375, - 19A778300730EB8400846375, - 19A778310730EB8400846375, - 19A778320730EB8400846375, - 19A778330730EB8400846375, - 19A778340730EB8400846375, - 19A778350730EB8400846375, - 19A778270730EACD00846375, - ); - isa = PBXGroup; - name = Source; - path = ""; - refType = 4; - sourceTree = ""; - }; -//240 -//241 -//242 -//243 -//244 -//320 -//321 -//322 -//323 -//324 - 32A4FEB80562C75700D090E7 = { - buildPhases = ( - 32A4FEB90562C75700D090E7, - 32A4FEBA0562C75700D090E7, - 32A4FEBB0562C75700D090E7, - 32A4FEBD0562C75700D090E7, - 32A4FEBF0562C75700D090E7, - 32A4FEC00562C75700D090E7, - 32A4FEC10562C75700D090E7, - ); - buildRules = ( - ); - buildSettings = { - FRAMEWORK_SEARCH_PATHS = ""; - GCC_WARN_FOUR_CHARACTER_CONSTANTS = NO; - GCC_WARN_UNKNOWN_PRAGMAS = NO; - HEADER_SEARCH_PATHS = ../include; - INFOPLIST_FILE = Info.plist; - INSTALL_PATH = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - LIBRARY_SEARCH_PATHS = ""; - MODULE_NAME = com.clusterfs.lustre.portals.portals.portals; - MODULE_START = portals_start; - MODULE_STOP = portals_stop; - MODULE_VERSION = 1.0.1; - OTHER_CFLAGS = "-D__KERNEL__"; - OTHER_LDFLAGS = ""; - OTHER_REZFLAGS = ""; - PRODUCT_NAME = portals; - SECTORDER_FLAGS = ""; - WARNING_CFLAGS = "-Wmost"; - WRAPPER_EXTENSION = kext; - }; - dependencies = ( - ); - isa = PBXNativeTarget; - name = portals; - productInstallPath = "$(SYSTEM_LIBRARY_DIR)/Extensions"; - productName = portals; - productReference = 32A4FEC40562C75800D090E7; - productType = "com.apple.product-type.kernel-extension"; - }; - 32A4FEB90562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPreprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; - 32A4FEBA0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXHeadersBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBB0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXResourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBD0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - 19A778280730EACD00846375, - 19A778360730EB8400846375, - 19A778370730EB8400846375, - 19A778380730EB8400846375, - 19A778390730EB8400846375, - 19A7783A0730EB8400846375, - 19A7783B0730EB8400846375, - 19A7783C0730EB8400846375, - 19A7783D0730EB8400846375, - 19A7783E0730EB8400846375, - 19A7783F0730EB8400846375, - 19A778400730EB8400846375, - ); - isa = PBXSourcesBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEBF0562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXFrameworksBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC00562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXRezBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - }; - 32A4FEC10562C75700D090E7 = { - buildActionMask = 2147483647; - files = ( - ); - isa = PBXShellScriptBuildPhase; - runOnlyForDeploymentPostprocessing = 0; - shellPath = /bin/sh; - shellScript = "script=\"${SYSTEM_DEVELOPER_DIR}/ProjectBuilder Extras/Kernel Extension Support/KEXTPostprocess\";\nif [ -x \"$script\" ]; then\n . \"$script\"\nfi"; - }; - 32A4FEC30562C75700D090E7 = { - isa = PBXFileReference; - lastKnownFileType = text.plist.xml; - path = Info.plist; - refType = 4; - sourceTree = ""; - }; - 32A4FEC40562C75800D090E7 = { - explicitFileType = wrapper.cfbundle; - includeInIndex = 0; - isa = PBXFileReference; - path = portals.kext; - refType = 3; - sourceTree = BUILT_PRODUCTS_DIR; - }; - }; - rootObject = 089C1669FE841209C02AAC07; -} diff --git a/lnet/lnet/router.c b/lnet/lnet/router.c index b594fed..4da9573 100644 --- a/lnet/lnet/router.c +++ b/lnet/lnet/router.c @@ -646,7 +646,6 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) return; /* can't carry NI status info */ cfs_list_for_each_entry(rtr, &gw->lp_routes, lr_gwlist) { - int ptl_status = LNET_NI_STATUS_INVALID; int down = 0; int up = 0; int i; @@ -666,10 +665,7 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) continue; if (stat->ns_status == LNET_NI_STATUS_DOWN) { - if (LNET_NETTYP(LNET_NIDNET(nid)) != PTLLND) - down++; - else if (ptl_status != LNET_NI_STATUS_UP) - ptl_status = LNET_NI_STATUS_DOWN; + down++; continue; } @@ -678,10 +674,6 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) up = 1; break; } - /* ptl NIs are considered down only when - * they're all down */ - if (LNET_NETTYP(LNET_NIDNET(nid)) == PTLLND) - ptl_status = LNET_NI_STATUS_UP; continue; } @@ -695,7 +687,7 @@ lnet_parse_rc_info(lnet_rc_data_t *rcd) rtr->lr_downis = 0; continue; } - rtr->lr_downis = down + (ptl_status == LNET_NI_STATUS_DOWN); + rtr->lr_downis = down; } } diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 5bfee30..a0c0ce1 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -850,7 +850,6 @@ static struct mod_paths { { "lnet", "lnet/lnet" }, { "kmxlnd", "lnet/klnds/mxlnd" }, { "ko2iblnd", "lnet/klnds/o2iblnd" }, - { "kptllnd", "lnet/klnds/ptllnd" }, { "kgnilnd", "lnet/klnds/gnilnd"}, { "kqswlnd", "lnet/klnds/qswlnd" }, { "kralnd", "lnet/klnds/ralnd" }, diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c index cc7da92..110716c 100644 --- a/lnet/utils/portals.c +++ b/lnet/utils/portals.c @@ -566,7 +566,7 @@ jt_ptl_print_peers (int argc, char **argv) int index; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, PTLLND, MXLND, + if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, O2IBLND, GNILND, 0)) return -1; @@ -593,25 +593,6 @@ jt_ptl_print_peers (int argc, char **argv) sizeof(buffer[1]), 1), data.ioc_u32[1], /* peer port */ data.ioc_u32[3]); /* conn_count */ - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - id.nid = data.ioc_nid; - id.pid = data.ioc_u32[4]; - printf ("%-20s s %d%s [%d] "LPD64".%06d" - " m "LPD64"/"LPD64" q %d/%d c %d/%d\n", - libcfs_id2str(id), - data.ioc_net, /* state */ - data.ioc_flags ? "" : " ~!h", /* sent_hello */ - data.ioc_count, /* refcount */ - data.ioc_u64[0]/1000000, /* incarnation secs */ - (int)(data.ioc_u64[0]%1000000), /* incarnation usecs */ - (((__u64)data.ioc_u32[1])<<32) | - ((__u64)data.ioc_u32[0]), /* next_matchbits */ - (((__u64)data.ioc_u32[3])<<32) | - ((__u64)data.ioc_u32[2]), /* last_matchbits_seen */ - data.ioc_u32[5] >> 16, /* nsendq */ - data.ioc_u32[5] & 0xffff, /* nactiveq */ - data.ioc_u32[6] >> 16, /* credits */ - data.ioc_u32[6] & 0xffff); /* outstanding_credits */ } else if (g_net_is_compatible(NULL, RALND, 0)) { printf ("%-20s [%d]@%s:%d\n", libcfs_nid2str(data.ioc_nid), /* peer nid */ @@ -716,10 +697,9 @@ jt_ptl_del_peer (int argc, char **argv) lnet_nid_t nid = LNET_NID_ANY; lnet_pid_t pid = LNET_PID_ANY; __u32 ip = 0; - char *end; int rc; - if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, PTLLND, + if (!g_net_is_compatible (argv[0], SOCKLND, RALND, MXLND, O2IBLND, GNILND, 0)) return -1; @@ -729,12 +709,6 @@ jt_ptl_del_peer (int argc, char **argv) argv[0]); return 0; } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 3) { - fprintf (stderr, "usage: %s [nid] [pid]\n", - argv[0]); - return 0; - } } else if (argc > 2) { fprintf (stderr, "usage: %s [nid]\n", argv[0]); return 0; @@ -753,15 +727,6 @@ jt_ptl_del_peer (int argc, char **argv) argv[2]); return -1; } - } else if (g_net_is_compatible(NULL, PTLLND, 0)) { - if (argc > 2) { - pid = strtol(argv[2], &end, 0); - if (end == argv[2] || *end == 0) { - fprintf(stderr, "Can't parse pid %s\n", - argv[2]); - return -1; - } - } } LIBCFS_IOC_INIT(data); -- 1.8.3.1