Whamcloud - gitweb
Move portals into its own CVS module for HEAD.
author: jacob <jacob>
Sun, 19 Dec 2004 23:11:15 +0000 (23:11 +0000)
committer: jacob <jacob>
Sun, 19 Dec 2004 23:11:15 +0000 (23:11 +0000)
220 files changed:
COPYING [new file with mode: 0644]
README
ldiskfs/ldiskfs/autoMakefile.am
lnet/autoMakefile.am
lnet/autoconf/.cvsignore [moved from lustre/portals/include/linux/.cvsignore with 100% similarity]
lnet/autoconf/Makefile.am [new file with mode: 0644]
lnet/autoconf/lustre-lnet.m4 [new file with mode: 0644]
lustre/Makefile.in
lustre/README [deleted file]
lustre/README.kernel-source [deleted file]
lustre/autoMakefile.am
lustre/autoconf/Makefile.am [new file with mode: 0644]
lustre/autoconf/lustre-core.m4 [new file with mode: 0644]
lustre/autoconf/lustre-version.ac [new file with mode: 0644]
lustre/autogen.sh [deleted file]
lustre/conf/Makefile.am
lustre/configure.in [deleted file]
lustre/include/Makefile.am
lustre/include/linux/Makefile.am
lustre/include/linux/lustre_compat25.h
lustre/ldiskfs/autoMakefile.am
lustre/liblustre/tests/Makefile.am
lustre/lvfs/autoMakefile.am
lustre/portals/.cvsignore [deleted file]
lustre/portals/AUTHORS [deleted file]
lustre/portals/ChangeLog [deleted file]
lustre/portals/Kernelenv.in [deleted file]
lustre/portals/Kernelenv.mk [deleted file]
lustre/portals/Makefile.in [deleted file]
lustre/portals/Makefile.mk [deleted file]
lustre/portals/NEWS [deleted file]
lustre/portals/README [deleted file]
lustre/portals/archdep.m4 [deleted file]
lustre/portals/autoMakefile.am [deleted file]
lustre/portals/autogen.sh [deleted file]
lustre/portals/build.m4 [deleted file]
lustre/portals/doc/.cvsignore [deleted file]
lustre/portals/doc/Data-structures [deleted file]
lustre/portals/doc/Makefile.am [deleted file]
lustre/portals/doc/Message-life-cycle [deleted file]
lustre/portals/doc/NAL-HOWTO [deleted file]
lustre/portals/doc/file.fig [deleted file]
lustre/portals/doc/flow_new.fig [deleted file]
lustre/portals/doc/get.fig [deleted file]
lustre/portals/doc/ieee.bst [deleted file]
lustre/portals/doc/mpi.fig [deleted file]
lustre/portals/doc/portals.fig [deleted file]
lustre/portals/doc/portals3.bib [deleted file]
lustre/portals/doc/portals3.lyx [deleted file]
lustre/portals/doc/put.fig [deleted file]
lustre/portals/include/.cvsignore [deleted file]
lustre/portals/include/Makefile.am [deleted file]
lustre/portals/include/cygwin-ioctl.h [deleted file]
lustre/portals/include/linux/Makefile.am [deleted file]
lustre/portals/include/linux/kp30.h [deleted file]
lustre/portals/include/linux/kpr.h [deleted file]
lustre/portals/include/linux/libcfs.h [deleted file]
lustre/portals/include/linux/lustre_list.h [deleted file]
lustre/portals/include/linux/portals_compat25.h [deleted file]
lustre/portals/include/linux/portals_lib.h [deleted file]
lustre/portals/include/portals/.cvsignore [deleted file]
lustre/portals/include/portals/Makefile.am [deleted file]
lustre/portals/include/portals/api-support.h [deleted file]
lustre/portals/include/portals/api.h [deleted file]
lustre/portals/include/portals/build_check.h [deleted file]
lustre/portals/include/portals/errno.h [deleted file]
lustre/portals/include/portals/internal.h [deleted file]
lustre/portals/include/portals/lib-p30.h [deleted file]
lustre/portals/include/portals/lib-types.h [deleted file]
lustre/portals/include/portals/list.h [deleted file]
lustre/portals/include/portals/lltrace.h [deleted file]
lustre/portals/include/portals/myrnal.h [deleted file]
lustre/portals/include/portals/nal.h [deleted file]
lustre/portals/include/portals/nalids.h [deleted file]
lustre/portals/include/portals/p30.h [deleted file]
lustre/portals/include/portals/ptlctl.h [deleted file]
lustre/portals/include/portals/socknal.h [deleted file]
lustre/portals/include/portals/stringtab.h [deleted file]
lustre/portals/include/portals/types.h [deleted file]
lustre/portals/knals/.cvsignore [deleted file]
lustre/portals/knals/Makefile.in [deleted file]
lustre/portals/knals/Makefile.mk [deleted file]
lustre/portals/knals/autoMakefile.am [deleted file]
lustre/portals/knals/gmnal/.cvsignore [deleted file]
lustre/portals/knals/gmnal/Makefile.in [deleted file]
lustre/portals/knals/gmnal/Makefile.mk [deleted file]
lustre/portals/knals/gmnal/autoMakefile.am [deleted file]
lustre/portals/knals/gmnal/gmnal.h [deleted file]
lustre/portals/knals/gmnal/gmnal_api.c [deleted file]
lustre/portals/knals/gmnal/gmnal_cb.c [deleted file]
lustre/portals/knals/gmnal/gmnal_comm.c [deleted file]
lustre/portals/knals/gmnal/gmnal_module.c [deleted file]
lustre/portals/knals/gmnal/gmnal_utils.c [deleted file]
lustre/portals/knals/iibnal/.cvsignore [deleted file]
lustre/portals/knals/iibnal/Makefile.in [deleted file]
lustre/portals/knals/iibnal/Makefile.mk [deleted file]
lustre/portals/knals/iibnal/autoMakefile.am [deleted file]
lustre/portals/knals/iibnal/iibnal.c [deleted file]
lustre/portals/knals/iibnal/iibnal.h [deleted file]
lustre/portals/knals/iibnal/iibnal_cb.c [deleted file]
lustre/portals/knals/lonal/.cvsignore [deleted file]
lustre/portals/knals/lonal/Makefile.in [deleted file]
lustre/portals/knals/lonal/autoMakefile.am [deleted file]
lustre/portals/knals/lonal/lonal.c [deleted file]
lustre/portals/knals/lonal/lonal.h [deleted file]
lustre/portals/knals/lonal/lonal_cb.c [deleted file]
lustre/portals/knals/openibnal/.cvsignore [deleted file]
lustre/portals/knals/openibnal/Makefile.in [deleted file]
lustre/portals/knals/openibnal/Makefile.mk [deleted file]
lustre/portals/knals/openibnal/autoMakefile.am [deleted file]
lustre/portals/knals/openibnal/openibnal.c [deleted file]
lustre/portals/knals/openibnal/openibnal.h [deleted file]
lustre/portals/knals/openibnal/openibnal_cb.c [deleted file]
lustre/portals/knals/qswnal/.cvsignore [deleted file]
lustre/portals/knals/qswnal/Makefile.in [deleted file]
lustre/portals/knals/qswnal/autoMakefile.am [deleted file]
lustre/portals/knals/qswnal/qswnal.c [deleted file]
lustre/portals/knals/qswnal/qswnal.h [deleted file]
lustre/portals/knals/qswnal/qswnal_cb.c [deleted file]
lustre/portals/knals/ranal/.cvsignore [deleted file]
lustre/portals/knals/ranal/Makefile.in [deleted file]
lustre/portals/knals/ranal/autoMakefile.am [deleted file]
lustre/portals/knals/ranal/ranal.c [deleted file]
lustre/portals/knals/ranal/ranal.h [deleted file]
lustre/portals/knals/ranal/ranal_cb.c [deleted file]
lustre/portals/knals/socknal/.cvsignore [deleted file]
lustre/portals/knals/socknal/Makefile.in [deleted file]
lustre/portals/knals/socknal/Makefile.mk [deleted file]
lustre/portals/knals/socknal/autoMakefile.am [deleted file]
lustre/portals/knals/socknal/socknal.c [deleted file]
lustre/portals/knals/socknal/socknal.h [deleted file]
lustre/portals/knals/socknal/socknal_cb.c [deleted file]
lustre/portals/libcfs/.cvsignore [deleted file]
lustre/portals/libcfs/Makefile.in [deleted file]
lustre/portals/libcfs/Makefile.mk [deleted file]
lustre/portals/libcfs/autoMakefile.am [deleted file]
lustre/portals/libcfs/debug.c [deleted file]
lustre/portals/libcfs/lwt.c [deleted file]
lustre/portals/libcfs/module.c [deleted file]
lustre/portals/libcfs/proc.c [deleted file]
lustre/portals/libcfs/tracefile.c [deleted file]
lustre/portals/libcfs/tracefile.h [deleted file]
lustre/portals/libcfs/watchdog.c [deleted file]
lustre/portals/packaging/.cvsignore [deleted file]
lustre/portals/packaging/Makefile.am [deleted file]
lustre/portals/packaging/portals.spec.in [deleted file]
lustre/portals/portals/.cvsignore [deleted file]
lustre/portals/portals/Makefile.in [deleted file]
lustre/portals/portals/Makefile.mk [deleted file]
lustre/portals/portals/api-errno.c [deleted file]
lustre/portals/portals/api-ni.c [deleted file]
lustre/portals/portals/api-wrap.c [deleted file]
lustre/portals/portals/autoMakefile.am [deleted file]
lustre/portals/portals/lib-eq.c [deleted file]
lustre/portals/portals/lib-init.c [deleted file]
lustre/portals/portals/lib-md.c [deleted file]
lustre/portals/portals/lib-me.c [deleted file]
lustre/portals/portals/lib-move.c [deleted file]
lustre/portals/portals/lib-msg.c [deleted file]
lustre/portals/portals/lib-ni.c [deleted file]
lustre/portals/portals/lib-pid.c [deleted file]
lustre/portals/portals/module.c [deleted file]
lustre/portals/router/.cvsignore [deleted file]
lustre/portals/router/Makefile.in [deleted file]
lustre/portals/router/Makefile.mk [deleted file]
lustre/portals/router/autoMakefile.am [deleted file]
lustre/portals/router/proc.c [deleted file]
lustre/portals/router/router.c [deleted file]
lustre/portals/router/router.h [deleted file]
lustre/portals/tests/.cvsignore [deleted file]
lustre/portals/tests/Makefile.in [deleted file]
lustre/portals/tests/Makefile.mk [deleted file]
lustre/portals/tests/autoMakefile.am [deleted file]
lustre/portals/tests/ping.h [deleted file]
lustre/portals/tests/ping_cli.c [deleted file]
lustre/portals/tests/ping_srv.c [deleted file]
lustre/portals/tests/sping_cli.c [deleted file]
lustre/portals/tests/sping_srv.c [deleted file]
lustre/portals/tests/startclient.sh [deleted file]
lustre/portals/tests/startserver.sh [deleted file]
lustre/portals/tests/stopclient.sh [deleted file]
lustre/portals/tests/stopserver.sh [deleted file]
lustre/portals/unals/.cvsignore [deleted file]
lustre/portals/unals/Makefile.am [deleted file]
lustre/portals/unals/README [deleted file]
lustre/portals/unals/address.c [deleted file]
lustre/portals/unals/bridge.h [deleted file]
lustre/portals/unals/connection.c [deleted file]
lustre/portals/unals/connection.h [deleted file]
lustre/portals/unals/debug.c [deleted file]
lustre/portals/unals/dispatch.h [deleted file]
lustre/portals/unals/ipmap.h [deleted file]
lustre/portals/unals/pqtimer.c [deleted file]
lustre/portals/unals/pqtimer.h [deleted file]
lustre/portals/unals/procapi.c [deleted file]
lustre/portals/unals/procbridge.h [deleted file]
lustre/portals/unals/proclib.c [deleted file]
lustre/portals/unals/select.c [deleted file]
lustre/portals/unals/table.c [deleted file]
lustre/portals/unals/table.h [deleted file]
lustre/portals/unals/tcpnal.c [deleted file]
lustre/portals/unals/timer.h [deleted file]
lustre/portals/unals/utypes.h [deleted file]
lustre/portals/utils/.cvsignore [deleted file]
lustre/portals/utils/Makefile.am [deleted file]
lustre/portals/utils/Makefile.mk [deleted file]
lustre/portals/utils/acceptor.c [deleted file]
lustre/portals/utils/debug.c [deleted file]
lustre/portals/utils/debugctl.c [deleted file]
lustre/portals/utils/gmnalnid.c [deleted file]
lustre/portals/utils/l_ioctl.c [deleted file]
lustre/portals/utils/parser.c [deleted file]
lustre/portals/utils/parser.h [deleted file]
lustre/portals/utils/portals.c [deleted file]
lustre/portals/utils/ptlctl.c [deleted file]
lustre/portals/utils/routerstat.c [deleted file]
lustre/portals/utils/wirecheck.c [deleted file]
lustre/ptlrpc/Makefile.in
lustre/ptlrpc/autoMakefile.am
lustre/scripts/Makefile.am

diff --git a/COPYING b/COPYING
new file mode 100644 (file)
index 0000000..c69cfd8
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,352 @@
+
+   NOTE! This copyright does *not* cover user programs that use kernel
+ services by normal system calls - this is merely considered normal use
+ of the kernel, and does *not* fall under the heading of "derived work".
+ Also note that the GPL below is copyrighted by the Free Software
+ Foundation, but the instance of code that it refers to (the Linux
+ kernel) is copyrighted by me and others who actually wrote it.
+
+                       Linus Torvalds
+
+----------------------------------------
+
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+\f
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+\f
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+\f
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+\f
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+\f
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) 19yy  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) 19yy name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/README b/README
index e69de29..c052124 100644 (file)
--- a/README
+++ b/README
@@ -0,0 +1,2 @@
+Instructions for building, configuring and running Lustre can be found at:
+    http://projects.clusterfs.com/lustre/LustreHowto.
index e33e4c9..4fd32d4 100644 (file)
@@ -27,8 +27,8 @@ linux/ldiskfs%.h: linux-stage/include/linux/ext3%.h
 # FIXME: we need to grab the series in configure somehow
 # (see bug 1679)
 #
-series := @top_srcdir@/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
-patches := @top_srcdir@/kernel_patches/patches
+series := @top_srcdir@/lustre/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
+patches := @top_srcdir@/lustre/kernel_patches/patches
 
 sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
        rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
index 485ff04..f2ba240 100644 (file)
@@ -3,6 +3,7 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-EXTRA_DIST = archdep.m4 build.m4
+SUBDIRS = portals libcfs knals unals router tests doc utils include    \
+       autoconf
 
-SUBDIRS = portals libcfs knals unals router tests doc utils include
+sources:
diff --git a/lnet/autoconf/Makefile.am b/lnet/autoconf/Makefile.am
new file mode 100644 (file)
index 0000000..f65d2c0
--- /dev/null
@@ -0,0 +1 @@
+# Ship the autoconf macro file that lives in this directory.
+EXTRA_DIST := lustre-lnet.m4
diff --git a/lnet/autoconf/lustre-lnet.m4 b/lnet/autoconf/lustre-lnet.m4
new file mode 100644 (file)
index 0000000..0cb49a2
--- /dev/null
@@ -0,0 +1,483 @@
+#
+# LP_CONFIG_ZEROCOPY
+#
+# check if zerocopy is available/wanted
+#
+# --disable-zerocopy turns the check off.  Otherwise SOCKNAL_ZC is defined
+# when $LINUX/include/linux/skbuff.h mentions "zccd".  A missing or
+# unreadable skbuff.h is reported as "no kernel support" rather than being
+# mistaken for a match.
+#
+AC_DEFUN([LP_CONFIG_ZEROCOPY],
+[AC_MSG_CHECKING([for zero-copy TCP support])
+AC_ARG_ENABLE([zerocopy],
+       AC_HELP_STRING([--disable-zerocopy],
+                      [disable socknal zerocopy]),
+       [],[enable_zerocopy='yes'])
+if test x$enable_zerocopy = xno ; then
+       AC_MSG_RESULT([no (by request)])
+else
+       # an empty count means grep could not read the header
+       # so it must be treated like a count of zero
+       ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h 2>/dev/null`"
+       if test -n "$ZCCD" && test "$ZCCD" != 0 ; then
+               AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP])
+               AC_MSG_RESULT(yes)
+       else
+               AC_MSG_RESULT([no (no kernel support)])
+       fi
+fi
+])
+
+#
+# LP_CONFIG_AFFINITY
+#
+# check if cpu affinity is available/wanted
+#
+# --disable-affinity skips the probe.  Otherwise CPU_AFFINITY is defined
+# when a test program calling set_cpus_allowed() builds under
+# LB_LINUX_TRY_COMPILE.  The mask argument is declared as cpumask_t when
+# CPU_ARRAY_SIZE is defined and as unsigned long otherwise.
+#
+AC_DEFUN([LP_CONFIG_AFFINITY],
+[AC_ARG_ENABLE([affinity],
+       AC_HELP_STRING([--disable-affinity],
+                      [disable process/irq affinity]),
+       [],[enable_affinity='yes'])
+
+AC_MSG_CHECKING([for CPU affinity support])
+if test x$enable_affinity = xno ; then
+       AC_MSG_RESULT([no (by request)])
+else
+       LB_LINUX_TRY_COMPILE([
+               #include <linux/sched.h>
+       ],[
+               struct task_struct t;
+               #ifdef CPU_ARRAY_SIZE
+               cpumask_t m;
+               #else
+               unsigned long m;
+               #endif
+               set_cpus_allowed(&t, m);
+       ],[
+               AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
+               AC_MSG_RESULT([yes])
+       ],[
+               AC_MSG_RESULT([no (no kernel support)])
+       ])
+fi
+])
+
+#
+# LP_CONFIG_QUADRICS
+#
+# check if quadrics support is in this kernel
+#
+# Sets QSWNAL to "qswnal" when $LINUX/drivers/net/qsnet exists and to the
+# empty string otherwise.  QSWCPPFLAGS becomes -DMULTIRAIL_EKC=1 when
+# $LINUX/include/elan/epcomms.h is present; without it an include path is
+# chosen ($LINUX/drivers/net/qsnet/include if that directory exists,
+# $LINUX/include/linux if not).  Both variables are AC_SUBSTed.
+#
+AC_DEFUN([LP_CONFIG_QUADRICS],
+[AC_MSG_CHECKING([if quadrics kernel headers are present])
+if test -d $LINUX/drivers/net/qsnet ; then
+       AC_MSG_RESULT([yes])
+       QSWNAL="qswnal"
+       AC_MSG_CHECKING([for multirail EKC])
+       if test -f $LINUX/include/elan/epcomms.h; then
+               AC_MSG_RESULT([supported])
+               QSWCPPFLAGS="-DMULTIRAIL_EKC=1"
+       else
+               AC_MSG_RESULT([not supported])
+               if test -d $LINUX/drivers/net/qsnet/include; then
+                       QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
+               else
+                       QSWCPPFLAGS="-I$LINUX/include/linux"
+               fi
+       fi
+else
+       AC_MSG_RESULT([no])
+       QSWNAL=""
+       QSWCPPFLAGS=""
+fi
+AC_SUBST(QSWCPPFLAGS)
+AC_SUBST(QSWNAL)
+])
+
+#
+# LP_CONFIG_GM
+#
+# check if gm support was requested (--with-gm)
+#
+# --with-gm (no path) uses the headers in /usr/local/gm/include;
+# --with-gm=path uses the include, drivers and drivers/linux/gm
+# directories below path.  In both cases GMNAL is set to "gmnal".
+# --without-gm, or omitting the option, leaves GMNAL and GMCPPFLAGS empty.
+# Both variables are AC_SUBSTed.
+#
+AC_DEFUN([LP_CONFIG_GM],
+[AC_MSG_CHECKING([if gm support was requested])
+AC_ARG_WITH([gm],
+       AC_HELP_STRING([--with-gm=path],
+                      [build gmnal against path]),
+       [
+               case $with_gm in 
+                       yes)
+                               AC_MSG_RESULT([yes])
+                               GMCPPFLAGS="-I/usr/local/gm/include"
+                               GMNAL="gmnal"
+                               ;;
+                       no)
+                               AC_MSG_RESULT([no])
+                               GMCPPFLAGS=""
+                               GMNAL=""
+                               ;;
+                       *)
+                               AC_MSG_RESULT([yes])
+                               GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm"
+                               GMNAL="gmnal"
+                               ;;
+               esac
+       ],[
+               AC_MSG_RESULT([no])
+               GMCPPFLAGS=""
+               GMNAL=""
+       ])
+AC_SUBST(GMCPPFLAGS)
+AC_SUBST(GMNAL)
+])
+
+#
+# LP_CONFIG_OPENIB
+#
+# check for OpenIB in the kernel
+#
+# Temporarily appends OPENIBCPPFLAGS to EXTRA_KCFLAGS and tries to build a
+# program that includes <ts_ib_core.h> and declares a
+# struct ib_device_properties.  On success OPENIBNAL is set to "openibnal";
+# on failure OPENIBNAL and OPENIBCPPFLAGS are cleared.  EXTRA_KCFLAGS is
+# restored afterwards and both variables are AC_SUBSTed.
+#
+AC_DEFUN([LP_CONFIG_OPENIB],
+[AC_MSG_CHECKING([if OpenIB kernel headers are present])
+OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD"
+EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS"
+LB_LINUX_TRY_COMPILE(
+       [
+               #include <ts_ib_core.h>
+       ],[
+                struct ib_device_properties props;
+               return 0;
+       ],[
+               AC_MSG_RESULT([yes])
+               OPENIBNAL="openibnal"
+       ],[
+               AC_MSG_RESULT([no])
+               OPENIBNAL=""
+               OPENIBCPPFLAGS=""
+       ])
+EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
+AC_SUBST(OPENIBCPPFLAGS)
+AC_SUBST(OPENIBNAL)
+])
+
+#
+# LP_CONFIG_IIB
+#
+# check for infinicon infiniband support
+#
+# Temporarily appends IIBCPPFLAGS to EXTRA_KCFLAGS and tries to build a
+# program that includes <linux/iba/ibt.h> and calls
+# IbtGetInterfaceByVersion().  On success IIBNAL is set to "iibnal"; on
+# failure IIBNAL and IIBCPPFLAGS are cleared.  EXTRA_KCFLAGS is restored
+# afterwards and both variables are AC_SUBSTed.
+#
+AC_DEFUN([LP_CONFIG_IIB],
+[AC_MSG_CHECKING([if Infinicon IB kernel headers are present])
+# for now the only infinicon ib build has headers in /usr/include/iba
+IIBCPPFLAGS="-I/usr/include -DIN_TREE_BUILD"
+EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
+EXTRA_KCFLAGS="$EXTRA_KCFLAGS $IIBCPPFLAGS"
+LB_LINUX_TRY_COMPILE([
+       #include <linux/iba/ibt.h>
+],[
+        IBT_INTERFACE_UNION interfaces;
+        FSTATUS             rc;
+
+         rc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2,
+                                      &interfaces);
+
+       return rc == FSUCCESS ? 0 : 1;
+],[
+       AC_MSG_RESULT([yes])
+       IIBNAL="iibnal"
+],[
+       AC_MSG_RESULT([no])
+       IIBNAL=""
+       IIBCPPFLAGS=""
+])
+EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
+AC_SUBST(IIBCPPFLAGS)
+AC_SUBST(IIBNAL)
+])
+
+#
+# LP_STRUCT_PAGE_LIST
+#
+# 2.6.4 no longer has page->list
+#
+# Defines HAVE_PAGE_LIST when a test program that takes the address of
+# the list member of struct page builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LP_STRUCT_PAGE_LIST],
+[AC_MSG_CHECKING([if struct page has a list field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/mm.h>
+],[
+       struct page page;
+       &page.list;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LP_STRUCT_SIGHAND
+#
+# red hat 2.4 adds sighand to struct task_struct
+#
+# Defines CONFIG_RH_2_4_20 when a test program that assigns to the
+# sighand member of struct task_struct builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LP_STRUCT_SIGHAND],
+[AC_MSG_CHECKING([if task_struct has a sighand field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       struct task_struct p;
+       p.sighand = NULL;
+],[
+       AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
+       AC_MSG_RESULT([yes])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LP_FUNC_CPU_ONLINE
+#
+# cpu_online is different in rh 2.4, vanilla 2.4, and 2.6
+#
+# Defines HAVE_CPU_ONLINE when a test program calling cpu_online(0) with
+# <linux/sched.h> included builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LP_FUNC_CPU_ONLINE],
+[AC_MSG_CHECKING([if kernel defines cpu_online()])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       cpu_online(0);
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LP_TYPE_CPUMASK_T
+#
+# same goes for cpumask_t
+#
+# Defines HAVE_CPUMASK_T when a test program using sizeof (cpumask_t) with
+# <linux/sched.h> included builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LP_TYPE_CPUMASK_T],
+[AC_MSG_CHECKING([if kernel defines cpumask_t])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       return sizeof (cpumask_t);
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LP_FUNC_SHOW_TASK
+#
+# we export show_task(), but not all kernels have it (yet)
+#
+# Greps $LINUX/kernel/ksyms.c and $LINUX/kernel/sched.c, in that order,
+# for the literal text EXPORT_SYMBOL(show_task) and defines HAVE_SHOW_TASK
+# on the first hit.  grep errors (for example a missing file) are
+# discarded, so such a file simply counts as "not found".
+#
+AC_DEFUN([LP_FUNC_SHOW_TASK],
+[AC_MSG_CHECKING([if kernel exports show_task])
+have_show_task=0
+for file in ksyms sched ; do
+       if grep -q "EXPORT_SYMBOL(show_task)" \
+                "$LINUX/kernel/$file.c" 2>/dev/null ; then
+               have_show_task=1
+               break
+       fi
+done
+if test x$have_show_task = x1 ; then
+       AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported])
+       AC_MSG_RESULT([yes])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+#
+# LP_PROG_LINUX
+#
+# Portals linux kernel checks
+#
+# Runs every kernel-side LP_* probe in a fixed order.  The OpenIB probe is
+# only run when $linux25 is "no"; an unset or empty $linux25 skips it.
+#
+AC_DEFUN([LP_PROG_LINUX],
+[LP_CONFIG_ZEROCOPY
+LP_CONFIG_AFFINITY
+LP_CONFIG_QUADRICS
+LP_CONFIG_GM
+# the x prefix keeps test well-formed when linux25 is unset or empty
+if test "x$linux25" = xno ; then
+       LP_CONFIG_OPENIB
+fi
+LP_CONFIG_IIB
+
+LP_STRUCT_PAGE_LIST
+LP_STRUCT_SIGHAND
+LP_FUNC_CPU_ONLINE
+LP_TYPE_CPUMASK_T
+LP_FUNC_SHOW_TASK
+])
+
+#
+# LP_PATH_DEFAULTS
+#
+# default paths for installed files
+#
+# The body is currently empty: the macro expands to nothing.
+#
+AC_DEFUN([LP_PATH_DEFAULTS],
+[
+])
+
+#
+# LP_CONFIGURE
+#
+# other configure checks
+#
+# Probes the userspace headers, functions and types used by the portals
+# utilities and handles these options:
+#   --disable-readline    LIBREADLINE / HAVE_LIBREADLINE (on by default)
+#   --enable-efence       LIBEFENCE / HAVE_LIBEFENCE (off by default)
+#   --enable-libwrap      LIBWRAP / HAVE_LIBWRAP (off by default)
+#   --disable-sysio-init  suppresses INIT_SYSIO (defined by default)
+#   --disable-urandom     suppresses LIBLUSTRE_USE_URANDOM (defined by default)
+# When $enable_liblustre is "yes" it also looks for libcap and libpthread
+# and records the results in CAP_LIBS and PTHREAD_LIBS.
+#
+AC_DEFUN([LP_CONFIGURE],
+[# portals/utils/portals.c
+AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h])
+AC_CHECK_FUNCS([gethostbyname socket connect])
+
+# portals/utils/debug.c
+AC_CHECK_HEADERS([linux/version.h])
+
+AC_CHECK_TYPE([spinlock_t],
+       [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])],
+       [],
+       [#include <linux/spinlock.h>])
+
+# --------  Check for required packages  --------------
+
+# this doesn't seem to work on older autoconf
+# AC_CHECK_LIB(readline, readline,,)
+AC_MSG_CHECKING([for readline support])
+AC_ARG_ENABLE(readline,
+       AC_HELP_STRING([--disable-readline],
+                       [do not use readline library]),
+       [],[enable_readline='yes'])
+AC_MSG_RESULT([$enable_readline])
+if test x$enable_readline = xyes ; then
+       LIBREADLINE="-lreadline -lncurses"
+       AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available])
+else
+       LIBREADLINE=""
+fi
+AC_SUBST(LIBREADLINE)
+
+AC_MSG_CHECKING([if efence debugging support is requested])
+AC_ARG_ENABLE(efence,
+       AC_HELP_STRING([--enable-efence],
+                       [use efence library]),
+       [],[enable_efence='no'])
+AC_MSG_RESULT([$enable_efence])
+if test "$enable_efence" = "yes" ; then
+       LIBEFENCE="-lefence"
+       AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested])
+else
+       LIBEFENCE=""
+fi
+AC_SUBST(LIBEFENCE)
+
+# -------- enable acceptor libwrap (TCP wrappers) support? -------
+AC_MSG_CHECKING([if libwrap support is requested])
+AC_ARG_ENABLE([libwrap],
+       AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
+       [case "${enableval}" in
+               yes) enable_libwrap=yes ;;
+               no) enable_libwrap=no ;;
+               *) AC_MSG_ERROR([bad value ${enableval} for --enable-libwrap]) ;;
+       esac],[enable_libwrap=no])
+AC_MSG_RESULT([$enable_libwrap])
+if test x$enable_libwrap = xyes ; then
+       LIBWRAP="-lwrap"
+       AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
+else
+       LIBWRAP=""
+fi
+AC_SUBST(LIBWRAP)
+
+# ----------------------------------------
+# some tests for catamount-like systems
+# ----------------------------------------
+AC_ARG_ENABLE([sysio_init],
+       AC_HELP_STRING([--disable-sysio-init],
+               [do not call sysio init functions when initializing liblustre]),
+       [],[enable_sysio_init=yes])
+AC_MSG_CHECKING([whether to initialize libsysio])
+AC_MSG_RESULT([$enable_sysio_init])
+if test x$enable_sysio_init != xno ; then
+       AC_DEFINE([INIT_SYSIO], 1, [call sysio init functions])
+fi
+
+AC_ARG_ENABLE([urandom],
+       AC_HELP_STRING([--disable-urandom],
+               [disable use of /dev/urandom for liblustre]),
+       [],[enable_urandom=yes])
+AC_MSG_CHECKING([whether to use /dev/urandom for liblustre])
+AC_MSG_RESULT([$enable_urandom])
+if test x$enable_urandom != xno ; then
+       AC_DEFINE([LIBLUSTRE_USE_URANDOM], 1, [use /dev/urandom for random data])
+fi
+
+# -------- check for -lcap and -lpthread ----
+if test x$enable_liblustre = xyes ; then
+       AC_CHECK_LIB([cap], [cap_get_proc],
+               [
+                       CAP_LIBS="-lcap"
+                       AC_DEFINE([HAVE_LIBCAP], 1, [use libcap])
+               ],
+               [CAP_LIBS=""])
+       AC_SUBST(CAP_LIBS)
+       AC_CHECK_LIB([pthread], [pthread_create],
+               [
+                       PTHREAD_LIBS="-lpthread"
+                       AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread])
+               ],
+               [PTHREAD_LIBS=""])
+       AC_SUBST(PTHREAD_LIBS)
+fi
+])
+
+#
+# LP_CONDITIONALS
+#
+# AM_CONDITIONAL defines for portals
+#
+# Each BUILD_*NAL conditional is true when the matching *NAL shell variable
+# holds its NAL directory name (qswnal, gmnal, openibnal, iibnal).
+#
+AC_DEFUN([LP_CONDITIONALS],
+[AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal")
+AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
+AM_CONDITIONAL(BUILD_OPENIBNAL, test x$OPENIBNAL = "xopenibnal")
+AM_CONDITIONAL(BUILD_IIBNAL, test x$IIBNAL = "xiibnal")
+])
+
+#
+# LP_CONFIG_FILES
+#
+# files that should be generated with AC_OUTPUT
+#
+# Registers portals/Kernelenv plus the Makefile and autoMakefile outputs
+# of the portals subdirectories with AC_CONFIG_FILES.
+#
+AC_DEFUN([LP_CONFIG_FILES],
+[AC_CONFIG_FILES([
+portals/Kernelenv
+portals/Makefile
+portals/autoMakefile
+portals/autoconf/Makefile
+portals/doc/Makefile
+portals/include/Makefile
+portals/include/linux/Makefile
+portals/include/portals/Makefile
+portals/knals/Makefile
+portals/knals/autoMakefile
+portals/knals/gmnal/Makefile
+portals/knals/gmnal/autoMakefile
+portals/knals/iibnal/Makefile
+portals/knals/iibnal/autoMakefile
+portals/knals/openibnal/Makefile
+portals/knals/openibnal/autoMakefile
+portals/knals/qswnal/Makefile
+portals/knals/qswnal/autoMakefile
+portals/knals/socknal/Makefile
+portals/knals/socknal/autoMakefile
+portals/knals/lonal/Makefile
+portals/knals/lonal/autoMakefile
+portals/libcfs/Makefile
+portals/libcfs/autoMakefile
+portals/portals/Makefile
+portals/portals/autoMakefile
+portals/router/Makefile
+portals/router/autoMakefile
+portals/tests/Makefile
+portals/tests/autoMakefile
+portals/unals/Makefile
+portals/utils/Makefile
+])
+])
index 7a9e460..1907eb1 100644 (file)
@@ -1,5 +1,3 @@
-subdir-m += portals
-
 @LDISKFS_TRUE@subdir-m  += ldiskfs
 
 subdir-m += lvfs
diff --git a/lustre/README b/lustre/README
deleted file mode 100644 (file)
index c052124..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-Instructions for building, configuring and running Lustre can be found at:
-    http://projects.clusterfs.com/lustre/LustreHowto.
diff --git a/lustre/README.kernel-source b/lustre/README.kernel-source
deleted file mode 100644 (file)
index 0b88efe..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-Lustre can currently build against Red Hat 2.4-style kernel-source
-RPMs.  All other kernel-source RPMs are *unsupported* at this time.
-
-Note that a Lustre-patched kernel is required for building Lustre; in
-most cases a kernel-source RPM from your Linux vendor will not contain
-the necessary patches.
-
-1.  kernel.h
-
-Building against a kernel-source RPM requires a special header.  On
-Red Hat systems, this file should be automatically created at boot
-time, and saved in /boot/kernel.h.
-
- *** If you are not running Red Hat Linux, or are not booted into the
- *** kernel you are trying to build against, you need to create this
- *** file manually.
-
- *** If you do not, the Lustre build may fail, or may fail to build
- *** modules that work with your kernel.
-
-Here is an example /boot/kernel.h file.  If you are building on
-x86_64, the first defines should be __MODULE_KERNEL_x86_64, etc.  The
-other defines should be simple to figure out.
-
-/* This file is automatically generated at boot time. */
-#ifndef __BOOT_KERNEL_H_
-#define __BOOT_KERNEL_H_
-
-/* Kernel type i686-smp */
-
-#ifndef __MODULE_KERNEL_i686
-#define __MODULE_KERNEL_i686 1
-#endif
-
-#ifndef __BOOT_KERNEL_ENTERPRISE
-#define __BOOT_KERNEL_ENTERPRISE 0
-#endif
-
-#ifndef __BOOT_KERNEL_BIGMEM
-#define __BOOT_KERNEL_BIGMEM 0
-#endif
-
-#ifndef __BOOT_KERNEL_HUGEMEM
-#define __BOOT_KERNEL_HUGEMEM 0
-#endif
-
-#ifndef __BOOT_KERNEL_SMP
-#define __BOOT_KERNEL_SMP 1
-#endif
-
-#ifndef __BOOT_KERNEL_UP
-#define __BOOT_KERNEL_UP 0
-#endif
-
-#endif
-
-You should save this somewhere, and pass the location of this file to
-./configure using the --with-kernel-source-header option.
-
-2.  .config
-
-You will also need to tell Lustre about the .config file for your
-kernel.  The two likely locations of this file are
-/boot/config-$(uname -r), and /usr/src/linux-2.4/configs/.  You should
-pass the location of this file to Lustre using the --with-linux-config
-option.
-
-3.  An Example
-
-Here is an example for configuring Lustre:
-
-./configure --with-linux=/usr/src/linux-2.4.20-28.9_lustre.1.0.3 \
---with-kernel-source-header=/boot/kernel.h \
---with-linux-config=/boot/config-2.4.20-28.9_lustre.1.0.3smp
index 6d9b324..8211b00 100644 (file)
@@ -5,47 +5,13 @@
 
 AUTOMAKE_OPTIONS = foreign
 
-SUBDIRS = . include portals ldiskfs lvfs obdclass lov ldlm ptlrpc      \
-       obdecho osc mdc lmv  mds obdfilter ost llite cobd ptlbd smfs cmobd \
-       liblustre doc utils tests conf scripts
+SUBDIRS = include ldiskfs lvfs obdclass lov ldlm ptlrpc      \
+       obdecho osc mdc lmv  mds obdfilter ost llite cobd ptlbd smfs snapfs \
+       cmobd liblustre doc utils tests conf scripts autoconf
 
-EXTRA_DIST = BUGS FDL Rules.in kernel_patches kernel-tests/Makefile    \
+EXTRA_DIST = BUGS FDL Rules.in kernel_patches \
        README.kernel-source
 
-# these empty rules are needed so that automake doesn't add its own
-# recursive rules
-etags-recursive:
-
-ctags-recursive:
-
-tags-recursive:
-
-TAGS:
-
-tags:
-       rm -f $(top_srcdir)/TAGS
-       ETAGSF=`etags --version | grep -iq exuberant && \
-               echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \
-       find $(top_srcdir) -name '*.[hc]' | xargs etags $$ETAGSF -a
-
-       rm -f $(top_srcdir)/tags
-       CTAGSF=`ctags --version | grep -iq exuberant && \
-               echo "-I __initdata,__exitdata,EXPORT_SYMBOL"`; \
-       find $(top_srcdir) -name '*.[hc]' | xargs ctags $$CTAGSF -a
-
-if MODULES
-all-am: modules
-
-if !LINUX25
-DEP = dep
-dep: .depend
-
-.depend:
-       $(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) -o scripts -o include/config/MARKER _sfdep_$(PWD) _FASTDEP_ALL_SUB_DIRS="$(PWD)"
-
-CLEANFILES = .depend
-endif
-
 if LDISKFS
 LDISKFS = ldiskfs-sources
 ldiskfs-sources:
@@ -55,27 +21,17 @@ endif
 lvfs-sources:
        $(MAKE) sources -C lvfs
 
-modules: lustre_build_version $(DEP) $(LDISKFS) lvfs-sources
-       $(MAKE) $(ARCH_UM) -C $(LINUX) -f $(PWD)/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$(LINUX_CONFIG) $(MODULE_TARGET)=$(PWD) -o tmp_include_depends -o scripts -o include/config/MARKER $@
-
-endif # MODULES
+sources: $(LDISKFS) lvfs-sources lustre_build_version
 
 all-recursive: lustre_build_version
 
 lustre_build_version:
-       perl $(top_builddir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
+       perl $(top_builddir)/lustre/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver
        echo "#define LUSTRE_RELEASE @RELEASE@" >> tmpver
-       cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \
+       cmp -s $(top_builddir)/lustre/include/linux/lustre_build_version.h tmpver \
                 2> /dev/null &&                                            \
                 $(RM) tmpver ||                                            \
-                mv tmpver $(top_builddir)/include/linux/lustre_build_version.h
-
-dist-hook:
-       find $(distdir) -name .deps | xargs rm -rf
-       find $(distdir) -name CVS | xargs rm -rf
-
-rpms: dist Makefile
-       rpmbuild -ta $(distdir).tar.gz
+                mv tmpver $(top_builddir)/lustre/include/linux/lustre_build_version.h
 
 CSTK=/tmp/checkstack
 CSTKO=/tmp/checkstack.orig
diff --git a/lustre/autoconf/Makefile.am b/lustre/autoconf/Makefile.am
new file mode 100644 (file)
index 0000000..7a747da
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST := lustre-core.m4 lustre-version.ac
diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4
new file mode 100644 (file)
index 0000000..be1bed5
--- /dev/null
@@ -0,0 +1,489 @@
+#
+# LC_CONFIG_SRCDIR
+#
+# Wrapper for AC_CONFIG_SRCDIR
+#
+# Passes lustre/obdclass/obdo.c to AC_CONFIG_SRCDIR.
+#
+AC_DEFUN([LC_CONFIG_SRCDIR],
+[AC_CONFIG_SRCDIR([lustre/obdclass/obdo.c])
+])
+
+#
+# LC_PATH_DEFAULTS
+#
+# lustre specific paths
+#
+# Sets and AC_SUBSTs LUSTRE ($PWD/lustre), rootsbindir (/sbin), demodir,
+# pkgexampledir and pymoddir.  The last three are single-quoted, so the
+# shell stores their $(...) and ${...} references literally rather than
+# expanding them while configure runs.
+#
+AC_DEFUN([LC_PATH_DEFAULTS],
+[# ptlrpc kernel build requires this
+LUSTRE="$PWD/lustre"
+AC_SUBST(LUSTRE)
+
+# mount.lustre
+rootsbindir='/sbin'
+AC_SUBST(rootsbindir)
+
+demodir='$(docdir)/demo'
+AC_SUBST(demodir)
+
+pkgexampledir='${pkgdatadir}/examples'
+AC_SUBST(pkgexampledir)
+
+pymoddir='${pkglibdir}/python/Lustre'
+AC_SUBST(pymoddir)
+])
+
+#
+# LC_TARGET_SUPPORTED
+#
+# is the target os supported?
+#
+# $1: shell code to run when $target_os matches linux*
+# $2: shell code to run for any other $target_os
+#
+AC_DEFUN([LC_TARGET_SUPPORTED],
+[case $target_os in
+       linux*)
+$1
+               ;;
+       *)
+$2
+               ;;
+esac
+])
+
+#
+# LC_CONFIG_EXT3
+#
+# check that ext3 and its extended attributes are enabled in the kernel
+#
+# NOTE(review): the argument roles below assume LB_LINUX_CONFIG takes
+# (option, action-if-set, action-if-unset); it is defined outside this
+# file - confirm.
+# $1: run when EXT3_FS_XATTR is set
+# $2: run when neither EXT3_FS nor EXT3_FS_MODULE is set
+# $3: run when EXT3_FS_XATTR is not set
+#
+AC_DEFUN([LC_CONFIG_EXT3],
+[LB_LINUX_CONFIG([EXT3_FS],[],[
+       LB_LINUX_CONFIG([EXT3_FS_MODULE],[],[$2])
+])
+LB_LINUX_CONFIG([EXT3_FS_XATTR],[$1],[$3])
+])
+
+#
+# LC_FSHOOKS
+#
+# If we have (and can build) fshooks.h
+#
+# When $LINUX/include/linux/fshooks.h exists, a program including it is
+# test-compiled; a compile failure aborts configure with AC_MSG_ERROR,
+# otherwise $1 is run.  When the header does not exist, $2 is run.
+#
+AC_DEFUN([LC_FSHOOKS],
+[AC_CHECK_FILE([$LINUX/include/linux/fshooks.h],[
+       AC_MSG_CHECKING([if fshooks.h can be compiled])
+       LB_LINUX_TRY_COMPILE([
+               #include <linux/fshooks.h>
+       ],[],[
+               AC_MSG_RESULT([yes])
+       ],[
+               AC_MSG_RESULT([no])
+               AC_MSG_WARN([You might have better luck with gcc 3.3.x.])
+               AC_MSG_WARN([You can set CC=gcc33 before running configure.])
+               AC_MSG_ERROR([Your compiler cannot build fshooks.h.])
+       ])
+$1
+],[
+$2
+])
+])
+
+#
+# LC_STRUCT_KIOBUF
+#
+# rh 2.4.18 has iobuf->dovary, but other kernels do not
+#
+# Defines HAVE_KIOBUF_DOVARY when a test program that assigns to the
+# dovary member of struct kiobuf builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LC_STRUCT_KIOBUF],
+[AC_MSG_CHECKING([if struct kiobuf has a dovary field])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/iobuf.h>
+],[
+       struct kiobuf iobuf;
+       iobuf.dovary = 1;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_FUNC_COND_RESCHED
+#
+# cond_resched() was introduced in 2.4.20
+#
+# Defines HAVE_COND_RESCHED when a test program calling cond_resched()
+# with <linux/sched.h> included builds under LB_LINUX_TRY_COMPILE.
+#
+AC_DEFUN([LC_FUNC_COND_RESCHED],
+[AC_MSG_CHECKING([if kernel offers cond_resched])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/sched.h>
+],[
+       cond_resched();
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_FUNC_ZAP_PAGE_RANGE
+#
+# if zap_page_range() takes a vma arg
+#
+# Defines ZAP_PAGE_RANGE_VMA when $LINUX/include/linux/mm.h has a line
+# matching "zap_page_range.*struct vm_area_struct".  A missing or
+# unreadable mm.h is reported as "no" rather than mistaken for a match.
+#
+AC_DEFUN([LC_FUNC_ZAP_PAGE_RANGE],
+[AC_MSG_CHECKING([if zap_page_range takes a vma parameter])
+# an empty count means grep could not read the header
+ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h 2>/dev/null`"
+if test -n "$ZAP_PAGE_RANGE_VMA" && test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
+       AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
+       AC_MSG_RESULT([yes])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+#
+# LC_FUNC_PDE
+#
+# if proc_fs.h defines PDE()
+#
+# Defines HAVE_PDE when $LINUX/include/linux/proc_fs.h has a line matching
+# "proc_dir_entry..PDE".  A missing or unreadable proc_fs.h is reported
+# as "no" rather than mistaken for a match.
+#
+AC_DEFUN([LC_FUNC_PDE],
+[AC_MSG_CHECKING([if kernel defines PDE])
+# an empty count means grep could not read the header
+HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h 2>/dev/null`"
+if test -n "$HAVE_PDE" && test "$HAVE_PDE" != 0 ; then
+       AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
+       AC_MSG_RESULT([yes])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+#
+# LC_FUNC_DIRECT_IO
+#
+# if direct_IO takes a struct file argument
+#
+# Defines HAVE_DIO_FILE when $LINUX/include/linux/fs.h has a line matching
+# "direct_IO.*struct file".  A missing or unreadable fs.h is reported as
+# "no" rather than mistaken for a match.
+#
+AC_DEFUN([LC_FUNC_DIRECT_IO],
+[AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
+# an empty count means grep could not read the header
+HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h 2>/dev/null`"
+if test -n "$HAVE_DIO_FILE" && test "$HAVE_DIO_FILE" != 0 ; then
+       AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
+       AC_MSG_RESULT([yes])
+else
+       AC_MSG_RESULT([no])
+fi
+])
+
+#
+# LC_HEADER_MM_INLINE
+#
+# RHEL kernels define page_count in mm_inline.h
+#
+# Defines HAVE_MM_INLINE when a test program including
+# <linux/mm_inline.h> builds; the program deliberately fails with #error
+# unless page_count is a preprocessor macro after that include.
+#
+AC_DEFUN([LC_HEADER_MM_INLINE],
+[AC_MSG_CHECKING([if kernel has mm_inline.h header])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/mm_inline.h>
+],[
+       #ifndef page_count
+       #error mm_inline.h does not define page_count
+       #endif
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_STRUCT_INODE
+#
+# if inode->i_alloc_sem exists
+#
+# Defines HAVE_I_ALLOC_SEM when the test program builds.  The program
+# fails with #error on kernels older than 2.4.24 (its message cites a
+# broken down_read_trylock) and otherwise takes the address of the
+# i_alloc_sem member of struct inode.
+#
+AC_DEFUN([LC_STRUCT_INODE],
+[AC_MSG_CHECKING([if struct inode has i_alloc_sem])
+LB_LINUX_TRY_COMPILE([
+       #include <linux/fs.h>
+       #include <linux/version.h>
+],[
+       #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24))
+       #error "down_read_trylock broken before 2.4.24"
+       #endif
+       struct inode i;
+       return (char *)&i.i_alloc_sem - (char *)&i;
+],[
+       AC_MSG_RESULT([yes])
+       AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
+],[
+       AC_MSG_RESULT([no])
+])
+])
+
+#
+# LC_CONFIG_BACKINGFS
+#
+# whether to use extN or ldiskfs instead of ext3
+#
+# Picks the backing filesystem and AC_SUBSTs it as BACKINGFS:
+#   ext3     the default; ext3 support is then checked in the kernel config
+#   extN     with --enable-extN
+#   ldiskfs  with --enable-ldiskfs (defaults to the value of $linux25);
+#            wins over extN when both are enabled, needs patch or quilt,
+#            and selects LDISKFS_SERIES depending on LC_FSHOOKS
+# --disable-extN and --disable-ldiskfs leave the respective backend off.
+#
+AC_DEFUN([LC_CONFIG_BACKINGFS],
+[
+BACKINGFS='ext3'
+
+# LLNL patches their ext3 and calls it extN
+AC_MSG_CHECKING([whether to use extN])
+AC_ARG_ENABLE([extN],
+       AC_HELP_STRING([--enable-extN],
+                       [use extN instead of ext3 for lustre backend]),
+       [],[enable_extN='no'])
+AC_MSG_RESULT([$enable_extN])
+# the action-if-given of AC_ARG_ENABLE also runs for --disable-extN
+# so the backend is chosen from the resulting value instead
+if test x$enable_extN != xno ; then
+       BACKINGFS='extN'
+fi
+
+# SuSE gets ldiskfs
+AC_MSG_CHECKING([whether to enable ldiskfs])
+AC_ARG_ENABLE([ldiskfs],
+       AC_HELP_STRING([--enable-ldiskfs],
+                       [use ldiskfs for the Lustre backing FS]),
+       [],[enable_ldiskfs="$linux25"])
+AC_MSG_RESULT([$enable_ldiskfs])
+
+if test x$enable_ldiskfs = xyes ; then
+       BACKINGFS="ldiskfs"
+
+       AC_PATH_PROG(PATCH, patch, [no])
+       AC_PATH_PROG(QUILT, quilt, [no])
+
+       if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then
+               AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
+       fi
+
+       AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module])
+       AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs])
+       AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls])
+       AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security])
+fi
+
+AC_MSG_CHECKING([which backing filesystem to use])
+AC_MSG_RESULT([$BACKINGFS])
+AC_SUBST(BACKINGFS)
+
+case $BACKINGFS in
+       ext3)
+               # --- Check that ext3 and ext3 xattr are enabled in the kernel
+               LC_CONFIG_EXT3([],[
+                       AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel])
+               ],[
+                       AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel])
+                       AC_MSG_WARN([This build may fail.])
+               ])
+               ;;
+       ldiskfs)
+               LC_FSHOOKS([
+                       LDISKFS_SERIES="2.6-suse.series"
+               ],[
+                       LDISKFS_SERIES="2.6-vanilla.series"
+               ])
+               AC_SUBST(LDISKFS_SERIES)
+               ;;
+esac # $BACKINGFS
+])
+
+#
+# LC_CONFIG_PINGER
+#
+# the pinger is temporary, until we have the recovery node in place
+#
+# ENABLE_PINGER is defined unless --disable-pinger is given.
+#
+AC_DEFUN([LC_CONFIG_PINGER],
+[AC_MSG_CHECKING([whether to enable pinger support])
+AC_ARG_ENABLE([pinger],
+       AC_HELP_STRING([--disable-pinger],
+                       [disable recovery pinger support]),
+       [],[enable_pinger='yes'])
+AC_MSG_RESULT([$enable_pinger])
+if test x$enable_pinger != xno ; then
+       AC_DEFINE(ENABLE_PINGER, 1, [Use the Pinger])
+fi
+])
+
+#
+# LC_CONFIG_OBD_BUFFER_SIZE
+#
+# the maximum buffer size of lctl ioctls
+#
+# Defines OBD_MAX_IOCTL_BUFFER to the value of --with-obd-buffer-size,
+# or to 8192 when the option is not given.
+# NOTE(review): the value is used as given, without validation; giving the
+# option without "=size" presumably yields "yes" - confirm and consider
+# rejecting non-numeric values.
+#
+AC_DEFUN([LC_CONFIG_OBD_BUFFER_SIZE],
+[AC_MSG_CHECKING([maximum OBD ioctl size])
+AC_ARG_WITH([obd-buffer-size],
+       AC_HELP_STRING([--with-obd-buffer-size=[size]],
+                       [set lctl ioctl maximum bytes (default=8192)]),
+       [
+               OBD_BUFFER_SIZE=$with_obd_buffer_size
+       ],[
+               OBD_BUFFER_SIZE=8192
+       ])
+AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
+AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
+])
+
+#
+# LC_CONFIG_SNAPFS
+#
+# Whether snapfs is desired
+#
+# Only records and reports the choice in $enable_snapfs (default "no");
+# nothing is defined or substituted here.
+#
+AC_DEFUN([LC_CONFIG_SNAPFS],
+[# snap compilation
+AC_MSG_CHECKING([whether to enable snapfs support])
+AC_ARG_ENABLE([snapfs],
+       AC_HELP_STRING([--enable-snapfs],
+                       [build snapfs]),
+       [],[enable_snapfs='no'])
+AC_MSG_RESULT([$enable_snapfs])
+])
+
+#
+# LC_CONFIG_SMFS
+#
+# whether smfs is desired
+#
+# Only records and reports the choice in $enable_smfs (default "no");
+# nothing is defined or substituted here.
+#
+AC_DEFUN([LC_CONFIG_SMFS],
+[AC_MSG_CHECKING([whether to enable smfs support])
+AC_ARG_ENABLE([smfs],
+       AC_HELP_STRING([--enable-smfs],
+                       [build smfs]),
+       [],[enable_smfs='no'])
+AC_MSG_RESULT([$enable_smfs])
+])
+
+#
+# LC_PROG_LINUX
+#
+# Lustre linux kernel checks
+#
+# Runs the option-handling LC_CONFIG_* macros first, then the kernel
+# feature probes, in the order listed.
+#
+AC_DEFUN([LC_PROG_LINUX],
+[LC_CONFIG_BACKINGFS
+LC_CONFIG_PINGER
+LC_CONFIG_OBD_BUFFER_SIZE
+LC_CONFIG_SNAPFS
+LC_CONFIG_SMFS
+
+LC_STRUCT_KIOBUF
+LC_FUNC_COND_RESCHED
+LC_FUNC_ZAP_PAGE_RANGE
+LC_FUNC_PDE
+LC_FUNC_DIRECT_IO
+LC_HEADER_MM_INLINE
+LC_STRUCT_INODE
+])
+
+#
+# LC_CONFIG_LIBLUSTRE
+#
+# whether to build liblustre
+#
+# $enable_liblustre defaults to the value of $enable_libsysio (set outside
+# this macro) unless --enable-liblustre/--disable-liblustre is given.
+# $enable_mpitests defaults to "no" unless --enable-mpitests is given.
+# Both choices are only recorded and reported here.
+#
+AC_DEFUN([LC_CONFIG_LIBLUSTRE],
+[AC_MSG_CHECKING([whether to build Lustre library])
+AC_ARG_ENABLE([liblustre],
+       AC_HELP_STRING([--disable-liblustre],
+                       [disable building of Lustre library]),
+       [],[enable_liblustre=$enable_libsysio])
+AC_MSG_RESULT([$enable_liblustre])
+
+AC_MSG_CHECKING([whether to build mpitests])
+AC_ARG_ENABLE([mpitests],
+       AC_HELP_STRING([--enable-mpitests],
+                       [build liblustre mpi tests]),
+       [],[enable_mpitests=no])
+AC_MSG_RESULT([$enable_mpitests])
+])
+
+#
+# LC_CONFIGURE
+#
+# other configure checks
+#
+# Header and function probes; the comment above each group names the
+# source file the probes are listed for.
+#
+AC_DEFUN([LC_CONFIGURE],
+[# include/liblustre.h
+AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h])
+
+# liblustre/llite_lib.h
+AC_CHECK_HEADERS([xtio.h file.h])
+
+# liblustre/dir.c
+AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
+
+# liblustre/lutil.c
+AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
+AC_CHECK_FUNCS([inet_ntoa])
+])
+
+#
+# LC_CONDITIONALS
+#
+# AM_CONDITIONALS for lustre
+#
+# Each conditional is declared exactly once.  All of them test the
+# matching enable_* variable for "yes", except USE_QUILT, which is true
+# whenever $QUILT is anything other than "no".
+#
+AC_DEFUN([LC_CONDITIONALS],
+[AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
+AM_CONDITIONAL(EXTN, test x$enable_extN = xyes)
+AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes)
+AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
+AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes)
+AM_CONDITIONAL(SNAPFS, test x$enable_snapfs = xyes)
+AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes)
+])
+
+#
+# LC_CONFIG_FILES
+#
+# files that should be generated with AC_OUTPUT
+#
+# Registers the Makefile and autoMakefile outputs of the lustre
+# subdirectories, plus lustre/scripts/version_tag.pl, with AC_CONFIG_FILES.
+#
+AC_DEFUN([LC_CONFIG_FILES],
+[AC_CONFIG_FILES([
+lustre/Makefile
+lustre/autoMakefile
+lustre/autoconf/Makefile
+lustre/cmobd/Makefile
+lustre/cmobd/autoMakefile
+lustre/cobd/Makefile
+lustre/cobd/autoMakefile
+lustre/conf/Makefile
+lustre/doc/Makefile
+lustre/include/Makefile
+lustre/include/linux/Makefile
+lustre/include/lustre/Makefile
+lustre/ldiskfs/Makefile
+lustre/ldiskfs/autoMakefile
+lustre/ldlm/Makefile
+lustre/liblustre/Makefile
+lustre/liblustre/tests/Makefile
+lustre/llite/Makefile
+lustre/llite/autoMakefile
+lustre/lmv/Makefile
+lustre/lmv/autoMakefile
+lustre/lov/Makefile
+lustre/lov/autoMakefile
+lustre/lvfs/Makefile
+lustre/lvfs/autoMakefile
+lustre/mdc/Makefile
+lustre/mdc/autoMakefile
+lustre/mds/Makefile
+lustre/mds/autoMakefile
+lustre/obdclass/Makefile
+lustre/obdclass/autoMakefile
+lustre/obdecho/Makefile
+lustre/obdecho/autoMakefile
+lustre/obdfilter/Makefile
+lustre/obdfilter/autoMakefile
+lustre/osc/Makefile
+lustre/osc/autoMakefile
+lustre/ost/Makefile
+lustre/ost/autoMakefile
+lustre/ptlbd/Makefile
+lustre/ptlbd/autoMakefile
+lustre/ptlrpc/Makefile
+lustre/ptlrpc/autoMakefile
+lustre/scripts/Makefile
+lustre/scripts/version_tag.pl
+lustre/smfs/Makefile
+lustre/smfs/autoMakefile
+lustre/snapfs/Makefile
+lustre/snapfs/autoMakefile
+lustre/snapfs/utils/Makefile
+lustre/tests/Makefile
+lustre/utils/Lustre/Makefile
+lustre/utils/Makefile
+])
+])
diff --git a/lustre/autoconf/lustre-version.ac b/lustre/autoconf/lustre-version.ac
new file mode 100644 (file)
index 0000000..756fba6
--- /dev/null
@@ -0,0 +1 @@
+m4_define([LUSTRE_VERSION],[1.3.9.1])
diff --git a/lustre/autogen.sh b/lustre/autogen.sh
deleted file mode 100644 (file)
index e1c2c6c..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-
-# taken from gnome-common/macros2/autogen.sh
-compare_versions() {
-    ch_min_version=$1
-    ch_actual_version=$2
-    ch_status=0
-    IFS="${IFS=         }"; ch_save_IFS="$IFS"; IFS="."
-    set $ch_actual_version
-    for ch_min in $ch_min_version; do
-        ch_cur=`echo $1 | sed 's/[^0-9].*$//'`; shift # remove letter suffixes
-        if [ -z "$ch_min" ]; then break; fi
-        if [ -z "$ch_cur" ]; then ch_status=1; break; fi
-        if [ $ch_cur -gt $ch_min ]; then break; fi
-        if [ $ch_cur -lt $ch_min ]; then ch_status=1; break; fi
-    done
-    IFS="$ch_save_IFS"
-    return $ch_status
-}
-
-error_msg() {
-       echo "$cmd is $1.  version $required is required to build Lustre."
-
-       if [ -e /usr/lib/autolustre/bin/$cmd ]; then
-               cat >&2 <<-EOF
-               You apparently already have Lustre-specific autoconf/make RPMs
-               installed on your system at /usr/lib/autolustre/share/$cmd.
-               Please set your PATH to point to those versions:
-
-               export PATH="/usr/lib/autolustre/bin:\$PATH"
-               EOF
-       else
-               cat >&2 <<-EOF
-               CFS provides RPMs which can be installed alongside your
-               existing autoconf/make RPMs, if you are nervous about
-               upgrading.  See
-
-               ftp://ftp.lustre.org/pub/other/autolustre/README.autolustre
-
-               You may be able to download newer version from:
-
-               http://ftp.gnu.org/gnu/$cmd/$cmd-$required.tar.gz
-       EOF
-       fi
-       [ "$cmd" = "autoconf" -a "$required" = "2.57" ] && cat >&2 <<EOF
-
-or for RH9 systems you can use:
-
-ftp://fr2.rpmfind.net/linux/redhat/9/en/os/i386/RedHat/RPMS/autoconf-2.57-3.noarch.rpm
-EOF
-       [ "$cmd" = "automake-1.7" -a "$required" = "1.7.8" ] && cat >&2 <<EOF
-
-or for RH9 systems you can use:
-
-ftp://fr2.rpmfind.net/linux/fedora/core/1/i386/os/Fedora/RPMS/automake-1.7.8-1.noarch.rpm
-EOF
-       exit 1
-}
-
-check_version() {
-    local tool
-    local cmd
-    local required
-    local version
-
-    tool=$1
-    cmd=$2
-    required=$3
-    echo -n "checking for $cmd $required... "
-    if ! $cmd --version >/dev/null ; then
-       error_msg "missing"
-    fi
-    version=$($cmd --version | awk "BEGIN { IGNORECASE=1 } /$tool \(GNU $tool\)/ { print \$4 }")
-    echo "found $version"
-    if ! compare_versions "$required" "$version" ; then
-       error_msg "too old"
-    fi
-}
-
-check_version automake automake-1.7 "1.7.8"
-check_version autoconf autoconf "2.57"
-echo "Running aclocal..."
-aclocal-1.7
-echo "Running autoheader..."
-autoheader
-echo "Running automake..."
-automake-1.7 -a -c
-echo "Running autoconf..."
-autoconf
-
diff --git a/lustre/conf/Makefile.am b/lustre/conf/Makefile.am
index 6e3666b..978cf29 100644 (file)
@@ -6,7 +6,9 @@
 EXTRA_DIST = lustre.dtd lustre.schema slapd-lustre.conf lustre2ldif.xsl top.ldif
 ldapconfdir = $(sysconfdir)/openldap
 ldapschemadir = $(sysconfdir)/openldap/schema
+
+if UTILS
 ldapconf_SCRIPTS = slapd-lustre.conf
 ldapschema_SCRIPTS = lustre.schema
-pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)'
-pkglib_DATA = top.ldif lustre2ldif.xsl
+pkgdata_DATA = top.ldif lustre2ldif.xsl
+endif
diff --git a/lustre/configure.in b/lustre/configure.in
deleted file mode 100644 (file)
index 5c9ffc0..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-# Copyright (C) 2001-2003 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-AC_INIT
-AC_CANONICAL_SYSTEM
-AM_INIT_AUTOMAKE(lustre, 1.3.9.1)
-# AM_MAINTAINER_MODE
-
-# Four main targets: lustre kernel modules, utilities, tests, and liblustre
-AC_MSG_CHECKING([whether to build kernel modules])
-AC_ARG_ENABLE([modules],
-       AC_HELP_STRING([--disable-modules],
-                       [disable building of Lustre kernel modules]),
-       [],[enable_modules='yes'])
-AC_MSG_RESULT([$enable_modules])
-AM_CONDITIONAL(MODULES, test x$enable_modules = xyes)
-
-AC_MSG_CHECKING([whether to build Lustre library])
-AC_ARG_ENABLE([liblustre],
-       AC_HELP_STRING([--disable-liblustre],
-                       [disable building of Lustre library]),
-       [],[enable_liblustre='yes'])
-AC_MSG_RESULT([$enable_liblustre])
-AM_CONDITIONAL(LIBLUSTRE, test x$enable_liblustre = xyes)
-
-AC_MSG_CHECKING([whether to build utilities])
-AC_ARG_ENABLE([utils],
-       AC_HELP_STRING([--disable-utils],
-                       [disable building of Lustre utility programs]),
-       [],[enable_utils='yes'])
-AC_MSG_RESULT([$enable_utils])
-
-AC_MSG_CHECKING([whether to build Lustre tests])
-AC_ARG_ENABLE([tests],
-       AC_HELP_STRING([--disable-tests],
-                       [disable building of Lustre tests]),
-       [],[enable_tests='yes'])
-AC_MSG_RESULT([$enable_tests])
-
-# specify whether to build docs or not
-AC_MSG_CHECKING([whether to build docs])
-AC_ARG_ENABLE(doc,
-       AC_HELP_STRING([--disable-doc],
-                       [skip creation of pdf documentation]),
-       [
-               if test x$enable_doc = xyes ; then
-                   ENABLE_DOC=1           
-               else
-                   ENABLE_DOC=0
-               fi
-       ],[
-               ENABLE_DOC=0
-               enable_doc='no'
-       ])
-AC_MSG_RESULT([$enable_doc])
-AM_CONDITIONAL(DOC, test x$ENABLE_DOC = x1)
-AC_SUBST(ENABLE_DOC)
-
-# default backing fs is ext3
-BACKINGFS='ext3'
-
-# SuSE gets ldiskfs
-AC_MSG_CHECKING([whether to enable ldiskfs])
-AC_ARG_ENABLE([ldiskfs],
-       AC_HELP_STRING([--enable-ldiskfs],
-                       [use ldiskfs for the Lustre backing FS]),
-       [BACKINGFS='ldiskfs'],[enable_ldiskfs='no'])
-AC_MSG_RESULT([$enable_ldiskfs])
-
-AC_MSG_CHECKING([which backing filesystem to use])
-AC_MSG_RESULT([$BACKINGFS])
-AC_SUBST(BACKINGFS)
-
-# the pinger is temporary, until we have the recovery node in place
-AC_MSG_CHECKING([whether to enable pinger support])
-AC_ARG_ENABLE([pinger],
-       AC_HELP_STRING([--disable-pinger],
-                       [disable recovery pinger support]),
-       [],[enable_pinger='yes'])
-AC_MSG_RESULT([$enable_pinger])
-if test x$enable_pinger != xno ; then
-  AC_DEFINE(ENABLE_PINGER, 1, Use the Pinger)
-fi
-
-AC_MSG_CHECKING([maximum OBD ioctl size])
-AC_ARG_WITH([obd-buffer-size],
-       AC_HELP_STRING([--with-obd-buffer-size=[size]],
-                       [set lctl ioctl maximum bytes (default=8192)]),
-       [
-               OBD_BUFFER_SIZE=$with_obd_buffer_size
-       ],[
-               OBD_BUFFER_SIZE=8192
-       ])
-AC_MSG_RESULT([$OBD_BUFFER_SIZE bytes])
-AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size])
-
-# specify location of libsysio tree
-AC_MSG_CHECKING([location of libsysio])
-AC_ARG_WITH([sysio],
-       AC_HELP_STRING([--with-sysio=[path]],
-                       [set path to libsysio source (default=../libsysio)]),
-       [
-               SYSIO=$with_sysio
-               SYSIO_PATH=$SYSIO
-       ],[
-               SYSIO='$(top_srcdir)/../libsysio'
-               SYSIO_PATH='../libsysio'
-       ])
-AC_MSG_RESULT([$SYSIO_PATH])
-
-AC_CHECK_FILE([$SYSIO_PATH/lib/libsysio.a],[],
-       [
-               if test x$enable_liblustre = xyes ; then
-                  AC_MSG_ERROR([A built libsysio tree is required for building liblustre.])
-               fi
-       ])
-AC_SUBST(SYSIO)
-
-#build mpi-tests 
-AC_MSG_CHECKING([whether to build mpitests])
-AC_ARG_ENABLE([mpitests],
-       AC_HELP_STRING([--enable-mpitests],
-                       [build liblustre mpi tests]),
-       [],[enable_mpitests=no])
-AC_MSG_RESULT([$enable_mpitests])
-AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
-
-# snap compilation
-AC_MSG_CHECKING([whether to enable snapfs support])
-AC_ARG_ENABLE([snapfs],
-       AC_HELP_STRING([--enable-snapfs],
-                       [build snapfs]),
-       [],[enable_snapfs='no'])
-AC_MSG_RESULT([$enable_snapfs])
-AM_CONDITIONAL(SNAPFS, test x$enable_snapfs = xyes)
-
-# smfs compilation
-AC_MSG_CHECKING([whether to enable smfs support])
-AC_ARG_ENABLE([smfs],
-       AC_HELP_STRING([--enable-smfs],
-                       [build smfs]),
-       [],[enable_smfs='no'])
-AC_MSG_RESULT([$enable_smfs])
-AM_CONDITIONAL(SMFS, test x$enable_smfs = xyes)
-
-sinclude(portals/build.m4)
-sinclude(portals/archdep.m4)
-
-AM_CONDITIONAL(LDISKFS, test x$enable_ldiskfs = xyes)
-
-if test x$enable_inkernel = xyes ; then
-       find . -name Makefile.mk | sed 's/.mk$//' | xargs -n 1 \
-               sh -e -x -c '(cp -f $0.mk $0.in)'
-fi
-
-# we need to pass a full path here for kernel makefiles
-INCLUDE_RULES="include $PWD/Rules"
-AC_SUBST(INCLUDE_RULES)
-
-# ptlrpc kernel build requires this
-LUSTRE="$PWD"
-AC_SUBST(LUSTRE)
-
-#
-AM_CONDITIONAL(UTILS, test x$enable_utils = xyes)
-AM_CONDITIONAL(TESTS, test x$enable_tests = xyes)
-
-AM_CONFIG_HEADER(include/config.h)
-
-AC_OUTPUT([
-Makefile
-Rules
-autoMakefile
-cobd/Makefile
-cobd/autoMakefile
-conf/Makefile
-doc/Makefile
-include/Makefile
-include/linux/Makefile
-include/lustre/Makefile
-ldiskfs/Makefile
-ldiskfs/autoMakefile
-ldlm/Makefile
-liblustre/Makefile
-liblustre/tests/Makefile
-llite/Makefile
-llite/autoMakefile
-lov/Makefile
-lov/autoMakefile
-lmv/Makefile
-lmv/autoMakefile
-lvfs/Makefile
-lvfs/autoMakefile
-mdc/Makefile
-mdc/autoMakefile
-mds/Makefile
-mds/autoMakefile
-obdclass/Makefile
-obdclass/autoMakefile
-obdecho/Makefile
-obdecho/autoMakefile
-obdfilter/Makefile
-obdfilter/autoMakefile
-osc/Makefile
-osc/autoMakefile
-ost/Makefile
-ost/autoMakefile
-portals/Kernelenv
-portals/Makefile
-portals/autoMakefile
-portals/doc/Makefile
-portals/include/Makefile
-portals/include/linux/Makefile
-portals/include/portals/Makefile
-portals/knals/Makefile
-portals/knals/autoMakefile
-portals/knals/gmnal/Makefile
-portals/knals/gmnal/autoMakefile
-portals/knals/iibnal/Makefile
-portals/knals/iibnal/autoMakefile
-portals/knals/openibnal/Makefile
-portals/knals/openibnal/autoMakefile
-portals/knals/qswnal/Makefile
-portals/knals/qswnal/autoMakefile
-portals/knals/socknal/Makefile
-portals/knals/socknal/autoMakefile
-portals/knals/lonal/Makefile
-portals/knals/lonal/autoMakefile
-portals/knals/ranal/Makefile
-portals/knals/ranal/autoMakefile
-portals/libcfs/Makefile
-portals/libcfs/autoMakefile
-portals/portals/Makefile
-portals/portals/autoMakefile
-portals/router/Makefile
-portals/router/autoMakefile
-portals/tests/Makefile
-portals/tests/autoMakefile
-portals/unals/Makefile
-portals/utils/Makefile
-ptlbd/Makefile
-ptlbd/autoMakefile
-ptlrpc/Makefile
-ptlrpc/autoMakefile
-scripts/Makefile
-scripts/lustre.spec
-scripts/version_tag.pl
-smfs/Makefile
-smfs/autoMakefile
-cmobd/Makefile
-cmobd/autoMakefile
-snapfs/Makefile
-snapfs/autoMakefile
-snapfs/utils/Makefile
-tests/Makefile
-utils/Lustre/Makefile
-utils/Makefile
-])
diff --git a/lustre/include/Makefile.am b/lustre/include/Makefile.am
index 2a3f201..f2c2d76 100644 (file)
@@ -5,5 +5,5 @@
 # See the file COPYING in this distribution
 
 SUBDIRS = linux lustre
-EXTRA_DIST = config.h.in ioctl.h liblustre.h 
+EXTRA_DIST = ioctl.h liblustre.h 
 
diff --git a/lustre/include/linux/Makefile.am b/lustre/include/linux/Makefile.am
index 77c7afd..d187775 100644 (file)
@@ -14,4 +14,5 @@ EXTRA_DIST = lprocfs_status.h lustre_debug.h lustre_ha.h lustre_lib.h \
   lustre_net.h obd_class.h obd_ost.h obd_support.h lustre_commit_confd.h \
   lustre_export.h lustre_log.h obd_echo.h obd_ptlbd.h obd_trace.h \
   lustre_compat25.h lustre_fsfilt.h lustre_import.h lustre_mds.h obd.h \
-  lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h  lustre_idl.h lustre_smfs.h 
+  lvfs.h lvfs_linux.h lustre_cfg.h lustre_lite.h  lustre_idl.h lustre_smfs.h \
+  lustre_cmobd.h obd_lmv.h lustre_snap.h
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h
index f976cda..db19c01 100644 (file)
@@ -164,8 +164,6 @@ static inline void lustre_daemonize_helper(void)
                 page->private = 0; \
         } while(0)
 
-#define smp_num_cpus    NR_CPUS
-
 #define kiobuf bio
 
 #include <linux/proc_fs.h>
diff --git a/lustre/ldiskfs/autoMakefile.am b/lustre/ldiskfs/autoMakefile.am
index e33e4c9..4fd32d4 100644 (file)
@@ -27,8 +27,8 @@ linux/ldiskfs%.h: linux-stage/include/linux/ext3%.h
 # FIXME: we need to grab the series in configure somehow
 # (see bug 1679)
 #
-series := @top_srcdir@/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
-patches := @top_srcdir@/kernel_patches/patches
+series := @top_srcdir@/lustre/kernel_patches/series/ldiskfs-$(LDISKFS_SERIES)
+patches := @top_srcdir@/lustre/kernel_patches/patches
 
 sources: $(ext3_sources) $(ext3_headers) $(linux_headers) $(series)
        rm -rf linux-stage linux sources $(ldiskfs_SOURCES)
diff --git a/lustre/liblustre/tests/Makefile.am b/lustre/liblustre/tests/Makefile.am
index f2e39bd..ee16557 100644 (file)
@@ -4,7 +4,7 @@ AM_CPPFLAGS = -I$(SYSIO)/include -I/opt/lam/include $(LLCPPFLAGS) -I$(top_srcdir
 AM_CFLAGS = $(LLCFLAGS)
 LIBS = $(LIBEFENCE) $(LIBREADLINE)
 
-LLIB_EXEC= $(top_builddir)/liblustre/liblustre.a -lcap -lpthread
+LLIB_EXEC= $(top_builddir)/lustre/liblustre/liblustre.a -lcap -lpthread
 
 if LIBLUSTRE
 noinst_LIBRARIES = libtestcommon.a
@@ -19,30 +19,33 @@ endif # LIBLUSTRE
 
 libtestcommon_a_SOURCES = test_common.c test_common.h
 
-echo_test_SOURCES = echo_test.c  $(top_srcdir)/utils/parser.c $(top_srcdir)/utils/obd.c $(top_srcdir)/utils/lustre_cfg.c
+echo_test_SOURCES = echo_test.c $(top_srcdir)/lustre/utils/parser.c    \
+       $(top_srcdir)/lustre/utils/obd.c                                \
+       $(top_srcdir)/lustre/utils/lustre_cfg.c
+
 echo_test_CFLAGS = $(LL_CFLAGS)
-echo_test_LDADD = $(top_builddir)/liblustre/liblsupport.a $(LIBREADLINE) -lcap -lpthread 
-echo_test_DEPENDENCIES=$(top_builddir)/liblustre/liblsupport.a
+echo_test_LDADD = $(top_builddir)/lustre/liblustre/liblsupport.a $(LIBREADLINE) -lcap -lpthread 
+echo_test_DEPENDENCIES=$(top_builddir)/lustre/liblustre/liblsupport.a
 
 sanity_SOURCES = sanity.c
 sanity_CFLAGS = $(LL_CFLAGS)
 sanity_LDADD := libtestcommon.a $(LLIB_EXEC)
-sanity_DEPENDENCIES = $(top_builddir)/liblustre/liblustre.a libtestcommon.a
+sanity_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 recovery_small_SOURCES = recovery_small.c
 recovery_small_CFLAGS = $(LL_CFLAGS)
 recovery_small_LDADD := libtestcommon.a $(LLIB_EXEC) 
-recovery_small_DEPENDENCIES = $(top_builddir)/liblustre/liblustre.a libtestcommon.a
+recovery_small_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 replay_single_SOURCES = replay_single.c
 replay_single_CFLAGS = $(LL_CFLAGS)
 replay_single_LDADD := libtestcommon.a $(LLIB_EXEC)
-replay_single_DEPENDENCIES = $(top_builddir)/liblustre/liblustre.a libtestcommon.a
+replay_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 replay_ost_single_SOURCES = replay_ost_single.c
 replay_ost_single_CFLAGS = $(LL_CFLAGS)
 replay_ost_single_LDADD := libtestcommon.a $(LLIB_EXEC)
-replay_ost_single_DEPENDENCIES = $(top_builddir)/liblustre/liblustre.a libtestcommon.a
+replay_ost_single_DEPENDENCIES = $(top_builddir)/lustre/liblustre/liblustre.a libtestcommon.a
 
 if MPITESTS
 test_lock_cancel_SOURCES = test_lock_cancel.c
diff --git a/lustre/lvfs/autoMakefile.am b/lustre/lvfs/autoMakefile.am
index c2511c4..ca9c839 100644 (file)
@@ -43,7 +43,7 @@ endif # MODULES
 
 DIST_SOURCES = fsfilt.c fsfilt_ext3.c fsfilt_smfs.c fsfilt_reiserfs.c \
        lvfs_common.c lvfs_internal.h lvfs_linux.c llog.c llog_cat.c \
-       llog_lvfs.c lvfs_userfs.c
+       llog_lvfs.c lvfs_userfs.c fsfilt_snap_ext3.c
 
 MOSTLYCLEANFILES = *.o *.ko *.mod.c
 CLEANFILES = fsfilt-*.c fsfilt_ldiskfs.c fsfilt_extN.c sources
diff --git a/lustre/portals/.cvsignore b/lustre/portals/.cvsignore
deleted file mode 100644 (file)
index f30d862..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-Kernelenv
-Makefile
-autoMakefile
-autoMakefile.in
-aclocal.m4
-autom4te.cache
-config.log
-config.status
-configure
-.*.cmd
-.depend
diff --git a/lustre/portals/AUTHORS b/lustre/portals/AUTHORS
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lustre/portals/ChangeLog b/lustre/portals/ChangeLog
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lustre/portals/Kernelenv.in b/lustre/portals/Kernelenv.in
deleted file mode 100644 (file)
index 7a48c58..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
-# portals/utils/debug.c wants <linux/version.h> from userspace.  sigh.
-HOSTCFLAGS := -I@LINUX@/include $(EXTRA_CFLAGS)
-LIBREADLINE := @LIBREADLINE@
-# 2.5's makefiles aren't nice to cross dir libraries in host programs
-PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lustre/portals/Kernelenv.mk b/lustre/portals/Kernelenv.mk
deleted file mode 100644 (file)
index 7c66dfa..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-EXTRA_CFLAGS := -Ifs/lustre/include -Ifs/lustre/portals/include
-HOSTCFLAGS := $(EXTRA_CFLAGS)
-# the kernel doesn't want us to build archives for host binaries :/
-PTLCTLOBJS := debug.o l_ioctl.o parser.o portals.o
diff --git a/lustre/portals/Makefile.in b/lustre/portals/Makefile.in
deleted file mode 100644 (file)
index 71d0dc8..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-subdir-m += libcfs
-
-cray-subdirs += portals
-cray-subdirs += knals
-cray-subdirs += router
-cray-subdirs += tests
-@CRAY_PORTALS_FALSE@subdir-m += $(cray-subdirs)
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/Makefile.mk b/lustre/portals/Makefile.mk
deleted file mode 100644 (file)
index 73a19df..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-include $(src)/Kernelenv
-
-# The ordering of these determines the order that each subsystem's 
-# module_init() functions are called in.  if these are changed make sure
-# they reflect the dependencies between each subsystem's _init functions.
-obj-y += libcfs/
-obj-y += portals/
-obj-y += router/
-obj-y += knals/
-obj-y += tests/
-
-obj-m += utils/
diff --git a/lustre/portals/NEWS b/lustre/portals/NEWS
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lustre/portals/README b/lustre/portals/README
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4
deleted file mode 100644 (file)
index 2f1d563..0000000
+++ /dev/null
@@ -1,902 +0,0 @@
-# -------- we can't build modules unless srcdir = builddir
-if test x$enable_modules != xno ; then
-       AC_CHECK_FILE([autoMakefile.am],[],
-               [AC_MSG_ERROR([At this time, Lustre does not support building kernel modules with srcdir != buildir.])])
-fi
-
-# -------- in kernel compilation? (2.5 only) -------------
-AC_MSG_CHECKING([if inkernel build support is requested])
-AC_ARG_ENABLE([inkernel],
-       AC_HELP_STRING([--enable-inkernel],
-                      [set up 2.5 kernel makefiles]),
-       [],[enable_inkernel=no])
-AC_MSG_RESULT([$enable_inkernel])
-AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes)
-
-# -------- are we building against an external portals? -------
-AC_MSG_CHECKING([for Cray portals])
-AC_ARG_WITH([cray-portals],
-       AC_HELP_STRING([--with-cray-portals=path],
-                      [path to cray portals]),
-       [
-               if test "$with_cray_portals" != no; then
-                       CRAY_PORTALS_PATH=$with_cray_portals
-                       CRAY_PORTALS_INCLUDES="$with_cray_portals/include"
-                       CRAY_PORTALS_LIBS="$with_cray_portals"
-                fi
-       ],[with_cray_portals=no])
-AC_SUBST(CRAY_PORTALS_PATH)
-AC_MSG_RESULT([$CRAY_PORTALS_PATH])
-
-AC_MSG_CHECKING([for Cray portals includes])
-AC_ARG_WITH([cray-portals-includes],
-       AC_HELP_STRING([--with-cray-portals-includes=path],
-                      [path to cray portals includes]),
-       [
-               if test "$with_cray_portals_includes" != no; then
-                       CRAY_PORTALS_INCLUDES="$with_cray_portals_includes"
-                fi
-       ])
-AC_SUBST(CRAY_PORTALS_INCLUDES)
-AC_MSG_RESULT([$CRAY_PORTALS_INCLUDES])
-
-AC_MSG_CHECKING([for Cray portals libs])
-AC_ARG_WITH([cray-portals-libs],
-       AC_HELP_STRING([--with-cray-portals-libs=path],
-                      [path to cray portals libs]),
-       [
-               if test "$with_cray_portals_libs" != no; then
-                       CRAY_PORTALS_LIBS="$with_cray_portals_libs"
-                fi
-       ])
-AC_SUBST(CRAY_PORTALS_LIBS)
-AC_MSG_RESULT([$CRAY_PORTALS_LIBS])
-
-if test x$CRAY_PORTALS_INCLUDES != x ; then
-       if test ! -r $CRAY_PORTALS_INCLUDES/portals/api.h ; then
-               AC_MSG_ERROR([Cray portals headers were not found in $CRAY_PORTALS_INCLUDES.  Please check the paths passed to --with-cray-portals or --with-cray-portals-includes.])
-       fi
-fi
-if test x$CRAY_PORTALS_LIBS != x ; then
-       if test ! -r $CRAY_PORTALS_LIBS/libportals.a ; then
-               AC_MSG_ERROR([Cray portals libraries were not found in $CRAY_PORTALS_LIBS.  Please check the paths passed to --with-cray-portals or --with-cray-portals-libs.])
-       fi
-fi
-
-AC_MSG_CHECKING([whether to use Cray portals])
-if test x$CRAY_PORTALS_INCLUDES != x -a x$CRAY_PORTALS_LIBS != x ; then
-       with_cray_portals=yes
-       AC_DEFINE(CRAY_PORTALS, 1, [Building with Cray Portals])
-       CRAY_PORTALS_INCLUDES="-I$CRAY_PORTALS_INCLUDES"
-else
-       with_cray_portals=no
-fi
-AC_MSG_RESULT([$with_cray_portals])
-AM_CONDITIONAL(CRAY_PORTALS, test x$with_cray_portals != xno)
-
-# ----------------------------------------
-# some tests for catamount-like systems
-# ----------------------------------------
-AC_ARG_ENABLE([sysio_init],
-       AC_HELP_STRING([--disable-sysio-init],
-               [call sysio init functions when initializing liblustre]),
-       [],[enable_sysio_init=yes])
-AC_MSG_CHECKING([whether to initialize libsysio])
-AC_MSG_RESULT([$enable_sysio_init])
-if test x$enable_sysio_init != xno ; then
-       AC_DEFINE([INIT_SYSIO], 1, [call sysio init functions])
-fi
-
-AC_ARG_ENABLE([urandom],
-       AC_HELP_STRING([--disable-urandom],
-               [disable use of /dev/urandom for liblustre]),
-       [],[enable_urandom=yes])
-AC_MSG_CHECKING([whether to use /dev/urandom for liblustre])
-AC_MSG_RESULT([$enable_urandom])
-if test x$enable_urandom != xno ; then
-       AC_DEFINE([LIBLUSTRE_USE_URANDOM], 1, [use /dev/urandom for random data])
-fi
-
-# -------- check for -lcap and -lpthread ----
-if test x$enable_liblustre = xyes ; then
-       AC_CHECK_LIB([cap], [cap_get_proc],
-               [
-                       CAP_LIBS="-lcap"
-                       AC_DEFINE([HAVE_LIBCAP], 1, [use libcap])
-               ],
-               [CAP_LIBS=""])
-       AC_SUBST(CAP_LIBS)
-       AC_CHECK_LIB([pthread], [pthread_create],
-               [
-                       PTHREAD_LIBS="-lpthread"
-                       AC_DEFINE([HAVE_LIBPTHREAD], 1, [use libpthread])
-               ],
-               [PTHREAD_LIBS=""])
-       AC_SUBST(PTHREAD_LIBS)
-fi
-
-# -------- enable tests and utils? -------
-if test x$enable_tests = xno ; then
-       AC_MSG_NOTICE([disabling tests])
-       enable_tests=no
-fi
-if test x$enable_utils = xno ; then
-       AC_MSG_NOTICE([disabling utilities])
-       enable_utils=no
-fi
-
-if test x$enable_modules != xno ; then
-       # -------- set linuxdir ------------
-       AC_MSG_CHECKING([for Linux sources])
-       AC_ARG_WITH([linux],
-               AC_HELP_STRING([--with-linux=path],
-                              [set path to Linux source (default=/usr/src/linux)]),
-               [LINUX=$with_linux],
-               [LINUX=/usr/src/linux])
-       AC_MSG_RESULT([$LINUX])
-       AC_SUBST(LINUX)
-       if test x$enable_inkernel = xyes ; then
-               echo ln -s `pwd` $LINUX/fs/lustre
-               rm $LINUX/fs/lustre
-               ln -s `pwd` $LINUX/fs/lustre
-       fi
-
-       # -------- linux objects (for 2.6) --
-       AC_MSG_CHECKING([for Linux objects dir])
-       AC_ARG_WITH([linux-obj],
-               AC_HELP_STRING([--with-linux-obj=path],
-                               [set path to Linux objects dir (default=\$LINUX)]),
-               [LINUX_OBJ=$with_linux_obj],
-               [LINUX_OBJ=$LINUX])
-       AC_MSG_RESULT([$LINUX_OBJ])
-       AC_SUBST(LINUX_OBJ)
-
-       # -------- check for .config --------
-       AC_ARG_WITH([linux-config],
-               [AC_HELP_STRING([--with-linux-config=path],
-                               [set path to Linux .conf (default=\$LINUX_OBJ/.config)])],
-               [LINUX_CONFIG=$with_linux_config],
-               [LINUX_CONFIG=$LINUX_OBJ/.config])
-       AC_SUBST(LINUX_CONFIG)
-
-       AC_CHECK_FILE([/boot/kernel.h],
-               [KERNEL_SOURCE_HEADER='/boot/kernel.h'],
-               [AC_CHECK_FILE([/var/adm/running-kernel.h]),
-                       [KERNEL_SOURCE_HEADER='/var/adm/running-kernel.h']])
-
-       AC_ARG_WITH([kernel-source-header],
-               AC_HELP_STRING([--with-kernel-source-header=path],
-                               [Use a different kernel version header.  Consult README.kernel-source for details.]),
-               [KERNEL_SOURCE_HEADER=$with_kernel_source_header])
-
-       #  --------------------
-       ARCH_UM=
-       UML_CFLAGS=
-
-       AC_MSG_CHECKING([if you are running user mode linux for $host_cpu])
-       if test -e $LINUX/include/asm-um ; then
-               if test  X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then
-                       ARCH_UM='ARCH=um'
-                       # see notes in Rules.in
-                       UML_CFLAGS='-O0'
-                       AC_MSG_RESULT(yes)
-               else
-                       AC_MSG_RESULT([no (asm doesn't point at asm-um)])
-               fi
-       else
-               AC_MSG_RESULT([no (asm-um missing)])
-       fi
-
-       AC_SUBST(ARCH_UM)
-       AC_SUBST(UML_CFLAGS)
-
-       # --------- Linux 25 ------------------
-       AC_CHECK_FILE([$LINUX/include/linux/namei.h],
-               [
-                       linux25="yes"
-                       KMODEXT=".ko"
-                       enable_ldiskfs="yes"
-                       BACKINGFS="ldiskfs"
-               ],[
-                       KMODEXT=".o"
-                       linux25="no"
-               ])
-       AC_MSG_CHECKING([if you are using Linux 2.6])
-       AC_MSG_RESULT([$linux25])
-
-       AC_SUBST(LINUX25)
-       AC_SUBST(KMODEXT)
-
-       AC_PATH_PROG(PATCH, patch, [no])
-       AC_PATH_PROG(QUILT, quilt, [no])
-
-       if test x$enable_ldiskfs$PATCH$QUILT = xyesnono ; then
-               AC_MSG_ERROR([Quilt or patch are needed to build the ldiskfs module (for Linux 2.6)])
-       fi
-fi
-AM_CONDITIONAL(LINUX25, test x$linux25 = xyes)
-AM_CONDITIONAL(USE_QUILT, test x$QUILT != xno)
-
-# -------  Makeflags ------------------
-
-CPPFLAGS="$CPPFLAGS $CRAY_PORTALS_INCLUDES -I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include"
-
-# liblustre are all the same
-LLCPPFLAGS="-D__arch_lib__ -D_LARGEFILE64_SOURCE=1"
-AC_SUBST(LLCPPFLAGS)
-
-LLCFLAGS="-g -Wall -fPIC"
-AC_SUBST(LLCFLAGS)
-
-# everyone builds against portals and lustre
-
-if test x$enable_ldiskfs = xyes ; then
-       AC_DEFINE(CONFIG_LDISKFS_FS_MODULE, 1, [build ldiskfs as a module])
-       AC_DEFINE(CONFIG_LDISKFS_FS_XATTR, 1, [enable extended attributes for ldiskfs])
-       AC_DEFINE(CONFIG_LDISKFS_FS_POSIX_ACL, 1, [enable posix acls])
-       AC_DEFINE(CONFIG_LDISKFS_FS_SECURITY, 1, [enable fs security])
-fi
-
-EXTRA_KCFLAGS="-g $CRAY_PORTALS_INCLUDES -I$PWD/portals/include -I$PWD/include"
-
-# these are like AC_TRY_COMPILE, but try to build modules against the
-# kernel, inside the kernel-tests directory
-
-AC_DEFUN([LUSTRE_MODULE_CONFTEST],
-[cat >conftest.c <<_ACEOF
-$1
-_ACEOF
-])
-
-AC_DEFUN([LUSTRE_MODULE_COMPILE_IFELSE],
-[m4_ifvaln([$1], [LUSTRE_MODULE_CONFTEST([$1])])dnl
-rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.ko
-AS_IF([AC_TRY_COMMAND(cp conftest.c kernel-tests && make [$2] CC="$CC" -f $PWD/kernel-tests/Makefile LUSTRE_LINUX_CONFIG=$LINUX_CONFIG -o tmp_include_depends -o scripts -o include/config/MARKER -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM $MODULE_TARGET=$PWD/kernel-tests) >/dev/null && AC_TRY_COMMAND([$3])],
-       [$4],
-       [_AC_MSG_LOG_CONFTEST
-m4_ifvaln([$5],[$5])dnl])dnl
-rm -f kernel-tests/conftest.o kernel-tests/conftest.mod.c kernel-tests/conftest.mod.o kernel-tests/conftest.ko m4_ifval([$1], [kernel-tests/conftest.c conftest.c])[]dnl
-])
-
-AC_DEFUN([LUSTRE_MODULE_TRY_COMPILE],
-[LUSTRE_MODULE_COMPILE_IFELSE(
-       [AC_LANG_PROGRAM([[$1]], [[$2]])],
-       [modules],
-       [test -s kernel-tests/conftest.o],
-       [$3], [$4])])
-
-AC_DEFUN([LUSTRE_MODULE_TRY_MAKE],
-[LUSTRE_MODULE_COMPILE_IFELSE([AC_LANG_PROGRAM([[$1]], [[$2]])], [$3], [$4], [$5], [$6])])
-
-# ------------ include paths ------------------
-
-if test x$enable_modules != xno ; then
-       # ------------ .config exists ----------------
-       AC_CHECK_FILE([$LINUX_CONFIG],[],
-               [AC_MSG_ERROR([Kernel config could not be found.  If you are building from a kernel-source rpm consult README.kernel-source])])
-
-       # ----------- make dep run? ------------------
-       AC_CHECK_FILES([$LINUX_OBJ/include/linux/autoconf.h
-                       $LINUX_OBJ/include/linux/version.h
-                       $LINUX/include/linux/config.h],[],
-               [AC_MSG_ERROR([Run make config in $LINUX.])])
-
-       # ------------ rhconfig.h includes runtime-generated bits --
-       # red hat kernel-source checks
-
-       # we know this exists after the check above.  if the user
-       # tarred up the tree and ran make dep etc. in it, then
-       # version.h gets overwritten with a standard linux one.
-
-       if grep rhconfig $LINUX_OBJ/include/linux/version.h >/dev/null ; then
-               # This is a clean kernel-source tree, we need to
-               # enable extensive workarounds to get this to build
-               # modules
-               AC_CHECK_FILE([$KERNEL_SOURCE_HEADER],
-                       [if test $KERNEL_SOURCE_HEADER = '/boot/kernel.h' ; then
-                               AC_MSG_WARN([Using /boot/kernel.h from RUNNING kernel.])
-                               AC_MSG_WARN([If this is not what you want, use --with-kernel-source-header.])
-                               AC_MSG_WARN([Consult README.kernel-source for details.])
-                       fi],
-                       [AC_MSG_ERROR([$KERNEL_SOURCE_HEADER not found.  Consult README.kernel-source for details.])])
-               EXTRA_KCFLAGS="-include $KERNEL_SOURCE_HEADER $EXTRA_KCFLAGS"
-       fi
-
-       # ------------ external module support ---------------------
-       MODULE_TARGET="SUBDIRS"
-       if test $linux25 = 'yes' ; then
-               makerule="$PWD/kernel-tests"
-               AC_MSG_CHECKING([for external module build support])
-               rm -f kernel-tests/conftest.i
-               LUSTRE_MODULE_TRY_MAKE([],[],
-                       [$makerule LUSTRE_KERNEL_TEST=conftest.i],
-                       [test -s kernel-tests/conftest.i],
-                       [
-                               AC_MSG_RESULT([no])
-                       ],[
-                               AC_MSG_RESULT([yes])
-                               makerule="_module_$makerule"
-                               MODULE_TARGET="M"
-                       ])
-       else
-               makerule="_dir_$PWD/kernel-tests"
-       fi
-       AC_SUBST(MODULE_TARGET)
-
-       # --- check that we can build modules at all
-       AC_MSG_CHECKING([that modules can be built])
-       LUSTRE_MODULE_TRY_COMPILE([],[],
-               [
-                       AC_MSG_RESULT([yes])
-               ],[
-                       AC_MSG_RESULT([no])
-                       AC_MSG_WARN([Consult config.log for details.])
-                       AC_MSG_WARN([If you are trying to build with a kernel-source rpm, consult README.kernel-source])
-                       AC_MSG_ERROR([Kernel modules could not be built.])
-               ])
-
-       # ------------ LINUXRELEASE and moduledir ------------------
-       LINUXRELEASE=
-       rm -f kernel-tests/conftest.i
-       AC_MSG_CHECKING([for Linux release])
-       LUSTRE_MODULE_TRY_MAKE(
-               [#include <linux/version.h>],
-               [char *LINUXRELEASE;
-                LINUXRELEASE=UTS_RELEASE;],
-               [$makerule LUSTRE_KERNEL_TEST=conftest.i],
-               [test -s kernel-tests/conftest.i],
-               [
-                       # LINUXRELEASE="UTS_RELEASE"
-                       eval $(grep "LINUXRELEASE=" kernel-tests/conftest.i)
-               ],[
-                       AC_MSG_RESULT([unknown])
-                       AC_MSG_ERROR([Could not preprocess test program.  Consult config.log for details.])
-               ])
-       rm -f kernel-tests/conftest.i
-       if test x$LINUXRELEASE = x ; then
-               AC_MSG_RESULT([unknown])
-               AC_MSG_ERROR([Could not determine Linux release version from linux/version.h.])
-       fi
-       AC_MSG_RESULT([$LINUXRELEASE])
-       AC_SUBST(LINUXRELEASE)
-
-       moduledir='/lib/modules/'$LINUXRELEASE/kernel
-       modulefsdir='$(moduledir)/fs/$(PACKAGE)'
-       modulenetdir='$(moduledir)/net/$(PACKAGE)'
-
-       AC_SUBST(moduledir)
-       AC_SUBST(modulefsdir)
-       AC_SUBST(modulenetdir)
-
-       # ------------ RELEASE --------------------------------
-       AC_MSG_CHECKING([for Lustre release])
-       RELEASE="`echo ${LINUXRELEASE} | tr '-' '_'`_`date +%Y%m%d%H%M`"
-       AC_MSG_RESULT($RELEASE)
-       AC_SUBST(RELEASE)
-
-       # ---------- Portals flags --------------------
-
-       AC_MSG_CHECKING([for zero-copy TCP support])
-       AC_ARG_ENABLE([zerocopy],
-               AC_HELP_STRING([--disable-zerocopy],
-                              [disable socknal zerocopy]),
-               [],[enable_zerocopy='yes'])
-       if test x$enable_zerocopy = xno ; then
-               AC_MSG_RESULT([no (by request)])
-       else
-               ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`"
-               if test "$ZCCD" != 0 ; then
-                       AC_DEFINE(SOCKNAL_ZC, 1, [use zero-copy TCP])
-                       AC_MSG_RESULT(yes)
-               else
-                       AC_MSG_RESULT([no (no kernel support)])
-               fi
-       fi
-
-       AC_ARG_ENABLE([affinity],
-               AC_HELP_STRING([--disable-affinity],
-                              [disable process/irq affinity]),
-               [],[enable_affinity='yes'])
-
-       AC_MSG_CHECKING([for CPU affinity support])
-       if test x$enable_affinity = xno ; then
-               AC_MSG_RESULT([no (by request)])
-       else
-               LUSTRE_MODULE_TRY_COMPILE(
-                       [
-                               #include <linux/sched.h>
-                       ],[
-                               struct task_struct t;
-                               #ifdef CPU_ARRAY_SIZE
-                               cpumask_t m;
-                               #else
-                               unsigned long m;
-                               #endif
-                               set_cpus_allowed(&t, m);
-                       ],[
-                               AC_DEFINE(CPU_AFFINITY, 1, [kernel has cpu affinity support])
-                               AC_MSG_RESULT([yes])
-                       ],[
-                               AC_MSG_RESULT([no (no kernel support)])
-                       ])
-       fi
-
-       #####################################
-
-       AC_MSG_CHECKING([if quadrics kernel headers are present])
-       if test -d $LINUX/drivers/net/qsnet ; then
-               AC_MSG_RESULT([yes])
-               QSWNAL="qswnal"
-               AC_MSG_CHECKING([for multirail EKC])
-               if test -f $LINUX/include/elan/epcomms.h; then
-                       AC_MSG_RESULT([supported])
-                       QSWCPPFLAGS="-DMULTIRAIL_EKC=1"
-               else
-                       AC_MSG_RESULT([not supported])
-                       if test -d $LINUX/drivers/net/qsnet/include; then
-                               QSWCPPFLAGS="-I$LINUX/drivers/net/qsnet/include"
-                       else
-                               QSWCPPFLAGS="-I$LINUX/include/linux"
-                       fi
-               fi
-       else
-               AC_MSG_RESULT([no])
-               QSWNAL=""
-               QSWCPPFLAGS=""
-       fi
-       AC_SUBST(QSWCPPFLAGS)
-       AC_SUBST(QSWNAL)
-
-       AC_MSG_CHECKING([if gm support was requested])
-       AC_ARG_WITH([gm],
-               AC_HELP_STRING([--with-gm=path],
-                              [build gmnal against path]),
-               [
-                       case $with_gm in 
-                               yes)
-                                       AC_MSG_RESULT([yes])
-                                       GMCPPFLAGS="-I/usr/local/gm/include"
-                                       GMNAL="gmnal"
-                                       ;;
-                               no)
-                                       AC_MSG_RESULT([no])
-                                       GMCPPFLAGS=""
-                                       GMNAL=""
-                                       ;;
-                               *)
-                                       AC_MSG_RESULT([yes])
-                                       GMCPPFLAGS="-I$with_gm/include -I$with_gm/drivers -I$with_gm/drivers/linux/gm"
-                                       GMNAL="gmnal"
-                                       ;;
-                       esac
-               ],[
-                       AC_MSG_RESULT([no])
-                       GMCPPFLAGS=""
-                       GMNAL=""
-               ])
-       AC_SUBST(GMCPPFLAGS)
-       AC_SUBST(GMNAL)
-
-       if test $linux25 = 'no' ; then
-       #### OpenIB 
-       AC_MSG_CHECKING([if OpenIB kernel headers are present])
-       OPENIBCPPFLAGS="-I$LINUX/drivers/infiniband/include -DIN_TREE_BUILD"
-       EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS $OPENIBCPPFLAGS"
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <ts_ib_core.h>
-               ],[
-                       struct ib_device_properties props;
-                       return 0;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       OPENIBNAL="openibnal"
-               ],[
-                       AC_MSG_RESULT([no])
-                       OPENIBNAL=""
-                       OPENIBCPPFLAGS=""
-               ])
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-       AC_SUBST(OPENIBCPPFLAGS)
-       AC_SUBST(OPENIBNAL)
-       fi
-
-       #### Infinicon IB
-       AC_MSG_CHECKING([if Infinicon IB kernel headers are present])
-       # for now the only Infinicon IB build has headers in /usr/include/iba
-       IIBCPPFLAGS="-I/usr/include -DIN_TREE_BUILD"
-       EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS $IIBCPPFLAGS"
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/iba/ibt.h>
-               ],[
-                       IBT_INTERFACE_UNION interfaces;
-                       FSTATUS             rc;
-
-                       rc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2,
-                                                     &interfaces);
-
-                       return rc == FSUCCESS ? 0 : 1;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       IIBNAL="iibnal"
-               ],[
-                       AC_MSG_RESULT([no])
-                       IIBNAL=""
-                       IIBCPPFLAGS=""
-               ])
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-       AC_SUBST(IIBCPPFLAGS)
-       AC_SUBST(IIBNAL)
-
-       #### Rapid Array
-       AC_MSG_CHECKING([if RapidArray kernel headers are present])
-       # placeholder
-       RACPPFLAGS="-I/tmp"
-       EXTRA_KCFLAGS_save="$EXTRA_KCFLAGS"
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS $RACPPFLAGS"
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/types.h>
-                       #include <rapl.h>
-               ],[
-                       RAP_RETURN          rc;
-                       RAP_PVOID           dev_handle;
-
-                       rc = RapkGetDeviceByIndex(0, NULL, &dev_handle);
-
-                       return rc == RAP_SUCCESS ? 0 : 1;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       RANAL="ranal"
-               ],[
-                       AC_MSG_RESULT([no])
-                       RANAL=""
-                       RACPPFLAGS=""
-               ])
-       EXTRA_KCFLAGS="$EXTRA_KCFLAGS_save"
-       AC_SUBST(RACPPFLAGS)
-       AC_SUBST(RANAL)
-
-       # ---------- Red Hat 2.4.18 has iobuf->dovary --------------
-       # But other kernels don't
-
-       AC_MSG_CHECKING([if struct kiobuf has a dovary field])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/iobuf.h>
-               ],[
-                       struct kiobuf iobuf;
-                       iobuf.dovary = 1;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_KIOBUF_DOVARY, 1, [struct kiobuf has a dovary field])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])      
-
-       # ----------- 2.6.4 no longer has page->list ---------------
-       AC_MSG_CHECKING([if struct page has a list field])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/mm.h>
-               ],[
-                       struct page page;
-                       &page.list;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_PAGE_LIST, 1, [struct page has a list field])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ---------- Red Hat 2.4.20 backports some 2.5 bits --------
-       # This needs to run after we've defined the KCPPFLAGS
-
-       AC_MSG_CHECKING([if task_struct has a sighand field])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/sched.h>
-               ],[
-                       struct task_struct p;
-                       p.sighand = NULL;
-               ],[
-                       AC_DEFINE(CONFIG_RH_2_4_20, 1, [this kernel contains Red Hat 2.4.20 patches])
-                       AC_MSG_RESULT([yes])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ---------- 2.4.20 introduced cond_resched --------------
-
-       AC_MSG_CHECKING([if kernel offers cond_resched])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/sched.h>
-               ],[
-                       cond_resched();
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_COND_RESCHED, 1, [cond_resched found])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # --------- zap_page_range(vma): grep mm.h for a prototype taking struct vm_area_struct
-       AC_MSG_CHECKING([if zap_page_range takes a vma parameter])
-       ZAP_PAGE_RANGE_VMA="`grep -c 'zap_page_range.*struct vm_area_struct' $LINUX/include/linux/mm.h`"
-       if test "$ZAP_PAGE_RANGE_VMA" != 0 ; then
-               AC_DEFINE(ZAP_PAGE_RANGE_VMA, 1, [zap_page_range with vma parameter])
-               AC_MSG_RESULT([yes])
-       else
-               AC_MSG_RESULT([no])
-       fi
-
-       # ---------- Red Hat 2.4.21 backports some more 2.5 bits --------
-
-       AC_MSG_CHECKING([if kernel defines PDE])
-       HAVE_PDE="`grep -c 'proc_dir_entry..PDE' $LINUX/include/linux/proc_fs.h`"
-       if test "$HAVE_PDE" != 0 ; then
-               AC_DEFINE(HAVE_PDE, 1, [the kernel defines PDE])
-               AC_MSG_RESULT([yes])
-       else
-               AC_MSG_RESULT([no])
-       fi
-
-       AC_MSG_CHECKING([if kernel passes struct file to direct_IO])
-       HAVE_DIO_FILE="`grep -c 'direct_IO.*struct file' $LINUX/include/linux/fs.h`"
-       if test "$HAVE_DIO_FILE" != 0 ; then
-               AC_DEFINE(HAVE_DIO_FILE, 1, [the kernel passes struct file to direct_IO])
-               AC_MSG_RESULT(yes)
-       else
-               AC_MSG_RESULT(no)
-       fi
-
-       AC_MSG_CHECKING([if kernel defines cpu_online()])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/sched.h>
-               ],[
-                       cpu_online(0);
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_CPU_ONLINE, 1, [cpu_online found])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-       AC_MSG_CHECKING([if kernel defines cpumask_t])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/sched.h>
-               ],[
-                       return sizeof (cpumask_t);
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_CPUMASK_T, 1, [cpumask_t found])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ---------- RHEL kernels define page_count in mm_inline.h
-       AC_MSG_CHECKING([if kernel has mm_inline.h header])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/mm_inline.h>
-               ],[
-                       #ifndef page_count
-                       #error mm_inline.h does not define page_count
-                       #endif
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_MM_INLINE, 1, [mm_inline found])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ---------- inode->i_alloc_sem --------------
-       AC_MSG_CHECKING([if struct inode has i_alloc_sem])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/fs.h>
-                       #include <linux/version.h>
-               ],[
-                       #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,24))
-                       #error "down_read_trylock broken before 2.4.24"
-                       #endif
-                       struct inode i;
-                       return (char *)&i.i_alloc_sem - (char *)&i;
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_DEFINE(HAVE_I_ALLOC_SEM, 1, [struct inode has i_alloc_sem])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-
-       # ---------- modules? ------------------------
-       AC_MSG_CHECKING([for module support])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/config.h>
-               ],[
-                       #ifndef CONFIG_MODULES
-                       #error CONFIG_MODULES not #defined
-                       #endif
-               ],[
-                       AC_MSG_RESULT([yes])
-               ],[
-                       AC_MSG_RESULT([no])
-                       AC_MSG_ERROR([module support is required to build Lustre kernel modules.])
-               ])
-
-       # ---------- modversions? --------------------
-       AC_MSG_CHECKING([for MODVERSIONS])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/config.h>
-               ],[
-                       #ifndef CONFIG_MODVERSIONS
-                       #error CONFIG_MODVERSIONS not #defined
-                       #endif
-               ],[
-                       AC_MSG_RESULT([yes])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ------------ preempt -----------------------
-       AC_MSG_CHECKING([if preempt is enabled])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/config.h>
-               ],[
-                       #ifndef CONFIG_PREEMPT
-                       #error CONFIG_PREEMPT is not #defined
-                       #endif
-               ],[
-                       AC_MSG_RESULT([yes])
-                       AC_MSG_ERROR([Lustre does not support kernels with preempt enabled.])
-               ],[
-                       AC_MSG_RESULT([no])
-               ])
-
-       # ------------ kallsyms (so software watchdogs produce useful stacks)
-       AC_MSG_CHECKING([if kallsyms is enabled])
-       LUSTRE_MODULE_TRY_COMPILE(
-               [
-                       #include <linux/config.h>
-               ],[
-                       #ifndef CONFIG_KALLSYMS
-                       #error CONFIG_KALLSYMS is not #defined
-                       #endif
-               ],[
-                       AC_MSG_RESULT([yes])
-               ],[
-                       AC_MSG_RESULT([no])
-                       if test "x$ARCH_UM" = "x" ; then
-                               AC_MSG_ERROR([Lustre requires that CONFIG_KALLSYMS is enabled in your kernel.])
-                       fi
-               ])
-
-       # ------------ check for our show_task patch
-       AC_MSG_CHECKING([if kernel exports show_task])
-       have_show_task=0
-       for file in ksyms sched ; do
-               if grep -q "EXPORT_SYMBOL(show_task)" \
-                        "$LINUX/kernel/$file.c" 2>/dev/null ; then
-                       have_show_task=1
-                       break
-               fi
-       done
-       if test x$have_show_task = x1 ; then
-               AC_DEFINE(HAVE_SHOW_TASK, 1, [show_task is exported])
-               AC_MSG_RESULT(yes)
-       else
-               AC_MSG_RESULT(no)
-       fi
-
-       case $BACKINGFS in
-               ext3)
-                       # --- Check that ext3 and ext3 xattr are enabled in the kernel
-                       AC_MSG_CHECKING([that ext3 is enabled in the kernel])
-                       LUSTRE_MODULE_TRY_COMPILE(
-                               [
-                                       #include <linux/config.h>
-                               ],[
-                                       #ifndef CONFIG_EXT3_FS
-                                       #ifndef CONFIG_EXT3_FS_MODULE
-                                       #error CONFIG_EXT3_FS not #defined
-                                       #endif
-                                       #endif
-                               ],[
-                                       AC_MSG_RESULT([yes])
-                               ],[
-                                       AC_MSG_RESULT([no])
-                                       AC_MSG_ERROR([Lustre requires that ext3 is enabled in the kernel (CONFIG_EXT3_FS)])
-                               ])
-       
-                       AC_MSG_CHECKING([that extended attributes for ext3 are enabled in the kernel])
-                       LUSTRE_MODULE_TRY_COMPILE(
-                               [
-                                       #include <linux/config.h>
-                               ],[
-                                       #ifndef CONFIG_EXT3_FS_XATTR
-                                       #error CONFIG_EXT3_FS_XATTR not #defined
-                                       #endif
-                               ],[
-                                       AC_MSG_RESULT([yes])
-                               ],[
-                                       AC_MSG_RESULT([no])
-                                       AC_MSG_WARN([Lustre requires that extended attributes for ext3 are enabled in the kernel (CONFIG_EXT3_FS_XATTR.)])
-                                       AC_MSG_WARN([This build may fail.])
-                               ])
-                       ;;
-               ldiskfs)
-                       AC_MSG_CHECKING([if fshooks are present])
-                       LUSTRE_MODULE_TRY_COMPILE(
-                               [
-                                       #include <linux/fshooks.h>
-                               ],[],[
-                                       AC_MSG_RESULT([yes])
-                                       LDISKFS_SERIES="2.6-suse.series"
-                               ],[
-                                       AC_MSG_RESULT([no])
-                                       LDISKFS_SERIES="2.6-vanilla.series"
-                               ])
-                       AC_SUBST(LDISKFS_SERIES)
-                       # --- check which ldiskfs series we should use
-                       ;;
-       esac # $BACKINGFS
-fi
-
-AM_CONDITIONAL(BUILD_QSWNAL, test x$QSWNAL = "xqswnal")
-AM_CONDITIONAL(BUILD_GMNAL, test x$GMNAL = "xgmnal")
-AM_CONDITIONAL(BUILD_OPENIBNAL, test x$OPENIBNAL = "xopenibnal")
-AM_CONDITIONAL(BUILD_IIBNAL, test x$IIBNAL = "xiibnal")
-AM_CONDITIONAL(BUILD_RANAL, test x$RANAL = "xranal")
-
-# portals/utils/portals.c
-AC_CHECK_HEADERS([netdb.h netinet/tcp.h asm/types.h])
-AC_CHECK_FUNCS([gethostbyname socket connect])
-
-# portals/utils/debug.c
-AC_CHECK_HEADERS([linux/version.h])
-
-# include/liblustre.h
-AC_CHECK_HEADERS([asm/page.h sys/user.h stdint.h])
-
-# liblustre/llite_lib.h
-AC_CHECK_HEADERS([xtio.h file.h])
-
-# liblustre/dir.c
-AC_CHECK_HEADERS([linux/types.h sys/types.h linux/unistd.h unistd.h])
-
-# liblustre/lutil.c
-AC_CHECK_HEADERS([netinet/in.h arpa/inet.h catamount/data.h])
-AC_CHECK_FUNCS([inet_ntoa])
-
-CPPFLAGS="-include \$(top_builddir)/include/config.h $CPPFLAGS"
-EXTRA_KCFLAGS="-include $PWD/include/config.h $EXTRA_KCFLAGS"
-AC_SUBST(EXTRA_KCFLAGS)
-
-echo "CPPFLAGS: $CPPFLAGS"
-echo "LLCPPFLAGS: $LLCPPFLAGS"
-echo "CFLAGS: $CFLAGS"
-echo "EXTRA_KCFLAGS: $EXTRA_KCFLAGS"
-echo "LLCFLAGS: $LLCFLAGS"
-
-ENABLE_INIT_SCRIPTS=0
-if test x$enable_utils = xyes ; then
-        AC_MSG_CHECKING([whether to install init scripts])
-        # our scripts only work on red hat systems
-        if test -f /etc/init.d/functions -a -f /etc/sysconfig/network ; then
-                ENABLE_INIT_SCRIPTS=1
-                AC_MSG_RESULT([yes])
-        else
-                AC_MSG_RESULT([no])
-        fi
-fi
-AM_CONDITIONAL(INIT_SCRIPTS, test x$ENABLE_INIT_SCRIPTS = "x1")
-AC_SUBST(ENABLE_INIT_SCRIPTS)
diff --git a/lustre/portals/autoMakefile.am b/lustre/portals/autoMakefile.am
deleted file mode 100644 (file)
index 485ff04..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-EXTRA_DIST = archdep.m4 build.m4
-
-SUBDIRS = portals libcfs knals unals router tests doc utils include
diff --git a/lustre/portals/autogen.sh b/lustre/portals/autogen.sh
deleted file mode 100755 (executable)
index 9deed73..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/bin/sh
-
-aclocal &&
-automake --add-missing &&
-${AUTOCONF:-autoconf}
diff --git a/lustre/portals/build.m4 b/lustre/portals/build.m4
deleted file mode 100644 (file)
index f158396..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-# ----------  other tests and settings ---------
-
-AC_CHECK_TYPE([spinlock_t],
-       [AC_DEFINE(HAVE_SPINLOCK_T, 1, [spinlock_t is defined])],
-       [],
-       [#include <linux/spinlock.h>])
-
-# ---------  unsigned long long sane? -------
-
-AC_CHECK_SIZEOF(unsigned long long, 0)
-echo "---> size SIZEOF $SIZEOF_unsigned_long_long"
-echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long"
-if test $ac_cv_sizeof_unsigned_long_long != 8 ; then
-        AC_MSG_ERROR([** we assume that sizeof(long long) == 8.  Tell phil@clusterfs.com])
-fi
-
-# directories for binaries
-ac_default_prefix=/usr
-
-# mount.lustre
-rootsbindir='/sbin'
-AC_SUBST(rootsbindir)
-sysconfdir='/etc'
-AC_SUBST(sysconfdir)
-# Directories for documentation and demos.
-docdir='${datadir}/doc/$(PACKAGE)'
-AC_SUBST(docdir)
-demodir='$(docdir)/demo'
-AC_SUBST(demodir)
-pkgexampledir='${pkgdatadir}/examples'
-AC_SUBST(pkgexampledir)
-pymoddir='${pkglibdir}/python/Lustre'
-AC_SUBST(pymoddir)
-
-# ----------  BAD gcc? ------------
-AC_PROG_RANLIB
-AC_PROG_CC
-AC_MSG_CHECKING([for buggy compiler])
-CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"`
-bad_cc() {
-       AC_MSG_RESULT([buggy compiler found!])
-       echo
-       echo "   '$CC_VERSION'"
-       echo "  has been known to generate bad code, "
-       echo "  please get an updated compiler."
-       AC_MSG_ERROR([sorry])
-}
-TMP_VERSION=`echo $CC_VERSION | cut -c 1-16`
-if test "$TMP_VERSION" = "gcc version 2.95"; then
-        bad_cc
-fi
-case "$CC_VERSION" in 
-       # ost_pack_niobuf putting 64bit NTOH temporaries on the stack
-       # without "sub    $0xc,%esp" to protect the stack from being
-       # stomped on by interrupts (bug 606)
-       "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)")
-               bad_cc
-               ;;
-       # mandrake's similar sub 0xc compiler bug
-       # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2
-       "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)")
-               bad_cc
-               ;;
-       # unpatched 'gcc' on rh9.  miscompiles a
-       #        struct = (type) { .member = value, };
-       # assignment in the iibnal where the struct is a mix
-       # of u64 and u32 bit-fields.
-       "gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5)")
-               bad_cc
-               ;;
-       *)
-               AC_MSG_RESULT([no known problems])
-               ;;
-esac
-# end ------  BAD gcc? ------------
-
-# --------  Check for required packages  --------------
-
-# this doesn't seem to work on older autoconf
-# AC_CHECK_LIB(readline, readline,,)
-AC_MSG_CHECKING([for readline support])
-AC_ARG_ENABLE(readline,
-       AC_HELP_STRING([--disable-readline],
-                       [do not use readline library]),
-       [],[enable_readline='yes'])
-AC_MSG_RESULT([$enable_readline]) 
-if test x$enable_readline = xyes ; then
-       LIBREADLINE="-lreadline -lncurses"
-       AC_DEFINE(HAVE_LIBREADLINE, 1, [readline library is available])
-else 
-       LIBREADLINE=""
-fi
-AC_SUBST(LIBREADLINE)
-
-AC_MSG_CHECKING([if efence debugging support is requested])
-AC_ARG_ENABLE(efence,
-       AC_HELP_STRING([--enable-efence],
-                       [use efence library]),
-       [],[enable_efence='no'])
-AC_MSG_RESULT([$enable_efence])
-if test "$enable_efence" = "yes" ; then
-       LIBEFENCE="-lefence"
-       AC_DEFINE(HAVE_LIBEFENCE, 1, [libefence support is requested])
-else 
-       LIBEFENCE=""
-fi
-AC_SUBST(LIBEFENCE)
-
-# -------- enable acceptor libwrap (TCP wrappers) support? -------
-AC_MSG_CHECKING([if libwrap support is requested])
-AC_ARG_ENABLE([libwrap],
-       AC_HELP_STRING([--enable-libwrap], [use TCP wrappers]),
-       [case "${enableval}" in
-               yes) enable_libwrap=yes ;;
-               no) enable_libwrap=no ;;
-               *) AC_MSG_ERROR(bad value ${enableval} for --enable-libwrap) ;;
-       esac],[enable_libwrap=no])
-AC_MSG_RESULT([$enable_libwrap])
-if test x$enable_libwrap = xyes ; then
-       LIBWRAP="-lwrap"
-       AC_DEFINE(HAVE_LIBWRAP, 1, [libwrap support is requested])
-else
-       LIBWRAP=""
-fi
-AC_SUBST(LIBWRAP)
-
-AC_SUBST(LIBS)
diff --git a/lustre/portals/doc/.cvsignore b/lustre/portals/doc/.cvsignore
deleted file mode 100644 (file)
index 827dca4..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-Makefile
-Makefile.in
-*.eps
-*.pdf
diff --git a/lustre/portals/doc/Data-structures b/lustre/portals/doc/Data-structures
deleted file mode 100644 (file)
index b5532b1..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-In this document I will try to draw the data structures and how they
-interrelate in the Portals 3 reference implementation.  It is probably
-best shown with a drawing, so there may be an additional xfig or
-Postscript figure.
-
-
-MEMORY POOLS:
-------------
-
-First, a digression on memory allocation in the library.  As mentioned
-in the NAL Writer's Guide, the library does not link against any
-standard C libraries and as such is unable to dynamically allocate
-memory on its own.  It requires that the NAL implement a method
-for allocation that is appropriate for the protection domain in
-which the library lives.  This is only called when a network
-interface is initialized to allocate the Portals object pools.
-
-These pools are preallocated blocks of objects that the library
-can rapidly make active and manage with a minimum of overhead.
-It also cuts down on overhead for setting up structures
-since the NAL->malloc() callback does not need to be called
-for each object.
-
-The objects are maintained on a per-object type singly linked free
-list and contain a pointer to the next free object.  This pointer
-is NULL if the object is not on the free list and is non-zero
-if it is on the list.  The special sentinel value of 0xDEADBEEF
-is used to mark the end of the free list since NULL could
-indicate that the last object in the list is not free.
-
-When one of the lib_*_alloc() functions is called, the library
-returns the head of the free list and advances the head pointer
-to the next item on the list.  The special case of 0xDEADBEEF is
-checked and a NULL pointer is returned if there are no more
-objects of this type available.   The lib_*_free() functions
-are even simpler -- check to ensure that the object is not already
-free, set its next pointer to the current head and then set
-the head to be this newly freed object.
-
-Since C does not have templates, I did the next best thing and wrote
-the memory pool allocation code as a macro that expands based on the
-type of the argument.  The mk_alloc(T) macro expands to
-write the _lib_T_alloc() and lib_T_free() functions.
-It requires that the object have a pointer of the type T named
-"next_free".  There are also functions that map _lib_T_alloc()
-to lib_T_alloc() so that the library can add some extra
-functionality to the T constructor.
-
-
-
-LINKED LISTS:
-------------
-
-Many of the active Portals objects are stored in doubly linked lists
-when they are active.  These are always implemented with the pointer
-to the next object and a pointer to the next pointer of the
-previous object.  This avoids the "dummy head" object or
-special cases for inserting at the beginning or end of the list.
-The pointer manipulations are a little hairy at times, but
-I hope that they are understandable.
-
-The actual linked list code is implemented as macros in <lib-p30.h>,
-although the object has to know about 
-
-
diff --git a/lustre/portals/doc/Makefile.am b/lustre/portals/doc/Makefile.am
deleted file mode 100644 (file)
index b7f6252..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-LYX2PDF = lyx --export pdf
-LYX2TXT = lyx --export text
-LYX2HTML = lyx --export html
-SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps
-
-if DOC
-  DOCS = portals3.pdf
-else
-  DOCS =
-endif
-
-IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps
-LYXFILES= portals3.lyx
-
-MAINTAINERCLEANFILES =  $(IMAGES) $(DOCS) $(GENERATED)
-GENERATED = 
-EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) 
-
-all: $(DOCS)
-
-# update date and version in document
-date := $(shell date +%x)
-tag := $(shell echo '$$Name:  $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/')
-addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g'
-
-# Regenerate when the $(VERSION) or $Name:  $ changes.
-.INTERMEDIATE: $(GENERATED)
-$(GENERATED) : %.lyx: %.lin Makefile
-       $(addversion) $< > $@
-
-.lyx.pdf:
-       @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n"
-
-.lyx.txt:
-       @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n"
-.lyx.html:
-       @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n"
-.fig.eps:
-       -fig2dev -L eps $< > $@
-
-portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx
-
-syncweb: portals3.pdf
-#      cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf
-#      ( cd /usr/src/www ; make lustre ; make synclustre )
-
diff --git a/lustre/portals/doc/Message-life-cycle b/lustre/portals/doc/Message-life-cycle
deleted file mode 100644 (file)
index e8cc7e2..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-This documents the life cycle of a message as it arrives and is handled by
-a basic async, packetized NAL.  There are four types of messages that have
-slightly different life cycles, so they are addressed independently.
-
-
-Put request
------------
-
-1.  NAL notices that there is an incoming message header on the network
-and reads a ptl_hdr_t in from the wire.
-
-2.  It may store additional NAL specific data that provides context
-for this event in a void* that it will interpret in some fashion
-later.
-
-3.  The NAL calls lib_parse() with a pointer to the header and its
-private data structure.
-
-4.  The library decodes the header and may build a message state
-object that describes the event to be written and the ACK to be
-sent, if any.  It then calls nal->recv() with the private data
-that the NAL passed in, a pointer to the message state object
-and a translated user address.
-
-       The NAL will have been given a chance to pretranslate
-       all user addresses when the buffers are created.  This
-       process is described in the NAL-HOWTO.
-
-5.  The NAL should restore whatever context it required from the
-private data pointer, begin receiving the bytes and possibly store
-some extra state of its own.  It should return at this point.
-
-
-
-Get request
------------
-
-1.  As with a Put, the NAL notices the incoming message header and
-passes it to lib_parse().
-
-2.  The library decodes the header and calls nal->recv() with a
-zero byte length, offset and destination to instruct it to clean
-up the wire after reading the header.  The private data will
-be passed in as well, allowing the NAL to retrieve any state
-or context that it requires.
-
-3.  The library may build a message state object to possibly
-write an event log or invalidate a memory region.
-
-4.  The library will build a ptl_msg_t header that specifies the
-Portals protocol information for delivery at the remote end.
-
-5.  The library calls nal->send() with the pre-built header,
-the optional message state object, the four part address
-component, a translated user pointer + offset, and some
-other things.
-
-6.  The NAL is to put the header on the wire or copy it at
-this point (since it is allocated on the stack).  It should store some
-amount of state about its current position in the message and
-the destination address.
-
-7.  And then return to the library.
-
-
-Reply request
--------------
-
-1.  Starting at "The library decodes the header..."
-
-2.  The library decodes the header and calls nal->recv()
-to bring in the rest of the message.  Flow continues in
-exactly the same fashion as with all other receives.
-
-
-Ack request
------------
-
-1.  The library decodes the header, builds the appropriate data
-structures for the event in a message state object and calls nal->recv()
-with a zero byte length, etc.
-
-
-Packet arrival
---------------
-
-1.  The NAL should notice the arrival of a packet, retrieve whatever
-state it needs from the message ID or other NAL specific header data
-and place the data bytes directly into the user address that was
-given to nal->recv().
-
-       How this happens is outside the scope of the Portals library
-       and solely determined by the NAL...
-
-2.  If this is the last packet in a message, the NAL should retrieve
-the lib_msg_t *cookie that it was given in the call to nal->recv()
-and pass it to lib_finalize().  lib_finalize() may call nal->send()
-to send an ACK, nal->write() to record an entry in the event log,
-nal->invalidate() to unregister a region of memory or do nothing at all.
-
-3.  It should then clean up any remaining NAL specific state about
-the message and go back into the main loop.
-
-
-Outgoing packets
-----------------
-
-1.  When the NAL has pending output, it should put the packets on
-the wire wrapped with whatever implementation-specific wrappers.
-
-2.  Once it has output all the packets of a message it should
-call lib_finalize() with the message state object that was
-handed to nal->send().  This will allow the library to clean
-up its state regarding the message and write any pending event
-entries.
-
-
-
diff --git a/lustre/portals/doc/NAL-HOWTO b/lustre/portals/doc/NAL-HOWTO
deleted file mode 100644 (file)
index ea38aed..0000000
+++ /dev/null
@@ -1,293 +0,0 @@
-This document is a first attempt at describing how to write a NAL
-for the Portals 3 library.  It also defines the library architecture
-and the abstraction of protection domains.
-
-
-First, an overview of the architecture:
-
-    Application
-
-----|----+--------
-         |
-   API  === NAL        (User space)
-         |   
----------+---|-----
-         |    
-   LIB  === NAL        (Library space)
-         |
----------+---|-----
-          
-    Physical wire      (NIC space)
-          
-
-Application
-    API
-API-side NAL
-------------
-LIB-side NAL
-    LIB
-LIB-side NAL
-   wire
-
-Communication is through the indicated paths via well defined
-interfaces.  The API and LIB portions are written to be portable
-across platforms and do not depend on the network interface.
-
-Communication between the application and the API code is
-defined in the Portals 3 API specification.  This is the
-user-visible portion of the interface and should be the most
-stable.
-
-
-
-API-side NAL:
-------------
-
-The user space NAL needs to implement only a few functions
-that are stored in a nal_t data structure and called by the
-API-side library:
-
-       int forward( nal_t *nal,
-               int     index,
-               void    *args,
-               size_t  arg_len,
-               void    *ret,
-               size_t  ret_len
-       );
-
-Most of the data structures in the portals library are held in
-the LIB section of the code, so it is necessary to forward API
-calls across the protection domain to the library.  This is
-handled by the NAL's forward method.  Once the argument and return
-blocks are on the remote side the NAL should call lib_dispatch()
-to invoke the appropriate API function.
-
-       int validate( nal_t *nal,
-               void    *base,
-               size_t  extent,
-               void    **trans_base,
-               void    **trans_data
-       );
-
-The validate method provides a means for the NAL to prevalidate
-and possibly pretranslate user addresses into a form suitable
-for fast use by the network card or kernel module.  The trans_base
-pointer will be used by the library every time it needs to
-refer to the block of memory.  The trans_data result is a
-cookie that will be handed to the NAL along with the trans_base.
-
-The library never performs calculations on the trans_base value;
-it only computes offsets that are then handed to the NAL.
-
-
-       int shutdown( nal_t *nal, int interface );
-
-Brings down the network interface.  The remote NAL side should
-call lib_fini() to bring down the library side of the network.
-
-       void yield( nal_t *nal );
-
-This allows the user application to gracefully give up the processor
-while busy waiting.  Performance critical applications may not
-want to take the time to call this function, so it should be an
-option to the PtlEQWait call.  Right now it is not implemented as such.
-
-Lastly, the NAL must implement a function named PTL_IFACE_*, where
-* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR.
-This initialization function is to set up communication with the
-library-side NAL, which should call lib_init() to bring up the
-network interface.
-
-
-
-LIB-side NAL:
-------------
-
-On the library-side, the NAL has much more responsibility.  It
-is responsible for calling lib_dispatch() on behalf of the user,
-it is also responsible for bringing packets off the wire and
-pushing bits out.  As on the user side, the methods are stored
-in a nal_cb_t structure that is defined on a per network
-interface basis.
-
-The calls to lib_dispatch() need to be examined.  The prototype:
-
-       void    lib_dispatch(
-                       nal_cb_t                *nal,
-                       void                    *private,
-                       int                     index,
-                       void                    *arg_block,
-                       void                    *ret_block
-       );
-
-has two complications.  The private field is a NAL-specific
-value that will be passed to any callbacks produced as a result
-of this API call.  Kernel module implementations may use this
-for task structures, or perhaps network card data.  It is ignored
-by the library.
-
-Secondly, the arg_block and ret_block must be in the same protection
-domain as the library.  The NAL's two halves must communicate the
-sizes and perform the copies.  After the call, the buffer pointed
-to by ret_block will be filled in and should be copied back to
-the user space.  How this is to be done is NAL specific.
-
-       int lib_parse(
-                       nal_cb_t                *nal,
-                       ptl_hdr_t               *hdr,
-                       void                    *private
-       );
-
-This is the only other entry point into the library from the NAL.
-When the NAL detects an incoming message on the wire it should read
-sizeof(ptl_hdr_t) bytes and pass a pointer to the header to
-lib_parse().  It may set private to be anything that it needs to
-tie the incoming message to callbacks that are made as a result
-of this event.
-
-The method calls are:
-
-       int     (*send)(
-                       nal_cb_t                *nal,
-                       void                    *private,
-                       lib_msg_t               *cookie,
-                       ptl_hdr_t               *hdr,
-                       int                     nid,
-                       int                     pid,
-                       int                     gid,
-                       int                     rid,
-                       user_ptr                trans_base,
-                       user_ptr                trans_data,
-                       size_t                  offset,
-                       size_t                  len
-       );
-
-This is a tricky function -- it must support async output
-of messages as well as properly synchronized event log writing.
-The private field is the same that was passed into lib_dispatch()
-or lib_parse() and may be used to tie this call to the event
-that initiated the entry to the library.
-
-The cookie is a pointer to a library private value that must
-be passed to lib_finalize() once the message has been completely
-sent.  It should not be examined by the NAL for any meaning.
-
-The four ID fields are passed in, although some implementations
-may not use all of them.
-
-The single base pointer has been replaced with the translated
-address that the API NAL generated in the api_nal->validate()
-call.  The trans_data is unchanged and the offset is in bytes.
-
-
-       int     (*recv)(
-                       nal_cb_t                *nal,
-                       void                    *private,
-                       lib_msg_t               *cookie,
-                       user_ptr                trans_base,
-                       user_ptr                trans_data,
-                       size_t                  offset,
-                       size_t                  mlen,
-                       size_t                  rlen
-       );
-
-This callback will only be called in response to lib_parse().
-The cookie, trans_base and trans_data are as discussed in send().
-The NAL should read mlen bytes from the wire, deposit them into
-trans_base + offset and then discard (rlen - mlen) bytes.
-Once the entire message has been received the NAL should call
-lib_finalize() with the lib_msg_t *cookie.
-
-The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0
-are used to indicate that the NAL should clean up the wire.  This could
-be implemented as a blocking call, although having it return as quickly
-as possible is desirable.
-
-       int     (*write)(
-                       nal_cb_t                *nal,
-                       void                    *private,
-                       user_ptr                trans_addr,
-                       user_ptr                trans_data,
-                       size_t                  offset,
-
-                       void                    *src_addr,
-                       size_t                  len
-       );
-
-This is essentially a cross-protection domain memcpy().  The user address
-has been pretranslated by the api_nal->validate() call.
-
-       void    *(*malloc)(
-                       nal_cb_t                *nal,
-                       size_t                  len
-       );
-
-       void    (*free)(
-                       nal_cb_t                *nal,
-                       void                    *buf
-       );
-
-Since the NAL may be in a non-standard hosted environment it can
-not call malloc().  This allows the library side NAL to implement
-the system specific malloc().  In the current reference implementation
-the library only calls nal->malloc() when the network interface is
-initialized and then calls free when it is brought down.  The library
-maintains its own pool of objects for allocation so only one call to
-malloc is made per object type.
-
-       void    (*invalidate)(
-                       nal_cb_t                *nal,
-                       user_ptr                trans_base,
-                       user_ptr                trans_data,
-                       size_t                  extent
-       );
-
-User addresses are validated/translated at the user-level API NAL
-method, which is likely to push them to this level.  Meanwhile,
-the library NAL will be notified when the library no longer
-needs the buffer.  Overlapped buffers are not detected by the
-library, so the NAL should ref count each page involved.
-
-Unfortunately we have a few bugs when the invalidate method is
-called.  It is still in progress...
-
-       void    (*printf)(
-                       nal_cb_t                *nal,
-                       const char              *fmt,
-                       ...
-       );
-
-As with malloc(), the library does not have any way to do printf
-or printk.  It is not necessary for the NAL to implement this
-call, although it will make debugging difficult.
-
-       void    (*cli)(
-                       nal_cb_t                *nal,
-                       unsigned long           *flags
-       );
-
-       void    (*sti)(
-                       nal_cb_t                *nal,
-                       unsigned long           *flags
-       );
-
-These are used by the library to mark critical sections.
-
-       int     (*gidrid2nidpid)(
-                       nal_cb_t                *nal,
-                       ptl_id_t                gid,
-                       ptl_id_t                rid,
-                       ptl_id_t                *nid,
-                       ptl_id_t                *pid
-       );
-
-
-       int     (*nidpid2gidrid)(
-                       nal_cb_t                *nal,
-                       ptl_id_t                nid,
-                       ptl_id_t                pid,
-                       ptl_id_t                *gid,
-                       ptl_id_t                *rid
-       );
-
-Rolf added these.  I haven't looked at how they have to work yet.
diff --git a/lustre/portals/doc/file.fig b/lustre/portals/doc/file.fig
deleted file mode 100644 (file)
index 914c294..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 1200 750 1650 1050
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-        1650 1050 1650 750 1200 750 1200 1050 1650 1050
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001
--6
-6 1200 2325 1650 2625
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-        1650 2625 1650 2325 1200 2325 1200 2625 1650 2625
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001
--6
-6 1200 1800 1650 2100
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-        1650 2100 1650 1800 1200 1800 1200 2100 1650 2100
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001
--6
-6 1200 1275 1650 1575
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-        1650 1575 1650 1275 1200 1275 1200 1575 1650 1575
-4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001
--6
-6 450 750 900 1200
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        450 825 450 1050
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 1050 900 825
--6
-6 450 2325 900 2775
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        450 2400 450 2625
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 2625 900 2400
--6
-6 450 1800 900 2250
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        450 1875 450 2100
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 2100 900 1875
--6
-6 450 1275 900 1725
-5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575
-1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        450 1350 450 1575
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 1575 900 1350
--6
-6 2250 750 3450 2625
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-        2550 1200 3150 1200
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-        2550 1500 3150 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-        2550 1800 3150 1800
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-        2550 2100 3150 2100
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2550 975 3150 975 3150 2625 2550 2625 2550 975
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2
-        2550 2400 3150 2400
-4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 2400 2550 1350
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 1875 2550 1050
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 1425 2550 1950
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 900 2550 1650
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 900 1200 900
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 1425 1200 1425
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 1950 1200 1950
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2
-        900 2475 1200 2475
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 2025 2550 2250
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2
-       0 0 1.00 60.00 120.00
-       0 0 1.00 60.00 120.00
-        1650 2550 2550 2475
-2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5
-        1875 2850 1875 600 225 600 225 2850 1875 2850
-4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001
diff --git a/lustre/portals/doc/flow_new.fig b/lustre/portals/doc/flow_new.fig
deleted file mode 100644 (file)
index d828dea..0000000
+++ /dev/null
@@ -1,213 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 525 2175 1575 2925
-6 675 2287 1425 2812
-4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001
-4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001
-4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-        1575 2550 1050 2175 525 2550 1050 2925 1575 2550
--6
-6 3450 1275 4350 1725
-6 3600 1312 4200 1687
-4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001
-4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        3450 1275 4350 1275 4350 1725 3450 1725 3450 1275
--6
-6 4650 1275 5550 1725
-6 4725 1312 5475 1687
-4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001
-4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        4650 1275 5550 1275 5550 1725 4650 1725 4650 1275
--6
-6 1350 525 2250 975
-6 1350 562 2250 937
-4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001
-4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1350 525 2250 525 2250 975 1350 975 1350 525
--6
-6 525 1125 1575 1875
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-        1575 1500 1050 1125 525 1500 1050 1875 1575 1500
-4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001
--6
-6 2340 1237 2940 1687
-6 2340 1237 2940 1687
-4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001
-4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001
-4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001
--6
--6
-6 525 3225 1575 3975
-6 675 3375 1425 3750
-4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001
-4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001
--6
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-        525 3600 1050 3225 1575 3600 1050 3975 525 3600
--6
-6 3300 3375 4350 3825
-6 3300 3412 4350 3787
-4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001
-4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        3300 3375 4350 3375 4350 3825 3300 3825 3300 3375
--6
-6 1950 3225 3000 3975
-6 2250 3450 2700 3750
-4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001
-4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-        3000 3600 2475 3225 1950 3600 2475 3975 3000 3600
--6
-6 3150 4500 4200 4950
-6 3150 4537 4200 4912
-4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001
-4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        3150 4500 4200 4500 4200 4950 3150 4950 3150 4500
--6
-6 600 4500 1500 4950
-6 675 4537 1425 4912
-4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001
-4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        600 4500 1500 4500 1500 4950 600 4950 600 4500
--6
-6 4650 4350 5700 5100
-6 4950 4537 5400 4912
-6 4950 4537 5400 4912
-4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001
-4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001
--6
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-        5700 4725 5175 4350 4650 4725 5175 5100 5700 4725
--6
-6 6000 4500 6900 4950
-6 6225 4575 6675 4875
-4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001
-4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001
--6
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        6000 4500 6900 4500 6900 4950 6000 4950 6000 4500
--6
-6 1800 4350 2850 5100
-6 2100 4575 2550 4875
-4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001
-4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001
--6
-2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5
-        2850 4725 2325 4350 1800 4725 2325 5100 2850 4725
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1050 1875 1050 2175
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1575 1500 2100 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1050 450 1050 1125
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1350 750 1050 750
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1050 2925 1050 3225
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3150 1500 3450 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        4350 1500 4650 1500
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-        2100 1500 2625 1125 3150 1500 2625 1875 2100 1500
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1575 3600 1950 3600
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1050 3975 1050 4500
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3000 3600 3300 3600
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1500 4725 1800 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        5700 4725 6000 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        2850 4725 3150 4725
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        4200 4725 4650 4725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        6900 4725 7950 4725
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-       0 0 1.00 60.00 120.00
-        1575 2550 1650 2550 1800 2550 1800 2400 1800 1500
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
-       0 0 1.00 60.00 120.00
-        2250 750 2475 750 2625 750 2625 900 2625 1125
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5
-       0 0 1.00 60.00 120.00
-        7500 4725 7500 1650 7500 1500 7350 1500 5550 1500
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
-       0 0 1.00 60.00 120.00
-        2475 3225 2475 2400 2475 2250 2325 2250 1800 2250
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5
-       0 0 1.00 60.00 120.00
-        3825 3375 3825 2175 3825 2025 3675 2025 1800 2025
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
-       0 0 1.00 60.00 120.00
-        2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125
-        4425 4275 4425 4725
-        0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8
-       0 0 1.00 60.00 120.00
-        5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125
-        7275 4275 7275 4725
-        0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000
-4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001
-4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001
-4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001
-4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001
-4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001
-4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001
-4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001
-4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001
-4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001
diff --git a/lustre/portals/doc/get.fig b/lustre/portals/doc/get.fig
deleted file mode 100644 (file)
index 28db949..0000000
+++ /dev/null
@@ -1,33 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 2775 900 3525 1200
-4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001
-4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001
--6
-6 1350 1725 2175 2025
-4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001
-4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001
--6
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        900 525 2700 750
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        2700 825 2700 1275
-2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2
-       0 0 1.00 60.00 120.00
-        2700 1350 900 1950
-2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
-        2400 300 3600 300 3600 2250 2400 2250 2400 300
-2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5
-        0 300 1200 300 1200 2250 0 2250 0 300
-4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001
-4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
-4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
diff --git a/lustre/portals/doc/ieee.bst b/lustre/portals/doc/ieee.bst
deleted file mode 100644 (file)
index 4df7c50..0000000
+++ /dev/null
@@ -1,1112 +0,0 @@
-% ---------------------------------------------------------------
-%
-% by Paolo.Ienne@di.epfl.ch
-%
-% ---------------------------------------------------------------
-%
-% no guarantee is given that the format corresponds perfectly to 
-% IEEE 8.5" x 11" Proceedings, but most features should be ok.
-%
-% ---------------------------------------------------------------
-%
-% `ieee' from BibTeX standard bibliography style `abbrv'
-% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09.
-% Copyright (C) 1985, all rights reserved.
-% Copying of this file is authorized only if either
-% (1) you make absolutely no changes to your copy, including name, or
-% (2) if you do make changes, you name it something other than
-% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst.
-% This restriction helps ensure that all standard styles are identical.
-% The file btxbst.doc has the documentation for this style.
-
-ENTRY
-  { address
-    author
-    booktitle
-    chapter
-    edition
-    editor
-    howpublished
-    institution
-    journal
-    key
-    month
-    note
-    number
-    organization
-    pages
-    publisher
-    school
-    series
-    title
-    type
-    volume
-    year
-  }
-  {}
-  { label }
-
-INTEGERS { output.state before.all mid.sentence after.sentence after.block }
-
-FUNCTION {init.state.consts}
-{ #0 'before.all :=
-  #1 'mid.sentence :=
-  #2 'after.sentence :=
-  #3 'after.block :=
-}
-
-STRINGS { s t }
-
-FUNCTION {output.nonnull}
-{ 's :=
-  output.state mid.sentence =
-    { ", " * write$ }
-    { output.state after.block =
- { add.period$ write$
-   newline$
-   "\newblock " write$
- }
- { output.state before.all =
-     'write$
-     { add.period$ " " * write$ }
-   if$
- }
-      if$
-      mid.sentence 'output.state :=
-    }
-  if$
-  s
-}
-
-FUNCTION {output}
-{ duplicate$ empty$
-    'pop$
-    'output.nonnull
-  if$
-}
-
-FUNCTION {output.check}
-{ 't :=
-  duplicate$ empty$
-    { pop$ "empty " t * " in " * cite$ * warning$ }
-    'output.nonnull
-  if$
-}
-
-FUNCTION {output.bibitem}
-{ newline$
-  "\bibitem{" write$
-  cite$ write$
-  "}" write$
-  newline$
-  ""
-  before.all 'output.state :=
-}
-
-FUNCTION {fin.entry}
-{ add.period$
-  write$
-  newline$
-}
-
-FUNCTION {new.block}
-{ output.state before.all =
-    'skip$
-    { after.block 'output.state := }
-  if$
-}
-
-FUNCTION {new.sentence}
-{ output.state after.block =
-    'skip$
-    { output.state before.all =
- 'skip$
- { after.sentence 'output.state := }
-      if$
-    }
-  if$
-}
-
-FUNCTION {not}
-{   { #0 }
-    { #1 }
-  if$
-}
-
-FUNCTION {and}
-{   'skip$
-    { pop$ #0 }
-  if$
-}
-
-FUNCTION {or}
-{   { pop$ #1 }
-    'skip$
-  if$
-}
-
-FUNCTION {new.block.checka}
-{ empty$
-    'skip$
-    'new.block
-  if$
-}
-
-FUNCTION {new.block.checkb}
-{ empty$
-  swap$ empty$
-  and
-    'skip$
-    'new.block
-  if$
-}
-
-FUNCTION {new.sentence.checka}
-{ empty$
-    'skip$
-    'new.sentence
-  if$
-}
-
-FUNCTION {new.sentence.checkb}
-{ empty$
-  swap$ empty$
-  and
-    'skip$
-    'new.sentence
-  if$
-}
-
-FUNCTION {field.or.null}
-{ duplicate$ empty$
-    { pop$ "" }
-    'skip$
-  if$
-}
-
-FUNCTION {emphasize}
-{ duplicate$ empty$
-    { pop$ "" }
-    { "{\em " swap$ * "}" * }
-  if$
-}
-
-INTEGERS { nameptr namesleft numnames }
-
-FUNCTION {format.names}
-{ 's :=
-  #1 'nameptr :=
-  s num.names$ 'numnames :=
-  numnames 'namesleft :=
-    { namesleft #0 > }
-    { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't :=
-      nameptr #1 >
- { namesleft #1 >
-     { ", " * t * }
-     { numnames #2 >
-  { "," * }
-  'skip$
-       if$
-       t "others" =
-  { " et~al." * }
-  { " and " * t * }
-       if$
-     }
-   if$
- }
- 't
-      if$
-      nameptr #1 + 'nameptr :=
-      namesleft #1 - 'namesleft :=
-    }
-  while$
-}
-
-FUNCTION {format.authors}
-{ author empty$
-    { "" }
-    { author format.names }
-  if$
-}
-
-FUNCTION {format.editors}
-{ editor empty$
-    { "" }
-    { editor format.names
-      editor num.names$ #1 >
- { ", editors" * }
- { ", editor" * }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.title}
-{ title empty$
-    { "" }
-    { title "t" change.case$ }
-  if$
-}
-
-FUNCTION {n.dashify}
-{ 't :=
-  ""
-    { t empty$ not }
-    { t #1 #1 substring$ "-" =
- { t #1 #2 substring$ "--" = not
-     { "--" *
-       t #2 global.max$ substring$ 't :=
-     }
-     {   { t #1 #1 substring$ "-" = }
-  { "-" *
-    t #2 global.max$ substring$ 't :=
-  }
-       while$
-     }
-   if$
- }
- { t #1 #1 substring$ *
-   t #2 global.max$ substring$ 't :=
- }
-      if$
-    }
-  while$
-}
-
-FUNCTION {format.date}
-{ year empty$
-    { month empty$
- { "" }
- { "there's a month but no year in " cite$ * warning$
-   month
- }
-      if$
-    }
-    { month empty$
- 'year
- { month " " * year * }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.btitle}
-{ title emphasize
-}
-
-FUNCTION {tie.or.space.connect}
-{ duplicate$ text.length$ #3 <
-    { "~" }
-    { " " }
-  if$
-  swap$ * *
-}
-
-FUNCTION {either.or.check}
-{ empty$
-    'pop$
-    { "can't use both " swap$ * " fields in " * cite$ * warning$ }
-  if$
-}
-
-FUNCTION {format.bvolume}
-{ volume empty$
-    { "" }
-    { "volume" volume tie.or.space.connect
-      series empty$
- 'skip$
- { " of " * series emphasize * }
-      if$
-      "volume and number" number either.or.check
-    }
-  if$
-}
-
-FUNCTION {format.number.series}
-{ volume empty$
-    { number empty$
- { series field.or.null }
- { output.state mid.sentence =
-     { "number" }
-     { "Number" }
-   if$
-   number tie.or.space.connect
-   series empty$
-     { "there's a number but no series in " cite$ * warning$ }
-     { " in " * series * }
-   if$
- }
-      if$
-    }
-    { "" }
-  if$
-}
-
-FUNCTION {format.edition}
-{ edition empty$
-    { "" }
-    { output.state mid.sentence =
- { edition "l" change.case$ " edition" * }
- { edition "t" change.case$ " edition" * }
-      if$
-    }
-  if$
-}
-
-INTEGERS { multiresult }
-
-FUNCTION {multi.page.check}
-{ 't :=
-  #0 'multiresult :=
-    { multiresult not
-      t empty$ not
-      and
-    }
-    { t #1 #1 substring$
-      duplicate$ "-" =
-      swap$ duplicate$ "," =
-      swap$ "+" =
-      or or
- { #1 'multiresult := }
- { t #2 global.max$ substring$ 't := }
-      if$
-    }
-  while$
-  multiresult
-}
-
-FUNCTION {format.pages}
-{ pages empty$
-    { "" }
-    { pages multi.page.check
- { "pages" pages n.dashify tie.or.space.connect }
- { "page" pages tie.or.space.connect }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.vol.num.pages}
-{ volume field.or.null
-  number empty$
-    'skip$
-    { "(" number * ")" * *
-      volume empty$
- { "there's a number but no volume in " cite$ * warning$ }
- 'skip$
-      if$
-    }
-  if$
-  pages empty$
-    'skip$
-    { duplicate$ empty$
- { pop$ format.pages }
- { ":" * pages n.dashify * }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.chapter.pages}
-{ chapter empty$
-    'format.pages
-    { type empty$
- { "chapter" }
- { type "l" change.case$ }
-      if$
-      chapter tie.or.space.connect
-      pages empty$
- 'skip$
- { ", " * format.pages * }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.in.ed.booktitle}
-{ booktitle empty$
-    { "" }
-    { editor empty$
- { "In " booktitle emphasize * }
- { "In " format.editors * ", " * booktitle emphasize * }
-      if$
-    }
-  if$
-}
-
-FUNCTION {empty.misc.check}
-{ author empty$ title empty$ howpublished empty$
-  month empty$ year empty$ note empty$
-  and and and and and
-  key empty$ not and
-    { "all relevant fields are empty in " cite$ * warning$ }
-    'skip$
-  if$
-}
-
-FUNCTION {format.thesis.type}
-{ type empty$
-    'skip$
-    { pop$
-      type "t" change.case$
-    }
-  if$
-}
-
-FUNCTION {format.tr.number}
-{ type empty$
-    { "Technical Report" }
-    'type
-  if$
-  number empty$
-    { "t" change.case$ }
-    { number tie.or.space.connect }
-  if$
-}
-
-FUNCTION {format.article.crossref}
-{ key empty$
-    { journal empty$
- { "need key or journal for " cite$ * " to crossref " * crossref *
-   warning$
-   ""
- }
- { "In {\em " journal * "\/}" * }
-      if$
-    }
-    { "In " key * }
-  if$
-  " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.crossref.editor}
-{ editor #1 "{vv~}{ll}" format.name$
-  editor num.names$ duplicate$
-  #2 >
-    { pop$ " et~al." * }
-    { #2 <
- 'skip$
- { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
-     { " et~al." * }
-     { " and " * editor #2 "{vv~}{ll}" format.name$ * }
-   if$
- }
-      if$
-    }
-  if$
-}
-
-FUNCTION {format.book.crossref}
-{ volume empty$
-    { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
-      "In "
-    }
-    { "Volume" volume tie.or.space.connect
-      " of " *
-    }
-  if$
-  editor empty$
-  editor field.or.null author field.or.null =
-  or
-    { key empty$
- { series empty$
-     { "need editor, key, or series for " cite$ * " to crossref " *
-       crossref * warning$
-       "" *
-     }
-     { "{\em " * series * "\/}" * }
-   if$
- }
- { key * }
-      if$
-    }
-    { format.crossref.editor * }
-  if$
-  " \cite{" * crossref * "}" *
-}
-
-FUNCTION {format.incoll.inproc.crossref}
-{ editor empty$
-  editor field.or.null author field.or.null =
-  or
-    { key empty$
- { booktitle empty$
-     { "need editor, key, or booktitle for " cite$ * " to crossref " *
-       crossref * warning$
-       ""
-     }
-     { "In {\em " booktitle * "\/}" * }
-   if$
- }
- { "In " key * }
-      if$
-    }
-    { "In " format.crossref.editor * }
-  if$
-  " \cite{" * crossref * "}" *
-}
-
-FUNCTION {article}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  crossref missing$
-    { journal emphasize "journal" output.check
-      format.vol.num.pages output
-      format.date "year" output.check
-    }
-    { format.article.crossref output.nonnull
-      format.pages output
-    }
-  if$
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {book}
-{ output.bibitem
-  author empty$
-    { format.editors "author and editor" output.check }
-    { format.authors output.nonnull
-      crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
-      if$
-    }
-  if$
-  new.block
-  format.btitle "title" output.check
-  crossref missing$
-    { format.bvolume output
-      new.block
-      format.number.series output
-      new.sentence
-      publisher "publisher" output.check
-      address output
-    }
-    { new.block
-      format.book.crossref output.nonnull
-    }
-  if$
-  format.edition output
-  format.date "year" output.check
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {booklet}
-{ output.bibitem
-  format.authors output
-  new.block
-  format.title "title" output.check
-  howpublished address new.block.checkb
-  howpublished output
-  address output
-  format.date output
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {inbook}
-{ output.bibitem
-  author empty$
-    { format.editors "author and editor" output.check }
-    { format.authors output.nonnull
-      crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
-      if$
-    }
-  if$
-  new.block
-  format.btitle "title" output.check
-  crossref missing$
-    { format.bvolume output
-      format.chapter.pages "chapter and pages" output.check
-      new.block
-      format.number.series output
-      new.sentence
-      publisher "publisher" output.check
-      address output
-    }
-    { format.chapter.pages "chapter and pages" output.check
-      new.block
-      format.book.crossref output.nonnull
-    }
-  if$
-  format.edition output
-  format.date "year" output.check
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {incollection}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  crossref missing$
-    { format.in.ed.booktitle "booktitle" output.check
-      format.bvolume output
-      format.number.series output
-      format.chapter.pages output
-      new.sentence
-      publisher "publisher" output.check
-      address output
-      format.edition output
-      format.date "year" output.check
-    }
-    { format.incoll.inproc.crossref output.nonnull
-      format.chapter.pages output
-    }
-  if$
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {inproceedings}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  crossref missing$
-    { format.in.ed.booktitle "booktitle" output.check
-      format.bvolume output
-      format.number.series output
-      format.pages output
-      address empty$
- { organization publisher new.sentence.checkb
-   organization output
-   publisher output
-   format.date "year" output.check
- }
- { address output.nonnull
-   format.date "year" output.check
-   new.sentence
-   organization output
-   publisher output
- }
-      if$
-    }
-    { format.incoll.inproc.crossref output.nonnull
-      format.pages output
-    }
-  if$
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {conference} { inproceedings }
-
-FUNCTION {manual}
-{ output.bibitem
-  author empty$
-    { organization empty$
- 'skip$
- { organization output.nonnull
-   address output
- }
-      if$
-    }
-    { format.authors output.nonnull }
-  if$
-  new.block
-  format.btitle "title" output.check
-  author empty$
-    { organization empty$
- { address new.block.checka
-   address output
- }
- 'skip$
-      if$
-    }
-    { organization address new.block.checkb
-      organization output
-      address output
-    }
-  if$
-  format.edition output
-  format.date output
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {mastersthesis}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  "Master's thesis" format.thesis.type output.nonnull
-  school "school" output.check
-  address output
-  format.date "year" output.check
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {misc}
-{ output.bibitem
-  format.authors output
-  title howpublished new.block.checkb
-  format.title output
-  howpublished new.block.checka
-  howpublished output
-  format.date output
-  new.block
-  note output
-  fin.entry
-  empty.misc.check
-}
-
-FUNCTION {phdthesis}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.btitle "title" output.check
-  new.block
-  "PhD thesis" format.thesis.type output.nonnull
-  school "school" output.check
-  address output
-  format.date "year" output.check
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {proceedings}
-{ output.bibitem
-  editor empty$
-    { organization output }
-    { format.editors output.nonnull }
-  if$
-  new.block
-  format.btitle "title" output.check
-  format.bvolume output
-  format.number.series output
-  address empty$
-    { editor empty$
- { publisher new.sentence.checka }
- { organization publisher new.sentence.checkb
-   organization output
- }
-      if$
-      publisher output
-      format.date "year" output.check
-    }
-    { address output.nonnull
-      format.date "year" output.check
-      new.sentence
-      editor empty$
- 'skip$
- { organization output }
-      if$
-      publisher output
-    }
-  if$
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {techreport}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  format.tr.number output.nonnull
-  institution "institution" output.check
-  address output
-  format.date "year" output.check
-  new.block
-  note output
-  fin.entry
-}
-
-FUNCTION {unpublished}
-{ output.bibitem
-  format.authors "author" output.check
-  new.block
-  format.title "title" output.check
-  new.block
-  note "note" output.check
-  format.date output
-  fin.entry
-}
-
-FUNCTION {default.type} { misc }
-
-MACRO {jan} {"Jan."}
-
-MACRO {feb} {"Feb."}
-
-MACRO {mar} {"Mar."}
-
-MACRO {apr} {"Apr."}
-
-MACRO {may} {"May"}
-
-MACRO {jun} {"June"}
-
-MACRO {jul} {"July"}
-
-MACRO {aug} {"Aug."}
-
-MACRO {sep} {"Sept."}
-
-MACRO {oct} {"Oct."}
-
-MACRO {nov} {"Nov."}
-
-MACRO {dec} {"Dec."}
-
-MACRO {acmcs} {"ACM Comput. Surv."}
-
-MACRO {acta} {"Acta Inf."}
-
-MACRO {cacm} {"Commun. ACM"}
-
-MACRO {ibmjrd} {"IBM J. Res. Dev."}
-
-MACRO {ibmsj} {"IBM Syst.~J."}
-
-MACRO {ieeese} {"IEEE Trans. Softw. Eng."}
-
-MACRO {ieeetc} {"IEEE Trans. Comput."}
-
-MACRO {ieeetcad}
- {"IEEE Trans. Comput.-Aided Design Integrated Circuits"}
-
-MACRO {ipl} {"Inf. Process. Lett."}
-
-MACRO {jacm} {"J.~ACM"}
-
-MACRO {jcss} {"J.~Comput. Syst. Sci."}
-
-MACRO {scp} {"Sci. Comput. Programming"}
-
-MACRO {sicomp} {"SIAM J. Comput."}
-
-MACRO {tocs} {"ACM Trans. Comput. Syst."}
-
-MACRO {tods} {"ACM Trans. Database Syst."}
-
-MACRO {tog} {"ACM Trans. Gr."}
-
-MACRO {toms} {"ACM Trans. Math. Softw."}
-
-MACRO {toois} {"ACM Trans. Office Inf. Syst."}
-
-MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."}
-
-MACRO {tcs} {"Theoretical Comput. Sci."}
-
-READ
-
-FUNCTION {sortify}
-{ purify$
-  "l" change.case$
-}
-
-INTEGERS { len }
-
-FUNCTION {chop.word}
-{ 's :=
-  'len :=
-  s #1 len substring$ =
-    { s len #1 + global.max$ substring$ }
-    's
-  if$
-}
-
-FUNCTION {sort.format.names}
-{ 's :=
-  #1 'nameptr :=
-  ""
-  s num.names$ 'numnames :=
-  numnames 'namesleft :=
-    { namesleft #0 > }
-    { nameptr #1 >
- { "   " * }
- 'skip$
-      if$
-      s nameptr "{vv{ } }{ll{ }}{  f{ }}{  jj{ }}" format.name$ 't :=
-      nameptr numnames = t "others" = and
- { "et al" * }
- { t sortify * }
-      if$
-      nameptr #1 + 'nameptr :=
-      namesleft #1 - 'namesleft :=
-    }
-  while$
-}
-
-FUNCTION {sort.format.title}
-{ 't :=
-  "A " #2
-    "An " #3
-      "The " #4 t chop.word
-    chop.word
-  chop.word
-  sortify
-  #1 global.max$ substring$
-}
-
-FUNCTION {author.sort}
-{ author empty$
-    { key empty$
- { "to sort, need author or key in " cite$ * warning$
-   ""
- }
- { key sortify }
-      if$
-    }
-    { author sort.format.names }
-  if$
-}
-
-FUNCTION {author.editor.sort}
-{ author empty$
-    { editor empty$
- { key empty$
-     { "to sort, need author, editor, or key in " cite$ * warning$
-       ""
-     }
-     { key sortify }
-   if$
- }
- { editor sort.format.names }
-      if$
-    }
-    { author sort.format.names }
-  if$
-}
-
-FUNCTION {author.organization.sort}
-{ author empty$
-    { organization empty$
- { key empty$
-     { "to sort, need author, organization, or key in " cite$ * warning$
-       ""
-     }
-     { key sortify }
-   if$
- }
- { "The " #4 organization chop.word sortify }
-      if$
-    }
-    { author sort.format.names }
-  if$
-}
-
-FUNCTION {editor.organization.sort}
-{ editor empty$
-    { organization empty$
- { key empty$
-     { "to sort, need editor, organization, or key in " cite$ * warning$
-       ""
-     }
-     { key sortify }
-   if$
- }
- { "The " #4 organization chop.word sortify }
-      if$
-    }
-    { editor sort.format.names }
-  if$
-}
-
-FUNCTION {presort}
-{ type$ "book" =
-  type$ "inbook" =
-  or
-    'author.editor.sort
-    { type$ "proceedings" =
- 'editor.organization.sort
- { type$ "manual" =
-     'author.organization.sort
-     'author.sort
-   if$
- }
-      if$
-    }
-  if$
-  "    "
-  *
-  year field.or.null sortify
-  *
-  "    "
-  *
-  title field.or.null
-  sort.format.title
-  *
-  #1 entry.max$ substring$
-  'sort.key$ :=
-}
-
-ITERATE {presort}
-
-SORT
-
-STRINGS { longest.label }
-
-INTEGERS { number.label longest.label.width }
-
-FUNCTION {initialize.longest.label}
-{ "" 'longest.label :=
-  #1 'number.label :=
-  #0 'longest.label.width :=
-}
-
-FUNCTION {longest.label.pass}
-{ number.label int.to.str$ 'label :=
-  number.label #1 + 'number.label :=
-  label width$ longest.label.width >
-    { label 'longest.label :=
-      label width$ 'longest.label.width :=
-    }
-    'skip$
-  if$
-}
-
-EXECUTE {initialize.longest.label}
-
-ITERATE {longest.label.pass}
-
-FUNCTION {begin.bib}
-{ preamble$ empty$
-    'skip$
-    { preamble$ write$ newline$ }
-  if$
-  "\begin{thebibliography}{"  longest.label  * 
-  "}\setlength{\itemsep}{-1ex}\small" * write$ newline$
-}
-
-EXECUTE {begin.bib}
-
-EXECUTE {init.state.consts}
-
-ITERATE {call.type$}
-
-FUNCTION {end.bib}
-{ newline$
-  "\end{thebibliography}" write$ newline$
-}
-
-EXECUTE {end.bib}
-
-% end of file ieee.bst
-% ---------------------------------------------------------------
diff --git a/lustre/portals/doc/mpi.fig b/lustre/portals/doc/mpi.fig
deleted file mode 100644 (file)
index e1a91b5..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 150 1650 900 2025
-4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001
-4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001
--6
-6 150 150 900 525
-4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001
-4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001
--6
-6 2550 4125 3150 4725
-4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001
-4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001
-4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001
--6
-6 1050 1575 1950 1875
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1050 1575 1950 1575 1950 1875 1050 1875 1050 1575
-4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001
--6
-6 5400 1575 6300 2175
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        5400 1575 6300 1575 6300 2175 5400 2175 5400 1575
-4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001
--6
-6 5400 2400 6300 3000
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        5400 2400 6300 2400 6300 3000 5400 3000 5400 2400
-4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001
--6
-6 1050 2400 1950 2700
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1050 2400 1950 2400 1950 2700 1050 2700 1050 2400
-4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001
--6
-6 1050 825 1950 1125
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1050 825 1950 825 1950 1125 1050 1125 1050 825
-4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001
--6
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1500 1125 1500 1575
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3225 2025 4050 3375
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-        150 675 6600 675
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-        150 1350 6600 1350
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2400 4125 3300 4125 3300 4725 2400 4725 2400 4125
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3225 4500 4050 3675
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3225 1725 5400 1725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3225 2550 5400 2550
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3225 2850 4050 3450
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1500 1800 1500 2400
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2400 825 3300 825 3300 1275 2400 1275 2400 825
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1500 2625 1500 4125
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1050 4125 1950 4125 1950 4425 1050 4425 1050 4125
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1500 300 1500 825
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1875 975 2400 975
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1875 1725 2400 1725
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1875 2550 2400 2550
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        1875 4275 2400 4275
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2400 1575 3300 1575 3300 2175 2400 2175 2400 1575
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2400 2400 3300 2400 3300 3000 2400 3000 2400 2400
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        4050 3300 5250 3300 5250 3750 4050 3750 4050 3300
-4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001
-4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001
-4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001
-4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001
-4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001
-4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001
-4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001
-4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001
-4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - short_len\001
-4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001
-4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001
-4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001
-4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001
-4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001
-4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001
diff --git a/lustre/portals/doc/portals.fig b/lustre/portals/doc/portals.fig
deleted file mode 100644 (file)
index 9b1271b..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1350 900 1650 900 1650 1200 1350 1200 1350 900
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        1800 1350 2100 1350 2100 1650 1800 1650 1800 1350
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2250 1800 2550 1800 2550 2100 2250 2100 2250 1800
-2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2
-        4200 375 4200 2100
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        525 600 1125 600 1125 2100 525 2100 525 600
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        4425 1275 4875 1275 4875 1950 4425 1950 4425 1275
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        2550 1200 3150 1200 3150 1500 2550 1500 2550 1200
-2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3000 1425 4425 1425
-2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5
-        3600 825 3750 825 3750 1125 3600 1125 3600 825
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        2025 1425 2550 1425
-2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5
-        4425 750 4875 750 4875 1125 4425 1125 4425 750
-2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        3675 975 4425 975
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2
-       0 0 1.00 60.00 120.00
-        825 1050 1350 1050
-        0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-       0 0 1.00 60.00 120.00
-        1500 1125 1500 1350 1500 1500 1650 1500 1800 1500
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5
-       0 0 1.00 60.00 120.00
-        1950 1575 1950 1800 1950 1950 2100 1950 2250 1950
-        0.000 1.000 1.000 1.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
-        525 975 1125 975
-        0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2
-        525 1125 1125 1125
-        0.000 0.000
-3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7
-       0 0 1.00 60.00 120.00
-        3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975
-        3600 975
-        0.000 1.000 1.000 1.000 1.000 1.000 0.000
-4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001
-4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001
-4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001
-4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001
-4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001
-4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001
-4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001
-4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001
-4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001
diff --git a/lustre/portals/doc/portals3.bib b/lustre/portals/doc/portals3.bib
deleted file mode 100644 (file)
index 323b99f..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-@Article{           Cplant,
-    title       = { {M}assively {P}arallel {C}omputing with
-                    {C}ommodity {C}omponents },
-    author      = { Ron Brightwell and David S. Greenberg and Arthur
-                    B. Maccabe and Rolf Riesen },
-    journal     = { Parallel Computing },
-    volume      = { 26 },
-    month       = { February },
-    pages       = { 243-266 },
-    year        = { 2000 }
-}
-
-@Manual{     Portals,
-    organization = { Sandia National Laboratories },
-    title        = { {P}uma {P}ortals },
-    note         = { http://www.cs.sandia.gov/puma/portals },
-    year         = { 1997 }
-}
-
-@Techreport{      VIA,
-  title         = { {V}irtual {I}nterface {A}rchitecture
-                    {S}pecification {V}ersion 1.0 }, 
-  author        = { {Compaq, Microsoft, and Intel} },
-  institution   = { Compaq, Microsoft, and Intel },
-  month         = { December },
-  year          = { 1997 }
-}
-
-@Techreport{      ST,
-  title         = { {I}nformation {T}echnology - {S}cheduled
-                  {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 },
-  author        = { {Task Group of Technical Committee T11} },
-  institution   = { Accredited Standards Committee NCITS },
-  month         = { July },
-  year          = { 1998 }
-}
-
-@Manual{     TFLOPS,
-    organization = { Sandia National Laboratories },
-    title        = { ASCI Red },
-    note         = { http://www.sandia.gov/ASCI/TFLOP },
-    year         = { 1996 }
-}
-
-@Techreport{      GM,
-  title         = { The {GM} {M}essage {P}assing {S}ystem },
-  author         = { {Myricom, Inc.} },
-  institution    = { {Myricom, Inc.} },
-  year          = { 1997 },
-}
-
-@Article{           MPIstandard,
-    title        = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard },
-    author       = { {Message Passing Interface Forum} },
-    journal      = { The International Journal of Supercomputer Applications
-                     and High Performance Computing },
-    volume       = { 8 },
-    year         = { 1994 }
-}
-
-@Inproceedings{    PumaOS,
-    author       = "Lance Shuler and Chu Jong and Rolf Riesen and
-                    David van Dresser and Arthur B. Maccabe and
-                    Lee Ann Fisk and T. Mack Stallcup",
-    booktitle    = "Proceeding of the 1995 Intel Supercomputer
-                    User's Group Conference",
-    title        = "The {P}uma Operating System for Massively Parallel Computers",
-    organization = "Intel Supercomputer User's Group",
-    year         = 1995
-}
-
-@InProceedings{   SUNMOS,
-author          = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and
-                   Stephen R. Wheat",
-title           = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide",
-booktitle       = "Proceedings of the {Intel} Supercomputer Users' Group. 1994
-                   Annual North America Users' Conference.",
-year            = 1994,
-pages           = "245--251",
-month           = "June",
-location        = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps"
-}
-
-@InProceedings {   PumaMPI,
-    title        = { Design and Implementation of {MPI} on {P}uma Portals },
-    author       = { Ron Brightwell and Lance Shuler },
-    booktitle    = { Proceedings of the Second MPI Developer's Conference },
-    pages        = { 18-25 },
-    month        = { July },
-    year         = { 1996 }
-}
-
-@Inproceedings{     FM2,
-    author       = { Mario Lauria and Scott Pakin and Andrew Chien },
-    title        = { {E}fficient {L}ayering for {H}igh {S}peed
-                     {C}ommunication: {F}ast {M}essages 2.x },
-    Booktitle    = { Proceedings of the IEEE International Symposium
-                     on High Performance Distributed Computing },
-    year         = { 1998 }
-}
-
-@Manual {          CraySHMEM,
-    title        = "SHMEM Technical Note for C, SG-2516 2.3",
-    organization = "Cray Research, Inc.",
-    month        = "October",
-    year         = 1994
-}
-
-@Manual {          MPI2,
-    title        = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface",
-    organization = "Message Passing Interface Forum",
-    note         = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html",
-    month        = "July",
-    year         = 1997
-}
-
-@InProceedings {   PMMPI,
-    title        = { {The Design and Implementation of Zero Copy MPI Using
-                       Commodity Hardware with a High Performance Network} },
-    author       = { Francis O'Carroll and  Hiroshi Tezuka and Atsushi Hori
-                     and Yutaka Ishikawa  },
-    booktitle    = { Proceedings of the ICS },
-    year         = { 1998 }
-}
diff --git a/lustre/portals/doc/portals3.lyx b/lustre/portals/doc/portals3.lyx
deleted file mode 100644 (file)
index 8429280..0000000
+++ /dev/null
@@ -1,15944 +0,0 @@
-#LyX 1.2 created this file. For more info see http://www.lyx.org/
-\lyxformat 220
-\textclass report
-\begin_preamble
-\usepackage{fullpage}
-\renewenvironment{comment}%
-{\begin{quote}\textbf{Discussion}: \slshape}%
-{\end{quote}}
-\pagestyle{myheadings}
-\end_preamble
-\language american
-\inputencoding auto
-\fontscheme pslatex
-\graphics default
-\paperfontsize 10
-\spacing single 
-\papersize letterpaper
-\paperpackage a4
-\use_geometry 0
-\use_amsmath 0
-\use_natbib 0
-\use_numerical_citations 0
-\paperorientation portrait
-\secnumdepth 2
-\tocdepth 2
-\paragraph_separation indent
-\defskip medskip
-\quotes_language english
-\quotes_times 2
-\papercolumns 1
-\papersides 2
-\paperpagestyle headings
-
-\layout Title
-
-The Portals 3.2 Message Passing Interface 
-\newline 
- Revision 1.1
-\layout Author
-
-Ron Brightwell
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-R.
- Brightwell and R.
- Riesen are with the Scalable Computing Systems Department, Sandia National
- Laboratories, P.O.
- Box 5800, Albuquerque, NM\SpecialChar ~
-\SpecialChar ~
-87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov.
-\end_inset 
-
-, Arthur B.
- Maccabe
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-A.
- B.
- Maccabe is with the Computer Science Department, University of New Mexico,
- Albuquerque, NM\SpecialChar ~
-\SpecialChar ~
-87131-1386, maccabe@cs.unm.edu.
-\end_inset 
-
-, Rolf Riesen and Trammell Hudson
-\layout Abstract
-
-This report presents a specification for the Portals 3.2 message passing
- interface.
- Portals 3.2 is intended to allow scalable, high-performance network communicatio
-n between nodes of a parallel computing system.
- Specifically, it is designed to support a parallel computing platform composed
- of clusters of commodity workstations connected by a commodity system area
- network fabric.
- In addition, Portals 3.2 is well suited to massively parallel processing
- and embedded systems.
- Portals 3.2 represents an adaption of the data movement layer developed
- for massively parallel processing platforms, such as the 4500-node Intel
- TeraFLOPS machine.
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-clearpage
-\backslash 
-pagenumbering{roman}
-\backslash 
-setcounter{page}{3}
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset LatexCommand \tableofcontents{}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-cleardoublepage
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset FloatList figure
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-cleardoublepage
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset FloatList table
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-cleardoublepage
-\end_inset 
-
-
-\layout Chapter*
-
-Summary of Changes for Revision 1.1
-\layout Enumerate
-
-Updated version number to 3.2 throughout the document
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sub:PtlGetId}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_SEGV
-\family default 
- to error list for 
-\shape italic 
-PtlGetId
-\shape default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_ML_TOOLONG
-\family default 
- to error list for 
-\shape italic 
-PtlMEAttach
-\shape default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:meunlink}
-
-\end_inset 
-
-: removed text referring to a list of associated memory descriptors.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset 
-
-: added text to describe unlinking a free-floating memory descriptor.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset 
-
-: added entry for 
-\family typewriter 
-ptl_seq_t
-\family default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-:
-\begin_deeper 
-\layout Enumerate
-
-added definition of 
-\family typewriter 
-max_offset
-\family default 
-.
-\layout Enumerate
-
-added text to clarify 
-\family typewriter 
-PTL_MD_MANAGE_REMOTE
-\family default 
-.
-\end_deeper 
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset 
-
-: modified text for 
-\family typewriter 
-unlink_op
-\family default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset 
-
-: added text to clarify multiple calls to 
-\shape italic 
-PtlNIInit
-\shape default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset 
-
-: added text to clarify 
-\family typewriter 
-unlink_nofit
-\family default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:receiving}
-
-\end_inset 
-
-: removed text indicating that an MD will reject a message if the associated
- EQ is full.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_MD_INUSE
-\family default 
- error code and text to indicate that only MDs with no pending operations
- can be unlinked.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_MD_INUSE
-\family default 
- return code.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset 
-
-: added user id field, MD handle field, and NI specific failure field to
- the 
-\family typewriter 
-ptl_event_t
-\family default 
- structure.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset 
-
-: added 
-\family typewriter 
-ptl_ni_fail_t
-\family default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_EVENT_UNLINK
-\family default 
- event type.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset 
-
-: removed 
-\shape slanted 
-PtlTransId
-\shape default 
-.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-, Section 
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset 
-
-, Section 
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-: listed allowable constants with relevant fields.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset 
-
-: added 
-\shape italic 
-PtlMEAttachAny
-\shape default 
- function.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset 
-
-: added 
-\family typewriter 
-PTL_PT_FULL
-\family default 
- return code for 
-\shape italic 
-PtlMEAttachAny
-\shape default 
-.
-\layout Enumerate
-
-Table 
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset 
-
-: updated to reflect new event types.
-\layout Enumerate
-
-Section 
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-: added 
-\family typewriter 
-ptl_nid_t
-\family default 
-, 
-\family typewriter 
-ptl_pid_t
-\family default 
-, and 
-\family typewriter 
-ptl_uid_t
-\family default 
-.
-\layout Chapter*
-
-Summary of Changes for Version 3.1
-\layout Section*
-
-Thread Issues
-\layout Standard
-
-The most significant change to the interface from version 3.0 to 3.1 involves
- the clarification of how the interface interacts with multi-threaded applicatio
-ns.
- We adopted a generic thread model in which processes define an address
- space and threads share the address space.
- Consideration of the API in the light of threads led to several clarifications
- throughout the document: 
-\layout Enumerate
-
-Glossary: 
-\begin_deeper 
-\layout Enumerate
-
-added a definition for 
-\emph on 
-thread
-\emph default 
-, 
-\layout Enumerate
-
-reworded the definition for 
-\emph on 
-process
-\emph default 
-.
-\end_deeper 
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:apiover}
-
-\end_inset 
-
-: added section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:threads}
-
-\end_inset 
-
- to describe the multi-threading model used by the Portals API.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ptlinit}
-
-\end_inset 
-
-: 
-\emph on 
-PtlInit
-\emph default 
- must be called at least once and may be called any number of times.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ptlfini}
-
-\end_inset 
-
-: 
-\emph on 
-PtlFini
-\emph default 
- should be called once as the process is terminating and not as each thread
- terminates.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset 
-
-: Portals does not define thread ids.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-: network interfaces are associated with processes, not threads.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset 
-
-: 
-\emph on 
-PtlNIInit
-\emph default 
- must be called at least once and may be called any number of times.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:eqget}
-
-\end_inset 
-
-: 
-\emph on 
-PtlEQGet
-\emph default 
- returns 
-\family typewriter 
-PTL_EQ_EMPTY
-\family default 
- if a thread is blocked on 
-\emph on 
-PtlEQWait
-\emph default 
-.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:eqwait}
-
-\end_inset 
-
-: waiting threads are awakened in FIFO order.
-\layout Standard
-
-Two functions, 
-\emph on 
-PtlNIBarrier
-\emph default 
- and 
-\emph on 
-PtlEQCount
-\emph default 
- were removed from the API.
-\emph on 
-PtlNIBarrier
-\emph default 
- was defined to block the calling process until all of the processes in
- the application group had invoked 
-\emph on 
-PtlNIBarrier
-\emph default 
-.
- We now consider this functionality, along with the concept of groups (see
- the discussion under 
-\begin_inset Quotes eld
-\end_inset 
-
-other changes
-\begin_inset Quotes erd
-\end_inset 
-
-), to be part of the runtime system, not part of the Portals API.
-\emph on 
-PtlEQCount
-\emph default 
- was defined to return the number of events in an event queue.
- Because external operations may lead to new events being added and other
- threads may remove events, the value returned by 
-\emph on 
-PtlEQCount
-\emph default 
- would have to be a hint about the number of events in the event queue.
-\layout Section*
-
-Handling small, unexpected messages
-\layout Standard
-
-Another set of changes relates to handling small unexpected messages in
- MPI.
- In designing version 3.0, we assumed that each unexpected message would
- be placed in a unique memory descriptor.
- To avoid the need to process a long list of memory descriptors, we moved
- the memory descriptors out of the match list and hung them off of a single
- match list entry.
- In this way, large unexpected messages would only encounter a single 
-\begin_inset Quotes eld
-\end_inset 
-
-short message
-\begin_inset Quotes erd
-\end_inset 
-
- match list entry before encountering the 
-\begin_inset Quotes eld
-\end_inset 
-
-long message
-\begin_inset Quotes erd
-\end_inset 
-
- match list entry.
- Experience with this strategy identified resource management problems with
- this approach.
- In particular, a long sequence of very short (or zero length) messages
- could quickly exhaust the memory descriptors constructed for handling unexpecte
-d messages.
- Our new strategy involves the use of several very large memory descriptors
- for small unexpected messages.
- Consecutive unexpected messages will be written into the first of these
- memory descriptors until the memory descriptor fills up.
- When the first of the 
-\begin_inset Quotes eld
-\end_inset 
-
-small memory
-\begin_inset Quotes erd
-\end_inset 
-
- descriptors fills up, it will be unlinked and subsequent short messages
- will be written into the next 
-\begin_inset Quotes eld
-\end_inset 
-
-short message
-\begin_inset Quotes erd
-\end_inset 
-
- memory descriptor.
- In this case, a 
-\begin_inset Quotes eld
-\end_inset 
-
-short message
-\begin_inset Quotes erd
-\end_inset 
-
- memory descriptor will be declared full when it does not have sufficient
- space for the largest small unexpected message.
-\layout Standard
-
-This led to two significant changes.
- First, each match list entry now has a single memory descriptor rather
- than a list of memory descriptors.
- Second, in addition to exceeding the operation threshold, a memory descriptor
- can be unlinked when the local offset exceeds a specified value.
- These changes have led to several changes in this document: 
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{subsec:paddress}
-
-\end_inset 
-
-: 
-\begin_deeper 
-\layout Enumerate
-
-removed references to the memory descriptor list, 
-\layout Enumerate
-
-changed the portals address translation description to indicate that unlinking
- a memory descriptor implies unlinking the associated match list entry--match
- list entries can no longer be unlinked independently from the memory descriptor.
-\end_deeper 
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-: 
-\begin_deeper 
-\layout Enumerate
-
-removed unlink from argument list, 
-\layout Enumerate
-
-removed description of 
-\family typewriter 
-ptl_unlink
-\family default 
- type, 
-\layout Enumerate
-
-changed wording of the error condition when the Portal table index already
- has an associated match list.
-\end_deeper 
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset 
-
-: removed unlink from argument list.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-: added 
-\family typewriter 
-max_offset
-\family default 
-.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset 
-
-: 
-\begin_deeper 
-\layout Enumerate
-
-added description of 
-\family typewriter 
-ptl_unlink
-\family default 
- type, 
-\layout Enumerate
-
-removed reference to memory descriptor lists, 
-\layout Enumerate
-
-changed wording of the error condition when match list entry already has
- an associated memory descriptor, 
-\layout Enumerate
-
-changed the description of the 
-\family typewriter 
-unlink
-\family default 
- argument.
-\end_deeper 
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-: removed 
-\family typewriter 
-PtlMDInsert
-\family default 
- operation.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset 
-
-: removed references to memory descriptor list.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset 
-
-: removed reference to memory descriptor list.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:summary}
-
-\end_inset 
-
-: removed references to PtlMDInsert.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:semantics}
-
-\end_inset 
-
-: removed reference to memory descriptor list.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:exmpi}
-
-\end_inset 
-
-: revised the MPI example to reflect the changes to the interface.
-\layout Standard
-
-Several changes have been made to improve the general documentation of the
- interface.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-: documented the special value 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
-.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-: documented the special value 
-\family typewriter 
-PTL_ID_ANY
-\family default 
-.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset 
-
-: documented the return value 
-\family typewriter 
-PTL_INV_EQ
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset 
-
-: clarified the description of the 
-\emph on 
-PtlMDUpdate
-\emph default 
- function.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:implvals}
-
-\end_inset 
-
-: introduced a new section to document the implementation defined values.
-\layout Enumerate
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:summary}
-
-\end_inset 
-
-: modified Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset 
-
- to indicate where each constant is introduced and where it is used.
-\layout Section*
-
-Other changes
-\layout Subsection*
-
-Implementation defined limits (Section 
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset 
-
-)
-\layout Standard
-
-The earlier version provided implementation defined limits for the maximum
- number of match entries, the maximum number of memory descriptors, etc.
- Rather than spanning the entire implementation, these limits are now associated
- with individual network interfaces.
-\layout Subsection*
-
-Added User Ids (Section 
-\begin_inset LatexCommand \ref{sec:uid}
-
-\end_inset 
-
-)
-\layout Standard
-
-Group Ids had been used to simplify access control entries.
- In particular, a process could allow access for all of the processes in
- a group.
- User Ids have been introduced to regain this functionality.
- We use user ids to fill this role.
-\layout Subsection*
-
-Removed Group Ids and Rank Ids (Section 
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset 
-
-)
-\layout Standard
-
-The earlier version of Portals had two forms for addressing processes: <node
- id, process id> and <group id, rank id>.
- A process group was defined as the collection of processes created during
- application launch.
- Each process in the group was given a unique rank id in the range 0 to
-\begin_inset Formula $n-1$
-\end_inset 
-
- where 
-\begin_inset Formula $n$
-\end_inset 
-
- was the number of processes in the group.
- We removed groups because they are better handled in the runtime system.
-\layout Subsection*
-
-Match lists (Section 
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-)
-\layout Standard
-
-It is no longer illegal to have an existing match entry when calling PtlMEAttach.
- A position argument was added to the list of arguments supplied to 
-\emph on 
-PtlMEAttach
-\emph default 
- to specify whether the new match entry is prepended or appended to the
- existing list.
- If there is no existing match list, the position argument is ignored.
-\layout Subsection*
-
-Unlinking Memory Descriptors (Section 
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-)
-\layout Standard
-
-Previously, a memory descriptor could be unlinked if the offset exceeded
- a threshold upon the completion of an operation.
- In this version, the unlinking is delayed until there is a matching operation
- which requires more memory than is currently available in the descriptor.
- In addition to changes in section, this led to a revision of Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:flow}
-
-\end_inset 
-
-.
-\layout Subsection*
-
-Split Phase Operations and Events (Section 
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-)
-\layout Standard
-
-Previously, there were five types of events: 
-\family typewriter 
-PTL_EVENT_PUT
-\family default 
-, 
-\family typewriter 
-PTL_EVENT_GET
-\family default 
-, 
-\family typewriter 
-PTL_EVENT_REPLY
-\family default 
-, 
-\family typewriter 
-PTL_EVENT_SENT
-\family default 
-, and 
-\family typewriter 
-PTL_EVENT_ACK.
-\family default 
- The first four of these reflected the completion of potentially long operations.
- We have introduced new event types to reflect the fact that long operations
- have a distinct starting point and a distinct completion point.
- Moreover, the completion may be successful or unsuccessful.
-\layout Standard
-
-In addition to providing a mechanism for reporting failure to higher levels
- of software, this split provides an opportunity for improved ordering
- semantics.
- Previously, if one process initiated two operations (e.g., two put operations)
- on a remote process, these operations were guaranteed to complete in the
- same order that they were initiated.
- Now, we only guarantee that the initiation events are delivered in the
- same order.
- In particular, the operations do not need to complete in the order that
- they were initiated.
-\layout Subsection*
-
-Well known process ids (Section 
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset 
-
-)
-\layout Standard
-
-To support the notion of 
-\begin_inset Quotes eld
-\end_inset 
-
-well known process ids,
-\begin_inset Quotes erd
-\end_inset 
-
- we added a process id argument to the arguments for PtlNIInit.
-\layout Chapter*
-
-Glossary
-\layout Description
-
-API Application Programming Interface.
- A definition of the functions and semantics provided by a library of functions.
-\layout Description
-
-Initiator A 
-\emph on 
-process
-\emph default 
- that initiates a message operation.
-\layout Description
-
-Message An application-defined unit of data that is exchanged between 
-\emph on 
-processes
-\emph default 
-.
-\layout Description
-
-Message\SpecialChar ~
-Operation Either a put operation, which writes data, or a get operation,
- which reads data.
-\layout Description
-
-Network A network provides point-to-point communication between 
-\emph on 
-nodes
-\emph default 
-.
- Internally, a network may provide multiple routes between endpoints (to
- improve fault tolerance or to improve performance characteristics); however,
- multiple paths will not be exposed outside of the network.
-\layout Description
-
-Node A node is an endpoint in a 
-\emph on 
-network
-\emph default 
-.
- Nodes provide processing capabilities and memory.
- A node may provide multiple processors (an SMP node) or it may act as a
-\emph on 
-gateway
-\emph default 
- between networks.
-\layout Description
-
-Process A context of execution.
- A process defines a virtual memory (VM) context.
- This context is not shared with other processes.
- Several threads may share the VM context defined by a process.
-\layout Description
-
-Target A 
-\emph on 
-process
-\emph default 
- that is acted upon by a message operation.
-\layout Description
-
-Thread A context of execution that shares a VM context with other threads.
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-cleardoublepage
-\layout Standard
-
-\backslash 
-setcounter{page}{1}
-\backslash 
-pagenumbering{arabic}
-\end_inset 
-
-
-\layout Chapter
-
-Introduction
-\begin_inset LatexCommand \label{sec:intro}
-
-\end_inset 
-
-
-\layout Section
-
-Overview
-\layout Standard
-
-This document describes an application programming interface for message
- passing between nodes in a system area network.
- The goal of this interface is to improve the scalability and performance
- of network communication by defining the functions and semantics of message
- passing required for scaling a parallel computing system to ten thousand
- nodes.
- This goal is achieved by providing an interface that will allow a quality
- implementation to take advantage of the inherently scalable design of Portals.
-\layout Standard
-
-This document is divided into several sections: 
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:intro}
-
-\end_inset 
-
----Introduction This section describes the purpose and scope of the Portals
- API.
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:apiover}
-
-\end_inset 
-
----An\SpecialChar ~
-Overview\SpecialChar ~
-of\SpecialChar ~
-the\SpecialChar ~
-Portals\SpecialChar ~
-3.2\SpecialChar ~
-API This section gives a brief overview of the
- Portals API.
- The goal is to introduce the key concepts and terminology used in the descripti
-on of the API.
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:api}
-
-\end_inset 
-
----The\SpecialChar ~
-Portals\SpecialChar ~
-3.2\SpecialChar ~
-API This section describes the functions and semantics of
- the Portals application programming interface.
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:semantics}
-
-\end_inset 
-
----The\SpecialChar ~
-Semantics\SpecialChar ~
-of\SpecialChar ~
-Message\SpecialChar ~
-Transmission This section describes the semantics
- of message transmission.
- In particular, the information transmitted in each type of message and
- the processing of incoming messages.
-\layout Description
-
-Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:examples}
-
-\end_inset 
-
----Examples This section presents several examples intended to illustrate
- the use of the Portals API.
-\layout Section
-
-Purpose
-\layout Standard
-
-Existing message passing technologies available for commodity cluster networking
- hardware do not meet the scalability goals required by the Cplant\SpecialChar ~
-
-\begin_inset LatexCommand \cite{Cplant}
-
-\end_inset 
-
- project at Sandia National Laboratories.
- The goal of the Cplant project is to construct a commodity cluster that
- can scale to the order of ten thousand nodes.
- This number greatly exceeds the capacity for which existing message passing
- technologies have been designed and implemented.
-\layout Standard
-
-In addition to the scalability requirements of the network, these technologies
- must also be able to support a scalable implementation of the Message Passing
- Interface (MPI)\SpecialChar ~
-
-\begin_inset LatexCommand \cite{MPIstandard}
-
-\end_inset 
-
- standard, which has become the 
-\shape italic 
-de facto
-\shape default 
- standard for parallel scientific computing.
- While MPI does not impose any scalability limitations, existing message
- passing technologies do not provide the functionality needed to allow implement
-ations of MPI to meet the scalability requirements of Cplant.
-\layout Standard
-
-The following are properties of a network architecture that do not impose
- any inherent scalability limitations: 
-\layout Itemize
-
-Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~
-
-\begin_inset LatexCommand \cite{VIA}
-
-\end_inset 
-
- and TCP/IP sockets, have limitations on the number of peer connections
- that can be established.
-\layout Itemize
-
-Network independence - Many communication systems depend on the host processor
- to perform operations in order for messages in the network to be consumed.
- Message consumption from the network should not be dependent on host processor
- activity, such as the operating system scheduler or user-level thread scheduler.
-\layout Itemize
-
-User-level flow control - Many communication systems manage flow control
- internally to avoid depleting resources, which can significantly impact
- performance as the number of communicating processes increases.
-\layout Itemize
-
-OS Bypass - High performance network communication should not involve memory
- copies into or out of a kernel-managed protocol stack.
-\layout Standard
-
-The following are properties of a network architecture that do not impose
- scalability limitations for an implementation of MPI:
-\layout Itemize
-
-Receiver-managed - Sender-managed message passing implementations require
- a persistent block of memory to be available for every process, requiring
- memory resources to increase with job size and requiring user-level flow
- control mechanisms to manage these resources.
-\layout Itemize
-
-User-level Bypass - While OS Bypass is necessary for high-performance, it
- alone is not sufficient to support the Progress Rule of MPI asynchronous
- operations.
-\layout Itemize
-
-Unexpected messages - Few communication systems have support for receiving
- messages for which there is no prior notification.
- Support for these types of messages is necessary to avoid flow control
- and protocol overhead.
-\layout Section
-
-Background
-\layout Standard
-
-Portals was originally designed for and implemented on the nCube machine
- as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~
-
-\begin_inset LatexCommand \cite{SUNMOS}
-
-\end_inset 
-
- and Puma\SpecialChar ~
-
-\begin_inset LatexCommand \cite{PumaOS}
-
-\end_inset 
-
- lightweight kernel development projects.
- Portals went through two design phases, the latter of which is used on
- the 4500-node Intel TeraFLOPS machine\SpecialChar ~
-
-\begin_inset LatexCommand \cite{TFLOPS}
-
-\end_inset 
-
-.
- Portals have been very successful in meeting the needs of such a large
- machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~
-
-\begin_inset LatexCommand \cite{PumaMPI}
-
-\end_inset 
-
-, but also for implementing the scalable run-time environment and parallel
- I/O capabilities of the machine.
-\layout Standard
-
-The second generation Portals implementation was designed to take full advantage
- of the hardware architecture of large MPP machines.
- However, efforts to implement this same design on commodity cluster technology
- identified several limitations, due to the differences in network hardware
- as well as to shortcomings in the design of Portals.
-\layout Section
-
-Scalability
-\layout Standard
-
-The primary goal in the design of Portals is scalability.
- Portals are designed specifically for an implementation capable of supporting
- a parallel job running on tens of thousands of nodes.
- Performance is critical only in terms of scalability.
- That is, the level of message passing performance is characterized by how
- far it allows an application to scale and not by how it performs in micro-bench
-marks (e.g., a two node bandwidth or latency test).
-\layout Standard
-
-The Portals API is designed to allow for scalability, not to guarantee it.
- Portals cannot overcome the shortcomings of a poorly designed application
- program.
- Applications that have inherent scalability limitations, either through
- design or implementation, will not be transformed by Portals into scalable
- applications.
- Scalability must be addressed at all levels.
- Portals do not inhibit scalability, but do not guarantee it either.
-\layout Standard
-
-To support scalability, the Portals interface maintains a minimal amount
- of state.
- Portals provide reliable, ordered delivery of messages between pairs of
- processes.
- They are connectionless: a process is not required to explicitly establish
- a point-to-point connection with another process in order to communicate.
- Moreover, all buffers used in the transmission of messages are maintained
- in user space.
- The target process determines how to respond to incoming messages, and
- messages for which there are no buffers are discarded.
-\layout Section
-
-Communication Model
-\layout Standard
-
-Portals combine the characteristics of both one-sided and two-sided communication.
- They define a 
-\begin_inset Quotes eld
-\end_inset 
-
-matching put
-\begin_inset Quotes erd
-\end_inset 
-
- operation and a 
-\begin_inset Quotes eld
-\end_inset 
-
-matching get
-\begin_inset Quotes erd
-\end_inset 
-
- operation.
- The destination of a put (or send) is not an explicit address; instead,
- each message contains a set of match bits that allow the receiver to determine
- where incoming messages should be placed.
- This flexibility allows Portals to support both traditional one-sided operation
-s and two-sided send/receive operations.
-\layout Standard
-
-Portals allows the target to determine whether incoming messages are acceptable.
- A target process can choose to accept message operations from any specific
- process or can choose to ignore message operations from any specific process.
-\layout Section
-
-Zero Copy, OS Bypass and Application Bypass
-\layout Standard
-
-In traditional system architectures, network packets arrive at the network
- interface card (NIC), are passed through one or more protocol layers in
- the operating system, and eventually copied into the address space of the
- application.
- As network bandwidth began to approach memory copy rates, reduction of
- memory copies became a critical concern.
- This concern led to the development of zero-copy message passing protocols
- in which message copies are eliminated or pipelined to avoid the loss of
- bandwidth.
-\layout Standard
-
-A typical zero-copy protocol has the NIC generate an interrupt for the CPU
- when a message arrives from the network.
- The interrupt handler then controls the transfer of the incoming message
- into the address space of the appropriate application.
- The interrupt latency, the time from the initiation of an interrupt until
- the interrupt handler is running, is fairly significant.
- To avoid this cost, some modern NICs have processors that can be programmed
- to implement part of a message passing protocol.
- Given a properly designed protocol, it is possible to program the NIC to
- control the transfer of incoming messages, without needing to interrupt
- the CPU.
- Because this strategy does not need to involve the OS on every message
- transfer, it is frequently called 
-\begin_inset Quotes eld
-\end_inset 
-
-OS Bypass.
-\begin_inset Quotes erd
-\end_inset 
-
- ST\SpecialChar ~
-
-\begin_inset LatexCommand \cite{ST}
-
-\end_inset 
-
-, VIA\SpecialChar ~
-
-\begin_inset LatexCommand \cite{VIA}
-
-\end_inset 
-
-, FM\SpecialChar ~
-
-\begin_inset LatexCommand \cite{FM2}
-
-\end_inset 
-
-, GM\SpecialChar ~
-
-\begin_inset LatexCommand \cite{GM}
-
-\end_inset 
-
-, and Portals are examples of OS Bypass protocols.
-\layout Standard
-
-Many protocols that support OS Bypass still require that the application
- actively participate in the protocol to ensure progress.
- As an example, the long message protocol of PM requires that the application
- receive and reply to a request to put or get a long message.
- This complicates the runtime environment, requiring a thread to process
- incoming requests, and significantly increases the latency required to
- initiate a long message protocol.
- The Portals message passing protocol does not require activity on the part
- of the application to ensure progress.
- We use the term 
-\begin_inset Quotes eld
-\end_inset 
-
-Application Bypass
-\begin_inset Quotes erd
-\end_inset 
-
- to refer to this aspect of the Portals protocol.
-\layout Section
-
-Faults 
-\layout Standard
-
-Given the number of components that we are dealing with and the fact that
- we are interested in supporting applications that run for very long times,
- failures are inevitable.
- The Portals API recognizes that the underlying transport may not be able
- to successfully complete an operation once it has been initiated.
- This is reflected in the fact that the Portals API reports three types
- of events: events indicating the initiation of an operation, events indicating
- the successful completion of an operation, and events indicating the unsuccessf
-ul completion of an operation.
- Every initiation event is eventually followed by a successful completion
- event or an unsuccessful completion event.
-\layout Standard
-
-Between the time an operation is started and the time that the operation
- completes (successfully or unsuccessfully), any memory associated with
- the operation should be considered volatile.
- That is, the memory may be changed in unpredictable ways while the operation
- is progressing.
- Once the operation completes, the memory associated with the operation
- will not be subject to further modification (from this operation).
- Notice that unsuccessful operations may alter memory in an essentially
- unpredictable fashion.
-\layout Chapter
-
-An Overview of the Portals API
-\begin_inset LatexCommand \label{sec:apiover}
-
-\end_inset 
-
-
-\layout Standard
-
-In this section, we give a conceptual overview of the Portals API.
- The goal is to provide a context for understanding the detailed description
- of the API presented in the next section.
-\layout Section
-
-Data Movement
-\begin_inset LatexCommand \label{sec:dmsemantics}
-
-\end_inset 
-
-
-\layout Standard
-
-A Portal represents an opening in the address space of a process.
- Other processes can use a Portal to read (get) or write (put) the memory
- associated with the portal.
- Every data movement operation involves two processes, the 
-\series bold 
-initiator
-\series default 
- and the 
-\series bold 
-target
-\series default 
-.
- The initiator is the process that initiates the data movement operation.
- The target is the process that responds to the operation by either accepting
- the data for a put operation, or replying with the data for a get operation.
-\layout Standard
-
-In this discussion, activities attributed to a process may refer to activities
- that are actually performed by the process or 
-\emph on 
-on behalf of the process
-\emph default 
-.
- The inclusiveness of our terminology is important in the context of 
-\emph on 
-application bypass
-\emph default 
-.
- In particular, when we note that the target sends a reply in the case of
- a get operation, it is possible that the reply will be generated by another
- component in the system, bypassing the application.
-\layout Standard
-
-Figures\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:put}
-
-\end_inset 
-
- and 
-\begin_inset LatexCommand \ref{fig:get}
-
-\end_inset 
-
- present graphical interpretations of the Portal data movement operations:
- put and get.
- In the case of a put operation, the initiator sends a put request message
- containing the data to the target.
- The target translates the Portal addressing information in the request
- using its local Portal structures.
- When the request has been processed, the target optionally sends an acknowledge
-ment message.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename put.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 218pt
-       lyxheight 119pt
-\end_inset 
-
-
-\layout Caption
-
-Portal Put (Send)
-\begin_inset LatexCommand \label{fig:put}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-In the case of a get operation, the initiator sends a get request to the
- target.
- As with the put operation, the target translates the Portal addressing
- information in the request using its local Portal structures.
- Once it has translated the Portal addressing information, the target sends
- a reply that includes the requested data.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename get.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 218pt
-       lyxheight 119pt
-\end_inset 
-
-
-\layout Caption
-
-Portal Get
-\begin_inset LatexCommand \label{fig:get}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-We should note that Portal address translations are only performed on nodes
- that respond to operations initiated by other nodes.
- Acknowledgements and replies to get operations bypass the portals address
- translation structures.
-\layout Section
-
-Portal Addressing
-\begin_inset LatexCommand \label{subsec:paddress}
-
-\end_inset 
-
-
-\layout Standard
-
-One-sided data movement models (e.g., shmem\SpecialChar ~
-
-\begin_inset LatexCommand \cite{CraySHMEM}
-
-\end_inset 
-
-, ST\SpecialChar ~
-
-\begin_inset LatexCommand \cite{ST}
-
-\end_inset 
-
-, MPI-2\SpecialChar ~
-
-\begin_inset LatexCommand \cite{MPI2}
-
-\end_inset 
-
-) typically use a triple to address memory on a remote node.
- This triple consists of a process id, memory buffer id, and offset.
- The process id identifies the target process, the memory buffer id specifies
- the region of memory to be used for the operation, and the offset specifies
- an offset within the memory buffer.
-\layout Standard
-
-In addition to the standard address components (process id, memory buffer
- id, and offset), a Portal address includes a set of match bits.
- This addressing model is appropriate for supporting one-sided operations
- as well as traditional two-sided message passing operations.
- Specifically, the Portals API provides the flexibility needed for an efficient
- implementation of MPI-1, which defines two-sided operations with one-sided
- completion semantics.
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:portals}
-
-\end_inset 
-
- presents a graphical representation of the structures used by a target
- in the interpretation of a Portal address.
- The process id is used to route the message to the appropriate node and
- is not reflected in this diagram.
- The memory buffer id, called the 
-\series bold 
-portal id
-\series default 
-, is used as an index into the Portal table.
- Each element of the Portal table identifies a match list.
- Each element of the match list specifies two bit patterns: a set of 
-\begin_inset Quotes eld
-\end_inset 
-
-don't care
-\begin_inset Quotes erd
-\end_inset 
-
- bits, and a set of 
-\begin_inset Quotes eld
-\end_inset 
-
-must match
-\begin_inset Quotes erd
-\end_inset 
-
- bits.
- In addition to the two sets of match bits, each match list element has
- at most one memory descriptor.
- Each memory descriptor identifies a memory region and an optional event
- queue.
- The memory region specifies the memory to be used in the operation and
- the event queue is used to record information about these operations.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename portals.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 305pt
-       lyxheight 106pt
-\end_inset 
-
-
-\layout Caption
-
-Portal Addressing Structures
-\begin_inset LatexCommand \label{fig:portals}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:flow}
-
-\end_inset 
-
- illustrates the steps involved in translating a Portal address, starting
- from the first element in a match list.
- If the match criteria specified in the match list entry are met and the
- memory descriptor list accepts the operation
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-Memory descriptors can reject operations because a threshold has been exceeded
- or because the memory region does not have sufficient space, see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-
-\end_inset 
-
-, the operation (put or get) is performed using the memory region specified
- in the memory descriptor.
- If the memory descriptor specifies that it is to be unlinked when a threshold
- has been exceeded, the match list entry is removed from the match list
- and the resources associated with the memory descriptor and match list
- entry are reclaimed.
- Finally, if there is an event queue specified in the memory descriptor,
- the operation is logged in the event queue.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename flow_new.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 447pt
-       lyxheight 282pt
-\end_inset 
-
-
-\layout Caption
-
-Portals Address Translation
-\begin_inset LatexCommand \label{fig:flow}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-If the match criteria specified in the match list entry are not met, or
- there is no memory descriptor associated with the match list entry, or
- the memory descriptor associated with the match list entry rejects the
- operation, the address translation continues with the next match list entry.
- If the end of the match list has been reached, the address translation
- is aborted and the incoming request is discarded.
-\layout Section
-
-Access Control
-\layout Standard
-
-A process can control access to its portals using an access control list.
- Each entry in the access control list specifies a process id and a Portal
- table index.
- The access control list is actually an array of entries.
- Each incoming request includes an index into the access control list (i.e.,
- a 
-\begin_inset Quotes eld
-\end_inset 
-
-cookie
-\begin_inset Quotes erd
-\end_inset 
-
- or hint).
- If the id of the process issuing the request doesn't match the id specified
- in the access control list entry or the Portal table index specified in
- the request doesn't match the Portal table index specified in the access
- control list entry, the request is rejected.
- Process identifiers and Portal table indexes may include wild card values
- to increase the flexibility of this mechanism.
-\layout Standard
-
-Two aspects of this design merit further discussion.
- First, the model assumes that the information in a message header, the
- sender's id in particular, is trustworthy.
- In most contexts, we assume that the entity that constructs the header
- is trustworthy; however, using cryptographic techniques, we could easily
- devise a protocol that would ensure the authenticity of the sender.
-\layout Standard
-
-Second, because the access check is performed by the receiver, it is possible
- that a malicious process will generate thousands of messages that will
- be denied by the receiver.
- This could saturate the network and/or the receiver, resulting in a 
-\emph on 
-denial of service
-\emph default 
- attack.
- Moving the check to the sender, using capabilities, would remove the potential
- for this form of attack.
- However, the solution introduces the complexities of capability management
- (exchange of capabilities, revocation, protections, etc.).
-\layout Section
-
-Multi-threaded Applications
-\begin_inset LatexCommand \label{sec:threads}
-
-\end_inset 
-
-\layout Standard
-
-The Portals API supports a generic view of multi-threaded applications.
- From the perspective of the Portals API, an application program is defined
- by a set of processes.
- Each process defines a unique address space.
- The Portals API defines access to this address space from other processes
- (using portals addressing and the data movement operations).
- A process may have one or more 
-\emph on 
-threads
-\emph default 
- executing in its address space.
-\layout Standard
-
-With the exception of 
-\emph on 
-PtlEQWait
-\emph default 
-, every function in the Portals API is non-blocking and atomic with respect
- to both other threads and external operations that result from data movement
- operations.
- While individual operations are atomic, sequences of these operations may
- be interleaved between different threads and with external operations.
- The Portals API does not provide any mechanisms to control this interleaving.
- It is expected that these mechanisms will be provided by the API used to
- create threads.
-\layout Chapter
-
-The Portals API
-\begin_inset LatexCommand \label{sec:api}
-
-\end_inset 
-
-
-\layout Section
-
-Naming Conventions
-\begin_inset LatexCommand \label{sec:conv}
-
-\end_inset 
-
-
-\layout Standard
-
-The Portals API defines two types of entities: functions and types.
- Function names always start with 
-\emph on 
-Ptl
-\emph default 
- and use mixed upper and lower case.
- When used in the body of this report, function names appear in italic face,
- e.g., 
-\emph on 
-PtlInit
-\emph default 
-.
- The functions associated with an object type will have names that start
- with 
-\emph on 
-Ptl
-\emph default 
-, followed by the two letter object type code shown in Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:objcodes}
-
-\end_inset 
-
-.
- As an example, the function 
-\emph on 
-PtlEQAlloc
-\emph default 
- allocates resources for an event queue.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Object Type Codes
-\begin_inset LatexCommand \label{tab:objcodes}
-
-\end_inset 
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-\backslash 
-medskip
-\newline 
-  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\emph on 
-xx
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Name 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Section 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-EQ 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Event Queue 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- MD 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Memory Descriptor 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- ME 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Match list Entry 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- NI 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Network Interface 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Type names use lower case with underscores to separate words.
- Each type name starts with 
-\family typewriter 
-ptl
-\family default 
-_ and ends with 
-\family typewriter 
-_t
-\family default 
-.
- When used in the body of this report, type names appear in a fixed font,
- e.g., 
-\family typewriter 
-ptl_match_bits_t
-\family default 
-.
-\layout Standard
-
-Names for constants use upper case with underscores to separate words.
- Each constant name starts with 
-\family typewriter 
-PTL_
-\family default 
-.
- When used in the body of this report, constant names appear in a fixed font,
- e.g., 
-\family typewriter 
-PTL_OK
-\family default 
-.
-\layout Section
-
-Base Types
-\layout Standard
-
-The Portals API defines a variety of base types.
- These types represent a simple renaming of the base types provided by the
- C programming language.
- In most cases these new type names have been introduced to improve type
- safety and to avoid issues arising from differences in representation sizes
- (e.g., 16-bit or 32-bit integers).
-\layout Subsection
-
-Sizes
-\begin_inset LatexCommand \label{sec:size-t}
-
-\end_inset 
-
-\layout Standard
-
-The type 
-\family typewriter 
-ptl_size_t
-\family default 
- is an unsigned 64-bit integral type used for representing sizes.
-\layout Subsection
-
-Handles
-\begin_inset LatexCommand \label{sec:handle-type}
-
-\end_inset 
-
-\layout Standard
-
-Objects maintained by the API are accessed through handles.
- Handle types have names of the form 
-\family typewriter 
-ptl_handle_
-\emph on 
-xx
-\emph default 
-_t
-\family default 
-, where 
-\emph on 
-xx
-\emph default 
- is one of the two letter object type codes shown in Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:objcodes}
-
-\end_inset 
-
-.
- For example, the type 
-\family typewriter 
-ptl_handle_ni_t
-\family default 
- is used for network interface handles.
-\layout Standard
-
-Each type of object is given a unique handle type to enhance type checking.
- The type, 
-\family typewriter 
-ptl_handle_any_t
-\family default 
-, can be used when a generic handle is needed.
- Every handle value can be converted into a value of type 
-\family typewriter 
-ptl_handle_any_t
-\family default 
- without loss of information.
-\layout Standard
-
-Handles are not simple values.
- Every portals object is associated with a specific network interface and
- an identifier for this interface (along with an object identifier) is part
- of the handle for the object.
-\layout Standard
-
-The special value 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
-, of type 
-\family typewriter 
-ptl_handle_eq_t
-\family default 
-, is used to indicate the absence of an event queue.
- See sections 
-\begin_inset LatexCommand \ref{sec:mdfree}
-
-\end_inset 
-
- and\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset 
-
- for uses of this value.
-\layout Subsection
-
-Indexes
-\begin_inset LatexCommand \label{sec:index-type}
-
-\end_inset 
-
-\layout Standard
-
-The types 
-\family typewriter 
-ptl_pt_index_t
-\family default 
- and 
-\family typewriter 
-ptl_ac_index_t
-\family default 
- are integral types used for representing Portal table indexes and access
- control table indexes, respectively.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:niinit}
-
-\end_inset 
-
- for limits on values of these types.
-\layout Subsection
-
-Match Bits
-\begin_inset LatexCommand \label{sec:mb-type}
-
-\end_inset 
-
-\layout Standard
-
-The type 
-\family typewriter 
-ptl_match_bits_t
-\family default 
- is capable of holding unsigned 64-bit integer values.
-\layout Subsection
-
-Network Interfaces
-\begin_inset LatexCommand \label{sec:ni-type}
-
-\end_inset 
-
-\layout Standard
-
-The type 
-\family typewriter 
-ptl_interface_t
-\family default 
- is an integral type used for identifying different network interfaces.
- Users will need to consult the local documentation to determine appropriate
- values for the interfaces available.
- The special value 
-\family typewriter 
-PTL_IFACE_DEFAULT
-\family default 
- identifies the default interface.
-\layout Subsection
-
-Identifiers
-\begin_inset LatexCommand \label{sec:id-type}
-
-\end_inset 
-
-
-\layout Standard
-
-The type 
-\family typewriter 
-ptl_nid_t
-\family default 
- is an integral type used for representing node ids, 
-\family typewriter 
-ptl_pid_t
-\family default 
- is an integral type for representing process ids, and 
-\family typewriter 
-ptl_uid_t 
-\family default 
-is an integral type for representing user ids.
-\layout Standard
-
-The special value 
-\family typewriter 
-PTL_PID_ANY
-\family default 
- matches any process identifier, 
-\family typewriter 
-PTL_NID_ANY
-\family default 
- matches any node identifier,
- and 
-\family typewriter 
-PTL_UID_ANY
-\family default 
- matches any user identifier.
- See sections 
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
- and\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset 
-
- for uses of these values.
-\layout Subsection
-
-Status Registers
-\begin_inset LatexCommand \label{sec:stat-type}
-
-\end_inset 
-
-
-\layout Standard
-
-Each network interface maintains an array of status registers that can be
- accessed using the 
-\family typewriter 
-PtlNIStatus
-\family default 
- function (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset 
-
-).
- The type 
-\family typewriter 
-ptl_sr_index_t
-\family default 
- defines the types of indexes that can be used to access the status registers.
- The only index defined for all implementations is 
-\family typewriter 
-PTL_SR_DROP_COUNT
-\family default 
- which identifies the status register that counts the dropped requests for
- the interface.
- Other indexes (and registers) may be defined by the implementation.
-\layout Standard
-
-The type 
-\family typewriter 
-ptl_sr_value_t
-\family default 
- defines the types of values held in status registers.
- This is a signed integer type.
- The size is implementation dependent, but must be at least 32 bits.
-\layout Section
-
-Initialization and Cleanup
-\begin_inset LatexCommand \label{sec:init}
-
-\end_inset 
-
-
-\layout Standard
-
-The Portals API includes a function, 
-\emph on 
-PtlInit
-\emph default 
-, to initialize the library and a function, 
-\emph on 
-PtlFini
-\emph default 
-, to clean up after the application is done using the library.
-\layout Subsection
-
-PtlInit
-\begin_inset LatexCommand \label{sec:ptlinit}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlInit( int *max_interfaces );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlInit
-\emph default 
- function initializes the Portals library.
- PtlInit must be called at least once by a process before any thread makes
- a Portals function call, but may be safely called more than once.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_FAIL Indicates an error during initialization.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-max_interfaces
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-max_interfaces
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the maximum number of interfaces
- that can be initialized.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlFini
-\begin_inset LatexCommand \label{sec:ptlfini}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-void PtlFini( void );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlFini
-\emph default 
- function cleans up after the Portals library is no longer needed by a process.
- After this function is called, calls to any of the functions defined by
- the Portals API or use of the structures set up by the Portals API will
- result in undefined behavior.
- This function should be called once and only once during termination by
- a process.
- Typically, this function will be called in the exit sequence of a process.
- Individual threads should not call PtlFini when they terminate.
-\layout Section
-
-Network Interfaces
-\begin_inset LatexCommand \label{sec:ni}
-
-\end_inset 
-
-\layout Standard
-
-The Portals API supports the use of multiple network interfaces.
- However, each interface is treated as an independent entity.
- Combining interfaces (e.g., 
-\begin_inset Quotes eld
-\end_inset 
-
-bonding
-\begin_inset Quotes erd
-\end_inset 
-
- to create a higher bandwidth connection) must be implemented by the application
- or embedded in the underlying network.
- Interfaces are treated as independent entities to make it easier to cache
- information on individual network interface cards.
-\layout Standard
-
-Once initialized, each interface provides a Portal table, an access control
- table, and a collection of status registers.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
- for a discussion of updating Portal table entries using the 
-\emph on 
-PtlMEAttach
-\emph default 
- function.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ac}
-
-\end_inset 
-
- for a discussion of the initialization and updating of entries in the access
- control table.
- See Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset 
-
- for a discussion of the 
-\emph on 
-PtlNIStatus
-\emph default 
- function which can be used to determine the value of a status register.
-\layout Standard
-
-Every other type of Portal object (e.g., memory descriptor, event queue, or
- match list entry) is associated with a specific network interface.
- The association to a network interface is established when the object is
- created and is encoded in the handle for the object.
-\layout Standard
-
-Each network interface is initialized and shut down independently.
- The initialization routine, 
-\emph on 
-PtlNIInit
-\emph default 
-, returns a handle for an interface object which is used in all subsequent
- Portal operations.
- The 
-\emph on 
-PtlNIFini
-\emph default 
- function is used to shut down an interface and release any resources that
- are associated with the interface.
- Network interface handles are associated with processes, not threads.
- All threads in a process share all of the network interface handles.
-\layout Standard
-
-The Portals API also defines the 
-\emph on 
-PtlNIStatus
-\emph default 
- function to query the status registers for a network interface, the 
-\emph on 
-PtlNIDist
-\emph default 
- function to determine the 
-\begin_inset Quotes eld
-\end_inset 
-
-distance
-\begin_inset Quotes erd
-\end_inset 
-
- to another process, and the 
-\emph on 
-PtlNIHandle
-\emph default 
- function to determine the network interface that an object is associated
- with.
-\layout Subsection
-
-PtlNIInit
-\begin_inset LatexCommand \label{sec:niinit}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline 
-    int            max_match_entries;
-\newline 
-    int            max_mem_descriptors;
-\newline 
-    int            max_event_queues;
-\newline 
-    ptl_ac_index_t max_atable_index; 
-\newline 
-    ptl_pt_index_t max_ptable_index;
-\newline 
-} ptl_ni_limits_t;
-\newline 
-
-\newline 
-int PtlNIInit( ptl_interface_t  interface,
-\newline 
-               ptl_pid_t        pid,
-\newline 
-               ptl_ni_limits_t* desired,
-\newline 
-               ptl_ni_limits_t* actual,
-\newline 
-               ptl_handle_ni_t* handle );
-\layout Standard
-
-Values of type 
-\family typewriter 
-ptl_ni_limits_t
-\family default 
- include the following members:
-\layout Description
-
-max_match_entries Maximum number of match entries that can be allocated
- at any one time.
-\layout Description
-
-max_mem_descriptors Maximum number of memory descriptors that can be allocated
- at any one time.
-\layout Description
-
-max_event_queues Maximum number of event queues that can be allocated at
- any one time.
-\layout Description
-
-max_atable_index Largest access control table index for this interface,
- valid indexes range from zero to 
-\family typewriter 
-max_atable_index
-\family default 
-, inclusive.
-\layout Description
-
-max_ptable_index Largest Portal table index for this interface, valid indexes
- range from zero to 
-\family typewriter 
-max_ptable_index
-\family default 
-, inclusive.
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlNIInit
-\emph default 
- function is used to initialize the Portals API for a network interface.
- This function must be called at least once by each process before any other
- operations that apply to the interface by any process or thread.
- For subsequent calls to 
-\shape italic 
-PtlNIInit
-\shape default 
- from within the same process (either by different threads or the same thread),
- the desired limits will be ignored and the call will return the existing
- NI handle.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INIT_DUP Indicates a duplicate initialization of 
-\family typewriter 
-interface
-\family default 
-.
-\layout Description
-
-PTL_INIT_INV Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to initialize the
- interface.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-pid
-\family default 
- is not a valid process id.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-actual 
-\family default 
-or
-\family typewriter 
- handle
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the network interface to be initialized.
-  (See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset 
-
- for a discussion of  values used to identify network interfaces.)
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-pid
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the desired process id (for well known process ids).
- The value 
-\family typewriter 
-PTL_PID_ANY
-\family default 
- may be used to have the process id assigned by the underlying library.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-desired
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-If non-NULL, points to a structure that holds the desired limits.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-actual
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, the location pointed to by actual will hold the actual
- limits.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a  handle for the interface.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-The use of desired is implementation dependent.
- In particular, an implementation may choose to ignore this argument.
-\layout Subsection
-
-PtlNIFini
-\begin_inset LatexCommand \label{sec:nifini}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlNIFini( ptl_handle_ni_t interface );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlNIFini
-\emph default 
- function is used to release the resources allocated for a network interface.
- Once the 
-\emph on 
-PtlNIFini
-\emph default 
- operation has been started, the results of pending API operations (e.g.,
- operations initiated by another thread) for this interface are undefined.
- Similarly, the effects of incoming operations (puts and gets) or return
- values (acknowledgements and replies) for this interface are undefined.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the interface to shut down.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlNIStatus
-\begin_inset LatexCommand \label{sec:nistatus}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlNIStatus( ptl_handle_ni_t interface,
-\newline 
-                 ptl_sr_index_t  status_register,
-\newline 
-                 ptl_sr_value_t* status );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlNIStatus
-\emph default 
- function returns the value of a status register for the specified interface.
- (See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset 
-
- for more information on status register indexes and status register values.)
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_INV_SR_INDX Indicates that 
-\family typewriter 
-status_register
-\family default 
- is not a valid status register.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-status
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the interface to use.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-status_register
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-An index for the status register to read.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-status
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the current value of the status
- register.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-The only status register that must be defined is a drop count register (
-\family typewriter 
-PTL_SR_DROP_COUNT
-\family default 
-).
- Implementations may define additional status registers.
- Identifiers for the indexes associated with these registers should start
- with the prefix 
-\family typewriter 
-PTL_SR_
-\family default 
-.
-\layout Subsection
-
-PtlNIDist
-\layout LyX-Code
-
-int PtlNIDist( ptl_handle_ni_t  interface,
-\newline 
-               ptl_process_id_t process,
-\newline 
-               unsigned long*   distance );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlNIDist
-\emph default 
- function returns the distance to another process using the specified interface.
- Distances are only defined relative to an interface.
- Distance comparisons between different interfaces on the same process may
- be meaningless.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-process
-\family default 
- is not a valid process identifier.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-distance
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the interface to use.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-process
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-An identifier for the process whose distance is being  requested.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-distance
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the  distance to the remote
- process.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-This function should return a static measure of distance.
- Examples include minimum latency, the inverse of available bandwidth, or
- the number of switches between the two endpoints.
-\layout Subsection
-
-PtlNIHandle
-\layout LyX-Code
-
-int PtlNIHandle( ptl_handle_any_t handle,
-\newline 
-                 ptl_handle_ni_t* interface );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlNIHandle
-\emph default 
- function returns a handle for the network interface with which the object
- identified by 
-\family typewriter 
-handle
-\family default 
- is associated.
- If the object identified by 
-\family typewriter 
-handle
-\family default 
- is a network interface, this function returns the same value it is passed.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_HANDLE Indicates that 
-\family typewriter 
-handle
-\family default 
- is not a valid handle.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the object.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a handle for the network interface
- associated with 
-\family typewriter 
-handle
-\family default 
-.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-Every handle should encode the network interface and the object id relative
- to this handle.
- Both are presumably encoded using integer values.
-\layout Section
-
-User Identification
-\begin_inset LatexCommand \label{sec:uid}
-
-\end_inset 
-
-
-\layout Standard
-
-Every process runs on behalf of a user.
-\layout Subsection
-
-PtlGetUid
-\layout LyX-Code
-
-int PtlGetUid( ptl_handle_ni_t   ni_handle,
-\newline 
-               ptl_uid_t*        uid );
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-ni_handle
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-uid
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ni_handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A network interface handle.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-uid
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the user id for the calling
- process.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-Note that user identifiers are dependent on the network interface(s).
- In particular, if a node has multiple interfaces, a process may have multiple
- user identifiers.
-\layout Section
-
-Process Identification
-\begin_inset LatexCommand \label{sec:pid}
-
-\end_inset 
-
-
-\layout Standard
-
-Processes that use the Portals API can be identified using a node id and
- process id.
- Every node accessible through a network interface has a unique node identifier
- and every process running on a node has a unique process identifier.
- As such, any process in the computing system can be identified by its node
- id and process id.
-\layout Standard
-
-The Portals API defines a type, 
-\family typewriter 
-ptl_process_id_t
-\family default 
- for representing process ids and a function, 
-\emph on 
-PtlGetId
-\emph default 
-, which can be used to obtain the id of the current process.
-\layout Comment
-
-The Portals API does not include thread identifiers.
-  Messages are delivered to processes (address spaces) not threads (contexts
- of  execution).
-\layout Subsection
-
-The Process Id Type
-\begin_inset LatexCommand \label{sec:pid-type}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline 
-    ptl_nid_t       nid; /* node id */
-\newline 
-    ptl_pid_t       pid; /* process id */
-\newline 
-} ptl_process_id_t;
-\layout Standard
-\noindent 
-The 
-\family typewriter 
-ptl_process_id_t
-\family default 
- type uses two identifiers to represent a process id: a node id and a process
- id.
-\layout Subsection
-
-PtlGetId
-\begin_inset LatexCommand \label{sub:PtlGetId}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlGetId( ptl_handle_ni_t   ni_handle,
-\newline 
-              ptl_process_id_t* id );
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-ni_handle
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-id
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ni_handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A network interface handle.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-id
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the id for the calling process.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Comment
-
-Note that process identifiers are dependent on the network interface(s).
- In particular, if a node has multiple interfaces, it may have multiple
- node identifiers.
-\layout Section
-
-Match List Entries and Match Lists
-\begin_inset LatexCommand \label{sec:me}
-
-\end_inset 
-
-
-\layout Standard
-
-A match list is a chain of match list entries.
- Each match list entry includes a memory descriptor and a set of match criteria.
- The match criteria can be used to reject incoming requests based on process
- id or the match bits provided in the request.
- A match list is created using the 
-\emph on 
-PtlMEAttach
-\emph default 
- or 
-\shape italic 
-PtlMEAttachAny
-\shape default 
- functions, which create a match list consisting of a single match list
- entry, attach the match list to the specified Portal index, and return
- a handle for the match list entry.
- Match entries can be dynamically inserted and removed from a match list
- using the 
-\emph on 
-PtlMEInsert
-\emph default 
- and 
-\emph on 
-PtlMEUnlink
-\emph default 
- functions.
-\layout Subsection
-
-PtlMEAttach
-\begin_inset LatexCommand \label{sec:meattach}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t;
-\newline 
-
-\layout LyX-Code
-
-typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t;
-\newline 
-
-\layout LyX-Code
-
-int PtlMEAttach( ptl_handle_ni_t  interface,
-\newline 
-                 ptl_pt_index_t   index,
-\newline 
-                 ptl_process_id_t matchid,
-\newline 
-                 ptl_match_bits_t match_bits,
-\newline 
-                 ptl_match_bits_t ignorebits,
-\newline 
-                 ptl_unlink_t     unlink,
-\newline 
-                 ptl_ins_pos_t    position,
-\newline 
-                 ptl_handle_me_t* handle );
-\layout Standard
-\noindent 
-Values of the type 
-\family typewriter 
-ptl_ins_pos_t
-\family default 
- are used to control where a new item is inserted.
- The value 
-\family typewriter 
-PTL_INS_BEFORE
-\family default 
- is used to insert the new item before the current item or before the head
- of the list.
- The value 
-\family typewriter 
-PTL_INS_AFTER
-\family default 
- is used to insert the new item after the current item or after the last
- item in the list.
-\layout Standard
-
-The 
-\emph on 
-PtlMEAttach
-\emph default 
- function creates a match list consisting of a single entry and attaches
- this list to the Portal table for 
-\family typewriter 
-interface
-\family default 
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_PTINDEX Indicates that 
-\family typewriter 
-index
-\family default 
- is not a valid Portal table index.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-matchid
-\family default 
- is not a valid process identifier.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match list entry.
-\layout Description
-
-PTL_ML_TOOLONG Indicates that the resulting match list is too long.
- The maximum length for a match list is defined by the interface.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="7" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.75in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the interface to use.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-index
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The Portal table index where the match list  should be attached.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-matchid
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Specifies the match criteria for the process id of the requestor.
-  The constants 
-\family typewriter 
-PTL_PID_ANY
-\family default 
- and 
-\family typewriter 
-PTL_NID_ANY
-\family default 
- can be used to  wildcard either of the ids in the 
-\family typewriter 
-ptl_process_id_t
-\family default 
- structure.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-match_bits, ignorebits
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Specify the match criteria to apply  to the match bits in the incoming request.
-  The 
-\family typewriter 
-ignorebits
-\family default 
- are used to mask out insignificant bits in the incoming match bits.
-  The resulting bits are then compared to the match list entry's match 
- bits to determine if the incoming request meets the match criteria.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-unlink
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Indicates the match list entry should be unlinked when the last memory descripto
-r associated with this match list  entry is unlinked.
-  (Note, the check for unlinking a match entry  only occurs when a memory
- descriptor is unlinked.) 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-position
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Indicates whether the new match entry should be prepended or appended to
- the existing match list.
- If there is no existing list, this argument is ignored and the new match
- entry becomes the only entry in the list.
- Allowed constants: 
-\family typewriter 
-PTL_INS_BEFORE
-\family default 
-, 
-\family typewriter 
-PTL_INS_AFTER
-\family default 
-.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a  handle for the newly created
- match list entry.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMEAttachAny
-\begin_inset LatexCommand \label{sec:attachany}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMEAttachAny( ptl_handle_ni_t  interface,
-\newline 
-                    ptl_pt_index_t   *index,
-\newline 
-                    ptl_process_id_t matchid,
-\newline 
-                    ptl_match_bits_t match_bits,
-\newline 
-                    ptl_match_bits_t ignorebits,
-\newline 
-                    ptl_unlink_t     unlink,
-\newline 
-                    ptl_handle_me_t* handle );
-\layout Standard
-
-The 
-\emph on 
-PtlMEAttachAny
-\emph default 
- function creates a match list consisting of a single entry and attaches
- this list to an unused Portal table entry for 
-\family typewriter 
-interface
-\family default 
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-matchid
-\family default 
- is not a valid process identifier.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match list entry.
-\layout Description
-
-PTL_PT_FULL Indicates that there are no free entries in the Portal table.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.75in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the interface to use.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-index
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the Portal index where the
- match list  has been attached.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-matchid, match_bits, ignorebits, unlink
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-See the discussion for 
-\shape italic 
-PtlMEAttach
-\shape default 
-.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a  handle for the newly created
- match list entry.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMEInsert
-\begin_inset LatexCommand \label{sec:meinsert}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMEInsert( ptl_handle_me_t  current,
-\newline 
-                 ptl_process_id_t matchid,
-\newline 
-                 ptl_match_bits_t match_bits,
-\newline 
-                 ptl_match_bits_t ignorebits,
-\newline 
-                 ptl_ins_pos_t    position,
-\newline 
-                 ptl_handle_me_t* handle );
-\layout Standard
-
-The 
-\emph on 
-PtlMEInsert
-\emph default 
- function creates a new match list entry and inserts this entry into the
- match list containing 
-\family typewriter 
-current
-\family default 
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-matchid
-\family default 
- is not a valid process identifier.
-\layout Description
-
-PTL_INV_ME Indicates that 
-\family typewriter 
-current
-\family default 
- is not a valid match entry handle.
-\layout Description
-
-PTL_ML_TOOLONG Indicates that the resulting match list is too long.
- The maximum length for a match list is defined by the interface.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- match entry.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0.8in">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-current
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for a match entry.
-  The new match entry will be inserted immediately before or immediately
- after this match entry.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-matchid
-\family default 
-, 
-\family typewriter 
-match_bits
-\family default 
-, 
-\family typewriter 
-ignorebits
-\family default 
-,  
-\family typewriter 
-unlink
-\family default 
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-See the discussion  for 
-\emph on 
-PtlMEAttach
-\emph default 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-position
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Indicates whether the new match entry should be inserted before or after
- the 
-\family typewriter 
-current
-\family default 
- entry.
- Allowed constants: 
-\family typewriter 
-PTL_INS_BEFORE
-\family default 
-, 
-\family typewriter 
-PTL_INS_AFTER
-\family default 
-.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-See the discussion for 
-\emph on 
-PtlMEAttach
-\emph default 
-.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMEUnlink
-\begin_inset LatexCommand \label{sec:meunlink}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMEUnlink( ptl_handle_me_t entry );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlMEUnlink
-\emph default 
- function can be used to unlink a match entry from a match list.
- This operation also releases any resources associated with the match entry
- (including the associated memory descriptor).
- It is an error to use the match entry handle after calling 
-\emph on 
-PtlMEUnlink
-\emph default 
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_ME Indicates that 
-\family typewriter 
-entry
-\family default 
- is not a valid match entry handle.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-entry
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the match entry to be unlinked.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Section
-
-Memory Descriptors
-\begin_inset LatexCommand \label{sec:md}
-
-\end_inset 
-
-
-\layout Standard
-
-A memory descriptor contains information about a region of an application
- process' memory and an event queue where information about the operations
- performed on the memory descriptor is recorded.
- The Portals API provides two operations to create memory descriptors: 
-\emph on 
-PtlMDAttach
-\emph default 
-, and 
-\emph on 
-PtlMDBind
-\emph default 
-; an operation to update a memory descriptor, 
-\emph on 
-PtlMDUpdate
-\emph default 
-; and an operation to unlink and release the resources associated with a
- memory descriptor, 
-\emph on 
-PtlMDUnlink
-\emph default 
-.
-\layout Subsection
-
-The Memory Descriptor Type
-\begin_inset LatexCommand \label{sec:md-type}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline 
-    void*           start;
-\newline 
-    ptl_size_t      length;
-\newline 
-    int             threshold;
-\newline 
-    unsigned int    max_offset;
-\newline 
-    unsigned int    options;
-\newline 
-    void*           user_ptr;
-\newline 
-    ptl_handle_eq_t eventq;
-\newline 
-} ptl_md_t;
-\layout Standard
-\noindent 
-The 
-\family typewriter 
-ptl_md_t
-\family default 
- type defines the application view of a memory descriptor.
- Values of this type are used to initialize and update the memory descriptors.
-\layout Subsubsection
-
-Members
-\layout Description
-
-start,\SpecialChar ~
-length Specify the memory region associated with the memory descriptor.
- The 
-\family typewriter 
-start
-\family default 
- member specifies the starting address for the memory region and the 
-\family typewriter 
-length
-\family default 
- member specifies the length of the region.
- The 
-\family typewriter 
-start member
-\family default 
- can be NULL provided that the 
-\family typewriter 
-length
-\family default 
- member is zero.
- (Zero length buffers are useful to record events.) There are no alignment
- restrictions on the starting address or the length of the region; although,
- unaligned messages may be slower (i.e., lower bandwidth and/or longer latency)
- on some implementations.
-\layout Description
-
-threshold Specifies the maximum number of operations that can be performed
- on the memory descriptor.
- An operation is any action that could possibly generate an event (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
- for the different types of events).
- In the usual case, the threshold value is decremented for each operation
- on the memory descriptor.
- When the threshold value is zero, the memory descriptor is 
-\emph on 
-inactive
-\emph default 
-, and does not respond to operations.
- A memory descriptor can have an initial threshold value of zero to allow
- for manipulation of an inactive memory descriptor by the local process.
- A threshold value of 
-\family typewriter 
-PTL_MD_THRESH_INF
-\family default 
- indicates that there is no bound on the number of operations that may be
- applied to a memory descriptor.
- Note that local operations (e.g., 
-\emph on 
-PtlMDUpdate
-\emph default 
-) are not applied to the threshold count.
-\layout Description
-
-max_offset Specifies the maximum local offset of a memory descriptor.
- When the local offset of a memory descriptor exceeds this maximum, the
- memory descriptor becomes 
-\shape italic 
-inactive
-\shape default 
- and does not respond to further operations.
-\layout Description
-
-options Specifies the behavior of the memory descriptor.
- There are five options that can be selected: enable put operations (yes
- or no), enable get operations (yes or no), offset management (local or
- remote), message truncation (yes or no), and acknowledgement (yes or no).
- Values for this argument can be constructed using a bitwise or of the following
- values: 
-\begin_deeper 
-\begin_deeper 
-\layout Description
-
-PTL_MD_OP_PUT Specifies that the memory descriptor will respond to 
-\emph on 
-put
-\emph default 
- operations.
- By default, memory descriptors reject 
-\emph on 
-put
-\emph default 
- operations.
-\layout Description
-
-PTL_MD_OP_GET Specifies that the memory descriptor will respond to 
-\emph on 
-get
-\emph default 
- operations.
- By default, memory descriptors reject 
-\emph on 
-get
-\emph default 
- operations.
-\layout Description
-
-PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory
- region is provided by the incoming request.
- By default, the offset is maintained locally.
- When the offset is maintained locally, the offset is incremented by the
- length of the request so that the next operation (put and/or get) will
- access the next part of the memory region.
-\layout Description
-
-PTL_MD_TRUNCATE Specifies that the length provided in the incoming request
- can be reduced to match the memory available in the region.
- (The memory available in a memory region is determined by subtracting the
- offset from the length of the memory region.) By default, if the length
- in the incoming operation is greater than the amount of memory available,
- the operation is rejected.
-\layout Description
-
-PTL_MD_ACK_DISABLE Specifies that an acknowledgement should 
-\emph on 
-not
-\emph default 
- be sent for incoming 
-\emph on 
-put
-\emph default 
- operations, even if requested.
- By default, acknowledgements are sent for 
-\emph on 
-put
-\emph default 
- operations that request an acknowledgement.
- Acknowledgements are never sent for 
-\emph on 
-get
-\emph default 
- operations.
- The value sent in the reply serves as an implicit acknowledgement.
-\end_deeper 
-\layout Standard
-
-
-\series bold 
-Note
-\series default 
-: It is not considered an error to have a memory descriptor that does not
- respond to either 
-\emph on 
-put
-\emph default 
- or 
-\emph on 
-get
-\emph default 
- operations: Every memory descriptor responds to 
-\emph on 
-reply
-\emph default 
- operations.
- Nor is it considered an error to have a memory descriptor that responds
- to both 
-\emph on 
-put
-\emph default 
- and 
-\emph on 
-get
-\emph default 
- operations.
-\end_deeper 
-\layout Description
-
-user_ptr A user-specified value that is associated with the memory descriptor.
- The value does not need to be a pointer, but must fit in the space used
- by a pointer.
- This value (along with other values) is recorded in events associated with
- operations on this memory descriptor.
-\begin_inset Foot
-collapsed true
-
-\layout Standard
-
-Tying the memory descriptor to a user-defined value can be useful when multiple
- memory descriptors share the same event queue or when the memory descriptor
- needs to be associated with a data structure maintained by the application.
- For example, an MPI implementation can set the 
-\family typewriter 
-user_ptr
-\family default 
- argument to the value of an MPI Request.
- This direct association allows for processing of memory descriptors by
- the MPI implementation without a table lookup or a search for the appropriate
- MPI Request.
-\end_inset 
-
-
-\layout Description
-
-eventq A handle for the event queue used to log the operations performed
- on the memory region.
- If this argument is 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
-, operations performed on this memory descriptor are not logged.
-\layout Subsection
-
-PtlMDAttach
-\begin_inset LatexCommand \label{sec:mdattach}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMDAttach( ptl_handle_me_t  match,
-\newline 
-                 ptl_md_t         mem_desc,
-\newline 
-                 ptl_unlink_t     unlink_op,
-\newline 
-                 ptl_unlink_t     unlink_nofit,
-\newline 
-                 ptl_handle_md_t* handle );
-\layout Standard
-\noindent 
-Values of the type 
-\family typewriter 
-ptl_unlink_t
-\family default 
- are used to control whether an item is unlinked from a list.
- The value 
-\family typewriter 
-PTL_UNLINK
-\family default 
- enables unlinking.
- The value 
-\family typewriter 
-PTL_RETAIN
-\family default 
- disables unlinking.
-\layout Standard
-
-The 
-\emph on 
-PtlMDAttach
-\emph default 
- operation is used to create a memory descriptor and attach it to a match
- list entry.
- An error code is returned if this match list entry already has an associated
- memory descriptor.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INUSE Indicates that 
-\family typewriter 
-match
-\family default 
- already has a memory descriptor attached.
-\layout Description
-
-PTL_INV_ME Indicates that 
-\family typewriter 
-match
-\family default 
- is not a valid match entry handle.
-\layout Description
-
-PTL_ILL_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a legal memory descriptor.
- This may happen because the memory region defined in 
-\family typewriter 
-mem_desc
-\family default 
- is invalid or because the network interface associated with the 
-\family typewriter 
-eventq
-\family default 
- in 
-\family typewriter 
-mem_desc
-\family default 
- is not the same as the network interface associated with 
-\family typewriter 
-match
-\family default 
-.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- memory descriptor.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-match
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the match entry that the memory descriptor will be associated
- with.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Provides initial values for the application visible parts of a memory descriptor.
-  Other than its use for initialization, there is no linkage between this
- structure and the memory descriptor maintained by the API.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-unlink_op
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A flag to indicate whether the memory descriptor is  unlinked when it becomes
- inactive, either because the operation threshold drops to zero or because
- the maximum offset has been exceeded.
-  (Note, the check for unlinking a memory descriptor only occurs after
- the completion of a successful operation.
-  If the threshold is set to zero during initialization or  using 
-\emph on 
-PtlMDUpdate
-\emph default 
-, the memory descriptor is 
-\series bold 
-not
-\series default 
-  unlinked.) 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-unlink_nofit
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A flag to indicate whether the memory descriptor is  unlinked when the space
- remaining in the memory descriptor is not sufficient for a matching operation.
- If an incoming message arrives at a memory descriptor that does
- not have sufficient space and the 
-\series bold 
-PTL_MD_TRUNCATE
-\series default 
- option is not specified, the memory descriptor will be unlinked.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a handle for the newly created
- memory descriptor.
-  The 
-\family typewriter 
-handle
-\family default 
- argument can be NULL, in which case the handle will not be returned.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMDBind
-\begin_inset LatexCommand \label{sec:mdbind}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMDBind( ptl_handle_ni_t  interface,
-\newline 
-               ptl_md_t         mem_desc,
-\newline 
-               ptl_handle_md_t* handle );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlMDBind
-\emph default 
- operation is used to create a 
-\begin_inset Quotes eld
-\end_inset 
-
-free floating
-\begin_inset Quotes erd
-\end_inset 
-
- memory descriptor, i.e., a memory descriptor that is not associated with
- a match list entry.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_ILL_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a legal memory descriptor.
- This may happen because the memory region defined in 
-\family typewriter 
-mem_desc
-\family default 
- is invalid or because the network interface associated with the 
-\family typewriter 
-eventq
-\family default 
- in 
-\family typewriter 
-mem_desc
-\family default 
- is not the same as the network interface, 
-\family typewriter 
-interface
-\family default 
-.
-\layout Description
-
-PTL_INV_EQ Indicates that the event queue associated with 
-\family typewriter 
-mem_desc
-\family default 
- is not valid.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- memory descriptor.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-handle
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the network interface with which the memory descriptor will
- be associated.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Provides initial values for the application visible parts of a memory descriptor.
-  Other than its use for initialization, there is no linkage between this
- structure and the  memory descriptor maintained by the API.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a  handle for the newly created
- memory descriptor.
-  The 
-\family typewriter 
-handle
-\family default 
- argument must be a valid address and cannot be NULL.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMDUnlink
-\begin_inset LatexCommand \label{sec:mdfree}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMDUnlink( ptl_handle_md_t mem_desc );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlMDUnlink
-\emph default 
- function unlinks the memory descriptor from any match list entry it may
- be linked to and releases the resources associated with a memory descriptor.
- (This function does not free the memory region associated with the memory
- descriptor.) This function also releases the resources associated with a
- floating memory descriptor.
- Only memory descriptors with no pending operations may be unlinked.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a valid memory descriptor handle.
-\layout Description
-
-PTL_MD_INUSE Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- has pending operations and cannot be unlinked.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the memory descriptor to be released.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlMDUpdate
-\begin_inset LatexCommand \label{sec:mdupdate}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlMDUpdate( ptl_handle_md_t mem_desc,
-\newline 
-                 ptl_md_t*       old_md,
-\newline 
-                 ptl_md_t*       new_md,
-\newline 
-                 ptl_handle_eq_t testq );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlMDUpdate
-\emph default 
- function provides a conditional, atomic update operation for memory descriptors.
- The memory descriptor identified by 
-\family typewriter 
-mem_desc
-\family default 
- is only updated if the event queue identified by 
-\family typewriter 
-testq
-\family default 
- is empty.
- The intent is to only enable updates to the memory descriptor when no new
- messages have arrived since the last time the queue was checked.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:exmpi}
-
-\end_inset 
-
- for an example of how this function can be used.
-\layout Standard
-
-If 
-\family typewriter 
-new_md
-\family default 
- is not NULL, the memory descriptor identified by 
-\family typewriter 
-mem_desc
-\family default 
- will be updated to reflect the values in the structure pointed to by 
-\family typewriter 
-new_md
-\family default 
- if 
-\family typewriter 
-testq
-\family default 
- has the value 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
- or if the event queue identified by 
-\family typewriter 
-testq
-\family default 
- is empty.
- If 
-\family typewriter 
-old_md
-\family default 
- is not NULL, the current value of the memory descriptor identified by 
-\family typewriter 
-mem_desc
-\family default 
- is recorded in the location identified by 
-\family typewriter 
-old_md
-\family default 
-.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_NOUPDATE Indicates that the update was not performed because 
-\family typewriter 
-testq
-\family default 
- was not empty.
-\layout Description
-
-PTL_INV_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a valid memory descriptor handle.
-\layout Description
-
-PTL_ILL_MD Indicates that the value pointed to by 
-\family typewriter 
-new_md
-\family default 
- is not a legal memory descriptor (e.g., the memory region specified by the
- memory descriptor may be invalid).
-\layout Description
-
-PTL_INV_EQ Indicates that 
-\family typewriter 
-testq
-\family default 
- is not a valid event queue handle.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-new_md
-\family default 
- or 
-\family typewriter 
-old_md
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="4" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the memory descriptor to update.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-old_md
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-If 
-\family typewriter 
-old_md
-\family default 
- is not the value 
-\family typewriter 
-NULL
-\family default 
-, the current value of the memory descriptor will be stored in the location
- identified by 
-\family typewriter 
-old_md
-\family default 
-.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-new_md
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-If 
-\family typewriter 
-new_md
-\family default 
- is not the value 
-\family typewriter 
-NULL
-\family default 
-, this argument provides the new values for the memory descriptor, if the
- update is performed.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-testq
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for an event queue used to predicate the update.
- If 
-\family typewriter 
-testq
-\family default 
- is equal to 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
-, the update is performed unconditionally.
-  Otherwise, the update is performed if and only if 
-\family typewriter 
-testq
-\family default 
- is empty.
-  If the update is  not performed, the function returns the value 
-\family typewriter 
-PTL_NOUPDATE
-\family default 
-.
-  (Note, the 
-\family typewriter 
-testq
-\family default 
- argument does not need to be the same as  the event queue associated with
- the memory descriptor.)
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Standard
-
-The conditional update can be used to ensure that the memory descriptor
- has not changed between the time it was examined and the time it is updated.
- In particular, it is needed to support an MPI implementation where the
- activity of searching an unexpected message queue and posting a receive
- must be atomic.
-\layout Section
-
-Events and Event Queues
-\begin_inset LatexCommand \label{sec:eq}
-
-\end_inset 
-
-
-\layout Standard
-
-Event queues are used to log operations performed on memory descriptors.
- They can also be used to hold acknowledgements for completed 
-\emph on 
-put
-\emph default 
- operations and to note when the data specified in a 
-\emph on 
-put
-\emph default 
- operation has been sent (i.e., when it is safe to reuse the buffer that holds
- this data).
- Multiple memory descriptors can share a single event queue.
-\layout Standard
-
-In addition to the 
-\family typewriter 
-ptl_handle_eq_t
-\family default 
- type, the Portals API defines two types associated with events: The 
-\family typewriter 
-
-\newline 
-ptl_event_kind_t
-\family default 
- type defines the kinds of events that can be stored in an event queue.
- The 
-\family typewriter 
-ptl_event_t
-\family default 
- type defines a structure that holds the information associated with an
- event.
-\layout Standard
-
-The Portals API also provides four functions for dealing with event queues:
- The 
-\emph on 
-PtlEQAlloc
-\emph default 
- function is used to allocate the API resources needed for an event queue,
- the 
-\emph on 
-PtlEQFree
-\emph default 
- function is used to release these resources, the 
-\emph on 
-PtlEQGet
-\emph default 
- function can be used to get the next event from an event queue, and the
-\emph on 
-PtlEQWait
-\emph default 
- function can be used to block a process (or thread) until an event queue
- has at least one event.
-\layout Subsection
-
-Kinds of Events
-\begin_inset LatexCommand \label{sec:ek-type}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef enum { 
-\newline 
-    PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL,
-\newline 
-    PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL,
-\newline 
-    PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL,
-\newline 
-    PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL,
-\newline 
-    PTL_EVENT_ACK,
-\newline 
-    PTL_EVENT_UNLINK
-\newline 
-} ptl_event_kind_t;
-\layout Standard
-\noindent 
-The Portals API defines fourteen types of events that can be logged in an
- event queue: 
-\layout Description
-
-PTL_EVENT_GET_START A remote 
-\emph on 
-get
-\emph default 
- operation has been started on the memory descriptor.
- The memory region associated with this descriptor should not be altered
- until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_GET_END A previously initiated 
-\emph on 
-get
-\emph default 
- operation completed successfully.
- This event is logged after the reply has been sent by the local node.
- As such, the process could free the memory descriptor once it sees this
- event.
-\layout Description
-
-PTL_EVENT_GET_FAIL A previously initiated 
-\emph on 
-get
-\emph default 
- operation completed unsuccessfully.
- This event is logged after the reply has been sent by the local node.
- As such, the process could free the memory descriptor once it sees this
- event.
-\layout Description
-
-PTL_EVENT_PUT_START A remote 
-\emph on 
-put
-\emph default 
- operation has been started on the memory descriptor.
- The memory region associated with this descriptor should be considered
- volatile until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_PUT_END A previously initiated 
-\emph on 
-put
-\emph default 
- operation completed successfully.
- The underlying layers will not alter the memory (on behalf of this operation)
- once this event has been logged.
-\layout Description
-
-PTL_EVENT_PUT_FAIL A previously initiated 
-\emph on 
-put
-\emph default 
- operation completed unsuccessfully.
- The underlying layers will not alter the memory (on behalf of this operation)
- once this event has been logged.
-\layout Description
-
-PTL_EVENT_REPLY_START A 
-\emph on 
-reply
-\emph default 
- operation has been started on the memory descriptor.
-\layout Description
-
-PTL_EVENT_REPLY_END A previously initiated 
-\emph on 
-reply
-\emph default 
- operation has completed successfully.
- This event is logged after the data (if any) from the reply has been written
- into the memory descriptor.
-\layout Description
-
-PTL_EVENT_REPLY_FAIL A previously initiated 
-\emph on 
-reply
-\emph default 
- operation has completed unsuccessfully.
- This event is logged after the data (if any) from the reply has been written
- into the memory descriptor.
-\layout Description
-
-PTL_EVENT_ACK An 
-\emph on 
-acknowledgement
-\emph default 
- was received.
- This event is logged when the acknowledgement is received.
-\layout Description
-
-PTL_EVENT_SEND_START An outgoing 
-\emph on 
-send
-\emph default 
- operation has been started.
- The memory region associated with this descriptor should not be altered
- until the corresponding END or FAIL event is logged.
-\layout Description
-
-PTL_EVENT_SEND_END A previously initiated 
-\emph on 
-send
-\emph default 
- operation has completed successfully.
- This event is logged after the entire buffer has been sent and it is safe
- for the application to reuse the buffer.
-\layout Description
-
-PTL_EVENT_SEND_FAIL A previously initiated 
-\emph on 
-send
-\emph default 
- operation has completed unsuccessfully.
- The process can safely manipulate the memory or free the memory descriptor
- once it sees this event.
-\layout Description
-
-PTL_EVENT_UNLINK A memory descriptor associated with this event queue has
- been automatically unlinked.
- This event is not generated when a memory descriptor is explicitly unlinked
- by calling 
-\shape italic 
-PtlMDUnlink
-\shape default 
-.
- This event does not decrement the threshold count.
-\layout Subsection
-
-Event Ordering
-\layout Standard
-
-The Portals API guarantees that when a process initiates two operations
- on a remote process, the operations will be initiated on the remote process
- in the same order that they were initiated on the original process.
- As an example, if process A initiates two 
-\emph on 
-put
-\emph default 
- operations, 
-\emph on 
-x
-\emph default 
- and 
-\emph on 
-y
-\emph default 
-, on process B, the Portals API guarantees that process A will receive the
-\family typewriter 
-PTL_EVENT_SEND_START
-\family default 
- events for 
-\emph on 
-x
-\emph default 
- and 
-\emph on 
-y
-\emph default 
- in the same order that process B receives the 
-\family typewriter 
-PTL_EVENT_PUT_START
-\family default 
- events for 
-\emph on 
-x
-\emph default 
- and 
-\emph on 
-y
-\emph default 
-.
- Notice that the API does not guarantee that the start events will be delivered
- in the same order that process A initiated the 
-\emph on 
-x
-\emph default 
- and 
-\emph on 
-y
-\emph default 
- operations.
- If process A needs to ensure the ordering of these operations, it should
- include code to wait for the initiation of 
-\emph on 
-x
-\emph default 
- before it initiates 
-\emph on 
-y
-\emph default 
-.
-\layout Subsection
-
-Failure Notification
-\layout Standard
-
-Operations may fail to complete successfully; however, unless the node itself
- fails, every operation that is started will eventually complete.
- While an operation is in progress, the memory associated with the operation
- should not be viewed (in the case of a put or a reply) or altered (in the
- case of a send or get).
- Operation completion, whether successful or unsuccessful, is final.
- That is, when an operation completes, the memory associated with the operation
- will no longer be read or altered by the operation.
- A network interface can use the 
-\family typewriter 
-ptl_ni_fail_t
-\family default 
- to define more specific information regarding the failure of the operation
- and record this information in the 
-\family typewriter 
-ni_fail_type
-\family default 
- field of the event.
-\layout Subsection
-
-The Event Type
-\begin_inset LatexCommand \label{sec:event-type}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef struct {
-\newline 
-    ptl_event_kind_t      type;
-\newline 
-    ptl_process_id_t      initiator;
-\newline 
-    ptl_uid_t             uid;
-\layout LyX-Code
-
-    ptl_pt_index_t        portal;
-\newline 
-    ptl_match_bits_t      match_bits;
-\newline 
-    ptl_size_t            rlength;
-\newline 
-    ptl_size_t            mlength;
-\newline 
-    ptl_size_t            offset; 
-\newline 
-    ptl_handle_md_t       md_handle;
-\newline 
-    ptl_md_t              mem_desc;
-\newline 
-    ptl_hdr_data_t        hdr_data;
-\newline 
-    ptl_seq_t             link;
-\newline 
-    ptl_ni_fail_t         ni_fail_type;
-\newline 
-    volatile ptl_seq_t    sequence;
-\newline 
-} ptl_event_t;
-\layout Standard
-\noindent 
-An event structure includes the following members: 
-\layout Description
-
-type Indicates the type of the event.
-\layout Description
-
-initiator The id of the initiator.
-\layout Description
-
-portal The Portal table index specified in the request.
-\layout Description
-
-match_bits A copy of the match bits specified in the request.
- See section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
- for more information on match bits.
-\layout Description
-
-rlength The length (in bytes) specified in the request.
-\layout Description
-
-mlength The length (in bytes) of the data that was manipulated by the operation.
- For truncated operations, the manipulated length will be the number of
- bytes specified by the memory descriptor (possibly with an offset).
- For all other operations, the manipulated length will be the length of
- the requested operation.
-\layout Description
-
-offset Is the displacement (in bytes) into the memory region that the operation
- used.
- The offset can be determined by the operation (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset 
-
-) for a remote managed memory descriptor, or by the local memory descriptor
- (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-).
-\layout Description
-
-md_handle Is the handle to the memory descriptor associated with the event.
-\layout Description
-
-mem_desc Is the state of the memory descriptor immediately after the event
- has been processed.
-\layout Description
-
-hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-).
-\layout Description
-
-link The 
-\emph on 
-link
-\emph default 
- member is used to link 
-\family typewriter 
-START
-\family default 
- events with the 
-\family typewriter 
-END
-\family default 
- or 
-\family typewriter 
-FAIL
-\family default 
- event that signifies completion of the operation.
- The 
-\emph on 
-link
-\emph default 
- member will be the same for the two events associated with an operation.
- The link member is also used to link an 
-\family typewriter 
-UNLINK
-\family default 
- event with the event that caused the memory descriptor to be unlinked.
-\layout Description
-
-sequence The sequence number for this event.
- Sequence numbers are unique to each event.
-\layout Comment
-
-The 
-\emph on 
-sequence
-\emph default 
- member is the last member and is volatile to support SMP implementations.
- When an event structure is filled in, the 
-\emph on 
-sequence
-\emph default 
- member should be written after all other members have been updated.
- Moreover, a memory barrier should be inserted between the updating of other
- members and the updating of the 
-\emph on 
-sequence
-\emph default 
- member.
-\layout Subsection
-
-PtlEQAlloc
-\begin_inset LatexCommand \label{sec:eqalloc}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlEQAlloc( ptl_handle_ni_t  interface,
-\newline 
-                ptl_size_t       count,
-\newline 
-                ptl_handle_eq_t* handle );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlEQAlloc
-\emph default 
- function is used to build an event queue.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_NOSPACE Indicates that there is insufficient memory to allocate the
- event queue.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-handle
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="3" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the interface with which the event queue  will be associated.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-count
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The number of events that can be stored in the event queue.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-handle
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold a handle for the newly created
- event queue.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlEQFree
-\begin_inset LatexCommand \label{sec:eqfree}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlEQFree( ptl_handle_eq_t eventq );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlEQFree
-\emph default 
- function releases the resources associated with an event queue.
- It is up to the user to ensure that no memory descriptors are associated
- with the event queue once it is freed.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_EQ Indicates that 
-\family typewriter 
-eventq
-\family default 
- is not a valid event queue handle.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="1" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-eventq
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-A handle for the event queue to be released.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlEQGet
-\begin_inset LatexCommand \label{sec:eqget}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlEQGet( ptl_handle_eq_t eventq,
-\newline 
-              ptl_event_t*    event );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlEQGet
-\emph default 
- function is a nonblocking function that can be used to get the next event
- in an event queue.
- The event is removed from the queue.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
- least one event between this event and the last event obtained (using 
-\emph on 
-PtlEQGet
-\emph default 
- or 
-\emph on 
-PtlEQWait
-\emph default 
-) from this event queue has been dropped due to limited space in the event
- queue.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_EQ_EMPTY Indicates that 
-\family typewriter 
-eventq
-\family default 
- is empty or another thread is waiting on 
-\emph on 
-PtlEQWait
-\emph default 
-.
-\layout Description
-
-PTL_INV_EQ Indicates that 
-\family typewriter 
-eventq
-\family default 
- is not a valid event queue handle.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-event
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.5in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-eventq
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the event queue.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-event
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the  values associated with
- the next event in the event queue.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlEQWait
-\begin_inset LatexCommand \label{sec:eqwait}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlEQWait( ptl_handle_eq_t eventq,
-\newline 
-               ptl_event_t*    event );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlEQWait
-\emph default 
- function can be used to block the calling process (thread) until there
- is an event in an event queue.
- This function also returns the next event in the event queue and removes
- this event from the queue.
- This is the only blocking operation in the Portals 3.2 API.
- In the event that multiple threads are waiting on the same event queue,
- PtlEQWait is guaranteed to wake exactly one thread, but the order in which
- they are awakened is not specified.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at
- least one event between this event and the last event obtained (using 
-\emph on 
-PtlEQGet
-\emph default 
- or 
-\emph on 
-PtlEQWait
-\emph default 
-) from this event queue has been dropped due to limited space in the event
- queue.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_EQ Indicates that 
-\family typewriter 
-eventq
-\family default 
- is not a valid event queue handle.
-\layout Description
-
-PTL_SEGV Indicates that 
-\family typewriter 
-event
-\family default 
- is not a legal address.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-\noindent 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="2" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-eventq
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the event queue to wait on.
-  The calling process (thread) will be blocked until 
-\family typewriter 
-eventq
-\family default 
- is not empty.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-event
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-output
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-On successful return, this location will hold the values associated with
- the next event in the event queue.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Section
-
-The Access Control Table
-\begin_inset LatexCommand \label{sec:ac}
-
-\end_inset 
-
-
-\layout Standard
-
-Processes can use the access control table to control which processes are
- allowed to perform operations on Portal table entries.
- Each communication interface has a Portal table and an access control table.
- The access control table for the default interface contains an entry at
- index zero that allows all processes with the same user id to communicate.
- Entries in the access control table can be manipulated using the 
-\emph on 
-PtlACEntry
-\emph default 
- function.
-\layout Subsection
-
-PtlACEntry
-\begin_inset LatexCommand \label{sec:acentry}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlACEntry( ptl_handle_ni_t  interface,
-\newline 
-                ptl_ac_index_t   index,
-\newline 
-                ptl_process_id_t matchid,
-\newline 
-                ptl_uid_t        user_id,
-\newline 
-                ptl_pt_index_t   portal );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlACEntry
-\emph default 
- function can be used to update an entry in the access control table for
- an interface.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_NI Indicates that 
-\family typewriter 
-interface
-\family default 
- is not a valid network interface handle.
-\layout Description
-
-PTL_AC_INV_INDEX Indicates that 
-\family typewriter 
-index
-\family default 
- is not a valid access control table index.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-matchid
-\family default 
- is not a valid process identifier.
-\layout Description
-
-PTL_PT_INV_INDEX Indicates that 
-\family typewriter 
-portal
-\family default 
- is not a valid Portal table index.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="5" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-interface
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the interface to use.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-index
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The index of the entry in the access control table to update.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-matchid
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the process(es) that are allowed to  perform operations.
- The constants 
-\family typewriter 
-PTL_PID_ANY
-\family default 
- and 
-\family typewriter 
-PTL_NID_ANY
-\family default 
- can be used to wildcard either of the ids in the 
-\family typewriter 
-ptl_process_id_t
-\family default 
- structure.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-user_id
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the user that is allowed to  perform operations.
- The value 
-\family typewriter 
-PTL_UID_ANY
-\family default 
- can be used to wildcard the user.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-portal
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Identifies the Portal index(es) that can be used.
-  The value 
-\family typewriter 
-PTL_PT_INDEX_ANY
-\family default 
- can be used to wildcard the  Portal index.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Section
-
-Data Movement Operations
-\begin_inset LatexCommand \label{sec:datamovement}
-
-\end_inset 
-
-
-\layout Standard
-
-The Portals API provides two data movement operations: 
-\emph on 
-PtlPut
-\emph default 
- and 
-\emph on 
-PtlGet
-\emph default 
-.
-\layout Subsection
-
-PtlPut
-\begin_inset LatexCommand \label{sec:put}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t;
-\newline 
-
-\newline 
-int PtlPut( ptl_handle_md_t  mem_desc,
-\newline 
-            ptl_ack_req_t    ack_req,
-\newline 
-            ptl_process_id_t target,
-\newline 
-            ptl_pt_index_t   portal,
-\newline 
-            ptl_ac_index_t   cookie,
-\newline 
-            ptl_match_bits_t match_bits,
-\newline 
-            ptl_size_t       offset,
-\newline 
-            ptl_hdr_data_t   hdr_data );
-\layout Standard
-\noindent 
-Values of the type 
-\family typewriter 
-ptl_ack_req_t
-\family default 
- are used to control whether an acknowledgement should be sent when the
- operation completes (i.e., when the data has been written to a memory descriptor
- of the 
-\family typewriter 
-target
-\family default 
- process).
- The value 
-\family typewriter 
-PTL_ACK_REQ
-\family default 
- requests an acknowledgement; the value 
-\family typewriter 
-PTL_NOACK_REQ
-\family default 
- requests that no acknowledgement should be generated.
-\layout Standard
-
-The 
-\emph on 
-PtlPut
-\emph default 
- function initiates an asynchronous put operation.
- There are several events associated with a put operation: initiation of
- the send on the local node (
-\family typewriter 
-PTL_EVENT_SEND_START
-\family default 
-), completion of the send on the local node (
-\family typewriter 
-PTL_EVENT_SEND_END
-\family default 
- or 
-\family typewriter 
-PTL_EVENT_SEND_FAIL
-\family default 
-), and, when the send completes successfully, the receipt of an acknowledgement
- (
-\family typewriter 
-PTL_EVENT_ACK
-\family default 
-) indicating that the operation was accepted by the target.
- These events will be logged in the event queue associated with the memory
- descriptor (
-\family typewriter 
-mem_desc
-\family default 
-) used in the put operation.
- Using a memory descriptor that does not have an associated event queue
- results in these events being discarded.
- In this case, the application must have another mechanism (e.g., a higher
- level protocol) for determining when it is safe to modify the memory region
- associated with the memory descriptor.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a valid memory descriptor.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-target
-\family default 
- is not a valid process id.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="8" columns="3">
-<features>
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the memory descriptor that describes the memory to be sent.
-  If the memory descriptor has an event queue  associated with it, it will
- be used to record events when the  message has been sent (PTL_EVENT_SEND_START,
- PTL_EVENT_SEND_END).
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ack_req
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-Controls whether an acknowledgement event is requested.
-  Acknowledgements are only sent when they are requested by the initiating
- process 
-\series bold 
-and
-\series default 
- the memory descriptor has an event queue 
-\series bold 
-and
-\series default 
- the target memory descriptor enables them.
- Allowed constants: 
-\family typewriter 
-PTL_ACK_REQ
-\family default 
-, 
-\family typewriter 
-PTL_NOACK_REQ
-\family default 
-.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-target
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A process id for the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-portal
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The index in the remote Portal table.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-cookie
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The index into the access control table of the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-match_bits
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The match bits to use for message selection at the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-offset
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The offset into the target memory descriptor (only used when the target
- memory descriptor has the 
-\family typewriter 
-PTL_MD_MANAGE_REMOTE
-\family default 
- option set).
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-hdr_data
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-64 bits of user data that can be included in the message header.
-  This data is written to an event queue entry at the target if an event
- queue is present on the matching memory descriptor.
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Subsection
-
-PtlGet
-\begin_inset LatexCommand \label{sec:get}
-
-\end_inset 
-
-
-\layout LyX-Code
-
-int PtlGet( ptl_handle_md_t  mem_desc,
-\newline 
-            ptl_process_id_t target,
-\newline 
-            ptl_pt_index_t   portal,
-\newline 
-            ptl_ac_index_t   cookie,
-\newline 
-            ptl_match_bits_t match_bits,
-\newline 
-            ptl_size_t       offset );
-\layout Standard
-\noindent 
-The 
-\emph on 
-PtlGet
-\emph default 
- function initiates a remote read operation.
- There are two event pairs associated with a get operation: when the data
- is sent from the remote node, a 
-\family typewriter 
-PTL_EVENT_GET_{START|END}
-\family default 
- event pair is registered on the remote node; and when the data is returned
- from the remote node a 
-\family typewriter 
-PTL_EVENT_REPLY_{START|END}
-\family default 
- event pair is registered on the local node.
-\layout Subsubsection
-
-Return Codes
-\layout Description
-
-PTL_OK Indicates success.
-\layout Description
-
-PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
-\layout Description
-
-PTL_INV_MD Indicates that 
-\family typewriter 
-mem_desc
-\family default 
- is not a valid memory descriptor.
-\layout Description
-
-PTL_INV_PROC Indicates that 
-\family typewriter 
-target
-\family default 
- is not a valid process id.
-\layout Subsubsection
-
-Arguments
-\layout Standard
-
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="6" columns="3">
-<features>
-<column alignment="right" valignment="top" width="0pt">
-<column alignment="center" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="4.7in">
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A handle for the memory descriptor that describes the memory into which
- the requested data will be received.
-  The memory descriptor can have an event queue associated with it to record
- events, such as when the message receive has started (
-\family typewriter 
-PTL_EVENT_REPLY
-\family default 
-_
-\family typewriter 
-START
-\family default 
-).
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-target
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-A process id for the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-portal
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The index in the remote Portal table.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-cookie
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The index into the access control table of the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-match_bits
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The match bits to use for message selection at the target process.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-offset
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-input
-\end_inset 
-</cell>
-<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-The offset into the target memory descriptor (only used when the target
- memory descriptor has the 
-\family typewriter 
-PTL_MD_MANAGE_REMOTE
-\family default 
- option set).
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\layout Section
-
-Summary
-\layout Standard
-
-
-\begin_inset LatexCommand \label{sec:summary}
-
-\end_inset 
-
- We conclude this section by summarizing the names introduced by the Portals
- 3.2 API.
- We start by summarizing the names of the types introduced by the API.
- This is followed by a summary of the functions introduced by the API.
- This, in turn, is followed by a summary of the function return codes.
- Finally, we conclude with a summary of the other constant values introduced
- by the API.
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:types}
-
-\end_inset 
-
- presents a summary of the types defined by the Portals API.
- The first column in this table gives the type name, the second column gives
- a brief description of the type, the third column identifies the section
- where the type is defined, and the fourth column lists the functions that
- have arguments of this type.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Types Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:types}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\noindent 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="25" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2in">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2.2in">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
- Name
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
- Meaning 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
- Sect
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
- Functions 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ac_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-indexes for an access control table 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:index-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlACEntry, PtlPut, PtlGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ack_req_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-acknowledgement request types 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlPut
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-kinds of events
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlGet
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-information about events 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlEQGet
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_seq_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-event sequence number
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:event-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlEQGet, PtlEQWait
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_any_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-handles for any object 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIHandle 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_eq_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-handles for event queues 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_md_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-handles for memory descriptors 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert,
- PtlPut, PtlGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_me_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-handles for match entries 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_ni_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-handles for network interfaces 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut,
- PtlGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_nid_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-node identifiers
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlGetId, PtlACEntry
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-process identifier
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlGetId, PtlACEntry
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_uid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user identifier
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlGetUid, PtlACEntry
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ins_pos_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-insertion position (before or after) 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_interface_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-identifiers for network interfaces 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_match_bits_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-match (and ignore) bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:mb-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_md_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-memory descriptors 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDAttach, PtlMDUpdate 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ni_fail_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-network interface-specific failures
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlEQGet, PtlEQWait
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-process identifiers 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:pid-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pt_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-indexes for Portal tables 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:index-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMEAttach, PtlMEAttachAny, PtlACEntry 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-sizes 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:size-t}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlEQAlloc, PtlPut, PtlGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_sr_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-indexes for status registers 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIStatus 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_sr_value_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-values in status registers 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIStatus 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_unlink_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-unlink options 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:func}
-
-\end_inset 
-
- presents a summary of the functions defined by the Portals API.
- The first column in this table gives the name for the function, the second
- column gives a brief description of the operation implemented by the function,
- and the third column identifies the section where the function is defined.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Functions Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:func}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="24" columns="3">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-Name 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Operation 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- Section 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlACEntry 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- update an entry in an access control table 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ac}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQAlloc 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create an event queue 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQGet 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the next event from an event queue 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQFree 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- release the resources for an event queue 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlEQWait 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- wait for a new event in an event queue 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:eq}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlFini 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- shut down the Portals API 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:init}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlGet 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- perform a get operation 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlGetId 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the id for the current process 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:pid}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlInit 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initialize the Portals API 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:init}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDAttach 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a memory descriptor and attach it to a match entry 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDBind 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a free-floating memory descriptor 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:mdbind}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDUnlink 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- remove a memory descriptor from a list and release its resources 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMDUpdate 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- update a memory descriptor 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEAttach 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-create a match entry and attach it to a Portal table 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMEAttachAny
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-create a match entry and attach it to a free Portal table entry
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:attachany}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEInsert 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- create a match entry and insert it in a list 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlMEUnlink 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- remove a match entry from a list and release its resources 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:me}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIDist 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the distance to another process 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIFini 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- shut down a network interface 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIHandle 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- get the network interface handle for an object 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIInit 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initialize a network interface 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlNIStatus 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- read a network interface status register 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- PtlPut 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- perform a put operation 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:datamovement}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:retcodes}
-
-\end_inset 
-
- summarizes the return codes used by functions defined by the Portals API.
- All of these constants are integer values.
- The first column of this table gives the symbolic name for the constant,
- the second column gives a brief description of the value, and the third
- column identifies the functions that can return this value.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Function Return Codes for the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:retcodes}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="27" columns="3">
-<features>
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="2.6in">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Name
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Meaning 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\series bold 
-Functions
-\series default 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_AC_INV_INDEX
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid access control table index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlACEntry 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EQ_DROPPED
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-at least one event has been dropped 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlEQGet, PtlEQWait 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EQ_EMPTY
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-no events available in an event queue 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlEQGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_FAIL 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-error during initialization or cleanup 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlInit, PtlFini 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_ILL_MD
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-illegal memory descriptor values 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDAttach, PtlMDBind, PtlMDUpdate 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INIT_DUP 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-duplicate initialization of an interface 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INIT_INV
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initialization of an invalid interface 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INUSE
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-the ME already has an MD
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDAttach
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_ASIZE
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid access control table size 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_EQ
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid event queue handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDUpdate, PtlEQFree, PtlEQGet 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_HANDLE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIHandle 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_MD 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid memory descriptor handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDUnlink, PtlMDUpdate 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_ME
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid match entry handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlMDAttach 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_NI 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid network interface handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_PROC 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid process identifier 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_PTINDEX
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid Portal table index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlMEAttach 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_REG 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid status register 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlNIStatus 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INV_SR_INDX 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-invalid status register index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlNIStatus 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_ML_TOOLONG 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match list too long 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlMEAttach, PtlMEInsert 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_INUSE
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-MD has pending operations
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMDUnlink
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_NOINIT 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-uninitialized API 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-\emph on 
-all
-\emph default 
-, except PtlInit 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_NOSPACE
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insufficient memory 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-PTL_NOUPDATE
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- no update was performed 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
- PtlMDUpdate 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_PT_FULL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-Portal table is full
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-PtlMEAttachAny
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_OK 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- success 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-\emph on 
-all
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_SEGV 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-addressing violation 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-\noindent 
-PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate,
- PtlEQAlloc, PtlEQGet, PtlEQWait 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:oconsts}
-
-\end_inset 
-
- summarizes the remaining constant values introduced by the Portals API.
- The first column in this table presents the symbolic name for the constant,
- the second column gives a brief description of the value, the third column
- identifies the type for the value, the fourth column identifies the section
- in which the value is introduced, and the fifth column identifies other
- sections in which the value is referenced.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Other Constants Defined by the Portals 3.2 API
-\begin_inset LatexCommand \label{tab:oconsts}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="36" columns="5">
-<features>
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Name
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Meaning 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Base type
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Intr.
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Ref.
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_ACK_REQ
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-request an acknowledgement 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ack_req_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EQ_NONE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a NULL event queue handle 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_eq_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:handle-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md}
-
-\end_inset 
-
-, 
-\begin_inset LatexCommand \ref{sec:mdupdate}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_GET_START
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event start
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_GET_END
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event end
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_GET_FAIL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-get event fail
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_PUT_START
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event start
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_PUT_END
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event end
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_PUT_FAIL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-put event fail
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_REPLY_START
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event start
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_REPLY_END
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event end
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_REPLY_FAIL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-reply event fail
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_ACK_START
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event start
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_ACK_END
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event end
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_ACK_FAIL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-acknowledgement event fail
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_SEND_START
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event start
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_SEND_END
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event end
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_SEND_FAIL
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-send event fail
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_EVENT_UNLINK
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-unlink event
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_event_kind_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:ek-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_PID_ANY 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for process id fields 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pid_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-, 
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_NID_ANY
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for node id fields
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_nid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-, 
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_UID_ANY
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for user id
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_uid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:id-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\begin_inset LatexCommand \ref{sec:meattach}
-
-\end_inset 
-
-, 
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_IFACE_DEFAULT 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-default interface 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_interface_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:ni-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INS_AFTER 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insert after 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ins_pos_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_INS_BEFORE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-insert before 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ins_pos_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:meinsert}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_ACK_DISABLE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to disable acknowledgements 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_MANAGE_REMOTE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable the use of remote offsets 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-, 
-\begin_inset LatexCommand \ref{sec:get}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_OP_GET 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable get operations 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_OP_PUT 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable put operations 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_THRESH_INF 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-infinite threshold for a memory descriptor 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_MD_TRUNCATE 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-a flag to enable truncation of a request 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:md-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_NOACK_REQ 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-request no acknowledgement 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ack_req_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:put}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_PT_INDEX_ANY 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-wildcard for Portal indexes 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pt_index_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:acentry}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_RETAIN 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-disable unlinking 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_unlink_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_SR_DROP_COUNT 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-index for the dropped count register 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_sr_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:stat-type}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-PTL_UNLINK 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-enable unlinking 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_unlink_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\begin_inset LatexCommand \ref{sec:mdattach}
-
-\end_inset 
-
-
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Chapter
-
-The Semantics of Message Transmission
-\begin_inset LatexCommand \label{sec:semantics}
-
-\end_inset 
-
-
-\layout Standard
-
-The portals API uses four types of messages: put requests, acknowledgements,
- get requests, and replies.
- In this chapter, we describe the information passed on the wire for each
- type of message.
- We also describe how this information is used to process incoming messages.
-\layout Section
-
-Sending Messages
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:put-wire}
-
-\end_inset 
-
- summarizes the information that is transmitted for a put request.
- The first column provides a descriptive name for the information, the second
- column provides the type for this information, the third column identifies
- the source of the information, and the fourth column provides additional
- notes.
- Most information that is transmitted is obtained directly from the 
-\emph on 
-PtlPut
-\emph default 
- operation.
- Notice that the handle for the memory descriptor used in the 
-\emph on 
-PtlPut
-\emph default 
- operation is transmitted even though this value cannot be interpreted by
- the target.
- A value of anything other than 
-\family typewriter 
-PTL_MD_NONE
-\family default 
- is interpreted as a request for an acknowledgement.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Put Request
-\begin_inset LatexCommand \label{tab:put-wire}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="12" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Information 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Type
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-\emph on 
-PtlPut
-\emph default 
- arg
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Notes 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a put request 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_uid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-target
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pt_index_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-portal 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-cookie 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ac_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-cookie
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_match_bits_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-match_bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-offset
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_md_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-no ack if 
-\family typewriter 
-PTL_MD_NONE
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-length
-\family default 
- member 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-data 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family roman 
-\emph on 
-bytes
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-start
-\family default 
- and 
-\family typewriter 
-length
-\family default 
- members 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:ack-wire}
-
-\end_inset 
-
- summarizes the information transmitted in an acknowledgement.
- Most of the information is simply echoed from the put request.
- Notice that the initiator and target are obtained directly from the put
- request, but are swapped in generating the acknowledgement.
- The only new piece of information in the acknowledgement is the manipulated
- length, which is determined as the put request is satisfied.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in an Acknowledgement
-\begin_inset LatexCommand \label{tab:ack-wire}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="10" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\series bold 
-Information
-\series default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\series bold 
-Type
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\series bold 
-Put Information 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\series bold 
-Notes 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- indicates an acknowledgement 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_pt_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_match_bits_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\family typewriter 
-ptl_size_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
- ptl_handle_md_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- requested length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
- ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- manipulated length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
- ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
- obtained from the operation 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:get-wire}
-
-\end_inset 
-
- summarizes the information that is transmitted for a get request.
- Like the information transmitted in a put request, most of the information
- transmitted in a get request is obtained directly from the 
-\emph on 
-PtlGet
-\emph default 
- operation.
- Unlike put requests, get requests do not include the event queue handle.
- In this case, the reply is generated whenever the operation succeeds and
- the memory descriptor must not be unlinked until the reply is received.
- As such, there is no advantage to explicitly sending the event queue handle.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Get Request
-\begin_inset LatexCommand \label{tab:get-wire}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="11" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Information
-\series default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Type
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-\emph on 
-PtlGet
-\emph default 
- argument
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Notes 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a get operation 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-user
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_uid_t
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-local information
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pt_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-portal
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-cookie 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_ac_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-cookie 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_match_bits_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-match_bits
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_md_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc
-\family default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-mem_desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-length
-\family default 
- member 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-Table\SpecialChar ~
-
-\begin_inset LatexCommand \ref{tab:reply-wire}
-
-\end_inset 
-
- summarizes the information transmitted in a reply.
- Like an acknowledgement, most of the information is simply echoed from
- the get request.
- The initiator and target are obtained directly from the get request, but
- are swapped in generating the reply.
- The only new pieces of information in the reply are the manipulated length
- and the data, which are determined as the get request is satisfied.
-\layout Standard
-
-
-\begin_inset Float table
-placement htbp
-wide false
-collapsed false
-
-\layout Caption
-
-Information Passed in a Reply
-\begin_inset LatexCommand \label{tab:reply-wire}
-
-\end_inset 
-
-
-\layout Standard
-
-
-\begin_inset ERT
-status Collapsed
-
-\layout Standard
-
-\backslash 
-medskip  
-\end_inset 
-
-
-\layout Standard
-\align center 
-
-\size small 
-
-\begin_inset  Tabular
-<lyxtabular version="3" rows="11" columns="4">
-<features firstHeadEmpty="true">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<column alignment="left" valignment="top" width="0pt">
-<row bottomline="true">
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Information
-\series default 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Type
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Get Information 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\series bold 
-Notes 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-operation 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-int
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-indicates a reply 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-target 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_process_id_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-initiator 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_pt_index_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-portal index 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" bottomline="true" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_match_bits_t 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-match bits 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-offset 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_handle_md_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-memory desc 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-requested length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-echo 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-manipulated length 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\family typewriter 
-ptl_size_t
-\family default 
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-obtained from the operation 
-\end_inset 
-</cell>
-</row>
-<row>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-data 
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-
-\emph on 
-bytes
-\end_inset 
-</cell>
-<cell alignment="left" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-\end_inset 
-</cell>
-<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text
-
-\layout Standard
-
-obtained from the operation 
-\end_inset 
-</cell>
-</row>
-</lyxtabular>
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Section
-
-Receiving Messages
-\begin_inset LatexCommand \label{sec:receiving}
-
-\end_inset 
-
-
-\layout Standard
-
-When an incoming message arrives on a network interface, the communication
- system first checks that the target process identified in the request is
- a valid process that has initialized the network interface (i.e., that the
- target process has a valid Portal table).
- If this test fails, the communication system discards the message and increment
-s the dropped message count for the interface.
- The remainder of the processing depends on the type of the incoming message.
- Put and get messages are subject to access control checks and translation
- (searching a match list), while acknowledgement and reply messages bypass
- the access control checks and the translation step.
-\layout Standard
-
-Acknowledgement messages include a handle for the memory descriptor used
- in the original 
-\emph on 
-PtlPut
-\emph default 
- operation.
- This memory descriptor will identify the event queue where the event should
- be recorded.
- Upon receipt of an acknowledgement, the runtime system only needs to confirm
- that the memory descriptor and event queue still exist and that there is
- space for another event.
- Should any of these conditions fail, the message is simply discarded
- and the dropped message count for the interface is incremented.
- Otherwise, the system builds an acknowledgement event from the information
- in the acknowledgement message and adds it to the event queue.
-\layout Standard
-
-Reception of reply messages is also relatively straightforward.
- Each reply message includes a handle for a memory descriptor.
- If this descriptor exists, it is used to receive the message.
- A reply message will be dropped if the memory descriptor identified in
- the request doesn't exist.
- In this case, the dropped message count for the interface is
- incremented.
- These are the only reasons for dropping reply messages.
- Every memory descriptor accepts and truncates incoming reply messages,
- eliminating the other potential reasons for rejecting a reply message.
-\layout Standard
-
-The critical step in processing an incoming put or get request involves
- mapping the request to a memory descriptor.
- This step starts by using the Portal index in the incoming request to identify
- a list of match entries.
- This list of match entries is searched in order until a match entry is
- found whose match criteria matches the match bits in the incoming request
- and whose memory descriptor accepts the request.
-\layout Standard
-
-Because acknowledgement and reply messages are generated in response to requests
- made by the process receiving these messages, the checks performed by the
- runtime system for acknowledgements and replies are minimal.
- In contrast, put and get messages are generated by remote processes and
- the checks performed for these messages are more extensive.
- Incoming put or get messages may be rejected because: 
-\layout Itemize
-
-the Portal index supplied in the request is not valid; 
-\layout Itemize
-
-the cookie supplied in the request is not a valid access control entry;
-\layout Itemize
-
-the access control entry identified by the cookie does not match the identifier
- of the requesting process; 
-\layout Itemize
-
-the access control entry identified by the cookie does not
- match the Portal index supplied in the request; or 
-\layout Itemize
-
-the match bits supplied in the request do not match any of the match entries
- with a memory descriptor that accepts the request.
-\layout Standard
-
-In all cases, if the message is rejected, the incoming message is discarded
- and the dropped message count for the interface is incremented.
-\layout Standard
-
-A memory descriptor may reject an incoming request for any of the following
- reasons: 
-\layout Itemize
-
-the 
-\family typewriter 
-PTL_MD_PUT
-\family default 
- or 
-\family typewriter 
-PTL_MD_GET
-\family default 
- option has not been enabled and the operation is put or get, respectively;
-\layout Itemize
-
-the length specified in the request is too long for the memory descriptor
- and the 
-\family typewriter 
-PTL_MD_TRUNCATE
-\family default 
- option has not been enabled.
-\layout Chapter
-
-Examples
-\begin_inset LatexCommand \label{sec:examples}
-
-\end_inset 
-
-
-\layout Comment
-
-The examples presented in this chapter have not been updated to reflect
- the current API.
-\layout Standard
-
-In this section we present several examples to illustrate expected usage
- patterns for the Portals 3.2 API.
- The first example describes how to implement parallel servers using the
- features of the Portals 3.2 API.
- This example covers the access control list and the use of remote managed
- offsets.
- The second example presents an approach to dealing with dropped requests.
- This example covers aspects of match lists and memory descriptors.
- The final example covers message reception in MPI.
- This example illustrates more sophisticated uses of matching and a procedure
- to update a memory descriptor.
-\layout Section
-
-Parallel File Servers
-\begin_inset LatexCommand \label{sec:expfs}
-
-\end_inset 
-
-
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:file}
-
-\end_inset 
-
- illustrates the logical structure of a parallel file server.
- In this case, the parallel server consists of four servers that stripe
- application data across four disks.
- We would like to present applications with the illusion that the file server
- is a single entity.
- We will assume that all of the processes that constitute the parallel server
- have the same user id.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename file.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 196pt
-       lyxheight 147pt
-\end_inset 
-
-
-\layout Caption
-
-Parallel File Server
-\begin_inset LatexCommand \label{fig:file}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-When an application establishes a connection to the parallel file server,
- it will allocate a Portal and access control list entry for communicating
- with the server.
- The access control list entry will include the Portal and match any process
- in the parallel file server, so all of the file server processes will
- have access to the portal.
- The Portal information and access control entry will be sent to the file
- server at this time.
- If the application and server need to have multiple, concurrent I/O operations,
- they can use additional portals or match entries to keep the operations
- from interfering with one another.
-\layout Standard
-
-When an application initiates an I/O operation, it first builds a memory
- descriptor that describes the memory region involved in the operation.
- This memory descriptor will enable the appropriate operation (put for read
- operations and get for write operations) and enable the use of remote offsets
- (this lets the servers decide where their data should be placed in the
- memory region).
- After creating the memory descriptor and linking it into the appropriate
- Portal entry, the application sends a read or write request (using 
-\emph on 
-PtlPut
-\emph default 
-) to one of the file server processes.
- The file server processes can then use put or get operations with the appropria
-te offsets to fill or retrieve the contents of the application's buffer.
- To know when the operation has completed, the application can add an event
- queue to the memory descriptor and add up the lengths of the remote operations
- until the sum is the size of the requested I/O operation.
-\layout Section
-
-Dealing with Dropped Requests
-\begin_inset LatexCommand \label{sec:exdrop}
-
-\end_inset 
-
-
-\layout Standard
-
-If a process does not anticipate unexpected requests, they will be discarded.
- Applications using the Portals API can query the dropped count for the
- interface to determine the number of requests that have been dropped (see
- Section\SpecialChar ~
-
-\begin_inset LatexCommand \ref{sec:nistatus}
-
-\end_inset 
-
-).
- While this approach minimizes resource consumption, it does not provide
- information that might be critical in debugging the implementation of a
- higher level protocol.
-\layout Standard
-
-To keep track of more information about dropped requests, we use a memory
- descriptor that truncates each incoming request to zero bytes and logs
- the 
-\begin_inset Quotes eld
-\end_inset 
-
-dropped
-\begin_inset Quotes erd
-\end_inset 
-
- operations in an event queue.
- Note that the operations are not dropped in the Portals sense, because
- the operation succeeds.
-\layout Standard
-
-The following code fragment illustrates an implementation of this approach.
- In this case, we assume that a thread is launched to execute the function
-\family typewriter 
-watch_drop
-\family default 
-.
- This code starts by building an event queue to log truncated operations
- and a memory descriptor to truncate the incoming requests.
- This example only captures 
-\begin_inset Quotes eld
-\end_inset 
-
-dropped
-\begin_inset Quotes erd
-\end_inset 
-
- requests for a single portal.
- In a more realistic situation, the memory descriptor would be appended
- to the match list for every portal.
- We also assume that the thread is capable of keeping up with the 
-\begin_inset Quotes eld
-\end_inset 
-
-dropped
-\begin_inset Quotes erd
-\end_inset 
-
- requests.
- If this is not the case, we could use a finite threshold on the memory
- descriptor to capture the first few dropped requests.
-\layout LyX-Code
-
-
-\size small 
-#include <stdio.h>
-\newline 
-#include <stdlib.h>
-\newline 
-#include <portals.h>
-\newline 
-
-\newline 
-#define DROP_SIZE 32       /* number of dropped requests to track */
-\newline 
-
-\newline 
-int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) {
-\newline 
-    ptl_handle_eq_t drop_events;
-\newline 
-    ptl_event_t event;
-\newline 
-    ptl_handle_md_t drop_em;
-\newline 
-    ptl_md_t drop_desc;
-\newline 
-    ptl_process_id_t any_proc;
-\newline 
-    ptl_handle_me_t match_any;
-\newline 
-
-\newline 
-    /* create the event queue */
-\newline 
-    if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) {
-\newline 
-        fprintf( stderr, "Couldn't create the event queue
-\backslash 
-n" );
-\newline 
-        exit( 1 );
-\newline 
-    }
-\newline 
-
-\newline 
-    /* build a match entry */
-\newline 
-    any_proc.nid = PTL_ID_ANY;
-\newline 
-    any_proc.pid = PTL_ID_ANY;
-\newline 
-    PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN,
-\newline 
-                        &match_any );
-\newline 
-
-\newline 
-    /* create the memory descriptor */
-\newline 
-    drop_desc.start = NULL;
-\newline 
-    drop_desc.length = 0;
-\newline 
-    drop_desc.threshold = PTL_MD_THRESH_INF;
-\newline 
-    drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE;
-\newline 
-    drop_desc.user_ptr = NULL;
-\newline 
-    drop_desc.eventq = drop_events;
-\newline 
-    if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) {
-\newline 
-        fprintf( stderr, "Couldn't create the memory descriptor
-\backslash 
-n" );
-\newline 
-        exit( 1 );
-\newline 
-    }
-\newline 
-
-\newline 
-    /* watch for "dropped" requests */
-\newline 
-    while( 1 ) {
-\newline 
-        if( PtlEQWait( drop_events, &event ) != PTL_OK ) break;
-\newline 
-        fprintf( stderr, "Dropped request from gid = %d, rid = %d
-\backslash 
-n", event.initiator.gid, event.initiator.rid );
-\newline 
-    }
-\newline 
-}
-\layout Section
-
-Message Transmission in MPI
-\begin_inset LatexCommand \label{sec:exmpi}
-
-\end_inset 
-
-
-\layout Standard
-
-We conclude this section with a fairly extensive example that describes
- an approach to implementing message transmission for MPI.
- Like many MPI implementations, we distinguish two message transmission
- protocols: a short message protocol and a long message protocol.
- We use the constant 
-\family typewriter 
-MPI_LONG_LENGTH
-\family default 
- to determine the size of a long message.
-\layout Standard
-
-For small messages, the sender simply sends the message and presumes that
- the message will be received (i.e., the receiver has allocated a memory region
- to receive the message body).
- For large messages, the sender also sends the message, but does not presume
- that the message body will be saved.
- Instead, the sender builds a memory descriptor for the message and enables
- get operations on this descriptor.
- If the target does not save the body of the message, it will record an
- event for the put operation.
- When the process later issues a matching MPI receive, it will perform a
- get operation to retrieve the body of the message.
-\layout Standard
-
-To facilitate receive side matching based on the protocol, we use the most
- significant bit in the match bits to indicate the protocol: 1 for long
- messages and 0 for short messages.
-\layout Standard
-
-The following code presents a function that implements the send side of
- the protocol.
- The global variable 
-\family typewriter 
-EndGet
-\family default 
- is the last match entry attached to the Portal index used for posting long
- messages.
- This entry does not match any incoming requests (i.e., the memory descriptor
- rejects all get operations) and is built during initialization of the MPI
- library.
- The other global variable, 
-\family typewriter 
-MPI_NI
-\family default 
-, is a handle for the network interface used by the MPI implementation.
-\layout LyX-Code
-
-
-\size small 
-extern ptl_handle_me_t EndGet;
-\newline 
-extern ptl_handle_ni_t MPI_NI;
-\newline 
-
-\newline 
-void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq,
-\newline 
-                    ptl_process_id_t target, ptl_match_bits_t match ) 
-\newline 
-{
-\newline 
-    ptl_handle_md_t send_handle;
-\newline 
-    ptl_md_t mem_desc;
-\newline 
-    ptl_ack_req_t want_ack;
-\newline 
-
-\newline 
-    mem_desc.start = buf;
-\newline 
-    mem_desc.length = len;
-\newline 
-    mem_desc.threshold = 1;
-\newline 
-    mem_desc.options = PTL_MD_GET_OP;
-\newline 
-    mem_desc.user_ptr = data;
-\newline 
-    mem_desc.eventq = eventq;
-\newline 
-
-\newline 
-    if( len >= MPI_LONG_LENGTH ) {
-\newline 
-        ptl_handle_me_t me_handle;
-\newline 
-
-\newline 
-        /* add a match entry to the end of the get list */
-\newline 
-        PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet,
- &me_handle );
-\newline 
-        PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL );
-\newline 
-
-\newline 
-        /* we want an ack for long messages */
-\newline 
-        want_ack = PTL_ACK_REQ;
-\newline 
-
-\newline 
-        /* set the protocol bit to indicate that this is a long message
- */
-\newline 
-        match |= 1<<63;
-\newline 
-    } else {
-\newline 
-        /* we don't want an ack for short messages */
-\newline 
-        want_ack = PTL_NOACK_REQ;
-\newline 
-
-\newline 
-        /* set the protocol bit to indicate that this is a short message
- */
-\newline 
-        match &= ~(1<<63);
-\newline 
-    }
-\newline 
-
-\newline 
-   /* create a memory descriptor and send it */
-\newline 
-   PtlMDBind( MPI_NI, mem_desc, &send_handle );
-\newline 
-   PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match,
- 0 );
-\newline 
-}
-\layout Standard
-
-The 
-\emph on 
-MPIsend
-\emph default 
- function returns as soon as the message has been scheduled for transmission.
- The event queue argument, 
-\family typewriter 
-eventq
-\family default 
-, can be used to determine the disposition of the message.
- Assuming that 
-\family typewriter 
-eventq
-\family default 
- is not 
-\family typewriter 
-PTL_EQ_NONE
-\family default 
-, a 
-\family typewriter 
-PTL_EVENT_SENT
-\family default 
- event will be recorded for each message as the message is transmitted.
- For small messages, this is the only event that will be recorded in 
-\family typewriter 
-eventq
-\family default 
-.
- In contrast, long messages include an explicit request for an acknowledgement.
- If the 
-\family typewriter 
-target
-\family default 
- process has posted a matching receive, the acknowledgement will be sent
- as the message is received.
- If a matching receive has not been posted, the message will be discarded
- and no acknowledgement will be sent.
- When the 
-\family typewriter 
-target
-\family default 
- process later issues a matching receive, the receive will be translated
- into a get operation and a 
-\family typewriter 
-PTL_EVENT_GET
-\family default 
- event will be recorded in 
-\family typewriter 
-eventq
-\family default 
-.
-\layout Standard
-
-Figure\SpecialChar ~
-
-\begin_inset LatexCommand \ref{fig:mpi}
-
-\end_inset 
-
- illustrates the organization of the match list used for receiving MPI messages.
- The initial entries (not shown in this figure) would be used to match the
- MPI receives that have been preposted by the application.
- The preposted receives are followed by a match entry, 
-\emph on 
-RcvMark
-\emph default 
-, that marks the boundary between preposted receives and the memory descriptors
- used for 
-\begin_inset Quotes eld
-\end_inset 
-
-unexpected
-\begin_inset Quotes erd
-\end_inset 
-
- messages.
- The 
-\emph on 
-RcvMark
-\emph default 
- entry is followed by a small collection of match entries that match unexpected
-\begin_inset Quotes eld
-\end_inset 
-
-short
-\begin_inset Quotes erd
-\end_inset 
-
- messages, i.e., messages that have a 0 in the most significant bit of their
- match bits.
- The memory descriptors associated with these match entries will append
- the incoming message to the associated memory descriptor and record an
- event in an event queue for unexpected messages.
- The unexpected short message matching entries are followed by a match entry
- that will match messages that were not matched by the preceding match entries,
- i.e., the unexpected long messages.
- The memory descriptor associated with this match entry truncates the message
- body and records an event in the event queue for unexpected messages.
- Note that all of the memory descriptors used for unexpected messages share
- a common event queue.
- This makes it possible to process the unexpected messages in the order
- in which they arrived, regardless of whether they are short or long.
-\layout Standard
-
-
-\begin_inset Float figure
-placement htbp
-wide false
-collapsed false
-
-\layout Standard
-\align center 
-
-\begin_inset Graphics FormatVersion 1
-       filename mpi.eps
-       display color
-       size_type 0
-       rotateOrigin center
-       lyxsize_type 1
-       lyxwidth 389pt
-       lyxheight 284pt
-\end_inset 
-
-
-\layout Caption
-
-Message Reception in MPI
-\begin_inset LatexCommand \label{fig:mpi}
-
-\end_inset 
-
-
-\end_inset 
-
-
-\layout Standard
-
-When the local MPI process posts an MPI receive, we must first search the
- events in the unexpected message queue to see if a matching message has already
- arrived.
- If no matching message is found, a match entry for the receive is inserted
- before the 
-\emph on 
-RcvMark
-\emph default 
- entry--after the match entries for all of the previously posted receives
- and before the match entries for the unexpected messages.
- This ensures that preposted receives are matched in the order that they
- were posted (a requirement of MPI).
-\layout Standard
-
-While this strategy respects the temporal semantics of MPI, it introduces
- a race condition: a matching message might arrive after the events in the
- unexpected message queue have been searched, but before the match entry
- for the receive has been inserted in the match list.
-\layout Standard
-
-To avoid this race condition we start by setting the 
-\family typewriter 
-threshold
-\family default 
- of the memory descriptor to 0, making the descriptor inactive.
- We then insert the match entry into the match list and proceed to search
- the events in the unexpected message queue.
- A matching message that arrives as we are searching the unexpected message
- queue will not be accepted by the memory descriptor and, if not matched
- by an earlier match list element, will add an event to the unexpected message
- queue.
- After searching the events in the unexpected message queue, we update the
- memory descriptor, setting the threshold to 1 to activate the memory descriptor.
- This update is predicated on the condition that the unexpected message
- queue is empty.
- We repeat the process of searching the unexpected message queue until the
- update succeeds.
-\layout Standard
-
-The following code fragment illustrates this approach.
- Because events must be removed from the unexpected message queue to be
- examined, this code fragment assumes the existence of a user managed event
- list, 
-\family typewriter 
-Rcvd
-\family default 
-, for the events that have already been removed from the unexpected message
- queue.
- In an effort to keep the example focused on the basic protocol, we have
- omitted the code that would be needed to manage the memory descriptors
- used for unexpected short messages.
- In particular, we simply leave messages in these descriptors until they
- are received by the application.
- In a robust implementation, we would introduce code to ensure that short
- unexpected messages are removed from these memory descriptors so that they
- can be re-used.
-\layout LyX-Code
-
-
-\size small 
-extern ptl_handle_eq_t UnexpQueue;
-\newline 
-extern ptl_handle_me_t RcvMark;
-\newline 
-extern ptl_handle_me_t ShortMatch;
-\newline 
-
-\newline 
-typedef struct event_list_tag {
-\newline 
-    ptl_event_t            event;
-\newline 
-    struct event_list_tag* next;
-\newline 
-} event_list;
-\newline 
-
-\newline 
-extern event_list Rcvd;
-\newline 
-
-\newline 
-void AppendRcvd( ptl_event_t event )
-\newline 
-{
-\newline 
-    /* append an event onto the Rcvd list */
-\newline 
-}
-\newline 
-
-\newline 
-int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi
-ts_t match,
-\newline 
-                       ptl_match_bits_t ignore, ptl_event_t *event )
-\newline 
-{
-\newline 
-    /* Search the Rcvd event queue, looking for a message that matches the
- requested message.
-\newline 
-     * If one is found, remove the event from the Rcvd list and return it.
- */
-\newline 
-}
-\newline 
-
-\newline 
-typedef enum { RECEIVED, POSTED } receive_state;
-\newline 
-
-\newline 
-receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event,
- ptl_md_t md_buf )
-\newline 
-{
-\newline 
-    ptl_md_t md_buf;
-\newline 
-    ptl_handle_me_t me_handle;
-\newline 
-
-\newline 
-    if( event.rlength >= MPI_LONG_LENGTH ) {
-\newline 
-        PtlMDBind( MPI_NI, md_buf, &md_handle );
-\newline 
-        PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX,
- md_handle );
-\newline 
-        return POSTED;
-\newline 
-    } else {
-\newline 
-        /* copy the message */
-\newline 
-        if( event.mlength < *length ) *length = event.mlength;
-\newline 
-        memcpy( buf, (char*)event.md_desc.start+event.offset, *length );
-\newline 
-        return RECEIVED;
-\newline 
-    }
-\newline 
-}
-\newline 
-
-\newline 
-receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle
-_eq_t eventq, 
-\newline 
-                           ptl_process_id_t sender, ptl_match_bits_t match,
- ptl_match_bits_t ignore )
-\newline 
-{
-\newline 
-    ptl_md_t md_buf;
-\newline 
-    ptl_handle_md_t md_handle;
-\newline 
-    ptl_handle_me_t me_handle;
-\newline 
-    ptl_event_t event;
-\newline 
-
-\newline 
-    /* build a memory descriptor for the receive */
-\newline 
-    md_buf.start = buf;
-\newline 
-    md_buf.length = *len;
-\newline 
-    md_buf.threshold = 0;     /* temporarily disabled */
-\newline 
-    md_buf.options = PTL_MD_PUT_OP;
-\newline 
-    md_buf.user_ptr = MPI_data;
-\newline 
-    md_buf.eventq = eventq;
-\newline 
-
-\newline 
-    /* see if we have already received the message */
-\newline 
-    if( SearchRcvd(buf, len, sender, match, ignore, &event) )
-\newline 
-         return CopyMsg( buf, len, event, md_buf );
-\newline 
-
-\newline 
-    /* create the match entry and attach the  memory descriptor */
-\newline 
-    PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark,
- &me_handle);
-\newline 
-    PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle );
-\newline 
-
-\newline 
-    md_buf.threshold = 1;
-\newline 
-    do
-\newline 
-        if( PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) {
-\newline 
-            if( MPIMatch(event, match, ignore, sender) ) {
-\newline 
-                return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset,
- md_buf );
-\newline 
-            } else {
-\newline 
-                AppendRcvd( event );
-\newline 
-            }
-\newline 
-        }
-\newline 
-    while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE
- );
-\newline 
-    return POSTED;
-\newline 
-}
-\layout Chapter*
-
-Acknowledgments
-\layout Standard
-
-Several people have contributed to the philosophy, design, and implementation
- of the Portals message passing architecture as it has evolved.
- We acknowledge the following people for their contributions: Al Audette,
- Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike
- Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke,
- Dave van Dresser, Lee Ward, and Stephen Wheat.
-\layout Standard
-
-
-\begin_inset LatexCommand \BibTeX[ieee]{portals3}
-
-\end_inset 
-
-
-\the_end
diff --git a/lustre/portals/doc/put.fig b/lustre/portals/doc/put.fig
deleted file mode 100644 (file)
index 5235b6d..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-#FIG 3.2
-Landscape
-Center
-Inches
-Letter  
-100.00
-Single
--2
-1200 2
-6 1350 900 2175 1200
-4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001
-4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001
--6
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        2700 1275 2700 1725
-2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2
-       0 0 1.00 60.00 120.00
-        900 525 2700 1200
-2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
-        0 300 1200 300 1200 2250 0 2250 0 300
-2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5
-        2400 300 3600 300 3600 2250 2400 2250 2400 300
-2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2
-       0 0 1.00 60.00 120.00
-        2699 1788 899 1938
-4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001
-4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001
-4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001
-4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001
-4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001
-4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001
diff --git a/lustre/portals/include/.cvsignore b/lustre/portals/include/.cvsignore
deleted file mode 100644 (file)
index 94d3790..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-config.h
-stamp-h
-stamp-h1
-stamp-h.in
-Makefile
-Makefile.in
diff --git a/lustre/portals/include/Makefile.am b/lustre/portals/include/Makefile.am
deleted file mode 100644 (file)
index 2b3eb8c..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = linux portals
-
-EXTRA_DIST = cygwin-ioctl.h
diff --git a/lustre/portals/include/cygwin-ioctl.h b/lustre/portals/include/cygwin-ioctl.h
deleted file mode 100644 (file)
index 8a33957..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * linux/ioctl.h for Linux by H.H. Bergman.
- */
-
-#ifndef _ASMI386_IOCTL_H
-#define _ASMI386_IOCTL_H
-
-/* ioctl command encoding: 32 bits total, command in lower 16 bits,
- * size of the parameter structure in the lower 14 bits of the
- * upper 16 bits.
- * Encoding the size of the parameter structure in the ioctl request
- * is useful for catching programs compiled with old versions
- * and to avoid overwriting user space outside the user buffer area.
- * The highest 2 bits are reserved for indicating the ``access mode''.
- * NOTE: This limits the max parameter size to 16kB -1 !
- */
-
-/*
- * The following is for compatibility across the various Linux
- * platforms.  The i386 ioctl numbering scheme doesn't really enforce
- * a type field.  De facto, however, the top 8 bits of the lower 16
- * bits are indeed used as a type field, so we might just as well make
- * this explicit here.  Please be sure to use the decoding macros
- * below from now on.
- */
-#undef _IO
-#undef _IOR
-#undef _IOW
-#undef _IOC
-#undef IOC_IN
-#undef IOC_OUT
-
-#define _IOC_NRBITS    8
-#define _IOC_TYPEBITS  8
-#define _IOC_SIZEBITS  14
-#define _IOC_DIRBITS   2
-
-#define _IOC_NRMASK    ((1 << _IOC_NRBITS)-1)
-#define _IOC_TYPEMASK  ((1 << _IOC_TYPEBITS)-1)
-#define _IOC_SIZEMASK  ((1 << _IOC_SIZEBITS)-1)
-#define _IOC_DIRMASK   ((1 << _IOC_DIRBITS)-1)
-
-#define _IOC_NRSHIFT   0
-#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) /* = 8 */
-#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) /* = 16 */
-#define _IOC_DIRSHIFT  (_IOC_SIZESHIFT+_IOC_SIZEBITS) /* = 30 */
-
-/*
- * Direction bits (the field placed at _IOC_DIRSHIFT by _IOC()).
- */
-#define _IOC_NONE      0U
-#define _IOC_WRITE     1U
-#define _IOC_READ      2U
-
-#define _IOC(dir,type,nr,size) \
-       (((dir)  << _IOC_DIRSHIFT) | \
-        ((type) << _IOC_TYPESHIFT) | \
-        ((nr)   << _IOC_NRSHIFT) | \
-        ((size) << _IOC_SIZESHIFT))
-
-/* used to create numbers; _IOR/_IOW/_IOWR apply sizeof() to their size argument */
-#define _IO(type,nr)           _IOC(_IOC_NONE,(type),(nr),0)
-#define _IOR(type,nr,size)     _IOC(_IOC_READ,(type),(nr),sizeof(size))
-#define _IOW(type,nr,size)     _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
-#define _IOWR(type,nr,size)    _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
-
-/* used to decode ioctl numbers: each macro shifts and masks out one field */
-#define _IOC_DIR(nr)           (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
-#define _IOC_TYPE(nr)          (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
-#define _IOC_NR(nr)            (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
-#define _IOC_SIZE(nr)          (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
-
-/* ...and for the drivers/sound files... */
-
-#define IOC_IN         (_IOC_WRITE << _IOC_DIRSHIFT)
-#define IOC_OUT                (_IOC_READ << _IOC_DIRSHIFT)
-#define IOC_INOUT      ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
-#define IOCSIZE_MASK   (_IOC_SIZEMASK << _IOC_SIZESHIFT)
-#define IOCSIZE_SHIFT  (_IOC_SIZESHIFT)
-
-#endif /* _ASMI386_IOCTL_H */
diff --git a/lustre/portals/include/linux/Makefile.am b/lustre/portals/include/linux/Makefile.am
deleted file mode 100644 (file)
index 3c28c6e..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-linuxdir = $(includedir)/linux
-
-EXTRA_DIST = kp30.h kpr.h libcfs.h lustre_list.h portals_compat25.h    \
-       portals_lib.h
diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h
deleted file mode 100644 (file)
index 85284ce..0000000
+++ /dev/null
@@ -1,748 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef _KP30_INCLUDED
-#define _KP30_INCLUDED
-
-#include <linux/libcfs.h>
-#define PORTAL_DEBUG
-
-#ifdef __KERNEL__
-# include <linux/vmalloc.h>
-# include <linux/time.h>
-# include <linux/slab.h>
-# include <linux/interrupt.h>
-# include <linux/highmem.h>
-# include <linux/module.h>
-# include <linux/version.h>
-# include <portals/p30.h>
-# include <linux/smp_lock.h>
-# include <asm/atomic.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define schedule_work schedule_task
-#define prepare_work(wq,cb,cbdata)                                            \
-do {                                                                          \
-        INIT_TQUEUE((wq), 0, 0);                                              \
-        PREPARE_TQUEUE((wq), (cb), (cbdata));                                 \
-} while (0)
-
-#define PageUptodate Page_Uptodate
-#define our_recalc_sigpending(current) recalc_sigpending(current)
-#define num_online_cpus() smp_num_cpus
-static inline void our_cond_resched(void)
-{
-        if (current->need_resched)
-               schedule ();
-}
-#define work_struct_t       struct tq_struct
-
-#else
-
-#define prepare_work(wq,cb,cbdata)                                            \
-do {                                                                          \
-        INIT_WORK((wq), (void *)(cb), (void *)(cbdata));                      \
-} while (0)
-#define wait_on_page wait_on_page_locked
-#define our_recalc_sigpending(current) recalc_sigpending()
-#define strtok(a,b) strpbrk(a, b)
-static inline void our_cond_resched(void)
-{
-        cond_resched();
-}
-#define work_struct_t      struct work_struct
-
-#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */
-
-#ifdef PORTAL_DEBUG
-extern void kportal_assertion_failed(char *expr, char *file, const char *func,
-                                     const int line);
-#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__,  \
-                                                        __FUNCTION__, __LINE__))
-#define LASSERTF(cond, fmt...)                                                \
-        do {                                                                  \
-                if (unlikely(!(cond))) {                                      \
-                        portals_debug_msg(DEBUG_SUBSYSTEM, D_EMERG,  __FILE__,\
-                                          __FUNCTION__,__LINE__, CDEBUG_STACK,\
-                                          "ASSERTION(" #cond ") failed:" fmt);\
-                        LBUG();                                               \
-                }                                                             \
-        } while (0)
-
-#else
-#define LASSERT(e)
-#define LASSERTF(cond, fmt...) do { } while (0)
-#endif
-
-#ifdef CONFIG_SMP
-#define LASSERT_SPIN_LOCKED(lock) LASSERT(spin_is_locked(lock))
-#else
-#define LASSERT_SPIN_LOCKED(lock) do {} while(0)
-#endif
-
-#ifdef __arch_um__
-#define LBUG_WITH_LOC(file, func, line)                                 \
-do {                                                                    \
-        CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n");       \
-        portals_debug_dumplog();                                        \
-        portals_run_lbug_upcall(file, func, line);                      \
-        panic("LBUG");                                                  \
-} while (0)
-#else
-#define LBUG_WITH_LOC(file, func, line)                                 \
-do {                                                                    \
-        CEMERG("LBUG\n");                                               \
-        portals_debug_dumpstack(NULL);                                  \
-        portals_debug_dumplog();                                        \
-        portals_run_lbug_upcall(file, func, line);                      \
-        set_task_state(current, TASK_UNINTERRUPTIBLE);                  \
-        schedule();                                                     \
-} while (0)
-#endif /* __arch_um__ */
-
-#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__)
-
-/*
- * Memory
- */
-#ifdef PORTAL_DEBUG
-extern atomic_t portal_kmemory;
-
-# define portal_kmem_inc(ptr, size)                                           \
-do {                                                                          \
-        atomic_add(size, &portal_kmemory);                                    \
-} while (0)
-
-# define portal_kmem_dec(ptr, size) do {                                      \
-        atomic_sub(size, &portal_kmemory);                                    \
-} while (0)
-
-#else
-# define portal_kmem_inc(ptr, size) do {} while (0)
-# define portal_kmem_dec(ptr, size) do {} while (0)
-#endif /* PORTAL_DEBUG */
-
-#define PORTAL_VMALLOC_SIZE        16384
-/* Zeroed alloc: vmalloc above PORTAL_VMALLOC_SIZE, else kmalloc(mask); args are evaluated more than once */
-#define PORTAL_ALLOC_GFP(ptr, size, mask)                                 \
-do {                                                                      \
-        LASSERT(!in_interrupt() ||                                        \
-               ((size) <= PORTAL_VMALLOC_SIZE && (mask) == GFP_ATOMIC));  \
-        if ((size) > PORTAL_VMALLOC_SIZE)                                 \
-                (ptr) = vmalloc(size);                                    \
-        else                                                              \
-                (ptr) = kmalloc((size), (mask));                          \
-        if ((ptr) == NULL) {                                              \
-                CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\
-                       #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\
-                CERROR("PORTALS: %d total bytes allocated by portals\n",  \
-                       atomic_read(&portal_kmemory));                     \
-        } else {                                                          \
-                portal_kmem_inc((ptr), (size));                           \
-                memset((ptr), 0, (size));                                 \
-        }                                                                 \
-        CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n",    \
-               (int)(size), (ptr), atomic_read (&portal_kmemory));        \
-} while (0)
-
-#define PORTAL_ALLOC(ptr, size) \
-        PORTAL_ALLOC_GFP(ptr, size, GFP_NOFS)
-
-#define PORTAL_ALLOC_ATOMIC(ptr, size) \
-        PORTAL_ALLOC_GFP(ptr, size, GFP_ATOMIC)
-/* Frees 'ptr' via vfree/kfree chosen by 'size' (compared against PORTAL_VMALLOC_SIZE); NULL is logged, not freed */
-#define PORTAL_FREE(ptr, size)                                          \
-do {                                                                    \
-        int __pf_size = (size);                                         \
-        if ((ptr) == NULL) {                                            \
-                CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at "   \
-                       "%s:%d\n", __pf_size, __FILE__, __LINE__);       \
-                break;                                                  \
-        }                                                               \
-        if (__pf_size > PORTAL_VMALLOC_SIZE)                            \
-                vfree(ptr);                                             \
-        else                                                            \
-                kfree(ptr);                                             \
-        portal_kmem_dec((ptr), __pf_size);                              \
-        CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n",     \
-               __pf_size, (ptr), atomic_read(&portal_kmemory));         \
-} while (0)
-
-/* ------------------------------------------------------------------- */
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
-#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x)
-#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x)
-
-#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x))
-#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x)
-
-#define PORTAL_MODULE_USE       MOD_INC_USE_COUNT
-#define PORTAL_MODULE_UNUSE     MOD_DEC_USE_COUNT
-#else
-
-#define PORTAL_SYMBOL_REGISTER(x)
-#define PORTAL_SYMBOL_UNREGISTER(x)
-
-#define PORTAL_SYMBOL_GET(x) symbol_get(x)
-#define PORTAL_SYMBOL_PUT(x) symbol_put(x)
-
-#define PORTAL_MODULE_USE       try_module_get(THIS_MODULE)
-#define PORTAL_MODULE_UNUSE     module_put(THIS_MODULE)
-
-#endif
-
-/******************************************************************************/
-
-#ifdef PORTALS_PROFILING
-#define prof_enum(FOO) PROF__##FOO
-enum {
-        prof_enum(our_recvmsg),
-        prof_enum(our_sendmsg),
-        prof_enum(socknal_recv),
-        prof_enum(lib_parse),
-        prof_enum(conn_list_walk),
-        prof_enum(memcpy),
-        prof_enum(lib_finalize),
-        prof_enum(pingcli_time),
-        prof_enum(gmnal_send),
-        prof_enum(gmnal_recv),
-        MAX_PROFS
-};
-
-struct prof_ent {
-        char *str;
-        /* hrmph.  wrap-tastic. */
-        u32       starts;
-        u32       finishes;
-        cycles_t  total_cycles;
-        cycles_t  start;
-        cycles_t  end;
-};
-
-extern struct prof_ent prof_ents[MAX_PROFS];
-
-#define PROF_START(FOO)                                         \
-        do {                                                    \
-                struct prof_ent *pe = &prof_ents[PROF__##FOO];  \
-                pe->starts++;                                   \
-                pe->start = get_cycles();                       \
-        } while (0)
-
-#define PROF_FINISH(FOO)                                        \
-        do {                                                    \
-                struct prof_ent *pe = &prof_ents[PROF__##FOO];  \
-                pe->finishes++;                                 \
-                pe->end = get_cycles();                         \
-                pe->total_cycles += (pe->end - pe->start);      \
-        } while (0)
-#else /* !PORTALS_PROFILING */
-#define PROF_START(FOO) do {} while(0)
-#define PROF_FINISH(FOO) do {} while(0)
-#endif /* PORTALS_PROFILING */
-
-/* debug.c */
-void portals_debug_dumpstack(struct task_struct *tsk);
-void portals_run_upcall(char **argv);
-void portals_run_lbug_upcall(char * file, const char *fn, const int line);
-void portals_debug_dumplog(void);
-int portals_debug_init(unsigned long bufsize);
-int portals_debug_cleanup(void);
-int portals_debug_clear_buffer(void);
-int portals_debug_mark_buffer(char *text);
-int portals_debug_set_daemon(unsigned int cmd, unsigned int length,
-                             char *file, unsigned int size);
-__s32 portals_debug_copy_to_user(char *buf, unsigned long len);
-#if (__GNUC__)
-/* Use the special GNU C __attribute__ hack to have the compiler check the
- * printf style argument string against the actual argument count and
- * types.
- */
-#ifdef printf
-# warning printf has been defined as a macro...
-# undef printf
-#endif
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-                       const int line, unsigned long stack,
-                       char *format, ...)
-        __attribute__ ((format (printf, 7, 8)));
-#else
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-                       const int line, unsigned long stack,
-                       const char *format, ...);
-#endif /* __GNUC__ */
-void portals_debug_set_level(unsigned int debug_level);
-
-# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b)
-# define printf(format, b...) CDEBUG(D_OTHER, format , ## b)
-# define time(a) CURRENT_TIME
-
-extern void kportal_daemonize (char *name);
-extern void kportal_blockallsigs (void);
-
-#else  /* !__KERNEL__ */
-# include <stdio.h>
-# include <stdlib.h>
-#ifndef __CYGWIN__
-# include <stdint.h>
-#else
-# include <cygwin-ioctl.h>
-#endif
-# include <unistd.h>
-# include <time.h>
-# include <limits.h>
-# ifndef DEBUG_SUBSYSTEM
-#  define DEBUG_SUBSYSTEM S_UNDEFINED
-# endif
-# ifdef PORTAL_DEBUG
-#  undef NDEBUG
-#  include <assert.h>
-#  define LASSERT(e)     assert(e)
-#  define LASSERTF(cond, args...)                                              \
-do {                                                                           \
-          if (!(cond))                                                         \
-                CERROR(args);                                                  \
-          assert(cond);                                                        \
-} while (0)
-# else
-#  define LASSERT(e)
-#  define LASSERTF(cond, args...) do { } while (0)
-# endif
-# define printk(format, args...) printf (format, ## args)
-# define PORTAL_ALLOC(ptr, size) do { (ptr) = calloc(1, (size)); } while (0) /* zeroed, like the kernel macro; caller supplies the ';' */
-# define PORTAL_FREE(a, b) do { free(a); } while (0) /* size 'b' is unused in userland */
-void portals_debug_dumplog(void);
-# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \
-    printf("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format,                    \
-           (subsys), (mask), (long)time(0), file, fn, line,                   \
-           getpid() , stack, ## a);
-
-#undef CWARN
-#undef CERROR
-#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a)
-#endif
-
-/* support decl needed both by kernel and liblustre */
-char *portals_nid2str(int nal, ptl_nid_t nid, char *str);
-char *portals_id2str(int nal, ptl_process_id_t nid, char *str);
-
-#ifndef CURRENT_TIME
-# define CURRENT_TIME time(0)
-#endif
-
-/******************************************************************************/
-/* Light-weight trace
- * Support for temporary event tracing with minimal Heisenberg effect. */
-#define LWT_SUPPORT  0
-
-#define LWT_MEMORY   (16<<20)
-
-#if !KLWT_SUPPORT
-# if defined(__KERNEL__)
-#  if !defined(BITS_PER_LONG)
-#   error "BITS_PER_LONG not defined"
-#  endif
-# elif !defined(__WORDSIZE)
-#  error "__WORDSIZE not defined"
-# else
-#  define BITS_PER_LONG __WORDSIZE
-# endif
-
-/* kernel hasn't defined this? */
-typedef struct {
-        long long   lwte_when;
-        char       *lwte_where;
-        void       *lwte_task;
-        long        lwte_p1;
-        long        lwte_p2;
-        long        lwte_p3;
-        long        lwte_p4;
-# if BITS_PER_LONG > 32
-        long        lwte_pad;
-# endif
-} lwt_event_t;
-#endif /* !KLWT_SUPPORT */
-
-#if LWT_SUPPORT
-# ifdef __KERNEL__
-#  if !KLWT_SUPPORT
-
-typedef struct _lwt_page {
-        struct list_head     lwtp_list;
-        struct page         *lwtp_page;
-        lwt_event_t         *lwtp_events;
-} lwt_page_t;
-
-typedef struct {
-        int                lwtc_current_index;
-        lwt_page_t        *lwtc_current_page;
-} lwt_cpu_t;
-
-extern int       lwt_enabled;
-extern lwt_cpu_t lwt_cpus[];
-
-/* Note that we _don't_ define LWT_EVENT at all if LWT_SUPPORT isn't set.
- * This stuff is meant for finding specific problems; it never stays in
- * production code... */
-
-#define LWTSTR(n)       #n
-#define LWTWHERE(f,l)   f ":" LWTSTR(l)
-#define LWT_EVENTS_PER_PAGE (PAGE_SIZE / sizeof (lwt_event_t))
-
-#define LWT_EVENT(p1, p2, p3, p4)                                       \
-do {                                                                    \
-        unsigned long    flags;                                         \
-        lwt_cpu_t       *cpu;                                           \
-        lwt_page_t      *p;                                             \
-        lwt_event_t     *e;                                             \
-                                                                        \
-        if (lwt_enabled) {                                              \
-                local_irq_save (flags);                                 \
-                                                                        \
-                cpu = &lwt_cpus[smp_processor_id()];                    \
-                p = cpu->lwtc_current_page;                             \
-                e = &p->lwtp_events[cpu->lwtc_current_index++];         \
-                                                                        \
-                if (cpu->lwtc_current_index >= LWT_EVENTS_PER_PAGE) {   \
-                        cpu->lwtc_current_page =                        \
-                                list_entry (p->lwtp_list.next,          \
-                                            lwt_page_t, lwtp_list);     \
-                        cpu->lwtc_current_index = 0;                    \
-                }                                                       \
-                                                                        \
-                e->lwte_when  = get_cycles();                           \
-                e->lwte_where = LWTWHERE(__FILE__,__LINE__);            \
-                e->lwte_task  = current;                                \
-                e->lwte_p1    = (long)(p1);                             \
-                e->lwte_p2    = (long)(p2);                             \
-                e->lwte_p3    = (long)(p3);                             \
-                e->lwte_p4    = (long)(p4);                             \
-                                                                        \
-                local_irq_restore (flags);                              \
-        }                                                               \
-} while (0)
-
-#endif /* !KLWT_SUPPORT */
-
-extern int  lwt_init (void);
-extern void lwt_fini (void);
-extern int  lwt_lookup_string (int *size, char *knlptr,
-                               char *usrptr, int usrsize);
-extern int  lwt_control (int enable, int clear);
-extern int  lwt_snapshot (cycles_t *now, int *ncpu, int *total_size,
-                          void *user_ptr, int user_size);
-# else  /* __KERNEL__ */
-#  define LWT_EVENT(p1,p2,p3,p4)     /* no userland implementation yet */
-# endif /* __KERNEL__ */
-#endif /* LWT_SUPPORT */
-
-struct portals_device_userstate
-{
-        int          pdu_memhog_pages;
-        struct page *pdu_memhog_root_page;
-};
-
-#include <linux/portals_lib.h>
-
-/*
- * USER LEVEL STUFF BELOW
- */
-
-#define PORTAL_IOCTL_VERSION 0x00010007
-#define PING_SYNC       0
-#define PING_ASYNC      1
-
-struct portal_ioctl_hdr {
-        __u32 ioc_len;
-        __u32 ioc_version;
-};
-
-struct portals_debug_ioctl_data
-{
-        struct portal_ioctl_hdr hdr;
-        unsigned int subs;
-        unsigned int debug;
-};
-/* Zero 'data' (an lvalue with ioc_version/ioc_len members), then stamp the ioctl version and its own size */
-#define PORTAL_IOC_INIT(data)                           \
-do {                                                    \
-        memset(&(data), 0, sizeof(data));               \
-        (data).ioc_version = PORTAL_IOCTL_VERSION;      \
-        (data).ioc_len = sizeof(data);                  \
-} while (0)
-
-/* FIXME check conflict with lustre_lib.h */
-#define PTL_IOC_DEBUG_MASK             _IOWR('f', 250, long)
-
-static inline int portal_ioctl_packlen(struct portal_ioctl_data *data)
-{
-        /* fixed struct, then both inline lengths, each passed through size_round() */
-        int total = sizeof(*data);
-        total += size_round(data->ioc_inllen1) + size_round(data->ioc_inllen2);
-        return total;
-}
-
-static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data)
-{       /* returns 1 (after logging the reason with CERROR) on the first failed check, 0 if all pass */
-        if (data->ioc_len > (1<<30)) {
-                CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen1 > (1<<30)) {
-                CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inllen2 > (1<<30)) {
-                CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf1 && !data->ioc_inllen1) {  /* pointer/length pairs must agree */
-                CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_inlbuf2 && !data->ioc_inllen2) {
-                CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_pbuf1 && !data->ioc_plen1) {
-                CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_pbuf2 && !data->ioc_plen2) {
-                CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n");
-                return 1;
-        }
-        if (data->ioc_plen1 && !data->ioc_pbuf1) {
-                CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n");
-                return 1;
-        }
-        if (data->ioc_plen2 && !data->ioc_pbuf2) {
-                CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n");
-                return 1;
-        }
-        if (portal_ioctl_packlen(data) != data->ioc_len ) {  /* checked before ioc_bulk[] is indexed below */
-                CERROR ("PORTALS ioctl: packlen != ioc_len\n");
-                return 1;
-        }
-        if (data->ioc_inllen1 &&
-            data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') {  /* last byte of inline buffer 1 */
-                CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n");
-                return 1;
-        }
-        if (data->ioc_inllen2 &&
-            data->ioc_bulk[size_round(data->ioc_inllen1) +
-                           data->ioc_inllen2 - 1] != '\0') {  /* last byte of inline buffer 2 */
-                CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n");
-                return 1;
-        }
-        return 0;
-}
-
-#ifndef __KERNEL__
-static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf,
-                                    int max)
-{
-        struct portal_ioctl_data *packed;
-        char *cursor;
-
-        /* stamp the request with its packed length and the ioctl version */
-        data->ioc_len = portal_ioctl_packlen(data);
-        data->ioc_version = PORTAL_IOCTL_VERSION;
-
-        if (*pbuf == NULL)
-                *pbuf = malloc(data->ioc_len);  /* no caller buffer: allocate */
-        else if (portal_ioctl_packlen(data) > max)
-                return 1;                       /* caller buffer too small */
-        if (*pbuf == NULL)
-                return 1;
-
-        /* fixed part first; inline buffers go through LOGL() (defined elsewhere) */
-        memcpy(*pbuf, data, sizeof(*data));
-        packed = (struct portal_ioctl_data *)*pbuf;
-        cursor = packed->ioc_bulk;
-        if (data->ioc_inlbuf1)
-                LOGL(data->ioc_inlbuf1, data->ioc_inllen1, cursor);
-        if (data->ioc_inlbuf2)
-                LOGL(data->ioc_inlbuf2, data->ioc_inllen2, cursor);
-
-        return portal_ioctl_is_invalid(packed) ? 1 : 0;
-}
-#else
-#include <asm/uaccess.h>
-
-/* buffer MUST be at least the size of portal_ioctl_hdr.  Copies the request at
- * user address 'arg' into [buf, end) and validates it; returns 0 or -errno. */
-static inline int portal_ioctl_getdata(char *buf, char *end, void *arg)
-{
-        struct portal_ioctl_hdr *hdr = (struct portal_ioctl_hdr *)buf;
-        struct portal_ioctl_data *data = (struct portal_ioctl_data *)buf;
-        __u32 len;
-        ENTRY;
-
-        /* copy_from_user() returns bytes NOT copied, which is not an errno */
-        if (copy_from_user(buf, (void *)arg, sizeof(*hdr)))
-                RETURN(-EFAULT);
-
-        if (hdr->ioc_version != PORTAL_IOCTL_VERSION) {
-                CERROR("PORTALS: version mismatch kernel vs application\n");
-                RETURN(-EINVAL);
-        }
-
-        /* validate a private copy of the length, comparing it with the space
-         * left rather than computing buf + len, which can wrap */
-        len = hdr->ioc_len;
-        if (end <= buf || len >= (unsigned long)(end - buf)) {
-                CERROR("PORTALS: user buffer exceeds kernel buffer\n");
-                RETURN(-EINVAL);
-        }
-
-        if (len < sizeof(struct portal_ioctl_data)) {
-                CERROR("PORTALS: user buffer too small for ioctl\n");
-                RETURN(-EINVAL);
-        }
-
-        if (copy_from_user(buf, (void *)arg, len))
-                RETURN(-EFAULT);
-
-        /* header was re-read from user space: its length must not have changed */
-        if (data->ioc_len != len || portal_ioctl_is_invalid(data)) {
-                CERROR("PORTALS: ioctl not correctly formatted\n");
-                RETURN(-EINVAL);
-        }
-
-        if (data->ioc_inllen1)
-                data->ioc_inlbuf1 = &data->ioc_bulk[0];
-
-        if (data->ioc_inllen2)
-                data->ioc_inlbuf2 = &data->ioc_bulk[0] +
-                        size_round(data->ioc_inllen1);
-
-        RETURN(0);
-}
-#endif
-
-/* ioctls for manipulating snapshots 30- */
-#define IOC_PORTAL_TYPE                   'e'
-#define IOC_PORTAL_MIN_NR                 30
-
-#define IOC_PORTAL_PING                    _IOWR('e', 30, long)
-
-#define IOC_PORTAL_CLEAR_DEBUG             _IOWR('e', 32, long)
-#define IOC_PORTAL_MARK_DEBUG              _IOWR('e', 33, long)
-#define IOC_PORTAL_PANIC                   _IOWR('e', 34, long)
-#define IOC_PORTAL_NAL_CMD                 _IOWR('e', 35, long)
-#define IOC_PORTAL_GET_NID                 _IOWR('e', 36, long)
-#define IOC_PORTAL_FAIL_NID                _IOWR('e', 37, long)
-
-#define IOC_PORTAL_LWT_CONTROL             _IOWR('e', 39, long)
-#define IOC_PORTAL_LWT_SNAPSHOT            _IOWR('e', 40, long)
-#define IOC_PORTAL_LWT_LOOKUP_STRING       _IOWR('e', 41, long)
-#define IOC_PORTAL_MEMHOG                  _IOWR('e', 42, long)
-#define IOC_PORTAL_MAX_NR                             42
-
-enum {
-        QSWNAL    = 1,
-        SOCKNAL   = 2,
-        GMNAL     = 3,
-        /*          4 unused */
-        TCPNAL    = 5,
-        ROUTER    = 6,
-        OPENIBNAL = 7,
-        IIBNAL    = 8,
-        LONAL     = 9,
-        RANAL     = 10,
-        NAL_ENUM_END_MARKER
-};
-
-#define PTL_NALFMT_SIZE             32 /* %u:%u.%u.%u.%u,%u (10+4+4+4+3+5+1) */
-
-#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1)
-
-#define NAL_CMD_REGISTER_PEER_FD     100
-#define NAL_CMD_CLOSE_CONNECTION     101
-#define NAL_CMD_REGISTER_MYNID       102
-#define NAL_CMD_PUSH_CONNECTION      103
-#define NAL_CMD_GET_CONN             104
-#define NAL_CMD_DEL_PEER             105
-#define NAL_CMD_ADD_PEER             106
-#define NAL_CMD_GET_PEER             107
-#define NAL_CMD_GET_TXDESC           108
-#define NAL_CMD_ADD_ROUTE            109
-#define NAL_CMD_DEL_ROUTE            110
-#define NAL_CMD_GET_ROUTE            111
-#define NAL_CMD_NOTIFY_ROUTER        112
-#define NAL_CMD_ADD_INTERFACE        113
-#define NAL_CMD_DEL_INTERFACE        114
-#define NAL_CMD_GET_INTERFACE        115
-
-
-enum {
-        DEBUG_DAEMON_START       =  1,
-        DEBUG_DAEMON_STOP        =  2,
-        DEBUG_DAEMON_PAUSE       =  3,
-        DEBUG_DAEMON_CONTINUE    =  4,
-};
-
-
-enum cfg_record_type {
-        PORTALS_CFG_TYPE = 1,
-        LUSTRE_CFG_TYPE = 123,
-};
-
-typedef int (*cfg_record_cb_t)(enum cfg_record_type, int len, void *data);
-
-#ifdef __CYGWIN__
-# ifndef BITS_PER_LONG
-#  if (~0UL) == 0xffffffffUL
-#   define BITS_PER_LONG 32
-#  else
-#   define BITS_PER_LONG 64
-#  endif
-# endif
-#endif
-
-#if BITS_PER_LONG > 32
-# define LI_POISON ((int)0x5a5a5a5a5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a5a5a5a5a)
-#else
-# define LI_POISON ((int)0x5a5a5a5a)
-# define LL_POISON ((long)0x5a5a5a5a)
-# define LP_POISON ((void *)(long)0x5a5a5a5a)
-#endif
-
-#if defined(__x86_64__)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ  "%lu"
-# define LPSSZ "%ld"
-#elif (BITS_PER_LONG == 32 || __WORDSIZE == 32)
-# define LPU64 "%Lu"
-# define LPD64 "%Ld"
-# define LPX64 "%#Lx"
-# define LPSZ  "%u"
-# define LPSSZ "%d"
-#elif (BITS_PER_LONG == 64 || __WORDSIZE == 64)
-# define LPU64 "%lu"
-# define LPD64 "%ld"
-# define LPX64 "%#lx"
-# define LPSZ  "%lu"
-# define LPSSZ "%ld"
-#endif
-#ifndef LPU64
-# error "No word size defined"
-#endif
-
-/* lustre_id output helper macros */
-#define DLID4   "%lu/%lu/%lu/%lu"
-
-#define OLID4(id)                              \
-    (unsigned long)(id)->li_fid.lf_id,         \
-    (unsigned long)(id)->li_fid.lf_group,      \
-    (unsigned long)(id)->li_stc.u.e3s.l3s_ino, \
-    (unsigned long)(id)->li_stc.u.e3s.l3s_gen
-                
-#endif
diff --git a/lustre/portals/include/linux/kpr.h b/lustre/portals/include/linux/kpr.h
deleted file mode 100644 (file)
index 1127698..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef _KPR_H
-#define _KPR_H
-
-# include <portals/lib-types.h> /* for ptl_hdr_t */
-
-/******************************************************************************/
-/* Kernel Portals Router interface */
-
-typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback
-
-/* space for routing targets to stash "stuff" in a forwarded packet */
-typedef union {
-        long long        _alignment;
-        void            *_space[16];            /* scale with CPU arch */
-} kprfd_scratch_t;
-
-/* Kernel Portals Routing Forwarded message Descriptor */
-typedef struct {
-        struct list_head     kprfd_list;        /* stash in queues (routing target can use) */
-        ptl_nid_t            kprfd_target_nid;  /* final destination NID */
-        ptl_nid_t            kprfd_gateway_nid; /* gateway NID */
-        ptl_hdr_t           *kprfd_hdr;         /* header in wire byte order */
-        int                  kprfd_nob;         /* # payload bytes */
-        int                  kprfd_niov;        /* # payload frags */
-        ptl_kiov_t          *kprfd_kiov;        /* payload fragments */
-        void                *kprfd_router_arg;  /* originating NAL's router arg */
-        kpr_fwd_callback_t   kprfd_callback;    /* completion callback */
-        void                *kprfd_callback_arg; /* completion callback arg */
-        kprfd_scratch_t      kprfd_scratch;     /* scratchpad for routing targets */
-} kpr_fwd_desc_t;
-
-typedef void  (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd);
-typedef void  (*kpr_notify_t)(void *arg, ptl_nid_t peer, int alive);
-
-/* NAL's routing interface (Kernel Portals Routing Nal Interface) */
-typedef const struct {
-        int             kprni_nalid;    /* NAL's id */
-        void           *kprni_arg;      /* Arg to pass when calling into NAL */
-        kpr_fwd_t       kprni_fwd;      /* NAL's forwarding entrypoint */
-        kpr_notify_t    kprni_notify;   /* NAL's notification entrypoint */
-} kpr_nal_interface_t;
-
-/* Router's routing interface (Kernel Portals Routing Router Interface) */
-typedef const struct {
-        /* register the calling NAL with the router and get back the handle for
-         * subsequent calls */
-        int     (*kprri_register) (kpr_nal_interface_t *nal_interface,
-                                   void **router_arg);
-
-        /* ask the router to find a gateway that forwards to 'nid' and is a
-         * peer of the calling NAL; assume caller will send 'nob' bytes of
-         * payload there */
-        int     (*kprri_lookup) (void *router_arg, ptl_nid_t nid, int nob,
-                                 ptl_nid_t *gateway_nid);
-
-        /* hand a packet over to the router for forwarding */
-        kpr_fwd_t kprri_fwd_start;
-
-        /* hand a packet back to the router for completion */
-        void    (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd,
-                                   int error);
-
-        /* notify the router about peer state */
-        void    (*kprri_notify) (void *router_arg, ptl_nid_t peer,
-                                 int alive, time_t when);
-
-        /* the calling NAL is shutting down */
-        void    (*kprri_shutdown) (void *router_arg);
-
-        /* deregister the calling NAL with the router */
-        void    (*kprri_deregister) (void *router_arg);
-
-} kpr_router_interface_t;
-
-/* Convenient struct for NAL to stash router interface/args */
-typedef struct {
-        kpr_router_interface_t  *kpr_interface;
-        void                    *kpr_arg;
-} kpr_router_t;
-
-extern kpr_router_interface_t   kpr_router_interface;
-/* Bind 'router' to the router interface symbol and register 'nalif' with it. */
-static inline int
-kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif)
-{
-        int    rc;
-        /* no router interface symbol available: report -ENOENT */
-        router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface);
-        if (router->kpr_interface == NULL)
-                return (-ENOENT);
-        /* on failure forget the interface, so kpr_routing() returns 0 */
-        rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg);
-        if (rc != 0)
-                router->kpr_interface = NULL;
-        /* NOTE(review): symbol is PUT on success too while the pointer is kept - confirm */
-        PORTAL_SYMBOL_PUT (kpr_router_interface);
-        return (rc);
-}
-
-static inline int
-kpr_routing (kpr_router_t *router)
-{
-        return (router->kpr_interface != NULL);
-}
-
-static inline int
-kpr_lookup (kpr_router_t *router, ptl_nid_t nid, int nob, ptl_nid_t *gateway_nid)
-{
-        if (!kpr_routing (router))
-                return (-ENETUNREACH);
-
-        return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, nob,
-                                                    gateway_nid));
-}
-/* Fill in 'fwd'; kprfd_list, kprfd_router_arg and kprfd_scratch are not set here. */
-static inline void
-kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, ptl_hdr_t *hdr,
-              int nob, int niov, ptl_kiov_t *kiov,
-              kpr_fwd_callback_t callback, void *callback_arg)
-{
-        fwd->kprfd_target_nid   = nid;
-        fwd->kprfd_gateway_nid  = nid;          /* starts out equal to the target */
-        fwd->kprfd_hdr          = hdr;
-        fwd->kprfd_nob          = nob;          /* payload bytes */
-        fwd->kprfd_niov         = niov;         /* payload fragment count */
-        fwd->kprfd_kiov         = kiov;
-        fwd->kprfd_callback     = callback;     /* invoked as callback(callback_arg, error) */
-        fwd->kprfd_callback_arg = callback_arg;
-}
-
-static inline void
-kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd)
-{
-        if (!kpr_routing (router))
-                fwd->kprfd_callback (fwd->kprfd_callback_arg, -ENETUNREACH);
-        else
-                router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd);
-}
-
-static inline void
-kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error)
-{
-        LASSERT (kpr_routing (router));
-        router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error);
-}
-
-static inline void
-kpr_notify (kpr_router_t *router,
-            ptl_nid_t peer, int alive, time_t when)
-{
-        if (!kpr_routing (router))
-                return;
-
-        router->kpr_interface->kprri_notify(router->kpr_arg, peer, alive, when);
-}
-
-static inline void
-kpr_shutdown (kpr_router_t *router)
-{
-        if (kpr_routing (router))
-                router->kpr_interface->kprri_shutdown (router->kpr_arg);
-}
-
-static inline void
-kpr_deregister (kpr_router_t *router)
-{
-        if (!kpr_routing (router))
-                return;
-        router->kpr_interface->kprri_deregister (router->kpr_arg);
-        router->kpr_interface = NULL;
-}
-
-#endif /* _KPR_H */
diff --git a/lustre/portals/include/linux/libcfs.h b/lustre/portals/include/linux/libcfs.h
deleted file mode 100644 (file)
index d8f5349..0000000
+++ /dev/null
@@ -1,410 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef _LIBCFS_H
-#define _LIBCFS_H
-
-#ifdef HAVE_ASM_TYPES_H
-#include <asm/types.h>
-#else
-#include "types.h"
-#endif
-
-#ifdef __KERNEL__
-# include <linux/time.h>
-# include <asm/timex.h>
-#else /* !__KERNEL__: provide the same names on top of libc */
-# include <sys/time.h>
-# define do_gettimeofday(tv) gettimeofday((tv), NULL) /* expression only: caller supplies ';' */
-typedef unsigned long long cycles_t; /* userspace definition of cycles_t */
-#endif /* __KERNEL__ */
-
-#define PORTAL_DEBUG
-
-#ifndef offsetof
-# define offsetof(typ,memb)     ((unsigned long)((char *)&(((typ *)0)->memb)))
-#endif
-
-#define LOWEST_BIT_SET(x)       ((x) & ~((x) - 1))
-
-#ifndef __KERNEL__
-/* Userspace byte flipping */
-# include <endian.h>
-# include <byteswap.h>
-# define __swab16(x) bswap_16(x)
-# define __swab32(x) bswap_32(x)
-# define __swab64(x) bswap_64(x)
-# define __swab16s(x) do {*(x) = bswap_16(*(x));} while (0)
-# define __swab32s(x) do {*(x) = bswap_32(*(x));} while (0)
-# define __swab64s(x) do {*(x) = bswap_64(*(x));} while (0)
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define le16_to_cpu(x) (x)
-#  define cpu_to_le16(x) (x)
-#  define le32_to_cpu(x) (x)
-#  define cpu_to_le32(x) (x)
-#  define le64_to_cpu(x) (x)
-#  define cpu_to_le64(x) (x)
-# else
-#  if __BYTE_ORDER == __BIG_ENDIAN
-#   define le16_to_cpu(x) bswap_16(x)
-#   define cpu_to_le16(x) bswap_16(x)
-#   define le32_to_cpu(x) bswap_32(x)
-#   define cpu_to_le32(x) bswap_32(x)
-#   define le64_to_cpu(x) bswap_64(x)
-#   define cpu_to_le64(x) bswap_64(x)
-#  else
-#   error "Unknown byte order"
-#  endif /* __BIG_ENDIAN */
-# endif /* __LITTLE_ENDIAN */
-#endif /* ! __KERNEL__ */
-
-/*
- *  Debugging
- */
-extern unsigned int portal_subsystem_debug;
-extern unsigned int portal_stack;
-extern unsigned int portal_debug;
-extern unsigned int portal_printk;
-
-struct ptldebug_header {
-        __u32 ph_len;
-        __u32 ph_flags;
-        __u32 ph_subsys;
-        __u32 ph_mask;
-        __u32 ph_cpu_id;
-        __u32 ph_sec;
-        __u64 ph_usec;
-        __u32 ph_stack;
-        __u32 ph_pid;
-        __u32 ph_extern_pid;
-        __u32 ph_line_num;
-} __attribute__((packed));
-
-#define PH_FLAG_FIRST_RECORD 1
-
-/* Debugging subsystems (32 bits, non-overlapping) */
-#define S_UNDEFINED   0x00000001
-#define S_MDC         0x00000002
-#define S_MDS         0x00000004
-#define S_OSC         0x00000008
-#define S_OST         0x00000010
-#define S_CLASS       0x00000020
-#define S_LOG         0x00000040
-#define S_LLITE       0x00000080
-#define S_RPC         0x00000100
-#define S_MGMT        0x00000200
-#define S_PORTALS     0x00000400
-#define S_NAL         0x00000800 /* ALL NALs */
-#define S_PINGER      0x00001000
-#define S_FILTER      0x00002000
-#define S_PTLBD       0x00004000
-#define S_ECHO        0x00008000
-#define S_LDLM        0x00010000
-#define S_LOV         0x00020000
-#define S_PTLROUTER   0x00040000
-#define S_COBD        0x00080000
-#define S_SM          0x00100000
-#define S_ASOBD       0x00200000
-#define S_CONFOBD     0x00400000
-#define S_LMV         0x00800000
-#define S_CMOBD       0x01000000
-/* If you change these values, please keep these files up to date...
- *    portals/utils/debug.c
- *    utils/lconf
- */
-
-/* Debugging masks (32 bits, non-overlapping) */
-#define D_TRACE       0x00000001 /* ENTRY/EXIT markers */
-#define D_INODE       0x00000002
-#define D_SUPER       0x00000004
-#define D_EXT2        0x00000008 /* anything from ext2_debug */
-#define D_MALLOC      0x00000010 /* print malloc, free information */
-#define D_CACHE       0x00000020 /* cache-related items */
-#define D_INFO        0x00000040 /* general information */
-#define D_IOCTL       0x00000080 /* ioctl related information */
-#define D_BLOCKS      0x00000100 /* ext2 block allocation */
-#define D_NET         0x00000200 /* network communications */
-#define D_WARNING     0x00000400 /* CWARN(...) == CDEBUG (D_WARNING, ...) */
-#define D_BUFFS       0x00000800
-#define D_OTHER       0x00001000
-#define D_DENTRY      0x00002000
-#define D_PORTALS     0x00004000 /* ENTRY/EXIT markers */
-#define D_PAGE        0x00008000 /* bulk page handling */
-#define D_DLMTRACE    0x00010000
-#define D_ERROR       0x00020000 /* CERROR(...) == CDEBUG (D_ERROR, ...) */
-#define D_EMERG       0x00040000 /* CEMERG(...) == CDEBUG (D_EMERG, ...) */
-#define D_HA          0x00080000 /* recovery and failover */
-#define D_RPCTRACE    0x00100000 /* for distributed debugging */
-#define D_VFSTRACE    0x00200000
-#define D_READA       0x00400000 /* read-ahead */
-#define D_MMAP        0x00800000
-#define D_CONFIG      0x01000000
-/* If you change these values, please keep these files up to date...
- *    portals/utils/debug.c
- *    utils/lconf
- */
-
-#ifdef __KERNEL__
-# include <linux/sched.h> /* THREAD_SIZE */
-#else
-# ifndef THREAD_SIZE /* x86_64 has THREAD_SIZE in userspace */
-#  define THREAD_SIZE 8192
-# endif
-#endif
-
-#define LUSTRE_TRACE_SIZE (THREAD_SIZE >> 5)
-
-#ifdef __KERNEL__
-# ifdef  __ia64__
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_dwarf_cfa() &            \
-                         (THREAD_SIZE - 1)))
-# else
-#  define CDEBUG_STACK (THREAD_SIZE -                                      \
-                        ((unsigned long)__builtin_frame_address(0) &       \
-                         (THREAD_SIZE - 1)))
-# endif /* __ia64__ */
-
-#define CHECK_STACK(stack)                                                    \
-        do {                                                                  \
-                if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) {    \
-                        portals_debug_msg(DEBUG_SUBSYSTEM, D_WARNING,         \
-                                          __FILE__, __FUNCTION__, __LINE__,   \
-                                          (stack),"maximum lustre stack %u\n",\
-                                          portal_stack = (stack));            \
-                      /*panic("LBUG");*/                                      \
-                }                                                             \
-        } while (0)
-#else /* !__KERNEL__ */
-#define CHECK_STACK(stack) do { } while(0)
-#define CDEBUG_STACK (0L)
-#endif /* __KERNEL__ */
-
-#if 1
-#define CDEBUG(mask, format, a...)                                            \
-do {                                                                          \
-        CHECK_STACK(CDEBUG_STACK);                                            \
-        if (((mask) & (D_ERROR | D_EMERG | D_WARNING)) ||                     \
-            (portal_debug & (mask) &&                                         \
-             portal_subsystem_debug & DEBUG_SUBSYSTEM))                       \
-                portals_debug_msg(DEBUG_SUBSYSTEM, mask,                      \
-                                  __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK, format, ## a);                \
-} while (0)
-
-#define CDEBUG_MAX_LIMIT 600
-#define CDEBUG_LIMIT(cdebug_mask, cdebug_format, a...)                        \
-do {                                                                          \
-        static unsigned long cdebug_next = 0;                                 \
-        static int cdebug_count = 0, cdebug_delay = 1;                        \
-                                                                              \
-        CHECK_STACK(CDEBUG_STACK);                                            \
-        if (time_after(jiffies, cdebug_next)) {                               \
-                portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask, __FILE__,     \
-                                  __FUNCTION__, __LINE__, CDEBUG_STACK,       \
-                                  cdebug_format, ## a);                       \
-                if (cdebug_count) {                                           \
-                        portals_debug_msg(DEBUG_SUBSYSTEM, cdebug_mask,       \
-                                          __FILE__, __FUNCTION__, __LINE__,   \
-                                          0, "skipped %d similar messages\n", \
-                                          cdebug_count);                      \
-                        cdebug_count = 0;                                     \
-                }                                                             \
-                if (time_after(jiffies, cdebug_next+(CDEBUG_MAX_LIMIT+10)*HZ))\
-                        cdebug_delay = cdebug_delay > 8 ? cdebug_delay/8 : 1; \
-                else                                                          \
-                        cdebug_delay = cdebug_delay*2 >= CDEBUG_MAX_LIMIT*HZ ?\
-                                        CDEBUG_MAX_LIMIT*HZ : cdebug_delay*2; \
-                cdebug_next = jiffies + cdebug_delay;                         \
-        } else {                                                              \
-                portals_debug_msg(DEBUG_SUBSYSTEM,                            \
-                                  portal_debug & ~(D_EMERG|D_ERROR|D_WARNING),\
-                                  __FILE__, __FUNCTION__, __LINE__,           \
-                                  CDEBUG_STACK, cdebug_format, ## a);         \
-                cdebug_count++;                                               \
-        }                                                                     \
-} while (0)
-
-#define CWARN(format, a...) CDEBUG_LIMIT(D_WARNING, format, ## a)
-#define CERROR(format, a...) CDEBUG_LIMIT(D_ERROR, format, ## a)
-#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a)
-
-#define GOTO(label, rc)                                                 \
-do {                                                                    \
-        long GOTO__ret = (long)(rc);                                    \
-        CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \
-               #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\
-               (signed long)GOTO__ret);                                 \
-        goto label;                                                     \
-} while (0)
-
-#define RETURN(rc)                                                      \
-do {                                                                    \
-        typeof(rc) RETURN__ret = (rc);                                  \
-        CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n",       \
-               (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\
-        return RETURN__ret;                                             \
-} while (0)
-
-#define ENTRY                                                           \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process entered\n");                           \
-} while (0)
-
-#define EXIT                                                            \
-do {                                                                    \
-        CDEBUG(D_TRACE, "Process leaving\n");                           \
-} while(0)
-#else
-#define CDEBUG(mask, format, a...)      do { } while (0)
-#define CWARN(format, a...)             printk(KERN_WARNING format, ## a)
-#define CERROR(format, a...)            printk(KERN_ERR format, ## a)
-#define CEMERG(format, a...)            printk(KERN_EMERG format, ## a)
-#define GOTO(label, rc)                 do { (void)(rc); goto label; } while (0)
-#define RETURN(rc)                      return (rc)
-#define ENTRY                           do { } while (0)
-#define EXIT                            do { } while (0)
-#endif
-
-/* initial pid  */
-# if CRAY_PORTALS
-/* 
- * 1) ptl_pid_t in cray portals is only 16 bits, not 32 bits, therefore this
- *    is too big.
- *
- * 2) the implementation of ernal in cray portals further restricts the pid
- *    space that may be used to 0 <= pid <= 255 (an 8 bit value).  Returns
- *    an error at nal init time for any pid outside this range.  Other nals
- *    in cray portals don't have this restriction.
- * */
-#define LUSTRE_PTL_PID          9
-# else
-#define LUSTRE_PTL_PID          12345
-# endif
-
-#define LUSTRE_SRV_PTL_PID      LUSTRE_PTL_PID    
-
-#define PORTALS_CFG_VERSION 0x00010001 /* plain constant: no trailing ';' so it works in any expression */
-
-struct portals_cfg {
-        __u32 pcfg_version;
-        __u32 pcfg_command;
-
-        __u32 pcfg_nal;
-        __u32 pcfg_flags;
-
-        __u32 pcfg_gw_nal;
-        __u64 pcfg_nid;
-        __u64 pcfg_nid2;
-        __u64 pcfg_nid3;
-        __u32 pcfg_id;
-        __u32 pcfg_misc;
-        __u32 pcfg_fd;
-        __u32 pcfg_count;
-        __u32 pcfg_size;
-        __u32 pcfg_wait;
-
-        __u32 pcfg_plen1; /* buffers in userspace */
-        char *pcfg_pbuf1;
-        __u32 pcfg_plen2; /* buffers in userspace */
-        char *pcfg_pbuf2;
-};
-
-#define PCFG_INIT(pcfg, cmd)                            \
-do {                                                    \
-        memset(&pcfg, 0, sizeof(pcfg));                 \
-        pcfg.pcfg_version = PORTALS_CFG_VERSION;        \
-        pcfg.pcfg_command = (cmd);                      \
-                                                        \
-} while (0)
-
-typedef int (nal_cmd_handler_fn)(struct portals_cfg *, void *);
-int libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *arg);
-int libcfs_nal_cmd(struct portals_cfg *pcfg);
-void libcfs_nal_cmd_unregister(int nal);
-
-struct portal_ioctl_data {
-        __u32 ioc_len;
-        __u32 ioc_version;
-        __u64 ioc_nid;
-        __u64 ioc_nid2;
-        __u64 ioc_nid3;
-        __u32 ioc_count;
-        __u32 ioc_nal;
-        __u32 ioc_nal_cmd;
-        __u32 ioc_fd;
-        __u32 ioc_id;
-
-        __u32 ioc_flags;
-        __u32 ioc_size;
-
-        __u32 ioc_wait;
-        __u32 ioc_timeout;
-        __u32 ioc_misc;
-
-        __u32 ioc_inllen1;
-        char *ioc_inlbuf1;
-        __u32 ioc_inllen2;
-        char *ioc_inlbuf2;
-
-        __u32 ioc_plen1; /* buffers in userspace */
-        char *ioc_pbuf1;
-        __u32 ioc_plen2; /* buffers in userspace */
-        char *ioc_pbuf2;
-
-        char ioc_bulk[0];
-};
-
-
-#ifdef __KERNEL__
-
-#include <linux/list.h>
-
-struct libcfs_ioctl_handler {
-        struct list_head item;
-        int (*handle_ioctl)(struct portal_ioctl_data *data,
-                            unsigned int cmd, unsigned long args);
-};
-
-#define DECLARE_IOCTL_HANDLER(ident, func)              \
-        struct libcfs_ioctl_handler ident = {           \
-                .item = LIST_HEAD_INIT(ident.item),     \
-                .handle_ioctl = func                    \
-        }
-
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand);
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand);
-
-#endif
-
-#ifdef __KERNEL__
-/* libcfs watchdogs (declared for kernel builds only; struct is opaque here) */
-struct lc_watchdog;
-
-/* Just use the default handler (dumplog)  */
-#define LC_WATCHDOG_DEFAULT_CB NULL
-
-/* Add a watchdog which fires after "time" milliseconds of delay.  You have to
- * touch it once to enable it.  "data" is the last argument handed to "cb". */
-struct lc_watchdog *lc_watchdog_add(int time, 
-                                    void (*cb)(struct lc_watchdog *,
-                                               struct task_struct *,
-                                               void *),
-                                    void *data);
-
-/* Enables a watchdog and resets its timer. */
-void lc_watchdog_touch(struct lc_watchdog *lcw);
-
-/* Disable a watchdog; touch it to restart it. */
-void lc_watchdog_disable(struct lc_watchdog *lcw);
-
-/* Clean up the watchdog */
-void lc_watchdog_delete(struct lc_watchdog *lcw);
-
-/* Dump a debug log; same signature as the "cb" taken by lc_watchdog_add() */
-void lc_watchdog_dumplog(struct lc_watchdog *lcw,
-                         struct task_struct *tsk,
-                         void *data);
-#endif /* __KERNEL__ */
-
-#endif /* _LIBCFS_H */
diff --git a/lustre/portals/include/linux/lustre_list.h b/lustre/portals/include/linux/lustre_list.h
deleted file mode 100644 (file)
index a218f2c..0000000
+++ /dev/null
@@ -1,246 +0,0 @@
-#ifndef _LUSTRE_LIST_H
-#define _LUSTRE_LIST_H
-
-#ifdef __KERNEL__
-#include <linux/list.h>
-#else
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-#define prefetch(a) ((void)(a)) /* non-kernel build: evaluate and discard; parenthesized for any expression */
-
-struct list_head {
-       struct list_head *next, *prev;
-};
-
-typedef struct list_head list_t;
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
-       struct list_head name = LIST_HEAD_INIT(name)
-
-#define INIT_LIST_HEAD(ptr) do { \
-       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head * new,
-                             struct list_head * prev,
-                             struct list_head * next)
-{
-       next->prev = new;
-       new->next = next;
-       new->prev = prev;
-       prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-       __list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-       __list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-       next->prev = prev;
-       prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
-       __list_del(entry->prev, entry->next);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-       __list_del(entry->prev, entry->next);
-       INIT_LIST_HEAD(entry);
-}
-
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-       __list_del(list->prev, list->next);
-       list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
-                                 struct list_head *head)
-{
-       __list_del(list->prev, list->next);
-       list_add_tail(list, head);
-}
-
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(struct list_head *head)
-{
-       return head->next == head;
-}
-
-static inline void __list_splice(struct list_head *list,
-                                struct list_head *head)
-{
-       struct list_head *first = list->next;
-       struct list_head *last = list->prev;
-       struct list_head *at = head->next;
-
-       first->prev = head;
-       head->next = first;
-
-       last->next = at;
-       at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
-       if (!list_empty(list))
-               __list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
-                                   struct list_head *head)
-{
-       if (!list_empty(list)) {
-               __list_splice(list, head);
-               INIT_LIST_HEAD(list);
-       }
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:       the &struct list_head pointer.
- * @type:      the type of the struct this is embedded in.
- * @member:    the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
-       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-/**
- * list_for_each       -       iterate over a list
- * @pos:       the &struct list_head to use as a loop counter.
- * @head:      the head for your list.
- */
-#define list_for_each(pos, head) \
-       for (pos = (head)->next, prefetch(pos->next); pos != (head); \
-               pos = pos->next, prefetch(pos->next))
-
-/**
- * list_for_each_prev  -       iterate over a list in reverse order
- * @pos:       the &struct list_head to use as a loop counter.
- * @head:      the head for your list.
- */
-#define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \
-               pos = pos->prev, prefetch(pos->prev))
-
-/**
- * list_for_each_safe  -       iterate over a list safe against removal of list entry
- * @pos:       the &struct list_head to use as a loop counter.
- * @n:         another &struct list_head to use as temporary storage
- * @head:      the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
-       for (pos = (head)->next, n = pos->next; pos != (head); \
-               pos = n, n = pos->next)
-
-/**
- * list_for_each_entry  -       iterate over list of given type
- * @pos:        the type * to use as a loop counter.
- * @head:       the head for your list.
- * @member:     the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member)                         \
-        for (pos = list_entry((head)->next, typeof(*pos), member),     \
-                    prefetch(pos->member.next);                        \
-            &pos->member != (head);                                    \
-            pos = list_entry(pos->member.next, typeof(*pos), member),  \
-            prefetch(pos->member.next))
-
-/**
- * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
- * @pos:        the type * to use as a loop counter.
- * @n:          another type * to use as temporary storage
- * @head:       the head for your list.
- * @member:     the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)                 \
-        for (pos = list_entry((head)->next, typeof(*pos), member),     \
-               n = list_entry(pos->member.next, typeof(*pos), member); \
-            &pos->member != (head);                                    \
-            pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-#endif /* if !__KERNEL__*/
-#endif /* if !_LUSTRE_LIST_H */
diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h
deleted file mode 100644 (file)
index fa2709e..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef _PORTALS_COMPAT_H
-#define _PORTALS_COMPAT_H
-
-// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
-#if SPINLOCK_DEBUG
-# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20)
-#  define SIGNAL_MASK_ASSERT() \
-   LASSERT(current->sighand->siglock.magic == SPINLOCK_MAGIC)
-# else
-#  define SIGNAL_MASK_ASSERT() \
-   LASSERT(current->sigmask_lock.magic == SPINLOCK_MAGIC)
-# endif
-#else
-# define SIGNAL_MASK_ASSERT()
-#endif
-// XXX BUG 1511 -- remove this stanza and all callers when bug 1511 is resolved
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-
-# define SIGNAL_MASK_LOCK(task, flags)                                  \
-  spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                                \
-  spin_unlock_irqrestore(&task->sighand->siglock, flags)
-# define USERMODEHELPER(path, argv, envp)                               \
-  call_usermodehelper(path, argv, envp, 1)
-# define RECALC_SIGPENDING         recalc_sigpending()
-# define CLEAR_SIGPENDING          clear_tsk_thread_flag(current,       \
-                                                         TIF_SIGPENDING)
-# define CURRENT_SECONDS           get_seconds()
-# define smp_num_cpus              num_online_cpus()
-
-
-#elif defined(CONFIG_RH_2_4_20) /* RH 2.4.x */
-
-# define SIGNAL_MASK_LOCK(task, flags)                                  \
-  spin_lock_irqsave(&task->sighand->siglock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                                \
-  spin_unlock_irqrestore(&task->sighand->siglock, flags)
-# define USERMODEHELPER(path, argv, envp)                               \
-  call_usermodehelper(path, argv, envp)
-# define RECALC_SIGPENDING         recalc_sigpending()
-# define CLEAR_SIGPENDING          (current->sigpending = 0)
-# define CURRENT_SECONDS           CURRENT_TIME
-
-#else /* 2.4.x */
-
-# define SIGNAL_MASK_LOCK(task, flags)                                  \
-  spin_lock_irqsave(&task->sigmask_lock, flags)
-# define SIGNAL_MASK_UNLOCK(task, flags)                                \
-  spin_unlock_irqrestore(&task->sigmask_lock, flags)
-# define USERMODEHELPER(path, argv, envp)                               \
-  call_usermodehelper(path, argv, envp)
-# define RECALC_SIGPENDING         recalc_sigpending(current)
-# define CLEAR_SIGPENDING          (current->sigpending = 0)
-# define CURRENT_SECONDS           CURRENT_TIME
-
-#endif
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-#define UML_PID(tsk) ((tsk)->thread.extern_pid)
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define UML_PID(tsk) ((tsk)->thread.mode.tt.extern_pid)
-#else
-#define UML_PID(tsk) ((tsk)->pid)
-#endif
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-# define THREAD_NAME(comm, len, fmt, a...)                              \
-        snprintf(comm, len,fmt"|%d", ## a, UML_PID(current))
-#else
-# define THREAD_NAME(comm, len, fmt, a...)                              \
-        snprintf(comm, len, fmt, ## a)
-#endif
-
-#ifdef HAVE_PAGE_LIST
-/* 2.4 alloc_page users can use page->list */
-#define PAGE_LIST_ENTRY list
-#define PAGE_LIST(page) ((page)->list)
-#else
-/* 2.6 alloc_page users can use page->lru */
-#define PAGE_LIST_ENTRY lru
-#define PAGE_LIST(page) ((page)->lru)
-#endif
-
-#ifndef HAVE_CPU_ONLINE
-/* Fallback when the kernel provides no cpu_online(): test bit 'cpu' of
- * cpu_online_map.  test_bit() takes the bit number first and the bitmap
- * address second. */
-#define cpu_online(cpu) (test_bit((cpu), &cpu_online_map))
-#endif
-#ifndef HAVE_CPUMASK_T
-#define cpu_set(cpu, map) (set_bit(cpu, &(map)))
-typedef unsigned long cpumask_t;
-#endif
-
-#endif /* _PORTALS_COMPAT_H */
diff --git a/lustre/portals/include/linux/portals_lib.h b/lustre/portals/include/linux/portals_lib.h
deleted file mode 100644 (file)
index 8778a52..0000000
+++ /dev/null
@@ -1,90 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines. 
- *
- */
-
-#ifndef _PORTALS_LIB_H
-#define _PORTALS_LIB_H
-
-#ifndef __KERNEL__
-# include <string.h>
-#else 
-# include <asm/types.h>
-#endif
-
-#undef MIN
-#define MIN(a,b) (((a)<(b)) ? (a): (b))
-#undef MAX
-#define MAX(a,b) (((a)>(b)) ? (a): (b))
-/* Yield 'ptr', or the empty string "" when 'ptr' is NULL.  The whole
- * expansion is parenthesised so it stays one operand inside larger
- * expressions; 'ptr' is evaluated twice. */
-#define MKSTR(ptr) ((ptr) ? (ptr) : "")
-
-/* Round 'val' up to a multiple of 8; aligned values are returned unchanged. */
-static inline int size_round (int val)
-{
-        const int mask = 0x7;
-
-        return (val + mask) & ~mask;
-}
-
-/* Round 'val' up to a multiple of 16; aligned values are returned unchanged. */
-static inline int size_round16(int val)
-{
-        const int mask = 0xf;
-
-        return (val + mask) & ~mask;
-}
-
-/* Round 'val' up to a multiple of 32; aligned values are returned unchanged. */
-static inline int size_round32(int val)
-{
-        const int mask = 0x1f;
-
-        return (val + mask) & ~mask;
-}
-
-/* Return 0 for a zero 'val'; otherwise round (val + 1) up to a multiple
- * of 8, i.e. leave room for one extra byte before rounding. */
-static inline int size_round0(int val)
-{
-        return val ? ((val + 1 + 7) & (~0x7)) : 0;
-}
-
-/*
- * Space needed for the NUL-terminated string 'fset' (strlen + 1), rounded
- * up by size_round().  The string is only read, so the parameter is const
- * and callers may pass either const or non-const strings.
- */
-static inline size_t round_strlen(const char *fset)
-{
-        return size_round(strlen(fset) + 1);
-}
-
-/*
- * LOGL: if 'var' is non-NULL, copy 'len' bytes from 'var' to 'ptr'; then
- * advance 'ptr' by size_round(len) whether or not anything was copied.
- * 'ptr' must be a modifiable lvalue.  Arguments are parenthesised so the
- * casts apply to the whole argument expression; 'len' and 'ptr' are
- * evaluated more than once.
- */
-#define LOGL(var,len,ptr)                                       \
-do {                                                            \
-        if (var)                                                \
-                memcpy((char *)(ptr), (const char *)(var), (len)); \
-        (ptr) += size_round(len);                               \
-} while (0)
-
-/*
- * LOGU: the reverse of a packing copy -- if 'var' is non-NULL, copy 'len'
- * bytes from 'ptr' into 'var'; then advance 'ptr' by size_round(len)
- * whether or not anything was copied.  'ptr' must be a modifiable lvalue.
- * Arguments are parenthesised so the casts apply to the whole argument
- * expression; 'len' and 'ptr' are evaluated more than once.
- */
-#define LOGU(var,len,ptr)                                       \
-do {                                                            \
-        if (var)                                                \
-                memcpy((char *)(var), (const char *)(ptr), (len)); \
-        (ptr) += size_round(len);                               \
-} while (0)
-
-/*
- * LOGL0: if 'len' is zero do nothing.  Otherwise copy 'len' bytes from
- * 'var' to 'ptr', store a terminating 0 byte after them, and advance 'ptr'
- * by size_round(len + 1).  'ptr' must be a modifiable lvalue.
- *
- * Every use of an argument is parenthesised: without that, a 'len' such as
- * "a - b" expands to "!a - b" in the zero test and binds wrongly in the
- * terminator address.  'len' and 'ptr' are evaluated more than once.
- */
-#define LOGL0(var,len,ptr)                              \
-do {                                                    \
-        if (!(len))                                     \
-                break;                                  \
-        memcpy((char *)(ptr), (const char *)(var), (len)); \
-        *((char *)(ptr) + (len)) = 0;                   \
-        (ptr) += size_round((len) + 1);                 \
-} while (0)
-
-#endif /* _PORTALS_LIB_H */
diff --git a/lustre/portals/include/portals/.cvsignore b/lustre/portals/include/portals/.cvsignore
deleted file mode 100644 (file)
index 282522d..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-Makefile
-Makefile.in
diff --git a/lustre/portals/include/portals/Makefile.am b/lustre/portals/include/portals/Makefile.am
deleted file mode 100644 (file)
index 4043f66..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-portalsdir=$(includedir)/portals
-
-if UTILS
-portals_HEADERS = list.h
-endif
-
-EXTRA_DIST = api.h api-support.h build_check.h errno.h \
-       internal.h lib-p30.h lib-types.h list.h                  \
-       lltrace.h myrnal.h nal.h nalids.h p30.h ptlctl.h         \
-       socknal.h stringtab.h types.h
diff --git a/lustre/portals/include/portals/api-support.h b/lustre/portals/include/portals/api-support.h
deleted file mode 100644 (file)
index c5994c6..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-
-#include "build_check.h"
-
-#ifndef __KERNEL__
-# include <stdio.h>
-# include <stdlib.h>
-# include <unistd.h>
-# include <time.h>
-
-/* Lots of POSIX dependencies to support PtlEQWait_timeout */
-# include <signal.h>
-# include <setjmp.h>
-# include <time.h>
-#endif
-
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <portals/p30.h>
-
-#include <portals/internal.h>
-#include <portals/nal.h>
-
diff --git a/lustre/portals/include/portals/api.h b/lustre/portals/include/portals/api.h
deleted file mode 100644 (file)
index 56b7b99..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-#ifndef P30_API_H
-#define P30_API_H
-
-#include "build_check.h"
-
-#include <portals/types.h>
-
-int PtlInit(int *);
-void PtlFini(void);
-
-int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
-             ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits,
-              ptl_handle_ni_t *interface_out);
-
-int PtlNIInitialized(ptl_interface_t);
-
-int PtlNIFini(ptl_handle_ni_t interface_in);
-
-int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id);
-
-int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid);
-
-
-/*
- * Network interfaces
- */
-
-int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
-                ptl_sr_value_t * status_out);
-
-int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
-              unsigned long *distance_out);
-
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out);
-
-
-/* 
- * PtlFailNid
- *
- * Not an official Portals 3 API call.  It provides a way of simulating
- * communications failures to all (nid == PTL_NID_ANY), or specific peers
- * (via multiple calls), either until further notice (threshold == -1), or
- * for a specific number of messages.  Passing a threshold of zero "heals"
- * the given peer.
- */
-int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold);
-
-/*
- * PtlSnprintHandle: 
- *
- * This is not an official Portals 3 API call.  It is provided
- * so that an application can print an opaque handle.
- */
-void PtlSnprintHandle (char *str, int str_len, ptl_handle_any_t handle);
-
-/*
- * Match entries
- */
-
-int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
-                ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
-                ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
-                ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out);
-
-int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
-                ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
-                ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
-                ptl_handle_me_t * handle_out);
-
-int PtlMEUnlink(ptl_handle_me_t current_in);
-
-int PtlMEUnlinkList(ptl_handle_me_t current_in);
-
-
-
-/*
- * Memory descriptors
- */
-
-int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in,
-                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
-
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-             ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out);
-
-int PtlMDUnlink(ptl_handle_md_t md_in);
-
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout,
-                ptl_md_t * new_inout, ptl_handle_eq_t testq_in);
-
-
-/* These should not be called by users */
-int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout,
-                         ptl_md_t * new_inout, ptl_handle_eq_t testq_in,
-                         ptl_seq_t sequence_in);
-
-
-
-
-/*
- * Event queues
- */
-int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in,
-               ptl_eq_handler_t handler,
-               ptl_handle_eq_t *handle_out);
-int PtlEQFree(ptl_handle_eq_t eventq_in);
-
-int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
-
-
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out);
-
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
-             ptl_event_t *event_out, int *which_out);
-
-/*
- * Access Control Table
- */
-int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
-               ptl_process_id_t match_id_in, ptl_pt_index_t portal_in);
-
-
-/*
- * Data movement
- */
-
-int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
-           ptl_process_id_t target_in, ptl_pt_index_t portal_in,
-           ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in,
-           ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in);
-
-int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
-           ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in,
-           ptl_match_bits_t match_bits_in, ptl_size_t offset_in);
-
-
-
-#endif
diff --git a/lustre/portals/include/portals/build_check.h b/lustre/portals/include/portals/build_check.h
deleted file mode 100644 (file)
index c219d2a..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _BUILD_CHECK_H
-#define _BUILD_CHECK_H
-
-#if CRAY_PORTALS
-#error "an application got to me instead of cray's includes"
-#endif
-
-#endif
diff --git a/lustre/portals/include/portals/errno.h b/lustre/portals/include/portals/errno.h
deleted file mode 100644 (file)
index 42f2626..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _P30_ERRNO_H_
-#define _P30_ERRNO_H_
-
-#include "build_check.h"
-/*
- * include/portals/errno.h
- *
- * Shared error number lists
- */
-
-/* If you change these, you must update the string table in api-errno.c */
-typedef enum {
-        PTL_OK                 = 0,
-        PTL_SEGV               = 1,
-
-        PTL_NO_SPACE           = 2,
-        PTL_ME_IN_USE          = 3,
-        PTL_VAL_FAILED         = 4,
-
-        PTL_NAL_FAILED         = 5,
-        PTL_NO_INIT            = 6,
-        PTL_IFACE_DUP          = 7,
-        PTL_IFACE_INVALID      = 8,
-
-        PTL_HANDLE_INVALID     = 9,
-        PTL_MD_INVALID         = 10,
-        PTL_ME_INVALID         = 11,
-/* If you change these, you must update the string table in api-errno.c */
-        PTL_PROCESS_INVALID    = 12,
-        PTL_PT_INDEX_INVALID   = 13,
-
-        PTL_SR_INDEX_INVALID   = 14,
-        PTL_EQ_INVALID         = 15,
-        PTL_EQ_DROPPED         = 16,
-
-        PTL_EQ_EMPTY           = 17,
-        PTL_MD_NO_UPDATE       = 18,
-        PTL_FAIL               = 19,
-
-        PTL_IOV_INVALID        = 20,
-
-       PTL_EQ_IN_USE           = 21,
-
-       PTL_NI_INVALID          = 22,
-       PTL_MD_ILLEGAL          = 23,
-       
-        PTL_MAX_ERRNO          = 24
-} ptl_err_t;
-/* If you change these, you must update the string table in api-errno.c */
-
-extern const char *ptl_err_str[];
-
-#endif
diff --git a/lustre/portals/include/portals/internal.h b/lustre/portals/include/portals/internal.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h
deleted file mode 100644 (file)
index b710569..0000000
+++ /dev/null
@@ -1,465 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib-p30.h
- *
- * Top level include for library side routines
- */
-
-#ifndef _LIB_P30_H_
-#define _LIB_P30_H_
-
-#include "build_check.h"
-
-#ifdef __KERNEL__
-# include <asm/page.h>
-# include <linux/string.h>
-#else
-# include <portals/list.h>
-# include <string.h>
-# include <pthread.h>
-#endif
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/nal.h>
-#include <portals/lib-types.h>
-
-/* Return 1 when both cookies of 'wh' equal those of PTL_WIRE_HANDLE_NONE,
- * 0 otherwise. */
-static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh)
-{
-        if (wh->wh_interface_cookie != PTL_WIRE_HANDLE_NONE.wh_interface_cookie)
-                return (0);
-
-        return (wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie);
-}
-
-#ifdef __KERNEL__
-#define LIB_LOCK(nal,flags)                                     \
-        spin_lock_irqsave(&(nal)->libnal_ni.ni_lock, flags)
-#define LIB_UNLOCK(nal,flags)                                   \
-        spin_unlock_irqrestore(&(nal)->libnal_ni.ni_lock, flags)
-#else
-#define LIB_LOCK(nal,flags)                                             \
-        (pthread_mutex_lock(&(nal)->libnal_ni.ni_mutex), (flags) = 0)
-#define LIB_UNLOCK(nal,flags)                                   \
-        pthread_mutex_unlock(&(nal)->libnal_ni.ni_mutex)
-#endif
-
-
-#ifdef PTL_USE_LIB_FREELIST
-
-#define MAX_MES         2048
-#define MAX_MDS         2048
-#define MAX_MSGS        2048    /* Outstanding messages */
-#define MAX_EQS         512
-
-extern int lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int nobj, int objsize);
-extern void lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl);
-
-/*
- * Unlink the first object on the freelist 'fl' and return a pointer to its
- * fo_contents payload, or NULL when the list is empty.
- */
-static inline void *
-lib_freelist_alloc (lib_freelist_t *fl)
-{
-        /* ALWAYS called with liblock held */
-        lib_freeobj_t *obj;
-
-        if (!list_empty (&fl->fl_list)) {
-                obj = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list);
-                list_del (&obj->fo_list);
-                return ((void *)&obj->fo_contents);
-        }
-
-        return (NULL);
-}
-
-/*
- * Return 'obj' (a payload pointer previously handed out from a freelist
- * object) to the freelist 'fl'.
- */
-static inline void
-lib_freelist_free (lib_freelist_t *fl, void *obj)
-{
-        /* ALWAYS called with liblock held */
-        lib_freeobj_t *fo;
-
-        /* 'obj' is the fo_contents member; recover its enclosing object */
-        fo = list_entry (obj, lib_freeobj_t, fo_contents);
-        list_add (&fo->fo_list, &fl->fl_list);
-}
-
-
-static inline lib_eq_t *
-lib_eq_alloc (lib_nal_t *nal)
-{
-        /* NEVER called with liblock held */
-        unsigned long  flags;
-        lib_eq_t      *eq;
-        
-        LIB_LOCK (nal, flags);
-        eq = (lib_eq_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_eqs);
-        LIB_UNLOCK (nal, flags);
-
-        return (eq);
-}
-
-static inline void
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
-{
-        /* ALWAYS called with liblock held */
-        lib_freelist_free (&nal->libnal_ni.ni_free_eqs, eq);
-}
-
-static inline lib_md_t *
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
-{
-        /* NEVER called with liblock held */
-        unsigned long  flags;
-        lib_md_t      *md;
-        
-        LIB_LOCK (nal, flags);
-        md = (lib_md_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mds);
-        LIB_UNLOCK (nal, flags);
-
-        return (md);
-}
-
-static inline void
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
-{
-        /* ALWAYS called with liblock held */
-        lib_freelist_free (&nal->libnal_ni.ni_free_mds, md);
-}
-
-static inline lib_me_t *
-lib_me_alloc (lib_nal_t *nal)
-{
-        /* NEVER called with liblock held */
-        unsigned long  flags;
-        lib_me_t      *me;
-        
-        LIB_LOCK (nal, flags);
-        me = (lib_me_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_mes);
-        LIB_UNLOCK (nal, flags);
-        
-        return (me);
-}
-
-static inline void
-lib_me_free (lib_nal_t *nal, lib_me_t *me)
-{
-        /* ALWAYS called with liblock held */
-        lib_freelist_free (&nal->libnal_ni.ni_free_mes, me);
-}
-
-static inline lib_msg_t *
-lib_msg_alloc (lib_nal_t *nal)
-{
-        /* NEVER called with liblock held */
-        unsigned long  flags;
-        lib_msg_t     *msg;
-        
-        LIB_LOCK (nal, flags);
-        msg = (lib_msg_t *)lib_freelist_alloc (&nal->libnal_ni.ni_free_msgs);
-        LIB_UNLOCK (nal, flags);
-
-        if (msg != NULL) {
-                /* NULL pointers, clear flags etc */
-                memset (msg, 0, sizeof (*msg));
-                msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-        }
-        return(msg);
-}
-
-static inline void
-lib_msg_free (lib_nal_t *nal, lib_msg_t *msg)
-{
-        /* ALWAYS called with liblock held */
-        lib_freelist_free (&nal->libnal_ni.ni_free_msgs, msg);
-}
-
-#else
-
-static inline lib_eq_t *
-lib_eq_alloc (lib_nal_t *nal)
-{
-        /* NEVER called with liblock held */
-        lib_eq_t *eq;
-
-        PORTAL_ALLOC(eq, sizeof(*eq));
-        return (eq);
-}
-
-static inline void
-lib_eq_free (lib_nal_t *nal, lib_eq_t *eq)
-{
-        /* ALWAYS called with liblock held */
-        PORTAL_FREE(eq, sizeof(*eq));
-}
-
-/*
- * Allocate a lib_md_t sized for the I/O vector described by 'umd'.
- *
- * With PTL_MD_KIOV or PTL_MD_IOVEC set, umd->length is used as the number
- * of vector entries; otherwise space for a single iov entry is allocated.
- * 'nal' is not used by this variant.  Returns the pointer set by
- * PORTAL_ALLOC; when it is non-NULL, options and md_niov are filled in.
- *
- * NOTE(review): umd->length is not range-checked here before being used to
- * compute the allocation size -- confirm that callers validate it.
- */
-static inline lib_md_t *
-lib_md_alloc (lib_nal_t *nal, ptl_md_t *umd)
-{
-        /* NEVER called with liblock held */
-        lib_md_t *md;
-        int       size;
-        int       niov;
-
-        /* size covers the fixed part of lib_md_t plus 'niov' vector entries */
-        if ((umd->options & PTL_MD_KIOV) != 0) {
-                niov = umd->length;
-                size = offsetof(lib_md_t, md_iov.kiov[niov]);
-        } else {
-                niov = ((umd->options & PTL_MD_IOVEC) != 0) ?
-                       umd->length : 1;
-                size = offsetof(lib_md_t, md_iov.iov[niov]);
-        }
-
-        PORTAL_ALLOC(md, size);
-
-        if (md != NULL) {
-                /* Set here in case of early free */
-                /* (lib_md_free recomputes the size from these two fields) */
-                md->options = umd->options;
-                md->md_niov = niov;
-        }
-        
-        return (md);
-}
-
-static inline void 
-lib_md_free (lib_nal_t *nal, lib_md_t *md)
-{
-        /* ALWAYS called with liblock held */
-        int       size;
-
-        if ((md->options & PTL_MD_KIOV) != 0)
-                size = offsetof(lib_md_t, md_iov.kiov[md->md_niov]);
-        else
-                size = offsetof(lib_md_t, md_iov.iov[md->md_niov]);
-
-        PORTAL_FREE(md, size);
-}
-
-static inline lib_me_t *
-lib_me_alloc (lib_nal_t *nal)
-{
-        /* NEVER called with liblock held */
-        lib_me_t *me;
-
-        PORTAL_ALLOC(me, sizeof(*me));
-        return (me);
-}
-
-static inline void 
-lib_me_free(lib_nal_t *nal, lib_me_t *me)
-{
-        /* ALWAYS called with liblock held */
-        PORTAL_FREE(me, sizeof(*me));
-}
-
-/*
- * Allocate a lib_msg_t, zero it and set its ack_wmd to
- * PTL_WIRE_HANDLE_NONE.  Uses PORTAL_ALLOC_ATOMIC when in_interrupt() is
- * true and PORTAL_ALLOC otherwise.  Returns NULL if the allocation failed.
- */
-static inline lib_msg_t *
-lib_msg_alloc(lib_nal_t *nal)
-{
-        /* NEVER called with liblock held; may be in interrupt... */
-        lib_msg_t *msg;
-
-        if (!in_interrupt()) {
-                PORTAL_ALLOC(msg, sizeof(*msg));
-        } else {
-                PORTAL_ALLOC_ATOMIC(msg, sizeof(*msg));
-        }
-
-        if (msg == NULL)
-                return (NULL);
-
-        /* NULL pointers, clear flags etc */
-        memset (msg, 0, sizeof (*msg));
-        msg->ack_wmd = PTL_WIRE_HANDLE_NONE;
-        return (msg);
-}
-
-static inline void 
-lib_msg_free(lib_nal_t *nal, lib_msg_t *msg)
-{
-        /* ALWAYS called with liblock held */
-        PORTAL_FREE(msg, sizeof(*msg));
-}
-#endif
-
-extern lib_handle_t *lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type);
-extern void lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type);
-extern void lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh);
-
-static inline void
-ptl_eq2handle (ptl_handle_eq_t *handle, lib_nal_t *nal, lib_eq_t *eq)
-{
-        if (eq == NULL) {
-                *handle = PTL_EQ_NONE;
-                return;
-        }
-
-        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-        handle->cookie = eq->eq_lh.lh_cookie;
-}
-
-static inline lib_eq_t *
-ptl_handle2eq (ptl_handle_eq_t *handle, lib_nal_t *nal)
-{
-        /* ALWAYS called with liblock held */
-        lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, 
-                                              PTL_COOKIE_TYPE_EQ);
-        if (lh == NULL)
-                return (NULL);
-
-        return (lh_entry (lh, lib_eq_t, eq_lh));
-}
-
-static inline void
-ptl_md2handle (ptl_handle_md_t *handle, lib_nal_t *nal, lib_md_t *md)
-{
-        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-        handle->cookie = md->md_lh.lh_cookie;
-}
-
-static inline lib_md_t *
-ptl_handle2md (ptl_handle_md_t *handle, lib_nal_t *nal)
-{
-        /* ALWAYS called with liblock held */
-        lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-                                              PTL_COOKIE_TYPE_MD);
-        if (lh == NULL)
-                return (NULL);
-
-        return (lh_entry (lh, lib_md_t, md_lh));
-}
-
-static inline lib_md_t *
-ptl_wire_handle2md (ptl_handle_wire_t *wh, lib_nal_t *nal)
-{
-        /* ALWAYS called with liblock held */
-        lib_handle_t *lh;
-        
-        if (wh->wh_interface_cookie != nal->libnal_ni.ni_interface_cookie)
-                return (NULL);
-        
-        lh = lib_lookup_cookie (nal, wh->wh_object_cookie,
-                                PTL_COOKIE_TYPE_MD);
-        if (lh == NULL)
-                return (NULL);
-
-        return (lh_entry (lh, lib_md_t, md_lh));
-}
-
-static inline void
-ptl_me2handle (ptl_handle_me_t *handle, lib_nal_t *nal, lib_me_t *me)
-{
-        handle->nal_idx = nal->libnal_ni.ni_api->nal_handle.nal_idx;
-        handle->cookie = me->me_lh.lh_cookie;
-}
-
-static inline lib_me_t *
-ptl_handle2me (ptl_handle_me_t *handle, lib_nal_t *nal)
-{
-        /* ALWAYS called with liblock held */
-        lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie,
-                                              PTL_COOKIE_TYPE_ME);
-        if (lh == NULL)
-                return (NULL);
-
-        return (lh_entry (lh, lib_me_t, me_lh));
-}
-
-extern int lib_init(lib_nal_t *libnal, nal_t *apinal,
-                    ptl_process_id_t pid,
-                    ptl_ni_limits_t *desired_limits, 
-                    ptl_ni_limits_t *actual_limits);
-extern int lib_fini(lib_nal_t *libnal);
-
-/*
- * When the NAL detects an incoming message header, it should call
- * lib_parse() to decode it.  If the message header is garbage, lib_parse()
- * returns immediately with failure; otherwise the NAL callbacks will be
- * called to receive the message body.  They are handed the private cookie
- * as a way for the NAL to maintain state about which transaction is being
- * processed.  An extra parameter, lib_msg, contains the lib-level message
- * state for passing to lib_finalize() when the message body has been
- * received.
- */
-extern void lib_enq_event_locked (lib_nal_t *nal, void *private,
-                                  lib_eq_t *eq, ptl_event_t *ev);
-extern void lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, 
-                          ptl_ni_fail_t ni_fail_type);
-extern ptl_err_t lib_parse (lib_nal_t *nal, ptl_hdr_t *hdr, void *private);
-extern lib_msg_t *lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, 
-                                        lib_msg_t *get_msg);
-extern void print_hdr (lib_nal_t * nal, ptl_hdr_t * hdr);
-
-
-extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov);
-extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, 
-                              ptl_size_t offset, ptl_size_t len);
-extern void lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, 
-                              char *src, ptl_size_t len);
-extern int lib_extract_iov (int dst_niov, struct iovec *dst,
-                            int src_niov, struct iovec *src,
-                            ptl_size_t offset, ptl_size_t len);
-
-extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov);
-extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, 
-                               ptl_size_t offset, ptl_size_t len);
-extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-                               char *src, ptl_size_t len);
-extern int lib_extract_kiov (int dst_niov, ptl_kiov_t *dst, 
-                             int src_niov, ptl_kiov_t *src,
-                             ptl_size_t offset, ptl_size_t len);
-
-extern void lib_assert_wire_constants (void);
-
-extern ptl_err_t lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-                           ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen);
-extern ptl_err_t lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                           ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                           lib_md_t *md, ptl_size_t offset, ptl_size_t len);
-
-extern int lib_api_ni_status (nal_t *nal, ptl_sr_index_t sr_idx,
-                              ptl_sr_value_t *status);
-extern int lib_api_ni_dist (nal_t *nal, ptl_process_id_t *pid, 
-                            unsigned long *dist);
-
-extern int lib_api_eq_alloc (nal_t *nal, ptl_size_t count,
-                             ptl_eq_handler_t callback, 
-                             ptl_handle_eq_t *handle);
-extern int lib_api_eq_free(nal_t *nal, ptl_handle_eq_t *eqh);
-extern int lib_api_eq_poll (nal_t *nal, 
-                            ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
-                            ptl_event_t *event, int *which);
-
-extern int lib_api_me_attach(nal_t *nal,
-                             ptl_pt_index_t portal,
-                             ptl_process_id_t match_id, 
-                             ptl_match_bits_t match_bits, 
-                             ptl_match_bits_t ignore_bits,
-                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
-                             ptl_handle_me_t *handle);
-extern int lib_api_me_insert(nal_t *nal,
-                             ptl_handle_me_t *current_meh,
-                             ptl_process_id_t match_id, 
-                             ptl_match_bits_t match_bits, 
-                             ptl_match_bits_t ignore_bits,
-                             ptl_unlink_t unlink, ptl_ins_pos_t pos,
-                             ptl_handle_me_t *handle);
-extern int lib_api_me_unlink (nal_t *nal, ptl_handle_me_t *meh);
-extern void lib_me_unlink(lib_nal_t *nal, lib_me_t *me);
-
-extern int lib_api_get_id(nal_t *nal, ptl_process_id_t *pid);
-
-extern void lib_md_unlink(lib_nal_t *nal, lib_md_t *md);
-extern void lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd);
-extern int lib_api_md_attach(nal_t *nal, ptl_handle_me_t *meh,
-                             ptl_md_t *umd, ptl_unlink_t unlink, 
-                             ptl_handle_md_t *handle);
-extern int lib_api_md_bind(nal_t *nal, ptl_md_t *umd, ptl_unlink_t unlink,
-                           ptl_handle_md_t *handle);
-extern int lib_api_md_unlink (nal_t *nal, ptl_handle_md_t *mdh);
-extern int lib_api_md_update (nal_t *nal, ptl_handle_md_t *mdh,
-                              ptl_md_t *oldumd, ptl_md_t *newumd,
-                              ptl_handle_eq_t *testqh);
-
-extern int lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, 
-                       ptl_process_id_t *id,
-                       ptl_pt_index_t portal, ptl_ac_index_t ac,
-                       ptl_match_bits_t match_bits, ptl_size_t offset);
-extern int lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
-                       ptl_ack_req_t ack, ptl_process_id_t *id,
-                       ptl_pt_index_t portal, ptl_ac_index_t ac,
-                       ptl_match_bits_t match_bits, 
-                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
-extern int lib_api_fail_nid(nal_t *apinal, ptl_nid_t nid, unsigned int threshold);
-
-#endif
diff --git a/lustre/portals/include/portals/lib-types.h b/lustre/portals/include/portals/lib-types.h
deleted file mode 100644 (file)
index cfcef2b..0000000
+++ /dev/null
@@ -1,359 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * p30/lib-types.h
- *
- * Types used by the library side routines that do not need to be
- * exposed to the user application
- */
-
-#ifndef _LIB_TYPES_H_
-#define _LIB_TYPES_H_
-
-#include "build_check.h"
-
-#include <portals/types.h>
-#include <portals/nal.h>
-#ifdef __KERNEL__
-# include <linux/uio.h>
-# include <linux/smp_lock.h>
-# include <linux/types.h>
-#else
-# define PTL_USE_LIB_FREELIST
-# include <sys/types.h>
-#endif
-
-typedef char *user_ptr;
-typedef struct lib_msg_t lib_msg_t;
-typedef struct lib_ptl_t lib_ptl_t;
-typedef struct lib_ac_t lib_ac_t;
-typedef struct lib_me_t lib_me_t;
-typedef struct lib_md_t lib_md_t;
-typedef struct lib_eq_t lib_eq_t;
-
-#define WIRE_ATTR      __attribute__((packed))
-
-/* The wire handle's interface cookie only matches one network interface in
- * one epoch (i.e. new cookie when the interface restarts or the node
- * reboots).  The object cookie only matches one object on that interface
- * during that object's lifetime (i.e. no cookie re-use). */
-typedef struct {
-        __u64 wh_interface_cookie;
-        __u64 wh_object_cookie;
-} WIRE_ATTR ptl_handle_wire_t;
-
-/* byte-flip insensitive! */
-#define PTL_WIRE_HANDLE_NONE \
-((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1})
-
-typedef enum {
-        PTL_MSG_ACK = 0,
-        PTL_MSG_PUT,
-        PTL_MSG_GET,
-        PTL_MSG_REPLY,
-        PTL_MSG_HELLO,
-} ptl_msg_type_t;
-
-/* The variant fields of the portals message header are aligned on an 8
- * byte boundary in the message header.  Note that all types used in these
- * wire structs MUST be fixed size and the smaller types are placed at the
- * end. */
-typedef struct ptl_ack {
-        ptl_handle_wire_t  dst_wmd;
-        ptl_match_bits_t   match_bits;
-        ptl_size_t         mlength;
-} WIRE_ATTR ptl_ack_t;
-
-typedef struct ptl_put {
-        ptl_handle_wire_t  ack_wmd;
-        ptl_match_bits_t   match_bits;
-        ptl_hdr_data_t     hdr_data;
-        ptl_pt_index_t     ptl_index;
-        ptl_size_t         offset;
-} WIRE_ATTR ptl_put_t;
-
-typedef struct ptl_get {
-        ptl_handle_wire_t  return_wmd;
-        ptl_match_bits_t   match_bits;
-        ptl_pt_index_t     ptl_index;
-        ptl_size_t         src_offset;
-        ptl_size_t         sink_length;
-} WIRE_ATTR ptl_get_t;
-
-typedef struct ptl_reply {
-        ptl_handle_wire_t  dst_wmd;
-} WIRE_ATTR ptl_reply_t;
-
-typedef struct ptl_hello {
-        __u64              incarnation;
-        __u32              type;
-} WIRE_ATTR ptl_hello_t;
-
-typedef struct {
-        ptl_nid_t           dest_nid;
-        ptl_nid_t           src_nid;
-        ptl_pid_t           dest_pid;
-        ptl_pid_t           src_pid;
-        __u32               type;               /* ptl_msg_type_t */
-        __u32               payload_length;     /* payload data to follow */
-        /*<------__u64 aligned------->*/
-        union {
-                ptl_ack_t   ack;
-                ptl_put_t   put;
-                ptl_get_t   get;
-                ptl_reply_t reply;
-                ptl_hello_t hello;
-        } msg;
-} WIRE_ATTR ptl_hdr_t;
-
-/* A HELLO message contains the portals magic number and protocol version
- * code in the header's dest_nid, the peer's NID in the src_nid, and
- * PTL_MSG_HELLO in the type field.  All other common fields are zero
- * (including payload_size; i.e. no payload).  
- * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is
- * running the same protocol and to find out its NID, so that hosts with
- * multiple IP interfaces can have a single NID. These NALs should exchange
- * HELLO messages when a connection is first established. 
- * Individual NALs can put whatever else they fancy in ptl_hdr_t::msg. 
- */
-typedef struct {
-        __u32  magic;                          /* PORTALS_PROTO_MAGIC */
-        __u16   version_major;                  /* increment on incompatible change */
-        __u16   version_minor;                  /* increment on compatible change */
-} WIRE_ATTR ptl_magicversion_t;
-
-#define PORTALS_PROTO_MAGIC                0xeebc0ded
-
-#define PORTALS_PROTO_VERSION_MAJOR        1
-#define PORTALS_PROTO_VERSION_MINOR        0
-
-typedef struct {
-        long recv_count, recv_length, send_count, send_length, drop_count,
-            drop_length, msgs_alloc, msgs_max;
-} lib_counters_t;
-
-/* temporary expedient: limit number of entries in discontiguous MDs */
-#define PTL_MTU        (1<<20)
-#define PTL_MD_MAX_IOV 256
-
-struct lib_msg_t {
-        struct list_head  msg_list;
-        lib_md_t         *md;
-        ptl_handle_wire_t ack_wmd;
-        ptl_event_t       ev;
-};
-
-struct lib_ptl_t {
-        ptl_pt_index_t size;
-        struct list_head *tbl;
-};
-
-struct lib_ac_t {
-        int next_free;
-};
-
-typedef struct {
-        struct list_head  lh_hash_chain;
-        __u64             lh_cookie;
-} lib_handle_t;
-
-#define lh_entry(ptr, type, member) \
-       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-struct lib_eq_t {
-        struct list_head  eq_list;
-        lib_handle_t      eq_lh;
-        ptl_seq_t         eq_enq_seq;
-        ptl_seq_t         eq_deq_seq;
-        ptl_size_t        eq_size;
-        ptl_event_t      *eq_events;
-        int               eq_refcount;
-        ptl_eq_handler_t  eq_callback;
-        void             *eq_addrkey;
-};
-
-struct lib_me_t {
-        struct list_head  me_list;
-        lib_handle_t      me_lh;
-        ptl_process_id_t  match_id;
-        ptl_match_bits_t  match_bits, ignore_bits;
-        ptl_unlink_t      unlink;
-        lib_md_t         *md;
-};
-
-struct lib_md_t {
-        struct list_head  md_list;
-        lib_handle_t      md_lh;
-        lib_me_t         *me;
-        user_ptr          start;
-        ptl_size_t        offset;
-        ptl_size_t        length;
-        ptl_size_t        max_size;
-        int               threshold;
-        int               pending;
-        unsigned int      options;
-        unsigned int      md_flags;
-        void             *user_ptr;
-        lib_eq_t         *eq;
-        void             *md_addrkey;
-        unsigned int      md_niov;                /* # frags */
-        union {
-                struct iovec  iov[PTL_MD_MAX_IOV];
-                ptl_kiov_t    kiov[PTL_MD_MAX_IOV];
-        } md_iov;
-};
-
-#define PTL_MD_FLAG_ZOMBIE            (1 << 0)
-#define PTL_MD_FLAG_AUTO_UNLINK       (1 << 1)
-
-static inline int lib_md_exhausted (lib_md_t *md) 
-{
-        return (md->threshold == 0 ||
-                ((md->options & PTL_MD_MAX_SIZE) != 0 &&
-                 md->offset + md->max_size > md->length));
-}
-
-#ifdef PTL_USE_LIB_FREELIST
-typedef struct
-{
-        void             *fl_objs;             /* single contiguous array of objects */
-        int                fl_nobjs;            /* the number of them */
-        int                fl_objsize;          /* the size (including overhead) of each of them */
-        struct list_head   fl_list;             /* where they are enqueued */
-} lib_freelist_t;
-
-typedef struct
-{
-        struct list_head   fo_list;             /* enqueue on fl_list */
-        void              *fo_contents;         /* aligned contents */
-} lib_freeobj_t;
-#endif
-
-typedef struct {
-        /* info about peers we are trying to fail */
-        struct list_head  tp_list;             /* stash in ni.ni_test_peers */
-        ptl_nid_t         tp_nid;              /* matching nid */
-        unsigned int      tp_threshold;        /* # failures to simulate */
-} lib_test_peer_t;
-
-#define PTL_COOKIE_TYPE_MD    1
-#define PTL_COOKIE_TYPE_ME    2
-#define PTL_COOKIE_TYPE_EQ    3
-#define PTL_COOKIE_TYPES      4
-/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be
- * extracted by masking with (PTL_COOKIE_TYPES - 1) */
-
-typedef struct lib_ni 
-{
-        nal_t            *ni_api;
-        ptl_process_id_t  ni_pid;
-        lib_ptl_t         ni_portals;
-        lib_counters_t    ni_counters;
-        ptl_ni_limits_t   ni_actual_limits;
-
-        int               ni_lh_hash_size;      /* size of lib handle hash table */
-        struct list_head *ni_lh_hash_table;     /* all extant lib handles, this interface */
-        __u64             ni_next_object_cookie; /* cookie generator */
-        __u64             ni_interface_cookie;  /* uniquely identifies this ni in this epoch */
-        
-        struct list_head  ni_test_peers;
-        
-#ifdef PTL_USE_LIB_FREELIST
-        lib_freelist_t    ni_free_mes;
-        lib_freelist_t    ni_free_msgs;
-        lib_freelist_t    ni_free_mds;
-        lib_freelist_t    ni_free_eqs;
-#endif
-
-        struct list_head  ni_active_msgs;
-        struct list_head  ni_active_mds;
-        struct list_head  ni_active_eqs;
-
-#ifdef __KERNEL__
-        spinlock_t        ni_lock;
-        wait_queue_head_t ni_waitq;
-#else
-        pthread_mutex_t   ni_mutex;
-        pthread_cond_t    ni_cond;
-#endif
-} lib_ni_t;
-
-
-typedef struct lib_nal
-{
-       /* lib-level interface state */
-       lib_ni_t libnal_ni;
-
-       /* NAL-private data */
-       void *libnal_data;
-
-       /*
-        * send: Sends a preformatted header and payload data to a
-        * specified remote process. The payload is scattered over 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  
-        * NB the NAL may NOT overwrite iov.  
-        * PTL_OK on success => NAL has committed to send and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*libnal_send) 
-                (struct lib_nal *nal, void *private, lib_msg_t *cookie, 
-                 ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                 unsigned int niov, struct iovec *iov, 
-                 size_t offset, size_t mlen);
-        
-       /* as send, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*libnal_send_pages)
-                (struct lib_nal *nal, void *private, lib_msg_t * cookie, 
-                 ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                 unsigned int niov, ptl_kiov_t *iov, 
-                 size_t offset, size_t mlen);
-       /*
-        * recv: Receives an incoming message from a remote process.  The
-        * payload is to be received into the scattered buffer of 'niov'
-        * fragments described by iov, starting at 'offset' for 'mlen'
-        * bytes.  Payload bytes after 'mlen' up to 'rlen' are to be
-        * discarded.  
-        * NB the NAL may NOT overwrite iov.
-        * PTL_OK on success => NAL has committed to receive and will call
-        * lib_finalize on completion
-        */
-       ptl_err_t (*libnal_recv) 
-                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
-                 unsigned int niov, struct iovec *iov, 
-                 size_t offset, size_t mlen, size_t rlen);
-
-       /* as recv, but with a set of page fragments (NULL if not supported) */
-       ptl_err_t (*libnal_recv_pages) 
-                (struct lib_nal *nal, void *private, lib_msg_t * cookie,
-                 unsigned int niov, ptl_kiov_t *iov, 
-                 size_t offset, size_t mlen, size_t rlen);
-
-       /*
-        * (un)map: Tell the NAL about some memory it will access.
-        * *addrkey passed to libnal_unmap() is what libnal_map() set it to.
-        * type of *iov depends on options.
-        * Set to NULL if not required.
-        */
-       ptl_err_t (*libnal_map)
-                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
-                 void **addrkey);
-       void (*libnal_unmap)
-                (struct lib_nal *nal, unsigned int niov, struct iovec *iov, 
-                 void **addrkey);
-
-       /* as (un)map, but with a set of page fragments */
-       ptl_err_t (*libnal_map_pages)
-                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
-                 void **addrkey);
-       void (*libnal_unmap_pages)
-                (struct lib_nal *nal, unsigned int niov, ptl_kiov_t *iov, 
-                 void **addrkey);
-
-       void (*libnal_printf)(struct lib_nal *nal, const char *fmt, ...);
-
-       /* Calculate a network "distance" to given node */
-       int (*libnal_dist) (struct lib_nal *nal, ptl_nid_t nid, unsigned long *dist);
-} lib_nal_t;
-
-#endif
diff --git a/lustre/portals/include/portals/list.h b/lustre/portals/include/portals/list.h
deleted file mode 100644 (file)
index 37d9952..0000000
+++ /dev/null
@@ -1,243 +0,0 @@
-#ifndef _LINUX_LIST_H
-/*
- * Simple doubly linked list implementation.
- *
- * Some of the internal functions ("__xxx") are useful when
- * manipulating whole lists rather than single entries, as
- * sometimes we already know the next/prev entries and we can
- * generate better code by using them directly rather than
- * using the generic single-entry routines.
- */
-
-struct list_head {
-       struct list_head *next, *prev;
-};
-
-typedef struct list_head list_t;
-
-#define LIST_HEAD_INIT(name) { &(name), &(name) }
-
-#define LIST_HEAD(name) \
-       struct list_head name = LIST_HEAD_INIT(name)
-
-#define INIT_LIST_HEAD(ptr) do { \
-       (ptr)->next = (ptr); (ptr)->prev = (ptr); \
-} while (0)
-
-/*
- * Insert a new entry between two known consecutive entries.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_add(struct list_head * new,
-                             struct list_head * prev,
-                             struct list_head * next)
-{
-       next->prev = new;
-       new->next = next;
-       new->prev = prev;
-       prev->next = new;
-}
-
-/**
- * list_add - add a new entry
- * @new: new entry to be added
- * @head: list head to add it after
- *
- * Insert a new entry after the specified head.
- * This is good for implementing stacks.
- */
-static inline void list_add(struct list_head *new, struct list_head *head)
-{
-       __list_add(new, head, head->next);
-}
-
-/**
- * list_add_tail - add a new entry
- * @new: new entry to be added
- * @head: list head to add it before
- *
- * Insert a new entry before the specified head.
- * This is useful for implementing queues.
- */
-static inline void list_add_tail(struct list_head *new, struct list_head *head)
-{
-       __list_add(new, head->prev, head);
-}
-
-/*
- * Delete a list entry by making the prev/next entries
- * point to each other.
- *
- * This is only for internal list manipulation where we know
- * the prev/next entries already!
- */
-static inline void __list_del(struct list_head * prev, struct list_head * next)
-{
-       next->prev = prev;
-       prev->next = next;
-}
-
-/**
- * list_del - deletes entry from list.
- * @entry: the element to delete from the list.
- * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
- */
-static inline void list_del(struct list_head *entry)
-{
-       __list_del(entry->prev, entry->next);
-}
-
-/**
- * list_del_init - deletes entry from list and reinitialize it.
- * @entry: the element to delete from the list.
- */
-static inline void list_del_init(struct list_head *entry)
-{
-       __list_del(entry->prev, entry->next);
-       INIT_LIST_HEAD(entry);
-}
-#endif
-
-#ifndef list_for_each_entry
-/**
- * list_move - delete from one list and add as another's head
- * @list: the entry to move
- * @head: the head that will precede our entry
- */
-static inline void list_move(struct list_head *list, struct list_head *head)
-{
-       __list_del(list->prev, list->next);
-       list_add(list, head);
-}
-
-/**
- * list_move_tail - delete from one list and add as another's tail
- * @list: the entry to move
- * @head: the head that will follow our entry
- */
-static inline void list_move_tail(struct list_head *list,
-                                 struct list_head *head)
-{
-       __list_del(list->prev, list->next);
-       list_add_tail(list, head);
-}
-#endif
-
-#ifndef _LINUX_LIST_H
-#define _LINUX_LIST_H
-/**
- * list_empty - tests whether a list is empty
- * @head: the list to test.
- */
-static inline int list_empty(struct list_head *head)
-{
-       return head->next == head;
-}
-
-static inline void __list_splice(struct list_head *list,
-                                struct list_head *head)
-{
-       struct list_head *first = list->next;
-       struct list_head *last = list->prev;
-       struct list_head *at = head->next;
-
-       first->prev = head;
-       head->next = first;
-
-       last->next = at;
-       at->prev = last;
-}
-
-/**
- * list_splice - join two lists
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- */
-static inline void list_splice(struct list_head *list, struct list_head *head)
-{
-       if (!list_empty(list))
-               __list_splice(list, head);
-}
-
-/**
- * list_splice_init - join two lists and reinitialise the emptied list.
- * @list: the new list to add.
- * @head: the place to add it in the first list.
- *
- * The list at @list is reinitialised
- */
-static inline void list_splice_init(struct list_head *list,
-                                   struct list_head *head)
-{
-       if (!list_empty(list)) {
-               __list_splice(list, head);
-               INIT_LIST_HEAD(list);
-       }
-}
-
-/**
- * list_entry - get the struct for this entry
- * @ptr:       the &struct list_head pointer.
- * @type:      the type of the struct this is embedded in.
- * @member:    the name of the list_struct within the struct.
- */
-#define list_entry(ptr, type, member) \
-       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-/**
- * list_for_each       -       iterate over a list
- * @pos:       the &struct list_head to use as a loop counter.
- * @head:      the head for your list.
- */
-#define list_for_each(pos, head) \
-       for (pos = (head)->next ; pos != (head); pos = pos->next )
-
-/**
- * list_for_each_prev  -       iterate over a list in reverse order
- * @pos:       the &struct list_head to use as a loop counter.
- * @head:      the head for your list.
- */
-#define list_for_each_prev(pos, head) \
-       for (pos = (head)->prev ; pos != (head); pos = pos->prev)
-
-/**
- * list_for_each_safe  -       iterate over a list safe against removal of list entry
- * @pos:       the &struct list_head to use as a loop counter.
- * @n:         another &struct list_head to use as temporary storage
- * @head:      the head for your list.
- */
-#define list_for_each_safe(pos, n, head) \
-       for (pos = (head)->next, n = pos->next; pos != (head); \
-               pos = n, n = pos->next)
-
-#endif
-
-#ifndef list_for_each_entry
-/**
- * list_for_each_entry  -       iterate over list of given type
- * @pos:        the type * to use as a loop counter.
- * @head:       the head for your list.
- * @member:     the name of the list_struct within the struct.
- */
-#define list_for_each_entry(pos, head, member)                         \
-        for (pos = list_entry((head)->next, typeof(*pos), member);     \
-            &pos->member != (head);                                    \
-            pos = list_entry(pos->member.next, typeof(*pos), member))
-#endif
-
-#ifndef list_for_each_entry_safe
-/**
- * list_for_each_entry_safe  -       iterate over list of given type safe against removal of list entry
- * @pos:        the type * to use as a loop counter.
- * @n:          another type * to use as temporary storage
- * @head:       the head for your list.
- * @member:     the name of the list_struct within the struct.
- */
-#define list_for_each_entry_safe(pos, n, head, member)                 \
-        for (pos = list_entry((head)->next, typeof(*pos), member),     \
-               n = list_entry(pos->member.next, typeof(*pos), member); \
-            &pos->member != (head);                                    \
-            pos = n, n = list_entry(n->member.next, typeof(*n), member))
-#endif
diff --git a/lustre/portals/include/portals/lltrace.h b/lustre/portals/include/portals/lltrace.h
deleted file mode 100644 (file)
index 3e01df1..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Compile with:
- * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl
- */
-#ifndef __LTRACE_H_
-#define __LTRACE_H_
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <getopt.h>
-#include <string.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/time.h>
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <portals/ptlctl.h>
-#include <linux/limits.h>
-#include <asm/page.h>
-#include <linux/version.h>
-
-static inline int ltrace_write_file(char* fname)
-{
-        char* argv[3];
-
-        argv[0] = "debug_kernel";
-        argv[1] = fname;
-        argv[2] = "1";
-
-        fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]);
-
-        return jt_dbg_debug_kernel(3, argv);
-}
-
-static inline int ltrace_clear()
-{
-        char* argv[1];
-
-        argv[0] = "clear";
-
-        fprintf(stderr, "[ptlctl] %s\n", argv[0]);
-
-        return jt_dbg_clear_debug_buf(1, argv);
-}
-
-static inline int ltrace_mark(int indent_level, char* text)
-{
-        char* argv[2];
-        char mark_buf[PATH_MAX];
-
-        snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text);
-
-        argv[0] = "mark";
-        argv[1] = mark_buf;
-        return jt_dbg_mark_debug_buf(2, argv);
-}
-
-static inline int ltrace_applymasks()
-{
-        char* argv[2];
-        argv[0] = "list";
-        argv[1] = "applymasks";
-
-        fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]);
-
-        return jt_dbg_list(2, argv);
-}
-
-
-static inline int ltrace_filter(char* subsys_or_mask)
-{
-        char* argv[2];
-        argv[0] = "filter";
-        argv[1] = subsys_or_mask;
-        return jt_dbg_filter(2, argv);
-}
-
-static inline int ltrace_show(char* subsys_or_mask)
-{
-        char* argv[2];
-        argv[0] = "show";
-        argv[1] = subsys_or_mask;
-        return jt_dbg_show(2, argv);
-}
-
-static inline int ltrace_start()
-{
-        int rc = 0;
-        dbg_initialize(0, NULL);
-#ifdef PORTALS_DEV_ID
-        rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
-#endif
-        ltrace_filter("class");
-        ltrace_filter("nal");
-        ltrace_filter("portals");
-
-        ltrace_show("all_types");
-        ltrace_filter("trace");
-        ltrace_filter("malloc");
-        ltrace_filter("net");
-        ltrace_filter("page");
-        ltrace_filter("other");
-        ltrace_filter("info");
-        ltrace_applymasks();
-
-        return rc;
-}
-
-
-static inline void ltrace_stop()
-{
-#ifdef PORTALS_DEV_ID
-        unregister_ioc_dev(PORTALS_DEV_ID);
-#endif
-}
-
-static inline int not_uml()
-{
-  /* Return Values:
-   *   0 when run under UML
-   *   1 when run on host
-   *  <0 when lookup failed
-   */
-       struct stat buf;
-       int rc = stat("/dev/ubd", &buf);
-       rc = ((rc<0) && (errno == ENOENT)) ? 1 : rc;
-       if (rc<0) {
-         fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno));
-         rc = 1; /* Assume host */
-       }
-       return rc;
-}
-
-#define LTRACE_MAX_NOB   256
-static inline void ltrace_add_processnames(char* fname)
-{
-        char cmdbuf[LTRACE_MAX_NOB];
-        struct timeval tv;
-        struct timezone tz;
-        int nob;
-        int underuml = !not_uml();
-
-        gettimeofday(&tv, &tz);
-
-        nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \"");
-
-        /* Careful - these format strings need to match the CDEBUG
-         * formats in portals/linux/debug.c EXACTLY
-         */
-        nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ",
-                        S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec);
-
-        if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) {
-                nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
-                                 "(%s:%d:%s() %d | %d+%lu): ",
-                                 "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L);
-        }
-        else {
-                nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB,
-                                 "(%s:%d:%s() %d+%lu): ",
-                                 "lltrace.h", __LINE__, __FUNCTION__, 0, 0L);
-        }
-
-        nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname);
-        system(cmdbuf);
-}
-
-#endif
diff --git a/lustre/portals/include/portals/myrnal.h b/lustre/portals/include/portals/myrnal.h
deleted file mode 100644 (file)
index 13790f7..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef MYRNAL_H
-#define MYRNAL_H
-
-#define MAX_ARGS_LEN            (256)
-#define MAX_RET_LEN             (128)
-#define MYRNAL_MAX_ACL_SIZE     (64)
-#define MYRNAL_MAX_PTL_SIZE     (64)
-
-#define P3CMD                   (100)
-#define P3SYSCALL               (200)
-#define P3REGISTER              (300)
-
-enum { PTL_MLOCKALL };
-
-typedef struct {
-       void *args;
-       size_t args_len;
-       void *ret;
-       size_t ret_len;
-       int p3cmd;
-} myrnal_forward_t;
-
-#endif                         /* MYRNAL_H */
diff --git a/lustre/portals/include/portals/nal.h b/lustre/portals/include/portals/nal.h
deleted file mode 100644 (file)
index bf86569..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef _NAL_H_
-#define _NAL_H_
-
-#include "build_check.h"
-
-/*
- * p30/nal.h
- *
- * The API side NAL declarations
- */
-
-#include <portals/types.h>
-
-typedef struct nal_t nal_t;
-
-struct nal_t {
-       /* common interface state */
-       int              nal_refct;
-        ptl_handle_ni_t  nal_handle;
-
-       /* NAL-private data */
-       void            *nal_data;
-
-       /* NAL API implementation 
-        * NB only nal_ni_init needs to be set when the NAL registers itself */
-       int (*nal_ni_init) (nal_t *nal, ptl_pid_t requested_pid,
-                           ptl_ni_limits_t *req, ptl_ni_limits_t *actual);
-       
-       void (*nal_ni_fini) (nal_t *nal);
-
-       int (*nal_get_id) (nal_t *nal, ptl_process_id_t *id);
-       int (*nal_ni_status) (nal_t *nal, ptl_sr_index_t register, ptl_sr_value_t *status);
-       int (*nal_ni_dist) (nal_t *nal, ptl_process_id_t *id, unsigned long *distance);
-       int (*nal_fail_nid) (nal_t *nal, ptl_nid_t nid, unsigned int threshold);
-
-       int (*nal_me_attach) (nal_t *nal, ptl_pt_index_t portal,
-                             ptl_process_id_t match_id, 
-                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
-                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
-                             ptl_handle_me_t *handle);
-       int (*nal_me_insert) (nal_t *nal, ptl_handle_me_t *me,
-                             ptl_process_id_t match_id, 
-                             ptl_match_bits_t match_bits, ptl_match_bits_t ignore_bits,
-                             ptl_unlink_t unlink, ptl_ins_pos_t pos, 
-                             ptl_handle_me_t *handle);
-       int (*nal_me_unlink) (nal_t *nal, ptl_handle_me_t *me);
-       
-       int (*nal_md_attach) (nal_t *nal, ptl_handle_me_t *me,
-                             ptl_md_t *md, ptl_unlink_t unlink, 
-                             ptl_handle_md_t *handle);
-       int (*nal_md_bind) (nal_t *nal, 
-                           ptl_md_t *md, ptl_unlink_t unlink, 
-                           ptl_handle_md_t *handle);
-       int (*nal_md_unlink) (nal_t *nal, ptl_handle_md_t *md);
-       int (*nal_md_update) (nal_t *nal, ptl_handle_md_t *md,
-                             ptl_md_t *old_md, ptl_md_t *new_md,
-                             ptl_handle_eq_t *testq);
-
-       int (*nal_eq_alloc) (nal_t *nal, ptl_size_t count,
-                            ptl_eq_handler_t handler,
-                            ptl_handle_eq_t *handle);
-       int (*nal_eq_free) (nal_t *nal, ptl_handle_eq_t *eq);
-       int (*nal_eq_poll) (nal_t *nal, 
-                           ptl_handle_eq_t *eqs, int neqs, int timeout,
-                           ptl_event_t *event, int *which);
-
-       int (*nal_ace_entry) (nal_t *nal, ptl_ac_index_t index,
-                             ptl_process_id_t match_id, ptl_pt_index_t portal);
-       
-       int (*nal_put) (nal_t *nal, ptl_handle_md_t *md, ptl_ack_req_t ack,
-                       ptl_process_id_t *target, ptl_pt_index_t portal,
-                       ptl_ac_index_t ac, ptl_match_bits_t match,
-                       ptl_size_t offset, ptl_hdr_data_t hdr_data);
-       int (*nal_get) (nal_t *nal, ptl_handle_md_t *md,
-                       ptl_process_id_t *target, ptl_pt_index_t portal,
-                       ptl_ac_index_t ac, ptl_match_bits_t match,
-                       ptl_size_t offset);
-};
-
-extern nal_t *ptl_hndl2nal(ptl_handle_any_t *any);
-
-#ifdef __KERNEL__
-extern int ptl_register_nal(ptl_interface_t interface, nal_t *nal);
-extern void ptl_unregister_nal(ptl_interface_t interface);
-#endif
-
-#endif
diff --git a/lustre/portals/include/portals/nalids.h b/lustre/portals/include/portals/nalids.h
deleted file mode 100644 (file)
index 55a991b..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-#include "build_check.h"
-
diff --git a/lustre/portals/include/portals/p30.h b/lustre/portals/include/portals/p30.h
deleted file mode 100644 (file)
index 4b8631d..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#ifndef _P30_H_
-#define _P30_H_
-
-#include "build_check.h"
-
-/*
- * p30.h
- *
- * User application interface file
- */
-
-#if defined (__KERNEL__)
-#include <linux/uio.h>
-#include <linux/types.h>
-#else
-#include <sys/types.h>
-#include <sys/uio.h>
-#endif
-
-#include <portals/types.h>
-#include <portals/api.h>
-
-#endif
diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h
deleted file mode 100644 (file)
index ef52a25..0000000
+++ /dev/null
@@ -1,96 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * header for libptlctl.a
- */
-#ifndef _PTLCTL_H_
-#define _PTLCTL_H_
-
-#include <portals/types.h>
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
-
-#define PORTALS_DEV_ID 0
-#define PORTALS_DEV_PATH "/dev/portals"
-#define OBD_DEV_ID 1
-#define OBD_DEV_PATH "/dev/obd"
-#define SMFS_DEV_ID  2
-#define SMFS_DEV_PATH "/dev/snapdev"
-
-int ptl_name2nal(char *str);
-int ptl_parse_ipaddr (__u32 *ipaddrp, char *str);
-int ptl_parse_nid (ptl_nid_t *nidp, char *str);
-char * ptl_nid2str (char *buffer, ptl_nid_t nid);
-
-int ptl_initialize(int argc, char **argv);
-int jt_ptl_network(int argc, char **argv);
-int jt_ptl_print_interfaces(int argc, char **argv);
-int jt_ptl_add_interface(int argc, char **argv);
-int jt_ptl_del_interface(int argc, char **argv);
-int jt_ptl_print_peers (int argc, char **argv);
-int jt_ptl_add_peer (int argc, char **argv);
-int jt_ptl_del_peer (int argc, char **argv);
-int jt_ptl_print_connections (int argc, char **argv);
-int jt_ptl_connect(int argc, char **argv);
-int jt_ptl_disconnect(int argc, char **argv);
-int jt_ptl_push_connection(int argc, char **argv);
-int jt_ptl_print_active_txs(int argc, char **argv);
-int jt_ptl_ping(int argc, char **argv);
-int jt_ptl_shownid(int argc, char **argv);
-int jt_ptl_mynid(int argc, char **argv);
-int jt_ptl_add_uuid(int argc, char **argv);
-int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility  */
-int jt_ptl_close_uuid(int argc, char **argv);
-int jt_ptl_del_uuid(int argc, char **argv);
-int jt_ptl_add_route (int argc, char **argv);
-int jt_ptl_del_route (int argc, char **argv);
-int jt_ptl_notify_router (int argc, char **argv);
-int jt_ptl_print_routes (int argc, char **argv);
-int jt_ptl_fail_nid (int argc, char **argv);
-int jt_ptl_lwt(int argc, char **argv);
-int jt_ptl_memhog(int argc, char **argv);
-
-int dbg_initialize(int argc, char **argv);
-int jt_dbg_filter(int argc, char **argv);
-int jt_dbg_show(int argc, char **argv);
-int jt_dbg_list(int argc, char **argv);
-int jt_dbg_debug_kernel(int argc, char **argv);
-int jt_dbg_debug_daemon(int argc, char **argv);
-int jt_dbg_debug_file(int argc, char **argv);
-int jt_dbg_clear_debug_buf(int argc, char **argv);
-int jt_dbg_mark_debug_buf(int argc, char **argv);
-int jt_dbg_modules(int argc, char **argv);
-int jt_dbg_panic(int argc, char **argv);
-
-int ptl_set_cfg_record_cb(cfg_record_cb_t cb);
-
-/* l_ioctl.c */
-typedef int (ioc_handler_t)(int dev_id, unsigned int opc, void *buf);
-void set_ioc_handler(ioc_handler_t *handler);
-int register_ioc_dev(int dev_id, const char * dev_name);
-void unregister_ioc_dev(int dev_id);
-int set_ioctl_dump(char * file);
-int l_ioctl(int dev_id, unsigned int opc, void *buf);
-int parse_dump(char * dump_file, ioc_handler_t ioc_func);
-int jt_ioc_dump(int argc, char **argv);
-extern char *dump_filename;
-int dump(int dev_id, unsigned int opc, void *buf);
-
-#endif
diff --git a/lustre/portals/include/portals/socknal.h b/lustre/portals/include/portals/socknal.h
deleted file mode 100644 (file)
index 27e6f8e..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * <portals/socknal.h>
- *
- * #defines shared between socknal implementation and utilities
- */
-
-#define SOCKNAL_CONN_NONE     (-1)
-#define SOCKNAL_CONN_ANY        0
-#define SOCKNAL_CONN_CONTROL    1
-#define SOCKNAL_CONN_BULK_IN    2
-#define SOCKNAL_CONN_BULK_OUT   3
-#define SOCKNAL_CONN_NTYPES     4
diff --git a/lustre/portals/include/portals/stringtab.h b/lustre/portals/include/portals/stringtab.h
deleted file mode 100644 (file)
index 33e4375..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-/*
- * stringtab.h
- */
diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h
deleted file mode 100644 (file)
index 0bada40..0000000
+++ /dev/null
@@ -1,193 +0,0 @@
-#ifndef _P30_TYPES_H_
-#define _P30_TYPES_H_
-
-#include "build_check.h"
-
-#include <linux/libcfs.h>
-#include <portals/errno.h>
-
-/* This implementation uses the same type for API function return codes and
- * the completion status in an event  */
-#define PTL_NI_OK  PTL_OK
-typedef ptl_err_t ptl_ni_fail_t;
-
-typedef __u32 ptl_uid_t;
-typedef __u32 ptl_jid_t;
-typedef __u64 ptl_nid_t;
-typedef __u32 ptl_pid_t;
-typedef __u32 ptl_pt_index_t;
-typedef __u32 ptl_ac_index_t;
-typedef __u64 ptl_match_bits_t;
-typedef __u64 ptl_hdr_data_t;
-typedef __u32 ptl_size_t;
-
-#define PTL_TIME_FOREVER    (-1)
-
-typedef struct {
-        unsigned long nal_idx;                 /* which network interface */
-        __u64         cookie;                  /* which thing on that interface */
-} ptl_handle_any_t;
-
-typedef ptl_handle_any_t ptl_handle_ni_t;
-typedef ptl_handle_any_t ptl_handle_eq_t;
-typedef ptl_handle_any_t ptl_handle_md_t;
-typedef ptl_handle_any_t ptl_handle_me_t;
-
-#define PTL_INVALID_HANDLE \
-    ((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1})
-#define PTL_EQ_NONE PTL_INVALID_HANDLE
-
-static inline int PtlHandleIsEqual (ptl_handle_any_t h1, ptl_handle_any_t h2)
-{
-       return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie);
-}
-
-#define PTL_UID_ANY      ((ptl_uid_t) -1)
-#define PTL_JID_ANY      ((ptl_jid_t) -1)
-#define PTL_NID_ANY      ((ptl_nid_t) -1)
-#define PTL_PID_ANY      ((ptl_pid_t) -1)
-
-typedef struct {
-        ptl_nid_t nid;
-        ptl_pid_t pid;   /* node id / process id */
-} ptl_process_id_t;
-
-typedef enum {
-        PTL_RETAIN = 0,
-        PTL_UNLINK
-} ptl_unlink_t;
-
-typedef enum {
-        PTL_INS_BEFORE,
-        PTL_INS_AFTER
-} ptl_ins_pos_t;
-
-typedef struct {
-        void            *start;
-        ptl_size_t       length;
-        int              threshold;
-        int              max_size;
-        unsigned int     options;
-        void            *user_ptr;
-        ptl_handle_eq_t  eq_handle;
-} ptl_md_t;
-
-/* Options for the MD structure */
-#define PTL_MD_OP_PUT               (1 << 0)
-#define PTL_MD_OP_GET               (1 << 1)
-#define PTL_MD_MANAGE_REMOTE        (1 << 2)
-/* unused                           (1 << 3) */
-#define PTL_MD_TRUNCATE             (1 << 4)
-#define PTL_MD_ACK_DISABLE          (1 << 5)
-#define PTL_MD_IOVEC               (1 << 6)
-#define PTL_MD_MAX_SIZE                    (1 << 7)
-#define PTL_MD_KIOV                 (1 << 8)
-#define PTL_MD_EVENT_START_DISABLE  (1 << 9)
-#define PTL_MD_EVENT_END_DISABLE    (1 << 10)
-
-/* For compatibility with Cray Portals */
-#define PTL_MD_LUSTRE_COMPLETION_SEMANTICS  0
-#define PTL_MD_PHYS                         0
-
-#define PTL_MD_THRESH_INF       (-1)
-
-/* NB lustre portals uses struct iovec internally! */
-typedef struct iovec ptl_md_iovec_t;
-
-typedef struct {
-       struct page     *kiov_page;
-       unsigned int     kiov_len;
-       unsigned int     kiov_offset;
-} ptl_kiov_t;
-
-typedef enum {
-        PTL_EVENT_GET_START,
-        PTL_EVENT_GET_END,
-
-        PTL_EVENT_PUT_START,
-        PTL_EVENT_PUT_END,
-
-        PTL_EVENT_REPLY_START,
-        PTL_EVENT_REPLY_END,
-
-        PTL_EVENT_ACK,
-
-        PTL_EVENT_SEND_START,
-       PTL_EVENT_SEND_END,
-
-       PTL_EVENT_UNLINK,
-} ptl_event_kind_t;
-
-#define PTL_SEQ_BASETYPE       long
-typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t;
-#define PTL_SEQ_GT(a,b)        (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0)
-
-/* XXX
- * Cygwin needs the pack pragma; it is not clear whether it is needed
- * anywhere else (still being checked).  The push is undone below.
- */
-#ifdef __CYGWIN__
-#pragma pack(push, 4)
-#endif
-typedef struct {
-        ptl_event_kind_t   type;
-        ptl_process_id_t   initiator;
-        ptl_uid_t          uid;
-        ptl_jid_t          jid;
-        ptl_pt_index_t     pt_index;
-        ptl_match_bits_t   match_bits;
-        ptl_size_t         rlength;
-        ptl_size_t         mlength;
-        ptl_size_t         offset;
-        ptl_handle_md_t    md_handle;
-        ptl_md_t           md;
-        ptl_hdr_data_t     hdr_data;
-        ptl_seq_t          link;
-        ptl_ni_fail_t      ni_fail_type;
-
-        int                unlinked;
-
-        volatile ptl_seq_t sequence;
-} ptl_event_t;
-#ifdef __CYGWIN__
-#pragma pack(pop)       /* restore the packing saved by the push above */
-#endif
-
-typedef enum {
-        PTL_ACK_REQ,
-        PTL_NOACK_REQ
-} ptl_ack_req_t;
-
-typedef void (*ptl_eq_handler_t)(ptl_event_t *event);
-#define PTL_EQ_HANDLER_NONE NULL
-
-typedef struct {
-       int max_mes;
-       int max_mds;
-       int max_eqs;
-       int max_ac_index;
-       int max_pt_index;
-       int max_md_iovecs;
-       int max_me_list;
-       int max_getput_md;
-} ptl_ni_limits_t;
-
-/*
- * Status registers
- */
-typedef enum {
-        PTL_SR_DROP_COUNT,
-        PTL_SR_DROP_LENGTH,
-        PTL_SR_RECV_COUNT,
-        PTL_SR_RECV_LENGTH,
-        PTL_SR_SEND_COUNT,
-        PTL_SR_SEND_LENGTH,
-        PTL_SR_MSGS_MAX,
-} ptl_sr_index_t;
-
-typedef int ptl_sr_value_t;
-
-typedef int ptl_interface_t;
-#define PTL_IFACE_DEFAULT    (-1)
-
-#endif
diff --git a/lustre/portals/knals/.cvsignore b/lustre/portals/knals/.cvsignore
deleted file mode 100644 (file)
index f5fd0b0..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-Makefile
-autoMakefile
-autoMakefile.in
-.*.cmd
-.depend
diff --git a/lustre/portals/knals/Makefile.in b/lustre/portals/knals/Makefile.in
deleted file mode 100644 (file)
index 7e2e601..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-@BUILD_GMNAL_TRUE@subdir-m += gmnal
-@BUILD_RANAL_TRUE@subdir-m += ranal
-@BUILD_OPENIBNAL_TRUE@subdir-m += openibnal
-@BUILD_IIBNAL_TRUE@subdir-m += iibnal
-@BUILD_QSWNAL_TRUE@subdir-m += qswnal
-subdir-m += socknal
-subdir-m += lonal
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/Makefile.mk b/lustre/portals/knals/Makefile.mk
deleted file mode 100644 (file)
index 454ee16..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-include $(obj)/../Kernelenv
-
-obj-y = socknal/
-obj-y += lonal/
-# more coming... (append with += so earlier entries are not overwritten)
diff --git a/lustre/portals/knals/autoMakefile.am b/lustre/portals/knals/autoMakefile.am
deleted file mode 100644 (file)
index 4638188..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-SUBDIRS = gmnal iibnal openibnal qswnal socknal lonal ranal
diff --git a/lustre/portals/knals/gmnal/.cvsignore b/lustre/portals/knals/gmnal/.cvsignore
deleted file mode 100644 (file)
index 642e2e6..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.cmd
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/gmnal/Makefile.in b/lustre/portals/knals/gmnal/Makefile.in
deleted file mode 100644 (file)
index 89ea361..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kgmnal
-kgmnal-objs := gmnal_api.o gmnal_cb.o gmnal_comm.o gmnal_utils.o gmnal_module.o
-
-EXTRA_PRE_CFLAGS := @GMCPPFLAGS@
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/gmnal/Makefile.mk b/lustre/portals/knals/gmnal/Makefile.mk
deleted file mode 100644 (file)
index b799a47..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include ../../Kernelenv
-
-obj-y += gmnal.o
-gmnal-objs    := gmnal_api.o gmnal_cb.o gmnal_utils.o gmnal_comm.o gmnal_module.o
-
diff --git a/lustre/portals/knals/gmnal/autoMakefile.am b/lustre/portals/knals/gmnal/autoMakefile.am
deleted file mode 100644 (file)
index d8b9edb..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if BUILD_GMNAL
-if !CRAY_PORTALS
-modulenet_DATA = kgmnal$(KMODEXT)
-endif
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(kgmnal-objs:%.o=%.c) gmnal.h
diff --git a/lustre/portals/knals/gmnal/gmnal.h b/lustre/portals/knals/gmnal/gmnal.h
deleted file mode 100644 (file)
index f45eab7..0000000
+++ /dev/null
@@ -1,455 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- *     Portals GM kernel NAL header file
- *     This file holds all declarations and prototypes
- *     for the API side and CB side of the NAL
- */
-#ifndef __INCLUDE_GMNAL_H__
-#define __INCLUDE_GMNAL_H__
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include "linux/config.h"
-#include "linux/module.h"
-#include "linux/tty.h"
-#include "linux/kernel.h"
-#include "linux/mm.h"
-#include "linux/string.h"
-#include "linux/stat.h"
-#include "linux/errno.h"
-#include "linux/locks.h"
-#include "linux/unistd.h"
-#include "linux/init.h"
-#include "linux/sem.h"
-#include "linux/vmalloc.h"
-#include "linux/sysctl.h"
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include "portals/nal.h"
-#include "portals/api.h"
-#include "portals/errno.h"
-#include "linux/kp30.h"
-#include "portals/p30.h"
-
-#include "portals/nal.h"
-#include "portals/lib-p30.h"
-
-#define GM_STRONG_TYPES 1
-#ifdef VERSION
-#undef VERSION
-#endif
-#include "gm.h"
-#include "gm_internal.h"
-
-
-
-/*
- *     Defines for the API NAL
- */
-
-/*
- *     Small message size is configurable
- *     insmod can set small_msg_size
- *     which is used to populate nal_data.small_msg_size
- */
-#define GMNAL_SMALL_MESSAGE            1078
-#define GMNAL_LARGE_MESSAGE_INIT       1079
-#define GMNAL_LARGE_MESSAGE_ACK        1080
-#define GMNAL_LARGE_MESSAGE_FINI       1081
-
-extern  int gmnal_small_msg_size;
-extern  int num_rx_threads;
-extern  int num_stxds;
-extern  int gm_port;
-#define GMNAL_SMALL_MSG_SIZE(a)                ((a)->small_msg_size) /* parenthesised: safe for any pointer expression */
-#define GMNAL_IS_SMALL_MESSAGE(n,a,b,c)        gmnal_is_small_msg(n, a, b, c)
-#define GMNAL_MAGIC                            0x1234abcd
-/*
- *     The gm_port to use for gmnal
- */
-#define GMNAL_GM_PORT  gm_port
-
-
-/*
- *     Small Transmit Descriptor
- *     A structure to keep track of a small transmit operation.
- *     This structure has a one-to-one relationship with a small
- *     transmit buffer (both created by gmnal_stxd_alloc).
- *     There are two free lists of stxds: one for use by clients of the NAL
- *     and the other by the NAL rxthreads when doing sends. 
- *     This helps prevent deadlock caused by stxd starvation.
- */
-typedef struct _gmnal_stxd_t {
-       void                    *buffer;
-       int                     buffer_size;
-       gm_size_t               gm_size;
-       int                     msg_size;
-       int                     gm_target_node;
-       int                     gm_priority;
-       int                     type;
-       struct _gmnal_data_t    *nal_data;
-       lib_msg_t               *cookie;
-       int                     niov;
-       struct iovec            iov[PTL_MD_MAX_IOV];
-       struct _gmnal_stxd_t    *next;
-        int                     rxt; 
-        int                     kniov;
-        struct iovec            *iovec_dup;
-} gmnal_stxd_t;
-
-/*
- *     keeps a transmit token for large transmit (gm_get)
- *     and a pointer to rxd that is used as context for large receive
- */
-typedef struct _gmnal_ltxd_t {
-       struct _gmnal_ltxd_t    *next;
-       struct  _gmnal_srxd_t  *srxd;
-} gmnal_ltxd_t;
-
-
-/*
- *     As for gmnal_stxd_t.
- *     A hash table in nal_data finds srxds from
- *     the rx buffer address; the hash table is populated at init time.
- */
-typedef struct _gmnal_srxd_t {
-       void                    *buffer;
-       int                     size;
-       gm_size_t               gmsize;
-       unsigned int            gm_source_node;
-       gmnal_stxd_t            *source_stxd;
-       int                     type;
-       int                     nsiov;
-       int                     nriov;
-       struct iovec            *riov;
-       int                     ncallbacks;
-       spinlock_t              callback_lock;
-       int                     callback_status;
-       lib_msg_t               *cookie;
-       struct _gmnal_srxd_t    *next;
-       struct _gmnal_data_t    *nal_data;
-} gmnal_srxd_t;
-
-/*
- *     Header which gmnal puts at the start of each message
- */
-typedef struct _gmnal_msghdr {
-       int             magic;
-       int             type;
-       unsigned int    sender_node_id;
-       gmnal_stxd_t    *stxd;
-       int             niov;
-       } gmnal_msghdr_t;
-#define GMNAL_MSGHDR_SIZE      sizeof(gmnal_msghdr_t)
-
-/*
- *     the caretaker thread (ct_thread) gets receive events
- *     (and other events) from the myrinet device via the GM2 API.
- *     caretaker thread populates one work entry for each receive event,
- *     puts it on a Q in nal_data and wakes a receive thread to  
- *     process the receive.  
- *     Processing a portals receive can involve a transmit operation. 
- *     Because of this the caretaker thread cannot process receives 
- *     as it may get deadlocked when supply of transmit descriptors 
- *     is exhausted (as caretaker thread is responsible for replacing 
- *     transmit descriptors on the free list)
- */
-typedef struct _gmnal_rxtwe {
-       void                    *buffer;
-       unsigned                snode;
-       unsigned                sport;
-       unsigned                type;
-       unsigned                length;
-       struct _gmnal_rxtwe     *next;
-} gmnal_rxtwe_t;
-
-/*
- *     1 receive thread started on each CPU
- */
-#define NRXTHREADS 10 /* max number of receiver threads */
-
-typedef struct _gmnal_data_t {
-       spinlock_t      stxd_lock;
-       struct semaphore stxd_token;
-       gmnal_stxd_t    *stxd;
-       spinlock_t      rxt_stxd_lock;
-       struct semaphore rxt_stxd_token;
-       gmnal_stxd_t    *rxt_stxd;
-       spinlock_t      ltxd_lock;
-       struct semaphore ltxd_token;
-       gmnal_ltxd_t    *ltxd;
-       spinlock_t      srxd_lock;
-       struct semaphore srxd_token;
-       gmnal_srxd_t    *srxd;
-       struct gm_hash  *srxd_hash;
-       nal_t           *nal;   
-       lib_nal_t       *libnal;
-       struct gm_port  *gm_port;
-       unsigned int    gm_local_nid;
-       unsigned int    gm_global_nid;
-       spinlock_t      gm_lock;
-       long            rxthread_pid[NRXTHREADS];
-       int             rxthread_stop_flag;
-       spinlock_t      rxthread_flag_lock;
-       long            rxthread_flag;
-       long            ctthread_pid;
-       int             ctthread_flag;
-       gm_alarm_t      ctthread_alarm;
-       int             small_msg_size;
-       int             small_msg_gmsize;
-       gmnal_rxtwe_t   *rxtwe_head;
-       gmnal_rxtwe_t   *rxtwe_tail;
-       spinlock_t      rxtwe_lock;
-       struct  semaphore rxtwe_wait;
-        struct ctl_table_header *sysctl;
-} gmnal_data_t;
-
-/*
- *     Flags to start/stop and check status of threads
- *     each rxthread sets 1 bit (any bit) of the flag on startup
- *     and clears 1 bit when exiting
- */
-#define GMNAL_THREAD_RESET     0
-#define GMNAL_THREAD_STOP      666
-#define GMNAL_CTTHREAD_STARTED 333
-#define GMNAL_RXTHREADS_STARTED ( (1<<num_rx_threads)-1)
-
-
-extern gmnal_data_t    *global_nal_data;
-
-/*
- * for ioctl get pid
- */
-#define GMNAL_IOC_GET_GNID 1   
-
-/*
- *     Return codes
- */
-#define GMNAL_STATUS_OK        0
-#define GMNAL_STATUS_FAIL      1
-#define GMNAL_STATUS_NOMEM     2
-
-
-/*
- *     FUNCTION PROTOTYPES
- */
-
-/*
- *     Locking macros
- */
-
-/*     Locks/tokens for the small tx and rx descriptor lists, ltxd list and GM.
- *     NB most of these expand with a trailing ';' -- kept for existing callers;
- *     the argument is parenthesised so any pointer expression may be passed. */
-#define GMNAL_TXD_LOCK_INIT(a)         spin_lock_init(&(a)->stxd_lock);
-#define GMNAL_TXD_LOCK(a)              spin_lock(&(a)->stxd_lock);
-#define GMNAL_TXD_UNLOCK(a)            spin_unlock(&(a)->stxd_lock);
-#define GMNAL_TXD_TOKEN_INIT(a, n)     sema_init(&(a)->stxd_token, (n));
-#define GMNAL_TXD_GETTOKEN(a)          down(&(a)->stxd_token);
-#define GMNAL_TXD_TRYGETTOKEN(a)       down_trylock(&(a)->stxd_token)
-#define GMNAL_TXD_RETURNTOKEN(a)       up(&(a)->stxd_token);
-
-#define GMNAL_RXT_TXD_LOCK_INIT(a)     spin_lock_init(&(a)->rxt_stxd_lock);
-#define GMNAL_RXT_TXD_LOCK(a)          spin_lock(&(a)->rxt_stxd_lock);
-#define GMNAL_RXT_TXD_UNLOCK(a)                spin_unlock(&(a)->rxt_stxd_lock);
-#define GMNAL_RXT_TXD_TOKEN_INIT(a, n) sema_init(&(a)->rxt_stxd_token, (n));
-#define GMNAL_RXT_TXD_GETTOKEN(a)      down(&(a)->rxt_stxd_token);
-#define GMNAL_RXT_TXD_TRYGETTOKEN(a)   down_trylock(&(a)->rxt_stxd_token)
-#define GMNAL_RXT_TXD_RETURNTOKEN(a)   up(&(a)->rxt_stxd_token);
-
-#define GMNAL_LTXD_LOCK_INIT(a)                spin_lock_init(&(a)->ltxd_lock);
-#define GMNAL_LTXD_LOCK(a)             spin_lock(&(a)->ltxd_lock);
-#define GMNAL_LTXD_UNLOCK(a)           spin_unlock(&(a)->ltxd_lock);
-#define GMNAL_LTXD_TOKEN_INIT(a, n)    sema_init(&(a)->ltxd_token, (n));
-#define GMNAL_LTXD_GETTOKEN(a)         down(&(a)->ltxd_token);
-#define GMNAL_LTXD_TRYGETTOKEN(a)      down_trylock(&(a)->ltxd_token)
-#define GMNAL_LTXD_RETURNTOKEN(a)      up(&(a)->ltxd_token);
-
-#define GMNAL_RXD_LOCK_INIT(a)         spin_lock_init(&(a)->srxd_lock);
-#define GMNAL_RXD_LOCK(a)              spin_lock(&(a)->srxd_lock);
-#define GMNAL_RXD_UNLOCK(a)            spin_unlock(&(a)->srxd_lock);
-#define GMNAL_RXD_TOKEN_INIT(a, n)     sema_init(&(a)->srxd_token, (n));
-#define GMNAL_RXD_GETTOKEN(a)          down(&(a)->srxd_token);
-#define GMNAL_RXD_TRYGETTOKEN(a)       down_trylock(&(a)->srxd_token)
-#define GMNAL_RXD_RETURNTOKEN(a)       up(&(a)->srxd_token);
-
-#define GMNAL_GM_LOCK_INIT(a)          spin_lock_init(&(a)->gm_lock);
-#define GMNAL_GM_LOCK(a)               spin_lock(&(a)->gm_lock);
-#define GMNAL_GM_UNLOCK(a)             spin_unlock(&(a)->gm_lock);
-
-
-/*
- *     Memory Allocator
- */
-
-/*
- *     API NAL
- */
-int gmnal_api_startup(nal_t *, ptl_pid_t, 
-                      ptl_ni_limits_t *, ptl_ni_limits_t *);
-
-int gmnal_api_forward(nal_t *, int, void *, size_t, void *, size_t);
-
-void gmnal_api_shutdown(nal_t *);
-
-int gmnal_api_validate(nal_t *, void *, size_t);
-
-void gmnal_api_yield(nal_t *, unsigned long *, int);
-
-void gmnal_api_lock(nal_t *, unsigned long *);
-
-void gmnal_api_unlock(nal_t *, unsigned long *);
-
-
-#define GMNAL_INIT_NAL(a)      do {    \
-                                (a)->nal_ni_init = gmnal_api_startup; \
-                               (a)->nal_ni_fini = gmnal_api_shutdown; \
-                               (a)->nal_data = NULL; \
-                               } while (0)
-
-
-/*
- *     CB NAL
- */
-
-ptl_err_t gmnal_cb_send(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
-       int, ptl_nid_t, ptl_pid_t, unsigned int, struct iovec *, size_t, size_t);
-
-ptl_err_t gmnal_cb_send_pages(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *,
-       int, ptl_nid_t, ptl_pid_t, unsigned int, ptl_kiov_t *, size_t, size_t);
-
-ptl_err_t gmnal_cb_recv(lib_nal_t *, void *, lib_msg_t *, 
-       unsigned int, struct iovec *, size_t, size_t, size_t);
-
-ptl_err_t gmnal_cb_recv_pages(lib_nal_t *, void *, lib_msg_t *, 
-       unsigned int, ptl_kiov_t *, size_t, size_t, size_t);
-
-int gmnal_cb_dist(lib_nal_t *, ptl_nid_t, unsigned long *);
-
-int gmnal_init(void);
-
-void  gmnal_fini(void);
-
-
-
-#define GMNAL_INIT_NAL_CB(a)   do {    \
-                               a->libnal_send = gmnal_cb_send; \
-                               a->libnal_send_pages = gmnal_cb_send_pages; \
-                               a->libnal_recv = gmnal_cb_recv; \
-                               a->libnal_recv_pages = gmnal_cb_recv_pages; \
-                               a->libnal_map = NULL; \
-                               a->libnal_unmap = NULL; \
-                               a->libnal_dist = gmnal_cb_dist; \
-                               a->libnal_data = NULL; \
-                               } while (0)
-
-
-/*
- *     Small and Large Transmit and Receive Descriptor Functions
- */
-int            gmnal_alloc_txd(gmnal_data_t *);
-void           gmnal_free_txd(gmnal_data_t *);
-gmnal_stxd_t*  gmnal_get_stxd(gmnal_data_t *, int);
-void           gmnal_return_stxd(gmnal_data_t *, gmnal_stxd_t *);
-gmnal_ltxd_t*  gmnal_get_ltxd(gmnal_data_t *);
-void           gmnal_return_ltxd(gmnal_data_t *, gmnal_ltxd_t *);
-
-int            gmnal_alloc_srxd(gmnal_data_t *);
-void           gmnal_free_srxd(gmnal_data_t *);
-gmnal_srxd_t*  gmnal_get_srxd(gmnal_data_t *, int);
-void           gmnal_return_srxd(gmnal_data_t *, gmnal_srxd_t *);
-
-/*
- *     general utility functions
- */
-gmnal_srxd_t   *gmnal_rxbuffer_to_srxd(gmnal_data_t *, void*);
-void           gmnal_stop_rxthread(gmnal_data_t *);
-void           gmnal_stop_ctthread(gmnal_data_t *);
-void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
-void           gmnal_drop_sends_callback(gm_port_t *, void *, gm_status_t);
-void           gmnal_resume_sending_callback(gm_port_t *, void *, gm_status_t);
-char           *gmnal_gm_error(gm_status_t);
-char           *gmnal_rxevent(gm_recv_event_t*);
-int            gmnal_is_small_msg(gmnal_data_t*, int, struct iovec*, int);
-void           gmnal_yield(int);
-int            gmnal_start_kernel_threads(gmnal_data_t *);
-
-
-/*
- *     Communication functions
- */
-
-/*
- *     Receive threads
- */
-int            gmnal_ct_thread(void *); /* caretaker thread */
-int            gmnal_rx_thread(void *); /* receive thread */
-int            gmnal_pre_receive(gmnal_data_t*, gmnal_rxtwe_t*, int);
-int            gmnal_rx_bad(gmnal_data_t *, gmnal_rxtwe_t *, gmnal_srxd_t*);
-int            gmnal_rx_requeue_buffer(gmnal_data_t *, gmnal_srxd_t *);
-int            gmnal_add_rxtwe(gmnal_data_t *, gm_recv_t *);
-gmnal_rxtwe_t * gmnal_get_rxtwe(gmnal_data_t *);
-void           gmnal_remove_rxtwe(gmnal_data_t *);
-
-
-/*
- *     Small messages
- */
-int            gmnal_small_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
-                               struct iovec *, size_t, size_t, size_t);
-int            gmnal_small_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
-                               int, ptl_nid_t, ptl_pid_t, 
-                               unsigned int, struct iovec*, size_t, int);
-void           gmnal_small_tx_callback(gm_port_t *, void *, gm_status_t);
-
-
-
-/*
- *     Large messages
- */
-int            gmnal_large_rx(lib_nal_t *, void *, lib_msg_t *, unsigned int, 
-                               struct iovec *, size_t, size_t, size_t);
-
-int            gmnal_large_tx(lib_nal_t *, void *, lib_msg_t *, ptl_hdr_t *, 
-                               int, ptl_nid_t, ptl_pid_t, unsigned int, 
-                               struct iovec*, size_t, int);
-
-void           gmnal_large_tx_callback(gm_port_t *, void *, gm_status_t);
-
-int            gmnal_remote_get(gmnal_srxd_t *, int, struct iovec*, int, 
-                                 struct iovec*);
-
-void           gmnal_remote_get_callback(gm_port_t *, void *, gm_status_t);
-
-int            gmnal_copyiov(int, gmnal_srxd_t *, int, struct iovec*, int, 
-                              struct iovec*);
-
-void           gmnal_large_tx_ack(gmnal_data_t *, gmnal_srxd_t *);
-void           gmnal_large_tx_ack_callback(gm_port_t *, void *, gm_status_t);
-void           gmnal_large_tx_ack_received(gmnal_data_t *, gmnal_srxd_t *);
-
-#endif /*__INCLUDE_GMNAL_H__*/
diff --git a/lustre/portals/knals/gmnal/gmnal_api.c b/lustre/portals/knals/gmnal/gmnal_api.c
deleted file mode 100644 (file)
index bd6c83e..0000000
+++ /dev/null
@@ -1,424 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- *     Implements the API NAL functions
- */
-
-#include "gmnal.h"
-
-
-
-/*
- *     Module-wide state.
- *     global_nal_data is NULL at load time, is assigned at the end of
- *     gmnal_api_startup() and is reset to NULL by gmnal_api_shutdown().
- *     global_nid_str receives the decimal text of the GM global node id
- *     that gmnal_api_startup() formats with snprintf().
- *     kgmnal_ni is the handle filled in by PtlNIInit() in gmnal_init().
- */
-gmnal_data_t   *global_nal_data = NULL;
-#define         GLOBAL_NID_STR_LEN      16
-char            global_nid_str[GLOBAL_NID_STR_LEN] = {0};
-ptl_handle_ni_t kgmnal_ni;
-
-extern int gmnal_cmd(struct portals_cfg *pcfg, void *private);
-
-/*
- *      Write the global nid /proc/sys/gmnal/globalnid
- *      The "globalnid" entry is backed by global_nid_str, is registered
- *      with mode 0444 and is served by proc_dostring.  It hangs off the
- *      "gmnal" directory entry (mode 0555) declared in the top table,
- *      which is what gmnal_api_startup() passes to
- *      register_sysctl_table().
- */
-#define GMNAL_SYSCTL    201
-#define GMNAL_SYSCTL_GLOBALNID  1
-
-static ctl_table gmnal_sysctl_table[] = {
-        {GMNAL_SYSCTL_GLOBALNID, "globalnid",
-         global_nid_str, GLOBAL_NID_STR_LEN,
-         0444, NULL, &proc_dostring},
-        { 0 }
-};
-
-
-static ctl_table gmnalnal_top_sysctl_table[] = {
-        {GMNAL_SYSCTL, "gmnal", NULL, 0, 0555, gmnal_sysctl_table},
-        { 0 }
-};
-
-/*
- *     gmnal_api_shutdown
- *      nal_refct == 0 => called on last matching PtlNIFini()
- *     Close down this interface and free any resources associated with it
- *     nal_t   nal     our nal to shutdown
- *
- *     Returns nothing.  While nal_refct is still non-zero the call is
- *     a no-op.  Otherwise the teardown order is: unregister the ioctl
- *     handler, lib_fini(), stop both threads, free the tx and rx
- *     descriptors, close and finalize GM under the GM lock, drop the
- *     sysctl table, then free nal_data and libnal.
- */
-void
-gmnal_api_shutdown(nal_t *nal)
-{
-       gmnal_data_t    *nal_data;
-       lib_nal_t       *libnal;
-
-        if (nal->nal_refct != 0)
-                return;
-        
-
-        /* nal->nal_data is read as the lib_nal_t, and its libnal_data
-         * as our gmnal_data_t; both must match the recorded global */
-        LASSERT(nal == global_nal_data->nal);
-        libnal = (lib_nal_t *)nal->nal_data;
-        nal_data = (gmnal_data_t *)libnal->libnal_data;
-        LASSERT(nal_data == global_nal_data);
-       CDEBUG(D_TRACE, "gmnal_api_shutdown: nal_data [%p]\n", nal_data);
-
-        /* Stop portals calling our ioctl handler */
-        libcfs_nal_cmd_unregister(GMNAL);
-
-        /* XXX for shutdown "under fire" we probably need to set a shutdown
-         * flag so when lib calls us we fail immediately and dont queue any
-         * more work but our threads can still call into lib OK.  THEN
-         * shutdown our threads, THEN lib_fini() */
-        lib_fini(libnal);
-
-       gmnal_stop_rxthread(nal_data);
-       gmnal_stop_ctthread(nal_data);
-       gmnal_free_txd(nal_data);
-       gmnal_free_srxd(nal_data);
-       /* gm_close() and gm_finalize() are issued under one hold of
-        * the GM lock */
-       GMNAL_GM_LOCK(nal_data);
-       gm_close(nal_data->gm_port);
-       gm_finalize();
-       GMNAL_GM_UNLOCK(nal_data);
-        if (nal_data->sysctl)
-                unregister_sysctl_table (nal_data->sysctl);
-        /* Don't free 'nal'; it's a static struct */
-       PORTAL_FREE(nal_data, sizeof(gmnal_data_t));    
-       PORTAL_FREE(libnal, sizeof(lib_nal_t));
-
-        /* a later gmnal_api_startup() asserts this is NULL again */
-        global_nal_data = NULL;
-        PORTAL_MODULE_UNUSE;
-}
-
-
-/*
- *     gmnal_api_startup
- *      Brings the GM interface up on the first PtlNIInit().  A call
- *      made while nal_refct != 0 only reports the limits already in
- *      force and returns PTL_OK.
- *
- *      nal               our nal to start
- *      requested_pid     becomes the pid half of the portals process id
- *      requested_limits  passed through to lib_init()
- *      actual_limits     passed through to lib_init(); on a repeat call
- *                        it is filled from the running instance and may
- *                        be NULL
- *
- *      Returns PTL_OK on success, PTL_NO_SPACE when nal_data or libnal
- *      cannot be allocated, PTL_FAIL for every other failure.
- *
- *      Every failure after both structures exist unwinds through the
- *      labels at the bottom of the function.  Each label undoes one
- *      stage and falls through to the stages set up before it.
- */
-int
-gmnal_api_startup(nal_t *nal, ptl_pid_t requested_pid,
-                  ptl_ni_limits_t *requested_limits,
-                  ptl_ni_limits_t *actual_limits)
-{
-        lib_nal_t       *libnal = NULL;
-        gmnal_data_t    *nal_data = NULL;
-        gmnal_srxd_t    *srxd = NULL;
-        gm_status_t     gm_status;
-        unsigned int    local_nid = 0, global_nid = 0;
-        ptl_process_id_t process_id;
-
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL) {
-                        libnal = (lib_nal_t *)nal->nal_data;
-                        *actual_limits = libnal->libnal_ni.ni_actual_limits;
-                }
-                return (PTL_OK);
-        }
-
-        /* Called on first PtlNIInit() */
-
-        CDEBUG(D_TRACE, "startup\n");
-
-        LASSERT(global_nal_data == NULL);
-
-        PORTAL_ALLOC(nal_data, sizeof(gmnal_data_t));
-        if (!nal_data) {
-                CDEBUG(D_ERROR, "can't get memory\n");
-                return(PTL_NO_SPACE);
-        }
-        memset(nal_data, 0, sizeof(gmnal_data_t));
-
-        /* small_msg_size is still zero here; it is set once the GM
-         * port is open */
-        CDEBUG(D_INFO, "Allocd and reset nal_data[%p]\n", nal_data);
-        CDEBUG(D_INFO, "small_msg_size is [%d]\n", nal_data->small_msg_size);
-
-        PORTAL_ALLOC(libnal, sizeof(lib_nal_t));
-        if (!libnal) {
-                PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
-                return(PTL_NO_SPACE);
-        }
-        memset(libnal, 0, sizeof(lib_nal_t));
-        CDEBUG(D_INFO, "Allocd and reset libnal[%p]\n", libnal);
-
-        GMNAL_INIT_NAL_CB(libnal);
-        /*
-         *      String them all together
-         */
-        libnal->libnal_data = (void*)nal_data;
-        nal_data->nal = nal;
-        nal_data->libnal = libnal;
-
-        GMNAL_GM_LOCK_INIT(nal_data);
-
-        /*
-         *      initialise the interface,
-         */
-        CDEBUG(D_INFO, "Calling gm_init\n");
-        if (gm_init() != GM_SUCCESS) {
-                CDEBUG(D_ERROR, "call to gm_init failed\n");
-                goto failed_free;
-        }
-
-        CDEBUG(D_NET, "Calling gm_open with port [%d], "
-                      "name [%s], version [%d]\n", GMNAL_GM_PORT,
-               "gmnal", GM_API_VERSION);
-
-        GMNAL_GM_LOCK(nal_data);
-        gm_status = gm_open(&nal_data->gm_port, 0, GMNAL_GM_PORT, "gmnal",
-                            GM_API_VERSION);
-        GMNAL_GM_UNLOCK(nal_data);
-
-        CDEBUG(D_INFO, "gm_open returned [%d]\n", gm_status);
-        if (gm_status != GM_SUCCESS) {
-                switch(gm_status) {
-                case(GM_INVALID_PARAMETER):
-                        CDEBUG(D_ERROR, "gm_open Failure. Invalid Parameter\n");
-                        break;
-                case(GM_BUSY):
-                        CDEBUG(D_ERROR, "gm_open Failure. GM Busy\n");
-                        break;
-                case(GM_NO_SUCH_DEVICE):
-                        CDEBUG(D_ERROR, "gm_open Failure. No such device\n");
-                        break;
-                case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                        CDEBUG(D_ERROR, "gm_open Failure. Incompatile lib "
-                               "and driver\n");
-                        break;
-                case(GM_OUT_OF_MEMORY):
-                        CDEBUG(D_ERROR, "gm_open Failure. Out of Memory\n");
-                        break;
-                default:
-                        CDEBUG(D_ERROR, "gm_open Failure. Unknow error "
-                               "code [%d]\n", gm_status);
-                        break;
-                }
-                /* no port to close yet, only gm_init() to undo */
-                GMNAL_GM_LOCK(nal_data);
-                gm_finalize();
-                GMNAL_GM_UNLOCK(nal_data);
-                goto failed_free;
-        }
-        CDEBUG(D_INFO, "gm_open succeeded port[%p]\n", nal_data->gm_port);
-
-        nal_data->small_msg_size = gmnal_small_msg_size;
-        nal_data->small_msg_gmsize =
-                        gm_min_size_for_length(gmnal_small_msg_size);
-
-        if (gmnal_alloc_srxd(nal_data) != GMNAL_STATUS_OK) {
-                CDEBUG(D_ERROR, "Failed to allocate small rx descriptors\n");
-                /* NOTE(review): no tx descriptors have been allocated at
-                 * this point; the call is kept only because the previous
-                 * code made it here.  Confirm whether gmnal_free_srxd()
-                 * was intended instead. */
-                gmnal_free_txd(nal_data);
-                goto failed_close;
-        }
-
-        /*
-         *      Hang out a bunch of small receive buffers
-         *      In fact hang them all out
-         */
-        while((srxd = gmnal_get_srxd(nal_data, 0))) {
-                CDEBUG(D_NET, "giving [%p] to gm_provide_recvive_buffer\n",
-                       srxd->buffer);
-                GMNAL_GM_LOCK(nal_data);
-                gm_provide_receive_buffer_with_tag(nal_data->gm_port,
-                                                   srxd->buffer, srxd->gmsize,
-                                                   GM_LOW_PRIORITY, 0);
-                GMNAL_GM_UNLOCK(nal_data);
-        }
-
-        /*
-         *      Allocate pools of small tx buffers and descriptors
-         */
-        if (gmnal_alloc_txd(nal_data) != GMNAL_STATUS_OK) {
-                CDEBUG(D_ERROR, "Failed to allocate small tx descriptors\n");
-                /* the rx descriptors exist by now and must be released
-                 * too, as every later failure path does */
-                goto failed_free_srxd;
-        }
-
-        gmnal_start_kernel_threads(nal_data);
-
-        while (nal_data->rxthread_flag != GMNAL_RXTHREADS_STARTED) {
-                gmnal_yield(1);
-                CDEBUG(D_INFO, "Waiting for receive thread signs of life\n");
-        }
-
-        CDEBUG(D_INFO, "receive thread seems to have started\n");
-
-        /*
-         *      Initialise the portals library
-         */
-        CDEBUG(D_NET, "Getting node id\n");
-        GMNAL_GM_LOCK(nal_data);
-        gm_status = gm_get_node_id(nal_data->gm_port, &local_nid);
-        GMNAL_GM_UNLOCK(nal_data);
-        if (gm_status != GM_SUCCESS) {
-                CDEBUG(D_ERROR, "can't determine node id\n");
-                goto failed_stop_threads;
-        }
-
-        nal_data->gm_local_nid = local_nid;
-        CDEBUG(D_INFO, "Local node id is [%u]\n", local_nid);
-
-        GMNAL_GM_LOCK(nal_data);
-        gm_status = gm_node_id_to_global_id(nal_data->gm_port, local_nid,
-                                            &global_nid);
-        GMNAL_GM_UNLOCK(nal_data);
-        if (gm_status != GM_SUCCESS) {
-                CDEBUG(D_ERROR, "failed to obtain global id\n");
-                goto failed_stop_threads;
-        }
-        CDEBUG(D_INFO, "Global node id is [%u]\n", global_nid);
-        nal_data->gm_global_nid = global_nid;
-        snprintf(global_nid_str, GLOBAL_NID_STR_LEN, "%u", global_nid);
-
-        process_id.pid = requested_pid;
-        process_id.nid = global_nid;
-
-        CDEBUG(D_INFO, "portals_pid is [%u]\n", process_id.pid);
-        CDEBUG(D_INFO, "portals_nid is ["LPU64"]\n", process_id.nid);
-
-        CDEBUG(D_PORTALS, "calling lib_init\n");
-        if (lib_init(libnal, nal, process_id,
-                     requested_limits, actual_limits) != PTL_OK) {
-                CDEBUG(D_ERROR, "lib_init failed\n");
-                goto failed_stop_threads;
-        }
-
-        if (libcfs_nal_cmd_register(GMNAL, &gmnal_cmd, libnal->libnal_data) != 0) {
-                CDEBUG(D_INFO, "libcfs_nal_cmd_register failed\n");
-                goto failed_lib_fini;
-        }
-
-        /* might be better to initialise this at module load rather than in
-         * NAL startup */
-        nal_data->sysctl = register_sysctl_table (gmnalnal_top_sysctl_table, 0);
-
-        CDEBUG(D_INFO, "gmnal_init finished\n");
-        /* Record the gmnal_data_t itself.  gmnal_api_shutdown() reads
-         * nal->nal_data as the lib_nal_t and asserts that its
-         * libnal_data equals global_nal_data, so storing nal->nal_data
-         * here would trip that assertion. */
-        global_nal_data = nal_data;
-
-        /* no unload now until shutdown */
-        PORTAL_MODULE_USE;
-
-        return(PTL_OK);
-
-        /*
-         *      Failure unwind, latest stage first
-         */
-failed_lib_fini:
-        lib_fini(libnal);
-failed_stop_threads:
-        gmnal_stop_rxthread(nal_data);
-        gmnal_stop_ctthread(nal_data);
-        gmnal_free_txd(nal_data);
-failed_free_srxd:
-        gmnal_free_srxd(nal_data);
-failed_close:
-        GMNAL_GM_LOCK(nal_data);
-        gm_close(nal_data->gm_port);
-        gm_finalize();
-        GMNAL_GM_UNLOCK(nal_data);
-failed_free:
-        PORTAL_FREE(nal_data, sizeof(gmnal_data_t));
-        PORTAL_FREE(libnal, sizeof(lib_nal_t));
-        return(PTL_FAIL);
-}
-
-nal_t the_gm_nal;
-
-/* 
- *        Called when module loaded
- *        Resets and initialises the_gm_nal, registers it as GMNAL and
- *        then opens the interface with PtlNIInit() so that kgmnal_ni
- *        holds a reference from load time.
- *
- *        Returns 0 on success (PTL_IFACE_DUP from PtlNIInit() is
- *        treated as success), -ENODEV if the NAL cannot be registered
- *        or the interface cannot be initialised.
- */
-int gmnal_init(void)
-{
-        int    rc;
-
-        memset(&the_gm_nal, 0, sizeof(nal_t));
-        CDEBUG(D_INFO, "reset nal[%p]\n", &the_gm_nal);
-        GMNAL_INIT_NAL(&the_gm_nal);
-
-        rc = ptl_register_nal(GMNAL, &the_gm_nal);
-        if (rc != PTL_OK) {
-                CERROR("Can't register GMNAL: %d\n", rc);
-                /* nothing was registered, so there is nothing to
-                 * initialise or unregister */
-                return (-ENODEV);
-        }
-
-        rc = PtlNIInit(GMNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kgmnal_ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(GMNAL);
-                return (-ENODEV);
-        }
-
-        /* Failures above are negative errnos, so success is a plain 0
-         * rather than a portals status code.
-         * NOTE(review): assumes PTL_OK == 0 - confirm. */
-        return (0);
-}
-
-                
-
-/*
- *     Called when module removed
- *     Drops the interface reference taken in gmnal_init() and then
- *     unregisters the NAL.
- */
-void gmnal_fini(void)
-{
-        CDEBUG(D_TRACE, "gmnal_fini\n");
-
-        PtlNIFini(kgmnal_ni);
-
-        /* gmnal_api_shutdown() runs on the last matching PtlNIFini()
-         * and clears global_nal_data, so the check only makes sense
-         * once our own reference has been dropped. */
-        LASSERT(global_nal_data == NULL);
-
-        ptl_unregister_nal(GMNAL);
-}
diff --git a/lustre/portals/knals/gmnal/gmnal_cb.c b/lustre/portals/knals/gmnal/gmnal_cb.c
deleted file mode 100644 (file)
index 0ebf437..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- *     This file implements the nal cb functions
- */
-
-
-#include "gmnal.h"
-
-/*
- *     Receive callback for iovec payloads.
- *     'private' is the small receive descriptor for the message; its
- *     type selects gmnal_small_rx or gmnal_large_rx.  Any other type
- *     is left alone and PTL_OK is returned.
- */
-ptl_err_t gmnal_cb_recv(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-                  unsigned int niov, struct iovec *iov, size_t offset, 
-                  size_t mlen, size_t rlen)
-{
-        gmnal_srxd_t    *rxd = (gmnal_srxd_t*)private;
-        int             rc = PTL_OK;
-
-        CDEBUG(D_TRACE, "gmnal_cb_recv libnal [%p], private[%p], cookie[%p], "
-               "niov[%d], iov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n", 
-               libnal, private, cookie, niov, iov, offset, mlen, rlen);
-
-        if (rxd->type == GMNAL_SMALL_MESSAGE) {
-                CDEBUG(D_INFO, "gmnal_cb_recv got small message\n");
-                rc = gmnal_small_rx(libnal, private, cookie, niov,
-                                    iov, offset, mlen, rlen);
-        } else if (rxd->type == GMNAL_LARGE_MESSAGE_INIT) {
-                CDEBUG(D_INFO, "gmnal_cb_recv got large message init\n");
-                rc = gmnal_large_rx(libnal, private, cookie, niov,
-                                    iov, offset, mlen, rlen);
-        }
-
-        CDEBUG(D_INFO, "gmnal_cb_recv gmnal_return status [%d]\n", rc);
-        return(rc);
-}
-
-/*
- *     Receive callback for page (kiov) payloads.
- *     For a small message every page is kmap()ed into a temporary
- *     iovec array, the array is handed to gmnal_small_rx, and the
- *     pages are kunmap()ed again before the array is freed.
- *     Any other descriptor type is not processed here and PTL_OK is
- *     returned for it.
- *
- *     Returns the status of gmnal_small_rx, or PTL_NO_SPACE when the
- *     temporary iovec array cannot be allocated.
- */
-ptl_err_t gmnal_cb_recv_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-                        unsigned int kniov, ptl_kiov_t *kiov, size_t offset, 
-                        size_t mlen, size_t rlen)
-{
-        gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
-        int             status = PTL_OK;
-        struct iovec    *iovec = NULL;
-        int             i = 0;
-
-
-        CDEBUG(D_TRACE, "gmnal_cb_recv_pages libnal [%p],private[%p], "
-               "cookie[%p], kniov[%d], kiov [%p], offset["LPSZ"], mlen["LPSZ"], rlen["LPSZ"]\n",
-               libnal, private, cookie, kniov, kiov, offset, mlen, rlen);
-
-        if (srxd->type == GMNAL_SMALL_MESSAGE) {
-                PORTAL_ALLOC(iovec, sizeof(struct iovec)*kniov);
-                if (!iovec) {
-                        CDEBUG(D_ERROR, "Can't malloc\n");
-                        /* this callback reports ptl_err_t codes, not
-                         * the NAL-internal GMNAL_STATUS_* values */
-                        return(PTL_NO_SPACE);
-                }
-
-                /*
-                 *      map each page and create an iovec for it
-                 */
-                for (i=0; i<kniov; i++) {
-                        CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i,
-                               &kiov[i]);
-                        CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-                               kiov[i].kiov_page, kiov[i].kiov_len,
-                               kiov[i].kiov_offset);
-                        iovec[i].iov_len = kiov[i].kiov_len;
-                        CDEBUG(D_INFO, "Calling kmap[%p]", kiov[i].kiov_page);
-
-                        iovec[i].iov_base = kmap(kiov[i].kiov_page) +
-                                                 kiov[i].kiov_offset;
-
-                        CDEBUG(D_INFO, "iov_base is [%p]\n",
-                               iovec[i].iov_base);
-                }
-                CDEBUG(D_INFO, "calling gmnal_small_rx\n");
-                status = gmnal_small_rx(libnal, private, cookie, kniov,
-                                        iovec, offset, mlen, rlen);
-                /* undo every kmap() made above */
-                for (i=0; i<kniov; i++)
-                        kunmap(kiov[i].kiov_page);
-                PORTAL_FREE(iovec, sizeof(struct iovec)*kniov);
-        }
-
-
-        CDEBUG(D_INFO, "gmnal_return status [%d]\n", status);
-        return(status);
-}
-
-
-/*
- *     Send callback for iovec payloads.
- *     Messages that GMNAL_IS_SMALL_MESSAGE accepts are passed to
- *     gmnal_small_tx and PTL_OK is returned.  Anything larger is not
- *     supported: the message is finalized with PTL_FAIL and PTL_FAIL
- *     is returned.
- */
-ptl_err_t gmnal_cb_send(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-                  ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, 
-                  unsigned int niov, struct iovec *iov, size_t offset, size_t len)
-{
-
-        gmnal_data_t    *nal_data;
-
-
-        CDEBUG(D_TRACE, "gmnal_cb_send niov[%d] offset["LPSZ"] len["LPSZ"] nid["LPU64"]\n", 
-               niov, offset, len, nid);
-        nal_data = libnal->libnal_data;
-
-        if (GMNAL_IS_SMALL_MESSAGE(nal_data, niov, iov, len)) {
-                CDEBUG(D_INFO, "This is a small message send\n");
-                /* NOTE(review): the status returned by gmnal_small_tx
-                 * is not examined - confirm that is intended */
-                gmnal_small_tx(libnal, private, cookie, hdr, type, nid, pid,
-                               niov, iov, offset,  len);
-                return(PTL_OK);
-        }
-
-        /* The large message path was never reachable (the call to
-         * gmnal_large_tx sat after the return), so it is dropped. */
-        CDEBUG(D_ERROR, "Large message send it is not supported\n");
-        /* NOTE(review): gmnal_cb_send_pages returns PTL_FAIL without
-         * finalizing - confirm which of the two the caller expects */
-        lib_finalize(libnal, private, cookie, PTL_FAIL);
-        return(PTL_FAIL);
-}
-
-/*
- *     Send callback for page (kiov) payloads.
- *     For a small message every page is kmap()ed into a temporary
- *     iovec array, the array is passed to gmnal_small_tx, and the
- *     pages are kunmap()ed again before the array is freed.
- *
- *     Returns PTL_OK after a small send has been handed over,
- *     PTL_FAIL for a large message (not supported yet), or
- *     PTL_NO_SPACE when the iovec array cannot be allocated.
- */
-ptl_err_t gmnal_cb_send_pages(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-                        ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                         unsigned int kniov, ptl_kiov_t *kiov, size_t offset, size_t len)
-{
-
-        int     i = 0;
-        gmnal_data_t    *nal_data;
-        struct  iovec   *iovec = NULL;
-
-        CDEBUG(D_TRACE, "gmnal_cb_send_pages nid ["LPU64"] niov[%d] offset["LPSZ"] len["LPSZ"]\n", 
-                nid, kniov, offset, len);
-        nal_data = libnal->libnal_data;
-
-        if (GMNAL_IS_SMALL_MESSAGE(nal_data, 0, NULL, len)) {
-                CDEBUG(D_INFO, "This is a small message send\n");
-
-                /* allocate only once we know the message will be sent,
-                 * so the rejection below has nothing to release */
-                PORTAL_ALLOC(iovec, kniov*sizeof(struct iovec));
-                if (!iovec) {
-                        CDEBUG(D_ERROR, "Can't malloc\n");
-                        return(PTL_NO_SPACE);
-                }
-
-                for (i=0; i<kniov; i++) {
-                        CDEBUG(D_INFO, "processing kniov [%d] [%p]\n", i,
-                               &kiov[i]);
-                        CDEBUG(D_INFO, "kniov page [%p] len [%d] offset[%d]\n",
-                               kiov[i].kiov_page, kiov[i].kiov_len,
-                               kiov[i].kiov_offset);
-
-                        iovec[i].iov_base = kmap(kiov[i].kiov_page)
-                                                + kiov[i].kiov_offset;
-
-                        iovec[i].iov_len = kiov[i].kiov_len;
-                }
-                /* NOTE(review): the status returned by gmnal_small_tx
-                 * is not examined - confirm that is intended */
-                gmnal_small_tx(libnal, private, cookie, hdr, type, nid,
-                               pid, kniov, iovec, offset, len);
-
-                /* undo every kmap() made above */
-                for (i=0; i<kniov; i++)
-                        kunmap(kiov[i].kiov_page);
-                PORTAL_FREE(iovec, kniov*sizeof(struct iovec));
-                return(PTL_OK);
-        }
-
-        /* The large message path was never reachable (it sat after the
-         * return), so it is dropped. */
-        CDEBUG(D_ERROR, "Large message send it is not supported yet\n");
-        return(PTL_FAIL);
-}
-
-/*
- *     Distance callback.
- *     Stores the same fixed value (27) through dist whatever the nid
- *     is; a NULL dist is accepted and nothing is written.  Always
- *     returns PTL_OK.
- */
-int gmnal_cb_dist(lib_nal_t *libnal, ptl_nid_t nid, unsigned long *dist)
-{
-        CDEBUG(D_TRACE, "gmnal_cb_dist\n");
-
-        if (dist != NULL) {
-                *dist = 27;
-        }
-
-        return(PTL_OK);
-}
diff --git a/lustre/portals/knals/gmnal/gmnal_comm.c b/lustre/portals/knals/gmnal/gmnal_comm.c
deleted file mode 100644 (file)
index 6a8fcbc..0000000
+++ /dev/null
@@ -1,1380 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/*
- *     This file contains all gmnal send and receive functions
- */
-
-#include "gmnal.h"
-
-/*
- *     The caretaker thread
- *     This is main thread of execution for the NAL side
- *     This guy waits in gm_blocking_receive and gets
- *     woken up when the myrinet adaptor gets an interrupt.
- *     Hands off receive operations to the receive thread 
- *     This thread Looks after gm_callbacks etc inline.
- *
- *     arg is the gmnal_data_t for this NAL.
- *     Returns -1 when arg is NULL, GMNAL_STATUS_OK once the loop has
- *     ended.  The loop runs while ctthread_flag stays at
- *     GMNAL_CTTHREAD_STARTED; on the way out the flag is set to
- *     GMNAL_THREAD_RESET.
- *     The GM lock is held while waiting in
- *     gm_blocking_receive_no_spin and is dropped around
- *     gmnal_add_rxtwe and gm_unknown.
- */
-int
-gmnal_ct_thread(void *arg)
-{
-       gmnal_data_t            *nal_data;
-       gm_recv_event_t         *rxevent = NULL;
-       gm_recv_t               *recv = NULL;
-
-       if (!arg) {
-               CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
-               return(-1);
-       }
-
-       nal_data = (gmnal_data_t*)arg;
-       CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
-
-       daemonize();
-
-       nal_data->ctthread_flag = GMNAL_CTTHREAD_STARTED;
-
-       GMNAL_GM_LOCK(nal_data);
-       while(nal_data->ctthread_flag == GMNAL_CTTHREAD_STARTED) {
-               CDEBUG(D_NET, "waiting\n");
-               rxevent = gm_blocking_receive_no_spin(nal_data->gm_port);
-               /* the flag is checked again after the wait so that a
-                * stop request is honoured before the event is used */
-               if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) {
-                       CDEBUG(D_INFO, "time to exit\n");
-                       break;
-               }
-               CDEBUG(D_INFO, "got [%s]\n", gmnal_rxevent(rxevent));
-               switch (GM_RECV_EVENT_TYPE(rxevent)) {
-
-                       case(GM_RECV_EVENT):
-                               CDEBUG(D_NET, "CTTHREAD:: GM_RECV_EVENT\n");
-                               recv = (gm_recv_t*)&rxevent->recv;
-                               GMNAL_GM_UNLOCK(nal_data);
-                               gmnal_add_rxtwe(nal_data, recv);
-                               GMNAL_GM_LOCK(nal_data);
-                               CDEBUG(D_NET, "CTTHREAD:: Added event to Q\n");
-                       break;
-                       case(_GM_SLEEP_EVENT):
-                               /*
-                                *      Blocking receive above just returns
-                                *      immediately with _GM_SLEEP_EVENT
-                                *      Don't know what this is
-                                */
-                               CDEBUG(D_NET, "Sleeping in gm_unknown\n");
-                               GMNAL_GM_UNLOCK(nal_data);
-                               gm_unknown(nal_data->gm_port, rxevent);
-                               GMNAL_GM_LOCK(nal_data);
-                               CDEBUG(D_INFO, "Awake from gm_unknown\n");
-                               break;
-                               
-                       default:
-                               /*
-                                *      Don't know what this is
-                                *      gm_unknown will make sense of it
-                                *      Should be able to do something with
-                                *      FAST_RECV_EVENTS here.
-                                */
-                               CDEBUG(D_NET, "Passing event to gm_unknown\n");
-                               GMNAL_GM_UNLOCK(nal_data);
-                               gm_unknown(nal_data->gm_port, rxevent);
-                               GMNAL_GM_LOCK(nal_data);
-                               CDEBUG(D_INFO, "Processed unknown event\n");
-               }
-       }
-       GMNAL_GM_UNLOCK(nal_data);
-       nal_data->ctthread_flag = GMNAL_THREAD_RESET;
-       CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data);
-       return(GMNAL_STATUS_OK);
-}
-
-
-/*
- *     process a receive event
- *
- *     arg is the gmnal_data_t for this NAL.
- *     Returns -1 when arg is NULL, GMNAL_STATUS_OK once the loop has
- *     ended.  The loop takes work entries from gmnal_get_rxtwe,
- *     dispatches each on the type found in the gmnal message header
- *     at the start of its buffer, then frees the entry.  It ends when
- *     rxthread_stop_flag becomes GMNAL_THREAD_STOP or when
- *     gmnal_get_rxtwe returns NULL.
- */
-int gmnal_rx_thread(void *arg)
-{
-       gmnal_data_t            *nal_data;
-       void                    *buffer;
-       gmnal_rxtwe_t           *we = NULL;
-
-       if (!arg) {
-               CDEBUG(D_TRACE, "NO nal_data. Exiting\n");
-               return(-1);
-       }
-
-       nal_data = (gmnal_data_t*)arg;
-       CDEBUG(D_TRACE, "nal_data is [%p]\n", arg);
-
-       daemonize();
-       /*
-        *      set 1 bit for each thread started
-        *      doesn't matter which bit
-        *      (flag becomes flag*2 + 1, or 1 when it was zero;
-        *      the update is made under rxthread_flag_lock)
-        */
-       spin_lock(&nal_data->rxthread_flag_lock);
-       if (nal_data->rxthread_flag)
-               nal_data->rxthread_flag=nal_data->rxthread_flag*2 + 1;
-       else
-               nal_data->rxthread_flag = 1;
-       CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag);
-       spin_unlock(&nal_data->rxthread_flag_lock);
-
-       while(nal_data->rxthread_stop_flag != GMNAL_THREAD_STOP) {
-               CDEBUG(D_NET, "RXTHREAD:: Receive thread waiting\n");
-               we = gmnal_get_rxtwe(nal_data);
-               if (!we) {
-                       CDEBUG(D_INFO, "Receive thread time to exit\n");
-                       break;
-               }
-
-               buffer = we->buffer;
-               switch(((gmnal_msghdr_t*)buffer)->type) {
-               case(GMNAL_SMALL_MESSAGE):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_SMALL_MESSAGE);
-               break;  
-               case(GMNAL_LARGE_MESSAGE_INIT):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_LARGE_MESSAGE_INIT);
-               break;  
-               case(GMNAL_LARGE_MESSAGE_ACK):
-                       gmnal_pre_receive(nal_data, we, 
-                                          GMNAL_LARGE_MESSAGE_ACK);
-               break;  
-               default:
-                       CDEBUG(D_ERROR, "Unsupported message type\n");
-                       gmnal_rx_bad(nal_data, we, NULL);
-               }
-               PORTAL_FREE(we, sizeof(gmnal_rxtwe_t));
-       }
-
-       /* halving the flag removes the bit this thread added at start */
-       spin_lock(&nal_data->rxthread_flag_lock);
-       nal_data->rxthread_flag/=2;
-       CDEBUG(D_INFO, "rxthread flag is [%ld]\n", nal_data->rxthread_flag);
-       spin_unlock(&nal_data->rxthread_flag_lock);
-       CDEBUG(D_INFO, "thread nal_data [%p] is exiting\n", nal_data);
-       return(GMNAL_STATUS_OK);
-}
-
-
-
-/*
- *     First stage of handling a received buffer; gmnal_rx_thread
- *     calls this for each work entry it dequeues.
- *     The buffer starts with a gmnal message header and the portals
- *     header follows GMNAL_MSGHDR_SIZE bytes in.  The receive
- *     descriptor owning the buffer is looked up; a large message ACK
- *     is consumed here by gmnal_large_tx_ack_received, everything
- *     else is recorded in the descriptor and handed to lib_parse.
- *
- *     Returns GMNAL_STATUS_OK, or GMNAL_STATUS_FAIL when no receive
- *     descriptor matches the buffer.
- */
-int
-gmnal_pre_receive(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, int gmnal_type)
-{
-        gmnal_srxd_t    *rxd = NULL;
-        void            *rxbuf = NULL;
-        unsigned int    src_node, src_port, ev_type, ev_len;
-        gmnal_msghdr_t  *msghdr;
-        ptl_hdr_t       *ptlhdr;
-        int             parse_rc;
-
-        CDEBUG(D_INFO, "nal_data [%p], we[%p] type [%d]\n", 
-               nal_data, we, gmnal_type);
-
-        /* snapshot the work entry */
-        rxbuf = we->buffer;
-        src_node = we->snode;
-        src_port = we->sport;
-        ev_type = we->type;
-        ev_len = we->length;
-
-        msghdr = (gmnal_msghdr_t*)rxbuf;
-        ptlhdr = (ptl_hdr_t*)(rxbuf+GMNAL_MSGHDR_SIZE);
-
-        CDEBUG(D_INFO, "rx_event:: Sender node [%d], Sender Port [%d], "
-               "type [%d], length [%d], buffer [%p]\n",
-               src_node, src_port, ev_type, ev_len, rxbuf);
-        CDEBUG(D_INFO, "gmnal_msghdr:: Sender node [%u], magic [%d], "
-               "gmnal_type [%d]\n", msghdr->sender_node_id, 
-               msghdr->magic, msghdr->type);
-        CDEBUG(D_INFO, "portals_hdr:: Sender node ["LPD64"], "
-               "dest_node ["LPD64"]\n", ptlhdr->src_nid, 
-               ptlhdr->dest_nid);
-
-        /* find the receive descriptor that owns this buffer */
-        rxd = gmnal_rxbuffer_to_srxd(nal_data, rxbuf);
-        CDEBUG(D_INFO, "Back from gmnal_rxbuffer_to_srxd\n");
-        if (rxd == NULL) {
-                CDEBUG(D_ERROR, "Failed to get receive descriptor\n");
-                /* I think passing a NULL srxd to lib_parse will crash
-                 * gmnal_recv() */
-                LBUG();
-                lib_parse(nal_data->libnal, ptlhdr, rxd);
-                return(GMNAL_STATUS_FAIL);
-        }
-
-        /* an ACK for a large send is handled here and never reaches
-         * the portals library */
-        if (gmnal_type == GMNAL_LARGE_MESSAGE_ACK) {
-                gmnal_large_tx_ack_received(nal_data, rxd);
-                return(GMNAL_STATUS_OK);
-        }
-
-        rxd->nal_data = nal_data;
-        rxd->type = gmnal_type;
-        rxd->nsiov = msghdr->niov;
-        rxd->gm_source_node = msghdr->sender_node_id;
-
-        CDEBUG(D_PORTALS, "Calling lib_parse buffer is [%p]\n", 
-               rxbuf+GMNAL_MSGHDR_SIZE);
-        /*
-         *      control passes to lib, which calls cb_recv
-         *      cb_recv is responsible for returning the buffer
-         *      for future receive
-         */
-        parse_rc = lib_parse(nal_data->libnal, ptlhdr, rxd);
-        if (parse_rc != PTL_OK) {
-                /* I just received garbage; take appropriate action... */
-                LBUG();
-        }
-
-        return(GMNAL_STATUS_OK);
-}
-
-
-
-/*
- *     Give a receive buffer back to GM once its contents have been
- *     dealt with, so that it can be used for another receive.
- *     This implicitly returns a receive token.
- *     Always returns GMNAL_STATUS_OK.
- */
-int
-gmnal_rx_requeue_buffer(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
-{
-       CDEBUG(D_TRACE, "gmnal_rx_requeue_buffer\n");
-       CDEBUG(D_NET, "requeueing srxd[%p] nal_data[%p]\n", srxd, nal_data);
-
-       /* the GM call is made with the GM lock held */
-       GMNAL_GM_LOCK(nal_data);
-       gm_provide_receive_buffer_with_tag(nal_data->gm_port,
-                                          srxd->buffer,
-                                          srxd->gmsize,
-                                          GM_LOW_PRIORITY,
-                                          0);
-       GMNAL_GM_UNLOCK(nal_data);
-
-       return GMNAL_STATUS_OK;
-}
-
-
-/*
- *     Handle a bad message
- *     A bad message is one we don't expect or can't interpret.
- *     The receive buffer is simply requeued. If the caller passes no
- *     descriptor it is looked up from the work entry's buffer.
- *     Returns GMNAL_STATUS_FAIL if no descriptor can be found for the
- *     buffer, GMNAL_STATUS_OK otherwise.
- */
-int
-gmnal_rx_bad(gmnal_data_t *nal_data, gmnal_rxtwe_t *we, gmnal_srxd_t *srxd)
-{
-       CDEBUG(D_TRACE, "Can't handle message\n");
-
-       if (srxd == NULL)
-               srxd = gmnal_rxbuffer_to_srxd(nal_data, we->buffer);
-
-       if (srxd == NULL) {
-               /*
-                *      nothing to requeue with; get rid of it ?
-                */
-               CDEBUG(D_ERROR, "Can't find a descriptor for this buffer\n");
-               return GMNAL_STATUS_FAIL;
-       }
-
-       gmnal_rx_requeue_buffer(nal_data, srxd);
-
-       return GMNAL_STATUS_OK;
-}
-
-
-
-/*
- *     Process a small message receive.
- *     Get here from gmnal_receive_thread, gmnal_pre_receive
- *     lib_parse, cb_recv
- *     Put data from prewired receive buffer into users buffer(s),
- *     starting "offset" bytes into the user's iovec.
- *     Call lib_finalize
- *     Hang out the receive buffer again for another receive
- *
- *     private is the gmnal_srxd_t for the message. Returns PTL_FAIL
- *     (after finalizing with PTL_FAIL) when it is NULL, else PTL_OK.
- */
-int
-gmnal_small_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-               unsigned int niov, struct iovec *iov, size_t offset, size_t mlen, size_t rlen)
-{
-       gmnal_srxd_t    *srxd = NULL;
-       void            *buffer = NULL;
-       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
-       size_t           nob;
-
-
-       CDEBUG(D_TRACE, "niov [%d] mlen["LPSZ"]\n", niov, mlen);
-
-       if (!private) {
-               CDEBUG(D_ERROR, "gmnal_small_rx no context\n");
-               lib_finalize(libnal, private, cookie, PTL_FAIL);
-               return(PTL_FAIL);
-       }
-
-       /* payload starts after the gmnal and portals headers */
-       srxd = (gmnal_srxd_t*)private;
-       buffer = srxd->buffer;
-       buffer += sizeof(gmnal_msghdr_t);
-       buffer += sizeof(ptl_hdr_t);
-
-       while(niov--) {
-               if (offset >= iov->iov_len) {
-                       /* this fragment lies wholly before the offset */
-                       offset -= iov->iov_len;
-               } else if (offset > 0) {
-                       /* first fragment to fill: only its tail is used */
-                       nob = iov->iov_len - offset;
-                       CDEBUG(D_INFO, "processing [%p] base [%p] len "LPSZ", "
-                              "offset "LPSZ", len ["LPSZ"]\n", iov,
-                              iov->iov_base + offset, iov->iov_len, offset,
-                              nob);
-                       gm_bcopy(buffer, iov->iov_base + offset, nob);
-                       /*
-                        *      advance by what was actually copied; offset
-                        *      must only be cleared after it has been used
-                        */
-                       buffer += nob;
-                       offset = 0;
-               } else {
-                       CDEBUG(D_INFO, "processing [%p] len ["LPSZ"]\n", iov,
-                              iov->iov_len);
-                       gm_bcopy(buffer, iov->iov_base, iov->iov_len);
-                       buffer += iov->iov_len;
-               }
-               iov++;
-       }
-
-
-       /*
-        *      let portals library know receive is complete
-        */
-       CDEBUG(D_PORTALS, "calling lib_finalize\n");
-       lib_finalize(libnal, private, cookie, PTL_OK);
-       /*
-        *      return buffer so it can be used again
-        */
-       CDEBUG(D_NET, "calling gm_provide_receive_buffer\n");
-       GMNAL_GM_LOCK(nal_data);
-       gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer, 
-                                          srxd->gmsize, GM_LOW_PRIORITY, 0);   
-       GMNAL_GM_UNLOCK(nal_data);
-
-       return(PTL_OK);
-}
-
-
-/*
- *     Start a small transmit. 
- *     Resolve the destination's GM node id, get a send descriptor
- *     (send token and wired transmit buffer), build
- *     gmnal header + portals header + payload in the wired buffer and
- *     initiate the GM send from it.
- *     gmnal_small_tx_callback is told when the send is complete.
- *
- *     Returns GMNAL_STATUS_FAIL if there is no nal_data or the node id
- *     can't be resolved, PTL_OK once the send has been queued.
- */
-int
-gmnal_small_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-               ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
-               unsigned int niov, struct iovec *iov, size_t offset, int size)
-{
-       gmnal_data_t    *nal_data = (gmnal_data_t*)libnal->libnal_data;
-       gmnal_stxd_t    *stxd = NULL;
-       gmnal_msghdr_t  *msghdr = NULL;
-       void            *cursor = NULL; /* next free byte in the tx buffer */
-       unsigned int    local_nid;
-       gm_status_t     gm_status = GM_SUCCESS;
-       int             wire_len = 0;
-       size_t          frag;
-
-       CDEBUG(D_TRACE, "gmnal_small_tx libnal [%p] private [%p] cookie [%p] "
-              "hdr [%p] type [%d] global_nid ["LPU64"] pid [%d] niov [%d] "
-              "iov [%p] size [%d]\n", libnal, private, cookie, hdr, type, 
-              global_nid, pid, niov, iov, size);
-
-       CDEBUG(D_INFO, "portals_hdr:: dest_nid ["LPU64"], src_nid ["LPU64"]\n",
-              hdr->dest_nid, hdr->src_nid);
-
-       if (nal_data == NULL) {
-               CDEBUG(D_ERROR, "no nal_data\n");
-               return GMNAL_STATUS_FAIL;
-       }
-       CDEBUG(D_INFO, "nal_data [%p]\n", nal_data);
-
-       /* translate the global id into the local GM node id */
-       GMNAL_GM_LOCK(nal_data);
-       gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, 
-                                           &local_nid);
-       GMNAL_GM_UNLOCK(nal_data);
-       if (gm_status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
-               return GMNAL_STATUS_FAIL;
-       }
-       CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", local_nid, local_nid);
-
-       stxd = gmnal_get_stxd(nal_data, 1);
-       CDEBUG(D_INFO, "stxd [%p]\n", stxd);
-
-       stxd->type = GMNAL_SMALL_MESSAGE;
-       stxd->cookie = cookie;
-
-       /*
-        *      gmnal header goes first in the transmit buffer
-        */
-       cursor = stxd->buffer;
-       msghdr = (gmnal_msghdr_t*)cursor;
-
-       msghdr->magic = GMNAL_MAGIC;
-       msghdr->type = GMNAL_SMALL_MESSAGE;
-       msghdr->sender_node_id = nal_data->gm_global_nid;
-       CDEBUG(D_INFO, "processing msghdr at [%p]\n", cursor);
-
-       cursor += sizeof(gmnal_msghdr_t);
-
-       /*
-        *      then the portals header
-        */
-       CDEBUG(D_INFO, "processing  portals hdr at [%p]\n", cursor);
-       gm_bcopy(hdr, cursor, sizeof(ptl_hdr_t));
-
-       cursor += sizeof(ptl_hdr_t);
-
-       /*
-        *      then the data, starting offset bytes into the iovec
-        */
-       for (; niov > 0; niov--, iov++) {
-               if (offset >= iov->iov_len) {
-                       /* fragment lies wholly before the offset */
-                       offset -= iov->iov_len;
-                       continue;
-               }
-
-               if (offset > 0) {
-                       /* first fragment sent: only its tail is copied */
-                       frag = iov->iov_len - offset;
-                       CDEBUG(D_INFO, "processing iov [%p] base [%p] len ["LPSZ"] to [%p]\n", 
-                              iov, iov->iov_base + offset, frag, cursor);
-                       gm_bcopy(iov->iov_base + offset, cursor, frag);
-                       cursor += frag;
-                       offset = 0;
-                       continue;
-               }
-
-               CDEBUG(D_INFO, "processing iov [%p] len ["LPSZ"] to [%p]\n", 
-                      iov, iov->iov_len, cursor);
-               gm_bcopy(iov->iov_base, cursor, iov->iov_len);
-               cursor += iov->iov_len;
-       }
-
-       CDEBUG(D_INFO, "sending\n");
-       wire_len = size+sizeof(ptl_hdr_t)+sizeof(gmnal_msghdr_t);
-       stxd->msg_size = wire_len;
-
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
-              "gmsize [%lu] msize [%d] global_nid ["LPU64"] local_nid[%d] "
-              "stxd [%p]\n", nal_data->gm_port, stxd->buffer, stxd->gm_size, 
-              stxd->msg_size, global_nid, local_nid, stxd);
-
-       GMNAL_GM_LOCK(nal_data);
-       /* priority and target are kept in the stxd so the callback can resend */
-       stxd->gm_priority = GM_LOW_PRIORITY;
-       stxd->gm_target_node = local_nid;
-       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, 
-                                     stxd->gm_size, stxd->msg_size, 
-                                     GM_LOW_PRIORITY, local_nid, 
-                                     gmnal_small_tx_callback, (void*)stxd);
-       GMNAL_GM_UNLOCK(nal_data);
-       CDEBUG(D_INFO, "done\n");
-
-       return PTL_OK;
-}
-
-
-/*
- *     A callback to indicate the small transmit operation is complete
- *     Check for errors and try to deal with them:
- *       - a dropped send is resent,
- *       - a timed out send has its pending sends dropped
- *         (gmnal_drop_sends_callback then resends),
- *       - any other failure resumes sending on the port
- *         (gmnal_resume_sending_callback then returns the stxd).
- *     On success call lib_finalize to inform the client application that
- *     the send is complete and the memory can be reused, and
- *     return the stxd when finished with it (returns a send token)
- *
- *     context is the gmnal_stxd_t passed to the send call.
- */
-void 
-gmnal_small_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
-{
-       gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
-       lib_msg_t       *cookie;
-       gmnal_data_t    *nal_data;
-       lib_nal_t       *libnal;
-
-       /* check the context before anything is read through it */
-       if (!stxd) {
-               CDEBUG(D_TRACE, "send completion event for unknown stxd\n");
-               return;
-       }
-       cookie = stxd->cookie;
-       nal_data = (gmnal_data_t*)stxd->nal_data;
-       libnal = nal_data->libnal;
-
-       if (status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Result of send stxd [%p] is [%s]\n", 
-                      stxd, gmnal_gm_error(status));
-       }
-
-       switch(status) {
-               case(GM_SUCCESS):
-               break;
-
-               case(GM_SEND_DROPPED):
-               /*
-                *      do a resend on the dropped ones
-                */
-                       CDEBUG(D_ERROR, "send stxd [%p] was dropped "
-                              "resending\n", context);
-                       GMNAL_GM_LOCK(nal_data);
-                       gm_send_to_peer_with_callback(nal_data->gm_port, 
-                                                     stxd->buffer, 
-                                                     stxd->gm_size, 
-                                                     stxd->msg_size, 
-                                                     stxd->gm_priority, 
-                                                     stxd->gm_target_node, 
-                                                     gmnal_small_tx_callback,
-                                                     context);
-                       GMNAL_GM_UNLOCK(nal_data);
-
-               return;
-
-               case(GM_TIMED_OUT):
-               case(GM_SEND_TIMED_OUT):
-               /*
-                *      drop these ones
-                */
-                       CDEBUG(D_INFO, "calling gm_drop_sends\n");
-                       GMNAL_GM_LOCK(nal_data);
-                       gm_drop_sends(nal_data->gm_port, stxd->gm_priority, 
-                                     stxd->gm_target_node, GMNAL_GM_PORT, 
-                                     gmnal_drop_sends_callback, context);
-                       GMNAL_GM_UNLOCK(nal_data);
-
-               return;
-
-               /*
-                *      abort on these ?
-                *      Every other GM status ends up here. The individual
-                *      status codes used to be listed one by one, but they
-                *      all fell through to this same default handling.
-                *      NOTE(review): unlike the other GM calls in this
-                *      function, gm_resume_sending is made without taking
-                *      the GM lock - confirm that is intended.
-                */
-               default:
-                       CDEBUG(D_ERROR, "Unknown send error\n");
-                       gm_resume_sending(nal_data->gm_port, stxd->gm_priority,
-                                         stxd->gm_target_node, GMNAL_GM_PORT,
-                                         gmnal_resume_sending_callback, context);
-               return;
-
-       }
-
-       /*
-        *      TO DO
-        *      If this is a large message init,
-        *      we're not finished with the data yet,
-        *      so can't call lib_finalise.
-        *      However, we're also holding on to a 
-        *      stxd here (to keep track of the source
-        *      iovec only). Should use another structure
-        *      to keep track of iovec and return stxd to 
-        *      free list earlier.
-        */
-       if (stxd->type == GMNAL_LARGE_MESSAGE_INIT) {
-               CDEBUG(D_INFO, "large transmit done\n");
-               return;
-       }
-       gmnal_return_stxd(nal_data, stxd);
-       lib_finalize(libnal, stxd, cookie, PTL_OK);
-       return;
-}
-
-/*
- *     After an error on the port
- *     call this to allow future sends to complete.
- *     Passed to gm_resume_sending by gmnal_small_tx_callback; context is
- *     the gmnal_stxd_t of the failed send, which is returned to its
- *     owner here.
- */
-void gmnal_resume_sending_callback(struct gm_port *gm_port, void *context,
-                                 gm_status_t status)
-{
-       gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
-
-       CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
-       gmnal_return_stxd(stxd->nal_data, stxd);
-       return;
-}
-
-
-/*
- *     Completion callback for the gm_drop_sends issued by
- *     gmnal_small_tx_callback after a send timed out.
- *     context is the gmnal_stxd_t of the timed out send. If the drop
- *     succeeded the message is sent again, otherwise the error is logged.
- *     NOTE(review): on failure the stxd is neither resent nor returned
- *     here - confirm whether that is intended.
- */
-void gmnal_drop_sends_callback(struct gm_port *gm_port, void *context, 
-                               gm_status_t status)
-{
-       gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
-       gmnal_data_t    *nal_data = stxd->nal_data;
-
-       CDEBUG(D_TRACE, "status is [%d] context is [%p]\n", status, context);
-       if (status == GM_SUCCESS) {
-               GMNAL_GM_LOCK(nal_data);
-               gm_send_to_peer_with_callback(gm_port, stxd->buffer, 
-                                             stxd->gm_size, stxd->msg_size, 
-                                             stxd->gm_priority, 
-                                             stxd->gm_target_node, 
-                                             gmnal_small_tx_callback, 
-                                             context);
-               /* release the GM lock taken above (was a second LOCK) */
-               GMNAL_GM_UNLOCK(nal_data);
-       } else {
-               CDEBUG(D_ERROR, "send_to_peer status for stxd [%p] is "
-                      "[%d][%s]\n", stxd, status, gmnal_gm_error(status));
-       }
-
-
-       return;
-}
-
-
-/*
- *     Begin a large transmit.
- *     Do a gm_register of the memory pointed to by the iovec 
- *     and send details to the receiver. The receiver does a gm_get
- *     to pull the data and sends an ack when finished. Upon receipt of
- *     this ack, deregister the memory. Only 1 send token is required here.
- *
- *     The init message is gmnal header + portals header + the iovec
- *     (adjusted for offset) describing the local data.
- *
- *     Returns GMNAL_STATUS_FAIL if there is no libnal or the destination
- *     node id can't be resolved, PTL_FAIL if memory registration fails,
- *     and PTL_OK once the init message has been queued. On either
- *     failure after the stxd was obtained, any memory registered so far
- *     is deregistered and the stxd is returned.
- */
-int
-gmnal_large_tx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-               ptl_hdr_t *hdr, int type, ptl_nid_t global_nid, ptl_pid_t pid, 
-               unsigned int niov, struct iovec *iov, size_t offset, int size)
-{
-
-       gmnal_data_t    *nal_data;
-       gmnal_stxd_t    *stxd = NULL;
-       void            *buffer = NULL;
-       gmnal_msghdr_t  *msghdr = NULL;
-       unsigned int    local_nid;
-       int             mlen = 0;       /* the size of the init message data */
-       struct iovec    *iov_dup = NULL;
-       gm_status_t     gm_status;
-       int             niov_dup;
-
-
-       CDEBUG(D_TRACE, "gmnal_large_tx libnal [%p] private [%p], cookie [%p] "
-              "hdr [%p], type [%d] global_nid ["LPU64"], pid [%d], niov [%d], "
-              "iov [%p], size [%d]\n", libnal, private, cookie, hdr, type, 
-              global_nid, pid, niov, iov, size);
-
-       if (libnal)
-               nal_data = (gmnal_data_t*)libnal->libnal_data;
-       else  {
-               CDEBUG(D_ERROR, "no libnal.\n");
-               return(GMNAL_STATUS_FAIL);
-       }
-
-
-       /*
-        *      Get stxd and buffer. Put local address of data in buffer, 
-        *      send local addresses to target, 
-        *      wait for the target node to suck the data over.
-        *      The stxd also keeps its own copy of the iovec (see below).
-        */
-       stxd = gmnal_get_stxd(nal_data, 1);
-       CDEBUG(D_INFO, "stxd [%p]\n", stxd);
-
-       stxd->type = GMNAL_LARGE_MESSAGE_INIT;
-       stxd->cookie = cookie;
-
-       /*
-        *      Copy gmnal_msg_hdr and portals header to the transmit buffer
-        *      Then copy the iov in
-        */
-       buffer = stxd->buffer;
-       msghdr = (gmnal_msghdr_t*)buffer;
-
-       CDEBUG(D_INFO, "processing msghdr at [%p]\n", buffer);
-
-       msghdr->magic = GMNAL_MAGIC;
-       msghdr->type = GMNAL_LARGE_MESSAGE_INIT;
-       msghdr->sender_node_id = nal_data->gm_global_nid;
-       msghdr->stxd = stxd;
-       /* msghdr->niov is filled in below, once skipped entries are known */
-       buffer += sizeof(gmnal_msghdr_t);
-       mlen = sizeof(gmnal_msghdr_t);
-       CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
-
-       CDEBUG(D_INFO, "processing  portals hdr at [%p]\n", buffer);
-
-       gm_bcopy(hdr, buffer, sizeof(ptl_hdr_t));
-       buffer += sizeof(ptl_hdr_t);
-       mlen += sizeof(ptl_hdr_t); 
-       CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
-       /* drop the iovec entries that lie wholly before the offset */
-       while (offset >= iov->iov_len) {
-               offset -= iov->iov_len;
-               niov--;
-               iov++;
-       } 
-
-       LASSERT(offset >= 0);
-       /*
-        *      Store the iovs in the stxd so we can get 
-        *      them later if we need them.
-        *      The first entry is trimmed by what is left of the offset.
-        */
-       stxd->iov[0].iov_base = iov->iov_base + offset; 
-       stxd->iov[0].iov_len = iov->iov_len - offset; 
-       CDEBUG(D_NET, "Copying iov [%p] to [%p], niov=%d\n", iov, stxd->iov, niov);
-       if (niov > 1)
-               gm_bcopy(&iov[1], &stxd->iov[1], (niov-1)*sizeof(struct iovec));
-       stxd->niov = niov;
-       /*
-        *      Tell the receiver how many entries are really in the message:
-        *      this must be the count left after the skip above, not the
-        *      count the caller passed in.
-        */
-       msghdr->niov = stxd->niov;
-
-       /*
-        *      copy the iov to the buffer so target knows 
-        *      where to get the data from
-        */
-       CDEBUG(D_INFO, "processing iov to [%p]\n", buffer);
-       gm_bcopy(stxd->iov, buffer, stxd->niov*sizeof(struct iovec));
-       mlen += stxd->niov*(sizeof(struct iovec));
-       CDEBUG(D_INFO, "mlen is [%d]\n", mlen);
-
-       /*
-        *      register the memory so the NIC can get hold of the data
-        *      This is a slow process. it'd be good to overlap it 
-        *      with something else.
-        */
-       iov = stxd->iov;
-       iov_dup = iov;
-       niov_dup = niov;
-       while(niov--) {
-               CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", 
-                      iov->iov_base, iov->iov_len);
-               GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_register_memory(nal_data->gm_port, 
-                                              iov->iov_base, iov->iov_len);
-               if (gm_status != GM_SUCCESS) {
-                       GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
-                              "for memory [%p] len ["LPSZ"]\n", 
-                              gm_status, gmnal_gm_error(gm_status), 
-                              iov->iov_base, iov->iov_len);
-                       /* undo the registrations that did succeed */
-                       GMNAL_GM_LOCK(nal_data);
-                       while (iov_dup != iov) {
-                               gm_deregister_memory(nal_data->gm_port, 
-                                                    iov_dup->iov_base, 
-                                                    iov_dup->iov_len);
-                               iov_dup++;
-                       }
-                       GMNAL_GM_UNLOCK(nal_data);
-                       gmnal_return_stxd(nal_data, stxd);
-                       return(PTL_FAIL);
-               }
-
-               GMNAL_GM_UNLOCK(nal_data);
-               iov++;
-       }
-
-       /*
-        *      Send the init message to the target
-        */
-       CDEBUG(D_INFO, "sending mlen [%d]\n", mlen);
-       GMNAL_GM_LOCK(nal_data);
-       gm_status = gm_global_id_to_node_id(nal_data->gm_port, global_nid, 
-                                           &local_nid);
-       if (gm_status != GM_SUCCESS) {
-               /*
-                *      Nothing will be sent, so nobody will ever ack:
-                *      deregister all the memory registered above while
-                *      the GM lock is still held. iov_dup still points at
-                *      the first stxd entry here.
-                */
-               while (niov_dup-- > 0) {
-                       gm_deregister_memory(nal_data->gm_port, 
-                                            iov_dup->iov_base, 
-                                            iov_dup->iov_len);
-                       iov_dup++;
-               }
-               GMNAL_GM_UNLOCK(nal_data);
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
-               gmnal_return_stxd(nal_data, stxd);
-               return(GMNAL_STATUS_FAIL);
-       }
-       CDEBUG(D_INFO, "Local Node_id is [%d]\n", local_nid);
-       gm_send_to_peer_with_callback(nal_data->gm_port, stxd->buffer, 
-                                     stxd->gm_size, mlen, GM_LOW_PRIORITY, 
-                                     local_nid, gmnal_large_tx_callback, 
-                                     (void*)stxd);
-       GMNAL_GM_UNLOCK(nal_data);
-
-       CDEBUG(D_INFO, "done\n");
-
-       return(PTL_OK);
-}
-
-/*
- *     Completion callback for the send of a large message init
- *     (the buffer carrying the large message iovec).
- *     Success and failure are both handled by the small message
- *     callback, so just pass everything through to it.
- */
-void 
-gmnal_large_tx_callback(gm_port_t *gm_port, void *context, gm_status_t status)
-{
-       gmnal_small_tx_callback(gm_port, context, status);
-       return;
-}
-
-
-
-/*
- *     Have received a buffer that contains an iovec of the sender. 
- *     Do a gm_register_memory of the receivers buffer and then do a get
- *     data from the sender.
- *
- *     private is the gmnal_srxd_t for the message. A copy of the
- *     receiver's iovec (adjusted for offset) is kept in srxd->riov.
- *
- *     Returns PTL_FAIL if there is no srxd (after finalizing with
- *     PTL_FAIL), if the iovec copy can't be allocated or if memory
- *     registration fails; PTL_OK otherwise.
- *     NOTE(review): a gmnal_remote_get failure is only logged and PTL_OK
- *     is still returned - confirm that is intended.
- */
-int
-gmnal_large_rx(lib_nal_t *libnal, void *private, lib_msg_t *cookie, 
-               unsigned int nriov, struct iovec *riov, size_t offset, 
-               size_t mlen, size_t rlen)
-{
-       gmnal_data_t    *nal_data = libnal->libnal_data;
-       gmnal_srxd_t    *srxd = (gmnal_srxd_t*)private;
-       void            *buffer = NULL;
-       struct  iovec   *riov_dup;
-       int             nriov_dup;
-       gmnal_msghdr_t  *msghdr = NULL;
-       gm_status_t     gm_status;
-
-       CDEBUG(D_TRACE, "gmnal_large_rx :: libnal[%p], private[%p], "
-              "cookie[%p], niov[%d], iov[%p], mlen["LPSZ"], rlen["LPSZ"]\n",
-               libnal, private, cookie, nriov, riov, mlen, rlen);
-
-       if (!srxd) {
-               CDEBUG(D_ERROR, "gmnal_large_rx no context\n");
-               lib_finalize(libnal, private, cookie, PTL_FAIL);
-               return(PTL_FAIL);
-       }
-
-       /* the sender's iovec follows the gmnal and portals headers */
-       buffer = srxd->buffer;
-       msghdr = (gmnal_msghdr_t*)buffer;
-       buffer += sizeof(gmnal_msghdr_t);
-       buffer += sizeof(ptl_hdr_t);
-
-       /*
-        *      Store the senders stxd address in the srxd for this message
-        *      The gmnal_large_message_ack needs it to notify the sender
-        *      the pull of data is complete
-        */
-       srxd->source_stxd = msghdr->stxd;
-
-       /*
-        *      Register the receivers memory
-        *      get the data,
-        *      tell the sender that we got the data
-        *      then tell the receiver we got the data
-        *      TO DO
-        *      If the iovecs match, could interleave 
-        *      gm_registers and gm_gets for each element
-        */
-       /* drop the iovec entries that lie wholly before the offset */
-       while (offset >= riov->iov_len) {
-               offset -= riov->iov_len;
-               riov++;
-               nriov--;
-       } 
-       LASSERT (nriov >= 0);
-       LASSERT (offset >= 0);
-       /*
-        *      do this so the final gm_get callback can deregister the memory
-        */
-       PORTAL_ALLOC(srxd->riov, nriov*(sizeof(struct iovec)));
-       if (srxd->riov == NULL) {
-               /*
-                *      PORTAL_ALLOC is expected to leave the pointer NULL
-                *      when the allocation fails; nothing has been
-                *      registered yet so there is nothing to undo.
-                */
-               CDEBUG(D_ERROR, "can't allocate memory for riov copy\n");
-               return(PTL_FAIL);
-       }
-
-       /* first entry is trimmed by what is left of the offset */
-       srxd->riov[0].iov_base = riov->iov_base + offset;
-       srxd->riov[0].iov_len = riov->iov_len - offset;
-       if (nriov > 1)
-               gm_bcopy(&riov[1], &srxd->riov[1], (nriov-1)*(sizeof(struct iovec)));
-       srxd->nriov = nriov;
-
-       riov = srxd->riov;
-       nriov_dup = nriov;
-       riov_dup = riov;
-       while(nriov--) {
-               CDEBUG(D_INFO, "Registering memory [%p] len ["LPSZ"] \n", 
-                      riov->iov_base, riov->iov_len);
-               GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_register_memory(nal_data->gm_port, 
-                                              riov->iov_base, riov->iov_len);
-               if (gm_status != GM_SUCCESS) {
-                       GMNAL_GM_UNLOCK(nal_data);
-                       CDEBUG(D_ERROR, "gm_register_memory returns [%d][%s] "
-                              "for memory [%p] len ["LPSZ"]\n", 
-                              gm_status, gmnal_gm_error(gm_status), 
-                              riov->iov_base, riov->iov_len);
-                       /* undo the registrations that did succeed */
-                       GMNAL_GM_LOCK(nal_data);
-                       while (riov_dup != riov) {
-                               gm_deregister_memory(nal_data->gm_port, 
-                                                    riov_dup->iov_base, 
-                                                    riov_dup->iov_len);
-                               riov_dup++;
-                       }
-                       /* release the GM lock taken above (was a second LOCK) */
-                       GMNAL_GM_UNLOCK(nal_data);
-                       /*
-                        *      give back srxd and buffer. Send NACK to sender
-                        */
-                       PORTAL_FREE(srxd->riov, nriov_dup*(sizeof(struct iovec)));
-                       return(PTL_FAIL);
-               }
-               GMNAL_GM_UNLOCK(nal_data);
-               riov++;
-       }
-
-       /*
-        *      now do gm_get to get the data
-        */
-       srxd->cookie = cookie;
-       if (gmnal_remote_get(srxd, srxd->nsiov, (struct iovec*)buffer, 
-                             nriov_dup, riov_dup) != GMNAL_STATUS_OK) {
-               CDEBUG(D_ERROR, "can't get the data");
-       }
-
-       CDEBUG(D_INFO, "lgmanl_large_rx done\n");
-
-       return(PTL_OK);
-}
-
-
-/*
- *     Perform a number of remote gets as part of receiving 
- *     a large message.
- *     gmnal_copyiov is run twice over the same iovecs: first with
- *     do_copy 0 to find out how many gets there will be, so the count
- *     can be stored in the srxd, then with do_copy 1 to issue them.
- *     The final one to complete (i.e. the last callback to get called)
- *     tidies up.
- *     gm_get requires a send token.
- *
- *     Returns GMNAL_STATUS_FAIL if either pass reports an error,
- *     GMNAL_STATUS_OK otherwise.
- */
-int
-gmnal_remote_get(gmnal_srxd_t *srxd, int nsiov, struct iovec *siov, 
-                 int nriov, struct iovec *riov)
-{
-       int     ngets;
-
-       CDEBUG(D_TRACE, "gmnal_remote_get srxd[%p], nriov[%d], riov[%p], "
-              "nsiov[%d], siov[%p]\n", srxd, nriov, riov, nsiov, siov);
-
-       /* first pass: count only */
-       ngets = gmnal_copyiov(0, srxd, nsiov, siov, nriov, riov);
-       if (ngets < 0) {
-               CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
-               return GMNAL_STATUS_FAIL;
-       }
-       CDEBUG(D_INFO, "gmnal_remote_get ncalls [%d]\n", ngets);
-
-       /* callback bookkeeping is set up before any get is issued */
-       spin_lock_init(&srxd->callback_lock);
-       srxd->ncallbacks = ngets;
-       srxd->callback_status = 0;
-
-       /* second pass: issue the gets */
-       ngets = gmnal_copyiov(1, srxd, nsiov, siov, nriov, riov);
-       if (ngets < 0) {
-               CDEBUG(D_ERROR, "there's something wrong with the iovecs\n");
-               return GMNAL_STATUS_FAIL;
-       }
-
-       return GMNAL_STATUS_OK;
-}
-
-
-/*
- *     pull data from source node (source iovec) to a local iovec.
- *     The iovecs may not match which adds the complications below.
- *     Count the number of gm_gets that will be required so the callbacks
- *     can determine which one is the last.
- */    
-int
-gmnal_copyiov(int do_copy, gmnal_srxd_t *srxd, int nsiov, 
-              struct iovec *siov, int nriov, struct iovec *riov)
-{
-
-       int     ncalls = 0;
-       int     slen = siov->iov_len, rlen = riov->iov_len;
-       char    *sbuf = siov->iov_base, *rbuf = riov->iov_base; 
-       unsigned long   sbuf_long;
-       gm_remote_ptr_t remote_ptr = 0;
-       unsigned int    source_node;
-       gmnal_ltxd_t    *ltxd = NULL;
-       gmnal_data_t    *nal_data = srxd->nal_data;
-
-       CDEBUG(D_TRACE, "copy[%d] nal_data[%p]\n", do_copy, nal_data);
-       if (do_copy) {
-               if (!nal_data) {
-                       CDEBUG(D_ERROR, "Bad args No nal_data\n");
-                       return(GMNAL_STATUS_FAIL);
-               }
-               GMNAL_GM_LOCK(nal_data);
-               if (gm_global_id_to_node_id(nal_data->gm_port, 
-                                           srxd->gm_source_node, 
-                                           &source_node) != GM_SUCCESS) {
-
-                       CDEBUG(D_ERROR, "cannot resolve global_id [%u] "
-                              "to local node_id\n", srxd->gm_source_node);
-                       GMNAL_GM_UNLOCK(nal_data);
-                       return(GMNAL_STATUS_FAIL);
-               }
-               GMNAL_GM_UNLOCK(nal_data);
-               /*
-                *      We need a send token to use gm_get
-                *      getting an stxd gets us a send token.
-                *      the stxd is used as the context to the
-                *      callback function (so stxd can be returned).
-                *      Set pointer in stxd to srxd so callback count in srxd
-                *      can be decremented to find last callback to complete
-                */
-               CDEBUG(D_INFO, "gmnal_copyiov source node is G[%u]L[%d]\n", 
-                      srxd->gm_source_node, source_node);
-       }
-
-       do {
-               CDEBUG(D_INFO, "sbuf[%p] slen[%d] rbuf[%p], rlen[%d]\n",
-                               sbuf, slen, rbuf, rlen);
-               if (slen > rlen) {
-                       ncalls++;
-                       if (do_copy) {
-                               CDEBUG(D_INFO, "slen>rlen\n");
-                               ltxd = gmnal_get_ltxd(nal_data);
-                               ltxd->srxd = srxd;
-                               GMNAL_GM_LOCK(nal_data);
-                               /* 
-                                *      funny business to get rid 
-                                *      of compiler warning 
-                                */
-                               sbuf_long = (unsigned long) sbuf;
-                               remote_ptr = (gm_remote_ptr_t)sbuf_long;
-                               gm_get(nal_data->gm_port, remote_ptr, rbuf, 
-                                      rlen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
-                                      gmnal_remote_get_callback, ltxd);
-                               GMNAL_GM_UNLOCK(nal_data);
-                       }
-                       /*
-                        *      at the end of 1 iov element
-                        */
-                       sbuf+=rlen;
-                       slen-=rlen;
-                       riov++;
-                       nriov--;
-                       rbuf = riov->iov_base;
-                       rlen = riov->iov_len;
-               } else if (rlen > slen) {
-                       ncalls++;
-                       if (do_copy) {
-                               CDEBUG(D_INFO, "slen<rlen\n");
-                               ltxd = gmnal_get_ltxd(nal_data);
-                               ltxd->srxd = srxd;
-                               GMNAL_GM_LOCK(nal_data);
-                               sbuf_long = (unsigned long) sbuf;
-                               remote_ptr = (gm_remote_ptr_t)sbuf_long;
-                               gm_get(nal_data->gm_port, remote_ptr, rbuf, 
-                                      slen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
-                                      gmnal_remote_get_callback, ltxd);
-                               GMNAL_GM_UNLOCK(nal_data);
-                       }
-                       /*
-                        *      at end of siov element
-                        */
-                       rbuf+=slen;
-                       rlen-=slen;
-                       siov++;
-                       sbuf = siov->iov_base;
-                       slen = siov->iov_len;
-               } else {
-                       ncalls++;
-                       if (do_copy) {
-                               CDEBUG(D_INFO, "rlen=slen\n");
-                               ltxd = gmnal_get_ltxd(nal_data);
-                               ltxd->srxd = srxd;
-                               GMNAL_GM_LOCK(nal_data);
-                               sbuf_long = (unsigned long) sbuf;
-                               remote_ptr = (gm_remote_ptr_t)sbuf_long;
-                               gm_get(nal_data->gm_port, remote_ptr, rbuf, 
-                                      rlen, GM_LOW_PRIORITY, source_node, 
-                                      GMNAL_GM_PORT, 
-                                      gmnal_remote_get_callback, ltxd);
-                               GMNAL_GM_UNLOCK(nal_data);
-                       }
-                       /*
-                        *      at end of siov and riov element
-                        */
-                       siov++;
-                       sbuf = siov->iov_base;
-                       slen = siov->iov_len;
-                       riov++;
-                       nriov--;
-                       rbuf = riov->iov_base;
-                       rlen = riov->iov_len;
-               }
-
-       } while (nriov);
-       return(ncalls);
-}
-
-
-/*
- *     Completion callback invoked after each gm_get() issued on behalf of
- *     a large receive.  Several gm_gets may be outstanding for a single
- *     transaction: every callback hands its ltxd (and so its send token)
- *     back, but only the last one to complete does the real work:
- *     finalize the message, ack the sender, deregister the receive iovec
- *     and repost the receive buffer.
- *
- *     gm_port  GM port the get completed on (unused here)
- *     context  the gmnal_ltxd_t passed to gm_get(); ltxd->srxd identifies
- *              the receive this get belongs to
- *     status   GM completion status of this particular get
- */
-void
-gmnal_remote_get_callback(gm_port_t *gm_port, void *context,
-                          gm_status_t status)
-{
-
-       gmnal_ltxd_t    *ltxd = (gmnal_ltxd_t*)context;
-       gmnal_srxd_t    *srxd = ltxd->srxd;
-       gmnal_data_t    *nal_data = srxd->nal_data;
-       lib_nal_t       *libnal = nal_data->libnal;
-       struct  iovec   *riov;
-       int             lastone;
-       int             i;
-
-       CDEBUG(D_TRACE, "called for context [%p]\n", context);
-
-       if (status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "reports error [%d][%s]\n", status,
-                      gmnal_gm_error(status));
-       }
-
-       /*
-        *      account for this get and work out, under the lock,
-        *      whether it was the last one outstanding
-        */
-       spin_lock(&srxd->callback_lock);
-       srxd->ncallbacks--;
-       srxd->callback_status |= status;
-       lastone = srxd->ncallbacks?0:1;
-       spin_unlock(&srxd->callback_lock);
-
-       /*
-        *      everyone returns a send token
-        */
-       gmnal_return_ltxd(nal_data, ltxd);
-
-       if (!lastone) {
-               /* routine progress, not an error */
-               CDEBUG(D_INFO, "NOT final callback context[%p]\n", srxd);
-               return;
-       }
-
-       /*
-        *      Let our client application proceed
-        *      NOTE(review): PTL_OK is reported even when callback_status
-        *      recorded a failed get; confirm whether that is intended.
-        */
-       CDEBUG(D_INFO, "final callback context[%p]\n", srxd);
-       lib_finalize(libnal, srxd, srxd->cookie, PTL_OK);
-
-       /*
-        *      send an ack to the sender to let him know we got the data
-        */
-       gmnal_large_tx_ack(nal_data, srxd);
-
-       /*
-        *      Unregister the memory that was used
-        *      This is a very slow business (slower then register)
-        */
-       riov = srxd->riov;
-       GMNAL_GM_LOCK(nal_data);
-       for (i = 0; i < srxd->nriov; i++, riov++) {
-               CDEBUG(D_INFO, "deregister memory [%p]\n", riov->iov_base);
-               if (gm_deregister_memory(nal_data->gm_port,
-                                        riov->iov_base, riov->iov_len)) {
-                       CDEBUG(D_ERROR, "failed to deregister memory [%p]\n",
-                              riov->iov_base);
-               }
-       }
-       GMNAL_GM_UNLOCK(nal_data);
-       /*
-        *      size the free from srxd->nriov: the old code counted a
-        *      local copy down to -1 in the loop above and then used it
-        *      here, so the size handed to PORTAL_FREE was wrong
-        */
-       PORTAL_FREE(srxd->riov, sizeof(struct iovec) * srxd->nriov);
-
-       /*
-        *      repost the receive buffer (return receive token)
-        */
-       GMNAL_GM_LOCK(nal_data);
-       gm_provide_receive_buffer_with_tag(nal_data->gm_port, srxd->buffer,
-                                          srxd->gmsize, GM_LOW_PRIORITY, 0);
-       GMNAL_GM_UNLOCK(nal_data);
-
-       return;
-}
-
-
-/*
- *     Runs on the target node once all data has been pulled from the
- *     source: builds a GMNAL_LARGE_MESSAGE_ACK header in a small
- *     transmit buffer and sends it back so the sender knows the large
- *     transmit has completed.  Returns silently if the sender's global
- *     id cannot be resolved to a local GM node id.
- */
-void
-gmnal_large_tx_ack(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
-{
-       gmnal_stxd_t    *ack_txd;
-       gmnal_msghdr_t  *hdr;
-       unsigned int    peer_id;
-       gm_status_t     rc;
-
-       CDEBUG(D_TRACE, "srxd[%p] target_node [%u]\n", srxd,
-              srxd->gm_source_node);
-
-       /* translate the sender's global id into a node id GM can send to */
-       GMNAL_GM_LOCK(nal_data);
-       rc = gm_global_id_to_node_id(nal_data->gm_port,
-                                    srxd->gm_source_node, &peer_id);
-       GMNAL_GM_UNLOCK(nal_data);
-       if (rc != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "Failed to obtain local id\n");
-               return;
-       }
-       CDEBUG(D_INFO, "Local Node_id is [%u][%x]\n", peer_id, peer_id);
-
-       /* blocking get: also secures the send token for the ack */
-       ack_txd = gmnal_get_stxd(nal_data, 1);
-       CDEBUG(D_TRACE, "gmnal_large_tx_ack got stxd[%p]\n", ack_txd);
-
-       ack_txd->nal_data = nal_data;
-       ack_txd->type = GMNAL_LARGE_MESSAGE_ACK;
-       ack_txd->gm_priority = GM_LOW_PRIORITY;
-       ack_txd->gm_target_node = peer_id;
-
-       /*
-        *      The ack consists of the gmnal header only.  It carries the
-        *      sender's original stxd pointer back so the sender knows
-        *      which transmit is being acknowledged.
-        */
-       hdr = (gmnal_msghdr_t*)ack_txd->buffer;
-       hdr->magic = GMNAL_MAGIC;
-       hdr->type = GMNAL_LARGE_MESSAGE_ACK;
-       hdr->sender_node_id = nal_data->gm_global_nid;
-       hdr->stxd = srxd->source_stxd;
-       CDEBUG(D_INFO, "processing msghdr at [%p]\n", ack_txd->buffer);
-
-       CDEBUG(D_INFO, "sending\n");
-       ack_txd->msg_size= sizeof(gmnal_msghdr_t);
-
-       CDEBUG(D_NET, "Calling gm_send_to_peer port [%p] buffer [%p] "
-              "gmsize [%lu] msize [%d] global_nid [%u] local_nid[%d] "
-              "stxd [%p]\n", nal_data->gm_port, ack_txd->buffer,
-              ack_txd->gm_size, ack_txd->msg_size, srxd->gm_source_node,
-              peer_id, ack_txd);
-
-       GMNAL_GM_LOCK(nal_data);
-       gm_send_to_peer_with_callback(nal_data->gm_port, ack_txd->buffer,
-                                     ack_txd->gm_size, ack_txd->msg_size,
-                                     GM_LOW_PRIORITY, peer_id,
-                                     gmnal_large_tx_ack_callback,
-                                     (void*)ack_txd);
-       GMNAL_GM_UNLOCK(nal_data);
-
-       CDEBUG(D_INFO, "gmnal_large_tx_ack :: done\n");
-}
-
-
-/*
- *     Completion callback for the ack sent by gmnal_large_tx_ack().
- *     Nothing is finalized here: the stxd used for the ack is simply
- *     handed back to its free list, which also returns the send token.
- *
- *     gm_port  GM port the send completed on (unused here)
- *     context  the gmnal_stxd_t passed to gm_send_to_peer_with_callback()
- *     status   GM completion status of the ack send
- */
-void
-gmnal_large_tx_ack_callback(gm_port_t *gm_port, void *context,
-                            gm_status_t status)
-{
-       gmnal_stxd_t    *stxd = (gmnal_stxd_t*)context;
-       gmnal_data_t    *nal_data;
-
-       if (!stxd) {
-               CDEBUG(D_ERROR, "send completion event for unknown stxd\n");
-               return;
-       }
-       /*
-        *      only look inside the stxd once it is known to be non-NULL;
-        *      the old code dereferenced it in the initializer, ahead of
-        *      the check above
-        */
-       nal_data = (gmnal_data_t*)stxd->nal_data;
-
-       CDEBUG(D_TRACE, "send completion event for stxd [%p] status is [%d]\n",
-              stxd, status);
-       if (status != GM_SUCCESS) {
-               CDEBUG(D_ERROR, "ack send failed [%d][%s]\n", status,
-                      gmnal_gm_error(status));
-       }
-       gmnal_return_stxd(nal_data, stxd);
-
-       /*
-        *      NOTE(review): this function never takes the GM lock itself,
-        *      so this unlock only balances if the caller invokes the
-        *      callback with the lock held and expects it to be dropped
-        *      here.  Kept as in the original; confirm against the caller.
-        */
-       GMNAL_GM_UNLOCK(nal_data);
-       return;
-}
-
-/*
- *     Handles an incoming large-message ack on the transmit side, i.e.
- *     the receiver has pulled the data (or the transfer has failed).
- *     Calls lib_finalize for the original send, deregisters the memory
- *     described by the stxd's iovec, returns the stxd (send token) and
- *     requeues the receive buffer the ack arrived in.
- *
- *     nal_data  NAL instance the ack arrived on
- *     srxd      receive descriptor whose buffer holds the ack header
- */
-void
-gmnal_large_tx_ack_received(gmnal_data_t *nal_data, gmnal_srxd_t *srxd)
-{
-       lib_nal_t       *libnal = nal_data->libnal;
-       gmnal_stxd_t    *stxd = NULL;
-       gmnal_msghdr_t  *msghdr = NULL;
-       void            *buffer = NULL;
-       struct  iovec   *iov;
-
-
-       /*
-        *      pick up the buffer before tracing it; the old code logged
-        *      the still-NULL local
-        */
-       buffer = srxd->buffer;
-       CDEBUG(D_TRACE, "gmnal_large_tx_ack_received buffer [%p]\n", buffer);
-
-       /* the ack header carries back the stxd pointer we sent out */
-       msghdr = (gmnal_msghdr_t*)buffer;
-       stxd = msghdr->stxd;
-
-       CDEBUG(D_INFO, "gmnal_large_tx_ack_received stxd [%p]\n", stxd);
-
-       lib_finalize(libnal, stxd, stxd->cookie, PTL_OK);
-
-       /*
-        *      extract the iovec from the stxd, deregister the memory.
-        *      NOTE(review): nothing here frees the iovec storage itself;
-        *      confirm whether that happens elsewhere.  stxd->niov is
-        *      counted down in place, as in the original.
-        */
-       iov = stxd->iov;
-       while(stxd->niov--) {
-               CDEBUG(D_INFO, "deregister memory [%p] size ["LPSZ"]\n",
-                      iov->iov_base, iov->iov_len);
-               GMNAL_GM_LOCK(nal_data);
-               gm_deregister_memory(nal_data->gm_port, iov->iov_base,
-                                    iov->iov_len);
-               GMNAL_GM_UNLOCK(nal_data);
-               iov++;
-       }
-
-       /*
-        *      return the send token
-        *      TO DO It is bad to hold onto the send token so long?
-        */
-       gmnal_return_stxd(nal_data, stxd);
-
-
-       /*
-        *      requeue the receive buffer
-        */
-       gmnal_rx_requeue_buffer(nal_data, srxd);
-
-
-       return;
-}
diff --git a/lustre/portals/knals/gmnal/gmnal_module.c b/lustre/portals/knals/gmnal/gmnal_module.c
deleted file mode 100644 (file)
index 3aca90f..0000000
+++ /dev/null
@@ -1,134 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "gmnal.h"
-
-
-int gmnal_small_msg_size = 525312;     /* bytes; 525312 == 513 * 1024 */
-/*
- *      Number of receive threads to start.
- *      -1 indicates default value.
- *      This is 1 thread per cpu
- *      See start_kernel_threads
- */
-int num_rx_threads = -1;
-int num_stxds = 5;     /* count of small transmit descriptors; read by gmnal_alloc_txd */
-int gm_port = 4;       /* presumably the GM port number to open; TODO confirm */
-
-/*
- *     NAL-specific command handler.
- *
- *     GMNAL_IOC_GET_GNID: copy a host name in from user space
- *     (pcfg_pbuf1/pcfg_plen1), resolve it to a GM node id and then to a
- *     GM global id, and copy the global id out to pcfg_pbuf2.
- *     Any other command sets pcfg_nid2 to -1 and still returns 0.
- *
- *     pcfg     command block from the caller
- *     private  the gmnal_data_t this NAL was registered with
- *
- *     Returns 0 on success, -1 if a GM lookup fails (as before),
- *     -EINVAL for a zero-length name, -ENOMEM if the name buffer cannot
- *     be allocated, or -EFAULT if a user-space copy fails.
- */
-int
-gmnal_cmd(struct portals_cfg *pcfg, void *private)
-{
-       gmnal_data_t    *nal_data = NULL;
-       char            *name = NULL;
-       int             nid = -2;
-       int             gnid;
-       size_t          outlen;
-       gm_status_t     gm_status;
-
-
-       CDEBUG(D_TRACE, "gmnal_cmd [%d] private [%p]\n",
-              pcfg->pcfg_command, private);
-       nal_data = (gmnal_data_t*)private;
-       switch(pcfg->pcfg_command) {
-       /*
-        * just reuse already defined GET_NID. Should define GMNAL version
-        */
-       case(GMNAL_IOC_GET_GNID):
-
-               if (pcfg->pcfg_plen1 == 0)
-                       return(-EINVAL);
-
-               PORTAL_ALLOC(name, pcfg->pcfg_plen1);
-               if (name == NULL)
-                       return(-ENOMEM);
-
-               if (copy_from_user(name, pcfg->pcfg_pbuf1, pcfg->pcfg_plen1)) {
-                       PORTAL_FREE(name, pcfg->pcfg_plen1);
-                       return(-EFAULT);
-               }
-               /*
-                * user space is not trusted to terminate the string; a
-                * well-formed request already has its NUL in the last byte
-                */
-               name[pcfg->pcfg_plen1 - 1] = '\0';
-
-               GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_host_name_to_node_id_ex (nal_data->gm_port, 0, name, &nid);
-               GMNAL_GM_UNLOCK(nal_data);
-               if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_INFO, "gm_host_name_to_node_id_ex(...host %s) failed[%d]\n",
-                               name, gm_status);
-                       PORTAL_FREE(name, pcfg->pcfg_plen1);
-                       return (-1);
-               }
-               CDEBUG(D_INFO, "Local node %s id is [%d]\n", name, nid);
-               /* the name is not needed past this point; do not leak it */
-               PORTAL_FREE(name, pcfg->pcfg_plen1);
-
-               GMNAL_GM_LOCK(nal_data);
-               gm_status = gm_node_id_to_global_id(nal_data->gm_port,
-                                                   nid, &gnid);
-               GMNAL_GM_UNLOCK(nal_data);
-               if (gm_status != GM_SUCCESS) {
-                       CDEBUG(D_INFO, "gm_node_id_to_global_id failed[%d]\n",
-                              gm_status);
-                       return(-1);
-               }
-               CDEBUG(D_INFO, "Global node is is [%u][%x]\n", gnid, gnid);
-
-               /*
-                * never copy out more than gnid holds, whatever length
-                * the caller claims for its buffer
-                */
-               outlen = pcfg->pcfg_plen2;
-               if (outlen > sizeof(gnid))
-                       outlen = sizeof(gnid);
-               if (copy_to_user(pcfg->pcfg_pbuf2, &gnid, outlen))
-                       return(-EFAULT);
-       break;
-       default:
-               CDEBUG(D_INFO, "gmnal_cmd UNKNOWN[%d]\n", pcfg->pcfg_command);
-               pcfg->pcfg_nid2 = -1;
-       }
-
-
-       return(0);
-}
-
-
-/*
- *     Module entry point: bring the GM NAL up through gmnal_init().
- *     Returns 0 on success, or -ENODEV when gmnal_init() reports
- *     anything other than PTL_OK.
- */
-static int __init
-gmnal_load(void)
-{
-       int     status;
-
-       CDEBUG(D_TRACE, "This is the gmnal module initialisation routine\n");
-
-       CDEBUG(D_INFO, "Calling gmnal_init\n");
-       status = gmnal_init();
-       if (status != PTL_OK) {
-               /* failing to initialise is an error, so log it as one */
-               CDEBUG(D_ERROR, "Portals GMNAL Failed to initialise\n");
-               return(-ENODEV);
-       }
-       CDEBUG(D_INFO, "Portals GMNAL initialised ok\n");
-
-       /* this message used to lack its terminating newline */
-       CDEBUG(D_INFO, "This is the end of the gmnal init routine\n");
-
-       return(0);
-}
-
-/*
- *     Module exit point: all teardown is delegated to gmnal_fini().
- */
-static void __exit
-gmnal_unload(void)
-{
-       gmnal_fini();
-}
-
-
-module_init(gmnal_load);       /* gmnal_load() is the module's init routine */
-
-module_exit(gmnal_unload);     /* gmnal_unload() is the module's exit routine */
-
-MODULE_PARM(gmnal_small_msg_size, "i");        /* "i": integer module parameter */
-MODULE_PARM(num_rx_threads, "i");      /* -1 keeps the default, see the variable */
-MODULE_PARM(num_stxds, "i");   /* count of small transmit descriptors */
-MODULE_PARM(gm_port, "i");     /* presumably the GM port to open; TODO confirm */
-
-MODULE_AUTHOR("Morgan Doyle");
-
-MODULE_DESCRIPTION("A Portals kernel NAL for Myrinet GM.");
-
-MODULE_LICENSE("GPL");
diff --git a/lustre/portals/knals/gmnal/gmnal_utils.c b/lustre/portals/knals/gmnal/gmnal_utils.c
deleted file mode 100644 (file)
index 6a52319..0000000
+++ /dev/null
@@ -1,1075 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
- *
- *   This file is part of Lustre, http://www.lustre.org/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-/*
- *     All utilities required by gmnal
- */
-
-#include "gmnal.h"
-
-/*
- *     Is the calling task one of the gmnal receive threads?
- *     Returns 1 when current->pid matches one of the first
- *     num_rx_threads entries of nal_data->rxthread_pid, 0 otherwise.
- */
-int
-gmnal_is_rxthread(gmnal_data_t *nal_data)
-{
-       int idx = 0;
-
-       while (idx < num_rx_threads) {
-               if (current->pid == nal_data->rxthread_pid[idx])
-                       return(1);
-               idx++;
-       }
-       return(0);
-}
-
-
-/*
- *     Build one list of small transmit descriptors.
- *     Allocates ntokens + 1 descriptors (the loop bound is inclusive, as
- *     in the original code), gives each a DMA buffer of
- *     GMNAL_SMALL_MSG_SIZE bytes from gm_dma_malloc() and pushes it onto
- *     the list at *head, with txd->rxt set to the rxt argument.
- *     Returns GMNAL_STATUS_OK, GMNAL_STATUS_NOMEM if a descriptor cannot
- *     be allocated, or GMNAL_STATUS_FAIL if a DMA buffer cannot.
- *     Descriptors already on the list are left there on failure.
- */
-static int
-gmnal_alloc_stxd_list(gmnal_data_t *nal_data, gmnal_stxd_t **head,
-                      int ntokens, int rxt)
-{
-       gmnal_stxd_t    *txd = NULL;
-       void            *txbuffer = NULL;
-       int             i;
-
-       /* NOTE(review): "<=" yields one descriptor more than there are tokens */
-       for (i=0; i<=ntokens; i++) {
-               PORTAL_ALLOC(txd, sizeof(gmnal_stxd_t));
-               if (!txd) {
-                       CDEBUG(D_ERROR, "Failed to malloc txd [%d]\n", i);
-                       return(GMNAL_STATUS_NOMEM);
-               }
-               GMNAL_GM_LOCK(nal_data);
-               txbuffer = gm_dma_malloc(nal_data->gm_port,
-                                        GMNAL_SMALL_MSG_SIZE(nal_data));
-               GMNAL_GM_UNLOCK(nal_data);
-               if (!txbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc txbuffer [%d],"
-                              " size [%d]\n", i,
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
-                       PORTAL_FREE(txd, sizeof(gmnal_stxd_t));
-                       return(GMNAL_STATUS_FAIL);
-               }
-               txd->buffer = txbuffer;
-               txd->buffer_size = GMNAL_SMALL_MSG_SIZE(nal_data);
-               txd->gm_size = gm_min_size_for_length(txd->buffer_size);
-               txd->nal_data = (struct _gmnal_data_t*)nal_data;
-               txd->rxt = rxt;
-
-               txd->next = *head;
-               *head = txd;
-               CDEBUG(D_INFO, "Registered txd [%p] with buffer [%p], "
-                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-       }
-       return(GMNAL_STATUS_OK);
-}
-
-/*
- *     Allocate tx descriptors/tokens (large and small)
- *     allocate a number of small tx buffers and register with GM
- *     so they are wired and set up for DMA. This is a costly operation.
- *     Also allocate a corresponding descriptor to keep track of
- *     the buffer.
- *     Put all small descriptors on singly linked list to be available to send
- *     function.
- *     Allocate the rest of the available tx tokens for large messages. These will be
- *     used to do gm_gets in gmnal_copyiov
- *
- *     Token split: num_stxds for general small sends, num_stxds + 1 for
- *     the receive threads, and whatever GM has left for large messages.
- *
- *     Returns GMNAL_STATUS_OK on success, GMNAL_STATUS_FAIL when no
- *     tokens remain for large messages or a DMA buffer cannot be
- *     obtained, and GMNAL_STATUS_NOMEM when a descriptor cannot be
- *     allocated.  Nothing is unwound on failure; descriptors already
- *     linked stay on their lists.
- */
-int
-gmnal_alloc_txd(gmnal_data_t *nal_data)
-{
-       int ntx= 0, nstx= 0, nrxt_stx= 0,
-           nltx= 0, i = 0;
-       int rc;
-       gmnal_ltxd_t    *ltxd = NULL;
-
-       CDEBUG(D_TRACE, "gmnal_alloc_small tx\n");
-
-       GMNAL_GM_LOCK(nal_data);
-       /*
-        *      total number of transmit tokens
-        */
-       ntx = gm_num_send_tokens(nal_data->gm_port);
-       GMNAL_GM_UNLOCK(nal_data);
-       CDEBUG(D_INFO, "total number of send tokens available is [%d]\n", ntx);
-
-       /*
-        *      allocate a number for small sends
-        *      num_stxds from gmnal_module.c
-        */
-       nstx = num_stxds;
-       /*
-        *      give that number plus 1 to the receive threads
-        */
-       nrxt_stx = nstx + 1;
-
-       /*
-        *      give the rest for gm_gets
-        */
-       nltx = ntx - (nrxt_stx + nstx);
-       if (nltx < 1) {
-               CDEBUG(D_ERROR, "No tokens available for large messages\n");
-               return(GMNAL_STATUS_FAIL);
-       }
-
-
-       /*
-        * A semaphore is initialised with the
-        * number of transmit tokens available.
-        * To get a stxd, acquire the token semaphore.
-        * this decrements the available token count
-        * (if no tokens you block here, someone returning a
-        * stxd will release the semaphore and wake you)
-        * When token is obtained acquire the spinlock
-        * to manipulate the list
-        */
-       GMNAL_TXD_TOKEN_INIT(nal_data, nstx);
-       GMNAL_TXD_LOCK_INIT(nal_data);
-       GMNAL_RXT_TXD_TOKEN_INIT(nal_data, nrxt_stx);
-       GMNAL_RXT_TXD_LOCK_INIT(nal_data);
-       GMNAL_LTXD_TOKEN_INIT(nal_data, nltx);
-       GMNAL_LTXD_LOCK_INIT(nal_data);
-
-       /* general-purpose small transmit descriptors */
-       rc = gmnal_alloc_stxd_list(nal_data, &nal_data->stxd, nstx, 0);
-       if (rc != GMNAL_STATUS_OK)
-               return(rc);
-
-       /* small transmit descriptors reserved for the receive threads */
-       rc = gmnal_alloc_stxd_list(nal_data, &nal_data->rxt_stxd,
-                                  nrxt_stx, 1);
-       if (rc != GMNAL_STATUS_OK)
-               return(rc);
-
-       /*
-        *      string together large tokens
-        */
-       for (i=0; i<=nltx ; i++) {
-               PORTAL_ALLOC(ltxd, sizeof(gmnal_ltxd_t));
-               if (!ltxd) {
-                       /* the old code dereferenced a failed allocation */
-                       CDEBUG(D_ERROR, "Failed to malloc ltxd [%d]\n", i);
-                       return(GMNAL_STATUS_NOMEM);
-               }
-               ltxd->next = nal_data->ltxd;
-               nal_data->ltxd = ltxd;
-       }
-       return(GMNAL_STATUS_OK);
-}
-
-/*
- *     Free everything gmnal_alloc_txd() set up: both lists of small tx
- *     descriptors (general and rxthread) together with their GM DMA
- *     buffers, and the list of large tx descriptors.  Each list head is
- *     cleared once its list has been released.
- */
-void
-gmnal_free_txd(gmnal_data_t *nal_data)
-{
-       gmnal_stxd_t *txd = nal_data->stxd, *_txd = NULL;
-       gmnal_ltxd_t *ltxd = NULL, *_ltxd = NULL;
-
-       CDEBUG(D_TRACE, "gmnal_free_small tx\n");
-
-       /* general small transmit descriptors */
-       while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
-                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-               _txd = txd;
-               txd = txd->next;
-               GMNAL_GM_LOCK(nal_data);
-               gm_dma_free(nal_data->gm_port, _txd->buffer);
-               GMNAL_GM_UNLOCK(nal_data);
-               PORTAL_FREE(_txd, sizeof(gmnal_stxd_t));
-       }
-       nal_data->stxd = NULL;
-
-       /* small transmit descriptors reserved for the receive threads */
-       txd = nal_data->rxt_stxd;
-       while(txd) {
-               CDEBUG(D_INFO, "Freeing txd [%p] with buffer [%p], "
-                      "size [%d]\n", txd, txd->buffer, txd->buffer_size);
-               _txd = txd;
-               txd = txd->next;
-               GMNAL_GM_LOCK(nal_data);
-               gm_dma_free(nal_data->gm_port, _txd->buffer);
-               GMNAL_GM_UNLOCK(nal_data);
-               PORTAL_FREE(_txd, sizeof(gmnal_stxd_t));
-       }
-       nal_data->rxt_stxd = NULL;
-
-       /*
-        *      large transmit descriptors: walk ltxd.  The old loop tested
-        *      txd, which is always NULL by now, so this list was never
-        *      freed.
-        */
-       ltxd = nal_data->ltxd;
-       while(ltxd) {
-               _ltxd = ltxd;
-               ltxd = ltxd->next;
-               PORTAL_FREE(_ltxd, sizeof(gmnal_ltxd_t));
-       }
-       nal_data->ltxd = NULL;
-
-       return;
-}
-
-
-/*
- *     Take a small transmit descriptor off a free list; holding one
- *     implies holding a send token.
- *     Receive threads draw from their own list and always wait for a
- *     token.  Other callers draw from the general list: they wait when
- *     "block" is non-zero, otherwise they get NULL back if no token is
- *     immediately available.
- */
-gmnal_stxd_t *
-gmnal_get_stxd(gmnal_data_t *nal_data, int block)
-{
-       gmnal_stxd_t    *desc = NULL;
-
-       CDEBUG(D_TRACE, "gmnal_get_stxd nal_data [%p] block[%d] pid [%d]\n",
-              nal_data, block, current->pid);
-
-       if (gmnal_is_rxthread(nal_data)) {
-               /* receive threads have a private pool */
-               CDEBUG(D_INFO, "RXTHREAD Attempting to get token\n");
-               GMNAL_RXT_TXD_GETTOKEN(nal_data);
-
-               GMNAL_RXT_TXD_LOCK(nal_data);
-               desc = nal_data->rxt_stxd;
-               nal_data->rxt_stxd = desc->next;
-               GMNAL_RXT_TXD_UNLOCK(nal_data);
-
-               CDEBUG(D_INFO, "RXTHREAD got [%p], head is [%p]\n",
-                      desc, nal_data->rxt_stxd);
-               desc->kniov = 0;
-               desc->rxt = 1;
-               return(desc);
-       }
-
-       /* general txd get */
-       if (!block) {
-               if (GMNAL_TXD_TRYGETTOKEN(nal_data)) {
-                       CDEBUG(D_ERROR, "can't get token\n");
-                       return(NULL);
-               }
-       } else {
-               CDEBUG(D_INFO, "Attempting to get token\n");
-               GMNAL_TXD_GETTOKEN(nal_data);
-               CDEBUG(D_PORTALS, "Got token\n");
-       }
-
-       GMNAL_TXD_LOCK(nal_data);
-       desc = nal_data->stxd;
-       nal_data->stxd = desc->next;
-       GMNAL_TXD_UNLOCK(nal_data);
-
-       CDEBUG(D_INFO, "got [%p], head is [%p]\n", desc,
-              nal_data->stxd);
-       desc->kniov = 0;
-       return(desc);
-}
-
-/*
- *     Put a small transmit descriptor back on the list it came from
- *     (txd->rxt selects the rxthread list) and release the matching
- *     token.
- */
-void
-gmnal_return_stxd(gmnal_data_t *nal_data, gmnal_stxd_t *txd)
-{
-       CDEBUG(D_TRACE, "nal_data [%p], txd[%p] rxt[%d]\n", nal_data,
-              txd, txd->rxt);
-
-       if (!txd->rxt) {
-               /* general-purpose descriptor */
-               GMNAL_TXD_LOCK(nal_data);
-               txd->next = nal_data->stxd;
-               nal_data->stxd = txd;
-               GMNAL_TXD_UNLOCK(nal_data);
-               GMNAL_TXD_RETURNTOKEN(nal_data);
-               CDEBUG(D_INFO, "Returned stxd to general list\n");
-               return;
-       }
-
-       /* this transmit descriptor is for the rxthread */
-       GMNAL_RXT_TXD_LOCK(nal_data);
-       txd->next = nal_data->rxt_stxd;
-       nal_data->rxt_stxd = txd;
-       GMNAL_RXT_TXD_UNLOCK(nal_data);
-       GMNAL_RXT_TXD_RETURNTOKEN(nal_data);
-       CDEBUG(D_INFO, "Returned stxd to rxthread list\n");
-}
-
-
-/*
- *     Take a large transmit descriptor off the free list, which also
- *     secures a transmit token.  Always waits until a token is free.
- */
-gmnal_ltxd_t *
-gmnal_get_ltxd(gmnal_data_t *nal_data)
-{
-       gmnal_ltxd_t    *desc;
-
-       CDEBUG(D_TRACE, "nal_data [%p]\n", nal_data);
-
-       /* wait for a token first, then pop the list head under the lock */
-       GMNAL_LTXD_GETTOKEN(nal_data);
-
-       GMNAL_LTXD_LOCK(nal_data);
-       desc = nal_data->ltxd;
-       nal_data->ltxd = desc->next;
-       GMNAL_LTXD_UNLOCK(nal_data);
-
-       CDEBUG(D_INFO, "got [%p], head is [%p]\n", desc, nal_data->ltxd);
-       return(desc);
-}
-
-/*
- *     Push a large transmit descriptor back onto the free list and
- *     release its token.
- */
-void
-gmnal_return_ltxd(gmnal_data_t *nal_data, gmnal_ltxd_t *ltxd)
-{
-       CDEBUG(D_TRACE, "nal_data [%p], ltxd[%p]\n", nal_data, ltxd);
-
-       /* relink under the list lock, then hand the token back */
-       GMNAL_LTXD_LOCK(nal_data);
-       ltxd->next = nal_data->ltxd;
-       nal_data->ltxd = ltxd;
-       GMNAL_LTXD_UNLOCK(nal_data);
-
-       GMNAL_LTXD_RETURNTOKEN(nal_data);
-}
-/*
- *     allocate a number of small rx buffers with gm_dma_malloc()
- *     so they are wired and set up for DMA. This is a costly operation.
- *     Also allocate a corresponding descriptor to keep track of
- *     the buffer.
- *     Put all descriptors on singly linked list to be available to
- *     receive thread, and record each buffer -> descriptor mapping in
- *     nal_data->srxd_hash.
- *
- *     Returns GMNAL_STATUS_OK on success, GMNAL_STATUS_NOMEM if the hash
- *     table or a descriptor cannot be allocated, and GMNAL_STATUS_FAIL if
- *     a DMA buffer cannot be obtained or hashed.  Descriptors already on
- *     the list are left there on failure.
- */
-int
-gmnal_alloc_srxd(gmnal_data_t *nal_data)
-{
-       int nrx = 0, nsrx = 0, i = 0;
-       gmnal_srxd_t    *rxd = NULL;
-       void    *rxbuffer = NULL;
-
-       CDEBUG(D_TRACE, "gmnal_alloc_small rx\n");
-
-       GMNAL_GM_LOCK(nal_data);
-       nrx = gm_num_receive_tokens(nal_data->gm_port);
-       GMNAL_GM_UNLOCK(nal_data);
-       CDEBUG(D_INFO, "total number of receive tokens available is [%d]\n",
-              nrx);
-
-       /*
-        *      make the number of rxds twice our total
-        *      number of stxds plus 2
-        *      (nrx is only reported above; it does not bound this value)
-        */
-       nsrx = num_stxds*2 + 2;
-
-       CDEBUG(D_INFO, "Allocated [%d] receive tokens to small messages\n",
-              nsrx);
-
-
-       GMNAL_GM_LOCK(nal_data);
-       nal_data->srxd_hash = gm_create_hash(gm_hash_compare_ptrs,
-                                            gm_hash_hash_ptr, 0, 0, nsrx, 0);
-       GMNAL_GM_UNLOCK(nal_data);
-       if (!nal_data->srxd_hash) {
-                       CDEBUG(D_ERROR, "Failed to create hash table\n");
-                       return(GMNAL_STATUS_NOMEM);
-       }
-
-       GMNAL_RXD_TOKEN_INIT(nal_data, nsrx);
-       GMNAL_RXD_LOCK_INIT(nal_data);
-
-       /* NOTE(review): "<=" yields one descriptor more than there are tokens */
-       for (i=0; i<=nsrx; i++) {
-               PORTAL_ALLOC(rxd, sizeof(gmnal_srxd_t));
-               if (!rxd) {
-                       CDEBUG(D_ERROR, "Failed to malloc rxd [%d]\n", i);
-                       return(GMNAL_STATUS_NOMEM);
-               }
-               GMNAL_GM_LOCK(nal_data);
-               rxbuffer = gm_dma_malloc(nal_data->gm_port,
-                                        GMNAL_SMALL_MSG_SIZE(nal_data));
-               GMNAL_GM_UNLOCK(nal_data);
-               if (!rxbuffer) {
-                       CDEBUG(D_ERROR, "Failed to gm_dma_malloc rxbuffer [%d],"
-                              " size [%d]\n", i,
-                              GMNAL_SMALL_MSG_SIZE(nal_data));
-                       PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
-                       return(GMNAL_STATUS_FAIL);
-               }
-
-               rxd->buffer = rxbuffer;
-               rxd->size = GMNAL_SMALL_MSG_SIZE(nal_data);
-               rxd->gmsize = gm_min_size_for_length(rxd->size);
-
-               if (gm_hash_insert(nal_data->srxd_hash,
-                                  (void*)rxbuffer, (void*)rxd)) {
-
-                       CDEBUG(D_ERROR, "failed to create hash entry rxd[%p] "
-                              "for rxbuffer[%p]\n", rxd, rxbuffer);
-                       /*
-                        *      this pair is on neither the list nor the
-                        *      hash yet, so nothing else could free it
-                        */
-                       GMNAL_GM_LOCK(nal_data);
-                       gm_dma_free(nal_data->gm_port, rxbuffer);
-                       GMNAL_GM_UNLOCK(nal_data);
-                       PORTAL_FREE(rxd, sizeof(gmnal_srxd_t));
-                       return(GMNAL_STATUS_FAIL);
-               }
-
-               rxd->next = nal_data->srxd;
-               nal_data->srxd = rxd;
-               CDEBUG(D_INFO, "Registered rxd [%p] with buffer [%p], "
-                      "size [%d]\n", rxd, rxd->buffer, rxd->size);
-       }
-
-       return(GMNAL_STATUS_OK);
-}
-
-
-
-/*     Free the list of wired and gm_registered small rx buffers and the 
- *     rx descriptors that go along with them.
- */
-void
-gmnal_free_srxd(gmnal_data_t *nal_data)
-{
-       gmnal_srxd_t *rxd = nal_data->srxd, *_rxd = NULL;
-
-       CDEBUG(D_TRACE, "gmnal_free_small rx\n");
-
-       while(rxd) {
-               CDEBUG(D_INFO, "Freeing rxd [%p] buffer [%p], size [%d]\n",
-                      rxd, rxd->buffer, rxd->size);
-               _rxd = rxd;
-               rxd = rxd->next;
-
-#if 0
-               GMNAL_GM_LOCK(nal_data);
-               gm_deregister_memory(nal_data->gm_port, _rxd->buffer, 
-                                    _rxd->size);
-               GMNAL_GM_UNLOCK(nal_data);
-               PORTAL_FREE(_rxd->buffer, GMNAL_SMALL_RXBUFFER_SIZE);
-#else
-               GMNAL_GM_LOCK(nal_data);
-               gm_dma_free(nal_data->gm_port, _rxd->buffer);
-               GMNAL_GM_UNLOCK(nal_data);
-#endif
-               PORTAL_FREE(_rxd, sizeof(gmnal_srxd_t));
-       }
-       return;
-}
-
-
-/*
- *     Get a rxd from the free list
- *     This get us a wired and gm_registered small rx buffer.
- *     This implicitly gets us a receive token also.
- */
-gmnal_srxd_t *
-gmnal_get_srxd(gmnal_data_t *nal_data, int block)
-{
-
-       gmnal_srxd_t    *rxd = NULL;
-       CDEBUG(D_TRACE, "nal_data [%p] block [%d]\n", nal_data, block);
-
-       if (block) {
-               GMNAL_RXD_GETTOKEN(nal_data);
-       } else {
-               if (GMNAL_RXD_TRYGETTOKEN(nal_data)) {
-                       CDEBUG(D_INFO, "gmnal_get_srxd Can't get token\n");
-                       return(NULL);
-               }
-       }
-       GMNAL_RXD_LOCK(nal_data);
-       rxd = nal_data->srxd;
-       if (rxd)
-               nal_data->srxd = rxd->next;
-       GMNAL_RXD_UNLOCK(nal_data);
-       CDEBUG(D_INFO, "got [%p], head is [%p]\n", rxd, nal_data->srxd);
-       return(rxd);
-}
-
-/*
- *     Return an rxd to the list
- */
-void
-gmnal_return_srxd(gmnal_data_t *nal_data, gmnal_srxd_t *rxd)
-{
-       CDEBUG(D_TRACE, "nal_data [%p], rxd[%p]\n", nal_data, rxd);
-
-       GMNAL_RXD_LOCK(nal_data);
-       rxd->next = nal_data->srxd;
-       nal_data->srxd = rxd;
-       GMNAL_RXD_UNLOCK(nal_data);
-       GMNAL_RXD_RETURNTOKEN(nal_data);
-       return;
-}
-
-/*
- *     Given a pointer to a srxd find 
- *     the relevant descriptor for it
- *     This is done by searching a hash
- *     list that is created when the srxd's 
- *     are created
- */
-gmnal_srxd_t *
-gmnal_rxbuffer_to_srxd(gmnal_data_t *nal_data, void *rxbuffer)
-{
-       gmnal_srxd_t    *srxd = NULL;
-       CDEBUG(D_TRACE, "nal_data [%p], rxbuffer [%p]\n", nal_data, rxbuffer);
-       srxd = gm_hash_find(nal_data->srxd_hash, rxbuffer);
-       CDEBUG(D_INFO, "srxd is [%p]\n", srxd);
-       return(srxd);
-}
-
-
-void
-gmnal_stop_rxthread(gmnal_data_t *nal_data)
-{
-       int     delay = 30;
-
-
-
-       CDEBUG(D_TRACE, "Attempting to stop rxthread nal_data [%p]\n", 
-               nal_data);
-       
-       nal_data->rxthread_stop_flag = GMNAL_THREAD_STOP;
-
-       gmnal_remove_rxtwe(nal_data);
-       /*
-        *      kick the thread 
-        */
-       up(&nal_data->rxtwe_wait);
-
-       while(nal_data->rxthread_flag != GMNAL_THREAD_RESET && delay--) {
-               CDEBUG(D_INFO, "gmnal_stop_rxthread sleeping\n");
-                gmnal_yield(1);
-               up(&nal_data->rxtwe_wait);
-       }
-
-       if (nal_data->rxthread_flag != GMNAL_THREAD_RESET) {
-               CDEBUG(D_ERROR, "I don't know how to wake the thread\n");
-       } else {
-               CDEBUG(D_INFO, "rx thread seems to have stopped\n");
-       }
-}
-
-void
-gmnal_stop_ctthread(gmnal_data_t *nal_data)
-{
-       int     delay = 15;
-
-
-
-       CDEBUG(D_TRACE, "Attempting to stop ctthread nal_data [%p]\n", 
-              nal_data);
-       
-       nal_data->ctthread_flag = GMNAL_THREAD_STOP;
-       GMNAL_GM_LOCK(nal_data);
-       gm_set_alarm(nal_data->gm_port, &nal_data->ctthread_alarm, 10, 
-                    NULL, NULL);
-       GMNAL_GM_UNLOCK(nal_data);
-
-       while(nal_data->ctthread_flag == GMNAL_THREAD_STOP && delay--) {
-               CDEBUG(D_INFO, "gmnal_stop_ctthread sleeping\n");
-                gmnal_yield(1);
-       }
-
-       if (nal_data->ctthread_flag == GMNAL_THREAD_STOP) {
-               CDEBUG(D_ERROR, "I DON'T KNOW HOW TO WAKE THE THREAD\n");
-       } else {
-               CDEBUG(D_INFO, "CT THREAD SEEMS TO HAVE STOPPED\n");
-       }
-}
-
-
-
-char * 
-gmnal_gm_error(gm_status_t status)
-{
-       return(gm_strerror(status));
-
-       switch(status) {
-               case(GM_SUCCESS):
-                       return("SUCCESS");
-               case(GM_FAILURE):
-                       return("FAILURE");
-               case(GM_INPUT_BUFFER_TOO_SMALL):
-                       return("INPUT_BUFFER_TOO_SMALL");
-               case(GM_OUTPUT_BUFFER_TOO_SMALL):
-                       return("OUTPUT_BUFFER_TOO_SMALL");
-               case(GM_TRY_AGAIN ):
-                       return("TRY_AGAIN");
-               case(GM_BUSY):
-                       return("BUSY");
-               case(GM_MEMORY_FAULT):
-                       return("MEMORY_FAULT");
-               case(GM_INTERRUPTED):
-                       return("INTERRUPTED");
-               case(GM_INVALID_PARAMETER):
-                       return("INVALID_PARAMETER");
-               case(GM_OUT_OF_MEMORY):
-                       return("OUT_OF_MEMORY");
-               case(GM_INVALID_COMMAND):
-                       return("INVALID_COMMAND");
-               case(GM_PERMISSION_DENIED):
-                       return("PERMISSION_DENIED");
-               case(GM_INTERNAL_ERROR):
-                       return("INTERNAL_ERROR");
-               case(GM_UNATTACHED):
-                       return("UNATTACHED");
-               case(GM_UNSUPPORTED_DEVICE):
-                       return("UNSUPPORTED_DEVICE");
-               case(GM_SEND_TIMED_OUT):
-                       return("GM_SEND_TIMEDOUT");
-               case(GM_SEND_REJECTED):
-                       return("GM_SEND_REJECTED");
-               case(GM_SEND_TARGET_PORT_CLOSED):
-                       return("GM_SEND_TARGET_PORT_CLOSED");
-               case(GM_SEND_TARGET_NODE_UNREACHABLE):
-                       return("GM_SEND_TARGET_NODE_UNREACHABLE");
-               case(GM_SEND_DROPPED):
-                       return("GM_SEND_DROPPED");
-               case(GM_SEND_PORT_CLOSED):
-                       return("GM_SEND_PORT_CLOSED");
-               case(GM_NODE_ID_NOT_YET_SET):
-                       return("GM_NODE_ID_NOT_YET_SET");
-               case(GM_STILL_SHUTTING_DOWN):
-                       return("GM_STILL_SHUTTING_DOWN");
-               case(GM_CLONE_BUSY):
-                       return("GM_CLONE_BUSY");
-               case(GM_NO_SUCH_DEVICE):
-                       return("GM_NO_SUCH_DEVICE");
-               case(GM_ABORTED):
-                       return("GM_ABORTED");
-               case(GM_INCOMPATIBLE_LIB_AND_DRIVER):
-                       return("GM_INCOMPATIBLE_LIB_AND_DRIVER");
-               case(GM_UNTRANSLATED_SYSTEM_ERROR):
-                       return("GM_UNTRANSLATED_SYSTEM_ERROR");
-               case(GM_ACCESS_DENIED):
-                       return("GM_ACCESS_DENIED");
-
-
-/*
- *     These ones are in the docs but aren't in the header file 
-               case(GM_DEV_NOT_FOUND):
-                       return("GM_DEV_NOT_FOUND");
-               case(GM_INVALID_PORT_NUMBER):
-                       return("GM_INVALID_PORT_NUMBER");
-               case(GM_UC_ERROR):
-                       return("GM_US_ERROR");
-               case(GM_PAGE_TABLE_FULL):
-                       return("GM_PAGE_TABLE_FULL");
-               case(GM_MINOR_OVERFLOW):
-                       return("GM_MINOR_OVERFLOW");
-               case(GM_SEND_ORPHANED):
-                       return("GM_SEND_ORPHANED");
-               case(GM_HARDWARE_FAULT):
-                       return("GM_HARDWARE_FAULT");
-               case(GM_DATA_CORRUPTED):
-                       return("GM_DATA_CORRUPTED");
-               case(GM_TIMED_OUT):
-                       return("GM_TIMED_OUT");
-               case(GM_USER_ERROR):
-                       return("GM_USER_ERROR");
-               case(GM_NO_MATCH):
-                       return("GM_NOMATCH");
-               case(GM_NOT_SUPPORTED_IN_KERNEL):
-                       return("GM_NOT_SUPPORTED_IN_KERNEL");
-               case(GM_NOT_SUPPORTED_ON_ARCH):
-                       return("GM_NOT_SUPPORTED_ON_ARCH");
-               case(GM_PTE_REF_CNT_OVERFLOW):
-                       return("GM_PTR_REF_CNT_OVERFLOW");
-               case(GM_NO_DRIVER_SUPPORT):
-                       return("GM_NO_DRIVER_SUPPORT");
-               case(GM_FIRMWARE_NOT_RUNNING):
-                       return("GM_FIRMWARE_NOT_RUNNING");
-
- *     These ones are in the docs but aren't in the header file 
- */
-               default:
-                       return("UNKNOWN GM ERROR CODE");
-       }
-}
-
-
-char *
-gmnal_rxevent(gm_recv_event_t  *ev)
-{
-       short   event;
-       event = GM_RECV_EVENT_TYPE(ev);
-       switch(event) {
-               case(GM_NO_RECV_EVENT):
-                       return("GM_NO_RECV_EVENT");
-               case(GM_SENDS_FAILED_EVENT):
-                       return("GM_SEND_FAILED_EVENT");
-               case(GM_ALARM_EVENT):
-                       return("GM_ALARM_EVENT");
-               case(GM_SENT_EVENT):
-                       return("GM_SENT_EVENT");
-               case(_GM_SLEEP_EVENT):
-                       return("_GM_SLEEP_EVENT");
-               case(GM_RAW_RECV_EVENT):
-                       return("GM_RAW_RECV_EVENT");
-               case(GM_BAD_SEND_DETECTED_EVENT):
-                       return("GM_BAD_SEND_DETECTED_EVENT");
-               case(GM_SEND_TOKEN_VIOLATION_EVENT):
-                       return("GM_SEND_TOKEN_VIOLATION_EVENT");
-               case(GM_RECV_TOKEN_VIOLATION_EVENT):
-                       return("GM_RECV_TOKEN_VIOLATION_EVENT");
-               case(GM_BAD_RECV_TOKEN_EVENT):
-                       return("GM_BAD_RECV_TOKEN_EVENT");
-               case(GM_ALARM_VIOLATION_EVENT):
-                       return("GM_ALARM_VIOLATION_EVENT");
-               case(GM_RECV_EVENT):
-                       return("GM_RECV_EVENT");
-               case(GM_HIGH_RECV_EVENT):
-                       return("GM_HIGH_RECV_EVENT");
-               case(GM_PEER_RECV_EVENT):
-                       return("GM_PEER_RECV_EVENT");
-               case(GM_HIGH_PEER_RECV_EVENT):
-                       return("GM_HIGH_PEER_RECV_EVENT");
-               case(GM_FAST_RECV_EVENT):
-                       return("GM_FAST_RECV_EVENT");
-               case(GM_FAST_HIGH_RECV_EVENT):
-                       return("GM_FAST_HIGH_RECV_EVENT");
-               case(GM_FAST_PEER_RECV_EVENT):
-                       return("GM_FAST_PEER_RECV_EVENT");
-               case(GM_FAST_HIGH_PEER_RECV_EVENT):
-                       return("GM_FAST_HIGH_PEER_RECV_EVENT");
-               case(GM_REJECTED_SEND_EVENT):
-                       return("GM_REJECTED_SEND_EVENT");
-               case(GM_ORPHANED_SEND_EVENT):
-                       return("GM_ORPHANED_SEND_EVENT");
-               case(GM_BAD_RESEND_DETECTED_EVENT):
-                       return("GM_BAD_RESEND_DETETED_EVENT");
-               case(GM_DROPPED_SEND_EVENT):
-                       return("GM_DROPPED_SEND_EVENT");
-               case(GM_BAD_SEND_VMA_EVENT):
-                       return("GM_BAD_SEND_VMA_EVENT");
-               case(GM_BAD_RECV_VMA_EVENT):
-                       return("GM_BAD_RECV_VMA_EVENT");
-               case(_GM_FLUSHED_ALARM_EVENT):
-                       return("GM_FLUSHED_ALARM_EVENT");
-               case(GM_SENT_TOKENS_EVENT):
-                       return("GM_SENT_TOKENS_EVENTS");
-               case(GM_IGNORE_RECV_EVENT):
-                       return("GM_IGNORE_RECV_EVENT");
-               case(GM_ETHERNET_RECV_EVENT):
-                       return("GM_ETHERNET_RECV_EVENT");
-               case(GM_NEW_NO_RECV_EVENT):
-                       return("GM_NEW_NO_RECV_EVENT");
-               case(GM_NEW_SENDS_FAILED_EVENT):
-                       return("GM_NEW_SENDS_FAILED_EVENT");
-               case(GM_NEW_ALARM_EVENT):
-                       return("GM_NEW_ALARM_EVENT");
-               case(GM_NEW_SENT_EVENT):
-                       return("GM_NEW_SENT_EVENT");
-               case(_GM_NEW_SLEEP_EVENT):
-                       return("GM_NEW_SLEEP_EVENT");
-               case(GM_NEW_RAW_RECV_EVENT):
-                       return("GM_NEW_RAW_RECV_EVENT");
-               case(GM_NEW_BAD_SEND_DETECTED_EVENT):
-                       return("GM_NEW_BAD_SEND_DETECTED_EVENT");
-               case(GM_NEW_SEND_TOKEN_VIOLATION_EVENT):
-                       return("GM_NEW_SEND_TOKEN_VIOLATION_EVENT");
-               case(GM_NEW_RECV_TOKEN_VIOLATION_EVENT):
-                       return("GM_NEW_RECV_TOKEN_VIOLATION_EVENT");
-               case(GM_NEW_BAD_RECV_TOKEN_EVENT):
-                       return("GM_NEW_BAD_RECV_TOKEN_EVENT");
-               case(GM_NEW_ALARM_VIOLATION_EVENT):
-                       return("GM_NEW_ALARM_VIOLATION_EVENT");
-               case(GM_NEW_RECV_EVENT):
-                       return("GM_NEW_RECV_EVENT");
-               case(GM_NEW_HIGH_RECV_EVENT):
-                       return("GM_NEW_HIGH_RECV_EVENT");
-               case(GM_NEW_PEER_RECV_EVENT):
-                       return("GM_NEW_PEER_RECV_EVENT");
-               case(GM_NEW_HIGH_PEER_RECV_EVENT):
-                       return("GM_NEW_HIGH_PEER_RECV_EVENT");
-               case(GM_NEW_FAST_RECV_EVENT):
-                       return("GM_NEW_FAST_RECV_EVENT");
-               case(GM_NEW_FAST_HIGH_RECV_EVENT):
-                       return("GM_NEW_FAST_HIGH_RECV_EVENT");
-               case(GM_NEW_FAST_PEER_RECV_EVENT):
-                       return("GM_NEW_FAST_PEER_RECV_EVENT");
-               case(GM_NEW_FAST_HIGH_PEER_RECV_EVENT):
-                       return("GM_NEW_FAST_HIGH_PEER_RECV_EVENT");
-               case(GM_NEW_REJECTED_SEND_EVENT):
-                       return("GM_NEW_REJECTED_SEND_EVENT");
-               case(GM_NEW_ORPHANED_SEND_EVENT):
-                       return("GM_NEW_ORPHANED_SEND_EVENT");
-               case(_GM_NEW_PUT_NOTIFICATION_EVENT):
-                       return("_GM_NEW_PUT_NOTIFICATION_EVENT");
-               case(GM_NEW_FREE_SEND_TOKEN_EVENT):
-                       return("GM_NEW_FREE_SEND_TOKEN_EVENT");
-               case(GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT):
-                       return("GM_NEW_FREE_HIGH_SEND_TOKEN_EVENT");
-               case(GM_NEW_BAD_RESEND_DETECTED_EVENT):
-                       return("GM_NEW_BAD_RESEND_DETECTED_EVENT");
-               case(GM_NEW_DROPPED_SEND_EVENT):
-                       return("GM_NEW_DROPPED_SEND_EVENT");
-               case(GM_NEW_BAD_SEND_VMA_EVENT):
-                       return("GM_NEW_BAD_SEND_VMA_EVENT");
-               case(GM_NEW_BAD_RECV_VMA_EVENT):
-                       return("GM_NEW_BAD_RECV_VMA_EVENT");
-               case(_GM_NEW_FLUSHED_ALARM_EVENT):
-                       return("GM_NEW_FLUSHED_ALARM_EVENT");
-               case(GM_NEW_SENT_TOKENS_EVENT):
-                       return("GM_NEW_SENT_TOKENS_EVENT");
-               case(GM_NEW_IGNORE_RECV_EVENT):
-                       return("GM_NEW_IGNORE_RECV_EVENT");
-               case(GM_NEW_ETHERNET_RECV_EVENT):
-                       return("GM_NEW_ETHERNET_RECV_EVENT");
-               default:
-                       return("Unknown Recv event");
-#if 0
-               case(/* _GM_PUT_NOTIFICATION_EVENT */
-               case(/* GM_FREE_SEND_TOKEN_EVENT */
-               case(/* GM_FREE_HIGH_SEND_TOKEN_EVENT */
-#endif
-       }
-}
-
-
-void
-gmnal_yield(int delay)
-{
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule_timeout(delay);
-}
-
-int
-gmnal_is_small_msg(gmnal_data_t *nal_data, int niov, struct iovec *iov, 
-                   int len)
-{
-
-       CDEBUG(D_TRACE, "len [%d] limit[%d]\n", len, 
-              GMNAL_SMALL_MSG_SIZE(nal_data));
-
-       if ((len + sizeof(ptl_hdr_t) + sizeof(gmnal_msghdr_t)) 
-                    < GMNAL_SMALL_MSG_SIZE(nal_data)) {
-
-               CDEBUG(D_INFO, "Yep, small message\n");
-               return(1);
-       } else {
-               CDEBUG(D_ERROR, "No, not small message\n");
-               /*
-                *      could be made up of lots of little ones !
-                */
-               return(0);
-       }
-
-}
-
-/* 
- *     extract info from the receive event.
- *     Have to do this before the next call to gm_receive
- *     Deal with all endian stuff here.
- *     Then stick work entry on list where rxthreads
- *     can get it to complete the receive
- */
-int
-gmnal_add_rxtwe(gmnal_data_t *nal_data, gm_recv_t *recv)
-{
-       gmnal_rxtwe_t   *we = NULL;
-
-       CDEBUG(D_NET, "adding entry to list\n");
-
-       PORTAL_ALLOC(we, sizeof(gmnal_rxtwe_t));
-       if (!we) {
-               CDEBUG(D_ERROR, "failed to malloc\n");
-               return(GMNAL_STATUS_FAIL);
-       }
-       we->buffer = gm_ntohp(recv->buffer);
-       we->snode = (int)gm_ntoh_u16(recv->sender_node_id);
-       we->sport = (int)gm_ntoh_u8(recv->sender_port_id);
-       we->type = (int)gm_ntoh_u8(recv->type);
-       we->length = (int)gm_ntohl(recv->length);
-
-       spin_lock(&nal_data->rxtwe_lock);
-       if (nal_data->rxtwe_tail) {
-               nal_data->rxtwe_tail->next = we;
-       } else {
-               nal_data->rxtwe_head = we;
-               nal_data->rxtwe_tail = we;
-       }
-       nal_data->rxtwe_tail = we;
-       spin_unlock(&nal_data->rxtwe_lock);
-
-       up(&nal_data->rxtwe_wait);
-       return(GMNAL_STATUS_OK);
-}
-
-void
-gmnal_remove_rxtwe(gmnal_data_t *nal_data)
-{
-       gmnal_rxtwe_t   *_we, *we = nal_data->rxtwe_head;
-
-       CDEBUG(D_NET, "removing all work list entries\n");
-
-       spin_lock(&nal_data->rxtwe_lock);
-       CDEBUG(D_NET, "Got lock\n");
-       while (we) {
-               _we = we;
-               we = we->next;
-               PORTAL_FREE(_we, sizeof(gmnal_rxtwe_t));
-       }
-       spin_unlock(&nal_data->rxtwe_lock);
-       nal_data->rxtwe_head = NULL;
-       nal_data->rxtwe_tail = NULL;
-}
-
-gmnal_rxtwe_t *
-gmnal_get_rxtwe(gmnal_data_t *nal_data)
-{
-       gmnal_rxtwe_t   *we = NULL;
-
-       CDEBUG(D_NET, "Getting entry to list\n");
-
-       do  {
-               down(&nal_data->rxtwe_wait);
-               if (nal_data->rxthread_stop_flag == GMNAL_THREAD_STOP) {
-                       /*
-                        *      time to stop
-                        *      TO DO some one free the work entries    
-                        */
-                       return(NULL);
-               }
-               spin_lock(&nal_data->rxtwe_lock);
-               if (nal_data->rxtwe_head) {
-                       CDEBUG(D_INFO, "Got a work entry\n");
-                       we = nal_data->rxtwe_head;
-                       nal_data->rxtwe_head = we->next;
-                       if (!nal_data->rxtwe_head)
-                               nal_data->rxtwe_tail = NULL;
-               } else {
-                       CDEBUG(D_WARNING, "woken but no work\n");
-               }
-               spin_unlock(&nal_data->rxtwe_lock);
-       } while (!we);
-
-       CDEBUG(D_INFO, "Returning we[%p]\n", we);
-       return(we);
-}
-
-
-/*
- *     Start the caretaker thread and a number of receiver threads
- *     The caretaker thread gets events from the gm library.
- *     It passes receive events to the receiver threads via a work list.
- *     It processes other events itself in gm_unknown. These will be
- *     callback events or sleeps.
- */
-int
-gmnal_start_kernel_threads(gmnal_data_t *nal_data)
-{
-
-       int     threads = 0;
-       /*
-        *      the alarm is used to wake the caretaker thread from 
-        *      gm_unknown call (sleeping) to exit it.
-        */
-       CDEBUG(D_NET, "Initializing caretaker thread alarm and flag\n");
-       gm_initialize_alarm(&nal_data->ctthread_alarm);
-       nal_data->ctthread_flag = GMNAL_THREAD_RESET;
-
-
-       CDEBUG(D_INFO, "Starting caretaker thread\n");
-       nal_data->ctthread_pid = 
-                kernel_thread(gmnal_ct_thread, (void*)nal_data, 0);
-       if (nal_data->ctthread_pid <= 0) {
-               CDEBUG(D_ERROR, "Caretaker thread failed to start\n");
-               return(GMNAL_STATUS_FAIL);
-       }
-
-       while (nal_data->rxthread_flag != GMNAL_THREAD_RESET) {
-               gmnal_yield(1);
-               CDEBUG(D_INFO, "Waiting for caretaker thread signs of life\n");
-       }
-
-       CDEBUG(D_INFO, "caretaker thread has started\n");
-
-
-       /*
-        *      Now start a number of receiver threads
-        *      these treads get work to do from the caretaker (ct) thread
-        */
-       nal_data->rxthread_flag = GMNAL_THREAD_RESET;
-       nal_data->rxthread_stop_flag = GMNAL_THREAD_RESET;
-
-       for (threads=0; threads<NRXTHREADS; threads++)
-               nal_data->rxthread_pid[threads] = -1;
-       spin_lock_init(&nal_data->rxtwe_lock);
-       spin_lock_init(&nal_data->rxthread_flag_lock);
-       sema_init(&nal_data->rxtwe_wait, 0);
-       nal_data->rxtwe_head = NULL;
-       nal_data->rxtwe_tail = NULL;
-        /*
-         *      If the default number of receive threades isn't
-         *      modified at load time, then start one thread per cpu
-         */
-        if (num_rx_threads == -1)
-                num_rx_threads = smp_num_cpus;
-       CDEBUG(D_INFO, "Starting [%d] receive threads\n", num_rx_threads);
-       for (threads=0; threads<num_rx_threads; threads++) {
-               nal_data->rxthread_pid[threads] = 
-                      kernel_thread(gmnal_rx_thread, (void*)nal_data, 0);
-               if (nal_data->rxthread_pid[threads] <= 0) {
-                       CDEBUG(D_ERROR, "Receive thread failed to start\n");
-                       gmnal_stop_rxthread(nal_data);
-                       gmnal_stop_ctthread(nal_data);
-                       return(GMNAL_STATUS_FAIL);
-               }
-       }
-
-       for (;;) {
-               spin_lock(&nal_data->rxthread_flag_lock);
-               if (nal_data->rxthread_flag == GMNAL_RXTHREADS_STARTED) {
-                       spin_unlock(&nal_data->rxthread_flag_lock);
-                       break;
-               }
-               spin_unlock(&nal_data->rxthread_flag_lock);
-               gmnal_yield(1);
-       }
-
-       CDEBUG(D_INFO, "receive threads seem to have started\n");
-
-       return(GMNAL_STATUS_OK);
-}
diff --git a/lustre/portals/knals/iibnal/.cvsignore b/lustre/portals/knals/iibnal/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/iibnal/Makefile.in b/lustre/portals/knals/iibnal/Makefile.in
deleted file mode 100644 (file)
index e7934e2..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kiibnal
-kiibnal-objs := iibnal.o iibnal_cb.o
-
-EXTRA_POST_CFLAGS := @IIBCPPFLAGS@
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/iibnal/Makefile.mk b/lustre/portals/knals/iibnal/Makefile.mk
deleted file mode 100644 (file)
index 0459a20..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include $(src)/../../Kernelenv
-
-obj-y += kiibnal.o
-kiibnal-objs := iibnal.o iibnal_cb.o
-
diff --git a/lustre/portals/knals/iibnal/autoMakefile.am b/lustre/portals/knals/iibnal/autoMakefile.am
deleted file mode 100644 (file)
index 251df66..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if !CRAY_PORTALS
-if BUILD_IIBNAL
-modulenet_DATA = kiibnal$(KMODEXT)
-endif
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(kiibnal-objs:%.o=%.c) iibnal.h
diff --git a/lustre/portals/knals/iibnal/iibnal.c b/lustre/portals/knals/iibnal/iibnal.c
deleted file mode 100644 (file)
index 09908c9..0000000
+++ /dev/null
@@ -1,1713 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "iibnal.h"
-
-/* File-scope NAL state: API handle, NI handle and tunables. */
-nal_t                   kibnal_api;
-ptl_handle_ni_t         kibnal_ni;
-kib_tunables_t          kibnal_tunables;
-
-/* Only the service id is given an explicit static initialiser. */
-kib_data_t              kibnal_data = {
-        .kib_service_id = IBNAL_SERVICE_NUMBER,
-};
-
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL             202
-
-#define IBNAL_SYSCTL_TIMEOUT     1
-
-/* Leaf table: exposes kib_io_timeout as the int entry "timeout" (0644). */
-static ctl_table kibnal_ctl_table[] = {
-        {IBNAL_SYSCTL_TIMEOUT, "timeout", 
-         &kibnal_tunables.kib_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        { 0 }
-};
-
-/* Top table: the "iibnal" entry (0555) whose child is the table above. */
-static ctl_table kibnal_top_ctl_table[] = {
-        {IBNAL_SYSCTL, "iibnal", NULL, 0, 0555, kibnal_ctl_table},
-        { 0 }
-};
-#endif
-
-#ifdef unused
-/* Debug helper: log a service record (or the fact that it is NULL)
- * together with a caller-supplied tag and status code. */
-void
-print_service(IB_SERVICE_RECORD *service, char *tag, int rc)
-{
-        char svc_name[32];
-
-        if (service != NULL) {
-                /* bounded copy; the last byte is always the terminator */
-                strncpy (svc_name, service->ServiceName,
-                         sizeof(svc_name) - 1);
-                svc_name[sizeof(svc_name) - 1] = 0;
-
-                CWARN("tag       : %s\n"
-                      "status    : %d\n"
-                      "service id: "LPX64"\n"
-                      "name      : %s\n"
-                      "NID       : "LPX64"\n", tag, rc,
-                      service->RID.ServiceID, svc_name,
-                      *kibnal_service_nid_field(service));
-                return;
-        }
-
-        CWARN("tag       : %s\n"
-              "status    : %d (NULL)\n", tag, rc);
-}
-#endif
-
-/* Completion callback for service record set/delete operations: store
- * the fabric status through 'arg' and release kib_nid_signal. */
-static void
-kibnal_service_setunset_done (void *arg, FABRIC_OPERATION_DATA *fod,
-                              FSTATUS frc, uint32 madrc)
-{
-        FSTATUS *result = (FSTATUS *)arg;
-
-        *result = frc;
-        up (&kibnal_data.kib_nid_signal);
-}
-
-#if IBNAL_CHECK_ADVERT
-/* Completion callback for the advert check query: store the resulting
- * status through 'arg' and release kib_nid_signal. */
-static void
-kibnal_service_query_done (void *arg, QUERY *qry,
-                           QUERY_RESULT_VALUES *qry_result)
-{
-        FSTATUS *result = (FSTATUS *)arg;
-        FSTATUS  status = qry_result->Status;
-
-        /* NOTE(review): this only rewrites a status that is already a
-         * failure; the intent may have been to treat "success with an
-         * empty result" as an error -- confirm before relying on it. */
-        if (status != FSUCCESS && qry_result->ResultDataSize == 0)
-                status = FERROR;
-
-        *result = status;
-        up (&kibnal_data.kib_nid_signal);
-}
-
-/* Query the fabric for a service record matching our own service keys
- * and log an error if the query fails.  Purely diagnostic: nothing is
- * returned to the caller.
- *
- * The query is issued asynchronously; kibnal_service_query_done()
- * stores the final status in frc2 and ups kib_nid_signal, which we wait
- * on below. */
-static void
-kibnal_check_advert (void)
-{
-        QUERY                  *qry;
-        IB_SERVICE_RECORD      *svc;
-        FSTATUS                 frc;
-        FSTATUS                 frc2;
-
-        PORTAL_ALLOC(qry, sizeof(*qry));
-        if (qry == NULL)
-                return;
-
-        memset (qry, 0, sizeof(*qry));
-        qry->InputType = InputTypeServiceRecord;
-        qry->OutputType = OutputTypeServiceRecord;
-        qry->InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
-        svc = &qry->InputValue.ServiceRecordValue.ServiceRecord;
-        kibnal_set_service_keys(svc, kibnal_data.kib_nid);
-
-        frc = iibt_sd_query_port_fabric_information(kibnal_data.kib_sd,
-                                                    kibnal_data.kib_port_guid,
-                                                    qry,
-                                                    kibnal_service_query_done,
-                                                    NULL, &frc2);
-        if (frc != FSUCCESS && frc != FPENDING) {
-                CERROR ("Immediate error %d checking SM service\n", frc);
-        } else {
-                /* wait for the completion callback */
-                down (&kibnal_data.kib_nid_signal);
-                frc = frc2;
-
-                if (frc != 0)
-                        CERROR ("Error %d checking SM service\n", frc);
-        }
-
-        /* the query is finished (or was never started); release it, as
-         * kibnal_advertise() does with its operation data */
-        PORTAL_FREE(qry, sizeof(*qry));
-}
-#endif
-
-/* Zero 'fod', set its operation type and fill in the embedded service
- * record from the current kibnal_data settings (service id, port GUID,
- * default subnet prefix, P_Key, maximum lease and the NID-derived
- * service keys). */
-static void fill_fod(FABRIC_OPERATION_DATA *fod, FABRIC_OPERATION_TYPE type)
-{
-        IB_SERVICE_RECORD *rec = &fod->Value.ServiceRecordValue.ServiceRecord;
-
-        memset (fod, 0, sizeof(*fod));
-        fod->Type = type;
-
-        rec->ServiceLease = 0xffffffff;
-        rec->RID.ServiceP_Key = kibnal_data.kib_port_pkey;
-        rec->RID.ServiceID = kibnal_data.kib_service_id;
-        rec->RID.ServiceGID.Type.Global.SubnetPrefix = DEFAULT_SUBNET_PREFIX;
-        rec->RID.ServiceGID.Type.Global.InterfaceID = kibnal_data.kib_port_guid;
-
-        kibnal_set_service_keys(rec, kibnal_data.kib_nid);
-}
-
-/* Register a service record for our NID with the fabric.
- *
- * The operation is asynchronous: kibnal_service_setunset_done() stores
- * the final status in frc2 and ups kib_nid_signal, which we wait on.
- *
- * Requires kib_nid to be set (asserted).
- * Returns 0 on success, -ENOMEM if the operation data cannot be
- * allocated, -EINVAL on any fabric error. */
-static int
-kibnal_advertise (void)
-{
-        FABRIC_OPERATION_DATA *fod;
-        IB_SERVICE_RECORD     *svc;
-        FSTATUS                frc;
-        FSTATUS                frc2;
-
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC(fod, sizeof(*fod));
-        if (fod == NULL)
-                return (-ENOMEM);
-
-        fill_fod(fod, FabOpSetServiceRecord);
-        svc = &fod->Value.ServiceRecordValue.ServiceRecord;
-
-        CDEBUG(D_NET, "Advertising service id "LPX64" %s:"LPX64"\n",
-               svc->RID.ServiceID,
-               svc->ServiceName, *kibnal_service_nid_field(svc));
-
-        frc = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
-                                            kibnal_data.kib_port_guid,
-                                            fod, kibnal_service_setunset_done,
-                                            NULL, &frc2);
-
-        if (frc != FSUCCESS && frc != FPENDING) {
-                CERROR ("Immediate error %d advertising NID "LPX64"\n",
-                        frc, kibnal_data.kib_nid);
-                goto out;
-        }
-
-        /* wait for the completion callback */
-        down (&kibnal_data.kib_nid_signal);
-
-        frc = frc2;
-        if (frc != FSUCCESS)
-                /* message previously said "BUD"; every sibling message
-                 * in this file calls the value a NID */
-                CERROR ("Error %d advertising NID "LPX64"\n",
-                        frc, kibnal_data.kib_nid);
-out:
-        PORTAL_FREE(fod, sizeof(*fod));
-        return (frc == FSUCCESS) ? 0 : -EINVAL;
-}
-
-/* Delete the service record for our NID from the fabric.
- *
- * 'expect_success' says which outcome the caller regards as normal:
- * when set, a failed delete is logged as an error; when clear, a
- * successful delete is logged as a warning (a conflicting record was
- * removed).  The matching outcome is silent.  Requires kib_nid to be
- * set (asserted). */
-static void
-kibnal_unadvertise (int expect_success)
-{
-        FABRIC_OPERATION_DATA *fod;
-        IB_SERVICE_RECORD     *rec;
-        FSTATUS                status;
-        FSTATUS                done_status;
-
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC(fod, sizeof(*fod));
-        if (fod == NULL)
-                return;
-
-        fill_fod(fod, FabOpDeleteServiceRecord);
-        rec = &fod->Value.ServiceRecordValue.ServiceRecord;
-
-        CDEBUG(D_NET, "Unadvertising service %s:"LPX64"\n",
-               rec->ServiceName, *kibnal_service_nid_field(rec));
-
-        status = iibt_sd_port_fabric_operation(kibnal_data.kib_sd,
-                                               kibnal_data.kib_port_guid,
-                                               fod,
-                                               kibnal_service_setunset_done,
-                                               NULL, &done_status);
-
-        if (status != FSUCCESS && status != FPENDING) {
-                CERROR ("Immediate error %d unadvertising NID "LPX64"\n",
-                        status, kibnal_data.kib_nid);
-        } else {
-                /* wait for the completion callback */
-                down (&kibnal_data.kib_nid_signal);
-
-                /* only report when the outcome is not the expected one */
-                if ((done_status == FSUCCESS) != !!expect_success) {
-                        if (expect_success)
-                                CERROR("Error %d unadvertising NID "LPX64"\n",
-                                       done_status, kibnal_data.kib_nid);
-                        else
-                                CWARN("Removed conflicting NID "LPX64"\n",
-                                      kibnal_data.kib_nid);
-                }
-        }
-
-        PORTAL_FREE(fod, sizeof(*fod));
-}
-
-static int
-kibnal_set_mynid(ptl_nid_t nid)
-{
-        struct timeval tv;
-        lib_ni_t      *ni = &kibnal_lib.libnal_ni;
-        int            rc;
-        FSTATUS        frc;
-
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->ni_pid.nid);
-
-        do_gettimeofday(&tv);
-
-        down (&kibnal_data.kib_nid_mutex);
-
-        if (nid == kibnal_data.kib_nid) {
-                /* no change of NID */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
-
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
-               kibnal_data.kib_nid, nid);
-        
-        if (kibnal_data.kib_nid != PTL_NID_ANY) {
-
-                kibnal_unadvertise (1);
-
-                frc = iibt_cm_cancel(kibnal_data.kib_cep);
-                if (frc != FSUCCESS && frc != FPENDING)
-                        CERROR ("Error %d stopping listener\n", frc);
-
-                frc = iibt_cm_destroy_cep(kibnal_data.kib_cep);
-                if (frc != FSUCCESS)
-                        CERROR ("Error %d destroying CEP\n", frc);
-
-                kibnal_data.kib_cep = NULL;
-        }
-        
-        kibnal_data.kib_nid = ni->ni_pid.nid = nid;
-        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        
-        /* Delete all existing peers and their connections after new
-         * NID/incarnation set to ensure no old connections in our brave
-         * new world. */
-        kibnal_del_peer (PTL_NID_ANY, 0);
-
-        if (kibnal_data.kib_nid == PTL_NID_ANY) {
-                /* No new NID to install */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
-
-        /* remove any previous advert (crashed node etc) */
-        kibnal_unadvertise(0);
-
-        kibnal_data.kib_cep = iibt_cm_create_cep(CM_RC_TYPE);
-        if (kibnal_data.kib_cep == NULL) {
-                CERROR ("Can't create CEP\n");
-                rc = -ENOMEM;
-        } else {
-                CM_LISTEN_INFO info;
-                memset (&info, 0, sizeof(info));
-                info.ListenAddr.EndPt.SID = kibnal_data.kib_service_id;
-
-                frc = iibt_cm_listen(kibnal_data.kib_cep, &info,
-                                     kibnal_listen_callback, NULL);
-                if (frc != FSUCCESS && frc != FPENDING) {
-                        CERROR ("iibt_cm_listen error: %d\n", frc);
-                        rc = -EINVAL;
-                } else {
-                        rc = 0;
-                }
-        }
-        
-        if (rc == 0) {
-                rc = kibnal_advertise();
-                if (rc == 0) {
-#if IBNAL_CHECK_ADVERT
-                        kibnal_check_advert();
-#endif
-                        up (&kibnal_data.kib_nid_mutex);
-                        return (0);
-                }
-                
-                iibt_cm_cancel (kibnal_data.kib_cep);
-                iibt_cm_destroy_cep (kibnal_data.kib_cep);
-                /* remove any peers that sprung up while I failed to
-                 * advertise myself */
-                kibnal_del_peer (PTL_NID_ANY, 0);
-        }
-
-        kibnal_data.kib_nid = PTL_NID_ANY;
-        up (&kibnal_data.kib_nid_mutex);
-        return (rc);
-}
-
-/* Allocate a zeroed peer for 'nid' holding a single reference (the
- * caller's) and count it in kib_npeers.  The peer is not linked into
- * the peer table.  Returns NULL if the allocation fails. */
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
-{
-        kib_peer_t *peer;
-
-        LASSERT (nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC (peer, sizeof (*peer));
-        if (peer != NULL) {
-                /* clear flags, counters and pointers */
-                memset(peer, 0, sizeof(*peer));
-
-                peer->ibp_nid = nid;
-                /* the caller owns the initial reference */
-                atomic_set (&peer->ibp_refcount, 1);
-
-                /* empty lists: not yet in the peer table */
-                INIT_LIST_HEAD (&peer->ibp_list);
-                INIT_LIST_HEAD (&peer->ibp_conns);
-                INIT_LIST_HEAD (&peer->ibp_tx_queue);
-
-                peer->ibp_reconnect_time = jiffies;
-                peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
-
-                atomic_inc (&kibnal_data.kib_npeers);
-        }
-
-        return (peer);
-}
-
-/* Free a peer whose refcount has reached zero and drop it from the
- * kib_npeers count.  The peer must be fully idle: not persistent, not
- * in the peer table, not connecting, with no conns and no queued txs
- * (all asserted). */
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
-        LASSERT (atomic_read (&peer->ibp_refcount) == 0);
-        LASSERT (peer->ibp_persistence == 0);
-        LASSERT (!kibnal_peer_active(peer));
-        LASSERT (peer->ibp_connecting == 0);
-        LASSERT (list_empty (&peer->ibp_conns));
-        LASSERT (list_empty (&peer->ibp_tx_queue));
-
-        PORTAL_FREE (peer, sizeof (*peer));
-
-        /* Every connection holds a reference on its peer until the
-         * connection itself is destroyed, so a zero refcount means all
-         * state belonging to this peer has already been cleaned up. */
-        atomic_dec (&kibnal_data.kib_npeers);
-}
-
-/* Look 'nid' up in its peer hash chain and return the matching peer,
- * or NULL.  No reference is taken here: the caller must account for
- * any additional reference itself. */
-kib_peer_t *
-kibnal_find_peer_locked (ptl_nid_t nid)
-{
-        struct list_head *chain = kibnal_nid2peerlist (nid);
-        struct list_head *pos;
-        kib_peer_t       *peer;
-
-        list_for_each (pos, chain) {
-                peer = list_entry (pos, kib_peer_t, ibp_list);
-
-                /* anything in the table must have a reason to be there */
-                LASSERT (peer->ibp_persistence != 0 || /* persistent peer */
-                         peer->ibp_connecting != 0 || /* creating conns */
-                         !list_empty (&peer->ibp_conns));  /* active conn */
-
-                if (peer->ibp_nid == nid) {
-                        CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                               peer, nid,
-                               atomic_read (&peer->ibp_refcount));
-                        return (peer);
-                }
-        }
-
-        return (NULL);
-}
-
-/* Find the peer for 'nid' under the global read lock and, if it
- * exists, take a reference on it for the caller.  Returns NULL when
- * there is no such peer. */
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
-        kib_peer_t *found;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        found = kibnal_find_peer_locked (nid);
-        if (found != NULL) {
-                /* this reference belongs to the caller */
-                kib_peer_addref(found);
-        }
-
-        read_unlock (&kibnal_data.kib_global_lock);
-
-        return (found);
-}
-
-/* Remove a non-persistent peer with no connections from the peer table
- * and drop the reference the table held on it. */
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
-        LASSERT (peer->ibp_persistence == 0);
-        LASSERT (list_empty(&peer->ibp_conns));
-        LASSERT (kibnal_peer_active(peer));
-
-        list_del_init (&peer->ibp_list);
-
-        /* the peer table no longer references this peer */
-        kib_peer_decref(peer);
-}
-
-/* Report the NID and persistence count of the index'th peer, counting
- * across every hash chain in table order, under the global read lock.
- * Returns 0 and fills *nidp / *persistencep, or -ENOENT when there are
- * not that many peers. */
-static int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
-{
-        kib_peer_t        *peer;
-        struct list_head  *pos;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        for (bucket = 0; bucket < kibnal_data.kib_peer_hash_size; bucket++) {
-                list_for_each (pos, &kibnal_data.kib_peers[bucket]) {
-                        peer = list_entry (pos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        /* skip peers until the requested one is reached */
-                        if (index-- > 0)
-                                continue;
-
-                        *nidp = peer->ibp_nid;
-                        *persistencep = peer->ibp_persistence;
-                        rc = 0;
-                        goto out;
-                }
-        }
-
- out:
-        read_unlock (&kibnal_data.kib_global_lock);
-        return (rc);
-}
-
-/* Add one persistence share to the peer for 'nid', creating the peer
- * and inserting it into the peer table if it is not there yet.
- * Returns 0, -EINVAL for PTL_NID_ANY, or -ENOMEM if a peer cannot be
- * allocated. */
-static int
-kibnal_add_persistent_peer (ptl_nid_t nid)
-{
-        unsigned long      flags;
-        kib_peer_t        *peer;
-        kib_peer_t        *existing;
-
-        if (nid == PTL_NID_ANY)
-                return (-EINVAL);
-
-        /* allocate before taking the lock */
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        existing = kibnal_find_peer_locked (nid);
-        if (existing == NULL) {
-                /* the creation ref is handed over to the peer table */
-                list_add_tail (&peer->ibp_list,
-                               kibnal_nid2peerlist (nid));
-        } else {
-                /* already known: discard the new one, use the old */
-                kib_peer_decref (peer);
-                peer = existing;
-        }
-
-        peer->ibp_persistence++;
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-        return (0);
-}
-
-/* Drop persistence from 'peer': a single share when 'single_share' is
- * set, all of it otherwise.  Once no persistence remains, close every
- * connection of the peer. */
-static void
-kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
-{
-        struct list_head *pos;
-        struct list_head *next;
-
-        if (single_share) {
-                if (peer->ibp_persistence > 0)
-                        peer->ibp_persistence--;
-        } else {
-                peer->ibp_persistence = 0;
-        }
-
-        if (peer->ibp_persistence == 0) {
-                list_for_each_safe (pos, next, &peer->ibp_conns)
-                        kibnal_close_conn_locked (list_entry(pos, kib_conn_t,
-                                                             ibc_list), 0);
-        }
-
-        /* NB peer unlinks itself when last conn is closed */
-}
-
-/* Delete the peer matching 'nid', or every peer when nid is
- * PTL_NID_ANY, under the global write lock.  With 'single_share' set
- * only the first match is processed.  Returns 0 if anything matched,
- * -ENOENT otherwise. */
-int
-kibnal_del_peer (ptl_nid_t nid, int single_share)
-{
-        unsigned long      flags;
-        struct list_head  *pos;
-        struct list_head  *next;
-        kib_peer_t        *peer;
-        int                first;
-        int                last;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: walk the whole table */
-                first = 0;
-                last = kibnal_data.kib_peer_hash_size - 1;
-        } else {
-                /* just the chain this NID hashes to */
-                first = last = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, next, &kibnal_data.kib_peers[bucket]) {
-                        peer = list_entry (pos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        if (nid != PTL_NID_ANY && peer->ibp_nid != nid)
-                                continue;
-
-                        kibnal_del_peer_locked (peer, single_share);
-                        rc = 0;         /* matched something */
-
-                        if (single_share)
-                                goto out;
-                }
-        }
- out:
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        return (rc);
-}
-
-/* Return the index'th connection, counting across all peers in table
- * order, with an extra reference taken for the caller; NULL when there
- * are not that many connections.  Runs under the global read lock. */
-static kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
-        kib_peer_t        *peer;
-        struct list_head  *ppos;
-        struct list_head  *cpos;
-        kib_conn_t        *found = NULL;
-        int                bucket;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        for (bucket = 0; bucket < kibnal_data.kib_peer_hash_size; bucket++) {
-                list_for_each (ppos, &kibnal_data.kib_peers[bucket]) {
-
-                        peer = list_entry (ppos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence > 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        list_for_each (cpos, &peer->ibp_conns) {
-                                /* skip until the requested one */
-                                if (index-- > 0)
-                                        continue;
-
-                                found = list_entry (cpos, kib_conn_t, ibc_list);
-                                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                                       found, found->ibc_state,
-                                       found->ibc_peer->ibp_nid,
-                                       atomic_read (&found->ibc_refcount));
-                                /* this reference belongs to the caller */
-                                atomic_inc (&found->ibc_refcount);
-                                goto out;
-                        }
-                }
-        }
-
- out:
-        read_unlock (&kibnal_data.kib_global_lock);
-        return (found);
-}
-
-/* Allocate and initialise a connection: its descriptor, receive
- * descriptors and receive message pages, then a reliable-connected
- * queue pair which is moved to the Init state.
- *
- * Returns the new connection holding one reference for the caller, or
- * NULL on any failure (everything allocated so far is released via
- * kibnal_destroy_conn()). */
-kib_conn_t *
-kibnal_create_conn (void)
-{
-        kib_conn_t  *conn;
-        int          i;
-        __u64        vaddr = 0;
-        __u64        vaddr_base;
-        int          page_offset;
-        int          ipage;
-        int          rc;
-        FSTATUS      frc;
-        union {
-                IB_QP_ATTRIBUTES_CREATE    qp_create;
-                IB_QP_ATTRIBUTES_MODIFY    qp_attr;
-        } params;
-
-        PORTAL_ALLOC (conn, sizeof (*conn));
-        if (conn == NULL) {
-                CERROR ("Can't allocate connection\n");
-                return (NULL);
-        }
-
-        /* zero flags, NULL pointers etc... */
-        memset (conn, 0, sizeof (*conn));
-
-        INIT_LIST_HEAD (&conn->ibc_tx_queue);
-        INIT_LIST_HEAD (&conn->ibc_active_txs);
-        spin_lock_init (&conn->ibc_lock);
-
-        atomic_inc (&kibnal_data.kib_nconns);
-        /* well not really, but I call destroy() on failure, which decrements */
-
-        PORTAL_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
-        if (conn->ibc_rxs == NULL)
-                goto failed;
-        memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
-        rc = kibnal_alloc_pages(&conn->ibc_rx_pages, IBNAL_RX_MSG_PAGES, 1);
-        if (rc != 0)
-                goto failed;
-
-        vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
-        /* carve the rx pages into IBNAL_MSG_SIZE message buffers, one
-         * per receive descriptor */
-        for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
-                struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
-                kib_rx_t   *rx = &conn->ibc_rxs[i];
-
-                rx->rx_conn = conn;
-                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) +
-                             page_offset);
-
-                if (kibnal_whole_mem())
-                        rx->rx_vaddr = kibnal_page2phys(page) +
-                                       page_offset +
-                                       kibnal_data.kib_md.md_addr;
-                else
-                        rx->rx_vaddr = vaddr;
-
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-
-                page_offset += IBNAL_MSG_SIZE;
-                LASSERT (page_offset <= PAGE_SIZE);
-
-                if (page_offset == PAGE_SIZE) {
-                        page_offset = 0;
-                        ipage++;
-                        LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
-                }
-        }
-
-        params.qp_create = (IB_QP_ATTRIBUTES_CREATE) {
-                .Type                    = QPTypeReliableConnected,
-                .SendQDepth              = IBNAL_TX_MAX_SG *
-                                           IBNAL_MSG_QUEUE_SIZE,
-                .RecvQDepth              = IBNAL_MSG_QUEUE_SIZE,
-                .SendDSListDepth         = 1,
-                .RecvDSListDepth         = 1,
-                .SendCQHandle            = kibnal_data.kib_cq,
-                .RecvCQHandle            = kibnal_data.kib_cq,
-                .PDHandle                = kibnal_data.kib_pd,
-                .SendSignaledCompletions = TRUE,
-        };
-        frc = iibt_qp_create(kibnal_data.kib_hca, &params.qp_create, NULL,
-                             &conn->ibc_qp, &conn->ibc_qp_attrs);
-        /* test the status just returned; 'rc' still holds the 0 from
-         * kibnal_alloc_pages() and would hide a failure here */
-        if (frc != FSUCCESS) {
-                CERROR ("Failed to create queue pair: %d\n", frc);
-                goto failed;
-        }
-
-        /* Mark QP created */
-        conn->ibc_state = IBNAL_CONN_INIT_QP;
-
-        params.qp_attr = (IB_QP_ATTRIBUTES_MODIFY) {
-                .RequestState             = QPStateInit,
-                .Attrs                    = (IB_QP_ATTR_PORTGUID |
-                                             IB_QP_ATTR_PKEYINDEX |
-                                             IB_QP_ATTR_ACCESSCONTROL),
-                .PortGUID                 = kibnal_data.kib_port_guid,
-                .PkeyIndex                = 0,
-                .AccessControl = {
-                        .s = {
-                                .RdmaWrite = 1,
-                                .RdmaRead  = 1,
-                        },
-                },
-        };
-        rc = iibt_qp_modify(conn->ibc_qp, &params.qp_attr, NULL);
-        if (rc != 0) {
-                CERROR ("Failed to modify queue pair: %d\n", rc);
-                goto failed;
-        }
-
-        /* 1 ref for caller */
-        atomic_set (&conn->ibc_refcount, 1);
-        return (conn);
-
- failed:
-        kibnal_destroy_conn (conn);
-        return (NULL);
-}
-
-/* Release everything owned by a connection whose refcount is zero: the
- * queue pair (if one was created), the CEP, the receive pages and
- * descriptors, the reference on its peer, and finally the connection
- * itself.  Decrements kib_nconns and, if that was the last connection
- * during shutdown, wakes the scheduler and connd wait queues. */
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
-        int    rc;
-        FSTATUS frc;
-        
-        CDEBUG (D_NET, "connection %p\n", conn);
-
-        LASSERT (atomic_read (&conn->ibc_refcount) == 0);
-        LASSERT (list_empty(&conn->ibc_tx_queue));
-        LASSERT (list_empty(&conn->ibc_active_txs));
-        LASSERT (conn->ibc_nsends_posted == 0);
-        LASSERT (conn->ibc_connreq == NULL);
-
-        /* how much to undo depends on how far setup got; any state not
-         * listed here is a bug */
-        switch (conn->ibc_state) {
-        case IBNAL_CONN_DISCONNECTED:
-                /* called after connection sequence initiated */
-                /* fall through */
-
-        case IBNAL_CONN_INIT_QP:
-                /* _destroy includes an implicit Reset of the QP which 
-                 * discards posted work */
-                rc = iibt_qp_destroy(conn->ibc_qp);
-                if (rc != 0)
-                        CERROR("Can't destroy QP: %d\n", rc);
-                /* fall through */
-                
-        case IBNAL_CONN_INIT_NOTHING:
-                break;
-
-        default:
-                LASSERT (0);
-        }
-
-        if (conn->ibc_cep != NULL) {
-                frc = iibt_cm_destroy_cep(conn->ibc_cep);
-                if (frc != 0)
-                        CERROR("Can't destroy CEP %p: %d\n", conn->ibc_cep, 
-                               frc);
-        }
-
-        if (conn->ibc_rx_pages != NULL) 
-                kibnal_free_pages(conn->ibc_rx_pages);
-        
-        if (conn->ibc_rxs != NULL)
-                PORTAL_FREE(conn->ibc_rxs, 
-                            IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
-        /* drop the reference this connection held on its peer */
-        if (conn->ibc_peer != NULL)
-                kib_peer_decref(conn->ibc_peer);
-
-        PORTAL_FREE(conn, sizeof (*conn));
-
-        atomic_dec(&kibnal_data.kib_nconns);
-        
-        if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
-            kibnal_data.kib_shutdown) {
-                /* I just nuked the last connection on shutdown; wake up
-                 * everyone so they can exit. */
-                wake_up_all(&kibnal_data.kib_sched_waitq);
-                wake_up_all(&kibnal_data.kib_connd_waitq);
-        }
-}
-
-/* Drop one reference on 'conn'.  When the last reference goes, the
- * connection (which must already be disconnected) is queued on
- * kib_connd_conns and the connd wait queue is woken. */
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
-        unsigned long flags;
-
-        CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
-                conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                atomic_read (&conn->ibc_refcount));
-
-        LASSERT (atomic_read (&conn->ibc_refcount) > 0);
-        if (atomic_dec_and_test (&conn->ibc_refcount)) {
-                /* must disconnect before dropping the final ref */
-                LASSERT (conn->ibc_state == IBNAL_CONN_DISCONNECTED);
-
-                spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-                list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-                wake_up (&kibnal_data.kib_connd_waitq);
-
-                spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-        }
-}
-
-/* Close every connection of 'peer', passing 'why' to each close, and
- * return how many were closed. */
-static int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
-        struct list_head   *pos;
-        struct list_head   *next;
-        int                 nclosed = 0;
-
-        list_for_each_safe (pos, next, &peer->ibp_conns) {
-                nclosed++;
-                kibnal_close_conn_locked (list_entry (pos, kib_conn_t,
-                                                      ibc_list), why);
-        }
-
-        return (nclosed);
-}
-
-/* Close (with -ESTALE) every connection of 'peer' whose incarnation
- * differs from 'incarnation'; return how many were closed. */
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
-        kib_conn_t         *c;
-        struct list_head   *pos;
-        struct list_head   *next;
-        int                 nclosed = 0;
-
-        list_for_each_safe (pos, next, &peer->ibp_conns) {
-                c = list_entry (pos, kib_conn_t, ibc_list);
-
-                if (c->ibc_incarnation != incarnation) {
-                        CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
-                               peer->ibp_nid, c->ibc_incarnation, incarnation);
-
-                        nclosed++;
-                        kibnal_close_conn_locked (c, -ESTALE);
-                }
-        }
-
-        return (nclosed);
-}
-
-/* Close all connections to the peer matching 'nid', or to every peer
- * when nid is PTL_NID_ANY, under the global write lock.  A wildcard
- * always returns 0; a specific NID returns -ENOENT when nothing was
- * closed. */
-static int
-kibnal_close_matching_conns (ptl_nid_t nid)
-{
-        unsigned long       flags;
-        kib_peer_t         *peer;
-        struct list_head   *pos;
-        struct list_head   *next;
-        int                 first;
-        int                 last;
-        int                 bucket;
-        int                 nclosed = 0;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: walk the whole table */
-                first = 0;
-                last = kibnal_data.kib_peer_hash_size - 1;
-        } else {
-                /* just the chain this NID hashes to */
-                first = last = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, next, &kibnal_data.kib_peers[bucket]) {
-
-                        peer = list_entry (pos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        if (nid != PTL_NID_ANY && nid != peer->ibp_nid)
-                                continue;
-
-                        nclosed += kibnal_close_peer_conns_locked (peer, 0);
-                }
-        }
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        /* only a specific NID that closed nothing counts as a miss */
-        if (nid != PTL_NID_ANY && nclosed == 0)
-                return (-ENOENT);
-
-        return (0);
-}
-
-/* Dispatch a NAL configuration command described by 'pcfg' to the
- * matching helper and return its result.  Commands not handled here
- * return -EINVAL. */
-static int
-kibnal_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int rc = -EINVAL;
-        ENTRY;
-
-        LASSERT (pcfg != NULL);
-
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t   peer_nid = 0;
-                int         shares = 0;
-
-                rc = kibnal_get_peer_info(pcfg->pcfg_count,
-                                          &peer_nid, &shares);
-                /* results are written back whatever rc says */
-                pcfg->pcfg_nid   = peer_nid;
-                pcfg->pcfg_size  = 0;
-                pcfg->pcfg_id    = 0;
-                pcfg->pcfg_misc  = 0;
-                pcfg->pcfg_count = 0;
-                pcfg->pcfg_wait  = shares;
-                break;
-        }
-        case NAL_CMD_ADD_PEER:
-                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
-                break;
-
-        case NAL_CMD_DEL_PEER:
-                /* a non-zero flags field selects single_share */
-                rc = kibnal_del_peer (pcfg->pcfg_nid,
-                                      pcfg->pcfg_flags != 0);
-                break;
-
-        case NAL_CMD_GET_CONN: {
-                kib_conn_t *c = kibnal_get_conn_by_idx (pcfg->pcfg_count);
-
-                if (c != NULL) {
-                        rc = 0;
-                        pcfg->pcfg_nid   = c->ibc_peer->ibp_nid;
-                        pcfg->pcfg_id    = 0;
-                        pcfg->pcfg_misc  = 0;
-                        pcfg->pcfg_flags = 0;
-                        /* release the ref taken by the lookup */
-                        kibnal_put_conn (c);
-                } else {
-                        rc = -ENOENT;
-                }
-                break;
-        }
-        case NAL_CMD_CLOSE_CONNECTION:
-                rc = kibnal_close_matching_conns (pcfg->pcfg_nid);
-                break;
-
-        case NAL_CMD_REGISTER_MYNID:
-                rc = (pcfg->pcfg_nid == PTL_NID_ANY) ?
-                     -EINVAL : kibnal_set_mynid (pcfg->pcfg_nid);
-                break;
-        }
-
-        RETURN(rc);
-}
-
-/* Release a page set built by kibnal_alloc_pages(): deregister it if it
- * was marked mapped, free every page that was actually allocated, then
- * free the descriptor itself.  Safe on a partially populated set because
- * unfilled slots are NULL and are skipped. */
-void
-kibnal_free_pages (kib_pages_t *p)
-{
-        /* sample the count first: it sizes the descriptor freed below */
-        int     count = p->ibp_npages;
-        int     status;
-        int     idx;
-
-        if (p->ibp_mapped) {
-                status = iibt_deregister_memory(p->ibp_handle);
-                /* a failure is only reported; teardown carries on */
-                if (status != 0)
-                        CERROR ("Deregister error: %d\n", status);
-        }
-
-        for (idx = 0; idx < count; idx++) {
-                struct page *pg = p->ibp_pages[idx];
-
-                if (pg == NULL)
-                        continue;
-
-                __free_page(pg);
-        }
-
-        PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[count]));
-}
-
-/* Allocate a set of 'npages' kernel pages and hand it back through '*pp'.
- *
- * When kibnal_whole_mem() is true the pages are returned unregistered.
- * Otherwise they are registered with the HCA as one discontiguous
- * physical region and the set is marked mapped.  'allow_write' is not
- * consulted by this implementation.
- *
- * Returns 0 on success or -ENOMEM on any failure, in which case
- * everything allocated here has been released and '*pp' is untouched. */
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int allow_write)
-{
-        IB_ACCESS_CONTROL           rights;
-        kib_pages_t                *p;
-        __u64                      *phys_pages;
-        FSTATUS                     status;
-        int                         idx;
-
-        /* descriptor with a trailing array of 'npages' page pointers */
-        PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
-        if (p == NULL) {
-                CERROR ("Can't allocate buffer %d\n", npages);
-                return (-ENOMEM);
-        }
-
-        /* zeroed slots let kibnal_free_pages() cope with a partial fill */
-        memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
-        p->ibp_npages = npages;
-
-        for (idx = 0; idx < npages; idx++) {
-                p->ibp_pages[idx] = alloc_page (GFP_KERNEL);
-                if (p->ibp_pages[idx] != NULL)
-                        continue;
-
-                CERROR ("Can't allocate page %d of %d\n", idx, npages);
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
-
-        /* nothing to register in the whole-memory case */
-        if (kibnal_whole_mem()) {
-                *pp = p;
-                return (0);
-        }
-
-        PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
-        if (phys_pages == NULL) {
-                CERROR ("Can't allocate physarray for %d pages\n", npages);
-                /* releases the pages as well as the descriptor */
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
-
-        /* the discontiguous registration call takes a bare array of
-         * physical addresses; the _contig_ variant would instead want
-         * PhysAddr/Length pairs */
-        for (idx = 0; idx < npages; idx++)
-                phys_pages[idx] = kibnal_page2phys(p->ibp_pages[idx]);
-
-        memset(&rights, 0, sizeof(rights));
-        rights.s.MWBindable = 1;
-        rights.s.LocalWrite = 1;
-        rights.s.RdmaRead = 1;
-        rights.s.RdmaWrite = 1;
-
-        status = iibt_register_physical_memory(kibnal_data.kib_hca,
-                                               0,       /* requested vaddr */
-                                               phys_pages, npages,
-                                               0,       /* offset */
-                                               kibnal_data.kib_pd,
-                                               rights,
-                                               &p->ibp_handle, &p->ibp_vaddr,
-                                               &p->ibp_lkey, &p->ibp_rkey);
-
-        /* the address array was only needed for the call above */
-        PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
-
-        if (status != FSUCCESS) {
-                CERROR ("Error %d mapping %d pages\n", status, npages);
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
-
-        CDEBUG(D_NET, "registered %d pages; handle: %p vaddr "LPX64" "
-                      "lkey %x rkey %x\n", npages, p->ibp_handle,
-                      p->ibp_vaddr, p->ibp_lkey, p->ibp_rkey);
-
-        /* tells kibnal_free_pages() to deregister */
-        p->ibp_mapped = 1;
-
-        *pp = p;
-        return (0);
-}
-
-/* Allocate the pages backing the pre-mapped tx messages and initialise
- * every tx descriptor: point it at its slice of a page, record the
- * address to use for that slice and queue it on the matching idle list
- * (descriptors from index IBNAL_NTX upwards go on the non-blocking
- * list).  Returns 0, or the error from kibnal_alloc_pages(). */
-static int
-kibnal_setup_tx_descs (void)
-{
-        kib_pages_t      *txpages;
-        struct list_head *idle;
-        struct page      *pg;
-        kib_tx_t         *tx;
-        __u64             vaddr;
-        __u64             vaddr_base;
-        int               ipage = 0;
-        int               page_offset = 0;
-        int               status;
-        int               n;
-
-        /* pre-mapped messages are not bigger than 1 page */
-        LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
-        /* No fancy arithmetic when we do the buffer calculations */
-        LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
-        status = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
-                                    IBNAL_TX_MSG_PAGES, 0);
-        if (status != 0)
-                return (status);
-
-        txpages = kibnal_data.kib_tx_pages;
-
-        /* ignored for the whole_mem case */
-        vaddr_base = txpages->ibp_vaddr;
-        vaddr = vaddr_base;
-
-        for (n = 0; n < IBNAL_TX_MSGS; n++) {
-                pg = txpages->ibp_pages[ipage];
-                tx = kibnal_data.kib_tx_descs + n;
-
-                memset (tx, 0, sizeof(*tx));    /* zero flags etc */
-
-                tx->tx_msg = (kib_msg_t *)((char *)page_address(pg) +
-                                           page_offset);
-
-                if (!kibnal_whole_mem())
-                        tx->tx_vaddr = vaddr;
-                else
-                        /* offset of this slice within the whole-memory
-                         * registration */
-                        tx->tx_vaddr = kibnal_page2phys(pg) +
-                                       page_offset +
-                                       kibnal_data.kib_md.md_addr;
-
-                tx->tx_isnblk = (n >= IBNAL_NTX);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-
-                CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n",
-                       n, tx, tx->tx_msg, tx->tx_vaddr);
-
-                idle = tx->tx_isnblk ? &kibnal_data.kib_idle_nblk_txs :
-                                       &kibnal_data.kib_idle_txs;
-                list_add (&tx->tx_list, idle);
-
-                /* step to the next message slot */
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
-
-                page_offset += IBNAL_MSG_SIZE;
-                LASSERT (page_offset <= PAGE_SIZE);
-
-                if (page_offset != PAGE_SIZE)
-                        continue;
-
-                /* this page is full; carry on at the start of the next */
-                page_offset = 0;
-                ipage++;
-                LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
-        }
-
-        return (0);
-}
-
-static void
-kibnal_api_shutdown (nal_t *nal)
-{
-        int   i;
-        int   rc;
-
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
-        }
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-
-        LASSERT(nal == &kibnal_api);
-
-        switch (kibnal_data.kib_init) {
-        default:
-                CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
-                LBUG();
-
-        case IBNAL_INIT_ALL:
-                /* stop calls to nal_cmd */
-                libcfs_nal_cmd_unregister(IIBNAL);
-                /* No new peers */
-
-                /* resetting my NID to unadvertises me, removes my
-                 * listener and nukes all current peers */
-                kibnal_set_mynid (PTL_NID_ANY);
-
-                /* Wait for all peer state to clean up (crazy) */
-                i = 2;
-                while (atomic_read (&kibnal_data.kib_npeers) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to disconnect (can take a few seconds)\n",
-                               atomic_read (&kibnal_data.kib_npeers));
-                        set_current_state (TASK_UNINTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                }
-                /* fall through */
-
-        case IBNAL_INIT_CQ:
-                rc = iibt_cq_destroy(kibnal_data.kib_cq);
-                if (rc != 0)
-                        CERROR ("Destroy CQ error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_TXD:
-                kibnal_free_pages (kibnal_data.kib_tx_pages);
-                /* fall through */
-
-        case IBNAL_INIT_MR:
-                if (kibnal_data.kib_md.md_handle != NULL) {
-                        rc = iibt_deregister_memory(kibnal_data.kib_md.md_handle);
-                        if (rc != FSUCCESS)
-                                CERROR ("Deregister memory: %d\n", rc);
-                }
-                /* fall through */
-
-#if IBNAL_FMR
-        case IBNAL_INIT_FMR:
-                rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
-                if (rc != 0)
-                        CERROR ("Destroy FMR pool error: %d\n", rc);
-                /* fall through */
-#endif
-        case IBNAL_INIT_PD:
-                rc = iibt_pd_free(kibnal_data.kib_pd);
-                if (rc != 0)
-                        CERROR ("Destroy PD error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_SD:
-                rc = iibt_sd_deregister(kibnal_data.kib_sd);
-                if (rc != 0)
-                        CERROR ("Deregister SD error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_PORT:
-                /* XXX ??? */
-                /* fall through */
-
-        case IBNAL_INIT_PORTATTRS:
-                PORTAL_FREE(kibnal_data.kib_hca_attrs.PortAttributesList,
-                            kibnal_data.kib_hca_attrs.PortAttributesListSize);
-                /* fall through */
-
-        case IBNAL_INIT_HCA:
-                rc = iibt_close_hca(kibnal_data.kib_hca);
-                if (rc != 0)
-                        CERROR ("Close HCA  error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_LIB:
-                lib_fini(&kibnal_lib);
-                /* fall through */
-
-        case IBNAL_INIT_DATA:
-                /* Module refcount only gets to zero when all peers
-                 * have been closed so all lists must be empty */
-                LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
-                LASSERT (kibnal_data.kib_peers != NULL);
-                for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-                        LASSERT (list_empty (&kibnal_data.kib_peers[i]));
-                }
-                LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
-                LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
-                LASSERT (list_empty (&kibnal_data.kib_sched_txq));
-                LASSERT (list_empty (&kibnal_data.kib_connd_conns));
-                LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-
-                /* flag threads to terminate; wake and wait for them to die */
-                kibnal_data.kib_shutdown = 1;
-                wake_up_all (&kibnal_data.kib_sched_waitq);
-                wake_up_all (&kibnal_data.kib_connd_waitq);
-
-                i = 2;
-                while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "Waiting for %d threads to terminate\n",
-                               atomic_read (&kibnal_data.kib_nthreads));
-                        set_current_state (TASK_INTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                }
-                /* fall through */
-                
-        case IBNAL_INIT_NOTHING:
-                break;
-        }
-
-        if (kibnal_data.kib_tx_descs != NULL)
-                PORTAL_FREE (kibnal_data.kib_tx_descs,
-                             IBNAL_TX_MSGS * sizeof(kib_tx_t));
-
-        if (kibnal_data.kib_peers != NULL)
-                PORTAL_FREE (kibnal_data.kib_peers,
-                             sizeof (struct list_head) * 
-                             kibnal_data.kib_peer_hash_size);
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-        printk(KERN_INFO "Lustre: Infinicon IB NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
-
-        kibnal_data.kib_init = IBNAL_INIT_NOTHING;
-}
-
-/* Estimate how many bytes of physical memory a single registration
- * starting at address 0 has to span: the larger of the total RAM page
- * count and max_mapnr, scaled by the memory unit size and rounded up to
- * a multiple of 128MB.  Returns 0 for the one vendor/device pair that is
- * deliberately not supported.
- *
- * this isn't very portable or sturdy in the face of funny mem/bus configs */
-static __u64 max_phys_mem(IB_CA_ATTRIBUTES *ca_attr)
-{
-        /* rounding granule; must be a power of 2 for the mask below */
-        const __u64    granule = 128 * 1024 * 1024;
-        struct sysinfo meminfo;
-        __u64          nbytes;
-
-        /* XXX we don't bother with first-gen cards */
-        if (ca_attr->VendorId == 0xd0b7 && ca_attr->DeviceId == 0x3101)
-                return 0ULL;
-
-        si_meminfo(&meminfo);
-        nbytes = (__u64)max(meminfo.totalram, max_mapnr) * meminfo.mem_unit;
-
-        /* round up to the next multiple of the granule */
-        return (nbytes + (granule - 1)) & ~(granule - 1);
-}
-
-static int
-kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                     ptl_ni_limits_t *requested_limits,
-                     ptl_ni_limits_t *actual_limits)
-{
-        ptl_process_id_t    process_id;
-        int                 pkmem = atomic_read(&portal_kmemory);
-        IB_PORT_ATTRIBUTES *pattr;
-        FSTATUS             frc;
-        int                 rc;
-        int                 n;
-        int                 i;
-
-        LASSERT (nal == &kibnal_api);
-
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return (PTL_OK);
-        }
-
-        LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
-
-        frc = IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2, 
-                                       &kibnal_data.kib_interfaces);
-        if (frc != FSUCCESS) {
-                CERROR("IbtGetInterfaceByVersion(IBT_INTERFACE_VERSION_2) = %d\n",
-                        frc);
-                return -ENOSYS;
-        }
-
-        init_MUTEX (&kibnal_data.kib_nid_mutex);
-        init_MUTEX_LOCKED (&kibnal_data.kib_nid_signal);
-        kibnal_data.kib_nid = PTL_NID_ANY;
-
-        rwlock_init(&kibnal_data.kib_global_lock);
-
-        kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC (kibnal_data.kib_peers,
-                      sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
-        if (kibnal_data.kib_peers == NULL) {
-                goto failed;
-        }
-        for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
-                INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
-        spin_lock_init (&kibnal_data.kib_connd_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
-        init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
-        spin_lock_init (&kibnal_data.kib_sched_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
-        init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
-        spin_lock_init (&kibnal_data.kib_tx_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
-        init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
-
-        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
-        if (kibnal_data.kib_tx_descs == NULL) {
-                CERROR ("Can't allocate tx descs\n");
-                goto failed;
-        }
-
-        /* lists/ptrs/locks initialised */
-        kibnal_data.kib_init = IBNAL_INIT_DATA;
-        /*****************************************************/
-
-        process_id.pid = 0;
-        process_id.nid = kibnal_data.kib_nid;
-        
-        rc = lib_init(&kibnal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                goto failed;
-        }
-
-        /* lib interface initialised */
-        kibnal_data.kib_init = IBNAL_INIT_LIB;
-        /*****************************************************/
-
-        for (i = 0; i < IBNAL_N_SCHED; i++) {
-                rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
-                if (rc != 0) {
-                        CERROR("Can't spawn iibnal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        rc = kibnal_thread_start (kibnal_connd, NULL);
-        if (rc != 0) {
-                CERROR ("Can't spawn iibnal connd: %d\n", rc);
-                goto failed;
-        }
-
-        n = sizeof(kibnal_data.kib_hca_guids) /
-            sizeof(kibnal_data.kib_hca_guids[0]);
-        frc = iibt_get_hca_guids(&n, kibnal_data.kib_hca_guids);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't get channel adapter guids: %d\n", frc);
-                goto failed;
-        }
-        if (n == 0) {
-                CERROR ("No channel adapters found\n");
-                goto failed;
-        }
-
-        /* Infinicon has per-HCA rather than per CQ completion handlers */
-        frc = iibt_open_hca(kibnal_data.kib_hca_guids[0],
-                            kibnal_ca_callback,
-                            kibnal_ca_async_callback,
-                            &kibnal_data.kib_hca,
-                            &kibnal_data.kib_hca);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't open CA[0]: %d\n", frc);
-                goto failed;
-        }
-        
-        /* Channel Adapter opened */
-        kibnal_data.kib_init = IBNAL_INIT_HCA;
-        /*****************************************************/
-
-        kibnal_data.kib_hca_attrs.PortAttributesList = NULL;
-        kibnal_data.kib_hca_attrs.PortAttributesListSize = 0;
-        frc = iibt_query_hca(kibnal_data.kib_hca,
-                             &kibnal_data.kib_hca_attrs, NULL);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't size port attrs: %d\n", frc);
-                goto failed;
-        }
-        
-        PORTAL_ALLOC(kibnal_data.kib_hca_attrs.PortAttributesList,
-                     kibnal_data.kib_hca_attrs.PortAttributesListSize);
-        if (kibnal_data.kib_hca_attrs.PortAttributesList == NULL)
-                goto failed;
-
-        /* Port attrs allocated */
-        kibnal_data.kib_init = IBNAL_INIT_PORTATTRS;
-        /*****************************************************/
-        
-        frc = iibt_query_hca(kibnal_data.kib_hca, &kibnal_data.kib_hca_attrs,
-                             NULL);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't get port attrs for CA 0: %d\n", frc);
-                goto failed;
-        }
-
-        for (i = 0, pattr = kibnal_data.kib_hca_attrs.PortAttributesList;
-             pattr != NULL;
-             i++, pattr = pattr->Next) {
-                switch (pattr->PortState) {
-                default:
-                        CERROR("Unexpected port[%d] state %d\n",
-                               i, pattr->PortState);
-                        continue;
-                case PortStateDown:
-                        CDEBUG(D_NET, "port[%d] Down\n", i);
-                        continue;
-                case PortStateInit:
-                        CDEBUG(D_NET, "port[%d] Init\n", i);
-                        continue;
-                case PortStateArmed:
-                        CDEBUG(D_NET, "port[%d] Armed\n", i);
-                        continue;
-                        
-                case PortStateActive:
-                        CDEBUG(D_NET, "port[%d] Active\n", i);
-                        kibnal_data.kib_port = i;
-                        kibnal_data.kib_port_guid = pattr->GUID;
-                        kibnal_data.kib_port_pkey = pattr->PkeyTable[0];
-                        break;
-                }
-                break;
-        }
-
-        if (pattr == NULL) {
-                CERROR ("Can't find an active port\n");
-                goto failed;
-        }
-
-        CDEBUG(D_NET, "got guid "LPX64"\n", kibnal_data.kib_port_guid);
-        
-        /* Active port found */
-        kibnal_data.kib_init = IBNAL_INIT_PORT;
-        /*****************************************************/
-
-        frc = iibt_sd_register(&kibnal_data.kib_sd, NULL);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't register with SD: %d\n", frc);
-                goto failed;
-        }
-        
-        /* Registered with SD OK */
-        kibnal_data.kib_init = IBNAL_INIT_SD;
-        /*****************************************************/
-
-        frc = iibt_pd_allocate(kibnal_data.kib_hca, 0, &kibnal_data.kib_pd);
-        if (frc != FSUCCESS) {
-                CERROR ("Can't create PD: %d\n", rc);
-                goto failed;
-        }
-        
-        /* flag PD initialised */
-        kibnal_data.kib_init = IBNAL_INIT_PD;
-        /*****************************************************/
-
-#if IBNAL_FMR
-        {
-                const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
-                struct ib_fmr_pool_param params = {
-                        .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
-                        .access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ),
-                        .pool_size         = pool_size,
-                        .dirty_watermark   = (pool_size * 3)/4,
-                        .flush_function    = NULL,
-                        .flush_arg         = NULL,
-                        .cache             = 1,
-                };
-                rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
-                                        &kibnal_data.kib_fmr_pool);
-                if (rc != 0) {
-                        CERROR ("Can't create FMR pool size %d: %d\n", 
-                                pool_size, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag FMR pool initialised */
-        kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-        /*****************************************************/
-        if (IBNAL_WHOLE_MEM) {
-                IB_MR_PHYS_BUFFER phys;
-                IB_ACCESS_CONTROL access;
-                kib_md_t *md = &kibnal_data.kib_md;
-
-                memset(&access, 0, sizeof(access));
-                access.s.MWBindable = 1;
-                access.s.LocalWrite = 1;
-                access.s.RdmaRead = 1;
-                access.s.RdmaWrite = 1;
-
-                phys.PhysAddr = 0;
-                phys.Length = max_phys_mem(&kibnal_data.kib_hca_attrs);
-                if (phys.Length == 0) {
-                        CERROR ("couldn't determine the end of phys mem\n");
-                        goto failed;
-                }
-       
-                rc = iibt_register_contig_physical_memory(kibnal_data.kib_hca,
-                                                          0,
-                                                          &phys, 1,
-                                                          0,
-                                                          kibnal_data.kib_pd,
-                                                          access,
-                                                          &md->md_handle,
-                                                          &md->md_addr,
-                                                          &md->md_lkey,
-                                                          &md->md_rkey);
-                if (rc != FSUCCESS) {
-                        CERROR("registering physical memory failed: %d\n", 
-                               rc);
-                        CERROR("falling back to registration per-rdma\n");
-                        md->md_handle = NULL;
-                } else {
-                        CDEBUG(D_NET, "registered "LPU64" bytes of mem\n",
-                               phys.Length);
-                        kibnal_data.kib_init = IBNAL_INIT_MR;
-                }
-        }
-
-        /*****************************************************/
-
-        rc = kibnal_setup_tx_descs();
-        if (rc != 0) {
-                CERROR ("Can't register tx descs: %d\n", rc);
-                goto failed;
-        }
-        
-        /* flag TX descs initialised */
-        kibnal_data.kib_init = IBNAL_INIT_TXD;
-        /*****************************************************/
-        
-        {
-                uint32 nentries;
-
-                frc = iibt_cq_create(kibnal_data.kib_hca, IBNAL_CQ_ENTRIES,
-                                     &kibnal_data.kib_cq, &kibnal_data.kib_cq,
-                                     &nentries);
-                if (frc != FSUCCESS) {
-                        CERROR ("Can't create RX CQ: %d\n", frc);
-                        goto failed;
-                }
-
-                /* flag CQ initialised */
-                kibnal_data.kib_init = IBNAL_INIT_CQ;
-
-                if (nentries < IBNAL_CQ_ENTRIES) {
-                        CERROR ("CQ only has %d entries, need %d\n", 
-                                nentries, IBNAL_CQ_ENTRIES);
-                        goto failed;
-                }
-
-                rc = iibt_cq_rearm(kibnal_data.kib_cq, CQEventSelNextWC);
-                if (rc != 0) {
-                        CERROR ("Failed to re-arm completion queue: %d\n", rc);
-                        goto failed;
-                }
-        }
-        
-        /*****************************************************/
-
-        rc = libcfs_nal_cmd_register(IIBNAL, &kibnal_cmd, NULL);
-        if (rc != 0) {
-                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-                goto failed;
-        }
-
-        /* flag everything initialised */
-        kibnal_data.kib_init = IBNAL_INIT_ALL;
-        /*****************************************************/
-
-        printk(KERN_INFO "Lustre: Infinicon IB NAL loaded "
-               "(initial mem %d)\n", pkmem);
-
-        return (PTL_OK);
-
- failed:
-        kibnal_api_shutdown (&kibnal_api);    
-        return (PTL_FAIL);
-}
-
-/* Module unload hook: undo kibnal_module_init() in reverse order -
- * remove the sysctl table if one was registered, drop the network
- * interface handle taken at load time, then unregister the NAL. */
-void __exit
-kibnal_module_fini (void)
-{
-#ifdef CONFIG_SYSCTL
-        /* registration is allowed to fail at load time, so check first */
-        if (kibnal_tunables.kib_sysctl != NULL)
-                unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
-        PtlNIFini(kibnal_ni);
-
-        ptl_unregister_nal(IIBNAL);
-}
-
-/* Module load hook: sanity-check two compile-time sizes, install the NAL
- * startup/shutdown entry points, register the NAL and start it at once,
- * then register the sysctl table.
- *
- * Returns 0 on success, -EINVAL if a size check fails, -ENOMEM if the
- * NAL cannot be registered and -ENODEV if it cannot be started. */
-int __init
-kibnal_module_init (void)
-{
-        int    rc;
-
-        /* the connection request has to fit in the CM's user data area */
-        if (sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN) {
-                CERROR("sizeof(kib_wire_connreq_t) > CM_REQUEST_INFO_USER_LEN\n");
-                return -EINVAL;
-        }
-
-        /* the following must be sizeof(int) for proc_dointvec() */
-        if (sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)) {
-                CERROR("sizeof (kibnal_tunables.kib_io_timeout) != sizeof (int)\n");
-                return -EINVAL;
-        }
-
-        /* hooks invoked when the network interface is brought up/down */
-        kibnal_api.nal_ni_init = kibnal_api_startup;
-        kibnal_api.nal_ni_fini = kibnal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
-
-        rc = ptl_register_nal(IIBNAL, &kibnal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register IBNAL: %d\n", rc);
-                return (-ENOMEM);               /* or something... */
-        }
-
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(IIBNAL, 0, NULL, NULL, &kibnal_ni);
-        /* PTL_IFACE_DUP is tolerated as well as PTL_OK; anything else
-         * undoes the registration above */
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(IIBNAL);
-                return (-ENODEV);
-        }
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kibnal_tunables.kib_sysctl = 
-                register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
-        return (0);
-}
-
-/* Module metadata */
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Infinicon IB NAL v0.01");
-MODULE_LICENSE("GPL");
-
-/* Hook the load and unload handlers defined above into the module */
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
-
diff --git a/lustre/portals/knals/iibnal/iibnal.h b/lustre/portals/knals/iibnal/iibnal.h
deleted file mode 100644 (file)
index 3242158..0000000
+++ /dev/null
@@ -1,892 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#include <linux/iba/ibt.h>
-
-#define GCC_VERSION (__GNUC__ * 10000 \
-                + __GNUC_MINOR__ * 100 \
-                + __GNUC_PATCHLEVEL__)
-
-/* Test for GCC > 3.2.2.  GCC_VERSION packs the compiler version as
- * major*10000 + minor*100 + patchlevel, so 3.2.2 is 30202. */
-#if GCC_VERSION <= 30202
-/* GCC 3.2.2, and presumably several versions before it, will
- * miscompile this driver. See
- * http://gcc.gnu.org/bugzilla/show_bug.cgi?id=9853. */
-#error Invalid GCC version. Must use GCC >= 3.2.3
-#endif
-
-#define IBNAL_SERVICE_NAME   "iibnal"
-#define IBNAL_SERVICE_NUMBER 0x11b9a1
-
-/* Number of schedulers: one per online CPU on SMP kernels, otherwise 1.
- * Tested with #ifdef because CONFIG_* symbols are left undefined (rather
- * than defined to 0) when the option is disabled. */
-#ifdef CONFIG_SMP
-# define IBNAL_N_SCHED      num_online_cpus()   /* # schedulers */
-#else
-# define IBNAL_N_SCHED      1                   /* # schedulers */
-#endif
-
-#define IBNAL_MIN_RECONNECT_INTERVAL HZ         /* first failed connection retry... */
-#define IBNAL_MAX_RECONNECT_INTERVAL (60*HZ)    /* ...exponentially increasing to this */
-
-#define IBNAL_MSG_SIZE       (4<<10)            /* max size of queued messages (inc hdr): 4KB */
-
-#define IBNAL_MSG_QUEUE_SIZE   8                /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 7                /* when to eagerly return credits */
-/* For the retry counts below: 7 indicates infinite retry attempts,
- * Infinicon recommended 5 */
-#define IBNAL_RETRY            5                /* # times to retry */
-#define IBNAL_RNR_RETRY        5                /* presumably # receiver-not-ready (RNR) retries -- TODO confirm */
-#define IBNAL_CM_RETRY         5                /* # times to retry connection */
-#define IBNAL_FLOW_CONTROL     1
-#define IBNAL_ACK_TIMEOUT       20              /* supposedly 4 secs (presumably the IB exponent encoding, 4.096us * 2^20 -- TODO confirm) */
-
-#define IBNAL_NTX             64                /* # tx descs */
-/* this had to be dropped down so that we only register < 255 pages per
- * region.  this will change if we register all memory.
- * (IBNAL_NTX + IBNAL_NTX_NBLK) * IBNAL_MSG_SIZE = 192 * 4KB, i.e. 192
- * pages assuming a 4KB PAGE_SIZE.) */
-#define IBNAL_NTX_NBLK        128               /* # reserved tx descs */
-
-#define IBNAL_PEER_HASH_SIZE  101               /* # peer lists */
-
-#define IBNAL_RESCHED         100               /* # scheduler loops before reschedule */
-
-#define IBNAL_CONCURRENT_PEERS 1000             /* # nodes all talking at once to me */
-
-/* default vals for runtime tunables */
-#define IBNAL_IO_TIMEOUT      50                /* default comms timeout (seconds); initial value of kibnal_tunables.kib_io_timeout */
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS       (IBNAL_NTX + IBNAL_NTX_NBLK)
-#define IBNAL_TX_MSG_BYTES  (IBNAL_TX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES  ((IBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) /* rounded up to whole pages */
-
-#define IBNAL_TX_MAX_SG (PTL_MD_MAX_IOV + 1) /* sizes kib_tx_t.tx_wrq[] and tx_gl[] */
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS       IBNAL_MSG_QUEUE_SIZE
-#define IBNAL_RX_MSG_BYTES  (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES  ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE) /* rounded up to whole pages */
-
-
-/* we may have up to 2 completions per transmit +
-   1 completion per receive, per connection; the receive term is sized
-   for IBNAL_CONCURRENT_PEERS connections */
-#define IBNAL_CQ_ENTRIES  ((2*IBNAL_TX_MSGS) +                          \
-                           (IBNAL_RX_MSGS * IBNAL_CONCURRENT_PEERS))
-
-#define IBNAL_RDMA_BASE  0x0eeb0000
-#define IBNAL_FMR        0
-#define IBNAL_WHOLE_MEM  1 /* presumably selects the full-memory registration kept in kib_data_t.kib_md -- confirm */
-#define IBNAL_CKSUM      0 /* non-zero adds ibm_nob/ibm_cksum to kib_msg_t */
-//#define IBNAL_CALLBACK_CTXT  IB_CQ_CALLBACK_PROCESS
-#define IBNAL_CALLBACK_CTXT  IB_CQ_CALLBACK_INTERRUPT
-
-/* XXX I have no idea. */
-#define IBNAL_STARTING_PSN 1
-
-typedef struct /* runtime tunables (instance: kibnal_tunables) */
-{
-        int               kib_io_timeout;       /* comms timeout (seconds); must stay sizeof(int) for proc_dointvec() */
-        struct ctl_table_header *kib_sysctl;    /* sysctl interface (result of register_sysctl_table()) */
-} kib_tunables_t;
-
-/* some of these have specific types in the stack that just map back
- * to the uFOO types, like IB_{L,R}_KEY. */
-typedef struct /* a set of pages and the details of their mapping */
-{
-        int               ibp_npages;           /* # pages */
-        int               ibp_mapped;           /* mapped? */
-        __u64             ibp_vaddr;            /* mapped region vaddr */
-        __u32             ibp_lkey;             /* mapped region lkey */
-        __u32             ibp_rkey;             /* mapped region rkey */
-        IB_HANDLE         ibp_handle;           /* mapped region handle */
-        struct page      *ibp_pages[0];         /* zero-length trailing array; presumably ibp_npages entries are allocated with the struct -- confirm */
-} kib_pages_t;
-
-typedef struct /* memory registration details; also listed among the wire structs further down */
-{
-        IB_HANDLE         md_handle;            /* presumably the registration handle -- confirm */
-        __u32             md_lkey;              /* presumably the local key -- confirm */
-        __u32             md_rkey;              /* presumably the remote key -- confirm */
-        __u64             md_addr;              /* presumably the registered address -- confirm */
-} kib_md_t __attribute__((packed)); /* NOTE(review): packed follows the typedef name, not the struct body; confirm the compiler applies it to the layout */
-        
-typedef struct /* global NAL state (instance: kibnal_data) */
-{
-        int               kib_init;             /* initialisation state (presumably one of IBNAL_INIT_*) */
-        __u64             kib_incarnation;      /* which one am I */
-        int               kib_shutdown;         /* shut down? */
-        atomic_t          kib_nthreads;         /* # live threads */
-
-        __u64             kib_service_id;       /* service number I listen on */
-        __u64             kib_port_guid;        /* my GUID (lo 64 of GID)*/
-        __u16             kib_port_pkey;        /* my pkey (IB partition key) */
-        ptl_nid_t         kib_nid;              /* my NID */
-        struct semaphore  kib_nid_mutex;        /* serialise NID ops */
-        struct semaphore  kib_nid_signal;       /* signal completion */
-        IB_HANDLE         kib_cep;              /* connection end point */
-
-        rwlock_t          kib_global_lock;      /* stabilize peer/conn ops */
-
-        struct list_head *kib_peers;            /* hash table of all my known peers (bucket chosen by kibnal_nid2peerlist()) */
-        int               kib_peer_hash_size;   /* size of kib_peers */
-        atomic_t          kib_npeers;           /* # peers extant */
-        atomic_t          kib_nconns;           /* # connections extant */
-
-        struct list_head  kib_connd_conns;      /* connections to progress */
-        struct list_head  kib_connd_peers;      /* peers waiting for a connection */
-        wait_queue_head_t kib_connd_waitq;      /* connection daemons sleep here */
-        unsigned long     kib_connd_waketime;   /* when connd will wake */
-        spinlock_t        kib_connd_lock;       /* serialise */
-
-        wait_queue_head_t kib_sched_waitq;      /* schedulers sleep here */
-        struct list_head  kib_sched_txq;        /* tx requiring attention */
-        struct list_head  kib_sched_rxq;        /* rx requiring attention */
-        spinlock_t        kib_sched_lock;       /* serialise */
-        
-        struct kib_tx    *kib_tx_descs;         /* all the tx descriptors */
-        kib_pages_t      *kib_tx_pages;         /* premapped tx msg pages */
-
-        struct list_head  kib_idle_txs;         /* idle tx descriptors */
-        struct list_head  kib_idle_nblk_txs;    /* idle reserved tx descriptors */
-        wait_queue_head_t kib_idle_tx_waitq;    /* block here for tx descriptor */
-        __u64             kib_next_tx_cookie;   /* RDMA completion cookie */
-        spinlock_t        kib_tx_lock;          /* serialise */
-        
-        IB_HANDLE         kib_hca;              /* The HCA */
-        int               kib_port;             /* port on the device */
-        IB_HANDLE         kib_pd;               /* protection domain */
-        IB_HANDLE         kib_sd;               /* SD handle */
-        IB_HANDLE         kib_cq;               /* completion queue */
-        kib_md_t          kib_md;               /* full-mem registration */
-
-        void             *kib_listen_handle;    /* where I listen for connections */
-
-        IBT_INTERFACE_UNION kib_interfaces;     /* The Infinicon IBT interface (used through IIBT_IF, i.e. its .ver2 member) */
-
-        uint64              kib_hca_guids[8];   /* all the HCA guids (room for 8) */
-        IB_CA_ATTRIBUTES    kib_hca_attrs;      /* where to get HCA attrs */
-        FABRIC_OPERATION_DATA kib_fabopdata;    /* (un)advertise service record */
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING         0 /* presumably the values taken by kib_data_t.kib_init, in increasing order of progress -- confirm */
-#define IBNAL_INIT_DATA            1
-#define IBNAL_INIT_LIB             2
-#define IBNAL_INIT_HCA             3
-#define IBNAL_INIT_PORTATTRS       4
-#define IBNAL_INIT_PORT            5
-#define IBNAL_INIT_SD              6
-#define IBNAL_INIT_PD              7
-#define IBNAL_INIT_FMR             8
-#define IBNAL_INIT_MR              9
-#define IBNAL_INIT_TXD             10 
-#define IBNAL_INIT_CQ              11 
-#define IBNAL_INIT_ALL             12 
-
-/************************************************************************
- * Wire message structs.
- * These are sent in sender's byte order (i.e. receiver flips).
- * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD
- * private data and SM service info), is LE on the wire.
- */
-
-/* also kib_md_t above */
-
-typedef struct /* one RDMA fragment; element type of kib_rdma_msg_t.ibrm_desc[] */
-{
-        __u32                 rd_key;           /* remote key (meaning is split local/remote by role -- see the note above kib_rdma_msg_t) */
-        __u32                 rd_nob;           /* # of bytes */
-        __u64                 rd_addr;          /* remote io vaddr */
-} kib_rdma_desc_t __attribute__((packed));
-
-typedef struct /* 'immediate' member of the kib_msg_t union */
-{
-        ptl_hdr_t         ibim_hdr;             /* portals header */
-        char              ibim_payload[0];      /* piggy-backed payload (zero-length array: data follows the header) */
-} kib_immediate_msg_t __attribute__((packed));
-
-/* these arrays serve two purposes during rdma.  they are built on the passive
- * side and sent to the active side as remote arguments.  On the active side
- * the descs are used as a data structure on the way to local gather items. 
- * the different roles result in split local/remote meaning of desc->rd_key */
-typedef struct /* 'rdma' member of the kib_msg_t union */
-{
-        ptl_hdr_t         ibrm_hdr;             /* portals header */
-        __u64             ibrm_cookie;          /* opaque completion cookie */
-        __u32             ibrm_num_descs;       /* how many descs */
-        kib_rdma_desc_t   ibrm_desc[0];         /* where to suck/blow (zero-length array; kib_rdma_msg_len() gives the message size for N descs) */
-} kib_rdma_msg_t __attribute__((packed));
-
-/* Byte offset, within kib_msg_t, of ibm_u.rdma.ibrm_desc[num_descs] --
- * i.e. the length of a message carrying 'num_descs' RDMA descriptors. */
-#define kib_rdma_msg_len(num_descs)                                     \
-        (offsetof(kib_msg_t, ibm_u.rdma.ibrm_desc[num_descs]))
-
-typedef struct /* 'completion' member of the kib_msg_t union */
-{
-        __u64             ibcm_cookie;          /* opaque completion cookie */
-        __u32             ibcm_status;          /* completion status */
-} kib_completion_msg_t __attribute__((packed));
-
-typedef struct /* on-the-wire message: fixed header followed by a type-specific body */
-{
-        __u32              ibm_magic;           /* I'm an openibnal message (presumably IBNAL_MSG_MAGIC) */
-        __u16              ibm_version;         /* this is my version number (presumably IBNAL_MSG_VERSION) */
-        __u8               ibm_type;            /* msg type (presumably one of IBNAL_MSG_*) */
-        __u8               ibm_credits;         /* returned credits */
-#if IBNAL_CKSUM
-        __u32              ibm_nob;             /* only present when IBNAL_CKSUM is non-zero */
-        __u32              ibm_cksum;           /* only present when IBNAL_CKSUM is non-zero */
-#endif
-        union {
-                kib_immediate_msg_t   immediate;
-                kib_rdma_msg_t        rdma;
-                kib_completion_msg_t  completion;
-        } ibm_u __attribute__((packed));
-} kib_msg_t __attribute__((packed)); /* NOTE(review): packed follows the typedef name, not the struct body; confirm the compiler applies it to the layout */
-
-#define IBNAL_MSG_MAGIC       0x0be91b91        /* unique magic */
-#define IBNAL_MSG_VERSION              1        /* current protocol version */
-
-#define IBNAL_MSG_NOOP              0xd0        /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE         0xd1        /* portals hdr + payload */
-#define IBNAL_MSG_PUT_RDMA          0xd2        /* portals PUT hdr + source rdma desc */
-#define IBNAL_MSG_PUT_DONE          0xd3        /* signal PUT rdma completion */
-#define IBNAL_MSG_GET_RDMA          0xd4        /* portals GET hdr + sink rdma desc */
-#define IBNAL_MSG_GET_DONE          0xd5        /* signal GET rdma completion */
-
-/***********************************************************************/
-
-typedef struct kib_rx                           /* receive message */
-{
-        struct list_head          rx_list;      /* queue for attention */
-        struct kib_conn          *rx_conn;      /* owning conn */
-        int                       rx_rdma;      /* RDMA completion posted? */
-        int                       rx_posted;    /* posted? */
-        __u64                     rx_vaddr;     /* pre-mapped buffer (hca vaddr) */
-        kib_msg_t                *rx_msg;       /* pre-mapped buffer (host vaddr) */
-        IB_WORK_REQ               rx_wrq;       /* receive work request... */
-        IB_LOCAL_DATASEGMENT      rx_gl;        /* ...and its memory */
-} kib_rx_t;
-
-typedef struct kib_tx                           /* transmit message */
-{
-        struct list_head          tx_list;      /* queue on idle_txs ibc_tx_queue etc. */
-        int                       tx_isnblk;    /* I'm reserved for non-blocking sends */
-        struct kib_conn          *tx_conn;      /* owning conn (set only by kibnal_queue_tx_locked()) */
-        int                       tx_mapped;    /* mapped for RDMA? (presumably one of KIB_TX_*) */
-        int                       tx_sending;   /* # tx callbacks outstanding */
-        int                       tx_status;    /* completion status */
-        unsigned long             tx_deadline;  /* completion deadline (jiffies) */
-        int                       tx_passive_rdma; /* peer sucks/blows */
-        int                       tx_passive_rdma_wait; /* waiting for peer to complete */
-        __u64                     tx_passive_rdma_cookie; /* completion cookie */
-        lib_msg_t                *tx_libmsg[2]; /* lib msgs to finalize on completion */
-        kib_md_t                  tx_md;        /* RDMA mapping (active/passive) */
-        __u64                     tx_vaddr;     /* pre-mapped buffer (hca vaddr) */
-        kib_msg_t                *tx_msg;       /* pre-mapped buffer (host vaddr) */
-        int                       tx_nsp;       /* # send work items */
-        IB_WORK_REQ               tx_wrq[IBNAL_TX_MAX_SG];    /* send work items... */
-        IB_LOCAL_DATASEGMENT      tx_gl[IBNAL_TX_MAX_SG];     /* ...and their memory */
-} kib_tx_t;
-
-#define KIB_TX_UNMAPPED       0 /* presumably the values of kib_tx_t.tx_mapped -- confirm */
-#define KIB_TX_MAPPED         1
-#define KIB_TX_MAPPED_FMR     2
-
-typedef struct kib_wire_connreq /* kibnal_module_init() refuses to load if this exceeds CM_REQUEST_INFO_USER_LEN bytes */
-{
-        __u32        wcr_magic;                 /* I'm an openibnal connreq */
-        __u16        wcr_version;               /* this is my version number */
-        __u16        wcr_queue_depth;           /* this is my receive queue size */
-        __u64        wcr_nid;                   /* peer's NID */
-        __u64        wcr_incarnation;           /* peer's incarnation */
-} kib_wire_connreq_t;
-
-typedef struct kib_gid /* 128-bit value held as two 64-bit halves; presumably an IB GID -- confirm */
-{
-        __u64   hi, lo;
-} kib_gid_t;
-
-typedef struct kib_connreq
-{
-        /* connection-in-progress */
-        struct kib_conn                    *cr_conn;      /* the connection being set up */
-        kib_wire_connreq_t                  cr_wcr;       /* wire connreq */
-        __u64                               cr_tid;
-        IB_SERVICE_RECORD                   cr_service;   /* service record */
-        kib_gid_t                           cr_gid;
-        IB_PATH_RECORD                      cr_path;      /* path record */
-        CM_REQUEST_INFO                     cr_cmreq;     /* CM request */
-        CM_CONN_INFO                        cr_discarded; /* presumably scratch space for CM output that is ignored -- confirm */
-        CM_REJECT_INFO                      cr_rej_info;  /* CM reject info */
-} kib_connreq_t;
-
-typedef struct kib_conn /* one connection to a peer */
-{ 
-        struct kib_peer    *ibc_peer;           /* owning peer */
-        struct list_head    ibc_list;           /* stash on peer's conn list */
-        __u64               ibc_incarnation;    /* which instance of the peer */
-        atomic_t            ibc_refcount;       /* # users */
-        int                 ibc_state;          /* what's happening (IBNAL_CONN_*; checked by KIB_ASSERT_CONN_STATE*) */
-        atomic_t            ibc_nob;            /* # bytes buffered */
-        int                 ibc_nsends_posted;  /* # uncompleted sends */
-        int                 ibc_credits;        /* # credits I have */
-        int                 ibc_outstanding_credits; /* # credits to return */
-        int                 ibc_rcvd_disconnect;/* received discon request */
-        int                 ibc_sent_disconnect;/* sent discon request */
-        struct list_head    ibc_tx_queue;       /* send queue (kibnal_queue_tx_locked() appends here) */
-        struct list_head    ibc_active_txs;     /* active tx awaiting completion */
-        spinlock_t          ibc_lock;           /* serialise */
-        kib_rx_t           *ibc_rxs;            /* the rx descs */
-        kib_pages_t        *ibc_rx_pages;       /* premapped rx msg pages */
-        IB_HANDLE           ibc_qp;             /* queue pair */
-        IB_HANDLE           ibc_cep;            /* connection ID? */
-        IB_QP_ATTRIBUTES_QUERY ibc_qp_attrs;    /* QP attrs */
-        kib_connreq_t      *ibc_connreq;        /* connection request state */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING      0          /* initial state */
-#define IBNAL_CONN_INIT_QP           1          /* ibc_qp set up */
-#define IBNAL_CONN_CONNECTING        2          /* started to connect */
-#define IBNAL_CONN_ESTABLISHED       3          /* connection established */
-#define IBNAL_CONN_SEND_DREQ         4          /* to send disconnect req */
-#define IBNAL_CONN_DREQ              5          /* sent disconnect req */
-#define IBNAL_CONN_DREP              6          /* sent disconnect rep */
-#define IBNAL_CONN_DISCONNECTED      7          /* no more QP or CM traffic */
-
-#define KIB_ASSERT_CONN_STATE(conn, state) do {                         \
-        LASSERTF((conn)->ibc_state == state, "%d\n", conn->ibc_state);  \
-} while (0)
-
-#define KIB_ASSERT_CONN_STATE_RANGE(conn, low, high) do {               \
-        LASSERTF(low <= high, "%d %d\n", low, high);                    \
-        LASSERTF((conn)->ibc_state >= low && (conn)->ibc_state <= high, \
-                 "%d\n", conn->ibc_state);                              \
-} while (0)
-
-typedef struct kib_peer /* a remote node, identified by NID */
-{
-        struct list_head    ibp_list;           /* stash on global peer list (non-empty <=> kibnal_peer_active()) */
-        struct list_head    ibp_connd_list;     /* schedule on kib_connd_peers */
-        ptl_nid_t           ibp_nid;            /* who's on the other end(s) */
-        atomic_t            ibp_refcount;       /* # users (see kib_peer_addref/kib_peer_decref) */
-        int                 ibp_persistence;    /* "known" peer refs */
-        struct list_head    ibp_conns;          /* all active connections */
-        struct list_head    ibp_tx_queue;       /* msgs waiting for a conn */
-        int                 ibp_connecting;     /* connecting+accepting */
-        unsigned long       ibp_reconnect_time; /* when reconnect may be attempted */
-        unsigned long       ibp_reconnect_interval; /* exponential backoff */
-} kib_peer_t;
-
-
-extern lib_nal_t       kibnal_lib;
-extern kib_data_t      kibnal_data;
-extern kib_tunables_t  kibnal_tunables;
-
-/******************************************************************************/
-/* Infinicon IBT interface wrappers */
-#define IIBT_IF (kibnal_data.kib_interfaces.ver2) /* the interface table every iibt_*() wrapper below calls through */
-
-/* Pass-through to IIBT_IF.GetCaGuids(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_get_hca_guids(uint32 *hca_count, EUI64 *hca_guid_list)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.GetCaGuids(hca_count, hca_guid_list);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.OpenCA(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_open_hca(EUI64 hca_guid,
-              IB_COMPLETION_CALLBACK completion_callback,
-              IB_ASYNC_EVENT_CALLBACK async_event_callback,
-              void *arg, IB_HANDLE *handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.OpenCA(hca_guid, completion_callback,
-                                 async_event_callback, arg, handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.QueryCA(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_query_hca(IB_HANDLE hca_handle, IB_CA_ATTRIBUTES *attrs, void **argp)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.QueryCA(hca_handle, attrs, argp);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.CloseCA(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_close_hca(IB_HANDLE hca_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.CloseCA(hca_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.AllocatePD(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_pd_allocate(IB_HANDLE hca_handle, __u32 max_avs, IB_HANDLE *pd_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.AllocatePD(hca_handle, max_avs, pd_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.FreePD(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_pd_free(IB_HANDLE pd_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.FreePD(pd_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.RegisterPhysMemRegion(); every argument is
- * forwarded in order and its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_register_physical_memory(IB_HANDLE hca_handle,
-                              IB_VIRT_ADDR requested_io_va,
-                              void *phys_buffers, uint64 nphys_buffers,
-                              uint32 io_va_offset, IB_HANDLE pd_handle,
-                              IB_ACCESS_CONTROL access,
-                              IB_HANDLE *mem_handle,
-                              IB_VIRT_ADDR *actual_io_va,
-                              IB_L_KEY *lkey, IB_R_KEY *rkey)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.RegisterPhysMemRegion(hca_handle, requested_io_va,
-                                                phys_buffers, nphys_buffers,
-                                                io_va_offset, pd_handle,
-                                                access, mem_handle,
-                                                actual_io_va, lkey, rkey);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.RegisterContigPhysMemRegion(); every
- * argument is forwarded in order and its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_register_contig_physical_memory(IB_HANDLE hca_handle,
-                                     IB_VIRT_ADDR requested_io_va,
-                                     IB_MR_PHYS_BUFFER *phys_buffers,
-                                     uint64 nphys_buffers,
-                                     uint32 io_va_offset, IB_HANDLE pd_handle,
-                                     IB_ACCESS_CONTROL access,
-                                     IB_HANDLE *mem_handle,
-                                     IB_VIRT_ADDR *actual_io_va,
-                                     IB_L_KEY *lkey, IB_R_KEY *rkey)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.RegisterContigPhysMemRegion(hca_handle,
-                                                      requested_io_va,
-                                                      phys_buffers,
-                                                      nphys_buffers,
-                                                      io_va_offset, pd_handle,
-                                                      access, mem_handle,
-                                                      actual_io_va,
-                                                      lkey, rkey);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.RegisterMemRegion(); every argument is
- * forwarded in order and its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_register_memory(IB_HANDLE hca_handle,
-                     void *virt_addr, unsigned int length,
-                     IB_HANDLE pd_handle, IB_ACCESS_CONTROL access,
-                     IB_HANDLE *mem_handle,
-                     IB_L_KEY *lkey, IB_R_KEY *rkey)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.RegisterMemRegion(hca_handle, virt_addr, length,
-                                            pd_handle, access, mem_handle,
-                                            lkey, rkey);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.DeregisterMemRegion(); its FSTATUS is
- * returned unchanged. */
-static inline FSTATUS
-iibt_deregister_memory(IB_HANDLE mem_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.DeregisterMemRegion(mem_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.CreateCQ(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cq_create(IB_HANDLE hca_handle, uint32 requested_size,
-               void *arg, IB_HANDLE *cq_handle, uint32 *actual_size)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.CreateCQ(hca_handle, requested_size,
-                                   arg, cq_handle, actual_size);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.PollCQ(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_cq_poll(IB_HANDLE cq_handle, IB_WORK_COMPLETION *wc)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.PollCQ(cq_handle, wc);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.RearmCQ(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_cq_rearm(IB_HANDLE cq_handle, IB_CQ_EVENT_SELECT select)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.RearmCQ(cq_handle, select);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.DestroyCQ(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cq_destroy(IB_HANDLE cq_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.DestroyCQ(cq_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.CreateQP(); its FSTATUS is returned
- * unchanged.
- * NOTE(review): the handle parameter is named cq_handle but is forwarded
- * as CreateQP's handle argument -- presumably it receives the new QP
- * handle; confirm against callers. */
-static inline FSTATUS
-iibt_qp_create(IB_HANDLE hca_handle, IB_QP_ATTRIBUTES_CREATE *create_attr,
-               void *arg, IB_HANDLE *cq_handle,
-               IB_QP_ATTRIBUTES_QUERY *query_attr)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.CreateQP(hca_handle, create_attr, arg, cq_handle,
-                                   query_attr);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.QueryQP(); its FSTATUS is returned unchanged. */
-static inline FSTATUS
-iibt_qp_query(IB_HANDLE qp_handle, IB_QP_ATTRIBUTES_QUERY *query_attr,
-              void **arg_ptr)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.QueryQP(qp_handle, query_attr, arg_ptr);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.ModifyQP(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_qp_modify(IB_HANDLE qp_handle, IB_QP_ATTRIBUTES_MODIFY *modify_attr,
-               IB_QP_ATTRIBUTES_QUERY *query_attr)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.ModifyQP(qp_handle, modify_attr, query_attr);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.DestroyQP(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_qp_destroy(IB_HANDLE qp_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.DestroyQP(qp_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.PostRecv(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_postrecv(IB_HANDLE qp_handle, IB_WORK_REQ *work_req)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.PostRecv(qp_handle, work_req);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Vpi.PostSend(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_postsend(IB_HANDLE qp_handle, IB_WORK_REQ *work_req)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Vpi.PostSend(qp_handle, work_req);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Sdi.Register(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_sd_register(IB_HANDLE *sd_handle, CLIENT_CONTROL_PARAMETERS *p)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Sdi.Register(sd_handle, p);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Sdi.Deregister(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_sd_deregister(IB_HANDLE sd_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Sdi.Deregister(sd_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Sdi.PortFabricOperation(); its FSTATUS is
- * returned unchanged. */
-static inline FSTATUS
-iibt_sd_port_fabric_operation(IB_HANDLE sd_handle, EUI64 port_guid,
-                              FABRIC_OPERATION_DATA *fod,
-                              PFABRIC_OPERATION_CALLBACK callback,
-                              COMMAND_CONTROL_PARAMETERS *p, void *arg)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Sdi.PortFabricOperation(sd_handle, port_guid,
-                                              fod, callback, p, arg);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Sdi.QueryPortFabricInformation(); its FSTATUS
- * is returned unchanged. */
-static inline FSTATUS
-iibt_sd_query_port_fabric_information(IB_HANDLE sd_handle, EUI64 port_guid,
-                                      QUERY *qry,
-                                      PQUERY_CALLBACK callback,
-                                      COMMAND_CONTROL_PARAMETERS *p, void *arg)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Sdi.QueryPortFabricInformation(sd_handle, port_guid,
-                                                     qry, callback, p, arg);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmCreateCEP(); the IB_HANDLE it yields is
- * returned unchanged. */
-static inline IB_HANDLE
-iibt_cm_create_cep(CM_CEP_TYPE type)
-{
-        IB_HANDLE cep;
-
-        cep = IIBT_IF.Cmi.CmCreateCEP(type);
-        return cep;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmModifyCEP(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_modify_cep(IB_HANDLE cep, uint32 attr, char* value, uint32 len,
-                   uint32 offset)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmModifyCEP(cep, attr, value, len, offset);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmDestroyCEP(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_destroy_cep(IB_HANDLE cep_handle)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmDestroyCEP(cep_handle);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmListen(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_listen(IB_HANDLE cep, CM_LISTEN_INFO *info,
-               PFN_CM_CALLBACK callback, void *arg)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmListen(cep, info, callback, arg);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmCancel(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_cancel(IB_HANDLE cep)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmCancel(cep);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmAccept(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_accept(IB_HANDLE cep,
-               CM_CONN_INFO *send_info, CM_CONN_INFO *recv_info,
-               PFN_CM_CALLBACK callback, void *arg,
-               IB_HANDLE *new_cep)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmAccept(cep, send_info, recv_info,
-                                   callback, arg, new_cep);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmReject(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_reject(IB_HANDLE cep, CM_REJECT_INFO *rej)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmReject(cep, rej);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmDisconnect(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_disconnect(IB_HANDLE cep, CM_DREQUEST_INFO *req,
-                   CM_DREPLY_INFO *reply)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmDisconnect(cep, req, reply);
-        return frc;
-}
-
-/* Pass-through to IIBT_IF.Cmi.CmConnect(); its FSTATUS is returned
- * unchanged. */
-static inline FSTATUS
-iibt_cm_connect (IB_HANDLE cep, CM_REQUEST_INFO *req,
-                 PFN_CM_CALLBACK callback, void *arg)
-{
-        FSTATUS frc;
-
-        frc = IIBT_IF.Cmi.CmConnect (cep, req, callback, arg);
-        return frc;
-}
-
-/* Returns 1 when the work request's SendRC SignaledCompletion option is
- * set to 1, otherwise 0. */
-static inline int wrq_signals_completion(IB_WORK_REQ *wrq)
-{
-        if (wrq->Req.SendRC.Options.s.SignaledCompletion == 1)
-                return 1;
-
-        return 0;
-}
-
-
-/******************************************************************************/
-
-/* these are purposely avoiding using local vars so they don't increase
- * stack consumption. */
-
-#define kib_peer_addref(peer) do {                                      \
-        LASSERTF(atomic_read(&peer->ibp_refcount) > 0, "%d\n",          \
-                 atomic_read(&peer->ibp_refcount));                     \
-        CDEBUG(D_NET, "++peer[%p] -> "LPX64" (%d)\n",                   \
-               peer, peer->ibp_nid, atomic_read (&peer->ibp_refcount)); \
-        atomic_inc(&peer->ibp_refcount);                                \
-} while (0)
-
-#define kib_peer_decref(peer) do {                                      \
-        LASSERTF(atomic_read(&peer->ibp_refcount) > 0, "%d\n",          \
-                 atomic_read(&peer->ibp_refcount));                     \
-        CDEBUG(D_NET, "--peer[%p] -> "LPX64" (%d)\n",                   \
-               peer, peer->ibp_nid, atomic_read (&peer->ibp_refcount)); \
-        if (atomic_dec_and_test (&peer->ibp_refcount)) {                \
-                CDEBUG (D_NET, "destroying peer "LPX64" %p\n",          \
-                        peer->ibp_nid, peer);                           \
-                kibnal_destroy_peer (peer);                             \
-        }                                                               \
-} while (0)
-
-/******************************************************************************/
-
-/* Map a NID to its bucket in the kibnal_data.kib_peers hash table: the
- * NID truncated to unsigned int, modulo kib_peer_hash_size. */
-static inline struct list_head *
-kibnal_nid2peerlist (ptl_nid_t nid)
-{
-        return &kibnal_data.kib_peers[(unsigned int)nid %
-                                      kibnal_data.kib_peer_hash_size];
-}
-
-/* Returns non-zero when the peer is linked on a list via ibp_list, i.e.
- * it is in the peer hash table; 0 otherwise. */
-static inline int
-kibnal_peer_active(kib_peer_t *peer)
-{
-        if (list_empty(&peer->ibp_list))
-                return 0;               /* not hashed */
-
-        return 1;
-}
-
-/* Attach 'tx' to 'conn' and append it to the connection's send queue,
- * stamping its deadline kib_io_timeout seconds from now.
- * CAVEAT EMPTOR: tx takes caller's ref on conn. */
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
-        struct list_head *sendq = &conn->ibc_tx_queue;
-
-        LASSERT (tx->tx_nsp > 0);               /* work items set up */
-        LASSERT (tx->tx_conn == NULL);          /* only set here */
-
-        tx->tx_deadline = jiffies + kibnal_tunables.kib_io_timeout * HZ;
-        tx->tx_conn = conn;
-        list_add_tail(&tx->tx_list, sendq);
-}
-
-/* Service record component mask: the service name plus all eight
- * ServiceData8 components. */
-#define KIBNAL_SERVICE_KEY_MASK                                         \
-        (IB_SERVICE_RECORD_COMP_SERVICENAME                             \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_1                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_2                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_3                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_4                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_5                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_6                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_7                        \
-         | IB_SERVICE_RECORD_COMP_SERVICEDATA8_8)
-
-/* Return the location inside the service record where the NID is stored:
- * the start of ServiceData8, viewed as a 64-bit quantity. */
-static inline __u64*
-kibnal_service_nid_field(IB_SERVICE_RECORD *srv)
-{
-        /* must be consistent with KIBNAL_SERVICE_KEY_MASK */
-        void *field = srv->ServiceData8;
-
-        return (__u64 *)field;
-}
-
-
-/* Fill in the lookup keys of a service record: the fixed service name and
- * the NID (stored little-endian in the ServiceData8 field). */
-static inline void
-kibnal_set_service_keys(IB_SERVICE_RECORD *srv, ptl_nid_t nid)
-{
-        const size_t name_size = sizeof(srv->ServiceName);
-
-        /* the name plus its terminating NUL must fit in the field */
-        LASSERT (strlen(IBNAL_SERVICE_NAME) < name_size);
-
-        /* zero-fill first so every byte after the name is zero */
-        memset (srv->ServiceName, 0, name_size);
-        strcpy (srv->ServiceName, IBNAL_SERVICE_NAME);
-
-        *kibnal_service_nid_field(srv) = cpu_to_le64(nid);
-}
-
-#if 0
-/* Debug aid (compiled out): query conn's QP and log whether RDMA write and
- * read are enabled, or "invalid" when the attribute is not flagged valid. */
-static inline void
-kibnal_show_rdma_attr (kib_conn_t *conn)
-{
-        struct ib_qp_attribute qp_attr;
-        const char            *wr_state = "invalid";
-        const char            *rd_state = "invalid";
-        int                    rc;
-
-        memset (&qp_attr, 0, sizeof(qp_attr));
-        rc = ib_qp_query(conn->ibc_qp, &qp_attr);
-        if (rc != 0) {
-                CERROR ("Can't get qp attrs: %d\n", rc);
-                return;
-        }
-
-        if (qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) {
-                wr_state = qp_attr.enable_rdma_write ? "enabled" : "disabled";
-                rd_state = qp_attr.enable_rdma_read ? "enabled" : "disabled";
-        }
-
-        CWARN ("RDMA CAPABILITY: write %s read %s\n", wr_state, rd_state);
-}
-#endif
-
-#if CONFIG_X86
-/* Convert a struct page pointer to a physical address: the page's index in
- * mem_map shifted left by PAGE_SHIFT.  Only provided for CONFIG_X86. */
-static inline __u64
-kibnal_page2phys (struct page *p)
-{
-        __u64 pfn = p - mem_map;        /* index of 'p' within mem_map */
-
-        pfn <<= PAGE_SHIFT;
-        return pfn;
-}
-#else
-# error "no page->phys"
-#endif
-
-/* CAVEAT EMPTOR:
- * We rely on tx/rx descriptor alignment to allow us to use the lowest bit
- * of the work request id as a flag to determine if the completion is for a
- * transmit or a receive.  It seems that the CQ entry's 'op' field
- * isn't always set correctly on completions that occur after QP teardown. */
-
-/* Encode a descriptor pointer as a work request id, using bit 0 as the
- * "this is a receive" flag.  The pointer must have bit 0 clear. */
-static inline __u64
-kibnal_ptr2wreqid (void *ptr, int isrx)
-{
-        unsigned long id = (unsigned long)ptr;
-
-        LASSERT ((id & 1) == 0);
-
-        if (isrx)
-                id |= 1;
-
-        return (__u64)id;
-}
-
-/* Recover the descriptor pointer from a work request id by masking off the
- * rx flag in bit 0. */
-static inline void *
-kibnal_wreqid2ptr (__u64 wreqid)
-{
-        unsigned long addr = (unsigned long)wreqid;
-
-        addr &= ~1UL;
-        return (void *)addr;
-}
-
-/* Return 1 if bit 0 (the rx flag) of the work request id is set, else 0. */
-static inline int
-kibnal_wreqid_is_rx (__u64 wreqid)
-{
-        return (int)(wreqid & 1);
-}
-
-/* Return 1 if the global memory descriptor kib_md has a handle, else 0.
- * Callers use this to choose between the global descriptor's keys and
- * per-buffer registration. */
-static inline int
-kibnal_whole_mem(void)
-{
-        return (kibnal_data.kib_md.md_handle == NULL) ? 0 : 1;
-}
-
-extern kib_peer_t *kibnal_create_peer (ptl_nid_t nid);
-extern void kibnal_destroy_peer (kib_peer_t *peer);
-extern int kibnal_del_peer (ptl_nid_t nid, int single_share);
-extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid);
-extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern int  kibnal_close_stale_conns_locked (kib_peer_t *peer, 
-                                              __u64 incarnation);
-extern kib_conn_t *kibnal_create_conn (void);
-extern void kibnal_put_conn (kib_conn_t *conn);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-void kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg);
-
-extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access);
-extern void kibnal_free_pages (kib_pages_t *p);
-
-extern void kibnal_check_sends (kib_conn_t *conn);
-extern void kibnal_close_conn_locked (kib_conn_t *conn, int error);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-extern int  kibnal_thread_start (int (*fn)(void *arg), void *arg);
-extern int  kibnal_scheduler(void *arg);
-extern int  kibnal_connd (void *arg);
-extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
-extern void kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status, 
-                                      kib_rx_t *rx, lib_msg_t *libmsg, 
-                                      unsigned int niov, 
-                                      struct iovec *iov, ptl_kiov_t *kiov,
-                                      size_t offset, size_t nob);
-
-void kibnal_ca_async_callback (void *ca_arg, IB_EVENT_RECORD *ev);
-void kibnal_ca_callback (void *ca_arg, void *cq_arg);
diff --git a/lustre/portals/knals/iibnal/iibnal_cb.c b/lustre/portals/knals/iibnal/iibnal_cb.c
deleted file mode 100644 (file)
index a827ba5..0000000
+++ /dev/null
@@ -1,3018 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "iibnal.h"
-
-/*
- *  LIB functions follow
- *
- */
-/* Append 'tx' to the scheduler's tx queue and wake the scheduler wait
- * queue, all under kib_sched_lock with interrupts disabled. */
-static void
-kibnal_schedule_tx_done (kib_tx_t *tx)
-{
-        unsigned long irqflags;
-
-        spin_lock_irqsave (&kibnal_data.kib_sched_lock, irqflags);
-        list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq);
-        wake_up (&kibnal_data.kib_sched_waitq);
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, irqflags);
-}
-
-static void
-kibnal_tx_done (kib_tx_t *tx)
-{
-        ptl_err_t        ptlrc = (tx->tx_status == 0) ? PTL_OK : PTL_FAIL;
-        unsigned long    flags;
-        int              i;
-        FSTATUS          frc;
-
-        LASSERT (tx->tx_sending == 0);          /* mustn't be awaiting callback */
-        LASSERT (!tx->tx_passive_rdma_wait);    /* mustn't be awaiting RDMA */
-
-        switch (tx->tx_mapped) {
-        default:
-                LBUG();
-
-        case KIB_TX_UNMAPPED:
-                break;
-
-        case KIB_TX_MAPPED:
-                if (in_interrupt()) {
-                        /* can't deregister memory in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }
-                frc = iibt_deregister_memory(tx->tx_md.md_handle);
-                LASSERT (frc == FSUCCESS);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-                break;
-
-#if IBNAL_FMR
-        case KIB_TX_MAPPED_FMR:
-                if (in_interrupt() && tx->tx_status != 0) {
-                        /* can't flush FMRs in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }              
-
-                rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr);
-                LASSERT (rc == 0);
-
-                if (tx->tx_status != 0)
-                        ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-                break;
-#endif
-        }
-
-        for (i = 0; i < 2; i++) {
-                /* tx may have up to 2 libmsgs to finalise */
-                if (tx->tx_libmsg[i] == NULL)
-                        continue;
-
-                lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc);
-                tx->tx_libmsg[i] = NULL;
-        }
-        
-        if (tx->tx_conn != NULL) {
-                kibnal_put_conn (tx->tx_conn);
-                tx->tx_conn = NULL;
-        }
-
-        tx->tx_nsp = 0;
-        tx->tx_passive_rdma = 0;
-        tx->tx_status = 0;
-
-        spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
-        if (tx->tx_isnblk) {
-                list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_nblk_txs);
-        } else {
-                list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs);
-                wake_up (&kibnal_data.kib_idle_tx_waitq);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-}
-
-/* Take a tx descriptor off an idle list and return it, or NULL.
- *
- * The normal pool (kib_idle_txs) is tried first.  If it is empty and
- * 'may_block' is zero, the reserve pool (kib_idle_nblk_txs) is used; if that
- * is empty too an error is logged and NULL is returned.  If 'may_block' is
- * non-zero the caller sleeps until the normal pool is non-empty or
- * kib_shutdown is set, then retries.
- *
- * Every 'break' leaves the loop with kib_tx_lock still held; it is released
- * once after the descriptor has been unlinked and checked.
- *
- * NOTE(review): once kib_shutdown is set the wait_event() condition is
- * always true, so a blocking caller appears to spin while kib_idle_txs stays
- * empty -- confirm whether shutdown should make this return NULL. */
-static kib_tx_t *
-kibnal_get_idle_tx (int may_block) 
-{
-        unsigned long  flags;
-        kib_tx_t      *tx = NULL;
-        ENTRY;
-        
-        for (;;) {
-                spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
-                /* "normal" descriptor is free */
-                if (!list_empty (&kibnal_data.kib_idle_txs)) {
-                        tx = list_entry (kibnal_data.kib_idle_txs.next,
-                                         kib_tx_t, tx_list);
-                        break;
-                }
-
-                if (!may_block) {
-                        /* may dip into reserve pool */
-                        if (list_empty (&kibnal_data.kib_idle_nblk_txs)) {
-                                CERROR ("reserved tx desc pool exhausted\n");
-                                break;
-                        }
-
-                        tx = list_entry (kibnal_data.kib_idle_nblk_txs.next,
-                                         kib_tx_t, tx_list);
-                        break;
-                }
-
-                /* block for idle tx */
-                spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-
-                wait_event (kibnal_data.kib_idle_tx_waitq,
-                            !list_empty (&kibnal_data.kib_idle_txs) ||
-                            kibnal_data.kib_shutdown);
-        }
-
-        if (tx != NULL) {
-                list_del (&tx->tx_list);
-
-                /* Allocate a new passive RDMA completion cookie.  It might
-                 * not be needed, but we've got a lock right now and we're
-                 * unlikely to wrap... */
-                tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++;
-
-                /* an idle descriptor must be in its fully reset state */
-                LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-                LASSERT (tx->tx_nsp == 0);
-                LASSERT (tx->tx_sending == 0);
-                LASSERT (tx->tx_status == 0);
-                LASSERT (tx->tx_conn == NULL);
-                LASSERT (!tx->tx_passive_rdma);
-                LASSERT (!tx->tx_passive_rdma_wait);
-                LASSERT (tx->tx_libmsg[0] == NULL);
-                LASSERT (tx->tx_libmsg[1] == NULL);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-        
-        RETURN(tx);
-}
-
-/* Report the "distance" to 'nid' in *dist: 0 if it is this NAL's own NID,
- * otherwise 1.  Always returns 0. */
-static int
-kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        /* I would guess that if kibnal_get_peer (nid) == NULL,
-           and we're not routing, then 'nid' is very distant :) */
-        *dist = (nal->libnal_ni.ni_pid.nid == nid) ? 0 : 1;
-
-        return 0;
-}
-
-/* Handle a peer's completion message for a passive RDMA.
- *
- * Searches conn's active tx list for a tx that is waiting for a passive
- * RDMA completion with a matching 'cookie'.  If found, 'status' is recorded
- * in it and its wait flag cleared; if that leaves the tx with no sends
- * outstanding it is unlinked and completed via kibnal_tx_done().  If nothing
- * matches, an error is logged. */
-static void
-kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status)
-{
-        struct list_head *ttmp;
-        unsigned long     flags;
-        int               idle;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        list_for_each (ttmp, &conn->ibc_active_txs) {
-                kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list);
-
-                /* only passive RDMA txs may be waiting for a completion */
-                LASSERT (tx->tx_passive_rdma ||
-                         !tx->tx_passive_rdma_wait);
-
-                /* a tx on the active list is waiting or still sending */
-                LASSERT (tx->tx_passive_rdma_wait ||
-                         tx->tx_sending != 0);
-
-                if (!tx->tx_passive_rdma_wait ||
-                    tx->tx_passive_rdma_cookie != cookie)
-                        continue;
-
-                CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status);
-
-                tx->tx_status = status;
-                tx->tx_passive_rdma_wait = 0;
-                idle = (tx->tx_sending == 0);
-
-                if (idle)
-                        list_del (&tx->tx_list);
-
-                /* safe to drop the lock mid-iteration: we return below */
-                spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-                /* I could be racing with tx callbacks.  It's whoever
-                 * _makes_ tx idle that frees it */
-                if (idle)
-                        kibnal_tx_done (tx);
-                return;
-        }
-                
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-        CERROR ("Unmatched (late?) RDMA completion "LPX64" from "LPX64"\n",
-                cookie, conn->ibc_peer->ibp_nid);
-}
-
-/* Pick the local key for buffers in 'ibp': the global descriptor's lkey
- * when kibnal_whole_mem() is true, otherwise the page set's own lkey. */
-static __u32
-kibnal_lkey(kib_pages_t *ibp)
-{
-        return kibnal_whole_mem() ? kibnal_data.kib_md.md_lkey
-                                  : ibp->ibp_lkey;
-}
-
-/* (Re)post receive buffer 'rx' on its connection.
- *
- * Builds the single-segment receive work request and posts it if the
- * connection is ESTABLISHED.  On success, and if 'do_credits' is set, one
- * outstanding credit is added and kibnal_check_sends() is called.
- *
- * On failure the connection is closed (if it was still ESTABLISHED) and the
- * rx's reference on the conn is dropped; the rx stays marked as posted. */
-static void
-kibnal_post_rx (kib_rx_t *rx, int do_credits)
-{
-        kib_conn_t   *conn = rx->rx_conn;
-        int           rc = 0;
-        unsigned long flags;
-        FSTATUS       frc;
-        ENTRY;
-
-        rx->rx_gl = (IB_LOCAL_DATASEGMENT) {
-                .Address = rx->rx_vaddr,
-                .Length  = IBNAL_MSG_SIZE,
-                .Lkey    = kibnal_lkey(conn->ibc_rx_pages),
-        };
-
-        rx->rx_wrq = (IB_WORK_REQ) {
-                .Operation              = WROpRecv,
-                .DSListDepth            = 1,
-                .MessageLen             = IBNAL_MSG_SIZE,
-                .WorkReqId              = kibnal_ptr2wreqid(rx, 1),
-                .DSList                 = &rx->rx_gl,
-        };
-
-        KIB_ASSERT_CONN_STATE_RANGE(conn, IBNAL_CONN_ESTABLISHED,
-                                    IBNAL_CONN_DREP);
-        LASSERT (!rx->rx_posted);
-        rx->rx_posted = 1;
-        mb();
-
-        if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
-                rc = -ECONNABORTED;
-        else {
-                frc = iibt_postrecv(conn->ibc_qp, &rx->rx_wrq);
-                if (frc != FSUCCESS) {
-                        CDEBUG(D_NET, "post failed %d\n", frc);
-                        rc = -EINVAL;
-                } else {
-                        /* only claim success when the post succeeded */
-                        CDEBUG(D_NET, "posted rx %p\n", &rx->rx_wrq);
-                }
-        }
-
-        if (rc == 0) {
-                if (do_credits) {
-                        spin_lock_irqsave(&conn->ibc_lock, flags);
-                        conn->ibc_outstanding_credits++;
-                        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
-                        kibnal_check_sends(conn);
-                }
-                EXIT;
-                return;
-        }
-
-        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                CERROR ("Error posting receive -> "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, rc);
-                kibnal_close_conn (rx->rx_conn, rc);
-        } else {
-                /* conn is already going away: not worth an error */
-                CDEBUG (D_NET, "Error posting receive -> "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, rc);
-        }
-
-        /* Drop rx's ref */
-        kibnal_put_conn (conn);
-        EXIT;
-}
-
-#if IBNAL_CKSUM
-/* Checksum 'nob' bytes at 'ptr': for each byte, rotate the running sum left
- * by one bit and add the byte.  Bytes are read through plain 'char', so
- * they are added with that type's signedness. */
-static inline __u32 kibnal_cksum (void *ptr, int nob)
-{
-        char  *byte = ptr;
-        __u32  sum  = 0;
-        int    i;
-
-        for (i = 0; i < nob; i++) {
-                __u32 rotated = (sum << 1) | (sum >> 31);
-
-                sum = rotated + byte[i];
-        }
-
-        return sum;
-}
-#endif
-
-/* Debug helper that would dump 'len' bytes at 'ptr' in hex, 16 bytes per
- * line in 2-byte groups.  Currently switched off: it returns before printing
- * anything. */
-static void hexdump(char *string, void *ptr, int len)
-{
-        unsigned char *bytes = ptr;
-        int            n;
-
-        /* dumping is disabled; everything below is unreachable */
-        return;
-
-        if (len < 0 || len > 2048)  {
-                printk("XXX what the hell? %d\n",len);
-                return;
-        }
-
-        printk("%d bytes of '%s' from 0x%p\n", len, string, ptr);
-
-        /* 'n' counts the bytes printed so far, including this one */
-        for (n = 1; n <= len; n++) {
-                printk("%02x", bytes[n - 1]);
-
-                if ((n & 15) == 0)
-                        printk("\n");           /* end of a 16-byte row */
-                else if ((n & 1) == 0)
-                        printk(" ");            /* gap between byte pairs */
-        }
-
-        /* terminate a final partial row */
-        if ((len & 15) != 0)
-                printk("\n");
-}
-
-static void
-kibnal_rx_callback (IB_WORK_COMPLETION *wc)
-{
-        kib_rx_t     *rx = (kib_rx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
-        kib_msg_t    *msg = rx->rx_msg;
-        kib_conn_t   *conn = rx->rx_conn;
-        int           nob = wc->Length;
-        const int     base_nob = offsetof(kib_msg_t, ibm_u);
-        int           credits;
-        int           flipped;
-        unsigned long flags;
-        __u32         i;
-#if IBNAL_CKSUM
-        __u32         msg_cksum;
-        __u32         computed_cksum;
-#endif
-
-        /* we set the QP to erroring after we've finished disconnecting, 
-         * maybe we should do so sooner. */
-        KIB_ASSERT_CONN_STATE_RANGE(conn, IBNAL_CONN_ESTABLISHED, 
-                                    IBNAL_CONN_DISCONNECTED);
-
-        CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-        LASSERT (rx->rx_posted);
-        rx->rx_posted = 0;
-        mb();
-
-        /* receives complete with error in any case after we've started
-         * disconnecting */
-        if (conn->ibc_state > IBNAL_CONN_ESTABLISHED)
-                goto failed;
-
-        if (wc->Status != WRStatusSuccess) {
-                CERROR("Rx from "LPX64" failed: %d\n", 
-                       conn->ibc_peer->ibp_nid, wc->Status);
-                goto failed;
-        }
-
-        if (nob < base_nob) {
-                CERROR ("Short rx from "LPX64": %d < expected %d\n",
-                        conn->ibc_peer->ibp_nid, nob, base_nob);
-                goto failed;
-        }
-
-        hexdump("rx", rx->rx_msg, sizeof(kib_msg_t));
-
-        /* Receiver does any byte flipping if necessary... */
-
-        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
-                flipped = 0;
-        } else {
-                if (msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
-                        CERROR ("Unrecognised magic: %08x from "LPX64"\n", 
-                                msg->ibm_magic, conn->ibc_peer->ibp_nid);
-                        goto failed;
-                }
-                flipped = 1;
-                __swab16s (&msg->ibm_version);
-                LASSERT (sizeof(msg->ibm_type) == 1);
-                LASSERT (sizeof(msg->ibm_credits) == 1);
-        }
-
-        if (msg->ibm_version != IBNAL_MSG_VERSION) {
-                CERROR ("Incompatible msg version %d (%d expected)\n",
-                        msg->ibm_version, IBNAL_MSG_VERSION);
-                goto failed;
-        }
-
-#if IBNAL_CKSUM
-        if (nob != msg->ibm_nob) {
-                CERROR ("Unexpected # bytes %d (%d expected)\n", nob, msg->ibm_nob);
-                goto failed;
-        }
-
-        msg_cksum = le32_to_cpu(msg->ibm_cksum);
-        msg->ibm_cksum = 0;
-        computed_cksum = kibnal_cksum (msg, nob);
-        
-        if (msg_cksum != computed_cksum) {
-                CERROR ("Checksum failure %d: (%d expected)\n",
-                        computed_cksum, msg_cksum);
-//                goto failed;
-        }
-        CDEBUG(D_NET, "cksum %x, nob %d\n", computed_cksum, nob);
-#endif
-
-        /* Have I received credits that will let me send? */
-        credits = msg->ibm_credits;
-        if (credits != 0) {
-                spin_lock_irqsave(&conn->ibc_lock, flags);
-                conn->ibc_credits += credits;
-                spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                
-                kibnal_check_sends(conn);
-        }
-
-        switch (msg->ibm_type) {
-        case IBNAL_MSG_NOOP:
-                kibnal_post_rx (rx, 1);
-                return;
-
-        case IBNAL_MSG_IMMEDIATE:
-                if (nob < base_nob + sizeof (kib_immediate_msg_t)) {
-                        CERROR ("Short IMMEDIATE from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                break;
-                
-        case IBNAL_MSG_PUT_RDMA:
-        case IBNAL_MSG_GET_RDMA:
-                if (nob < base_nob + sizeof (kib_rdma_msg_t)) {
-                        CERROR ("Short RDMA msg from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                if (flipped) 
-                        __swab32(msg->ibm_u.rdma.ibrm_num_descs);
-
-                CDEBUG(D_NET, "%d RDMA: cookie "LPX64":\n",
-                       msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie);
-
-                if ((msg->ibm_u.rdma.ibrm_num_descs > PTL_MD_MAX_IOV) ||
-                    (kib_rdma_msg_len(msg->ibm_u.rdma.ibrm_num_descs) > 
-                     min(nob, IBNAL_MSG_SIZE))) {
-                        CERROR ("num_descs %d too large\n", 
-                                msg->ibm_u.rdma.ibrm_num_descs);
-                        goto failed;
-                }
-
-                for(i = 0; i < msg->ibm_u.rdma.ibrm_num_descs; i++) {
-                        kib_rdma_desc_t *desc = &msg->ibm_u.rdma.ibrm_desc[i];
-
-                        if (flipped) {
-                                __swab32(desc->rd_key);
-                                __swab32(desc->rd_nob);
-                                __swab64(desc->rd_addr);
-                        }
-
-                        CDEBUG(D_NET, "  key %x, " "addr "LPX64", nob %u\n",
-                               desc->rd_key, desc->rd_addr, desc->rd_nob);
-                }
-                break;
-                        
-        case IBNAL_MSG_PUT_DONE:
-        case IBNAL_MSG_GET_DONE:
-                if (nob < base_nob + sizeof (kib_completion_msg_t)) {
-                        CERROR ("Short COMPLETION msg from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                if (flipped)
-                        __swab32s(&msg->ibm_u.completion.ibcm_status);
-                
-                CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n",
-                       msg->ibm_type, msg->ibm_u.completion.ibcm_cookie,
-                       msg->ibm_u.completion.ibcm_status);
-
-                kibnal_complete_passive_rdma (conn, 
-                                              msg->ibm_u.completion.ibcm_cookie,
-                                              msg->ibm_u.completion.ibcm_status);
-                kibnal_post_rx (rx, 1);
-                return;
-                        
-        default:
-                CERROR ("Can't parse type from "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, msg->ibm_type);
-                goto failed;
-        }
-
-        /* schedule for kibnal_rx() in thread context */
-        spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-        
-        list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq);
-        wake_up (&kibnal_data.kib_sched_waitq);
-        
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-        return;
-        
- failed:
-        CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-        kibnal_close_conn(conn, -ECONNABORTED);
-
-        /* Don't re-post rx & drop its ref on conn */
-        kibnal_put_conn(conn);
-}
-
-/* Process a received message that kibnal_rx_callback() queued for thread
- * context: hand its portals header to lib_parse(), complete an unmatched
- * GET with a failure so the peer doesn't wait for the full timeout, then
- * re-post the receive buffer. */
-void
-kibnal_rx (kib_rx_t *rx)
-{
-        kib_msg_t   *msg = rx->rx_msg;
-
-        /* Clear flag so I can detect if I've sent an RDMA completion */
-        rx->rx_rdma = 0;
-
-        switch (msg->ibm_type) {
-        case IBNAL_MSG_GET_RDMA:
-                lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx);
-                /* If the incoming get was matched, I'll have initiated the
-                 * RDMA and the completion message... */
-                if (rx->rx_rdma)
-                        break;
-
-                /* Otherwise, I'll send a failed completion now to prevent
-                 * the peer's GET blocking for the full timeout. */
-                CERROR ("Completing unmatched RDMA GET from "LPX64"\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -EIO,
-                                          rx, NULL, 0, NULL, NULL, 0, 0);
-                break;
-                
-        case IBNAL_MSG_PUT_RDMA:
-                lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx);
-                if (rx->rx_rdma)
-                        break;
-                /* This is most unusual, since even if lib_parse() didn't
-                 * match anything, it should have asked us to read (and
-                 * discard) the payload.  The portals header must be
-                 * inconsistent with this message type, so it's the
-                 * sender's fault for sending garbage and she can time
-                 * herself out... */
-                CERROR ("Uncompleted RDMA PUT from "LPX64"\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                break;
-
-        case IBNAL_MSG_IMMEDIATE:
-                lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx);
-                LASSERT (!rx->rx_rdma);
-                break;
-                
-        default:
-                /* kibnal_rx_callback() only queues the types handled above */
-                LBUG();
-                break;
-        }
-
-        kibnal_post_rx (rx, 1);
-}
-
-/* Translate a kernel virtual address to its struct page, or NULL if the
- * resulting page is not valid.  Addresses in the vmalloc range (and, with
- * CONFIG_HIGHMEM, the pkmap range) are resolved with vmalloc_to_page();
- * everything else goes through virt_to_page(). */
-static struct page *
-kibnal_kvaddr_to_page (unsigned long vaddr)
-{
-        struct page *pg;
-        int          walk = (vaddr >= VMALLOC_START && vaddr < VMALLOC_END);
-
-#if CONFIG_HIGHMEM
-        /* in 2.4 vmalloc_to_page() just walks the page tables, so it
-         * serves for pkmap addresses too */
-        if (!walk)
-                walk = (vaddr >= PKMAP_BASE &&
-                        vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE));
-#endif
-
-        if (walk)
-                pg = vmalloc_to_page ((void *)vaddr);
-        else
-                pg = virt_to_page (vaddr);
-
-        if (VALID_PAGE (pg))
-                return pg;
-
-        return NULL;
-}
-
-/* Append one RDMA descriptor to tx's RDMA message, covering 'len' bytes at
- * 'page_offset' within 'page'.  The key comes from the global memory
- * descriptor: its lkey when 'active' is set, its rkey otherwise. */
-static void
-kibnal_fill_ibrm(kib_tx_t *tx, struct page *page, unsigned long page_offset,
-                 unsigned long len, int active)
-{
-        kib_rdma_msg_t  *rdma = &tx->tx_msg->ibm_u.rdma;
-        kib_rdma_desc_t *slot;
-
-        /* there must be room for one more descriptor */
-        LASSERTF(rdma->ibrm_num_descs < PTL_MD_MAX_IOV, "%u\n", 
-                 rdma->ibrm_num_descs);
-
-        slot = &rdma->ibrm_desc[rdma->ibrm_num_descs];
-
-        slot->rd_key  = active ? kibnal_data.kib_md.md_lkey
-                               : kibnal_data.kib_md.md_rkey;
-        slot->rd_nob  = len; /*PAGE_SIZE - kiov->kiov_offset; */
-        slot->rd_addr = kibnal_page2phys(page) + page_offset +
-                        kibnal_data.kib_md.md_addr;
-
-        rdma->ibrm_num_descs++;
-}
-
-/* Describe the 'nob' bytes at kernel virtual address 'vaddr' as a series of
- * RDMA descriptors in tx's message, one per page touched.
- * Returns 0, or -EFAULT if an address can't be resolved to a page. */
-static int
-kibnal_map_rdma_iov(kib_tx_t *tx, unsigned long vaddr, int nob, int active)
-{
-        struct page *pg;
-        int          in_page;   /* offset of vaddr within its page */
-        int          chunk;     /* bytes covered by this descriptor */
-
-        for (; nob > 0; nob -= chunk, vaddr += chunk) {
-                pg = kibnal_kvaddr_to_page(vaddr);
-                if (pg == NULL)
-                        return -EFAULT;
-
-                /* never let a descriptor run past the end of its page */
-                in_page = vaddr & (PAGE_SIZE - 1);
-                chunk = min(nob, (int)PAGE_SIZE - in_page);
-
-                kibnal_fill_ibrm(tx, pg, in_page, chunk, active);
-        }
-
-        return 0;
-}
-
-/* Map 'nob' bytes, starting 'offset' bytes into the iovec array, for RDMA.
- *
- * The range must lie within a single iov entry; otherwise -EMSGSIZE is
- * returned.  When kibnal_whole_mem() is true, per-page descriptors are
- * written into tx's RDMA message and tx_mapped is left unchanged.
- * Otherwise the range is registered with the HCA, the result is recorded in
- * tx->tx_md and tx is marked KIB_TX_MAPPED.
- *
- * Returns 0 on success or a negative errno. */
-static int
-kibnal_map_iov (kib_tx_t *tx, IB_ACCESS_CONTROL access,
-                 int niov, struct iovec *iov, int offset, int nob, int active)
-{
-        void   *vaddr;
-        FSTATUS frc;
-
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-        LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
-        /* skip the iov entries that lie wholly before 'offset' */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                niov--;
-                iov++;
-                LASSERT (niov > 0);
-        }
-
-        if (nob > iov->iov_len - offset) {
-                CERROR ("Can't map multiple vaddr fragments\n");
-                return (-EMSGSIZE);
-        }
-
-        /* our large contiguous iov could be backed by multiple physical
-         * pages. */
-        if (kibnal_whole_mem()) {
-                int rc;
-                tx->tx_msg->ibm_u.rdma.ibrm_num_descs = 0;
-                rc = kibnal_map_rdma_iov(tx, (unsigned long)iov->iov_base + 
-                                         offset, nob, active);
-                if (rc != 0) {
-                        CERROR ("Can't map iov: %d\n", rc);
-                        return rc;
-                }
-                return 0;
-        }
-
-        vaddr = (void *)(((unsigned long)iov->iov_base) + offset);
-        tx->tx_md.md_addr = (__u64)((unsigned long)vaddr);
-
-        frc = iibt_register_memory(kibnal_data.kib_hca, vaddr, nob,
-                                   kibnal_data.kib_pd, access,
-                                   &tx->tx_md.md_handle, &tx->tx_md.md_lkey,
-                                   &tx->tx_md.md_rkey);
-        /* compare against FSUCCESS like every other FSTATUS check here */
-        if (frc != FSUCCESS) {
-                CERROR ("Can't map vaddr %p: %d\n", vaddr, frc);
-                return -EINVAL;
-        }
-
-        tx->tx_mapped = KIB_TX_MAPPED;
-        return (0);
-}
-
-static int
-kibnal_map_kiov (kib_tx_t *tx, IB_ACCESS_CONTROL access,
-                  int nkiov, ptl_kiov_t *kiov,
-                  int offset, int nob, int active)
-{
-        __u64                      *phys = NULL;
-        int                         page_offset;
-        int                         nphys;
-        int                         resid;
-        int                         phys_size = 0;
-        FSTATUS                     frc;
-        int                         i, rc = 0;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        page_offset = kiov->kiov_offset + offset;
-        nphys = 1;
-
-        if (!kibnal_whole_mem()) {
-                phys_size = nkiov * sizeof (*phys);
-                PORTAL_ALLOC(phys, phys_size);
-                if (phys == NULL) {
-                        CERROR ("Can't allocate tmp phys\n");
-                        return (-ENOMEM);
-                }
-
-                phys[0] = kibnal_page2phys(kiov->kiov_page);
-        } else {
-                tx->tx_msg->ibm_u.rdma.ibrm_num_descs = 0;
-                kibnal_fill_ibrm(tx, kiov->kiov_page, kiov->kiov_offset, 
-                                 kiov->kiov_len, active);
-        }
-
-        resid = nob - (kiov->kiov_len - offset);
-
-        while (resid > 0) {
-                kiov++;
-                nkiov--;
-                LASSERT (nkiov > 0);
-
-                if (kiov->kiov_offset != 0 ||
-                    ((resid > PAGE_SIZE) && 
-                     kiov->kiov_len < PAGE_SIZE)) {
-                        /* Can't have gaps */
-                        CERROR ("Can't make payload contiguous in I/O VM:"
-                                "page %d, offset %d, len %d \n", nphys, 
-                                kiov->kiov_offset, kiov->kiov_len);
-
-                        for (i = -nphys; i < nkiov; i++) 
-                        {
-                                CERROR("kiov[%d] %p +%d for %d\n",
-                                       i, kiov[i].kiov_page, kiov[i].kiov_offset, kiov[i].kiov_len);
-                        }
-                        
-                        rc = -EINVAL;
-                        goto out;
-                }
-
-                if (nphys == PTL_MD_MAX_IOV) {
-                        CERROR ("payload too big (%d)\n", nphys);
-                        rc = -EMSGSIZE;
-                        goto out;
-                }
-
-                if (!kibnal_whole_mem()) {
-                        LASSERT (nphys * sizeof (*phys) < phys_size);
-                        phys[nphys] = kibnal_page2phys(kiov->kiov_page);
-                } else {
-                        if (kib_rdma_msg_len(nphys) > IBNAL_MSG_SIZE) {
-                                CERROR ("payload too big (%d)\n", nphys);
-                                rc = -EMSGSIZE;
-                                goto out;
-                        }
-                        kibnal_fill_ibrm(tx, kiov->kiov_page, 
-                                         kiov->kiov_offset, kiov->kiov_len,
-                                         active);
-                }
-
-                nphys ++;
-                resid -= PAGE_SIZE;
-        }
-
-        if (kibnal_whole_mem())
-                goto out;
-
-#if 0
-        CWARN ("nphys %d, nob %d, page_offset %d\n", nphys, nob, page_offset);
-        for (i = 0; i < nphys; i++)
-                CWARN ("   [%d] "LPX64"\n", i, phys[i]);
-#endif
-
-#if IBNAL_FMR
-#error "iibnal hasn't learned about FMR yet"
-        rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool,
-                                       phys, nphys,
-                                       &tx->tx_md.md_addr,
-                                       page_offset,
-                                       &tx->tx_md.md_handle.fmr,
-                                       &tx->tx_md.md_lkey,
-                                       &tx->tx_md.md_rkey);
-#else
-        frc = iibt_register_physical_memory(kibnal_data.kib_hca,
-                                            IBNAL_RDMA_BASE,
-                                            phys, nphys,
-                                            0,          /* offset */
-                                            kibnal_data.kib_pd,
-                                            access,
-                                            &tx->tx_md.md_handle,
-                                            &tx->tx_md.md_addr,
-                                            &tx->tx_md.md_lkey,
-                                            &tx->tx_md.md_rkey);
-#endif
-        if (frc == FSUCCESS) {
-                CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: lkey %x, rkey %x\n",
-                       nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey);
-#if IBNAL_FMR
-                tx->tx_mapped = KIB_TX_MAPPED_FMR;
-#else
-                tx->tx_mapped = KIB_TX_MAPPED;
-#endif
-        } else {
-                CERROR ("Can't map phys: %d\n", rc);
-                rc = -EFAULT;
-        }
-
- out:
-        if (phys != NULL)
-                PORTAL_FREE(phys, phys_size);
-        return (rc);
-}
-
-/* Return the first connection linked on peer->ibp_conns, or NULL when the
- * peer has no connections.  No lock is taken here; kibnal_launch_tx() calls
- * this with kibnal_data.kib_global_lock held. */
-static kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
-        /* nothing linked: no connection to hand back */
-        if (list_empty(&peer->ibp_conns))
-                return (NULL);
-
-        /* any connection will do, so take the one at the head */
-        return (list_entry(peer->ibp_conns.next, kib_conn_t, ibc_list));
-}
-
-/* Try to post queued transmits on 'conn'.
- *
- * If the tx queue is empty and ibc_outstanding_credits has reached
- * IBNAL_CREDIT_HIGHWATER, a NOOP message is queued first (provided an idle
- * tx can be had without blocking).  Queued txs are then posted one at a
- * time until the queue empties, IBNAL_MSG_QUEUE_SIZE sends are already
- * posted, or the remaining credits don't allow another send.  A posting
- * failure closes the connection and returns immediately.
- *
- * Takes and drops conn->ibc_lock itself, so the caller must not hold it. */
-void
-kibnal_check_sends (kib_conn_t *conn)
-{
-        unsigned long   flags;
-        kib_tx_t       *tx;
-        int             rc;
-        int             i;
-        int             done;
-        int             nwork;
-        ENTRY;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        LASSERT (conn->ibc_nsends_posted <= IBNAL_MSG_QUEUE_SIZE);
-
-        /* Nothing queued but enough credits owed to the peer: queue a NOOP
-         * to carry them.  The lock is dropped while the tx is obtained and
-         * initialised. */
-        if (list_empty(&conn->ibc_tx_queue) &&
-            conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER) {
-                spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                
-                tx = kibnal_get_idle_tx(0);     /* don't block */
-                if (tx != NULL)
-                        kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
-                spin_lock_irqsave(&conn->ibc_lock, flags);
-                
-                if (tx != NULL) {
-                        /* conn ref taken on behalf of the queued tx */
-                        atomic_inc(&conn->ibc_refcount);
-                        kibnal_queue_tx_locked(tx, conn);
-                }
-        }
-
-        while (!list_empty (&conn->ibc_tx_queue)) {
-                tx = list_entry (conn->ibc_tx_queue.next, kib_tx_t, tx_list);
-
-                /* We rely on this for QP sizing */
-                LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= IBNAL_TX_MAX_SG);
-
-                LASSERT (conn->ibc_outstanding_credits >= 0);
-                LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
-                LASSERT (conn->ibc_credits >= 0);
-                LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
-                /* Not on ibc_rdma_queue */
-                LASSERT (!tx->tx_passive_rdma_wait);
-
-                /* the maximum number of sends is already posted */
-                if (conn->ibc_nsends_posted == IBNAL_MSG_QUEUE_SIZE)
-                        GOTO(out, 0);
-
-                if (conn->ibc_credits == 0)     /* no credits */
-                        GOTO(out, 1);
-                
-                if (conn->ibc_credits == 1 &&   /* last credit reserved for */
-                    conn->ibc_outstanding_credits == 0) /* giving back credits */
-                        GOTO(out, 2);
-
-                list_del (&tx->tx_list);
-
-                /* A NOOP only exists to return credits: drop it if another
-                 * message is queued behind it or too few credits are owed. */
-                if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
-                    (!list_empty(&conn->ibc_tx_queue) ||
-                     conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER)) {
-                        /* redundant NOOP */
-                        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                        kibnal_tx_done(tx);
-                        spin_lock_irqsave(&conn->ibc_lock, flags);
-                        continue;
-                }
-
-                /* piggy-back all the credits owed to the peer on this message */
-                tx->tx_msg->ibm_credits = conn->ibc_outstanding_credits;
-                conn->ibc_outstanding_credits = 0;
-
-                conn->ibc_nsends_posted++;
-                conn->ibc_credits--;
-
-                /* we only get a tx completion for the final rdma op */ 
-                tx->tx_sending = min(tx->tx_nsp, 2);
-                tx->tx_passive_rdma_wait = tx->tx_passive_rdma;
-                list_add (&tx->tx_list, &conn->ibc_active_txs);
-#if IBNAL_CKSUM
-                tx->tx_msg->ibm_cksum = 0;
-                tx->tx_msg->ibm_cksum = kibnal_cksum(tx->tx_msg, tx->tx_msg->ibm_nob);
-                CDEBUG(D_NET, "cksum %x, nob %d\n", tx->tx_msg->ibm_cksum, tx->tx_msg->ibm_nob);
-#endif
-                spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-                /* NB the gap between removing tx from the queue and sending it
-                 * allows message re-ordering to occur */
-
-                LASSERT (tx->tx_nsp > 0);
-
-                /* rc stays -ECONNABORTED if the connection isn't established */
-                rc = -ECONNABORTED;
-                nwork = 0;
-                /* NOTE(review): ibc_state is read here without ibc_lock held;
-                 * confirm that is intended. */
-                if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                        tx->tx_status = 0;
-                        /* Driver only accepts 1 item at a time */
-                        for (i = 0; i < tx->tx_nsp; i++) {
-                                hexdump("tx", tx->tx_msg, sizeof(kib_msg_t));
-                                rc = iibt_postsend(conn->ibc_qp, 
-                                                   &tx->tx_wrq[i]);
-                                if (rc != 0)
-                                        break;
-                                /* count the posted items that will complete */
-                                if (wrq_signals_completion(&tx->tx_wrq[i]))
-                                        nwork++;
-                                CDEBUG(D_NET, "posted tx wrq %p\n", 
-                                       &tx->tx_wrq[i]);
-                        }
-                }
-
-                spin_lock_irqsave (&conn->ibc_lock, flags);
-                if (rc != 0) {
-                        /* Posting failed: undo the accounting done above */
-
-                        /* NB credits are transferred in the actual
-                         * message, which can only be the last work item */
-                        conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
-                        conn->ibc_credits++;
-                        conn->ibc_nsends_posted--;
-
-                        tx->tx_status = rc;
-                        tx->tx_passive_rdma_wait = 0;
-                        /* NOTE(review): tx_sending was set to
-                         * min(tx_nsp, 2) above, so when tx_nsp > 2 this
-                         * subtraction can leave it negative and 'done' never
-                         * becomes true -- confirm whether tx_nsp > 2 can
-                         * reach this path. */
-                        tx->tx_sending -= tx->tx_nsp - nwork;
-
-                        done = (tx->tx_sending == 0);
-                        if (done)
-                                list_del (&tx->tx_list);
-                        
-                        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-                        
-                        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
-                                CERROR ("Error %d posting transmit to "LPX64"\n", 
-                                        rc, conn->ibc_peer->ibp_nid);
-                        else
-                                CDEBUG (D_NET, "Error %d posting transmit to "
-                                        LPX64"\n", rc, conn->ibc_peer->ibp_nid);
-
-                        kibnal_close_conn (conn, rc);
-
-                        /* no completions outstanding: finish the tx here */
-                        if (done)
-                                kibnal_tx_done (tx);
-                        return;
-                }
-                
-        }
-
-        EXIT;
-out:
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-}
-
-/* Handle the completion of a send work request.
- *
- * The tx is recovered from wc->WorkReqId.  Its tx_sending count is
- * decremented; when it reaches zero the connection's ibc_nsends_posted is
- * decremented, and if the tx is also not waiting for passive RDMA completion
- * it is unlinked and finished with kibnal_tx_done().  A failed completion
- * closes the connection with -ENETDOWN; a successful one tries to post more
- * queued sends. */
-static void
-kibnal_tx_callback (IB_WORK_COMPLETION *wc)
-{
-        kib_tx_t     *tx = (kib_tx_t *)kibnal_wreqid2ptr(wc->WorkReqId);
-        kib_conn_t   *conn;
-        unsigned long flags;
-        int           idle;
-
-        conn = tx->tx_conn;
-        LASSERT (conn != NULL);
-        LASSERT (tx->tx_sending != 0);
-
-        spin_lock_irqsave(&conn->ibc_lock, flags);
-
-        CDEBUG(D_NET, "conn %p tx %p [%d/%d]: %d\n", conn, tx,
-               tx->tx_sending, tx->tx_nsp, wc->Status);
-
-        /* I could be racing with rdma completion.  Whoever makes 'tx' idle
-         * gets to free it, which also drops its ref on 'conn'.  If it's
-         * not me, then I take an extra ref on conn so it can't disappear
-         * under me. */
-
-        tx->tx_sending--;
-        idle = (tx->tx_sending == 0) &&         /* This is the final callback */
-               (!tx->tx_passive_rdma_wait);     /* Not waiting for RDMA completion */
-        if (idle)
-                list_del(&tx->tx_list);
-
-        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-               atomic_read (&conn->ibc_refcount));
-        /* NOTE(review): the ref is taken unconditionally (not only when
-         * someone else frees 'tx'); it is dropped by kibnal_put_conn() at
-         * the end of this function. */
-        atomic_inc (&conn->ibc_refcount);
-
-        /* all work items of this tx have completed: release its send slot */
-        if (tx->tx_sending == 0)
-                conn->ibc_nsends_posted--;
-
-        /* record the failure, but don't overwrite an earlier error */
-        if (wc->Status != WRStatusSuccess &&
-            tx->tx_status == 0)
-                tx->tx_status = -ECONNABORTED;
-                
-        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
-        /* 'tx' is not touched again after this point */
-        if (idle)
-                kibnal_tx_done (tx);
-
-        if (wc->Status != WRStatusSuccess) {
-                CERROR ("Tx completion to "LPX64" failed: %d\n", 
-                        conn->ibc_peer->ibp_nid, wc->Status);
-                kibnal_close_conn (conn, -ENETDOWN);
-        } else {
-                /* can I shovel some more sends out the door? */
-                kibnal_check_sends(conn);
-        }
-
-        kibnal_put_conn (conn);
-}
-
-/* Asynchronous event handler: for now it only logs the type and code of
- * the event record; 'ca_arg' is unused. */
-void 
-kibnal_ca_async_callback (void *ca_arg, IB_EVENT_RECORD *ev)
-{
-        /* XXX flesh out.  this seems largely for async errors */
-        CERROR("type: %d code: %u\n", ev->EventType, ev->EventCode);
-}
-
-/* Completion queue event handler.
- *
- * Drains the CQ, dispatching each work completion to the rx or tx callback
- * according to its work request id, re-arms the CQ for the next completion
- * and then drains once more before returning.  If re-arming fails an error
- * is logged and the handler returns straight away. */
-void
-kibnal_ca_callback (void *ca_arg, void *cq_arg)
-{
-        IB_HANDLE          cq = *(IB_HANDLE *)cq_arg;
-        IB_HANDLE          ca = *(IB_HANDLE *)ca_arg;
-        IB_WORK_COMPLETION completion;
-        int                rearmed;
-
-        CDEBUG(D_NET, "ca %p cq %p\n", ca, cq);
-
-        for (rearmed = 0; ; rearmed = 1) {
-                /* handle everything currently sitting on the CQ */
-                while (iibt_cq_poll(cq, &completion) == FSUCCESS) {
-                        if (!kibnal_wreqid_is_rx(completion.WorkReqId))
-                                kibnal_tx_callback(&completion);
-                        else
-                                kibnal_rx_callback(&completion);
-                }
-
-                /* that was the drain following the re-arm: all done */
-                if (rearmed)
-                        break;
-
-                if (iibt_cq_rearm(cq, CQEventSelNextWC) != FSUCCESS) {
-                        CERROR("rearm failed?\n");
-                        break;
-                }
-        }
-}
-
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
-        IB_LOCAL_DATASEGMENT *gl = &tx->tx_gl[tx->tx_nsp];
-        IB_WORK_REQ         *wrq = &tx->tx_wrq[tx->tx_nsp];
-        int                       fence;
-        int                       nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
-        LASSERT (tx->tx_nsp >= 0 && 
-                 tx->tx_nsp < sizeof(tx->tx_wrq)/sizeof(tx->tx_wrq[0]));
-        LASSERT (nob <= IBNAL_MSG_SIZE);
-        
-        tx->tx_msg->ibm_magic = IBNAL_MSG_MAGIC;
-        tx->tx_msg->ibm_version = IBNAL_MSG_VERSION;
-        tx->tx_msg->ibm_type = type;
-#if IBNAL_CKSUM
-        tx->tx_msg->ibm_nob = nob;
-#endif
-        /* Fence the message if it's bundled with an RDMA read */
-        fence = (tx->tx_nsp > 0) &&
-                (type == IBNAL_MSG_PUT_DONE);
-
-        *gl = (IB_LOCAL_DATASEGMENT) {
-                .Address = tx->tx_vaddr,
-                .Length  = IBNAL_MSG_SIZE,
-                .Lkey    = kibnal_lkey(kibnal_data.kib_tx_pages),
-        };
-
-        wrq->WorkReqId      = kibnal_ptr2wreqid(tx, 0);
-        wrq->Operation      = WROpSend;
-        wrq->DSList         = gl;
-        wrq->DSListDepth    = 1;
-        wrq->MessageLen     = nob;
-        wrq->Req.SendRC.ImmediateData  = 0;
-        wrq->Req.SendRC.Options.s.SolicitedEvent         = 1;
-        wrq->Req.SendRC.Options.s.SignaledCompletion     = 1;
-        wrq->Req.SendRC.Options.s.ImmediateData          = 0;
-        wrq->Req.SendRC.Options.s.Fence                  = fence;
-
-        tx->tx_nsp++;
-}
-
-/* Put 'tx' on conn's transmit queue under conn->ibc_lock, then try to post
- * whatever is queued. */
-static void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
-        unsigned long irqflags;
-
-        spin_lock_irqsave(&conn->ibc_lock, irqflags);
-        kibnal_queue_tx_locked (tx, conn);
-        spin_unlock_irqrestore(&conn->ibc_lock, irqflags);
-
-        /* called with the lock dropped: kibnal_check_sends() takes it itself */
-        kibnal_check_sends(conn);
-}
-
-/* Send 'tx' (whose work items are already set up) to the peer 'nid'.
- *
- * If the peer has a connection, the tx takes a ref on it and is queued
- * there.  Otherwise, unless the peer's reconnect time hasn't been reached
- * yet, the peer is handed to the connection daemon (if a connection attempt
- * isn't already flagged) and the tx waits on peer->ibp_tx_queue.
- *
- * There is no return value: when the peer is unknown or may not reconnect
- * yet, the tx is completed with -EHOSTUNREACH. */
-static void
-kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid)
-{
-        unsigned long    flags;
-        kib_peer_t      *peer;
-        kib_conn_t      *conn;
-        rwlock_t        *g_lock = &kibnal_data.kib_global_lock;
-
-        /* If I get here, I've committed to send, so I complete the tx with
-         * failure on any problems */
-        
-        LASSERT (tx->tx_conn == NULL);          /* only set when assigned a conn */
-        LASSERT (tx->tx_nsp > 0);               /* work items have been set up */
-
-        /* First try under the read lock: enough when a connection exists */
-        read_lock (g_lock);
-        
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
-                read_unlock (g_lock);
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-                return;
-        }
-
-        conn = kibnal_find_conn_locked (peer);
-        if (conn != NULL) {
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */
-                read_unlock (g_lock);
-                
-                kibnal_queue_tx (tx, conn);
-                return;
-        }
-        
-        /* Making one or more connections; I'll need a write lock... */
-        read_unlock (g_lock);
-        write_lock_irqsave (g_lock, flags);
-
-        /* The lock was dropped in between, so look up the peer and its
-         * connection again */
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
-                write_unlock_irqrestore (g_lock, flags);
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-                return;
-        }
-
-        conn = kibnal_find_conn_locked (peer);
-        if (conn != NULL) {
-                /* Connection exists; queue message on it */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */
-                write_unlock_irqrestore (g_lock, flags);
-                
-                kibnal_queue_tx (tx, conn);
-                return;
-        }
-
-        if (peer->ibp_connecting == 0) {
-                /* No connection attempt flagged: start one, unless the
-                 * peer's reconnect time is still in the future */
-                if (!time_after_eq(jiffies, peer->ibp_reconnect_time)) {
-                        write_unlock_irqrestore (g_lock, flags);
-                        tx->tx_status = -EHOSTUNREACH;
-                        kibnal_tx_done (tx);
-                        return;
-                }
-        
-                peer->ibp_connecting = 1;
-                kib_peer_addref(peer); /* extra ref for connd */
-        
-                spin_lock (&kibnal_data.kib_connd_lock);
-        
-                /* hand the peer to the connection daemon and wake it */
-                list_add_tail (&peer->ibp_connd_list,
-                               &kibnal_data.kib_connd_peers);
-                wake_up (&kibnal_data.kib_connd_waitq);
-        
-                spin_unlock (&kibnal_data.kib_connd_lock);
-        }
-        
-        /* A connection is being established; queue the message... */
-        list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
-        write_unlock_irqrestore (g_lock, flags);
-}
-
-/* Start an RDMA in which this node is the passive side.
- *
- * The whole memory descriptor of 'libmsg' is mapped, and a message of the
- * given 'type' (IBNAL_MSG_PUT_RDMA or IBNAL_MSG_GET_RDMA) carrying 'hdr',
- * the tx's passive RDMA cookie and the RDMA descriptor(s) is launched to
- * 'nid'.  For a GET, the reply message is created up front.
- *
- * Returns PTL_OK once the tx has been launched, or PTL_FAIL if the mapping
- * or the reply creation failed (the tx is then completed with the error). */
-static ptl_err_t
-kibnal_start_passive_rdma (int type, ptl_nid_t nid,
-                            lib_msg_t *libmsg, ptl_hdr_t *hdr)
-{
-        int         nob = libmsg->md->length;
-        kib_tx_t   *tx;
-        kib_msg_t  *ibmsg;
-        int         rc;
-        IB_ACCESS_CONTROL         access = {0,};
-        
-        LASSERT (type == IBNAL_MSG_PUT_RDMA || type == IBNAL_MSG_GET_RDMA);
-        LASSERT (nob > 0);
-        LASSERT (!in_interrupt());              /* Mapping could block */
-
-        /* the mapping is registered with local write and remote RDMA
-         * read/write access */
-        access.s.MWBindable = 1;
-        access.s.LocalWrite = 1;
-        access.s.RdmaRead = 1;
-        access.s.RdmaWrite = 1;
-
-        tx = kibnal_get_idle_tx (1);           /* May block; caller is an app thread */
-        LASSERT (tx != NULL);
-
-        /* map the MD from offset 0, as vaddrs or as pages */
-        if ((libmsg->md->options & PTL_MD_KIOV) == 0) 
-                rc = kibnal_map_iov (tx, access,
-                                     libmsg->md->md_niov,
-                                     libmsg->md->md_iov.iov,
-                                     0, nob, 0);
-        else
-                rc = kibnal_map_kiov (tx, access,
-                                      libmsg->md->md_niov, 
-                                      libmsg->md->md_iov.kiov,
-                                      0, nob, 0);
-
-        if (rc != 0) {
-                CERROR ("Can't map RDMA for "LPX64": %d\n", nid, rc);
-                goto failed;
-        }
-        
-        if (type == IBNAL_MSG_GET_RDMA) {
-                /* reply gets finalized when tx completes */
-                tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, 
-                                                        nid, libmsg);
-                if (tx->tx_libmsg[1] == NULL) {
-                        CERROR ("Can't create reply for GET -> "LPX64"\n",
-                                nid);
-                        rc = -ENOMEM;
-                        goto failed;
-                }
-        }
-        
-        tx->tx_passive_rdma = 1;
-
-        ibmsg = tx->tx_msg;
-
-        ibmsg->ibm_u.rdma.ibrm_hdr = *hdr;
-        ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie;
-        /* map_kiov already filled the rdma descs for the whole_mem case */
-        if (!kibnal_whole_mem()) {
-                ibmsg->ibm_u.rdma.ibrm_desc[0].rd_key = tx->tx_md.md_rkey;
-                ibmsg->ibm_u.rdma.ibrm_desc[0].rd_addr = tx->tx_md.md_addr;
-                ibmsg->ibm_u.rdma.ibrm_desc[0].rd_nob = nob;
-                ibmsg->ibm_u.rdma.ibrm_num_descs = 1;
-        }
-
-        /* the body size depends on how many descriptors are being sent */
-        kibnal_init_tx_msg (tx, type, 
-                            kib_rdma_msg_len(ibmsg->ibm_u.rdma.ibrm_num_descs));
-
-        CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr "
-               LPX64", nob %d\n",
-               tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey,
-               tx->tx_md.md_addr, nob);
-        
-        /* libmsg gets finalized when tx completes. */
-        tx->tx_libmsg[0] = libmsg;
-
-        kibnal_launch_tx(tx, nid);
-        return (PTL_OK);
-
-        /* tx_libmsg[0] hasn't been set on this path; the caller learns
-         * about the failure from the return value */
- failed:
-        tx->tx_status = rc;
-        kibnal_tx_done (tx);
-        return (PTL_FAIL);
-}
-
-/* Perform the active side of an RDMA requested by the peer in 'rx', then
- * send the completion message.
- *
- * 'type' is IBNAL_MSG_GET_DONE (RDMA write of the reply data to the peer) or
- * IBNAL_MSG_PUT_DONE (RDMA read of the put data from the peer).  'status' is
- * the completion status reported to the peer; 'nob' must be 0 unless it is
- * 0.  The local buffer is described by 'niov' fragments of either 'iov' or
- * 'kiov' (never both), starting at 'offset'.
- *
- * One RDMA work item is built per descriptor, followed by the completion
- * message.  If the buffer can't be mapped, or the local and remote
- * descriptor counts differ, the RDMA is skipped and the completion message
- * carries the failure.  'libmsg' is finalized when the tx completes if data
- * is being transferred, and immediately otherwise.  If no tx descriptor is
- * available, 'libmsg' is finalized with PTL_NO_SPACE and nothing is sent. */
-void
-kibnal_start_active_rdma (int type, int status,
-                           kib_rx_t *rx, lib_msg_t *libmsg, 
-                           unsigned int niov,
-                           struct iovec *iov, ptl_kiov_t *kiov,
-                           size_t offset, size_t nob)
-{
-        kib_msg_t    *rxmsg = rx->rx_msg;
-        kib_msg_t    *txmsg;
-        kib_tx_t     *tx;
-        IB_ACCESS_CONTROL access = {0,};
-        IB_WR_OP      rdma_op;
-        int           rc;
-        __u32         i;
-
-        /* niov is unsigned and offset/nob are size_t: print them as such */
-        CDEBUG(D_NET, "type %d, status %d, niov %u, offset "LPSZ
-               ", nob "LPSZ"\n",
-               type, status, niov, offset, nob);
-
-        /* Called by scheduler */
-        LASSERT (!in_interrupt ());
-
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        /* No data if we're completing with failure */
-        LASSERT (status == 0 || nob == 0);
-
-        LASSERT (type == IBNAL_MSG_GET_DONE ||
-                 type == IBNAL_MSG_PUT_DONE);
-
-        /* Flag I'm completing the RDMA.  Even if I fail to send the
-         * completion message, I will have tried my best so further
-         * attempts shouldn't be tried. */
-        LASSERT (!rx->rx_rdma);
-        rx->rx_rdma = 1;
-
-        if (type == IBNAL_MSG_GET_DONE) {
-                rdma_op  = WROpRdmaWrite;
-                LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA);
-        } else {
-                /* an RDMA read lands in local memory */
-                access.s.LocalWrite = 1;
-                rdma_op  = WROpRdmaRead;
-                LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA);
-        }
-
-        tx = kibnal_get_idle_tx (0);           /* Mustn't block */
-        if (tx == NULL) {
-                CERROR ("tx descs exhausted on RDMA from "LPX64
-                        " completing locally with failure\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                lib_finalize (&kibnal_lib, NULL, libmsg, PTL_NO_SPACE);
-                return;
-        }
-        LASSERT (tx->tx_nsp == 0);
-
-        if (nob == 0) 
-                GOTO(init_tx, 0);
-
-        /* We actually need to transfer some data (the transfer
-         * size could get truncated to zero when the incoming
-         * message is matched) */
-        if (kiov != NULL)
-                rc = kibnal_map_kiov (tx, access, niov, kiov, offset, nob, 1);
-        else
-                rc = kibnal_map_iov (tx, access, niov, iov, offset, nob, 1);
-
-        if (rc != 0) {
-                CERROR ("Can't map RDMA -> "LPX64": %d\n", 
-                        rx->rx_conn->ibc_peer->ibp_nid, rc);
-                /* We'll skip the RDMA and complete with failure. */
-                status = rc;
-                nob = 0;
-                GOTO(init_tx, rc);
-        } 
-
-        if (!kibnal_whole_mem()) {
-                tx->tx_msg->ibm_u.rdma.ibrm_desc[0].rd_key = tx->tx_md.md_lkey;
-                tx->tx_msg->ibm_u.rdma.ibrm_desc[0].rd_addr = tx->tx_md.md_addr;
-                tx->tx_msg->ibm_u.rdma.ibrm_desc[0].rd_nob = nob;
-                tx->tx_msg->ibm_u.rdma.ibrm_num_descs = 1;
-        }
-
-        /* XXX ugh.  different page-sized hosts. */ 
-        if (tx->tx_msg->ibm_u.rdma.ibrm_num_descs !=
-            rxmsg->ibm_u.rdma.ibrm_num_descs) {
-                CERROR("tx descs (%u) != rx descs (%u)\n", 
-                       tx->tx_msg->ibm_u.rdma.ibrm_num_descs,
-                       rxmsg->ibm_u.rdma.ibrm_num_descs);
-                /* We'll skip the RDMA and complete with failure.  NB 'rc' is
-                 * 0 here (the mapping succeeded), so an explicit error is
-                 * needed or this would be reported as success. */
-                status = -EPROTO;
-                nob = 0;
-                GOTO(init_tx, status);
-        }
-
-        /* map_kiov filled in the rdma descs which describe our side of the
-         * rdma transfer. */
-        /* ibrm_num_descs was verified in rx_callback */
-        for(i = 0; i < rxmsg->ibm_u.rdma.ibrm_num_descs; i++) {
-                kib_rdma_desc_t *ldesc, *rdesc; /* local, remote */
-                IB_LOCAL_DATASEGMENT *ds = &tx->tx_gl[i];
-                IB_WORK_REQ  *wrq = &tx->tx_wrq[i];
-
-                ldesc = &tx->tx_msg->ibm_u.rdma.ibrm_desc[i];
-                rdesc = &rxmsg->ibm_u.rdma.ibrm_desc[i];
-
-                ds->Address = ldesc->rd_addr;
-                ds->Length  = ldesc->rd_nob;
-                ds->Lkey    = ldesc->rd_key;
-
-                memset(wrq, 0, sizeof(*wrq));
-                wrq->WorkReqId      = kibnal_ptr2wreqid(tx, 0);
-                wrq->Operation      = rdma_op;
-                wrq->DSList         = ds;
-                wrq->DSListDepth    = 1;
-                wrq->MessageLen     = ds->Length;
-                wrq->Req.SendRC.ImmediateData  = 0;
-                wrq->Req.SendRC.Options.s.SolicitedEvent         = 0;
-                wrq->Req.SendRC.Options.s.SignaledCompletion     = 0;
-                wrq->Req.SendRC.Options.s.ImmediateData          = 0;
-                wrq->Req.SendRC.Options.s.Fence                  = 0;
-                wrq->Req.SendRC.RemoteDS.Address = rdesc->rd_addr;
-                wrq->Req.SendRC.RemoteDS.Rkey = rdesc->rd_key;
-
-                /* only the last rdma post triggers tx completion */
-                if (i == rxmsg->ibm_u.rdma.ibrm_num_descs - 1)
-                        wrq->Req.SendRC.Options.s.SignaledCompletion = 1;
-
-                tx->tx_nsp++;
-        }
-
-init_tx:
-        txmsg = tx->tx_msg;
-
-        /* echo the peer's cookie so it can match the completion */
-        txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie;
-        txmsg->ibm_u.completion.ibcm_status = status;
-
-        kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
-        if (status == 0 && nob != 0) {
-                LASSERT (tx->tx_nsp > 1);
-                /* RDMA: libmsg gets finalized when the tx completes.  This
-                 * is after the completion message has been sent, which in
-                 * turn is after the RDMA has finished. */
-                tx->tx_libmsg[0] = libmsg;
-        } else {
-                LASSERT (tx->tx_nsp == 1);
-                /* No RDMA: local completion happens now! */
-                CDEBUG(D_WARNING,"No data: immediate completion\n");
-                lib_finalize (&kibnal_lib, NULL, libmsg,
-                              status == 0 ? PTL_OK : PTL_FAIL);
-        }
-
-        /* +1 ref for this tx... */
-        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-               rx->rx_conn, rx->rx_conn->ibc_state, 
-               rx->rx_conn->ibc_peer->ibp_nid,
-               atomic_read (&rx->rx_conn->ibc_refcount));
-        atomic_inc (&rx->rx_conn->ibc_refcount);
-        /* ...and queue it up */
-        kibnal_queue_tx(tx, rx->rx_conn);
-}
-
-/* Common send path for iovec and page payloads.
- *
- * 'type' is the portals message type; 'private' depends on it (for a REPLY
- * it is the receive being replied to).  The payload is 'payload_nob' bytes
- * starting at 'payload_offset' in 'payload_niov' fragments of either
- * 'payload_iov' or 'payload_kiov' (never both).
- *
- * A REPLY to a GET_RDMA request is handed to kibnal_start_active_rdma().  A
- * PUT whose payload, or a GET whose reply, is too big for an immediate
- * message starts a passive RDMA.  Everything else is copied into an
- * immediate message and launched.
- *
- * Returns PTL_OK when the message has been handed on, PTL_NO_SPACE when no
- * tx descriptor is available, PTL_FAIL on an inconsistent or oversized
- * REPLY, or the result of kibnal_start_passive_rdma(). */
-static ptl_err_t
-kibnal_sendmsg(lib_nal_t    *nal, 
-                void         *private,
-                lib_msg_t    *libmsg,
-                ptl_hdr_t    *hdr, 
-                int           type, 
-                ptl_nid_t     nid, 
-                ptl_pid_t     pid,
-                unsigned int  payload_niov, 
-                struct iovec *payload_iov, 
-                ptl_kiov_t   *payload_kiov,
-                size_t        payload_offset,
-                size_t        payload_nob)
-{
-        kib_msg_t  *ibmsg;
-        kib_tx_t   *tx;
-        int         nob;
-
-        /* NB 'private' is different depending on what we're sending.... */
-
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-               " pid %d\n", payload_nob, payload_niov, nid , pid);
-
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
-        /* Thread context if we're sending payload */
-        LASSERT (!in_interrupt() || payload_niov == 0);
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-        switch (type) {
-        default:
-                LBUG();
-                return (PTL_FAIL);
-
-        case PTL_MSG_REPLY: {
-                /* reply's 'private' is the incoming receive */
-                kib_rx_t *rx = private;
-
-                /* RDMA reply expected? */
-                if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) {
-                        kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
-                                                 rx, libmsg, payload_niov, 
-                                                 payload_iov, payload_kiov,
-                                                 payload_offset, payload_nob);
-                        return (PTL_OK);
-                }
-
-                /* Incoming message consistent with immediate reply? */
-                if (rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
-                        CERROR ("REPLY to "LPX64" bad opbm type %d!!!\n",
-                                nid, rx->rx_msg->ibm_type);
-                        return (PTL_FAIL);
-                }
-
-                /* Will it fit in a message?  NB a message of exactly
-                 * IBNAL_MSG_SIZE bytes does fit: the GET and PUT cases below
-                 * only switch to RDMA above that size, so the same limit
-                 * must apply here. */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob > IBNAL_MSG_SIZE) {
-                        CERROR("REPLY for "LPX64" too big (RDMA not requested): "
-                               LPSZ"\n", nid, payload_nob);
-                        return (PTL_FAIL);
-                }
-                break;
-        }
-
-        case PTL_MSG_GET:
-                /* might the REPLY message be big enough to need RDMA? */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]);
-                if (nob > IBNAL_MSG_SIZE)
-                        return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA, 
-                                                          nid, libmsg, hdr));
-                break;
-
-        case PTL_MSG_ACK:
-                LASSERT (payload_nob == 0);
-                break;
-
-        case PTL_MSG_PUT:
-                /* Is the payload big enough to need RDMA? */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob > IBNAL_MSG_SIZE)
-                        return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
-                                                          nid, libmsg, hdr));
-
-                break;
-        }
-
-        /* Immediate message.  Only block for a tx descriptor when sending a
-         * PUT or GET outside interrupt context. */
-        tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
-                                  type == PTL_MSG_REPLY ||
-                                  in_interrupt()));
-        if (tx == NULL) {
-                CERROR ("Can't send %d to "LPX64": tx descs exhausted%s\n", 
-                        type, nid, in_interrupt() ? " (intr)" : "");
-                return (PTL_NO_SPACE);
-        }
-
-        ibmsg = tx->tx_msg;
-        ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
-        /* the payload travels inside the message itself */
-        if (payload_nob > 0) {
-                if (payload_kiov != NULL)
-                        lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload,
-                                          payload_niov, payload_kiov,
-                                          payload_offset, payload_nob);
-                else
-                        lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload,
-                                         payload_niov, payload_iov,
-                                         payload_offset, payload_nob);
-        }
-
-        kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE,
-                            offsetof(kib_immediate_msg_t, 
-                                     ibim_payload[payload_nob]));
-
-        /* libmsg gets finalized when tx completes */
-        tx->tx_libmsg[0] = libmsg;
-
-        kibnal_launch_tx(tx, nid);
-        return (PTL_OK);
-}
-
-/* Send a message whose payload is described by iovecs: forwards everything
- * to kibnal_sendmsg() with no page vector. */
-static ptl_err_t
-kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-               unsigned int payload_niov, struct iovec *payload_iov,
-               size_t payload_offset, size_t payload_len)
-{
-        ptl_err_t result;
-
-        result = kibnal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                                payload_niov, payload_iov, NULL,
-                                payload_offset, payload_len);
-        return (result);
-}
-
-/* Send a message whose payload is described by an array of page fragments
- * (ptl_kiov_t).  This simply forwards to kibnal_sendmsg() with no iovec. */
-static ptl_err_t
-kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-                   ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                   unsigned int payload_niov, ptl_kiov_t *payload_kiov,
-                   size_t payload_offset, size_t payload_len)
-{
-        struct iovec *no_iov = NULL;
-
-        return kibnal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                              payload_niov, no_iov, payload_kiov,
-                              payload_offset, payload_len);
-}
-
-/*
- * Deliver a received message into the caller's buffers.  Shared by
- * kibnal_recv() (iov) and kibnal_recv_pages() (kiov).
- *
- * 'private' is the kib_rx_t the message arrived in.  At most one of 'iov'
- * and 'kiov' may be non-NULL.  'mlen' bytes are copied out of the 'rlen'
- * bytes the sender claims to have sent (mlen <= rlen is asserted).
- *
- * Returns PTL_OK, or PTL_FAIL when an immediate message claims a payload
- * that cannot fit in an IBNAL_MSG_SIZE message.
- */
-static ptl_err_t
-kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
-                 unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov,
-                 size_t offset, size_t mlen, size_t rlen)
-{
-        kib_rx_t    *rx = private;
-        kib_msg_t   *rxmsg = rx->rx_msg;
-        int          msg_nob;
-
-        LASSERT (mlen <= rlen);
-        LASSERT (!in_interrupt ());
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        switch (rxmsg->ibm_type) {
-        default:
-                LBUG();
-                return (PTL_FAIL);
-
-        case IBNAL_MSG_IMMEDIATE:
-                /* total message size implied by the sender's payload length */
-                msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
-                if (msg_nob > IBNAL_MSG_SIZE) {
-                        /* rlen is a size_t: print it as an unsigned long
-                         * instead of feeding it to a %d conversion */
-                        CERROR ("Immediate message from "LPX64" too big: %lu\n",
-                                rxmsg->ibm_u.immediate.ibim_hdr.src_nid,
-                                (unsigned long)rlen);
-                        return (PTL_FAIL);
-                }
-
-                if (kiov != NULL)
-                        lib_copy_buf2kiov(niov, kiov, offset,
-                                          rxmsg->ibm_u.immediate.ibim_payload,
-                                          mlen);
-                else
-                        lib_copy_buf2iov(niov, iov, offset,
-                                         rxmsg->ibm_u.immediate.ibim_payload,
-                                         mlen);
-
-                lib_finalize (nal, NULL, libmsg, PTL_OK);
-                return (PTL_OK);
-
-        case IBNAL_MSG_GET_RDMA:
-                /* We get called here just to discard any junk after the
-                 * GET hdr. */
-                LASSERT (libmsg == NULL);
-                lib_finalize (nal, NULL, libmsg, PTL_OK);
-                return (PTL_OK);
-
-        case IBNAL_MSG_PUT_RDMA:
-                /* payload is fetched by RDMA; the return value of
-                 * kibnal_start_active_rdma() is not checked here */
-                kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0,
-                                          rx, libmsg,
-                                          niov, iov, kiov, offset, mlen);
-                return (PTL_OK);
-        }
-}
-
-/* Receive into an array of struct iovec: kibnal_recvmsg() with no kiov. */
-static ptl_err_t
-kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-             unsigned int niov, struct iovec *iov,
-             size_t offset, size_t mlen, size_t rlen)
-{
-        ptl_kiov_t *no_kiov = NULL;
-
-        return kibnal_recvmsg(nal, private, msg, niov, iov, no_kiov,
-                              offset, mlen, rlen);
-}
-
-/* Receive into an array of page fragments: kibnal_recvmsg() with no iov. */
-static ptl_err_t
-kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                   unsigned int niov, ptl_kiov_t *kiov,
-                   size_t offset, size_t mlen, size_t rlen)
-{
-        struct iovec *no_iov = NULL;
-
-        return kibnal_recvmsg(nal, private, msg, niov, no_iov, kiov,
-                              offset, mlen, rlen);
-}
-
-/*****************************************************************************
- * the rest of this file concerns connection management.  active connections
- * start with connect_peer, passive connections start with passive_callback.
- * active disconnects start with conn_close, cm_callback starts passive
- * disconnects and contains the guts of how the disconnect state machine
- * progresses. 
- *****************************************************************************/
-
-/* Spawn a kernel thread running fn(arg).  On success the global thread
- * count (kib_nthreads) is bumped and 0 is returned; otherwise the negative
- * value returned by kernel_thread() is passed back. */
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
-        long    rc = kernel_thread (fn, arg, 0);
-
-        if (rc >= 0) {
-                atomic_inc (&kibnal_data.kib_nthreads);
-                return 0;
-        }
-
-        return (int)rc;
-}
-
-/* Undo the kib_nthreads increment made by kibnal_thread_start(). */
-static void
-kibnal_thread_fini (void)
-{
-        atomic_t *nthreads = &kibnal_data.kib_nthreads;
-
-        atomic_dec (nthreads);
-}
-
-/* this can be called by anyone at any time to close a connection.  if
- * the connection is still established it heads to the connd to start
- * the disconnection in a safe context.  It has no effect if called
- * on a connection that is already disconnecting */
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
-        /* This just does the immediate housekeeping, and schedules the
-         * connection for the connd to finish off.
-         * Caller holds kib_global_lock exclusively in irq context */
-        kib_peer_t   *peer = conn->ibc_peer;
-
-        KIB_ASSERT_CONN_STATE_RANGE(conn, IBNAL_CONN_CONNECTING,
-                                    IBNAL_CONN_DISCONNECTED);
-
-        if (conn->ibc_state > IBNAL_CONN_ESTABLISHED)
-                return; /* already disconnecting */
-
-        /* a clean close (error == 0) is logged at D_NET, anything else
-         * at D_ERROR */
-        CDEBUG (error == 0 ? D_NET : D_ERROR,
-                "closing conn to "LPX64": error %d\n", peer->ibp_nid, error);
-
-        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                /* kib_connd_conns takes ibc_list's ref */
-                list_del (&conn->ibc_list);
-        } else {
-                /* new ref for kib_connd_conns */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount);
-        }
-        
-        if (list_empty (&peer->ibp_conns) &&
-            peer->ibp_persistence == 0) {
-                /* Non-persistent peer with no more conns... */
-                kibnal_unlink_peer_locked (peer);
-        }
-
-        /* state is changed before the conn becomes visible on the connd's
-         * list below */
-        conn->ibc_state = IBNAL_CONN_SEND_DREQ;
-
-        /* kib_connd_lock nests inside kib_global_lock here */
-        spin_lock (&kibnal_data.kib_connd_lock);
-
-        list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-        wake_up (&kibnal_data.kib_connd_waitq);
-                
-        spin_unlock (&kibnal_data.kib_connd_lock);
-}
-
-/* Locking wrapper: take kib_global_lock for writing (irqs saved) around
- * kibnal_close_conn_locked(). */
-void
-kibnal_close_conn (kib_conn_t *conn, int error)
-{
-        unsigned long     irq_flags;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, irq_flags);
-        kibnal_close_conn_locked (conn, error);
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, irq_flags);
-}
-
-/* Account for a failed connection attempt to 'peer' (rc != 0).
- *
- * Drops one count from ibp_connecting.  If that was the last attempt in
- * progress and the peer has no connections, the reconnect time/interval
- * are updated (interval doubles, capped at IBNAL_MAX_RECONNECT_INTERVAL),
- * the peer's queued transmits are completed with -EHOSTUNREACH, and a
- * non-persistent peer is unlinked.  'active' is not used here. */
-static void
-kibnal_peer_connect_failed (kib_peer_t *peer, int active, int rc)
-{
-        LIST_HEAD        (zombies);
-        kib_tx_t         *tx;
-        unsigned long     flags;
-
-        LASSERT (rc != 0);
-        LASSERT (peer->ibp_reconnect_interval >= IBNAL_MIN_RECONNECT_INTERVAL);
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        LASSERT (peer->ibp_connecting != 0);
-        peer->ibp_connecting--;
-
-        if (peer->ibp_connecting != 0) {
-                /* another connection attempt under way (loopback?)... */
-                write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-                return;
-        }
-
-        if (list_empty(&peer->ibp_conns)) {
-                /* Say when active connection can be re-attempted */
-                peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval;
-                /* Increase reconnection interval */
-                peer->ibp_reconnect_interval = MIN (peer->ibp_reconnect_interval * 2,
-                                                    IBNAL_MAX_RECONNECT_INTERVAL);
-        
-                /* Take peer's blocked transmits; I'll complete
-                 * them with error */
-                while (!list_empty (&peer->ibp_tx_queue)) {
-                        tx = list_entry (peer->ibp_tx_queue.next,
-                                         kib_tx_t, tx_list);
-                        
-                        list_del (&tx->tx_list);
-                        list_add_tail (&tx->tx_list, &zombies);
-                }
-                
-                if (kibnal_peer_active(peer) &&
-                    (peer->ibp_persistence == 0)) {
-                        /* failed connection attempt on non-persistent peer */
-                        kibnal_unlink_peer_locked (peer);
-                }
-        } else {
-                /* Can't have blocked transmits if there are connections */
-                LASSERT (list_empty(&peer->ibp_tx_queue));
-        }
-        
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        /* the zombies are completed below, after the global lock has
-         * been dropped */
-        if (!list_empty (&zombies))
-                CERROR ("Deleting messages for "LPX64": connection failed\n",
-                        peer->ibp_nid);
-
-        while (!list_empty (&zombies)) {
-                tx = list_entry (zombies.next, kib_tx_t, tx_list);
-
-                list_del (&tx->tx_list);
-                /* complete now */
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-        }
-}
-
-/* Complete a connection attempt on 'conn'.
- *
- * 'active' is non-zero when the conn carries a connreq (asserted below);
- * 'status' is 0 on success or a negative errno.
- *
- * On success the conn becomes ESTABLISHED, is added to its peer's conn
- * list, inherits the peer's queued transmits, and has all of its receive
- * buffers posted; each of these takes a conn ref.
- *
- * On failure the conn is either scheduled for close (if it had reached
- * IBNAL_CONN_CONNECTING on entry) or marked DISCONNECTED, the peer is
- * told via kibnal_peer_connect_failed(), and for conns that never reached
- * CONNECTING a ref is dropped with kibnal_put_conn().
- *
- * NOTE(review): when status == 0 but the peer was deleted meanwhile, the
- * conn is already marked ESTABLISHED when kibnal_close_conn_locked() runs,
- * and that function list_del()s ibc_list for ESTABLISHED conns although
- * this function has not added the conn to ibp_conns on that path --
- * confirm this is safe. */
-static void
-kibnal_connreq_done (kib_conn_t *conn, int active, int status)
-{
-        int               state = conn->ibc_state;
-        kib_peer_t       *peer = conn->ibc_peer;
-        kib_tx_t         *tx;
-        unsigned long     flags;
-        int               i;
-
-        /* passive connection has no connreq & vice versa */
-        LASSERTF(!active == !(conn->ibc_connreq != NULL),
-                 "%d %p\n", active, conn->ibc_connreq);
-        if (active) {
-                PORTAL_FREE (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
-                conn->ibc_connreq = NULL;
-        }
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        LASSERT (peer->ibp_connecting != 0);
-        
-        if (status == 0) {                         
-                /* connection established... */
-                KIB_ASSERT_CONN_STATE(conn, IBNAL_CONN_CONNECTING);
-                conn->ibc_state = IBNAL_CONN_ESTABLISHED;
-
-                if (!kibnal_peer_active(peer)) {
-                        /* ...but peer deleted meantime */
-                        status = -ECONNABORTED;
-                }
-        } else {
-                KIB_ASSERT_CONN_STATE_RANGE(conn, IBNAL_CONN_INIT_QP,
-                                            IBNAL_CONN_CONNECTING);
-        }
-
-        if (status == 0) {
-                /* Everything worked! */
-
-                peer->ibp_connecting--;
-
-                /* +1 ref for ibc_list; caller(== CM)'s ref remains until
-                 * the IB_CM_IDLE callback */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount);
-                list_add (&conn->ibc_list, &peer->ibp_conns);
-                
-                /* reset reconnect interval for next attempt */
-                peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
-
-                /* post blocked sends to the new connection */
-                spin_lock (&conn->ibc_lock);
-                
-                while (!list_empty (&peer->ibp_tx_queue)) {
-                        tx = list_entry (peer->ibp_tx_queue.next, 
-                                         kib_tx_t, tx_list);
-                        
-                        list_del (&tx->tx_list);
-
-                        /* +1 ref for each tx */
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-                        atomic_inc (&conn->ibc_refcount);
-                        kibnal_queue_tx_locked (tx, conn);
-                }
-                
-                spin_unlock (&conn->ibc_lock);
-
-                /* Nuke any dangling conns from a different peer instance... */
-                kibnal_close_stale_conns_locked (conn->ibc_peer,
-                                                 conn->ibc_incarnation);
-
-                write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-                /* queue up all the receives */
-                for (i = 0; i < IBNAL_RX_MSGS; i++) {
-                        /* +1 ref for rx desc */
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-                        atomic_inc (&conn->ibc_refcount);
-
-                        CDEBUG(D_NET, "RX[%d] %p->%p - "LPX64"\n",
-                               i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg,
-                               conn->ibc_rxs[i].rx_vaddr);
-
-                        kibnal_post_rx (&conn->ibc_rxs[i], 0);
-                }
-
-                kibnal_check_sends (conn);
-                return;
-        }
-
-        /* connection failed */
-        /* NB 'state' is the conn state sampled on entry, before any
-         * change made above */
-        if (state == IBNAL_CONN_CONNECTING) {
-                /* schedule for connd to close */
-                kibnal_close_conn_locked (conn, status);
-        } else {
-                /* Don't have a CM comm_id; just wait for refs to drain */
-                conn->ibc_state = IBNAL_CONN_DISCONNECTED;
-        } 
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        kibnal_peer_connect_failed (conn->ibc_peer, active, status);
-
-        /* If we didn't establish the connection we don't have to pass
-         * through the disconnect protocol before dropping the CM ref */
-        if (state < IBNAL_CONN_CONNECTING) 
-                kibnal_put_conn (conn);
-}
-
-/*
- * Prepare a conn for an incoming connection request from 'nid'.
- *
- * A new conn is created and attached to the peer table entry for 'nid'
- * (creating that entry if it does not exist yet).  The peer gains a ref
- * for the conn and its ibp_connecting count is bumped.  'cep' is not used
- * here; conn->ibc_cep is filled in later when the accept is issued.
- *
- * Returns 0 with the conn stored in *connp, -ENOMEM if the conn or peer
- * cannot be created, or -EPROTO if the remote queue depth does not match
- * IBNAL_MSG_QUEUE_SIZE.
- */
-static int
-kibnal_accept (kib_conn_t **connp, IB_HANDLE *cep,
-                ptl_nid_t nid, __u64 incarnation, int queue_depth)
-{
-        kib_conn_t    *conn = kibnal_create_conn();
-        kib_peer_t    *peer;
-        kib_peer_t    *peer2;
-        unsigned long  flags;
-
-        if (conn == NULL)
-                return (-ENOMEM);
-
-        if (queue_depth != IBNAL_MSG_QUEUE_SIZE) {
-                CERROR("Can't accept "LPX64": bad queue depth %d (%d expected)\n",
-                       nid, queue_depth, IBNAL_MSG_QUEUE_SIZE);
-                atomic_dec (&conn->ibc_refcount);
-                kibnal_destroy_conn(conn);
-                return (-EPROTO);
-        }
-
-        /* assume 'nid' is a new peer */
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL) {
-                /* NB conn->ibc_peer is not assigned until the end of this
-                 * function, so it must not be dereferenced here; identify
-                 * the remote end by 'nid' instead */
-                CDEBUG(D_NET, "--conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_dec (&conn->ibc_refcount);
-                kibnal_destroy_conn(conn);
-                return (-ENOMEM);
-        }
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        peer2 = kibnal_find_peer_locked(nid);
-        if (peer2 == NULL) {
-                /* peer table takes my ref on peer */
-                list_add_tail (&peer->ibp_list, kibnal_nid2peerlist(nid));
-        } else {
-                /* peer already known: drop the one just created and use
-                 * the existing table entry */
-                kib_peer_decref (peer);
-                peer = peer2;
-        }
-
-        kib_peer_addref(peer); /* +1 ref for conn */
-        peer->ibp_connecting++;
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        conn->ibc_peer = peer;
-        conn->ibc_state = IBNAL_CONN_CONNECTING;
-        /* conn->ibc_cep is set when cm_accept is called */
-        conn->ibc_incarnation = incarnation;
-        conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
-
-        *connp = conn;
-        return (0);
-}
-
-/* Ask the verbs layer to move 'qp' to 'state'; a failure is only logged. */
-static void kibnal_set_qp_state(IB_HANDLE *qp, IB_QP_STATE state)
-{
-        /* every field other than RequestState is zero-initialised */
-        IB_QP_ATTRIBUTES_MODIFY attrs = { .RequestState = state };
-        FSTATUS                 rc;
-
-        rc = iibt_qp_modify(qp, &attrs, NULL);
-        if (rc == FSUCCESS)
-                return;
-
-        CERROR("couldn't set qp state to %d, error %d\n", state, rc);
-}
-
-/* Complete everything still pending on a DISCONNECTED conn: the QP is
- * forced into the error state, passive RDMAs that are no longer being
- * sent are aborted with -ECONNABORTED, and all queued transmits are
- * completed via kibnal_tx_done(). */
-static void kibnal_flush_pending(kib_conn_t *conn)
-{
-        LIST_HEAD        (doomed);
-        struct list_head *pos;
-        struct list_head *next;
-        kib_tx_t         *tx;
-        unsigned long     irq_flags;
-
-        /* NB we wait until the connection has closed before completing
-         * outstanding passive RDMAs so we can be sure the network can't
-         * touch the mapped memory any more. */
-        KIB_ASSERT_CONN_STATE(conn, IBNAL_CONN_DISCONNECTED);
-
-        /* set the QP to the error state so that we get flush callbacks
-         * on our posted receives which can then drop their conn refs */
-        kibnal_set_qp_state(conn->ibc_qp, QPStateError);
-
-        spin_lock_irqsave (&conn->ibc_lock, irq_flags);
-
-        /* grab passive RDMAs not waiting for the tx callback */
-        list_for_each_safe (pos, next, &conn->ibc_active_txs) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                LASSERT (tx->tx_passive_rdma ||
-                         !tx->tx_passive_rdma_wait);
-
-                LASSERT (tx->tx_passive_rdma_wait ||
-                         tx->tx_sending != 0);
-
-                /* only txs waiting on a passive RDMA are touched */
-                if (tx->tx_passive_rdma_wait) {
-                        tx->tx_status = -ECONNABORTED;
-                        tx->tx_passive_rdma_wait = 0;
-
-                        /* still being sent?  leave it on the active list */
-                        if (tx->tx_sending == 0) {
-                                list_del (&tx->tx_list);
-                                list_add (&tx->tx_list, &doomed);
-                        }
-                }
-        }
-
-        /* grab all blocked transmits */
-        list_for_each_safe (pos, next, &conn->ibc_tx_queue) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                list_del (&tx->tx_list);
-                list_add (&tx->tx_list, &doomed);
-        }
-
-        spin_unlock_irqrestore (&conn->ibc_lock, irq_flags);
-
-        /* complete the collected txs now that ibc_lock has been dropped */
-        while (!list_empty(&doomed)) {
-                tx = list_entry (doomed.next, kib_tx_t, tx_list);
-
-                list_del(&tx->tx_list);
-                kibnal_tx_done (tx);
-        }
-}
-
-/* Reject the connection on 'cep' with the given CM reject 'reason'.
- * Nothing is sent if the temporary CM_REJECT_INFO cannot be allocated. */
-static void
-kibnal_reject (IB_HANDLE cep, uint16_t reason)
-{
-        CM_REJECT_INFO *info;
-
-        PORTAL_ALLOC(info, sizeof(*info));
-
-        /* PORTAL_ALLOC() will CERROR on failure */
-        if (info != NULL) {
-                info->Reason = reason;
-                iibt_cm_reject(cep, info);
-                PORTAL_FREE(info, sizeof(*info));
-        }
-}
-
-/* Move a QP through ready-to-receive and then ready-to-send.
- *
- * qp_handle - QP to modify
- * qpn       - remote QP number
- * resp_res  - responder resources to configure
- * path      - path record supplying the destination AV, path MTU and
- *             packet lifetime
- * init_depth- initiator depth to configure
- * send_psn  - starting send PSN
- *
- * Returns FSUCCESS, or the status of whichever iibt_qp_modify() call
- * failed. */
-static FSTATUS
-kibnal_qp_rts(IB_HANDLE qp_handle, __u32 qpn, __u8 resp_res, 
-              IB_PATH_RECORD *path, __u8 init_depth, __u32 send_psn)
-{
-        IB_QP_ATTRIBUTES_MODIFY modify_attr;
-        FSTATUS frc;
-        ENTRY;
-
-        /* step 1: ready-to-receive */
-        modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
-                .RequestState           = QPStateReadyToRecv,
-                .RecvPSN                = IBNAL_STARTING_PSN,
-                .DestQPNumber           = qpn,
-                .ResponderResources     = resp_res,
-                .MinRnrTimer            = UsecToRnrNakTimer(2000), /* NOTE(review): this comment used to say "20 ms"; going by the macro name the argument is in usec, i.e. 2 ms -- confirm which is intended */
-                .Attrs                  = (IB_QP_ATTR_RECVPSN |
-                                           IB_QP_ATTR_DESTQPNUMBER | 
-                                           IB_QP_ATTR_RESPONDERRESOURCES | 
-                                           IB_QP_ATTR_DESTAV | 
-                                           IB_QP_ATTR_PATHMTU | 
-                                           IB_QP_ATTR_MINRNRTIMER),
-        };
-        /* PathMTU and DestAV are filled in from the path record */
-        GetAVFromPath(0, path, &modify_attr.PathMTU, NULL, 
-                      &modify_attr.DestAV);
-
-        frc = iibt_qp_modify(qp_handle, &modify_attr, NULL);
-        if (frc != FSUCCESS) 
-                RETURN(frc);
-
-        /* step 2: ready-to-send */
-        modify_attr = (IB_QP_ATTRIBUTES_MODIFY) {
-                .RequestState           = QPStateReadyToSend,
-                .FlowControl            = TRUE,
-                .InitiatorDepth         = init_depth,
-                .SendPSN                = send_psn,
-                .LocalAckTimeout        = path->PktLifeTime + 2, /* 2 or 1? */
-                .RetryCount             = IBNAL_RETRY,
-                .RnrRetryCount          = IBNAL_RNR_RETRY,
-                .Attrs                  = (IB_QP_ATTR_FLOWCONTROL | 
-                                           IB_QP_ATTR_INITIATORDEPTH | 
-                                           IB_QP_ATTR_SENDPSN | 
-                                           IB_QP_ATTR_LOCALACKTIMEOUT | 
-                                           IB_QP_ATTR_RETRYCOUNT | 
-                                           IB_QP_ATTR_RNRRETRYCOUNT),
-        };
-
-        frc = iibt_qp_modify(qp_handle, &modify_attr, NULL);
-        RETURN(frc);
-}
-
-/*
- * Handle the CM reply to a connection request we issued ('arg' is our
- * conn).
- *
- * The peer's wire connreq in the reply's private data is validated
- * (magic, version, queue depth, NID).  If it checks out the QP is taken
- * to RTS and the connection is accepted; kibnal_connreq_done() is called
- * with the outcome in every case.  Validation and QP failures also send
- * a CM reject.
- */
-static void
-kibnal_connect_reply (IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
-        IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs;
-        kib_conn_t *conn = arg;
-        kib_wire_connreq_t *wcr;
-        CM_REPLY_INFO *rep = &info->Info.Reply;
-        uint16_t reason;
-        FSTATUS frc;
-
-        wcr = (kib_wire_connreq_t *)info->Info.Reply.PrivateData;
-
-        if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
-                CERROR ("Can't connect "LPX64": bad magic %08x\n",
-                        conn->ibc_peer->ibp_nid, le32_to_cpu(wcr->wcr_magic));
-                GOTO(reject, reason = RC_USER_REJ);
-        }
-
-        if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
-                /* report the offending version, not the magic */
-                CERROR ("Can't connect "LPX64": bad version %d\n",
-                        conn->ibc_peer->ibp_nid,
-                        le16_to_cpu(wcr->wcr_version));
-                GOTO(reject, reason = RC_USER_REJ);
-        }
-
-        if (wcr->wcr_queue_depth != cpu_to_le16(IBNAL_MSG_QUEUE_SIZE)) {
-                CERROR ("Can't connect "LPX64": bad queue depth %d\n",
-                        conn->ibc_peer->ibp_nid,
-                        le16_to_cpu(wcr->wcr_queue_depth));
-                GOTO(reject, reason = RC_USER_REJ);
-        }
-
-        if (le64_to_cpu(wcr->wcr_nid) != conn->ibc_peer->ibp_nid) {
-                CERROR ("Unexpected NID "LPX64" from "LPX64"\n",
-                        le64_to_cpu(wcr->wcr_nid), conn->ibc_peer->ibp_nid);
-                GOTO(reject, reason = RC_USER_REJ);
-        }
-
-        CDEBUG(D_NET, "Connection %p -> "LPX64" REP_RECEIVED.\n",
-               conn, conn->ibc_peer->ibp_nid);
-
-        conn->ibc_incarnation = le64_to_cpu(wcr->wcr_incarnation);
-        conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
-
-        /* the peer's initiator depth bounds our responder resources and
-         * vice versa; both are also capped by what the HCA supports */
-        frc = kibnal_qp_rts(conn->ibc_qp, rep->QPN,
-                            min_t(__u8, rep->ArbInitiatorDepth,
-                                  ca_attr->MaxQPResponderResources),
-                            &conn->ibc_connreq->cr_path,
-                            min_t(__u8, rep->ArbResponderResources,
-                                  ca_attr->MaxQPInitiatorDepth),
-                            rep->StartingPSN);
-        if (frc != FSUCCESS) {
-                CERROR("Connection %p -> "LPX64" QP RTS/RTR failed: %d\n",
-                       conn, conn->ibc_peer->ibp_nid, frc);
-                GOTO(reject, reason = RC_NO_QP);
-        }
-
-        /* the callback arguments are ignored for an active accept */
-        conn->ibc_connreq->cr_discarded.Status = FSUCCESS;
-        frc = iibt_cm_accept(cep, &conn->ibc_connreq->cr_discarded,
-                             NULL, NULL, NULL, NULL);
-        if (frc != FCM_CONNECT_ESTABLISHED) {
-                CERROR("Connection %p -> "LPX64" CMAccept failed: %d\n",
-                       conn, conn->ibc_peer->ibp_nid, frc);
-                kibnal_connreq_done (conn, 1, -ECONNABORTED);
-                /* XXX don't call reject after accept fails? */
-                return;
-        }
-
-        CDEBUG(D_NET, "Connection %p -> "LPX64" Established\n",
-               conn, conn->ibc_peer->ibp_nid);
-
-        kibnal_connreq_done (conn, 1, 0);
-        return;
-
-reject:
-        kibnal_reject(cep, reason);
-        kibnal_connreq_done (conn, 1, -EPROTO);
-}
-
-/* Connection-manager event handler for a conn ('arg'), dispatching on
- * info->Status.  ib_cm.h has a wealth of information on the CM
- * procedures. */
-static void
-kibnal_cm_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
-        kib_conn_t       *conn = arg;
-
-        CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
-        /* Established Connection Notifier */
-        switch (info->Status) {
-        case FCM_CONNECT_REPLY:
-                kibnal_connect_reply(cep, info, arg);
-                return;
-
-        case FCM_DISCONNECT_REQUEST:
-                /* XXX lock around these state management bits? */
-                if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
-                        kibnal_close_conn (conn, 0);
-                conn->ibc_state = IBNAL_CONN_DREP;
-                iibt_cm_disconnect(conn->ibc_cep, NULL, NULL);
-                return;
-
-        /* these both guarantee that no more cm callbacks will occur */
-        case FCM_DISCONNECTED: /* aka FCM_DISCONNECT_TIMEOUT */
-        case FCM_DISCONNECT_REPLY:
-                CDEBUG(D_NET, "Connection %p -> "LPX64" disconnect done.\n",
-                       conn, conn->ibc_peer->ibp_nid);
-
-                conn->ibc_state = IBNAL_CONN_DISCONNECTED;
-                kibnal_flush_pending(conn);
-                kibnal_put_conn(conn);        /* Lose CM's ref */
-                return;
-
-        default:
-                /* any other status is treated as a bug */
-                CERROR("unknown status %d on Connection %p -> "LPX64"\n",
-                       info->Status, conn, conn->ibc_peer->ibp_nid);
-                LBUG();
-                return;
-        }
-}
-
-static int
-kibnal_set_cm_flags(IB_HANDLE cep)
-{
-        FSTATUS frc;
-        uint32 value = 1;
-
-        frc = iibt_cm_modify_cep(cep, CM_FLAG_TIMEWAIT_CALLBACK,
-                                 (char *)&value, sizeof(value), 0);
-        if (frc != FSUCCESS) {
-                CERROR("error setting timeout callback: %d\n", frc);
-                return -1;
-        }
-
-#if 0
-        frc = iibt_cm_modify_cep(cep, CM_FLAG_ASYNC_ACCEPT, (char *)&value,
-                                 sizeof(value), 0);
-        if (frc != FSUCCESS) {
-                CERROR("error setting async accept: %d\n", frc);
-                return -1;
-        }
-#endif
-
-        return 0;
-}
-
-/*
- * CM callback for the listening CEP: handles an incoming connection
- * request (passive side).
- *
- * FCM_CONNECT_CANCEL is ignored.  For FCM_CONNECT_REQUEST the peer's wire
- * connreq is validated, a conn is set up with kibnal_accept(), its QP is
- * taken to RTS, and a reply carrying our own wire connreq is sent with
- * iibt_cm_accept().  Failures before the accept send a CM reject.  Once a
- * conn exists, kibnal_connreq_done() is always called with the outcome.
- */
-void
-kibnal_listen_callback(IB_HANDLE cep, CM_CONN_INFO *info, void *arg)
-{
-        IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs;
-        IB_QP_ATTRIBUTES_QUERY *query;
-        CM_REQUEST_INFO    *req;
-        CM_CONN_INFO       *rep = NULL, *rcv = NULL;
-        kib_wire_connreq_t *wcr;
-        kib_conn_t         *conn = NULL;
-        uint16_t            reason = 0;
-        FSTATUS             frc;
-        int                 rc = 0;
-
-        LASSERT(cep);
-        LASSERT(info);
-        LASSERT(arg == NULL); /* no conn yet for passive */
-
-        CDEBUG(D_NET, "status 0x%x\n", info->Status);
-
-        req = &info->Info.Request;
-        wcr = (kib_wire_connreq_t *)req->PrivateData;
-
-        CDEBUG(D_NET, "%d from "LPX64"\n", info->Status,
-               le64_to_cpu(wcr->wcr_nid));
-
-        if (info->Status == FCM_CONNECT_CANCEL)
-                return;
-
-        LASSERT (info->Status == FCM_CONNECT_REQUEST);
-
-        if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
-                CERROR ("Can't accept: bad magic %08x\n",
-                        le32_to_cpu(wcr->wcr_magic));
-                GOTO(out, reason = RC_USER_REJ);
-        }
-
-        if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
-                /* report the offending version, not the magic */
-                CERROR ("Can't accept: bad version %d\n",
-                        le16_to_cpu(wcr->wcr_version));
-                GOTO(out, reason = RC_USER_REJ);
-        }
-
-        rc = kibnal_accept(&conn, cep,
-                           le64_to_cpu(wcr->wcr_nid),
-                           le64_to_cpu(wcr->wcr_incarnation),
-                           le16_to_cpu(wcr->wcr_queue_depth));
-        if (rc != 0) {
-                CERROR ("Can't accept "LPX64": %d\n",
-                        le64_to_cpu(wcr->wcr_nid), rc);
-                GOTO(out, reason = RC_NO_RESOURCES);
-        }
-
-        /* the peer's offered initiator depth bounds our responder
-         * resources and vice versa; both are capped by the HCA limits */
-        frc = kibnal_qp_rts(conn->ibc_qp, req->CEPInfo.QPN,
-                            min_t(__u8, req->CEPInfo.OfferedInitiatorDepth,
-                                  ca_attr->MaxQPResponderResources),
-                            &req->PathInfo.Path,
-                            min_t(__u8, req->CEPInfo.OfferedResponderResources,
-                                  ca_attr->MaxQPInitiatorDepth),
-                            req->CEPInfo.StartingPSN);
-
-        if (frc != FSUCCESS) {
-                CERROR ("Can't mark QP RTS/RTR  "LPX64": %d\n",
-                        le64_to_cpu(wcr->wcr_nid), frc);
-                GOTO(out, reason = RC_NO_QP);
-        }
-
-        frc = iibt_qp_query(conn->ibc_qp, &conn->ibc_qp_attrs, NULL);
-        if (frc != FSUCCESS) {
-                CERROR ("Couldn't query qp attributes "LPX64": %d\n",
-                        le64_to_cpu(wcr->wcr_nid), frc);
-                GOTO(out, reason = RC_NO_QP);
-        }
-        query = &conn->ibc_qp_attrs;
-
-        PORTAL_ALLOC(rep, sizeof(*rep));
-        PORTAL_ALLOC(rcv, sizeof(*rcv));
-        if (rep == NULL || rcv == NULL) {
-                /* don't leak whichever of the two was allocated */
-                if (rep != NULL)
-                        PORTAL_FREE(rep, sizeof(*rep));
-                if (rcv != NULL)
-                        PORTAL_FREE(rcv, sizeof(*rcv));
-                CERROR ("can't allocate reply and receive buffers\n");
-                GOTO(out, reason = RC_INSUFFICIENT_RESP_RES);
-        }
-
-        /* don't try to deref this into the incoming wcr :) */
-        wcr = (kib_wire_connreq_t *)rep->Info.Reply.PrivateData;
-
-        rep->Info.Reply = (CM_REPLY_INFO) {
-                .QPN = query->QPNumber,
-                .QKey = query->Qkey,
-                .StartingPSN = query->RecvPSN,
-                .EndToEndFlowControl = query->FlowControl,
-                /* XXX Hmm. */
-                .ArbInitiatorDepth = query->InitiatorDepth,
-                .ArbResponderResources = query->ResponderResources,
-                .TargetAckDelay = 0,
-                .FailoverAccepted = 0,
-                .RnRRetryCount = req->CEPInfo.RnrRetryCount,
-        };
-
-        /* NB wcr_queue_depth is handled as a 16-bit little-endian field
-         * everywhere else (cpu_to_le16/le16_to_cpu), so encode it the
-         * same way here */
-        *wcr = (kib_wire_connreq_t) {
-                .wcr_magic       = cpu_to_le32(IBNAL_MSG_MAGIC),
-                .wcr_version     = cpu_to_le16(IBNAL_MSG_VERSION),
-                .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE),
-                .wcr_nid         = cpu_to_le64(kibnal_data.kib_nid),
-                .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation),
-        };
-
-        frc = iibt_cm_accept(cep, rep, rcv, kibnal_cm_callback, conn,
-                             &conn->ibc_cep);
-
-        PORTAL_FREE(rep, sizeof(*rep));
-        PORTAL_FREE(rcv, sizeof(*rcv));
-
-        if (frc != FCM_CONNECT_ESTABLISHED) {
-                /* XXX it seems we don't call reject after this point? */
-                CERROR("iibt_cm_accept() failed: %d, aborting\n", frc);
-                rc = -ECONNABORTED;
-                goto out;
-        }
-
-        if (kibnal_set_cm_flags(conn->ibc_cep)) {
-                rc = -ECONNABORTED;
-                goto out;
-        }
-
-        CDEBUG(D_WARNING, "Connection %p -> "LPX64" ESTABLISHED.\n",
-               conn, conn->ibc_peer->ibp_nid);
-
-out:
-        /* a non-zero 'reason' means we bailed out before the accept */
-        if (reason) {
-                kibnal_reject(cep, reason);
-                rc = -ECONNABORTED;
-        }
-        if (conn != NULL)
-                kibnal_connreq_done(conn, 0, rc);
-
-        return;
-}
-
-/* Debug helper: log each path record in 'results' at D_NET level, giving
- * its index, source and destination GID (subnet prefix : interface id)
- * and P_Key. */
-static void
-dump_path_records(PATH_RESULTS *results)
-{
-        int idx = 0;
-
-        while (idx < results->NumPathRecords) {
-                IB_PATH_RECORD *rec = &results->PathRecords[idx];
-
-                CDEBUG(D_NET, "%d: sgid "LPX64":"LPX64" dgid "
-                       LPX64":"LPX64" pkey %x\n",
-                       idx,
-                       rec->SGID.Type.Global.SubnetPrefix,
-                       rec->SGID.Type.Global.InterfaceID,
-                       rec->DGID.Type.Global.SubnetPrefix,
-                       rec->DGID.Type.Global.InterfaceID,
-                       rec->P_Key);
-                idx++;
-        }
-}
-/* Completion callback for the path-record query started by
- * kibnal_service_get_callback(); 'arg' is the kib_conn_t being connected.
- * On a usable result: remember the first path record, create a CEP, build
- * the wire connreq and the CM request, then start the active connect with
- * iibt_cm_connect().  Every failure is reported through
- * kibnal_connreq_done(conn, 1, -EINVAL). */
-static void
-kibnal_pathreq_callback (void *arg, QUERY *query, 
-                         QUERY_RESULT_VALUES *query_res)
-{
-        IB_CA_ATTRIBUTES *ca_attr = &kibnal_data.kib_hca_attrs;
-        kib_conn_t *conn = arg;
-        PATH_RESULTS *path;
-        FSTATUS frc;
-        
-        if (query_res->Status != FSUCCESS || query_res->ResultDataSize == 0) {
-                CERROR ("status %d data size %d\n", query_res->Status,
-                        query_res->ResultDataSize);
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-
-        path = (PATH_RESULTS *)query_res->QueryResult;
-
-        if (path->NumPathRecords < 1) {
-                CERROR ("expected path records: %d\n", path->NumPathRecords);
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-
-        dump_path_records(path);
-
-        /* just using the first.  this is probably a horrible idea. */
-        conn->ibc_connreq->cr_path = path->PathRecords[0];
-
-        conn->ibc_cep = iibt_cm_create_cep(CM_RC_TYPE);
-        if (conn->ibc_cep == NULL) {
-                CERROR ("Can't create CEP\n");
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-
-        if (kibnal_set_cm_flags(conn->ibc_cep)) {
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-        /* NOTE(review): wcr_queue_depth is converted with cpu_to_le16() here,
-         * but the accept path earlier in this file uses cpu_to_le32() for the
-         * same field; presumably only one matches the field width in
-         * kib_wire_connreq_t - confirm. */
-        conn->ibc_connreq->cr_wcr = (kib_wire_connreq_t) {
-                .wcr_magic       = cpu_to_le32(IBNAL_MSG_MAGIC),
-                .wcr_version     = cpu_to_le16(IBNAL_MSG_VERSION),
-                .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE),
-                .wcr_nid         = cpu_to_le64(kibnal_data.kib_nid),
-                .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation),
-        };
-
-        conn->ibc_connreq->cr_cmreq = (CM_REQUEST_INFO) {
-                .SID = conn->ibc_connreq->cr_service.RID.ServiceID,
-                .CEPInfo = (CM_CEP_INFO) { 
-                        .CaGUID = kibnal_data.kib_hca_guids[0],
-                        .EndToEndFlowControl = FALSE,
-                        .PortGUID = conn->ibc_connreq->cr_path.SGID.Type.Global.InterfaceID,
-                        .RetryCount = IBNAL_RETRY,
-                        .RnrRetryCount = IBNAL_RNR_RETRY,
-                        .AckTimeout = IBNAL_ACK_TIMEOUT,
-                        .StartingPSN = IBNAL_STARTING_PSN,
-                        .QPN = conn->ibc_qp_attrs.QPNumber,
-                        .QKey = conn->ibc_qp_attrs.Qkey,
-                        .OfferedResponderResources = ca_attr->MaxQPResponderResources,
-                        .OfferedInitiatorDepth = ca_attr->MaxQPInitiatorDepth,
-                },
-                .PathInfo = (CM_CEP_PATHINFO) {
-                        .bSubnetLocal = TRUE,
-                        .Path = conn->ibc_connreq->cr_path,
-                },
-        };
-
-#if 0
-        /* XXX set timeout just like SDP!!!*/
-        conn->ibc_connreq->cr_path.packet_life = 13;
-#endif
-        /* Flag I'm getting involved with the CM... */
-        conn->ibc_state = IBNAL_CONN_CONNECTING;
-
-        CDEBUG(D_NET, "Connecting to, service id "LPX64", on "LPX64"\n",
-               conn->ibc_connreq->cr_service.RID.ServiceID, 
-               *kibnal_service_nid_field(&conn->ibc_connreq->cr_service));
-        /* the CM private data carries my wire connreq; the rest of the
-         * CM_REQUEST_INFO_USER_LEN bytes are zeroed */
-        memset(conn->ibc_connreq->cr_cmreq.PrivateData, 0, 
-               CM_REQUEST_INFO_USER_LEN);
-        memcpy(conn->ibc_connreq->cr_cmreq.PrivateData, 
-               &conn->ibc_connreq->cr_wcr, sizeof(conn->ibc_connreq->cr_wcr));
-
-        /* kibnal_cm_callback gets my conn ref */
-        frc = iibt_cm_connect(conn->ibc_cep, &conn->ibc_connreq->cr_cmreq,
-                              kibnal_cm_callback, conn);
-        if (frc != FPENDING && frc != FSUCCESS) {
-                CERROR ("Connect: %d\n", frc);
-                /* Back out state change as connect failed */
-                conn->ibc_state = IBNAL_CONN_INIT_QP;
-                kibnal_connreq_done (conn, 1, -EINVAL);
-        }
-}
-
-/* Debug helper: log each service record in 'results' at D_NET level,
- * giving its index, service id, service GID (subnet prefix : interface
- * id) and P_Key. */
-static void
-dump_service_records(SERVICE_RECORD_RESULTS *results)
-{
-        int idx = 0;
-
-        while (idx < results->NumServiceRecords) {
-                IB_SERVICE_RECORD *rec = &results->ServiceRecords[idx];
-
-                CDEBUG(D_NET, "%d: sid "LPX64" gid "LPX64":"LPX64" pkey %x\n",
-                       idx,
-                       rec->RID.ServiceID,
-                       rec->RID.ServiceGID.Type.Global.SubnetPrefix,
-                       rec->RID.ServiceGID.Type.Global.InterfaceID,
-                       rec->RID.ServiceP_Key);
-                idx++;
-        }
-}
-
-/* Completion callback for the service-record query issued by
- * kibnal_connect_peer(); 'arg' is the kib_conn_t being connected.
- * On a usable result it stores the first service record in the connreq
- * and issues a path-record query (port GUID pair: my port -> the
- * service's interface id) that completes in kibnal_pathreq_callback().
- * Every failure is reported through kibnal_connreq_done(conn, 1, -EINVAL). */
-static void
-kibnal_service_get_callback (void *arg, QUERY *query, 
-                             QUERY_RESULT_VALUES *query_res)
-{
-        kib_conn_t *conn = arg;
-        SERVICE_RECORD_RESULTS *svc;
-        COMMAND_CONTROL_PARAMETERS sd_params;
-        QUERY   path_query;
-        FSTATUS frc;
-        
-        if (query_res->Status != FSUCCESS || query_res->ResultDataSize == 0) {
-                CERROR ("status %d data size %d\n", query_res->Status,
-                        query_res->ResultDataSize);
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-
-        svc = (SERVICE_RECORD_RESULTS *)query_res->QueryResult;
-
-        if (svc->NumServiceRecords < 1) {
-                CERROR ("%d service records\n", svc->NumServiceRecords);
-                kibnal_connreq_done (conn, 1, -EINVAL);
-                return;
-        }
-
-        dump_service_records(svc);
-        /* only the first service record returned is used */
-        conn->ibc_connreq->cr_service = svc->ServiceRecords[0];
-
-        CDEBUG(D_NET, "Got status %d, service id "LPX64", on "LPX64"\n",
-               query_res->Status , conn->ibc_connreq->cr_service.RID.ServiceID, 
-               *kibnal_service_nid_field(&conn->ibc_connreq->cr_service));
-
-        memset(&path_query, 0, sizeof(path_query));
-        path_query.InputType = InputTypePortGuidPair;
-        path_query.OutputType = OutputTypePathRecord;
-        path_query.InputValue.PortGuidPair.SourcePortGuid = kibnal_data.kib_port_guid;
-        path_query.InputValue.PortGuidPair.DestPortGuid  = conn->ibc_connreq->cr_service.RID.ServiceGID.Type.Global.InterfaceID;
-
-        memset(&sd_params, 0, sizeof(sd_params));
-        sd_params.RetryCount = IBNAL_RETRY;
-        sd_params.Timeout = 10 * 1000;   /* wait 10 seconds */
-
-        /* kibnal_pathreq_callback gets my conn ref */
-
-        frc = iibt_sd_query_port_fabric_information(kibnal_data.kib_sd,
-                                                    kibnal_data.kib_port_guid,
-                                                    &path_query, 
-                                                    kibnal_pathreq_callback,
-                                                    &sd_params, conn);
-        if (frc == FPENDING)
-                return;
-
-        CERROR ("Path record request failed: %d\n", frc);
-        kibnal_connreq_done (conn, 1, -EINVAL);
-}
-
-/* Start an active connection to 'peer', which the caller must already
- * have marked as connecting (ibp_connecting != 0).  Allocates a conn and
- * its connreq, then asks the SD for the peer's service record; the
- * handshake continues asynchronously in kibnal_service_get_callback().
- * Allocation failure is reported with -ENOMEM, a failed query submission
- * with -EINVAL. */
-static void
-kibnal_connect_peer (kib_peer_t *peer)
-{
-        COMMAND_CONTROL_PARAMETERS sd_params;
-        QUERY   query;
-        FSTATUS frc;
-        kib_conn_t  *conn = kibnal_create_conn();
-
-        LASSERT (peer->ibp_connecting != 0);
-
-        if (conn == NULL) {
-                CERROR ("Can't allocate conn\n");
-                kibnal_peer_connect_failed (peer, 1, -ENOMEM);
-                return;
-        }
-
-        /* the conn holds its own ref on the peer */
-        conn->ibc_peer = peer;
-        kib_peer_addref(peer);
-
-        PORTAL_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
-        if (conn->ibc_connreq == NULL) {
-                CERROR ("Can't allocate connreq\n");
-                kibnal_connreq_done (conn, 1, -ENOMEM);
-                return;
-        }
-
-        memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq));
-
-        kibnal_set_service_keys(&conn->ibc_connreq->cr_service, peer->ibp_nid);
-
-        /* look up the peer's service record, matching on the service keys */
-        memset(&query, 0, sizeof(query));
-        query.InputType = InputTypeServiceRecord;
-        query.OutputType = OutputTypeServiceRecord;
-        query.InputValue.ServiceRecordValue.ServiceRecord = conn->ibc_connreq->cr_service;
-        query.InputValue.ServiceRecordValue.ComponentMask = KIBNAL_SERVICE_KEY_MASK;
-
-        memset(&sd_params, 0, sizeof(sd_params));
-        sd_params.RetryCount = IBNAL_RETRY;
-        sd_params.Timeout = 10 * 1000;   /* wait 10 seconds */
-
-        /* kibnal_service_get_callback gets my conn ref */
-        frc = iibt_sd_query_port_fabric_information(kibnal_data.kib_sd,
-                                                    kibnal_data.kib_port_guid,
-                                                    &query,
-                                                    kibnal_service_get_callback,
-                                                    &sd_params, conn);
-        if (frc == FPENDING)
-                return;
-
-        CERROR ("iibt_sd_query_port_fabric_information(): %d\n", frc);
-        /* Report a negative errno like every other kibnal_connreq_done()
-         * caller in this file: 'frc' is an FSTATUS, and a value of 0
-         * (FSUCCESS) would otherwise be passed on as a zero status. */
-        kibnal_connreq_done (conn, 1, -EINVAL);
-}
-
-/* Return 1 if any tx on 'conn' - still queued on ibc_tx_queue or already
- * on ibc_active_txs - has reached its deadline, else 0.  Both lists are
- * walked under conn->ibc_lock and the scan stops at the first expired tx. */
-static int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
-        struct list_head  *pos;
-        kib_tx_t          *tx;
-        unsigned long      flags;
-        int                expired = 0;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        /* txs waiting to be sent */
-        list_for_each (pos, &conn->ibc_tx_queue) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                LASSERT (!tx->tx_passive_rdma_wait);
-                LASSERT (tx->tx_sending == 0);
-
-                if (time_after_eq (jiffies, tx->tx_deadline)) {
-                        expired = 1;
-                        goto out;
-                }
-        }
-
-        /* txs in flight or waiting for passive RDMA completion */
-        list_for_each (pos, &conn->ibc_active_txs) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                LASSERT (tx->tx_passive_rdma ||
-                         !tx->tx_passive_rdma_wait);
-
-                LASSERT (tx->tx_passive_rdma_wait ||
-                         tx->tx_sending != 0);
-
-                if (time_after_eq (jiffies, tx->tx_deadline)) {
-                        expired = 1;
-                        goto out;
-                }
-        }
-
- out:
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-        return expired;
-}
-/* Check every connection of every peer in peer-table bucket 'idx':
- * retry pending sends via kibnal_check_sends() and close any connection
- * that kibnal_conn_timed_out() reports as expired.  Closing is done with
- * the global lock dropped (holding an extra conn ref), after which the
- * whole bucket is rescanned from the start. */
-static void
-kibnal_check_conns (int idx)
-{
-        struct list_head  *peers = &kibnal_data.kib_peers[idx];
-        struct list_head  *ptmp;
-        kib_peer_t        *peer;
-        kib_conn_t        *conn;
-        struct list_head  *ctmp;
-
- again:
-        /* NB. We expect to have a look at all the peers and not find any
-         * rdmas to time out, so we just use a shared lock while we
-         * take a look... */
-        read_lock (&kibnal_data.kib_global_lock);
-
-        list_for_each (ptmp, peers) {
-                peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
-                list_for_each (ctmp, &peer->ibp_conns) {
-                        conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
-                        KIB_ASSERT_CONN_STATE(conn, IBNAL_CONN_ESTABLISHED);
-
-                        /* In case we have enough credits to return via a
-                         * NOOP, but there were no non-blocking tx descs
-                         * free to do it last time... */
-                        kibnal_check_sends(conn);
-
-                        if (!kibnal_conn_timed_out(conn))
-                                continue;
-                        
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-                        /* take a ref on the conn before dropping the lock;
-                         * it is released by kibnal_put_conn() below */
-                        atomic_inc (&conn->ibc_refcount);
-                        read_unlock (&kibnal_data.kib_global_lock);
-
-                        CERROR("Timed out RDMA with "LPX64"\n",
-                               peer->ibp_nid);
-
-                        kibnal_close_conn (conn, -ETIMEDOUT);
-                        kibnal_put_conn (conn);
-
-                        /* start again now I've dropped the lock */
-                        goto again;
-                }
-        }
-
-        read_unlock (&kibnal_data.kib_global_lock);
-}
-
-/* Act on a conn the connd thread has pulled off kib_connd_conns,
- * according to its state:
- *   DISCONNECTED - destroy it (no ref is dropped in this case);
- *   SEND_DREQ    - issue the CM disconnect and move to DREQ;
- *   DREP         - nothing to do, a callback already handled it;
- *   anything else is a bug.
- * Except for DISCONNECTED, the ref taken by close_conn is then dropped. */
-static void
-kib_connd_handle_state(kib_conn_t *conn)
-{
-        FSTATUS frc;
-        int     state = conn->ibc_state;
-
-        if (state == IBNAL_CONN_DISCONNECTED) {
-                /* all refs have gone, free and be done with it */
-                kibnal_destroy_conn (conn);
-                return; /* avoid put_conn */
-        }
-
-        if (state == IBNAL_CONN_SEND_DREQ) {
-                frc = iibt_cm_disconnect(conn->ibc_cep, NULL, NULL);
-                if (frc != FSUCCESS) /* XXX do real things */
-                        CERROR("disconnect failed: %d\n", frc);
-                conn->ibc_state = IBNAL_CONN_DREQ;
-        } else if (state != IBNAL_CONN_DREP) {
-                /* DREP means a callback got to the conn before we did;
-                 * any other state here is unexpected */
-                CERROR ("Bad conn %p state: %d\n", conn,
-                        conn->ibc_state);
-                LBUG();
-        }
-
-        /* drop ref from close_conn */
-        kibnal_put_conn(conn);
-}
-/* Connection daemon thread body ('arg' is unused).  Loops:
- *  - handling conns queued on kib_connd_conns (kib_connd_handle_state);
- *  - starting connects for peers queued on kib_connd_peers;
- *  - every 'p' seconds checking a slice of the peer hash table for
- *    timed-out connections;
- * and otherwise sleeps on kib_connd_waitq until the next check is due.
- * Exits once kib_shutdown is set and kib_nconns has dropped to 0.
- * Always returns 0. */
-int
-kibnal_connd (void *arg)
-{
-        wait_queue_t       wait;
-        unsigned long      flags;
-        kib_conn_t        *conn;
-        kib_peer_t        *peer;
-        int                timeout;
-        int                i;
-        int                peer_index = 0;
-        unsigned long      deadline = jiffies;
-        
-        kportal_daemonize ("kibnal_connd");
-        kportal_blockallsigs ();
-
-        init_waitqueue_entry (&wait, current);
-
-        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-        for (;;) {
-                if (!list_empty (&kibnal_data.kib_connd_conns)) {
-                        conn = list_entry (kibnal_data.kib_connd_conns.next,
-                                           kib_conn_t, ibc_list);
-                        list_del (&conn->ibc_list);
-                        
-                        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-                        kib_connd_handle_state(conn);
-
-                        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-                        continue;
-                }
-                /* NB unlike the conn case above there is no 'continue' after
-                 * servicing a peer: control falls through to the shutdown and
-                 * timeout checks below */
-                if (!list_empty (&kibnal_data.kib_connd_peers)) {
-                        peer = list_entry (kibnal_data.kib_connd_peers.next,
-                                           kib_peer_t, ibp_connd_list);
-                        
-                        list_del_init (&peer->ibp_connd_list);
-                        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-                        kibnal_connect_peer (peer);
-                        kib_peer_decref (peer);
-
-                        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-                }
-
-                /* shut down and nobody left to reap... */
-                if (kibnal_data.kib_shutdown &&
-                    atomic_read(&kibnal_data.kib_nconns) == 0)
-                        break;
-
-                spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-                /* careful with the jiffy wrap... */
-                while ((timeout = (int)(deadline - jiffies)) <= 0) {
-                        const int n = 4;
-                        const int p = 1;
-                        int       chunk = kibnal_data.kib_peer_hash_size;
-                        
-                        /* Time to check for RDMA timeouts on a few more
-                         * peers: I do checks every 'p' seconds on a
-                         * proportion of the peer table and I need to check
-                         * every connection 'n' times within a timeout
-                         * interval, to ensure I detect a timeout on any
-                         * connection within (n+1)/n times the timeout
-                         * interval. */
-
-                        if (kibnal_tunables.kib_io_timeout > n * p)
-                                chunk = (chunk * n * p) / 
-                                        kibnal_tunables.kib_io_timeout;
-                        if (chunk == 0)
-                                chunk = 1;
-
-                        for (i = 0; i < chunk; i++) {
-                                kibnal_check_conns (peer_index);
-                                peer_index = (peer_index + 1) % 
-                                             kibnal_data.kib_peer_hash_size;
-                        }
-
-                        deadline += p * HZ;
-                }
-
-                kibnal_data.kib_connd_waketime = jiffies + timeout;
-
-                set_current_state (TASK_INTERRUPTIBLE);
-                add_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-                /* only sleep if there is nothing queued; NB these list checks
-                 * are made without kib_connd_lock held */
-                if (!kibnal_data.kib_shutdown &&
-                    list_empty (&kibnal_data.kib_connd_conns) &&
-                    list_empty (&kibnal_data.kib_connd_peers))
-                        schedule_timeout (timeout);
-
-                set_current_state (TASK_RUNNING);
-                remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
-                spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-        kibnal_thread_fini ();
-        return (0);
-}
-/* Scheduler thread body; 'arg' is the thread's numeric id, used only to
- * build its name ("kibnal_sd_NN").  Loops completing every tx queued on
- * kib_sched_txq (kibnal_tx_done) and at most one rx from kib_sched_rxq
- * (kibnal_rx) per pass, dropping kib_sched_lock around each call.  It
- * sleeps on kib_sched_waitq when idle and yields the CPU after
- * IBNAL_RESCHED consecutive busy passes.  Exits once kib_shutdown is set
- * and kib_nconns has dropped to 0.  Always returns 0. */
-int
-kibnal_scheduler(void *arg)
-{
-        long            id = (long)arg;
-        char            name[16];
-        kib_rx_t       *rx;
-        kib_tx_t       *tx;
-        unsigned long   flags;
-        int             rc;
-        int             counter = 0;
-        int             did_something;
-
-        snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
-        kportal_daemonize(name);
-        kportal_blockallsigs();
-
-        spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
-        for (;;) {
-                did_something = 0;
-
-                while (!list_empty(&kibnal_data.kib_sched_txq)) {
-                        tx = list_entry(kibnal_data.kib_sched_txq.next,
-                                        kib_tx_t, tx_list);
-                        list_del(&tx->tx_list);
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-                        kibnal_tx_done(tx);
-
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-                /* NB completing txs above does not set did_something; only
-                 * handling an rx below does */
-                if (!list_empty(&kibnal_data.kib_sched_rxq)) {
-                        rx = list_entry(kibnal_data.kib_sched_rxq.next,
-                                        kib_rx_t, rx_list);
-                        list_del(&rx->rx_list);
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-
-                        kibnal_rx(rx);
-
-                        did_something = 1;
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-
-                /* shut down and no receives to complete... */
-                if (kibnal_data.kib_shutdown &&
-                    atomic_read(&kibnal_data.kib_nconns) == 0)
-                        break;
-
-                /* nothing to do or hogging CPU */
-                if (!did_something || counter++ == IBNAL_RESCHED) {
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-                        counter = 0;
-                        /* NB 'rc' from the wait is stored but never examined */
-                        if (!did_something) {
-                                rc = wait_event_interruptible(
-                                        kibnal_data.kib_sched_waitq,
-                                        !list_empty(&kibnal_data.kib_sched_txq) || 
-                                        !list_empty(&kibnal_data.kib_sched_rxq) || 
-                                        (kibnal_data.kib_shutdown &&
-                                         atomic_read (&kibnal_data.kib_nconns) == 0));
-                        } else {
-                                our_cond_resched();
-                        }
-
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-        }
-
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
-        kibnal_thread_fini();
-        return (0);
-}
-
-
-/* Portals library-side NAL descriptor: private data pointer plus the
- * send/receive/distance entry points implemented by this NAL. */
-lib_nal_t kibnal_lib = {
-        .libnal_data       = &kibnal_data,      /* NAL private data */
-        .libnal_send       = kibnal_send,
-        .libnal_send_pages = kibnal_send_pages,
-        .libnal_recv       = kibnal_recv,
-        .libnal_recv_pages = kibnal_recv_pages,
-        .libnal_dist       = kibnal_dist,
-};
diff --git a/lustre/portals/knals/lonal/.cvsignore b/lustre/portals/knals/lonal/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/lonal/Makefile.in b/lustre/portals/knals/lonal/Makefile.in
deleted file mode 100644 (file)
index 222e861..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-MODULES := klonal
-klonal-objs := lonal.o lonal_cb.o
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/lonal/autoMakefile.am b/lustre/portals/knals/lonal/autoMakefile.am
deleted file mode 100644 (file)
index d1ef995..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-modulenet_DATA = klonal$(KMODEXT)
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(klonal-objs:%.o=%.c) lonal.h
diff --git a/lustre/portals/knals/lonal/lonal.c b/lustre/portals/knals/lonal/lonal.c
deleted file mode 100644 (file)
index 03c2742..0000000
+++ /dev/null
@@ -1,164 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (C) 2004 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "lonal.h"
-
-nal_t                  klonal_api;
-klonal_data_t          klonal_data;
-ptl_handle_ni_t         klonal_ni;
-
-
-/* NAL command handler registered with libcfs.  The only command
- * understood is NAL_CMD_REGISTER_MYNID, which sets this NAL's NID from
- * pcfg->pcfg_nid and returns 0; any other command returns -EINVAL.
- * 'private' is unused. */
-int
-klonal_cmd (struct portals_cfg *pcfg, void *private)
-{
-       LASSERT (pcfg != NULL);
-
-       if (pcfg->pcfg_command != NAL_CMD_REGISTER_MYNID)
-               return (-EINVAL);
-
-       CDEBUG (D_IOCTL, "setting NID to "LPX64" (was "LPX64")\n",
-               pcfg->pcfg_nid, klonal_lib.libnal_ni.ni_pid.nid);
-       klonal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
-
-       return (0);
-}
-
-/* Tear down the loopback NAL once its last reference has gone
- * (nal->nal_refct == 0).  Undoes whatever klonal_startup() managed to set
- * up, as recorded in klonal_data.klo_init, and zeroes klonal_data.
- * Also called by klonal_startup() itself to back out a partial startup. */
-static void
-klonal_shutdown(nal_t *nal)
-{
-       int     module_used;
-
-       /* NB The first ref was this module! */
-       if (nal->nal_refct != 0)
-               return;
-
-       CDEBUG (D_NET, "shutdown\n");
-       LASSERT (nal == &klonal_api);
-
-       /* klonal_startup() only does PORTAL_MODULE_USE after reaching
-        * KLO_INIT_ALL, so only a fully started NAL holds a module ref to
-        * give back; a startup that failed part-way must not drop one. */
-       module_used = (klonal_data.klo_init == KLO_INIT_ALL);
-
-       switch (klonal_data.klo_init)
-       {
-       default:
-               LASSERT (0);
-
-       case KLO_INIT_ALL:
-               libcfs_nal_cmd_unregister(LONAL);
-               /* fall through */
-
-       case KLO_INIT_LIB:
-               lib_fini (&klonal_lib);
-               break;
-
-       case KLO_INIT_NOTHING:
-               return;
-       }
-
-       memset(&klonal_data, 0, sizeof (klonal_data));
-
-       CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
-
-       printk (KERN_INFO "Lustre: LO NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
-
-       if (module_used) {
-               PORTAL_MODULE_UNUSE;
-       }
-}
-
-/* Bring up the loopback NAL for 'nal' (which must be &klonal_api).
- * If the NAL is already referenced, just hand back the current limits and
- * return PTL_OK.  Otherwise initialise the portals library with NID 0 and
- * the requested pid, then register the command interface, recording
- * progress in klonal_data.klo_init.  On failure whatever was set up is
- * backed out via klonal_shutdown() and the lib_init() error (or PTL_FAIL
- * for a command registration failure) is returned. */
-static int
-klonal_startup (nal_t *nal, ptl_pid_t requested_pid,
-               ptl_ni_limits_t *requested_limits,
-               ptl_ni_limits_t *actual_limits)
-{
-       int               kmem_at_start = atomic_read(&portal_kmemory);
-       ptl_process_id_t  self_id;
-       int               rc;
-
-       LASSERT (nal == &klonal_api);
-
-       if (nal->nal_refct != 0) {
-               /* already running: only report the limits in force */
-               if (actual_limits != NULL)
-                       *actual_limits = klonal_lib.libnal_ni.ni_actual_limits;
-               return (PTL_OK);
-       }
-
-       LASSERT (klonal_data.klo_init == KLO_INIT_NOTHING);
-
-       CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
-
-       /* ensure all pointers NULL etc */
-       memset (&klonal_data, 0, sizeof (klonal_data));
-
-       self_id.pid = requested_pid;
-       self_id.nid = 0;
-
-       rc = lib_init(&klonal_lib, nal, self_id,
-                     requested_limits, actual_limits);
-       if (rc != PTL_OK) {
-               CERROR ("lib_init failed %d\n", rc);
-               goto failed;
-       }
-
-       klonal_data.klo_init = KLO_INIT_LIB;
-
-       rc = libcfs_nal_cmd_register (LONAL, &klonal_cmd, NULL);
-       if (rc != 0) {
-               CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-               rc = PTL_FAIL;
-               goto failed;
-       }
-
-       klonal_data.klo_init = KLO_INIT_ALL;
-
-       printk(KERN_INFO "Lustre: LO NAL (initial mem %d)\n", kmem_at_start);
-       PORTAL_MODULE_USE;
-
-       return (PTL_OK);
-
- failed:
-       klonal_shutdown (nal);
-       return (rc);
-}
-/* Module exit: finalise the klonal_ni interface handle, then unregister
- * the LONAL NAL type.
- * NOTE(review): nothing in this file assigns klonal_ni (there is no
- * PtlNIInit call here) - confirm the PtlNIFini() below is meaningful. */
-void __exit
-klonal_finalise (void)
-{
-       PtlNIFini(klonal_ni);
-
-       ptl_unregister_nal(LONAL);
-}
-/* Module init: hook up the NAL's startup/shutdown entry points and
- * register it as the LONAL NAL type.  Returns 0 on success, or -ENOMEM
- * if ptl_register_nal() fails, whatever the actual PTL error was. */
-static int __init
-klonal_initialise (void)
-{
-       int   rc;
-
-       klonal_api.nal_ni_init = klonal_startup;
-       klonal_api.nal_ni_fini = klonal_shutdown;
-
-       rc = ptl_register_nal(LONAL, &klonal_api);
-       if (rc != PTL_OK) {
-               CERROR("Can't register LONAL: %d\n", rc);
-               return (-ENOMEM);               /* or something... */
-       }
-
-       return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Loopback NAL v0.01");
-MODULE_LICENSE("GPL");
-
-module_init (klonal_initialise);
-module_exit (klonal_finalise);
diff --git a/lustre/portals/knals/lonal/lonal.h b/lustre/portals/knals/lonal/lonal.h
deleted file mode 100644 (file)
index 9d3d3ff..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (C) 2004 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _LONAL_H
-#define _LONAL_H
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/uio.h>
-#include <linux/init.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#define KLOD_IOV        153401
-#define KLOD_KIOV       153402
-/* Describes a payload as it is passed through lib_parse() as the
- * 'private' argument: either a struct iovec array or a ptl_kiov_t array,
- * plus the offset and byte count within it. */
-typedef struct
-{
-        unsigned int     klod_type;             /* KLOD_IOV or KLOD_KIOV: presumably selects the klod_iov member - confirm */
-        unsigned int     klod_niov;             /* # entries in the array */
-        size_t           klod_offset;           /* payload offset */
-        size_t           klod_nob;              /* payload # bytes */
-        union {
-                struct iovec  *iov;
-                ptl_kiov_t    *kiov;
-        }                klod_iov;
-} klo_desc_t;
-/* Global state of the loopback NAL */
-typedef struct
-{
-        char               klo_init;            /* what's been initialised (a KLO_INIT_* value) */
-}  klonal_data_t;
-
-/* klo_init state */
-#define KLO_INIT_NOTHING        0               /* MUST BE ZERO so zeroed state is initialised OK */
-#define KLO_INIT_LIB            1
-#define KLO_INIT_ALL            2
-
-extern lib_nal_t           klonal_lib;
-extern nal_t               klonal_api;
-extern klonal_data_t       klonal_data;
-
-#endif /* _LONAL_H */
diff --git a/lustre/portals/knals/lonal/lonal_cb.c b/lustre/portals/knals/lonal/lonal_cb.c
deleted file mode 100644 (file)
index cf5df0d..0000000
+++ /dev/null
@@ -1,267 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (C) 2004 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "lonal.h"
-
-/*
- *  LIB functions follow
- *
- */
-/* Distance callback: stores 0 in *dist unconditionally and returns 0;
- * 'nal' and 'nid' are not examined. */
-static int
-klonal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        /* the destination is always this node */
-        *dist = 0;
-
-        return 0;
-}
-
-/*
- * Send callback for payloads described by a struct iovec vector.
- *
- * Wraps the payload in a KLOD_IOV descriptor and feeds the header straight
- * back into lib_parse() with that descriptor as the receive-side 'private'
- * pointer.  When lib_parse() reports PTL_OK the send-side message 'libmsg'
- * is finalized here.  Returns lib_parse()'s result.
- */
-static ptl_err_t
-klonal_send (lib_nal_t    *nal,
-             void         *private,
-             lib_msg_t    *libmsg,
-             ptl_hdr_t    *hdr,
-             int           type,
-             ptl_nid_t     nid,
-             ptl_pid_t     pid,
-             unsigned int  payload_niov,
-             struct iovec *payload_iov,
-             size_t        payload_offset,
-             size_t        payload_nob)
-{
-        klo_desc_t desc;
-        ptl_err_t  result;
-
-        desc.klod_type    = KLOD_IOV;
-        desc.klod_niov    = payload_niov;
-        desc.klod_offset  = payload_offset;
-        desc.klod_nob     = payload_nob;
-        desc.klod_iov.iov = payload_iov;
-
-        /* the only valid destination is my own NID */
-        LASSERT(nid == klonal_lib.libnal_ni.ni_pid.nid);
-
-        result = lib_parse(&klonal_lib, hdr, &desc);
-        if (result != PTL_OK)
-                return result;
-
-        lib_finalize(&klonal_lib, private, libmsg, PTL_OK);
-        return result;
-}
-
-/*
- * Send callback for payloads described by a ptl_kiov_t (page) vector.
- *
- * Same flow as klonal_send(), but the descriptor handed to lib_parse() is
- * tagged KLOD_KIOV.  'libmsg' is finalized here only when lib_parse()
- * returns PTL_OK; lib_parse()'s result is returned to the caller.
- */
-static ptl_err_t
-klonal_send_pages (lib_nal_t    *nal,
-                   void         *private,
-                   lib_msg_t    *libmsg,
-                   ptl_hdr_t    *hdr,
-                   int           type,
-                   ptl_nid_t     nid,
-                   ptl_pid_t     pid,
-                   unsigned int  payload_niov,
-                   ptl_kiov_t   *payload_kiov,
-                   size_t        payload_offset,
-                   size_t        payload_nob)
-{
-        klo_desc_t desc;
-        ptl_err_t  result;
-
-        desc.klod_type     = KLOD_KIOV;
-        desc.klod_niov     = payload_niov;
-        desc.klod_offset   = payload_offset;
-        desc.klod_nob      = payload_nob;
-        desc.klod_iov.kiov = payload_kiov;
-
-        /* the only valid destination is my own NID */
-        LASSERT(nid == klonal_lib.libnal_ni.ni_pid.nid);
-
-        result = lib_parse(&klonal_lib, hdr, &desc);
-        if (result != PTL_OK)
-                return result;
-
-        lib_finalize(&klonal_lib, private, libmsg, PTL_OK);
-        return result;
-}
-
-/*
- * Receive callback for a struct iovec destination.
- *
- * 'private' is the klo_desc_t built by the send path; it must describe a
- * KLOD_IOV source.  Copies 'mlen' bytes from the source vector (starting at
- * klod_offset) into 'iov' (starting at 'offset'), then finalizes 'libmsg'.
- * The source descriptor is advanced in place (klod_offset, klod_iov.iov and
- * klod_niov are modified).  'rlen' is not used.
- *
- * Always returns PTL_OK.
- *
- * NOTE(review): the mlen == 0 path returns without calling lib_finalize()
- * on 'libmsg' -- confirm against lib_finalize()/lib_parse() that this is
- * intended.
- */
-static ptl_err_t
-klonal_recv(lib_nal_t    *nal,
-            void         *private,
-            lib_msg_t    *libmsg,
-            unsigned int  niov,
-            struct iovec *iov,
-            size_t        offset,
-            size_t        mlen,
-            size_t        rlen)
-{
-        klo_desc_t *klod = (klo_desc_t *)private;
-
-        /* I only handle mapped->mapped matches */
-        LASSERT(klod->klod_type == KLOD_IOV);
-
-        if (mlen == 0)
-                return PTL_OK;
-
-        /* skip destination fragments that lie wholly before 'offset' */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT(niov > 0);
-        }
-
-        /* likewise skip source fragments that lie wholly before klod_offset */
-        while (klod->klod_offset >= klod->klod_iov.iov->iov_len) {
-                klod->klod_offset -= klod->klod_iov.iov->iov_len;
-                klod->klod_iov.iov++;
-                klod->klod_niov--;
-                LASSERT(klod->klod_niov > 0);
-        }
-
-        do {
-                /* size_t, not int: both operands and 'mlen' are size_t, so
-                 * an int here would truncate and mix signedness */
-                size_t fraglen = MIN(iov->iov_len - offset,
-                                     klod->klod_iov.iov->iov_len - klod->klod_offset);
-
-                LASSERT(niov > 0);
-                LASSERT(klod->klod_niov > 0);
-
-                if (fraglen > mlen)
-                        fraglen = mlen;
-
-                memcpy((void *)((unsigned long)iov->iov_base + offset),
-                       (void *)((unsigned long)klod->klod_iov.iov->iov_base +
-                                klod->klod_offset),
-                       fraglen);
-
-                /* advance the destination cursor */
-                if (offset + fraglen < iov->iov_len) {
-                        offset += fraglen;
-                } else {
-                        offset = 0;
-                        iov++;
-                        niov--;
-                }
-
-                /* advance the source cursor */
-                if (klod->klod_offset + fraglen < klod->klod_iov.iov->iov_len ) {
-                        klod->klod_offset += fraglen;
-                } else {
-                        klod->klod_offset = 0;
-                        klod->klod_iov.iov++;
-                        klod->klod_niov--;
-                }
-
-                mlen -= fraglen;
-        } while (mlen > 0);
-
-        lib_finalize(&klonal_lib, private, libmsg, PTL_OK);
-        return PTL_OK;
-}
-
-/*
- * Receive callback for a ptl_kiov_t (page) destination.
- *
- * 'private' is the klo_desc_t built by the send path; it must describe a
- * KLOD_KIOV source.  Copies 'mlen' bytes from the source pages (starting at
- * klod_offset) into 'kiov' (starting at 'offset'), kmap()ing one source and
- * one destination page at a time, then finalizes 'libmsg'.  The source
- * descriptor is advanced in place.  'rlen' is not used.
- *
- * Always returns PTL_OK.
- *
- * NOTE(review): the mlen == 0 path returns without calling lib_finalize()
- * on 'libmsg' -- confirm that this is intended.
- */
-static ptl_err_t
-klonal_recv_pages(lib_nal_t    *nal,
-                  void         *private,
-                  lib_msg_t    *libmsg,
-                  unsigned int  niov,
-                  ptl_kiov_t   *kiov,
-                  size_t        offset,
-                  size_t        mlen,
-                  size_t        rlen)
-{
-        void          *srcaddr = NULL;          /* NULL => no source page mapped */
-        void          *dstaddr = NULL;          /* NULL => no destination page mapped */
-        unsigned long  srcfrag = 0;             /* bytes left in mapped source fragment */
-        unsigned long  dstfrag = 0;             /* bytes left in mapped destination fragment */
-        unsigned long  fraglen;
-        klo_desc_t    *klod = (klo_desc_t *)private;
-
-        /* I only handle unmapped->unmapped matches */
-        LASSERT(klod->klod_type == KLOD_KIOV);
-
-        if (mlen == 0)
-                return PTL_OK;
-
-        /* skip destination fragments that lie wholly before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                kiov++;
-                niov--;
-                LASSERT(niov > 0);
-        }
-
-        /* likewise skip source fragments that lie wholly before klod_offset */
-        while (klod->klod_offset >= klod->klod_iov.kiov->kiov_len) {
-                klod->klod_offset -= klod->klod_iov.kiov->kiov_len;
-                klod->klod_iov.kiov++;
-                klod->klod_niov--;
-                LASSERT(klod->klod_niov > 0);
-        }
-
-        do {
-        /* CAVEAT EMPTOR: I kmap 2 pages at once == slight risk of deadlock */
-                LASSERT(niov > 0);
-                if (dstaddr == NULL) {
-                        dstaddr = (void *)((unsigned long)kmap(kiov->kiov_page) +
-                                           kiov->kiov_offset + offset);
-                        dstfrag = kiov->kiov_len -  offset;
-                }
-
-                LASSERT(klod->klod_niov > 0);
-                if (srcaddr == NULL) {
-                        srcaddr = (void *)((unsigned long)kmap(klod->klod_iov.kiov->kiov_page) +
-                                           klod->klod_iov.kiov->kiov_offset + klod->klod_offset);
-                        srcfrag = klod->klod_iov.kiov->kiov_len - klod->klod_offset;
-                }
-                
-                /* copy as much as both mapped fragments and mlen allow */
-                fraglen = MIN(srcfrag, dstfrag);
-                if (fraglen > mlen)
-                        fraglen = mlen;
-                
-                memcpy(dstaddr, srcaddr, fraglen);
-                
-                if (fraglen < dstfrag) {
-                        dstfrag -= fraglen;
-                        dstaddr = (void *)((unsigned long)dstaddr + fraglen);
-                } else {
-                        /* destination fragment exhausted: unmap and move on */
-                        kunmap(kiov->kiov_page);
-                        dstaddr = NULL;
-                        offset = 0;
-                        kiov++;
-                        niov--;
-                }
-
-                if (fraglen < srcfrag) {
-                        srcfrag -= fraglen;
-                        srcaddr = (void *)((unsigned long)srcaddr + fraglen);
-                } else {
-                        /* source fragment exhausted: unmap and move on */
-                        kunmap(klod->klod_iov.kiov->kiov_page);
-                        srcaddr = NULL;
-                        klod->klod_offset = 0;
-                        klod->klod_iov.kiov++;
-                        klod->klod_niov--;
-                }
-
-                mlen -= fraglen;
-        } while (mlen > 0);
-
-        /* drop whichever mappings are still held after the last copy */
-        if (dstaddr != NULL)
-                kunmap(kiov->kiov_page);
-
-        if (srcaddr != NULL)
-                kunmap(klod->klod_iov.kiov->kiov_page);
-
-        lib_finalize(&klonal_lib, private, libmsg, PTL_OK);
-        return PTL_OK;
-}
-
-/* Callback table for this NAL.  Uses C99 designated initializers (the
- * same form used for klo_desc_t in this file) rather than the obsolete GNU
- * "field: value" syntax. */
-lib_nal_t klonal_lib =
-{
-        .libnal_data       = &klonal_data,       /* NAL private data */
-        .libnal_send       = klonal_send,
-        .libnal_send_pages = klonal_send_pages,
-        .libnal_recv       = klonal_recv,
-        .libnal_recv_pages = klonal_recv_pages,
-        .libnal_dist       = klonal_dist
-};
diff --git a/lustre/portals/knals/openibnal/.cvsignore b/lustre/portals/knals/openibnal/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/openibnal/Makefile.in b/lustre/portals/knals/openibnal/Makefile.in
deleted file mode 100644 (file)
index 9b8ed5d..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Module built from this directory and the objects that make it up.
-MODULES := kopenibnal
-kopenibnal-objs := openibnal.o openibnal_cb.o
-
-# @OPENIBCPPFLAGS@ is a substitution placeholder filled in when this
-# Makefile.in is processed.
-EXTRA_POST_CFLAGS := @OPENIBCPPFLAGS@
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/openibnal/Makefile.mk b/lustre/portals/knals/openibnal/Makefile.mk
deleted file mode 100644 (file)
index bd8043e..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Shared settings live two directories up.
-include $(src)/../../Kernelenv
-
-# Build kopenibnal.o from the objects listed in kopenibnal-objs.
-obj-y += kopenibnal.o
-kopenibnal-objs := openibnal.o openibnal_cb.o
-
diff --git a/lustre/portals/knals/openibnal/autoMakefile.am b/lustre/portals/knals/openibnal/autoMakefile.am
deleted file mode 100644 (file)
index a4207ae..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# The module is only listed for installation when modules are enabled,
-# CRAY_PORTALS is off and BUILD_OPENIBNAL is on.
-if MODULES
-if !CRAY_PORTALS
-if BUILD_OPENIBNAL
-modulenet_DATA = kopenibnal$(KMODEXT)
-endif
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-# kopenibnal-objs is not defined in this file -- presumably supplied by the
-# accompanying Makefile.in; verify.
-DIST_SOURCES = $(kopenibnal-objs:%.o=%.c) openibnal.h
diff --git a/lustre/portals/knals/openibnal/openibnal.c b/lustre/portals/knals/openibnal/openibnal.c
deleted file mode 100644 (file)
index 652eb34..0000000
+++ /dev/null
@@ -1,1486 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "openibnal.h"
-
-/* Module-wide state */
-nal_t                   kibnal_api;
-ptl_handle_ni_t         kibnal_ni;
-kib_data_t              kibnal_data;
-kib_tunables_t          kibnal_tunables;
-
-#ifdef CONFIG_SYSCTL
-#define IBNAL_SYSCTL             202
-
-#define IBNAL_SYSCTL_TIMEOUT     1
-
-/* "timeout": int-sized, mode 0644, backed by kibnal_tunables.kib_io_timeout */
-static ctl_table kibnal_ctl_table[] = {
-        {IBNAL_SYSCTL_TIMEOUT, "timeout", 
-         &kibnal_tunables.kib_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        { 0 }
-};
-
-/* "openibnal" parent entry (mode 0555) whose child table is kibnal_ctl_table */
-static ctl_table kibnal_top_ctl_table[] = {
-        {IBNAL_SYSCTL, "openibnal", NULL, 0, 0555, kibnal_ctl_table},
-        { 0 }
-};
-#endif
-
-/*
- * Log (via CWARN) a service record together with a caller-supplied tag and
- * status code.  A NULL 'service' is reported as such.  The service name is
- * copied into a bounded, NUL-terminated local buffer before printing.
- */
-void
-print_service(struct ib_common_attrib_service *service, char *tag, int rc)
-{
-        char name[32];
-
-        if (service != NULL) {
-                /* strncpy() may not terminate; force a trailing NUL */
-                strncpy (name, service->service_name, sizeof(name)-1);
-                name[sizeof(name)-1] = 0;
-
-                CWARN("tag       : %s\n"
-                      "status    : %d\n"
-                      "service id: "LPX64"\n"
-                      "name      : %s\n"
-                      "NID       : "LPX64"\n", tag, rc,
-                      service->service_id, name,
-                      *kibnal_service_nid_field(service));
-                return;
-        }
-
-        CWARN("tag       : %s\n"
-              "status    : %d (NULL)\n", tag, rc);
-}
-
-/*
- * Completion callback passed to ib_service_get/set/delete in this file.
- * 'arg' points at the caller's int; the completion status is stored there
- * and the waiter blocked on kib_nid_signal is released.
- */
-void
-kibnal_service_setunset_done (tTS_IB_CLIENT_QUERY_TID tid, int status,
-                               struct ib_common_attrib_service *service, void *arg)
-{
-        int *statusp = (int *)arg;
-
-        *statusp = status;
-        up (&kibnal_data.kib_nid_signal);
-}
-
-#if IBNAL_CHECK_ADVERT
-/*
- * Look up the service record keyed on my NID and log an error if the
- * lookup fails, either immediately or in its completion status.  Silently
- * does nothing if the scratch record cannot be allocated.
- */
-void
-kibnal_check_advert (void)
-{
-        struct ib_common_attrib_service *query;
-        __u64   tid;
-        int     rc;
-        int     status;
-
-        PORTAL_ALLOC(query, sizeof(*query));
-        if (query == NULL)
-                return;
-
-        memset (query, 0, sizeof (*query));
-        kibnal_set_service_keys(query, kibnal_data.kib_nid);
-
-        rc = ib_service_get (kibnal_data.kib_device,
-                             kibnal_data.kib_port,
-                             query,
-                             KIBNAL_SERVICE_KEY_MASK,
-                             kibnal_tunables.kib_io_timeout * HZ,
-                             kibnal_service_setunset_done, &status,
-                             &tid);
-
-        if (rc == 0) {
-                /* wait for kibnal_service_setunset_done() to fill 'status' */
-                down (&kibnal_data.kib_nid_signal);
-
-                if (status != 0)
-                        CERROR ("Error %d checking SM service\n", status);
-        } else {
-                CERROR ("Immediate error %d checking SM service\n", rc);
-        }
-
-        PORTAL_FREE(query, sizeof(*query));
-}
-#endif
-
-/*
- * Register a service record for my NID (kibnal_data.kib_nid, which must
- * already be set) carrying kib_service_id plus the port's GID and PKEY,
- * and wait for the asynchronous completion.
- *
- * Returns 0 on success, -ENOMEM if the scratch record cannot be allocated,
- * otherwise the error from the failing ib_* call or from the completion
- * status.
- */
-int
-kibnal_advertise (void)
-{
-        struct ib_common_attrib_service *svc;
-        __u64   tid;
-        int     rc;
-        int     rc2;
-
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC(svc, sizeof(*svc));
-        if (svc == NULL)
-                return (-ENOMEM);
-
-        memset (svc, 0, sizeof (*svc));
-        
-        svc->service_id = kibnal_data.kib_service_id;
-
-        rc = ib_cached_gid_get(kibnal_data.kib_device,
-                               kibnal_data.kib_port,
-                               0,
-                               svc->service_gid);
-        if (rc != 0) {
-                CERROR ("Can't get port %d GID: %d\n",
-                        kibnal_data.kib_port, rc);
-                goto out;
-        }
-        
-        rc = ib_cached_pkey_get(kibnal_data.kib_device,
-                                kibnal_data.kib_port,
-                                0,
-                                &svc->service_pkey);
-        if (rc != 0) {
-                CERROR ("Can't get port %d PKEY: %d\n",
-                        kibnal_data.kib_port, rc);
-                goto out;
-        }
-        
-        svc->service_lease = 0xffffffff;
-
-        kibnal_set_service_keys(svc, kibnal_data.kib_nid);
-
-        CDEBUG(D_NET, "Advertising service id "LPX64" %s:"LPX64"\n", 
-               svc->service_id, 
-               svc->service_name, *kibnal_service_nid_field(svc));
-
-        rc = ib_service_set (kibnal_data.kib_device,
-                             kibnal_data.kib_port,
-                             svc,
-                             IB_SA_SERVICE_COMP_MASK_ID |
-                             IB_SA_SERVICE_COMP_MASK_GID |
-                             IB_SA_SERVICE_COMP_MASK_PKEY |
-                             IB_SA_SERVICE_COMP_MASK_LEASE |
-                             KIBNAL_SERVICE_KEY_MASK,
-                             kibnal_tunables.kib_io_timeout * HZ,
-                             kibnal_service_setunset_done, &rc2, &tid);
-
-        if (rc != 0) {
-                CERROR ("Immediate error %d advertising NID "LPX64"\n",
-                        rc, kibnal_data.kib_nid);
-                goto out;
-        }
-
-        /* wait for kibnal_service_setunset_done() to store the status in rc2 */
-        down (&kibnal_data.kib_nid_signal);
-
-        rc = rc2;
-        if (rc != 0)
-                CERROR ("Error %d advertising NID "LPX64"\n", 
-                        rc, kibnal_data.kib_nid);
- out:
-        PORTAL_FREE(svc, sizeof(*svc));
-        return (rc);
-}
-
-/*
- * Delete the service record keyed on my NID (kibnal_data.kib_nid, which
- * must be set) and wait for the asynchronous completion.
- *
- * expect_success: non-zero if the deletion is expected to succeed; a
- *   failing completion status is then logged as an error.  Zero if the
- *   deletion is expected to fail (nothing to remove); a successful deletion
- *   is then reported as removal of a conflicting NID.
- *
- * Nothing is returned; allocation failure is silent and an immediate
- * submission failure is only logged.
- */
-void
-kibnal_unadvertise (int expect_success)
-{
-        struct ib_common_attrib_service *svc;
-        __u64   tid;
-        int     rc;
-        int     rc2;
-
-        LASSERT (kibnal_data.kib_nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC(svc, sizeof(*svc));
-        if (svc == NULL)
-                return;
-
-        memset (svc, 0, sizeof(*svc));
-
-        kibnal_set_service_keys(svc, kibnal_data.kib_nid);
-
-        CDEBUG(D_NET, "Unadvertising service %s:"LPX64"\n",
-               svc->service_name, *kibnal_service_nid_field(svc));
-
-        rc = ib_service_delete (kibnal_data.kib_device,
-                                kibnal_data.kib_port,
-                                svc,
-                                KIBNAL_SERVICE_KEY_MASK,
-                                kibnal_tunables.kib_io_timeout * HZ,
-                                kibnal_service_setunset_done, &rc2, &tid);
-        if (rc != 0) {
-                CERROR ("Immediate error %d unadvertising NID "LPX64"\n",
-                        rc, kibnal_data.kib_nid);
-                goto out;
-        }
-
-        /* wait for kibnal_service_setunset_done() to store the status in rc2 */
-        down (&kibnal_data.kib_nid_signal);
-
-        if ((rc2 == 0) == !!expect_success)
-                goto out;                       /* outcome was as expected */
-
-        /* rc is necessarily 0 here; the completion status lives in rc2 */
-        if (expect_success)
-                CERROR("Error %d unadvertising NID "LPX64"\n",
-                       rc2, kibnal_data.kib_nid);
-        else
-                CWARN("Removed conflicting NID "LPX64"\n",
-                      kibnal_data.kib_nid);
- out:
-        PORTAL_FREE(svc, sizeof(*svc));
-}
-
-/*
- * Change this NAL's NID to 'nid' (PTL_NID_ANY means "no NID").
- *
- * Under kib_nid_mutex: withdraws the old advert and listener (if a NID was
- * set), installs the new NID and a fresh incarnation stamp taken from the
- * current time, deletes all peers, and -- unless the new NID is
- * PTL_NID_ANY -- starts a listener on a newly assigned service id and
- * advertises it.
- *
- * Returns 0 on success or if the NID is unchanged; otherwise the error
- * from ib_cm_listen() or kibnal_advertise(), with kib_nid reset to
- * PTL_NID_ANY.
- *
- * NOTE(review): on the failure path ni->ni_pid.nid is left set to 'nid'
- * while kib_nid is reset -- confirm this is intended.
- */
-int
-kibnal_set_mynid(ptl_nid_t nid)
-{
-        struct timeval tv;
-        lib_ni_t      *ni = &kibnal_lib.libnal_ni;
-        int            rc;
-
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, ni->ni_pid.nid);
-
-        /* timestamp used for the new incarnation below */
-        do_gettimeofday(&tv);
-
-        down (&kibnal_data.kib_nid_mutex);
-
-        if (nid == kibnal_data.kib_nid) {
-                /* no change of NID */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
-
-        CDEBUG(D_NET, "NID "LPX64"("LPX64")\n",
-               kibnal_data.kib_nid, nid);
-        
-        if (kibnal_data.kib_nid != PTL_NID_ANY) {
-
-                /* tear down the old NID's advert and listener */
-                kibnal_unadvertise (1);
-
-                rc = ib_cm_listen_stop (kibnal_data.kib_listen_handle);
-                if (rc != 0)
-                        CERROR ("Error %d stopping listener\n", rc);
-        }
-        
-        kibnal_data.kib_nid = ni->ni_pid.nid = nid;
-        kibnal_data.kib_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-        
-        /* Delete all existing peers and their connections after new
-         * NID/incarnation set to ensure no old connections in our brave
-         * new world. */
-        kibnal_del_peer (PTL_NID_ANY, 0);
-
-        if (kibnal_data.kib_nid == PTL_NID_ANY) {
-                /* No new NID to install */
-                up (&kibnal_data.kib_nid_mutex);
-                return (0);
-        }
-        
-        /* remove any previous advert (crashed node etc) */
-        kibnal_unadvertise(0);
-
-        /* Assign new service number */
-        kibnal_data.kib_service_id = ib_cm_service_assign();
-        CDEBUG(D_NET, "service_id "LPX64"\n", kibnal_data.kib_service_id);
-        
-        rc = ib_cm_listen(kibnal_data.kib_service_id,
-                          TS_IB_CM_SERVICE_EXACT_MASK,
-                          kibnal_passive_conn_callback, NULL,
-                          &kibnal_data.kib_listen_handle);
-        if (rc == 0) {
-                rc = kibnal_advertise();
-                if (rc == 0) {
-#if IBNAL_CHECK_ADVERT
-                        kibnal_check_advert();
-#endif
-                        up (&kibnal_data.kib_nid_mutex);
-                        return (0);
-                }
-
-                /* advertising failed: undo the listen */
-                ib_cm_listen_stop(kibnal_data.kib_listen_handle);
-                /* remove any peers that sprung up while I failed to
-                 * advertise myself */
-                kibnal_del_peer (PTL_NID_ANY, 0);
-        }
-        
-        kibnal_data.kib_nid = PTL_NID_ANY;
-        up (&kibnal_data.kib_nid_mutex);
-        return (rc);
-}
-
-/*
- * Allocate and initialise a peer for 'nid' (must not be PTL_NID_ANY).
- * The new peer is zeroed, holds one reference for the caller, is not in
- * the peer table, and is counted in kib_npeers.
- *
- * Returns the peer, or NULL if allocation fails.
- */
-kib_peer_t *
-kibnal_create_peer (ptl_nid_t nid)
-{
-        kib_peer_t *new_peer;
-
-        LASSERT (nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC (new_peer, sizeof (*new_peer));
-        if (new_peer != NULL) {
-                /* zero flags etc */
-                memset(new_peer, 0, sizeof(*new_peer));
-
-                new_peer->ibp_nid = nid;
-                /* 1 ref for caller */
-                atomic_set (&new_peer->ibp_refcount, 1);
-
-                /* not in the peer table yet */
-                INIT_LIST_HEAD (&new_peer->ibp_list);
-                INIT_LIST_HEAD (&new_peer->ibp_conns);
-                INIT_LIST_HEAD (&new_peer->ibp_tx_queue);
-
-                new_peer->ibp_reconnect_time = jiffies;
-                new_peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
-
-                atomic_inc (&kibnal_data.kib_npeers);
-        }
-
-        return (new_peer);
-}
-
-/*
- * Free a peer whose last reference has gone.  Asserts that the peer is
- * fully idle (no refs, no persistence, not in the peer table, not
- * connecting, no conns, no queued txs), frees it and drops kib_npeers.
- */
-void
-kibnal_destroy_peer (kib_peer_t *peer)
-{
-        CDEBUG (D_NET, "peer "LPX64" %p deleted\n", peer->ibp_nid, peer);
-
-        LASSERT (atomic_read (&peer->ibp_refcount) == 0);
-        LASSERT (peer->ibp_persistence == 0);
-        LASSERT (!kibnal_peer_active(peer));
-        LASSERT (peer->ibp_connecting == 0);
-        LASSERT (list_empty (&peer->ibp_conns));
-        LASSERT (list_empty (&peer->ibp_tx_queue));
-
-        PORTAL_FREE (peer, sizeof (*peer));
-
-        /* NB a peer's connections keep a reference on their peer until
-         * they are destroyed, so we can be assured that _all_ state to do
-         * with this peer has been cleaned up when its refcount drops to
-         * zero. */
-        atomic_dec (&kibnal_data.kib_npeers);
-}
-
-/* Drop one reference on 'peer'; destroy it when that was the last one. */
-void
-kibnal_put_peer (kib_peer_t *peer)
-{
-        CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n",
-                peer, peer->ibp_nid,
-                atomic_read (&peer->ibp_refcount));
-
-        LASSERT (atomic_read (&peer->ibp_refcount) > 0);
-
-        if (atomic_dec_and_test (&peer->ibp_refcount))
-                kibnal_destroy_peer (peer);
-}
-
-/*
- * Search the hash chain for 'nid' and return the matching peer, or NULL.
- * No reference is taken on the result.  The "_locked" suffix and the
- * callers in this file indicate kib_global_lock is held by the caller.
- */
-kib_peer_t *
-kibnal_find_peer_locked (ptl_nid_t nid)
-{
-        struct list_head *bucket = kibnal_nid2peerlist (nid);
-        struct list_head *pos;
-
-        list_for_each (pos, bucket) {
-                kib_peer_t *candidate = list_entry (pos, kib_peer_t, ibp_list);
-
-                /* everything in the table must have a reason to be there */
-                LASSERT (candidate->ibp_persistence != 0 || /* persistent peer */
-                         candidate->ibp_connecting != 0 || /* creating conns */
-                         !list_empty (&candidate->ibp_conns));  /* active conn */
-
-                if (candidate->ibp_nid == nid) {
-                        CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                               candidate, nid,
-                               atomic_read (&candidate->ibp_refcount));
-                        return (candidate);
-                }
-        }
-
-        return (NULL);
-}
-
-/*
- * Look up the peer for 'nid' under the global read lock.  A peer that is
- * found is returned with an extra reference for the caller; NULL is
- * returned when there is no such peer.
- */
-kib_peer_t *
-kibnal_get_peer (ptl_nid_t nid)
-{
-        kib_peer_t     *found;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        found = kibnal_find_peer_locked (nid);
-        if (found != NULL) {
-                /* +1 ref for caller */
-                atomic_inc (&found->ibp_refcount);
-        }
-
-        read_unlock (&kibnal_data.kib_global_lock);
-
-        return (found);
-}
-
-/*
- * Remove 'peer' from the peer table and drop the table's reference on it.
- * The peer must be non-persistent, have no connections and currently be in
- * the table.
- */
-void
-kibnal_unlink_peer_locked (kib_peer_t *peer)
-{
-        LASSERT (peer->ibp_persistence == 0);
-        LASSERT (list_empty(&peer->ibp_conns));
-
-        LASSERT (kibnal_peer_active(peer));
-        list_del_init (&peer->ibp_list);
-        /* lose peerlist's ref */
-        kibnal_put_peer (peer);
-}
-
-/*
- * Report the NID and persistence count of the index'th peer, counting
- * through the hash table in bucket order under the global read lock.
- *
- * Returns 0 with *nidp and *persistencep filled in, or -ENOENT if there
- * are not that many peers.
- */
-int
-kibnal_get_peer_info (int index, ptl_nid_t *nidp, int *persistencep)
-{
-        kib_peer_t        *peer;
-        struct list_head  *pos;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        for (bucket = 0; bucket < kibnal_data.kib_peer_hash_size; bucket++) {
-
-                list_for_each (pos, &kibnal_data.kib_peers[bucket]) {
-
-                        peer = list_entry (pos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        /* not there yet: keep counting down */
-                        if (index-- > 0)
-                                continue;
-
-                        *nidp = peer->ibp_nid;
-                        *persistencep = peer->ibp_persistence;
-                        rc = 0;
-                        goto out;
-                }
-        }
-
- out:
-        read_unlock (&kibnal_data.kib_global_lock);
-        return (rc);
-}
-
-/*
- * Add one persistence share for 'nid', creating and inserting the peer if
- * it is not already in the table.
- *
- * Returns 0 on success, -EINVAL for PTL_NID_ANY, -ENOMEM if a new peer
- * cannot be allocated.
- */
-int
-kibnal_add_persistent_peer (ptl_nid_t nid)
-{
-        unsigned long      flags;
-        kib_peer_t        *peer;
-        kib_peer_t        *existing;
-
-        if (nid == PTL_NID_ANY)
-                return (-EINVAL);
-
-        /* allocate before taking the lock */
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        existing = kibnal_find_peer_locked (nid);
-        if (existing == NULL) {
-                /* peer table takes existing ref on peer */
-                list_add_tail (&peer->ibp_list,
-                               kibnal_nid2peerlist (nid));
-        } else {
-                /* already known: discard the one just created */
-                kibnal_put_peer (peer);
-                peer = existing;
-        }
-
-        peer->ibp_persistence++;
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-        return (0);
-}
-
-/*
- * Remove persistence from 'peer': one share if 'single_share' is set (and
- * any remain), otherwise all of it.  Once no persistence is left, every
- * connection of the peer is closed.
- */
-void
-kibnal_del_peer_locked (kib_peer_t *peer, int single_share)
-{
-        struct list_head *pos;
-        struct list_head *next;
-
-        if (single_share) {
-                if (peer->ibp_persistence > 0)
-                        peer->ibp_persistence--;
-        } else {
-                peer->ibp_persistence = 0;
-        }
-
-        /* still persistent: leave its connections alone */
-        if (peer->ibp_persistence != 0)
-                return;
-
-        list_for_each_safe (pos, next, &peer->ibp_conns) {
-                kib_conn_t *conn = list_entry(pos, kib_conn_t, ibc_list);
-
-                kibnal_close_conn_locked (conn, 0);
-        }
-
-        /* NB peer unlinks itself when last conn is closed */
-}
-
-/*
- * Delete persistence from the peer matching 'nid', or from every peer when
- * 'nid' is PTL_NID_ANY.  With 'single_share' set, only one share is
- * removed and the scan stops at the first match.
- *
- * Returns 0 if at least one peer matched, else -ENOENT.
- */
-int
-kibnal_del_peer (ptl_nid_t nid, int single_share)
-{
-        unsigned long      flags;
-        struct list_head  *pos;
-        struct list_head  *next;
-        kib_peer_t        *peer;
-        int                first;
-        int                last;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: scan the whole table */
-                first = 0;
-                last = kibnal_data.kib_peer_hash_size - 1;
-        } else {
-                /* specific NID: only its own hash chain */
-                first = last = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, next, &kibnal_data.kib_peers[bucket]) {
-                        peer = list_entry (pos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        if (nid != PTL_NID_ANY && peer->ibp_nid != nid)
-                                continue;
-
-                        kibnal_del_peer_locked (peer, single_share);
-                        rc = 0;         /* matched something */
-
-                        if (single_share)
-                                goto out;
-                }
-        }
- out:
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        return (rc);
-}
-
-/*
- * Return the index'th connection, counting across all peers in hash-table
- * order under the global read lock.  The connection is returned with an
- * extra reference for the caller; NULL if there are not that many.
- */
-kib_conn_t *
-kibnal_get_conn_by_idx (int index)
-{
-        kib_peer_t        *peer;
-        struct list_head  *ppos;
-        struct list_head  *cpos;
-        kib_conn_t        *conn = NULL;
-        int                bucket;
-
-        read_lock (&kibnal_data.kib_global_lock);
-
-        for (bucket = 0; bucket < kibnal_data.kib_peer_hash_size; bucket++) {
-                list_for_each (ppos, &kibnal_data.kib_peers[bucket]) {
-
-                        peer = list_entry (ppos, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence > 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        list_for_each (cpos, &peer->ibp_conns) {
-                                /* not there yet: keep counting down */
-                                if (index-- > 0)
-                                        continue;
-
-                                conn = list_entry (cpos, kib_conn_t, ibc_list);
-                                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                                       atomic_read (&conn->ibc_refcount));
-                                /* ref for the caller */
-                                atomic_inc (&conn->ibc_refcount);
-                                goto out;
-                        }
-                }
-        }
-
- out:
-        read_unlock (&kibnal_data.kib_global_lock);
-        return (conn);
-}
-
-kib_conn_t *
-kibnal_create_conn (void)
-{
-        kib_conn_t  *conn;
-        int          i;
-        __u64        vaddr = 0;
-        __u64        vaddr_base;
-        int          page_offset;
-        int          ipage;
-        int          rc;
-        union {
-                struct ib_qp_create_param  qp_create;
-                struct ib_qp_attribute     qp_attr;
-        } params;
-        
-        PORTAL_ALLOC (conn, sizeof (*conn));
-        if (conn == NULL) {
-                CERROR ("Can't allocate connection\n");
-                return (NULL);
-        }
-
-        /* zero flags, NULL pointers etc... */
-        memset (conn, 0, sizeof (*conn));
-
-        INIT_LIST_HEAD (&conn->ibc_tx_queue);
-        INIT_LIST_HEAD (&conn->ibc_active_txs);
-        spin_lock_init (&conn->ibc_lock);
-        
-        atomic_inc (&kibnal_data.kib_nconns);
-        /* well not really, but I call destroy() on failure, which decrements */
-
-        PORTAL_ALLOC (conn->ibc_rxs, IBNAL_RX_MSGS * sizeof (kib_rx_t));
-        if (conn->ibc_rxs == NULL)
-                goto failed;
-        memset (conn->ibc_rxs, 0, IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
-        rc = kibnal_alloc_pages(&conn->ibc_rx_pages,
-                                IBNAL_RX_MSG_PAGES,
-                                IB_ACCESS_LOCAL_WRITE);
-        if (rc != 0)
-                goto failed;
-
-        vaddr_base = vaddr = conn->ibc_rx_pages->ibp_vaddr;
-
-        for (i = ipage = page_offset = 0; i < IBNAL_RX_MSGS; i++) {
-                struct page *page = conn->ibc_rx_pages->ibp_pages[ipage];
-                kib_rx_t   *rx = &conn->ibc_rxs[i];
-
-                rx->rx_conn = conn;
-                rx->rx_vaddr = vaddr;
-                rx->rx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
-                
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_RX_MSG_BYTES);
-                
-                page_offset += IBNAL_MSG_SIZE;
-                LASSERT (page_offset <= PAGE_SIZE);
-
-                if (page_offset == PAGE_SIZE) {
-                        page_offset = 0;
-                        ipage++;
-                        LASSERT (ipage <= IBNAL_RX_MSG_PAGES);
-                }
-        }
-
-        params.qp_create = (struct ib_qp_create_param) {
-                .limit = {
-                        /* Sends have an optional RDMA */
-                        .max_outstanding_send_request    = 2 * IBNAL_MSG_QUEUE_SIZE,
-                        .max_outstanding_receive_request = IBNAL_MSG_QUEUE_SIZE,
-                        .max_send_gather_element         = 1,
-                        .max_receive_scatter_element     = 1,
-                },
-                .pd              = kibnal_data.kib_pd,
-                .send_queue      = kibnal_data.kib_cq,
-                .receive_queue   = kibnal_data.kib_cq,
-                .send_policy     = IB_WQ_SIGNAL_SELECTABLE,
-                .receive_policy  = IB_WQ_SIGNAL_SELECTABLE,
-                .rd_domain       = 0,
-                .transport       = IB_TRANSPORT_RC,
-                .device_specific = NULL,
-        };
-        
-        rc = ib_qp_create (&params.qp_create, &conn->ibc_qp, &conn->ibc_qpn);
-        if (rc != 0) {
-                CERROR ("Failed to create queue pair: %d\n", rc);
-                goto failed;
-        }
-        
-        /* Mark QP created */
-        conn->ibc_state = IBNAL_CONN_INIT_QP;
-
-        params.qp_attr = (struct ib_qp_attribute) {
-                .state             = IB_QP_STATE_INIT,
-                .port              = kibnal_data.kib_port,
-                .enable_rdma_read  = 1,
-                .enable_rdma_write = 1,
-                .valid_fields      = (IB_QP_ATTRIBUTE_STATE |
-                                      IB_QP_ATTRIBUTE_PORT |
-                                      IB_QP_ATTRIBUTE_PKEY_INDEX |
-                                      IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE),
-        };
-        rc = ib_qp_modify(conn->ibc_qp, &params.qp_attr);
-        if (rc != 0) {
-                CERROR ("Failed to modify queue pair: %d\n", rc);
-                goto failed;
-        }
-
-        /* 1 ref for caller */
-        atomic_set (&conn->ibc_refcount, 1);
-        return (conn);
-        
- failed:
-        kibnal_destroy_conn (conn);
-        return (NULL);
-}
-
-void
-kibnal_destroy_conn (kib_conn_t *conn)
-{
-        int    rc;
-        
-        CDEBUG (D_NET, "connection %p\n", conn);
-
-        LASSERT (atomic_read (&conn->ibc_refcount) == 0);
-        LASSERT (list_empty(&conn->ibc_tx_queue));
-        LASSERT (list_empty(&conn->ibc_active_txs));
-        LASSERT (conn->ibc_nsends_posted == 0);
-        LASSERT (conn->ibc_connreq == NULL);
-
-        switch (conn->ibc_state) {
-        case IBNAL_CONN_ZOMBIE:
-                /* called after connection sequence initiated */
-
-        case IBNAL_CONN_INIT_QP:
-                rc = ib_qp_destroy(conn->ibc_qp);
-                if (rc != 0)
-                        CERROR("Can't destroy QP: %d\n", rc);
-                /* fall through */
-                
-        case IBNAL_CONN_INIT_NOTHING:
-                break;
-
-        default:
-                LASSERT (0);
-        }
-
-        if (conn->ibc_rx_pages != NULL) 
-                kibnal_free_pages(conn->ibc_rx_pages);
-        
-        if (conn->ibc_rxs != NULL)
-                PORTAL_FREE(conn->ibc_rxs, 
-                            IBNAL_RX_MSGS * sizeof(kib_rx_t));
-
-        if (conn->ibc_peer != NULL)
-                kibnal_put_peer(conn->ibc_peer);
-
-        PORTAL_FREE(conn, sizeof (*conn));
-
-        atomic_dec(&kibnal_data.kib_nconns);
-        
-        if (atomic_read (&kibnal_data.kib_nconns) == 0 &&
-            kibnal_data.kib_shutdown) {
-                /* I just nuked the last connection on shutdown; wake up
-                 * everyone so they can exit. */
-                wake_up_all(&kibnal_data.kib_sched_waitq);
-                wake_up_all(&kibnal_data.kib_connd_waitq);
-        }
-}
-
-void
-kibnal_put_conn (kib_conn_t *conn)
-{
-        unsigned long flags;
-
-        CDEBUG (D_NET, "putting conn[%p] state %d -> "LPX64" (%d)\n",
-                conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                atomic_read (&conn->ibc_refcount));
-
-        LASSERT (atomic_read (&conn->ibc_refcount) > 0);
-        if (!atomic_dec_and_test (&conn->ibc_refcount))
-                return;
-
-        /* last ref only goes on zombies */
-        LASSERT (conn->ibc_state == IBNAL_CONN_ZOMBIE);
-
-        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-        list_add (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-        wake_up (&kibnal_data.kib_connd_waitq);
-
-        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-}
-
-int
-kibnal_close_peer_conns_locked (kib_peer_t *peer, int why)
-{
-        kib_conn_t         *conn;
-        struct list_head   *ctmp;
-        struct list_head   *cnxt;
-        int                 count = 0;
-
-        list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
-                conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
-                count++;
-                kibnal_close_conn_locked (conn, why);
-        }
-
-        return (count);
-}
-
-int
-kibnal_close_stale_conns_locked (kib_peer_t *peer, __u64 incarnation)
-{
-        kib_conn_t         *conn;
-        struct list_head   *ctmp;
-        struct list_head   *cnxt;
-        int                 count = 0;
-
-        list_for_each_safe (ctmp, cnxt, &peer->ibp_conns) {
-                conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
-                if (conn->ibc_incarnation == incarnation)
-                        continue;
-
-                CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
-                       peer->ibp_nid, conn->ibc_incarnation, incarnation);
-                
-                count++;
-                kibnal_close_conn_locked (conn, -ESTALE);
-        }
-
-        return (count);
-}
-
-int
-kibnal_close_matching_conns (ptl_nid_t nid)
-{
-        unsigned long       flags;
-        kib_peer_t         *peer;
-        struct list_head   *ptmp;
-        struct list_head   *pnxt;
-        int                 lo;
-        int                 hi;
-        int                 i;
-        int                 count = 0;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        if (nid != PTL_NID_ANY)
-                lo = hi = kibnal_nid2peerlist(nid) - kibnal_data.kib_peers;
-        else {
-                lo = 0;
-                hi = kibnal_data.kib_peer_hash_size - 1;
-        }
-
-        for (i = lo; i <= hi; i++) {
-                list_for_each_safe (ptmp, pnxt, &kibnal_data.kib_peers[i]) {
-
-                        peer = list_entry (ptmp, kib_peer_t, ibp_list);
-                        LASSERT (peer->ibp_persistence != 0 ||
-                                 peer->ibp_connecting != 0 ||
-                                 !list_empty (&peer->ibp_conns));
-
-                        if (!(nid == PTL_NID_ANY || nid == peer->ibp_nid))
-                                continue;
-
-                        count += kibnal_close_peer_conns_locked (peer, 0);
-                }
-        }
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        /* wildcards always succeed */
-        if (nid == PTL_NID_ANY)
-                return (0);
-        
-        return (count == 0 ? -ENOENT : 0);
-}
-
-int
-kibnal_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int rc = -EINVAL;
-
-        LASSERT (pcfg != NULL);
-
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t   nid = 0;
-                int         share_count = 0;
-
-                rc = kibnal_get_peer_info(pcfg->pcfg_count,
-                                          &nid, &share_count);
-                pcfg->pcfg_nid   = nid;
-                pcfg->pcfg_size  = 0;
-                pcfg->pcfg_id    = 0;
-                pcfg->pcfg_misc  = 0;
-                pcfg->pcfg_count = 0;
-                pcfg->pcfg_wait  = share_count;
-                break;
-        }
-        case NAL_CMD_ADD_PEER: {
-                rc = kibnal_add_persistent_peer (pcfg->pcfg_nid);
-                break;
-        }
-        case NAL_CMD_DEL_PEER: {
-                rc = kibnal_del_peer (pcfg->pcfg_nid, 
-                                       /* flags == single_share */
-                                       pcfg->pcfg_flags != 0);
-                break;
-        }
-        case NAL_CMD_GET_CONN: {
-                kib_conn_t *conn = kibnal_get_conn_by_idx (pcfg->pcfg_count);
-
-                if (conn == NULL)
-                        rc = -ENOENT;
-                else {
-                        rc = 0;
-                        pcfg->pcfg_nid   = conn->ibc_peer->ibp_nid;
-                        pcfg->pcfg_id    = 0;
-                        pcfg->pcfg_misc  = 0;
-                        pcfg->pcfg_flags = 0;
-                        kibnal_put_conn (conn);
-                }
-                break;
-        }
-        case NAL_CMD_CLOSE_CONNECTION: {
-                rc = kibnal_close_matching_conns (pcfg->pcfg_nid);
-                break;
-        }
-        case NAL_CMD_REGISTER_MYNID: {
-                if (pcfg->pcfg_nid == PTL_NID_ANY)
-                        rc = -EINVAL;
-                else
-                        rc = kibnal_set_mynid (pcfg->pcfg_nid);
-                break;
-        }
-        }
-
-        return rc;
-}
-
-void
-kibnal_free_pages (kib_pages_t *p)
-{
-        int     npages = p->ibp_npages;
-        int     rc;
-        int     i;
-        
-        if (p->ibp_mapped) {
-                rc = ib_memory_deregister(p->ibp_handle);
-                if (rc != 0)
-                        CERROR ("Deregister error: %d\n", rc);
-        }
-        
-        for (i = 0; i < npages; i++)
-                if (p->ibp_pages[i] != NULL)
-                        __free_page(p->ibp_pages[i]);
-        
-        PORTAL_FREE (p, offsetof(kib_pages_t, ibp_pages[npages]));
-}
-
-int
-kibnal_alloc_pages (kib_pages_t **pp, int npages, int access)
-{
-        kib_pages_t                *p;
-        struct ib_physical_buffer  *phys_pages;
-        int                         i;
-        int                         rc;
-
-        PORTAL_ALLOC(p, offsetof(kib_pages_t, ibp_pages[npages]));
-        if (p == NULL) {
-                CERROR ("Can't allocate buffer %d\n", npages);
-                return (-ENOMEM);
-        }
-
-        memset (p, 0, offsetof(kib_pages_t, ibp_pages[npages]));
-        p->ibp_npages = npages;
-        
-        for (i = 0; i < npages; i++) {
-                p->ibp_pages[i] = alloc_page (GFP_KERNEL);
-                if (p->ibp_pages[i] == NULL) {
-                        CERROR ("Can't allocate page %d of %d\n", i, npages);
-                        kibnal_free_pages(p);
-                        return (-ENOMEM);
-                }
-        }
-
-        PORTAL_ALLOC(phys_pages, npages * sizeof(*phys_pages));
-        if (phys_pages == NULL) {
-                CERROR ("Can't allocate physarray for %d pages\n", npages);
-                kibnal_free_pages(p);
-                return (-ENOMEM);
-        }
-
-        for (i = 0; i < npages; i++) {
-                phys_pages[i].size = PAGE_SIZE;
-                phys_pages[i].address =
-                        kibnal_page2phys(p->ibp_pages[i]);
-        }
-
-        p->ibp_vaddr = 0;
-        rc = ib_memory_register_physical(kibnal_data.kib_pd,
-                                         phys_pages, npages,
-                                         &p->ibp_vaddr,
-                                         npages * PAGE_SIZE, 0,
-                                         access,
-                                         &p->ibp_handle,
-                                         &p->ibp_lkey,
-                                         &p->ibp_rkey);
-        
-        PORTAL_FREE(phys_pages, npages * sizeof(*phys_pages));
-        
-        if (rc != 0) {
-                CERROR ("Error %d mapping %d pages\n", rc, npages);
-                kibnal_free_pages(p);
-                return (rc);
-        }
-        
-        p->ibp_mapped = 1;
-        *pp = p;
-        return (0);
-}
-
-int
-kibnal_setup_tx_descs (void)
-{
-        int           ipage = 0;
-        int           page_offset = 0;
-        __u64         vaddr;
-        __u64         vaddr_base;
-        struct page  *page;
-        kib_tx_t     *tx;
-        int           i;
-        int           rc;
-
-        /* pre-mapped messages are not bigger than 1 page */
-        LASSERT (IBNAL_MSG_SIZE <= PAGE_SIZE);
-
-        /* No fancy arithmetic when we do the buffer calculations */
-        LASSERT (PAGE_SIZE % IBNAL_MSG_SIZE == 0);
-
-        rc = kibnal_alloc_pages(&kibnal_data.kib_tx_pages,
-                                IBNAL_TX_MSG_PAGES, 
-                                0);            /* local read access only */
-        if (rc != 0)
-                return (rc);
-
-        vaddr = vaddr_base = kibnal_data.kib_tx_pages->ibp_vaddr;
-
-        for (i = 0; i < IBNAL_TX_MSGS; i++) {
-                page = kibnal_data.kib_tx_pages->ibp_pages[ipage];
-                tx = &kibnal_data.kib_tx_descs[i];
-
-                memset (tx, 0, sizeof(*tx));    /* zero flags etc */
-                
-                tx->tx_msg = (kib_msg_t *)(((char *)page_address(page)) + page_offset);
-                tx->tx_vaddr = vaddr;
-                tx->tx_isnblk = (i >= IBNAL_NTX);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-
-                CDEBUG(D_NET, "Tx[%d] %p->%p - "LPX64"\n", 
-                       i, tx, tx->tx_msg, tx->tx_vaddr);
-
-                if (tx->tx_isnblk)
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_nblk_txs);
-                else
-                        list_add (&tx->tx_list, 
-                                  &kibnal_data.kib_idle_txs);
-
-                vaddr += IBNAL_MSG_SIZE;
-                LASSERT (vaddr <= vaddr_base + IBNAL_TX_MSG_BYTES);
-
-                page_offset += IBNAL_MSG_SIZE;
-                LASSERT (page_offset <= PAGE_SIZE);
-
-                if (page_offset == PAGE_SIZE) {
-                        page_offset = 0;
-                        ipage++;
-                        LASSERT (ipage <= IBNAL_TX_MSG_PAGES);
-                }
-        }
-        
-        return (0);
-}
-
-void
-kibnal_api_shutdown (nal_t *nal)
-{
-        int   i;
-        int   rc;
-
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
-        }
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-
-        LASSERT(nal == &kibnal_api);
-
-        switch (kibnal_data.kib_init) {
-        default:
-                CERROR ("Unexpected state %d\n", kibnal_data.kib_init);
-                LBUG();
-
-        case IBNAL_INIT_ALL:
-                /* stop calls to nal_cmd */
-                libcfs_nal_cmd_unregister(OPENIBNAL);
-                /* No new peers */
-
-                /* resetting my NID to unadvertises me, removes my
-                 * listener and nukes all current peers */
-                kibnal_set_mynid (PTL_NID_ANY);
-
-                /* Wait for all peer state to clean up */
-                i = 2;
-                while (atomic_read (&kibnal_data.kib_npeers) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to close down\n",
-                               atomic_read (&kibnal_data.kib_npeers));
-                        set_current_state (TASK_INTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                }
-                /* fall through */
-
-        case IBNAL_INIT_CQ:
-                rc = ib_cq_destroy (kibnal_data.kib_cq);
-                if (rc != 0)
-                        CERROR ("Destroy CQ error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_TXD:
-                kibnal_free_pages (kibnal_data.kib_tx_pages);
-                /* fall through */
-#if IBNAL_FMR
-        case IBNAL_INIT_FMR:
-                rc = ib_fmr_pool_destroy (kibnal_data.kib_fmr_pool);
-                if (rc != 0)
-                        CERROR ("Destroy FMR pool error: %d\n", rc);
-                /* fall through */
-#endif
-        case IBNAL_INIT_PD:
-                rc = ib_pd_destroy(kibnal_data.kib_pd);
-                if (rc != 0)
-                        CERROR ("Destroy PD error: %d\n", rc);
-                /* fall through */
-
-        case IBNAL_INIT_LIB:
-                lib_fini(&kibnal_lib);
-                /* fall through */
-
-        case IBNAL_INIT_DATA:
-                /* Module refcount only gets to zero when all peers
-                 * have been closed so all lists must be empty */
-                LASSERT (atomic_read (&kibnal_data.kib_npeers) == 0);
-                LASSERT (kibnal_data.kib_peers != NULL);
-                for (i = 0; i < kibnal_data.kib_peer_hash_size; i++) {
-                        LASSERT (list_empty (&kibnal_data.kib_peers[i]));
-                }
-                LASSERT (atomic_read (&kibnal_data.kib_nconns) == 0);
-                LASSERT (list_empty (&kibnal_data.kib_sched_rxq));
-                LASSERT (list_empty (&kibnal_data.kib_sched_txq));
-                LASSERT (list_empty (&kibnal_data.kib_connd_conns));
-                LASSERT (list_empty (&kibnal_data.kib_connd_peers));
-
-                /* flag threads to terminate; wake and wait for them to die */
-                kibnal_data.kib_shutdown = 1;
-                wake_up_all (&kibnal_data.kib_sched_waitq);
-                wake_up_all (&kibnal_data.kib_connd_waitq);
-
-                i = 2;
-                while (atomic_read (&kibnal_data.kib_nthreads) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "Waiting for %d threads to terminate\n",
-                               atomic_read (&kibnal_data.kib_nthreads));
-                        set_current_state (TASK_INTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                }
-                /* fall through */
-                
-        case IBNAL_INIT_NOTHING:
-                break;
-        }
-
-        if (kibnal_data.kib_tx_descs != NULL)
-                PORTAL_FREE (kibnal_data.kib_tx_descs,
-                             IBNAL_TX_MSGS * sizeof(kib_tx_t));
-
-        if (kibnal_data.kib_peers != NULL)
-                PORTAL_FREE (kibnal_data.kib_peers,
-                             sizeof (struct list_head) * 
-                             kibnal_data.kib_peer_hash_size);
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-        printk(KERN_INFO "Lustre: OpenIB NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
-
-        kibnal_data.kib_init = IBNAL_INIT_NOTHING;
-}
-
-int
-kibnal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                     ptl_ni_limits_t *requested_limits,
-                     ptl_ni_limits_t *actual_limits)
-{
-        ptl_process_id_t  process_id;
-        int               pkmem = atomic_read(&portal_kmemory);
-        int               rc;
-        int               i;
-
-        LASSERT (nal == &kibnal_api);
-
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = kibnal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return (PTL_OK);
-        }
-
-        LASSERT (kibnal_data.kib_init == IBNAL_INIT_NOTHING);
-
-        memset (&kibnal_data, 0, sizeof (kibnal_data)); /* zero pointers, flags etc */
-
-        init_MUTEX (&kibnal_data.kib_nid_mutex);
-        init_MUTEX_LOCKED (&kibnal_data.kib_nid_signal);
-        kibnal_data.kib_nid = PTL_NID_ANY;
-
-        rwlock_init(&kibnal_data.kib_global_lock);
-
-        kibnal_data.kib_peer_hash_size = IBNAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC (kibnal_data.kib_peers,
-                      sizeof (struct list_head) * kibnal_data.kib_peer_hash_size);
-        if (kibnal_data.kib_peers == NULL) {
-                goto failed;
-        }
-        for (i = 0; i < kibnal_data.kib_peer_hash_size; i++)
-                INIT_LIST_HEAD(&kibnal_data.kib_peers[i]);
-
-        spin_lock_init (&kibnal_data.kib_connd_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_peers);
-        INIT_LIST_HEAD (&kibnal_data.kib_connd_conns);
-        init_waitqueue_head (&kibnal_data.kib_connd_waitq);
-
-        spin_lock_init (&kibnal_data.kib_sched_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_txq);
-        INIT_LIST_HEAD (&kibnal_data.kib_sched_rxq);
-        init_waitqueue_head (&kibnal_data.kib_sched_waitq);
-
-        spin_lock_init (&kibnal_data.kib_tx_lock);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_txs);
-        INIT_LIST_HEAD (&kibnal_data.kib_idle_nblk_txs);
-        init_waitqueue_head(&kibnal_data.kib_idle_tx_waitq);
-
-        PORTAL_ALLOC (kibnal_data.kib_tx_descs,
-                      IBNAL_TX_MSGS * sizeof(kib_tx_t));
-        if (kibnal_data.kib_tx_descs == NULL) {
-                CERROR ("Can't allocate tx descs\n");
-                goto failed;
-        }
-
-        /* lists/ptrs/locks initialised */
-        kibnal_data.kib_init = IBNAL_INIT_DATA;
-        /*****************************************************/
-
-
-        process_id.pid = requested_pid;
-        process_id.nid = kibnal_data.kib_nid;
-        
-        rc = lib_init(&kibnal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                goto failed;
-        }
-
-        /* lib interface initialised */
-        kibnal_data.kib_init = IBNAL_INIT_LIB;
-        /*****************************************************/
-
-        for (i = 0; i < IBNAL_N_SCHED; i++) {
-                rc = kibnal_thread_start (kibnal_scheduler, (void *)i);
-                if (rc != 0) {
-                        CERROR("Can't spawn openibnal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        rc = kibnal_thread_start (kibnal_connd, NULL);
-        if (rc != 0) {
-                CERROR ("Can't spawn openibnal connd: %d\n", rc);
-                goto failed;
-        }
-
-        kibnal_data.kib_device = ib_device_get_by_index(0);
-        if (kibnal_data.kib_device == NULL) {
-                CERROR ("Can't open ib device 0\n");
-                goto failed;
-        }
-        
-        rc = ib_device_properties_get(kibnal_data.kib_device,
-                                      &kibnal_data.kib_device_props);
-        if (rc != 0) {
-                CERROR ("Can't get device props: %d\n", rc);
-                goto failed;
-        }
-
-        CDEBUG(D_NET, "Max Initiator: %d Max Responder %d\n", 
-               kibnal_data.kib_device_props.max_initiator_per_qp,
-               kibnal_data.kib_device_props.max_responder_per_qp);
-
-        kibnal_data.kib_port = 0;
-        for (i = 1; i <= 2; i++) {
-                rc = ib_port_properties_get(kibnal_data.kib_device, i,
-                                            &kibnal_data.kib_port_props);
-                if (rc == 0) {
-                        kibnal_data.kib_port = i;
-                        break;
-                }
-        }
-        if (kibnal_data.kib_port == 0) {
-                CERROR ("Can't find a port\n");
-                goto failed;
-        }
-
-        rc = ib_pd_create(kibnal_data.kib_device,
-                          NULL, &kibnal_data.kib_pd);
-        if (rc != 0) {
-                CERROR ("Can't create PD: %d\n", rc);
-                goto failed;
-        }
-        
-        /* flag PD initialised */
-        kibnal_data.kib_init = IBNAL_INIT_PD;
-        /*****************************************************/
-#if IBNAL_FMR
-        {
-                const int pool_size = IBNAL_NTX + IBNAL_NTX_NBLK;
-                struct ib_fmr_pool_param params = {
-                        .max_pages_per_fmr = PTL_MTU/PAGE_SIZE,
-                        .access            = (IB_ACCESS_LOCAL_WRITE |
-                                              IB_ACCESS_REMOTE_WRITE |
-                                              IB_ACCESS_REMOTE_READ),
-                        .pool_size         = pool_size,
-                        .dirty_watermark   = (pool_size * 3)/4,
-                        .flush_function    = NULL,
-                        .flush_arg         = NULL,
-                        .cache             = 1,
-                };
-                rc = ib_fmr_pool_create(kibnal_data.kib_pd, &params,
-                                        &kibnal_data.kib_fmr_pool);
-                if (rc != 0) {
-                        CERROR ("Can't create FMR pool size %d: %d\n", 
-                                pool_size, rc);
-                        goto failed;
-                }
-        }
-
-        /* flag FMR pool initialised */
-        kibnal_data.kib_init = IBNAL_INIT_FMR;
-#endif
-        /*****************************************************/
-
-        rc = kibnal_setup_tx_descs();
-        if (rc != 0) {
-                CERROR ("Can't register tx descs: %d\n", rc);
-                goto failed;
-        }
-        
-        /* flag TX descs initialised */
-        kibnal_data.kib_init = IBNAL_INIT_TXD;
-        /*****************************************************/
-        
-        {
-                struct ib_cq_callback callback = {
-                        .context        = IBNAL_CALLBACK_CTXT,
-                        .policy         = IB_CQ_PROVIDER_REARM,
-                        .function       = {
-                                .entry  = kibnal_callback,
-                        },
-                        .arg            = NULL,
-                };
-                int  nentries = IBNAL_CQ_ENTRIES;
-                
-                rc = ib_cq_create (kibnal_data.kib_device, 
-                                   &nentries, &callback, NULL,
-                                   &kibnal_data.kib_cq);
-                if (rc != 0) {
-                        CERROR ("Can't create CQ: %d\n", rc);
-                        goto failed;
-                }
-
-                /* I only want solicited events */
-                rc = ib_cq_request_notification(kibnal_data.kib_cq, 1);
-                LASSERT (rc == 0);
-        }
-        
-        /* flag CQ initialised */
-        kibnal_data.kib_init = IBNAL_INIT_CQ;
-        /*****************************************************/
-        
-        rc = libcfs_nal_cmd_register(OPENIBNAL, &kibnal_cmd, NULL);
-        if (rc != 0) {
-                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-                goto failed;
-        }
-
-        /* flag everything initialised */
-        kibnal_data.kib_init = IBNAL_INIT_ALL;
-        /*****************************************************/
-
-        printk(KERN_INFO "Lustre: OpenIB NAL loaded "
-               "(initial mem %d)\n", pkmem);
-
-        return (PTL_OK);
-
- failed:
-        kibnal_api_shutdown (&kibnal_api);    
-        return (PTL_FAIL);
-}
-
-void __exit
-kibnal_module_fini (void)
-{
-#ifdef CONFIG_SYSCTL
-        if (kibnal_tunables.kib_sysctl != NULL)
-                unregister_sysctl_table (kibnal_tunables.kib_sysctl);
-#endif
-        PtlNIFini(kibnal_ni);
-
-        ptl_unregister_nal(OPENIBNAL);
-}
-
-int __init
-kibnal_module_init (void)
-{
-        int    rc;
-
-        /* the following must be sizeof(int) for proc_dointvec() */
-        LASSERT(sizeof (kibnal_tunables.kib_io_timeout) == sizeof (int));
-
-        kibnal_api.nal_ni_init = kibnal_api_startup;
-        kibnal_api.nal_ni_fini = kibnal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        kibnal_tunables.kib_io_timeout = IBNAL_IO_TIMEOUT;
-
-        rc = ptl_register_nal(OPENIBNAL, &kibnal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register IBNAL: %d\n", rc);
-                return (-ENOMEM);               /* or something... */
-        }
-
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(OPENIBNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kibnal_ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(OPENIBNAL);
-                return (-ENODEV);
-        }
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kibnal_tunables.kib_sysctl = 
-                register_sysctl_table (kibnal_top_ctl_table, 0);
-#endif
-        return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel OpenIB NAL v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(kibnal_module_init);
-module_exit(kibnal_module_fini);
-
diff --git a/lustre/portals/knals/openibnal/openibnal.h b/lustre/portals/knals/openibnal/openibnal.h
deleted file mode 100644 (file)
index 2fbd88b..0000000
+++ /dev/null
@@ -1,533 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#include <ts_ib_core.h>
-#include <ts_ib_cm.h>
-#include <ts_ib_sa_client.h>
-
-#define IBNAL_SERVICE_NAME   "openibnal"
-
-#if CONFIG_SMP
-# define IBNAL_N_SCHED      num_online_cpus()   /* # schedulers */
-#else
-# define IBNAL_N_SCHED      1                   /* # schedulers */
-#endif
-
-#define IBNAL_MIN_RECONNECT_INTERVAL HZ         /* first failed connection retry... */
-#define IBNAL_MAX_RECONNECT_INTERVAL (60*HZ)    /* ...exponentially increasing to this */
-
-#define IBNAL_MSG_SIZE       (4<<10)            /* max size of queued messages (inc hdr) */
-
-#define IBNAL_MSG_QUEUE_SIZE   8                /* # messages/RDMAs in-flight */
-#define IBNAL_CREDIT_HIGHWATER 6                /* when to eagerly return credits */
-#define IBNAL_RETRY            7                /* # times to retry */
-#define IBNAL_RNR_RETRY        7                /*  */
-#define IBNAL_CM_RETRY         7                /* # times to retry connection */
-#define IBNAL_FLOW_CONTROL     1
-#define IBNAL_RESPONDER_RESOURCES 8
-
-#define IBNAL_NTX             64                /* # tx descs */
-#define IBNAL_NTX_NBLK        256               /* # reserved tx descs */
-
-#define IBNAL_PEER_HASH_SIZE  101               /* # peer lists */
-
-#define IBNAL_RESCHED         100               /* # scheduler loops before reschedule */
-
-#define IBNAL_CONCURRENT_PEERS 1000             /* # nodes all talking at once to me */
-
-/* default vals for runtime tunables */
-#define IBNAL_IO_TIMEOUT      50                /* default comms timeout (seconds) */
-
-/************************/
-/* derived constants... */
-
-/* TX messages (shared by all connections) */
-#define IBNAL_TX_MSGS       (IBNAL_NTX + IBNAL_NTX_NBLK)
-#define IBNAL_TX_MSG_BYTES  (IBNAL_TX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_TX_MSG_PAGES  ((IBNAL_TX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* RX messages (per connection) */
-#define IBNAL_RX_MSGS       IBNAL_MSG_QUEUE_SIZE
-#define IBNAL_RX_MSG_BYTES  (IBNAL_RX_MSGS * IBNAL_MSG_SIZE)
-#define IBNAL_RX_MSG_PAGES  ((IBNAL_RX_MSG_BYTES + PAGE_SIZE - 1)/PAGE_SIZE)
-
-/* we may have up to 2 completions per transmit +
-   1 completion per receive, per connection */
-#define IBNAL_CQ_ENTRIES  ((2*IBNAL_TX_MSGS) +                          \
-                           (IBNAL_RX_MSGS * IBNAL_CONCURRENT_PEERS))
-
-#define IBNAL_RDMA_BASE  0x0eeb0000
-#define IBNAL_FMR        1
-#define IBNAL_CKSUM      0
-//#define IBNAL_CALLBACK_CTXT  IB_CQ_CALLBACK_PROCESS
-#define IBNAL_CALLBACK_CTXT  IB_CQ_CALLBACK_INTERRUPT
-
-typedef struct 
-{
-        int               kib_io_timeout;       /* comms timeout (seconds) */
-        struct ctl_table_header *kib_sysctl;    /* sysctl interface */
-} kib_tunables_t;
-
-typedef struct
-{
-        int               ibp_npages;           /* # pages */
-        int               ibp_mapped;           /* mapped? */
-        __u64             ibp_vaddr;            /* mapped region vaddr */
-        __u32             ibp_lkey;             /* mapped region lkey */
-        __u32             ibp_rkey;             /* mapped region rkey */
-        struct ib_mr     *ibp_handle;           /* mapped region handle */
-        struct page      *ibp_pages[0];
-} kib_pages_t;
-        
-typedef struct 
-{
-        int               kib_init;             /* initialisation state */
-        __u64             kib_incarnation;      /* which one am I */
-        int               kib_shutdown;         /* shut down? */
-        atomic_t          kib_nthreads;         /* # live threads */
-
-        __u64             kib_service_id;       /* service number I listen on */
-        ptl_nid_t         kib_nid;              /* my NID */
-        struct semaphore  kib_nid_mutex;        /* serialise NID ops */
-        struct semaphore  kib_nid_signal;       /* signal completion */
-
-        rwlock_t          kib_global_lock;      /* stabilize peer/conn ops */
-
-        struct list_head *kib_peers;            /* hash table of all my known peers */
-        int               kib_peer_hash_size;   /* size of kib_peers */
-        atomic_t          kib_npeers;           /* # peers extant */
-        atomic_t          kib_nconns;           /* # connections extant */
-
-        struct list_head  kib_connd_conns;      /* connections to progress */
-        struct list_head  kib_connd_peers;      /* peers waiting for a connection */
-        wait_queue_head_t kib_connd_waitq;      /* connection daemons sleep here */
-        unsigned long     kib_connd_waketime;   /* when connd will wake */
-        spinlock_t        kib_connd_lock;       /* serialise */
-
-        wait_queue_head_t kib_sched_waitq;      /* schedulers sleep here */
-        struct list_head  kib_sched_txq;        /* tx requiring attention */
-        struct list_head  kib_sched_rxq;        /* rx requiring attention */
-        spinlock_t        kib_sched_lock;       /* serialise */
-        
-        struct kib_tx    *kib_tx_descs;         /* all the tx descriptors */
-        kib_pages_t      *kib_tx_pages;         /* premapped tx msg pages */
-
-        struct list_head  kib_idle_txs;         /* idle tx descriptors */
-        struct list_head  kib_idle_nblk_txs;    /* idle reserved tx descriptors */
-        wait_queue_head_t kib_idle_tx_waitq;    /* block here for tx descriptor */
-        __u64             kib_next_tx_cookie;   /* RDMA completion cookie */
-        spinlock_t        kib_tx_lock;          /* serialise */
-        
-        struct ib_device *kib_device;           /* "the" device */
-        struct ib_device_properties kib_device_props; /* its properties */
-        int               kib_port;             /* port on the device */
-        struct ib_port_properties kib_port_props; /* its properties */
-        struct ib_pd     *kib_pd;               /* protection domain */
-#if IBNAL_FMR
-        struct ib_fmr_pool *kib_fmr_pool;       /* fast memory region pool */
-#endif
-        struct ib_cq     *kib_cq;               /* completion queue */
-        void             *kib_listen_handle;    /* where I listen for connections */
-        
-} kib_data_t;
-
-#define IBNAL_INIT_NOTHING         0
-#define IBNAL_INIT_DATA            1
-#define IBNAL_INIT_LIB             2
-#define IBNAL_INIT_PD              3
-#define IBNAL_INIT_FMR             4
-#define IBNAL_INIT_TXD             5
-#define IBNAL_INIT_CQ              6
-#define IBNAL_INIT_ALL             7
-
-/************************************************************************
- * Wire message structs.
- * These are sent in sender's byte order (i.e. receiver flips).
- * CAVEAT EMPTOR: other structs communicated between nodes (e.g. MAD
- * private data and SM service info), is LE on the wire.
- */
-
-typedef struct
-{
-        union {
-                struct ib_mr    *mr;
-                struct ib_fmr   *fmr;
-        }                 md_handle;
-        __u32             md_lkey;
-        __u32             md_rkey;
-        __u64             md_addr;
-} kib_md_t;
-
-typedef struct
-{
-        __u32                 rd_key;           /* remote key */
-        __u32                 rd_nob;           /* # of bytes */
-        __u64                 rd_addr;          /* remote io vaddr */
-} kib_rdma_desc_t;
-
-
-typedef struct
-{
-        ptl_hdr_t         ibim_hdr;             /* portals header */
-        char              ibim_payload[0];      /* piggy-backed payload */
-} kib_immediate_msg_t;
-
-typedef struct
-{
-        ptl_hdr_t         ibrm_hdr;             /* portals header */
-        __u64             ibrm_cookie;          /* opaque completion cookie */
-        kib_rdma_desc_t   ibrm_desc;            /* where to suck/blow */
-} kib_rdma_msg_t;
-
-typedef struct
-{
-        __u64             ibcm_cookie;          /* opaque completion cookie */
-        __u32             ibcm_status;          /* completion status */
-} kib_completion_msg_t;
-
-typedef struct
-{
-        __u32              ibm_magic;           /* I'm an openibnal message */
-        __u16              ibm_version;         /* this is my version number */
-        __u8               ibm_type;            /* msg type */
-        __u8               ibm_credits;         /* returned credits */
-#if IBNAL_CKSUM
-        __u32              ibm_nob;
-        __u32              ibm_cksum;
-#endif
-        union {
-                kib_immediate_msg_t   immediate;
-                kib_rdma_msg_t        rdma;
-                kib_completion_msg_t  completion;
-        }                    ibm_u;
-} kib_msg_t;
-
-#define IBNAL_MSG_MAGIC       0x0be91b91        /* unique magic */
-#define IBNAL_MSG_VERSION              1        /* current protocol version */
-
-#define IBNAL_MSG_NOOP              0xd0        /* nothing (just credits) */
-#define IBNAL_MSG_IMMEDIATE         0xd1        /* portals hdr + payload */
-#define IBNAL_MSG_PUT_RDMA          0xd2        /* portals PUT hdr + source rdma desc */
-#define IBNAL_MSG_PUT_DONE          0xd3        /* signal PUT rdma completion */
-#define IBNAL_MSG_GET_RDMA          0xd4        /* portals GET hdr + sink rdma desc */
-#define IBNAL_MSG_GET_DONE          0xd5        /* signal GET rdma completion */
-
-/***********************************************************************/
-
-typedef struct kib_rx                           /* receive message */
-{
-        struct list_head          rx_list;      /* queue for attention */
-        struct kib_conn          *rx_conn;      /* owning conn */
-        int                       rx_rdma;      /* RDMA completion posted? */
-        int                       rx_posted;    /* posted? */
-        __u64                     rx_vaddr;     /* pre-mapped buffer (hca vaddr) */
-        kib_msg_t                *rx_msg;       /* pre-mapped buffer (host vaddr) */
-        struct ib_receive_param   rx_sp;        /* receive work item */
-        struct ib_gather_scatter  rx_gl;        /* and it's memory */
-} kib_rx_t;
-
-typedef struct kib_tx                           /* transmit message */
-{
-        struct list_head          tx_list;      /* queue on idle_txs ibc_tx_queue etc. */
-        int                       tx_isnblk;    /* I'm reserved for non-blocking sends */
-        struct kib_conn          *tx_conn;      /* owning conn */
-        int                       tx_mapped;    /* mapped for RDMA? */
-        int                       tx_sending;   /* # tx callbacks outstanding */
-        int                       tx_status;    /* completion status */
-        unsigned long             tx_deadline;  /* completion deadline */
-        int                       tx_passive_rdma; /* peer sucks/blows */
-        int                       tx_passive_rdma_wait; /* waiting for peer to complete */
-        __u64                     tx_passive_rdma_cookie; /* completion cookie */
-        lib_msg_t                *tx_libmsg[2]; /* lib msgs to finalize on completion */
-        kib_md_t                  tx_md;        /* RDMA mapping (active/passive) */
-        __u64                     tx_vaddr;     /* pre-mapped buffer (hca vaddr) */
-        kib_msg_t                *tx_msg;       /* pre-mapped buffer (host vaddr) */
-        int                       tx_nsp;       /* # send work items */
-        struct ib_send_param      tx_sp[2];     /* send work items... */
-        struct ib_gather_scatter  tx_gl[2];     /* ...and their memory */
-} kib_tx_t;
-
-#define KIB_TX_UNMAPPED       0
-#define KIB_TX_MAPPED         1
-#define KIB_TX_MAPPED_FMR     2
-
-typedef struct kib_wire_connreq
-{
-        __u32        wcr_magic;                 /* I'm an openibnal connreq */
-        __u16        wcr_version;               /* this is my version number */
-        __u16        wcr_queue_depth;           /* this is my receive queue size */
-        __u64        wcr_nid;                   /* peer's NID */
-        __u64        wcr_incarnation;           /* peer's incarnation */
-} kib_wire_connreq_t;
-
-typedef struct kib_connreq
-{
-        /* connection-in-progress */
-        struct kib_conn                    *cr_conn;
-        kib_wire_connreq_t                  cr_wcr;
-        __u64                               cr_tid;
-        struct ib_common_attrib_service     cr_service;
-        tTS_IB_GID                          cr_gid;
-        struct ib_path_record               cr_path;
-        struct ib_cm_active_param           cr_connparam;
-} kib_connreq_t;
-
-typedef struct kib_conn
-{ 
-        struct kib_peer    *ibc_peer;           /* owning peer */
-        struct list_head    ibc_list;           /* stash on peer's conn list */
-        __u64               ibc_incarnation;    /* which instance of the peer */
-        atomic_t            ibc_refcount;       /* # users */
-        int                 ibc_state;          /* what's happening */
-        atomic_t            ibc_nob;            /* # bytes buffered */
-        int                 ibc_nsends_posted;  /* # uncompleted sends */
-        int                 ibc_credits;        /* # credits I have */
-        int                 ibc_outstanding_credits; /* # credits to return */
-        struct list_head    ibc_tx_queue;       /* send queue */
-        struct list_head    ibc_active_txs;     /* active tx awaiting completion */
-        spinlock_t          ibc_lock;           /* serialise */
-        kib_rx_t           *ibc_rxs;            /* the rx descs */
-        kib_pages_t        *ibc_rx_pages;       /* premapped rx msg pages */
-        struct ib_qp       *ibc_qp;             /* queue pair */
-        __u32               ibc_qpn;            /* queue pair number */
-        tTS_IB_CM_COMM_ID   ibc_comm_id;        /* connection ID? */
-        kib_connreq_t      *ibc_connreq;        /* connection request state */
-} kib_conn_t;
-
-#define IBNAL_CONN_INIT_NOTHING      0          /* initial state */
-#define IBNAL_CONN_INIT_QP           1          /* ibc_qp set up */
-#define IBNAL_CONN_CONNECTING        2          /* started to connect */
-#define IBNAL_CONN_ESTABLISHED       3          /* connection established */
-#define IBNAL_CONN_DEATHROW          4          /* waiting to be closed */
-#define IBNAL_CONN_ZOMBIE            5          /* waiting to be freed */
-
-typedef struct kib_peer
-{
-        struct list_head    ibp_list;           /* stash on global peer list */
-        struct list_head    ibp_connd_list;     /* schedule on kib_connd_peers */
-        ptl_nid_t           ibp_nid;            /* who's on the other end(s) */
-        atomic_t            ibp_refcount;       /* # users */
-        int                 ibp_persistence;    /* "known" peer refs */
-        struct list_head    ibp_conns;          /* all active connections */
-        struct list_head    ibp_tx_queue;       /* msgs waiting for a conn */
-        int                 ibp_connecting;     /* connecting+accepting */
-        unsigned long       ibp_reconnect_time; /* when reconnect may be attempted */
-        unsigned long       ibp_reconnect_interval; /* exponential backoff */
-} kib_peer_t;
-
-
-extern lib_nal_t       kibnal_lib;
-extern kib_data_t      kibnal_data;
-extern kib_tunables_t  kibnal_tunables;
-
-static inline struct list_head *
-kibnal_nid2peerlist (ptl_nid_t nid) 
-{
-        unsigned int hash = ((unsigned int)nid) % kibnal_data.kib_peer_hash_size;
-        
-        return (&kibnal_data.kib_peers [hash]);
-}
-
-static inline int
-kibnal_peer_active(kib_peer_t *peer)
-{
-        /* Am I in the peer hash table? */
-        return (!list_empty(&peer->ibp_list));
-}
-
-static inline void
-kibnal_queue_tx_locked (kib_tx_t *tx, kib_conn_t *conn)
-{
-        /* CAVEAT EMPTOR: tx takes caller's ref on conn */
-
-        LASSERT (tx->tx_nsp > 0);               /* work items set up */
-        LASSERT (tx->tx_conn == NULL);          /* only set here */
-
-        tx->tx_conn = conn;
-        tx->tx_deadline = jiffies + kibnal_tunables.kib_io_timeout * HZ;
-        list_add_tail(&tx->tx_list, &conn->ibc_tx_queue);
-}
-
-#define KIBNAL_SERVICE_KEY_MASK  (IB_SA_SERVICE_COMP_MASK_NAME |        \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_1 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_2 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_3 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_4 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_5 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_6 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_7 |     \
-                                  IB_SA_SERVICE_COMP_MASK_DATA8_8)
-
-static inline __u64*
-kibnal_service_nid_field(struct ib_common_attrib_service *srv)
-{
-        /* must be consistent with KIBNAL_SERVICE_KEY_MASK */
-        return (__u64 *)srv->service_data8;
-}
-
-
-static inline void
-kibnal_set_service_keys(struct ib_common_attrib_service *srv, ptl_nid_t nid)
-{
-        LASSERT (strlen (IBNAL_SERVICE_NAME) < sizeof(srv->service_name));
-        memset (srv->service_name, 0, sizeof(srv->service_name));
-        strcpy (srv->service_name, IBNAL_SERVICE_NAME);
-
-        *kibnal_service_nid_field(srv) = cpu_to_le64(nid);
-}
-
-#if 0
-static inline void
-kibnal_show_rdma_attr (kib_conn_t *conn)
-{
-        struct ib_qp_attribute qp_attr;
-        int                    rc;
-        
-        memset (&qp_attr, 0, sizeof(qp_attr));
-        rc = ib_qp_query(conn->ibc_qp, &qp_attr);
-        if (rc != 0) {
-                CERROR ("Can't get qp attrs: %d\n", rc);
-                return;
-        }
-        
-        CWARN ("RDMA CAPABILITY: write %s read %s\n",
-               (qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ?
-               (qp_attr.enable_rdma_write ? "enabled" : "disabled") : "invalid",
-               (qp_attr.valid_fields & TS_IB_QP_ATTRIBUTE_RDMA_ATOMIC_ENABLE) ?
-               (qp_attr.enable_rdma_read ? "enabled" : "disabled") : "invalid");
-}
-#endif
-
-#if CONFIG_X86
-static inline __u64
-kibnal_page2phys (struct page *p)
-{
-        __u64 page_number = p - mem_map;
-        
-        return (page_number << PAGE_SHIFT);
-}
-#else
-# error "no page->phys"
-#endif
-
-/* CAVEAT EMPTOR:
- * We rely on tx/rx descriptor alignment to allow us to use the lowest bit
- * of the work request id as a flag to determine if the completion is for a
- * transmit or a receive.  It seems that that the CQ entry's 'op' field
- * isn't always set correctly on completions that occur after QP teardown. */
-
-static inline __u64
-kibnal_ptr2wreqid (void *ptr, int isrx)
-{
-        unsigned long lptr = (unsigned long)ptr;
-
-        LASSERT ((lptr & 1) == 0);
-        return (__u64)(lptr | (isrx ? 1 : 0));
-}
-
-static inline void *
-kibnal_wreqid2ptr (__u64 wreqid)
-{
-        return (void *)(((unsigned long)wreqid) & ~1UL);
-}
-
-static inline int
-kibnal_wreqid_is_rx (__u64 wreqid)
-{
-        return (wreqid & 1) != 0;
-}
-
-extern kib_peer_t *kibnal_create_peer (ptl_nid_t nid);
-extern void kibnal_put_peer (kib_peer_t *peer);
-extern int kibnal_del_peer (ptl_nid_t nid, int single_share);
-extern kib_peer_t *kibnal_find_peer_locked (ptl_nid_t nid);
-extern void kibnal_unlink_peer_locked (kib_peer_t *peer);
-extern int  kibnal_close_stale_conns_locked (kib_peer_t *peer, 
-                                              __u64 incarnation);
-extern kib_conn_t *kibnal_create_conn (void);
-extern void kibnal_put_conn (kib_conn_t *conn);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-extern int kibnal_alloc_pages (kib_pages_t **pp, int npages, int access);
-extern void kibnal_free_pages (kib_pages_t *p);
-
-extern void kibnal_check_sends (kib_conn_t *conn);
-
-extern tTS_IB_CM_CALLBACK_RETURN
-kibnal_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
-                       void *param, void *arg);
-extern tTS_IB_CM_CALLBACK_RETURN 
-kibnal_passive_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
-                               void *param, void *arg);
-
-extern void kibnal_close_conn_locked (kib_conn_t *conn, int error);
-extern void kibnal_destroy_conn (kib_conn_t *conn);
-extern int  kibnal_thread_start (int (*fn)(void *arg), void *arg);
-extern int  kibnal_scheduler(void *arg);
-extern int  kibnal_connd (void *arg);
-extern void kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg);
-extern void kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob);
-extern int  kibnal_close_conn (kib_conn_t *conn, int why);
-extern void kibnal_start_active_rdma (int type, int status, 
-                                      kib_rx_t *rx, lib_msg_t *libmsg, 
-                                      unsigned int niov, 
-                                      struct iovec *iov, ptl_kiov_t *kiov,
-                                      size_t offset, size_t nob);
-
-
-
-
-
diff --git a/lustre/portals/knals/openibnal/openibnal_cb.c b/lustre/portals/knals/openibnal/openibnal_cb.c
deleted file mode 100644 (file)
index d774853..0000000
+++ /dev/null
@@ -1,2597 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "openibnal.h"
-
-/*
- *  LIB functions follow
- *
- */
-void
-kibnal_schedule_tx_done (kib_tx_t *tx)
-{
-        unsigned long flags;
-
-        spin_lock_irqsave (&kibnal_data.kib_sched_lock, flags);
-
-        list_add_tail(&tx->tx_list, &kibnal_data.kib_sched_txq);
-        wake_up (&kibnal_data.kib_sched_waitq);
-
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-}
-
-void
-kibnal_tx_done (kib_tx_t *tx)
-{
-        ptl_err_t        ptlrc = (tx->tx_status == 0) ? PTL_OK : PTL_FAIL;
-        unsigned long    flags;
-        int              i;
-        int              rc;
-
-        LASSERT (tx->tx_sending == 0);          /* mustn't be awaiting callback */
-        LASSERT (!tx->tx_passive_rdma_wait);    /* mustn't be awaiting RDMA */
-
-        switch (tx->tx_mapped) {
-        default:
-                LBUG();
-
-        case KIB_TX_UNMAPPED:
-                break;
-                
-        case KIB_TX_MAPPED:
-                if (in_interrupt()) {
-                        /* can't deregister memory in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }
-                rc = ib_memory_deregister(tx->tx_md.md_handle.mr);
-                LASSERT (rc == 0);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-                break;
-
-#if IBNAL_FMR
-        case KIB_TX_MAPPED_FMR:
-                if (in_interrupt() && tx->tx_status != 0) {
-                        /* can't flush FMRs in IRQ context... */
-                        kibnal_schedule_tx_done(tx);
-                        return;
-                }              
-
-                rc = ib_fmr_deregister(tx->tx_md.md_handle.fmr);
-                LASSERT (rc == 0);
-
-                if (tx->tx_status != 0)
-                        ib_fmr_pool_force_flush(kibnal_data.kib_fmr_pool);
-                tx->tx_mapped = KIB_TX_UNMAPPED;
-                break;
-#endif
-        }
-
-        for (i = 0; i < 2; i++) {
-                /* tx may have up to 2 libmsgs to finalise */
-                if (tx->tx_libmsg[i] == NULL)
-                        continue;
-
-                lib_finalize (&kibnal_lib, NULL, tx->tx_libmsg[i], ptlrc);
-                tx->tx_libmsg[i] = NULL;
-        }
-        
-        if (tx->tx_conn != NULL) {
-                kibnal_put_conn (tx->tx_conn);
-                tx->tx_conn = NULL;
-        }
-
-        tx->tx_nsp = 0;
-        tx->tx_passive_rdma = 0;
-        tx->tx_status = 0;
-
-        spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
-        if (tx->tx_isnblk) {
-                list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_nblk_txs);
-        } else {
-                list_add_tail (&tx->tx_list, &kibnal_data.kib_idle_txs);
-                wake_up (&kibnal_data.kib_idle_tx_waitq);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-}
-
-kib_tx_t *
-kibnal_get_idle_tx (int may_block) 
-{
-        unsigned long  flags;
-        kib_tx_t      *tx = NULL;
-        
-        for (;;) {
-                spin_lock_irqsave (&kibnal_data.kib_tx_lock, flags);
-
-                /* "normal" descriptor is free */
-                if (!list_empty (&kibnal_data.kib_idle_txs)) {
-                        tx = list_entry (kibnal_data.kib_idle_txs.next,
-                                         kib_tx_t, tx_list);
-                        break;
-                }
-
-                if (!may_block) {
-                        /* may dip into reserve pool */
-                        if (list_empty (&kibnal_data.kib_idle_nblk_txs)) {
-                                CERROR ("reserved tx desc pool exhausted\n");
-                                break;
-                        }
-
-                        tx = list_entry (kibnal_data.kib_idle_nblk_txs.next,
-                                         kib_tx_t, tx_list);
-                        break;
-                }
-
-                /* block for idle tx */
-                spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-
-                wait_event (kibnal_data.kib_idle_tx_waitq,
-                            !list_empty (&kibnal_data.kib_idle_txs) ||
-                            kibnal_data.kib_shutdown);
-        }
-
-        if (tx != NULL) {
-                list_del (&tx->tx_list);
-
-                /* Allocate a new passive RDMA completion cookie.  It might
-                 * not be needed, but we've got a lock right now and we're
-                 * unlikely to wrap... */
-                tx->tx_passive_rdma_cookie = kibnal_data.kib_next_tx_cookie++;
-
-                LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-                LASSERT (tx->tx_nsp == 0);
-                LASSERT (tx->tx_sending == 0);
-                LASSERT (tx->tx_status == 0);
-                LASSERT (tx->tx_conn == NULL);
-                LASSERT (!tx->tx_passive_rdma);
-                LASSERT (!tx->tx_passive_rdma_wait);
-                LASSERT (tx->tx_libmsg[0] == NULL);
-                LASSERT (tx->tx_libmsg[1] == NULL);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_tx_lock, flags);
-        
-        return (tx);
-}
-
-int
-kibnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        /* I would guess that if kibnal_get_peer (nid) == NULL,
-           and we're not routing, then 'nid' is very distant :) */
-        if ( nal->libnal_ni.ni_pid.nid == nid ) {
-                *dist = 0;
-        } else {
-                *dist = 1;
-        }
-
-        return 0;
-}
-
-void
-kibnal_complete_passive_rdma(kib_conn_t *conn, __u64 cookie, int status)
-{
-        struct list_head *ttmp;
-        unsigned long     flags;
-        int               idle;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        list_for_each (ttmp, &conn->ibc_active_txs) {
-                kib_tx_t *tx = list_entry(ttmp, kib_tx_t, tx_list);
-
-                LASSERT (tx->tx_passive_rdma ||
-                         !tx->tx_passive_rdma_wait);
-
-                LASSERT (tx->tx_passive_rdma_wait ||
-                         tx->tx_sending != 0);
-
-                if (!tx->tx_passive_rdma_wait ||
-                    tx->tx_passive_rdma_cookie != cookie)
-                        continue;
-
-                CDEBUG(D_NET, "Complete %p "LPD64": %d\n", tx, cookie, status);
-
-                tx->tx_status = status;
-                tx->tx_passive_rdma_wait = 0;
-                idle = (tx->tx_sending == 0);
-
-                if (idle)
-                        list_del (&tx->tx_list);
-
-                spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-                /* I could be racing with tx callbacks.  It's whoever
-                 * _makes_ tx idle that frees it */
-                if (idle)
-                        kibnal_tx_done (tx);
-                return;
-        }
-                
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-        CERROR ("Unmatched (late?) RDMA completion "LPX64" from "LPX64"\n",
-                cookie, conn->ibc_peer->ibp_nid);
-}
-/*
- * Post 'rx' as a receive work request on its connection's QP.
- *
- * rx         - receive descriptor; the connection is taken from rx->rx_conn.
- * do_credits - if non-zero and the post succeeds, one more credit is added
- *              to conn->ibc_outstanding_credits and kibnal_check_sends()
- *              is called.
- *
- * The receive is only handed to ib_receive() while the conn is in state
- * IBNAL_CONN_ESTABLISHED; otherwise it fails with -ECONNABORTED.  On any
- * failure the conn is closed if it was still ESTABLISHED and
- * kibnal_put_conn() drops the rx's ref on the conn.
- */
-void
-kibnal_post_rx (kib_rx_t *rx, int do_credits)
-{
-        kib_conn_t   *conn = rx->rx_conn;
-        int           rc;
-        unsigned long flags;
-
-        rx->rx_gl = (struct ib_gather_scatter) {
-                .address = rx->rx_vaddr,
-                .length  = IBNAL_MSG_SIZE,
-                .key     = conn->ibc_rx_pages->ibp_lkey,
-        };
-
-        rx->rx_sp = (struct ib_receive_param) {
-                .work_request_id        = kibnal_ptr2wreqid(rx, 1), /* presumably tagged as an rx id; cf. kibnal_wreqid_is_rx() in kibnal_callback() */
-                .scatter_list           = &rx->rx_gl,
-                .num_scatter_entries    = 1,
-                .device_specific        = NULL,
-                .signaled               = 1,
-        };
-
-        LASSERT (conn->ibc_state >= IBNAL_CONN_ESTABLISHED);
-        LASSERT (!rx->rx_posted);
-        rx->rx_posted = 1;      /* asserted and cleared again in kibnal_rx_callback() */
-        mb();
-
-        if (conn->ibc_state != IBNAL_CONN_ESTABLISHED)
-                rc = -ECONNABORTED;
-        else
-                rc = ib_receive (conn->ibc_qp, &rx->rx_sp, 1);
-
-        if (rc == 0) {
-                if (do_credits) {
-                        spin_lock_irqsave(&conn->ibc_lock, flags);
-                        conn->ibc_outstanding_credits++;
-                        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
-                        kibnal_check_sends(conn);
-                }
-                return;
-        }
-
-        /* NB rx->rx_posted is left set on this failure path */
-        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                CERROR ("Error posting receive -> "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, rc);
-                kibnal_close_conn (rx->rx_conn, rc);
-        } else {
-                CDEBUG (D_NET, "Error posting receive -> "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, rc);
-        }
-
-        /* Drop rx's ref */
-        kibnal_put_conn (conn);
-}
-
-#if IBNAL_CKSUM
-/*
- * Simple rotate-and-add checksum over 'nob' bytes starting at 'ptr'.
- *
- * For every byte the running sum is rotated left by one bit and the byte
- * value is added.  Returns the 32-bit sum (0 when nob <= 0).
- *
- * The bytes are read as 'signed char' on purpose: plain 'char' has
- * implementation-defined signedness, so summing through it makes hosts
- * whose compilers disagree on it compute different checksums for the same
- * message whenever a byte is >= 0x80.  Pinning the type keeps the result
- * identical to what a signed-char build (e.g. gcc on x86) already produced.
- */
-__u32 kibnal_cksum (void *ptr, int nob)
-{
-        const signed char *c   = ptr;
-        __u32              sum = 0;
-
-        while (nob-- > 0)
-                sum = ((sum << 1) | (sum >> 31)) + *c++;
-
-        return (sum);
-}
-#endif
-/*
- * Completion handler for a posted receive (dispatched from
- * kibnal_callback()).
- *
- * Validates the incoming message (completion status, minimum length, magic,
- * version and, with IBNAL_CKSUM, length and checksum), byte-swaps fields
- * when the magic shows the sender has the opposite byte order, and adds any
- * credits carried in the message to conn->ibc_credits.
- *
- * NOOP and PUT_DONE/GET_DONE messages are handled here and the rx is
- * re-posted.  IMMEDIATE, PUT_RDMA and GET_RDMA messages are queued on
- * kibnal_data.kib_sched_rxq and kib_sched_waitq is woken so that
- * kibnal_rx() can process them in thread context.
- *
- * On any failure the conn is closed with -ECONNABORTED, the rx is not
- * re-posted and kibnal_put_conn() drops its ref on the conn.
- */
-void
-kibnal_rx_callback (struct ib_cq_entry *e)
-{
-        kib_rx_t     *rx = (kib_rx_t *)kibnal_wreqid2ptr(e->work_request_id);
-        kib_msg_t    *msg = rx->rx_msg;
-        kib_conn_t   *conn = rx->rx_conn;
-        int           nob = e->bytes_transferred;
-        const int     base_nob = offsetof(kib_msg_t, ibm_u);    /* size of the fixed header before the type-specific body */
-        int           credits;
-        int           flipped;
-        unsigned long flags;
-#if IBNAL_CKSUM
-        __u32         msg_cksum;
-        __u32         computed_cksum;
-#endif
-
-        CDEBUG (D_NET, "rx %p conn %p\n", rx, conn);
-        LASSERT (rx->rx_posted);
-        rx->rx_posted = 0;
-        mb();
-
-        /* receives complete with error in any case after we've started
-         * closing the QP */
-        if (conn->ibc_state >= IBNAL_CONN_DEATHROW)
-                goto failed;
-
-        /* We don't post receives until the conn is established */
-        LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
-        if (e->status != IB_COMPLETION_STATUS_SUCCESS) {
-                CERROR("Rx from "LPX64" failed: %d\n", 
-                       conn->ibc_peer->ibp_nid, e->status);
-                goto failed;
-        }
-
-        if (nob < base_nob) {
-                CERROR ("Short rx from "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, nob);
-                goto failed;
-        }
-
-        /* Receiver does any byte flipping if necessary... */
-
-        if (msg->ibm_magic == IBNAL_MSG_MAGIC) {
-                flipped = 0;
-        } else {
-                if (msg->ibm_magic != __swab32(IBNAL_MSG_MAGIC)) {
-                        CERROR ("Unrecognised magic: %08x from "LPX64"\n", 
-                                msg->ibm_magic, conn->ibc_peer->ibp_nid);
-                        goto failed;
-                }
-                flipped = 1;
-                __swab16s (&msg->ibm_version);
-                LASSERT (sizeof(msg->ibm_type) == 1);           /* single bytes need no swabbing */
-                LASSERT (sizeof(msg->ibm_credits) == 1);
-        }
-
-        if (msg->ibm_version != IBNAL_MSG_VERSION) {
-                CERROR ("Incompatible msg version %d (%d expected)\n",
-                        msg->ibm_version, IBNAL_MSG_VERSION);
-                goto failed;
-        }
-        /* NOTE(review): in the IBNAL_CKSUM section below ibm_nob is compared
-         * without being swabbed when 'flipped', and ibm_cksum is read with
-         * le32_to_cpu() although kibnal_check_sends() stores it exactly as
-         * kibnal_cksum() returns it - confirm this is intended for
-         * mixed-endian peers. */
-#if IBNAL_CKSUM
-        if (nob != msg->ibm_nob) {
-                CERROR ("Unexpected # bytes %d (%d expected)\n", nob, msg->ibm_nob);
-                goto failed;
-        }
-
-        msg_cksum = le32_to_cpu(msg->ibm_cksum);
-        msg->ibm_cksum = 0;     /* the sender zeroes the field before summing too */
-        computed_cksum = kibnal_cksum (msg, nob);
-        
-        if (msg_cksum != computed_cksum) {
-                CERROR ("Checksum failure %d: (%d expected)\n",
-                        computed_cksum, msg_cksum);
-                goto failed;
-        }
-        CDEBUG(D_NET, "cksum %x, nob %d\n", computed_cksum, nob);
-#endif
-
-        /* Have I received credits that will let me send?
-         * NOTE(review): the value is taken from the wire unchecked, while
-         * kibnal_check_sends() asserts ibc_credits <= IBNAL_MSG_QUEUE_SIZE. */
-        credits = msg->ibm_credits;
-        if (credits != 0) {
-                spin_lock_irqsave(&conn->ibc_lock, flags);
-                conn->ibc_credits += credits;
-                spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                
-                kibnal_check_sends(conn);
-        }
-
-        switch (msg->ibm_type) {
-        case IBNAL_MSG_NOOP:
-                kibnal_post_rx (rx, 1);
-                return;
-
-        case IBNAL_MSG_IMMEDIATE:
-                if (nob < base_nob + sizeof (kib_immediate_msg_t)) {
-                        CERROR ("Short IMMEDIATE from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                break;
-                
-        case IBNAL_MSG_PUT_RDMA:
-        case IBNAL_MSG_GET_RDMA:
-                if (nob < base_nob + sizeof (kib_rdma_msg_t)) {
-                        CERROR ("Short RDMA msg from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                if (flipped) {
-                        __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_key);
-                        __swab32s(&msg->ibm_u.rdma.ibrm_desc.rd_nob);
-                        __swab64s(&msg->ibm_u.rdma.ibrm_desc.rd_addr);
-                }
-                CDEBUG(D_NET, "%d RDMA: cookie "LPX64", key %x, addr "LPX64", nob %d\n",
-                       msg->ibm_type, msg->ibm_u.rdma.ibrm_cookie,
-                       msg->ibm_u.rdma.ibrm_desc.rd_key,
-                       msg->ibm_u.rdma.ibrm_desc.rd_addr,
-                       msg->ibm_u.rdma.ibrm_desc.rd_nob);
-                break;
-                
-        case IBNAL_MSG_PUT_DONE:
-        case IBNAL_MSG_GET_DONE:
-                if (nob < base_nob + sizeof (kib_completion_msg_t)) {
-                        CERROR ("Short COMPLETION msg from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid, nob);
-                        goto failed;
-                }
-                if (flipped)
-                        __swab32s(&msg->ibm_u.completion.ibcm_status);
-                
-                CDEBUG(D_NET, "%d DONE: cookie "LPX64", status %d\n",
-                       msg->ibm_type, msg->ibm_u.completion.ibcm_cookie,
-                       msg->ibm_u.completion.ibcm_status);
-
-                kibnal_complete_passive_rdma (conn, 
-                                              msg->ibm_u.completion.ibcm_cookie,
-                                              msg->ibm_u.completion.ibcm_status);
-                kibnal_post_rx (rx, 1);
-                return;
-                        
-        default:
-                CERROR ("Can't parse type from "LPX64": %d\n",
-                        conn->ibc_peer->ibp_nid, msg->ibm_type);
-                goto failed;
-        }
-
-        /* schedule for kibnal_rx() in thread context */
-        spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-        
-        list_add_tail (&rx->rx_list, &kibnal_data.kib_sched_rxq);
-        wake_up (&kibnal_data.kib_sched_waitq);
-        
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-        return;
-        
- failed:
-        CDEBUG(D_NET, "rx %p conn %p\n", rx, conn);
-        kibnal_close_conn(conn, -ECONNABORTED);
-
-        /* Don't re-post rx & drop its ref on conn */
-        kibnal_put_conn(conn);
-}
-
-/*
- * Process a received IMMEDIATE, PUT_RDMA or GET_RDMA message in thread
- * context (kibnal_rx_callback() queues such messages for this purpose).
- *
- * The portals header embedded in the message is passed to lib_parse(), and
- * rx->rx_rdma is used to detect whether an RDMA completion was started for
- * it.  An unmatched GET is completed immediately with -EIO.  Any other
- * message type is a bug.  The rx is re-posted (returning a credit) before
- * returning.
- */
-void
-kibnal_rx (kib_rx_t *rx)
-{
-        kib_msg_t   *msg = rx->rx_msg;
-
-        /* Clear flag so I can detect if I've sent an RDMA completion */
-        rx->rx_rdma = 0;
-
-        switch (msg->ibm_type) {
-        case IBNAL_MSG_GET_RDMA:
-                lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx);
-                /* If the incoming get was matched, I'll have initiated the
-                 * RDMA and the completion message... */
-                if (rx->rx_rdma)
-                        break;
-
-                /* Otherwise, I'll send a failed completion now to prevent
-                 * the peer's GET blocking for the full timeout. */
-                CERROR ("Completing unmatched RDMA GET from "LPX64"\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                kibnal_start_active_rdma (IBNAL_MSG_GET_DONE, -EIO,
-                                          rx, NULL, 0, NULL, NULL, 0, 0);
-                break;
-                
-        case IBNAL_MSG_PUT_RDMA:
-                lib_parse(&kibnal_lib, &msg->ibm_u.rdma.ibrm_hdr, rx);
-                if (rx->rx_rdma)
-                        break;
-                /* This is most unusual, since even if lib_parse() didn't
-                 * match anything, it should have asked us to read (and
-                 * discard) the payload.  The portals header must be
-                 * inconsistent with this message type, so it's the
-                 * sender's fault for sending garbage and she can time
-                 * herself out... */
-                CERROR ("Uncompleted RDMA PUT from "LPX64"\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                break;
-
-        case IBNAL_MSG_IMMEDIATE:
-                lib_parse(&kibnal_lib, &msg->ibm_u.immediate.ibim_hdr, rx);
-                /* immediate messages carry their payload inline, so no
-                 * RDMA completion may have been started for them */
-                LASSERT (!rx->rx_rdma);
-                break;
-                
-        default:
-                /* kibnal_rx_callback() only schedules the types above */
-                LBUG();
-                break;
-        }
-
-        kibnal_post_rx (rx, 1);
-}
-/*
- * Compiled out (#if 0): translate kernel virtual address 'vaddr' into a
- * physical address stored in '*physp'.
- *
- * The page is looked up with vmalloc_to_page() for addresses in the vmalloc
- * range (and, with CONFIG_HIGHMEM, in the pkmap range) and with
- * virt_to_page() otherwise.  Returns 0 on success or -EFAULT if no valid
- * page was found.
- */
-#if 0
-int
-kibnal_kvaddr_to_phys (unsigned long vaddr, __u64 *physp)
-{
-        struct page *page;
-
-        if (vaddr >= VMALLOC_START &&
-            vaddr < VMALLOC_END)
-                page = vmalloc_to_page ((void *)vaddr);
-#if CONFIG_HIGHMEM
-        else if (vaddr >= PKMAP_BASE &&
-                 vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE))
-                page = vmalloc_to_page ((void *)vaddr);
-        /* in 2.4 ^ just walks the page tables */
-#endif
-        else
-                page = virt_to_page (vaddr);
-
-        if (page == NULL ||
-            !VALID_PAGE (page))
-                return (-EFAULT);
-
-        *physp = kibnal_page2phys(page) + (vaddr & (PAGE_SIZE - 1));    /* page base + offset within the page */
-        return (0);
-}
-#endif
-
-/*
- * Register a virtually-addressed buffer for RDMA on behalf of 'tx'.
- *
- * 'offset' and 'nob' select the bytes to map within the 'niov' fragments
- * at 'iov'.  The selected range must lie inside a single fragment;
- * otherwise -EMSGSIZE is returned.  On success tx->tx_md holds the address,
- * handle and keys, tx->tx_mapped becomes KIB_TX_MAPPED and 0 is returned.
- * If ib_memory_register() fails its error code is returned.
- */
-int
-kibnal_map_iov (kib_tx_t *tx, enum ib_memory_access access,
-                 int niov, struct iovec *iov, int offset, int nob)
-{
-        unsigned long  start;
-        int            rc;
-
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-        LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
-        /* step over whole fragments that end before 'offset' */
-        for (; offset >= iov->iov_len; iov++) {
-                offset -= iov->iov_len;
-                niov--;
-                LASSERT (niov > 0);
-        }
-
-        /* everything requested has to fit in what's left of this fragment */
-        if (nob > iov->iov_len - offset) {
-                CERROR ("Can't map multiple vaddr fragments\n");
-                return (-EMSGSIZE);
-        }
-
-        start = ((unsigned long)iov->iov_base) + offset;
-        tx->tx_md.md_addr = (__u64)start;
-
-        rc = ib_memory_register (kibnal_data.kib_pd,
-                                 (void *)start, nob,
-                                 access,
-                                 &tx->tx_md.md_handle.mr,
-                                 &tx->tx_md.md_lkey,
-                                 &tx->tx_md.md_rkey);
-        if (rc == 0) {
-                tx->tx_mapped = KIB_TX_MAPPED;
-                return (0);
-        }
-
-        CERROR ("Can't map vaddr: %d\n", rc);
-        return (rc);
-}
-/*
- * Map the page fragments 'kiov' (starting 'offset' bytes in, 'nob' bytes
- * long) for RDMA on behalf of 'tx'.
- *
- * Builds a temporary array with one physical page entry per fragment and
- * registers it through the FMR pool (IBNAL_FMR) or
- * ib_memory_register_physical().  On success tx->tx_md holds the
- * address/handle/keys and tx->tx_mapped is set.  'access' is only used in
- * the non-FMR build.
- *
- * Returns 0 on success, -ENOMEM if the temporary array can't be allocated,
- * -EINVAL if a fragment after the first has a non-zero offset or is shorter
- * than a page while more than a page remains, -EMSGSIZE if more than
- * PTL_MD_MAX_IOV pages would be needed, or -EFAULT if registration fails.
- */
-int
-kibnal_map_kiov (kib_tx_t *tx, enum ib_memory_access access,
-                  int nkiov, ptl_kiov_t *kiov,
-                  int offset, int nob)
-{
-#if IBNAL_FMR
-        __u64                      *phys;
-        const int                   mapped = KIB_TX_MAPPED_FMR;
-#else
-        struct ib_physical_buffer  *phys;
-        const int                   mapped = KIB_TX_MAPPED;
-#endif
-        int                         page_offset;
-        int                         nphys;
-        int                         resid;
-        int                         phys_size;
-        int                         rc;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT (tx->tx_mapped == KIB_TX_UNMAPPED);
-
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        phys_size = nkiov * sizeof (*phys);     /* one entry per remaining fragment */
-        PORTAL_ALLOC(phys, phys_size);
-        if (phys == NULL) {
-                CERROR ("Can't allocate tmp phys\n");
-                return (-ENOMEM);
-        }
-
-        page_offset = kiov->kiov_offset + offset;
-#if IBNAL_FMR
-        phys[0] = kibnal_page2phys(kiov->kiov_page);
-#else
-        phys[0].address = kibnal_page2phys(kiov->kiov_page);
-        phys[0].size = PAGE_SIZE;
-#endif
-        nphys = 1;
-        resid = nob - (kiov->kiov_len - offset);        /* bytes still to cover after the first fragment */
-
-        while (resid > 0) {
-                kiov++;
-                nkiov--;
-                LASSERT (nkiov > 0);
-
-                if (kiov->kiov_offset != 0 ||
-                    ((resid > PAGE_SIZE) && 
-                     kiov->kiov_len < PAGE_SIZE)) {
-                        int i;
-                        /* Can't have gaps */
-                        CERROR ("Can't make payload contiguous in I/O VM:"
-                                "page %d, offset %d, len %d \n", nphys, 
-                                kiov->kiov_offset, kiov->kiov_len);
-
-                        for (i = -nphys; i < nkiov; i++)        /* kiov[-nphys] is the first fragment mapped above */
-                        {
-                                CERROR("kiov[%d] %p +%d for %d\n",
-                                       i, kiov[i].kiov_page, kiov[i].kiov_offset, kiov[i].kiov_len);
-                        }
-                        
-                        rc = -EINVAL;
-                        goto out;
-                }
-
-                if (nphys == PTL_MD_MAX_IOV) {
-                        CERROR ("payload too big (%d)\n", nphys);
-                        rc = -EMSGSIZE;
-                        goto out;
-                }
-
-                LASSERT (nphys * sizeof (*phys) < phys_size);
-#if IBNAL_FMR
-                phys[nphys] = kibnal_page2phys(kiov->kiov_page);
-#else
-                phys[nphys].address = kibnal_page2phys(kiov->kiov_page);
-                phys[nphys].size = PAGE_SIZE;
-#endif
-                nphys++;
-
-                resid -= PAGE_SIZE;
-        }
-
-#if 0
-        CWARN ("nphys %d, nob %d, page_offset %d\n", nphys, nob, page_offset);
-        for (rc = 0; rc < nphys; rc++)
-                CWARN ("   [%d] "LPX64" / %d\n", rc, phys[rc].address, phys[rc].size);
-#endif
-        tx->tx_md.md_addr = IBNAL_RDMA_BASE;
-
-#if IBNAL_FMR
-        rc = ib_fmr_register_physical (kibnal_data.kib_fmr_pool,
-                                       phys, nphys,
-                                       &tx->tx_md.md_addr,
-                                       page_offset,
-                                       &tx->tx_md.md_handle.fmr,
-                                       &tx->tx_md.md_lkey,
-                                       &tx->tx_md.md_rkey);
-#else
-        rc = ib_memory_register_physical (kibnal_data.kib_pd,
-                                          phys, nphys,
-                                          &tx->tx_md.md_addr,
-                                          nob, page_offset,
-                                          access,
-                                          &tx->tx_md.md_handle.mr,
-                                          &tx->tx_md.md_lkey,
-                                          &tx->tx_md.md_rkey);
-#endif
-        if (rc == 0) {
-                CDEBUG(D_NET, "Mapped %d pages %d bytes @ offset %d: lkey %x, rkey %x\n",
-                       nphys, nob, page_offset, tx->tx_md.md_lkey, tx->tx_md.md_rkey);
-                tx->tx_mapped = mapped;
-        } else {
-                CERROR ("Can't map phys: %d\n", rc);
-                rc = -EFAULT;   /* the registration error is logged above but not returned */
-        }
-
- out:
-        PORTAL_FREE(phys, phys_size);
-        return (rc);
-}
-
-/*
- * Return the first connection on peer->ibp_conns, or NULL if the peer has
- * none.  No reference is taken here; the callers in this file hold
- * kibnal_data.kib_global_lock and take their own ref on the result.
- */
-kib_conn_t *
-kibnal_find_conn_locked (kib_peer_t *peer)
-{
-        struct list_head *conns = &peer->ibp_conns;
-
-        if (list_empty (conns))
-                return (NULL);
-
-        /* any connection will do, so take the one at the head */
-        return (list_entry(conns->next, kib_conn_t, ibc_list));
-}
-/*
- * Post as many queued transmits on 'conn' as credits and send-queue space
- * allow.
- *
- * If nothing is queued and ibc_outstanding_credits has reached
- * IBNAL_CREDIT_HIGHWATER, a NOOP is queued first (provided an idle tx can
- * be had without blocking) so that the credits get sent to the peer.
- *
- * Each tx posted is moved to conn->ibc_active_txs and carries the conn's
- * outstanding credits in ibm_credits.  conn->ibc_lock is dropped around the
- * ib_send() calls.  If posting fails, the credit accounting is rolled back,
- * the conn is closed, and the tx is completed here if none of its work
- * items remain in flight.
- */
-void
-kibnal_check_sends (kib_conn_t *conn)
-{
-        unsigned long   flags;
-        kib_tx_t       *tx;
-        int             rc;
-        int             i;
-        int             done;
-        int             nwork;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        LASSERT (conn->ibc_nsends_posted <= IBNAL_MSG_QUEUE_SIZE);
-
-        if (list_empty(&conn->ibc_tx_queue) &&
-            conn->ibc_outstanding_credits >= IBNAL_CREDIT_HIGHWATER) {
-                spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                
-                tx = kibnal_get_idle_tx(0);     /* don't block */
-                if (tx != NULL)
-                        kibnal_init_tx_msg(tx, IBNAL_MSG_NOOP, 0);
-
-                spin_lock_irqsave(&conn->ibc_lock, flags);
-                
-                if (tx != NULL) {
-                        atomic_inc(&conn->ibc_refcount);        /* ref for the queued tx, as in kibnal_launch_tx() */
-                        kibnal_queue_tx_locked(tx, conn);
-                }
-        }
-
-        while (!list_empty (&conn->ibc_tx_queue)) {
-                tx = list_entry (conn->ibc_tx_queue.next, kib_tx_t, tx_list);
-
-                /* We rely on this for QP sizing */
-                LASSERT (tx->tx_nsp > 0 && tx->tx_nsp <= 2);
-
-                LASSERT (conn->ibc_outstanding_credits >= 0);
-                LASSERT (conn->ibc_outstanding_credits <= IBNAL_MSG_QUEUE_SIZE);
-                LASSERT (conn->ibc_credits >= 0);
-                LASSERT (conn->ibc_credits <= IBNAL_MSG_QUEUE_SIZE);
-
-                /* Not on ibc_rdma_queue */
-                LASSERT (!tx->tx_passive_rdma_wait);
-
-                if (conn->ibc_nsends_posted == IBNAL_MSG_QUEUE_SIZE)
-                        break;
-
-                if (conn->ibc_credits == 0)     /* no credits */
-                        break;
-                
-                if (conn->ibc_credits == 1 &&   /* last credit reserved for */
-                    conn->ibc_outstanding_credits == 0) /* giving back credits */
-                        break;
-
-                list_del (&tx->tx_list);
-
-                if (tx->tx_msg->ibm_type == IBNAL_MSG_NOOP &&
-                    (!list_empty(&conn->ibc_tx_queue) ||
-                     conn->ibc_outstanding_credits < IBNAL_CREDIT_HIGHWATER)) {
-                        /* redundant NOOP */
-                        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-                        kibnal_tx_done(tx);
-                        spin_lock_irqsave(&conn->ibc_lock, flags);
-                        continue;
-                }
-
-                tx->tx_msg->ibm_credits = conn->ibc_outstanding_credits;
-                conn->ibc_outstanding_credits = 0;
-
-                conn->ibc_nsends_posted++;
-                conn->ibc_credits--;
-
-                tx->tx_sending = tx->tx_nsp;    /* one completion expected per work item */
-                tx->tx_passive_rdma_wait = tx->tx_passive_rdma;
-                list_add (&tx->tx_list, &conn->ibc_active_txs);
-#if IBNAL_CKSUM
-                tx->tx_msg->ibm_cksum = 0;
-                tx->tx_msg->ibm_cksum = kibnal_cksum(tx->tx_msg, tx->tx_msg->ibm_nob);
-                CDEBUG(D_NET, "cksum %x, nob %d\n", tx->tx_msg->ibm_cksum, tx->tx_msg->ibm_nob);
-#endif
-                spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-                /* NB the gap between removing tx from the queue and sending it
-                 * allows message re-ordering to occur */
-
-                LASSERT (tx->tx_nsp > 0);
-
-                rc = -ECONNABORTED;     /* stands if the conn is no longer established */
-                nwork = 0;
-                if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                        tx->tx_status = 0;
-                        /* Driver only accepts 1 item at a time */
-                        for (i = 0; i < tx->tx_nsp; i++) {
-                                rc = ib_send (conn->ibc_qp, &tx->tx_sp[i], 1);
-                                if (rc != 0)
-                                        break;
-                                nwork++;
-                        }
-                }
-
-                spin_lock_irqsave (&conn->ibc_lock, flags);
-                if (rc != 0) {
-                        /* NB credits are transferred in the actual
-                         * message, which can only be the last work item */
-                        conn->ibc_outstanding_credits += tx->tx_msg->ibm_credits;
-                        conn->ibc_credits++;
-                        conn->ibc_nsends_posted--;
-
-                        tx->tx_status = rc;
-                        tx->tx_passive_rdma_wait = 0;
-                        tx->tx_sending -= tx->tx_nsp - nwork;   /* discount the work items that were never posted */
-
-                        done = (tx->tx_sending == 0);
-                        if (done)
-                                list_del (&tx->tx_list);
-                        
-                        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-                        
-                        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED)
-                                CERROR ("Error %d posting transmit to "LPX64"\n", 
-                                        rc, conn->ibc_peer->ibp_nid);
-                        else
-                                CDEBUG (D_NET, "Error %d posting transmit to "
-                                        LPX64"\n", rc, conn->ibc_peer->ibp_nid);
-
-                        kibnal_close_conn (conn, rc);
-
-                        if (done)
-                                kibnal_tx_done (tx);
-                        return;
-                }
-                
-        }
-
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-}
-/*
- * Completion handler for one send work item of a tx (dispatched from
- * kibnal_callback()).
- *
- * Decrements tx->tx_sending; when it reaches zero the conn's
- * ibc_nsends_posted is decremented, and if the tx is also not waiting for a
- * passive RDMA completion it is unlinked and passed to kibnal_tx_done().
- * A failed completion marks the tx -ECONNABORTED (unless it already has a
- * status) and closes the conn with -ENETDOWN; otherwise
- * kibnal_check_sends() is called to post more.
- */
-void
-kibnal_tx_callback (struct ib_cq_entry *e)
-{
-        kib_tx_t     *tx = (kib_tx_t *)kibnal_wreqid2ptr(e->work_request_id);
-        kib_conn_t   *conn;
-        unsigned long flags;
-        int           idle;
-
-        conn = tx->tx_conn;
-        LASSERT (conn != NULL);
-        LASSERT (tx->tx_sending != 0);
-
-        spin_lock_irqsave(&conn->ibc_lock, flags);
-
-        CDEBUG(D_NET, "conn %p tx %p [%d/%d]: %d\n", conn, tx,
-               tx->tx_nsp - tx->tx_sending, tx->tx_nsp,
-               e->status);
-
-        /* I could be racing with rdma completion.  Whoever makes 'tx' idle
-         * gets to free it, which also drops its ref on 'conn'.  If it's
-         * not me, then I take an extra ref on conn so it can't disappear
-         * under me.
-         * NB the extra ref is in fact taken unconditionally below and
-         * dropped again by kibnal_put_conn() just before returning. */
-
-        tx->tx_sending--;
-        idle = (tx->tx_sending == 0) &&         /* This is the final callback */
-               (!tx->tx_passive_rdma_wait);     /* Not waiting for RDMA completion */
-        if (idle)
-                list_del(&tx->tx_list);
-
-        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-               atomic_read (&conn->ibc_refcount));
-        atomic_inc (&conn->ibc_refcount);
-
-        if (tx->tx_sending == 0)
-                conn->ibc_nsends_posted--;
-
-        if (e->status != IB_COMPLETION_STATUS_SUCCESS &&
-            tx->tx_status == 0)
-                tx->tx_status = -ECONNABORTED;
-                
-        spin_unlock_irqrestore(&conn->ibc_lock, flags);
-
-        if (idle)
-                kibnal_tx_done (tx);
-
-        if (e->status != IB_COMPLETION_STATUS_SUCCESS) {
-                CERROR ("Tx completion to "LPX64" failed: %d\n", 
-                        conn->ibc_peer->ibp_nid, e->status);
-                kibnal_close_conn (conn, -ENETDOWN);
-        } else {
-                /* can I shovel some more sends out the door? */
-                kibnal_check_sends(conn);
-        }
-
-        kibnal_put_conn (conn);
-}
-
-/*
- * Completion-queue callback: hand the entry to the receive or the transmit
- * completion handler, depending on how its work request id was tagged.
- * 'cq' and 'arg' are not used.
- */
-void
-kibnal_callback (struct ib_cq *cq, struct ib_cq_entry *e, void *arg)
-{
-        if (!kibnal_wreqid_is_rx(e->work_request_id)) {
-                kibnal_tx_callback (e);
-                return;
-        }
-
-        kibnal_rx_callback (e);
-}
-
-void
-kibnal_init_tx_msg (kib_tx_t *tx, int type, int body_nob)
-{
-        struct ib_gather_scatter *gl = &tx->tx_gl[tx->tx_nsp];
-        struct ib_send_param     *sp = &tx->tx_sp[tx->tx_nsp];
-        int                       fence;
-        int                       nob = offsetof (kib_msg_t, ibm_u) + body_nob;
-
-        LASSERT (tx->tx_nsp >= 0 && 
-                 tx->tx_nsp < sizeof(tx->tx_sp)/sizeof(tx->tx_sp[0]));
-        LASSERT (nob <= IBNAL_MSG_SIZE);
-        
-        tx->tx_msg->ibm_magic = IBNAL_MSG_MAGIC;
-        tx->tx_msg->ibm_version = IBNAL_MSG_VERSION;
-        tx->tx_msg->ibm_type = type;
-#if IBNAL_CKSUM
-        tx->tx_msg->ibm_nob = nob;
-#endif
-        /* Fence the message if it's bundled with an RDMA read */
-        fence = (tx->tx_nsp > 0) &&
-                (type == IBNAL_MSG_PUT_DONE);
-
-        *gl = (struct ib_gather_scatter) {
-                .address = tx->tx_vaddr,
-                .length  = nob,
-                .key     = kibnal_data.kib_tx_pages->ibp_lkey,
-        };
-
-        /* NB If this is an RDMA read, the completion message must wait for
-         * the RDMA to complete.  Sends wait for previous RDMA writes
-         * anyway... */
-        *sp = (struct ib_send_param) {
-                .work_request_id      = kibnal_ptr2wreqid(tx, 0),
-                .op                   = IB_OP_SEND,
-                .gather_list          = gl,
-                .num_gather_entries   = 1,
-                .device_specific      = NULL,
-                .solicited_event      = 1,
-                .signaled             = 1,
-                .immediate_data_valid = 0,
-                .fence                = fence,
-                .inline_data          = 0,
-        };
-
-        tx->tx_nsp++;
-}
-
-/*
- * Queue 'tx' on 'conn' under conn->ibc_lock, then try to post whatever the
- * connection can currently send.
- */
-void
-kibnal_queue_tx (kib_tx_t *tx, kib_conn_t *conn)
-{
-        unsigned long irqflags;
-
-        /* the lock only covers the enqueue... */
-        spin_lock_irqsave(&conn->ibc_lock, irqflags);
-        kibnal_queue_tx_locked (tx, conn);
-        spin_unlock_irqrestore(&conn->ibc_lock, irqflags);
-
-        /* ...kibnal_check_sends() takes it again itself */
-        kibnal_check_sends(conn);
-}
-/*
- * Send 'tx' to 'nid'.
- *
- * If the peer has a connection, the tx takes a ref on it and is queued via
- * kibnal_queue_tx().  Otherwise the tx is parked on peer->ibp_tx_queue and,
- * unless a connection attempt is already flagged (ibp_connecting), the peer
- * is handed to the connd via kibnal_data.kib_connd_peers.
- *
- * The tx is completed with -EHOSTUNREACH if the peer is unknown, or if no
- * connect is in progress and ibp_reconnect_time has not yet been reached.
- */
-void
-kibnal_launch_tx (kib_tx_t *tx, ptl_nid_t nid)
-{
-        unsigned long    flags;
-        kib_peer_t      *peer;
-        kib_conn_t      *conn;
-        rwlock_t        *g_lock = &kibnal_data.kib_global_lock;
-
-        /* If I get here, I've committed to send, so I complete the tx with
-         * failure on any problems */
-        
-        LASSERT (tx->tx_conn == NULL);          /* only set when assigned a conn */
-        LASSERT (tx->tx_nsp > 0);               /* work items have been set up */
-
-        read_lock (g_lock);
-        
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
-                read_unlock (g_lock);
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-                return;
-        }
-
-        conn = kibnal_find_conn_locked (peer);
-        if (conn != NULL) {
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */
-                read_unlock (g_lock);
-                
-                kibnal_queue_tx (tx, conn);
-                return;
-        }
-        
-        /* Making one or more connections; I'll need a write lock...
-         * The read lock is dropped first, so the peer and conn lookups are
-         * repeated under the write lock. */
-        read_unlock (g_lock);
-        write_lock_irqsave (g_lock, flags);
-
-        peer = kibnal_find_peer_locked (nid);
-        if (peer == NULL) {
-                write_unlock_irqrestore (g_lock, flags);
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-                return;
-        }
-
-        conn = kibnal_find_conn_locked (peer);
-        if (conn != NULL) {
-                /* Connection exists; queue message on it */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount); /* 1 ref for the tx */
-                write_unlock_irqrestore (g_lock, flags);
-                
-                kibnal_queue_tx (tx, conn);
-                return;
-        }
-
-        if (peer->ibp_connecting == 0) {
-                if (!time_after_eq(jiffies, peer->ibp_reconnect_time)) {        /* too soon to try connecting again */
-                        write_unlock_irqrestore (g_lock, flags);
-                        tx->tx_status = -EHOSTUNREACH;
-                        kibnal_tx_done (tx);
-                        return;
-                }
-        
-                peer->ibp_connecting = 1;
-                atomic_inc (&peer->ibp_refcount); /* extra ref for connd */
-        
-                spin_lock (&kibnal_data.kib_connd_lock);
-        
-                list_add_tail (&peer->ibp_connd_list,
-                               &kibnal_data.kib_connd_peers);
-                wake_up (&kibnal_data.kib_connd_waitq);
-        
-                spin_unlock (&kibnal_data.kib_connd_lock);
-        }
-        
-        /* A connection is being established; queue the message... */
-        list_add_tail (&tx->tx_list, &peer->ibp_tx_queue);
-
-        write_unlock_irqrestore (g_lock, flags);
-}
-/*
- * Start a passive RDMA: map the local buffer described by libmsg->md and
- * send the peer a 'type' message carrying the cookie, rkey, address and
- * length of the mapping.
- *
- * type   - IBNAL_MSG_PUT_RDMA (buffer mapped for remote read) or
- *          IBNAL_MSG_GET_RDMA (mapped for remote write + local write)
- * nid    - destination peer
- * libmsg - message finalized when the tx completes; for GETs a reply msg
- *          is created as well
- * hdr    - portals header copied into the outgoing message
- *
- * Must not be called in interrupt context.  Returns PTL_OK once the tx has
- * been handed to kibnal_launch_tx(), or PTL_FAIL if mapping or reply
- * creation failed (the tx is then completed with the error).
- */
-ptl_err_t
-kibnal_start_passive_rdma (int type, ptl_nid_t nid,
-                            lib_msg_t *libmsg, ptl_hdr_t *hdr)
-{
-        int         nob = libmsg->md->length;
-        kib_tx_t   *tx;
-        kib_msg_t  *ibmsg;
-        int         rc;
-        int         access;
-        
-        LASSERT (type == IBNAL_MSG_PUT_RDMA || 
-                 type == IBNAL_MSG_GET_RDMA);
-        LASSERT (nob > 0);
-        LASSERT (!in_interrupt());              /* Mapping could block */
-
-        if (type == IBNAL_MSG_PUT_RDMA) {
-                access = IB_ACCESS_REMOTE_READ;
-        } else {
-                access = IB_ACCESS_REMOTE_WRITE |
-                         IB_ACCESS_LOCAL_WRITE;
-        }
-
-        tx = kibnal_get_idle_tx (1);           /* May block; caller is an app thread */
-        LASSERT (tx != NULL);
-
-        if ((libmsg->md->options & PTL_MD_KIOV) == 0) 
-                rc = kibnal_map_iov (tx, access,
-                                     libmsg->md->md_niov,
-                                     libmsg->md->md_iov.iov,
-                                     0, nob);
-        else
-                rc = kibnal_map_kiov (tx, access,
-                                      libmsg->md->md_niov, 
-                                      libmsg->md->md_iov.kiov,
-                                      0, nob);
-
-        if (rc != 0) {
-                CERROR ("Can't map RDMA for "LPX64": %d\n", nid, rc);
-                goto failed;
-        }
-        
-        if (type == IBNAL_MSG_GET_RDMA) {
-                /* reply gets finalized when tx completes */
-                tx->tx_libmsg[1] = lib_create_reply_msg(&kibnal_lib, 
-                                                        nid, libmsg);
-                if (tx->tx_libmsg[1] == NULL) {
-                        CERROR ("Can't create reply for GET -> "LPX64"\n",
-                                nid);
-                        rc = -ENOMEM;
-                        goto failed;
-                }
-        }
-        
-        tx->tx_passive_rdma = 1;        /* copied to tx_passive_rdma_wait by kibnal_check_sends() when posted */
-
-        ibmsg = tx->tx_msg;
-
-        ibmsg->ibm_u.rdma.ibrm_hdr = *hdr;
-        ibmsg->ibm_u.rdma.ibrm_cookie = tx->tx_passive_rdma_cookie;
-        ibmsg->ibm_u.rdma.ibrm_desc.rd_key = tx->tx_md.md_rkey;
-        ibmsg->ibm_u.rdma.ibrm_desc.rd_addr = tx->tx_md.md_addr;
-        ibmsg->ibm_u.rdma.ibrm_desc.rd_nob = nob;
-
-        kibnal_init_tx_msg (tx, type, sizeof (kib_rdma_msg_t));
-
-        CDEBUG(D_NET, "Passive: %p cookie "LPX64", key %x, addr "
-               LPX64", nob %d\n",
-               tx, tx->tx_passive_rdma_cookie, tx->tx_md.md_rkey,
-               tx->tx_md.md_addr, nob);
-        
-        /* libmsg gets finalized when tx completes. */
-        tx->tx_libmsg[0] = libmsg;
-
-        kibnal_launch_tx(tx, nid);
-        return (PTL_OK);
-
- failed:
-        tx->tx_status = rc;
-        kibnal_tx_done (tx);
-        return (PTL_FAIL);
-}
-
-/*
- * Service a peer's RDMA request that arrived in 'rx' and tell the peer how
- * it went.
- *
- * type    IBNAL_MSG_GET_DONE (we RDMA-write our data to the peer, 'rx' must
- *         hold an IBNAL_MSG_GET_RDMA) or IBNAL_MSG_PUT_DONE (we RDMA-read the
- *         peer's data, 'rx' must hold an IBNAL_MSG_PUT_RDMA).
- * status  completion status reported to the peer; must be 0 if nob != 0.
- * rx      the received request; supplies the remote address/key, the cookie
- *         echoed in the completion message, and the connection to send on.
- * libmsg  finalized here when no data moves, otherwise handed to the tx.
- * niov, iov, kiov, offset, nob
- *         local buffer; at most one of 'iov'/'kiov' may be non-NULL.
- *
- * Nothing is returned: if no tx descriptor is available 'libmsg' is
- * finalized with PTL_NO_SPACE and no completion message is sent.
- */
-void
-kibnal_start_active_rdma (int type, int status,
-                           kib_rx_t *rx, lib_msg_t *libmsg,
-                           unsigned int niov,
-                           struct iovec *iov, ptl_kiov_t *kiov,
-                           size_t offset, size_t nob)
-{
-        kib_msg_t    *rxmsg = rx->rx_msg;
-        kib_msg_t    *txmsg;
-        kib_tx_t     *tx;
-        int           access;
-        int           rdma_op;
-        int           rc;
-
-        /* 'offset' and 'nob' are size_t: print them with LPSZ, not %d */
-        CDEBUG(D_NET, "type %d, status %d, niov %d, offset "LPSZ
-               ", nob "LPSZ"\n",
-               type, status, niov, offset, nob);
-
-        /* Called by scheduler */
-        LASSERT (!in_interrupt ());
-
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        /* No data if we're completing with failure */
-        LASSERT (status == 0 || nob == 0);
-
-        LASSERT (type == IBNAL_MSG_GET_DONE ||
-                 type == IBNAL_MSG_PUT_DONE);
-
-        /* Flag I'm completing the RDMA.  Even if I fail to send the
-         * completion message, I will have tried my best so further
-         * attempts shouldn't be tried. */
-        LASSERT (!rx->rx_rdma);
-        rx->rx_rdma = 1;
-
-        if (type == IBNAL_MSG_GET_DONE) {
-                /* we write our data into the peer's buffer */
-                access   = 0;
-                rdma_op  = IB_OP_RDMA_WRITE;
-                LASSERT (rxmsg->ibm_type == IBNAL_MSG_GET_RDMA);
-        } else {
-                /* we read the peer's data into our buffer */
-                access   = IB_ACCESS_LOCAL_WRITE;
-                rdma_op  = IB_OP_RDMA_READ;
-                LASSERT (rxmsg->ibm_type == IBNAL_MSG_PUT_RDMA);
-        }
-
-        tx = kibnal_get_idle_tx (0);           /* Mustn't block */
-        if (tx == NULL) {
-                CERROR ("tx descs exhausted on RDMA from "LPX64
-                        " completing locally with failure\n",
-                        rx->rx_conn->ibc_peer->ibp_nid);
-                lib_finalize (&kibnal_lib, NULL, libmsg, PTL_NO_SPACE);
-                return;
-        }
-        LASSERT (tx->tx_nsp == 0);
-
-        if (nob != 0) {
-                /* We actually need to transfer some data (the transfer
-                 * size could get truncated to zero when the incoming
-                 * message is matched) */
-
-                if (kiov != NULL)
-                        rc = kibnal_map_kiov (tx, access,
-                                              niov, kiov, offset, nob);
-                else
-                        rc = kibnal_map_iov (tx, access,
-                                             niov, iov, offset, nob);
-
-                if (rc != 0) {
-                        CERROR ("Can't map RDMA -> "LPX64": %d\n",
-                                rx->rx_conn->ibc_peer->ibp_nid, rc);
-                        /* We'll skip the RDMA and complete with failure. */
-                        status = rc;
-                        nob = 0;
-                } else {
-                        /* single gather entry covering the mapped region */
-                        tx->tx_gl[0] = (struct ib_gather_scatter) {
-                                .address = tx->tx_md.md_addr,
-                                .length  = nob,
-                                .key     = tx->tx_md.md_lkey,
-                        };
-
-                        /* the RDMA work request itself; remote address and
-                         * key come from the peer's request */
-                        tx->tx_sp[0] = (struct ib_send_param) {
-                                .work_request_id      = kibnal_ptr2wreqid(tx, 0),
-                                .op                   = rdma_op,
-                                .gather_list          = &tx->tx_gl[0],
-                                .num_gather_entries   = 1,
-                                .remote_address       = rxmsg->ibm_u.rdma.ibrm_desc.rd_addr,
-                                .rkey                 = rxmsg->ibm_u.rdma.ibrm_desc.rd_key,
-                                .device_specific      = NULL,
-                                .solicited_event      = 0,
-                                .signaled             = 1,
-                                .immediate_data_valid = 0,
-                                .fence                = 0,
-                                .inline_data          = 0,
-                        };
-
-                        tx->tx_nsp = 1;
-                }
-        }
-
-        txmsg = tx->tx_msg;
-
-        /* echo the peer's cookie so it can match the completion */
-        txmsg->ibm_u.completion.ibcm_cookie = rxmsg->ibm_u.rdma.ibrm_cookie;
-        txmsg->ibm_u.completion.ibcm_status = status;
-
-        /* NB the assertions below expect this call to have bumped
-         * tx_nsp by one for the completion message */
-        kibnal_init_tx_msg(tx, type, sizeof (kib_completion_msg_t));
-
-        if (status == 0 && nob != 0) {
-                LASSERT (tx->tx_nsp > 1);
-                /* RDMA: libmsg gets finalized when the tx completes.  This
-                 * is after the completion message has been sent, which in
-                 * turn is after the RDMA has finished. */
-                tx->tx_libmsg[0] = libmsg;
-        } else {
-                LASSERT (tx->tx_nsp == 1);
-                /* No RDMA: local completion happens now! */
-                CDEBUG(D_WARNING,"No data: immediate completion\n");
-                lib_finalize (&kibnal_lib, NULL, libmsg,
-                              status == 0 ? PTL_OK : PTL_FAIL);
-        }
-
-        /* +1 ref for this tx... */
-        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-               rx->rx_conn, rx->rx_conn->ibc_state,
-               rx->rx_conn->ibc_peer->ibp_nid,
-               atomic_read (&rx->rx_conn->ibc_refcount));
-        atomic_inc (&rx->rx_conn->ibc_refcount);
-        /* ...and queue it up */
-        kibnal_queue_tx(tx, rx->rx_conn);
-}
-
-/*
- * Common send path for vaddr ('payload_iov') and page ('payload_kiov')
- * payloads; at most one of the two may be non-NULL.
- *
- * Depending on 'type' and size the message is either
- *  - handed to kibnal_start_active_rdma() (REPLY to an IBNAL_MSG_GET_RDMA),
- *  - handed to kibnal_start_passive_rdma() (GET/PUT too big for a message),
- *  - or copied into an IBNAL_MSG_IMMEDIATE message and launched to 'nid'.
- *
- * 'private' is the incoming kib_rx_t when type is PTL_MSG_REPLY.
- *
- * Returns PTL_OK once the message has been handed on, PTL_FAIL on an
- * inconsistent or oversized REPLY, PTL_NO_SPACE if no tx descriptor is
- * available, or whatever kibnal_start_passive_rdma() returns.
- */
-ptl_err_t
-kibnal_sendmsg(lib_nal_t    *nal,
-                void         *private,
-                lib_msg_t    *libmsg,
-                ptl_hdr_t    *hdr,
-                int           type,
-                ptl_nid_t     nid,
-                ptl_pid_t     pid,
-                unsigned int  payload_niov,
-                struct iovec *payload_iov,
-                ptl_kiov_t   *payload_kiov,
-                size_t        payload_offset,
-                size_t        payload_nob)
-{
-        kib_msg_t  *ibmsg;
-        kib_tx_t   *tx;
-        int         nob;
-
-        /* NB 'private' is different depending on what we're sending.... */
-
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-               " pid %d\n", payload_nob, payload_niov, nid , pid);
-
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
-        /* Thread context if we're sending payload */
-        LASSERT (!in_interrupt() || payload_niov == 0);
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-        switch (type) {
-        default:
-                LBUG();
-                return (PTL_FAIL);
-
-        case PTL_MSG_REPLY: {
-                /* reply's 'private' is the incoming receive */
-                kib_rx_t *rx = private;
-
-                /* RDMA reply expected? */
-                if (rx->rx_msg->ibm_type == IBNAL_MSG_GET_RDMA) {
-                        kibnal_start_active_rdma(IBNAL_MSG_GET_DONE, 0,
-                                                 rx, libmsg, payload_niov,
-                                                 payload_iov, payload_kiov,
-                                                 payload_offset, payload_nob);
-                        return (PTL_OK);
-                }
-
-                /* Incoming message consistent with immediate reply? */
-                if (rx->rx_msg->ibm_type != IBNAL_MSG_IMMEDIATE) {
-                        CERROR ("REPLY to "LPX64" bad opbm type %d!!!\n",
-                                nid, rx->rx_msg->ibm_type);
-                        return (PTL_FAIL);
-                }
-
-                /* Will it fit in a message?  NB a reply that exactly fills
-                 * the message is fine: this must be the same '>' test the
-                 * GET case below uses to decide whether the reply needs
-                 * RDMA, or a GET sent as immediate could be refused here. */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob > IBNAL_MSG_SIZE) {
-                        CERROR("REPLY for "LPX64" too big (RDMA not requested): "
-                               LPSZ"\n", nid, payload_nob);
-                        return (PTL_FAIL);
-                }
-                break;
-        }
-
-        case PTL_MSG_GET:
-                /* might the REPLY message be big enough to need RDMA? */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[libmsg->md->length]);
-                if (nob > IBNAL_MSG_SIZE)
-                        return (kibnal_start_passive_rdma(IBNAL_MSG_GET_RDMA,
-                                                          nid, libmsg, hdr));
-                break;
-
-        case PTL_MSG_ACK:
-                LASSERT (payload_nob == 0);
-                break;
-
-        case PTL_MSG_PUT:
-                /* Is the payload big enough to need RDMA? */
-                nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[payload_nob]);
-                if (nob > IBNAL_MSG_SIZE)
-                        return (kibnal_start_passive_rdma(IBNAL_MSG_PUT_RDMA,
-                                                          nid, libmsg, hdr));
-
-                break;
-        }
-
-        /* Only ask for a blocking allocation when it's not an ACK or REPLY
-         * and we're not in interrupt context */
-        tx = kibnal_get_idle_tx(!(type == PTL_MSG_ACK ||
-                                  type == PTL_MSG_REPLY ||
-                                  in_interrupt()));
-        if (tx == NULL) {
-                CERROR ("Can't send %d to "LPX64": tx descs exhausted%s\n",
-                        type, nid, in_interrupt() ? " (intr)" : "");
-                return (PTL_NO_SPACE);
-        }
-
-        ibmsg = tx->tx_msg;
-        ibmsg->ibm_u.immediate.ibim_hdr = *hdr;
-
-        /* copy the payload inline after the header */
-        if (payload_nob > 0) {
-                if (payload_kiov != NULL)
-                        lib_copy_kiov2buf(ibmsg->ibm_u.immediate.ibim_payload,
-                                          payload_niov, payload_kiov,
-                                          payload_offset, payload_nob);
-                else
-                        lib_copy_iov2buf(ibmsg->ibm_u.immediate.ibim_payload,
-                                         payload_niov, payload_iov,
-                                         payload_offset, payload_nob);
-        }
-
-        kibnal_init_tx_msg (tx, IBNAL_MSG_IMMEDIATE,
-                            offsetof(kib_immediate_msg_t,
-                                     ibim_payload[payload_nob]));
-
-        /* libmsg gets finalized when tx completes */
-        tx->tx_libmsg[0] = libmsg;
-
-        kibnal_launch_tx(tx, nid);
-        return (PTL_OK);
-}
-
-/* Send entry point for payloads described by virtual addresses: forwards to
- * kibnal_sendmsg() with no page fragments. */
-ptl_err_t
-kibnal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-               unsigned int payload_niov, struct iovec *payload_iov,
-               size_t payload_offset, size_t payload_len)
-{
-        ptl_kiov_t *no_kiov = NULL;
-        ptl_err_t   result;
-
-        result = kibnal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                                payload_niov, payload_iov, no_kiov,
-                                payload_offset, payload_len);
-        return (result);
-}
-
-/* Send entry point for payloads described by page fragments: forwards to
- * kibnal_sendmsg() with no vaddr fragments. */
-ptl_err_t
-kibnal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-                     ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                     unsigned int payload_niov, ptl_kiov_t *payload_kiov,
-                     size_t payload_offset, size_t payload_len)
-{
-        struct iovec *no_iov = NULL;
-        ptl_err_t     result;
-
-        result = kibnal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                                payload_niov, no_iov, payload_kiov,
-                                payload_offset, payload_len);
-        return (result);
-}
-
-/*
- * Common receive path for vaddr ('iov') and page ('kiov') destinations; at
- * most one of the two may be non-NULL.  'private' is the kib_rx_t holding
- * the received message, 'mlen' the number of bytes to deliver and 'rlen'
- * the number of bytes the sender sent (mlen <= rlen).
- *
- *  IBNAL_MSG_IMMEDIATE  copy 'mlen' bytes out of the message and finalize.
- *  IBNAL_MSG_GET_RDMA   nothing to deliver ('libmsg' must be NULL).
- *  IBNAL_MSG_PUT_RDMA   start the RDMA via kibnal_start_active_rdma().
- *
- * Returns PTL_OK, or PTL_FAIL if an immediate message claims more payload
- * than fits in IBNAL_MSG_SIZE.
- */
-ptl_err_t
-kibnal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
-                 unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov,
-                 size_t offset, size_t mlen, size_t rlen)
-{
-        kib_rx_t    *rx = private;
-        kib_msg_t   *rxmsg = rx->rx_msg;
-        int          msg_nob;
-
-        LASSERT (mlen <= rlen);
-        LASSERT (!in_interrupt ());
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        switch (rxmsg->ibm_type) {
-        default:
-                LBUG();
-                return (PTL_FAIL);
-
-        case IBNAL_MSG_IMMEDIATE:
-                msg_nob = offsetof(kib_msg_t, ibm_u.immediate.ibim_payload[rlen]);
-                if (msg_nob > IBNAL_MSG_SIZE) {
-                        /* 'rlen' is a size_t: print it with LPSZ, not %d */
-                        CERROR ("Immediate message from "LPX64" too big: "
-                                LPSZ"\n",
-                                rxmsg->ibm_u.immediate.ibim_hdr.src_nid, rlen);
-                        return (PTL_FAIL);
-                }
-
-                if (kiov != NULL)
-                        lib_copy_buf2kiov(niov, kiov, offset,
-                                          rxmsg->ibm_u.immediate.ibim_payload,
-                                          mlen);
-                else
-                        lib_copy_buf2iov(niov, iov, offset,
-                                         rxmsg->ibm_u.immediate.ibim_payload,
-                                         mlen);
-
-                lib_finalize (nal, NULL, libmsg, PTL_OK);
-                return (PTL_OK);
-
-        case IBNAL_MSG_GET_RDMA:
-                /* We get called here just to discard any junk after the
-                 * GET hdr. */
-                LASSERT (libmsg == NULL);
-                lib_finalize (nal, NULL, libmsg, PTL_OK);
-                return (PTL_OK);
-
-        case IBNAL_MSG_PUT_RDMA:
-                /* libmsg is finalized by the RDMA path */
-                kibnal_start_active_rdma (IBNAL_MSG_PUT_DONE, 0,
-                                          rx, libmsg,
-                                          niov, iov, kiov, offset, mlen);
-                return (PTL_OK);
-        }
-}
-
-/* Receive entry point for vaddr destinations: forwards to kibnal_recvmsg()
- * with no page fragments. */
-ptl_err_t
-kibnal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-              unsigned int niov, struct iovec *iov,
-              size_t offset, size_t mlen, size_t rlen)
-{
-        ptl_kiov_t *no_kiov = NULL;
-        ptl_err_t   result;
-
-        result = kibnal_recvmsg (nal, private, msg, niov, iov, no_kiov,
-                                 offset, mlen, rlen);
-        return (result);
-}
-
-/* Receive entry point for page destinations: forwards to kibnal_recvmsg()
- * with no vaddr fragments. */
-ptl_err_t
-kibnal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                     unsigned int niov, ptl_kiov_t *kiov,
-                     size_t offset, size_t mlen, size_t rlen)
-{
-        struct iovec *no_iov = NULL;
-        ptl_err_t     result;
-
-        result = kibnal_recvmsg (nal, private, msg, niov, no_iov, kiov,
-                                 offset, mlen, rlen);
-        return (result);
-}
-
-/* Spawn a kernel thread running fn(arg).  On success the thread is counted
- * in kib_nthreads and 0 is returned; otherwise the negative value from
- * kernel_thread() is returned. */
-int
-kibnal_thread_start (int (*fn)(void *arg), void *arg)
-{
-        long    rc = kernel_thread (fn, arg, 0);
-
-        if (rc >= 0) {
-                atomic_inc (&kibnal_data.kib_nthreads);
-                return (0);
-        }
-
-        return ((int)rc);
-}
-
-/* Drop one from the kib_nthreads count that kibnal_thread_start() bumps. */
-void
-kibnal_thread_fini (void)
-{
-        atomic_t *nthreads = &kibnal_data.kib_nthreads;
-
-        atomic_dec (nthreads);
-}
-
-/* Begin closing 'conn', which must be ESTABLISHED or CONNECTING: mark it
- * IBNAL_CONN_DEATHROW, queue it on kib_connd_conns and wake kib_connd_waitq.
- * 'error' is only reported in the debug message (at D_ERROR level when
- * non-zero). */
-void
-kibnal_close_conn_locked (kib_conn_t *conn, int error)
-{
-        /* This just does the immediate housekeeping, and schedules the
-         * connection for the connd to finish off.
-         * Caller holds kib_global_lock exclusively in irq context */
-        kib_peer_t   *peer = conn->ibc_peer;
-
-        CDEBUG (error == 0 ? D_NET : D_ERROR,
-                "closing conn to "LPX64": error %d\n", peer->ibp_nid, error);
-        
-        LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED ||
-                 conn->ibc_state == IBNAL_CONN_CONNECTING);
-
-        if (conn->ibc_state == IBNAL_CONN_ESTABLISHED) {
-                /* kib_connd_conns takes ibc_list's ref */
-                list_del (&conn->ibc_list);
-        } else {
-                /* new ref for kib_connd_conns */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount);
-        }
-        
-        /* checked after the list_del above, so an empty list means this
-         * was the peer's last conn */
-        if (list_empty (&peer->ibp_conns) &&
-            peer->ibp_persistence == 0) {
-                /* Non-persistent peer with no more conns... */
-                kibnal_unlink_peer_locked (peer);
-        }
-
-        conn->ibc_state = IBNAL_CONN_DEATHROW;
-
-        /* Schedule conn for closing/destruction */
-        spin_lock (&kibnal_data.kib_connd_lock);
-
-        list_add_tail (&conn->ibc_list, &kibnal_data.kib_connd_conns);
-        wake_up (&kibnal_data.kib_connd_waitq);
-                
-        spin_unlock (&kibnal_data.kib_connd_lock);
-}
-
-/* Close 'conn' with reason 'why' unless it is already past the ESTABLISHED
- * state.  Takes kib_global_lock for writing around the check.  Returns the
- * number of connections closed (0 or 1). */
-int
-kibnal_close_conn (kib_conn_t *conn, int why)
-{
-        unsigned long     irq_flags;
-        int               nclosed;
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, irq_flags);
-
-        LASSERT (conn->ibc_state >= IBNAL_CONN_CONNECTING);
-
-        if (conn->ibc_state > IBNAL_CONN_ESTABLISHED) {
-                /* someone else got here first */
-                nclosed = 0;
-        } else {
-                kibnal_close_conn_locked (conn, why);
-                nclosed = 1;
-        }
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, irq_flags);
-        return (nclosed);
-}
-
-/* Account for a failed connection attempt to 'peer' with error 'rc' (must
- * be non-zero).  If it was the last attempt in progress and the peer has no
- * connections, back off the reconnect interval and complete all the peer's
- * queued transmits with -EHOSTUNREACH.  'active' is not used here. */
-void
-kibnal_peer_connect_failed (kib_peer_t *peer, int active, int rc)
-{
-        LIST_HEAD        (zombies);
-        kib_tx_t         *tx;
-        unsigned long     flags;
-
-        LASSERT (rc != 0);
-        LASSERT (peer->ibp_reconnect_interval >= IBNAL_MIN_RECONNECT_INTERVAL);
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        LASSERT (peer->ibp_connecting != 0);
-        peer->ibp_connecting--;
-
-        if (peer->ibp_connecting != 0) {
-                /* another connection attempt under way (loopback?)... */
-                write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-                return;
-        }
-
-        if (list_empty(&peer->ibp_conns)) {
-                /* Say when active connection can be re-attempted */
-                peer->ibp_reconnect_time = jiffies + peer->ibp_reconnect_interval;
-                /* Increase reconnection interval */
-                peer->ibp_reconnect_interval = MIN (peer->ibp_reconnect_interval * 2,
-                                                    IBNAL_MAX_RECONNECT_INTERVAL);
-        
-                /* Take peer's blocked transmits; I'll complete
-                 * them with error */
-                while (!list_empty (&peer->ibp_tx_queue)) {
-                        tx = list_entry (peer->ibp_tx_queue.next,
-                                         kib_tx_t, tx_list);
-                        
-                        list_del (&tx->tx_list);
-                        list_add_tail (&tx->tx_list, &zombies);
-                }
-                
-                if (kibnal_peer_active(peer) &&
-                    (peer->ibp_persistence == 0)) {
-                        /* failed connection attempt on non-persistent peer */
-                        kibnal_unlink_peer_locked (peer);
-                }
-        } else {
-                /* Can't have blocked transmits if there are connections */
-                LASSERT (list_empty(&peer->ibp_tx_queue));
-        }
-        
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        /* The txs were moved to the private 'zombies' list above, so they
-         * can be completed here without holding the global lock */
-        if (!list_empty (&zombies))
-                CERROR ("Deleting messages for "LPX64": connection failed\n",
-                        peer->ibp_nid);
-
-        while (!list_empty (&zombies)) {
-                tx = list_entry (zombies.next, kib_tx_t, tx_list);
-
-                list_del (&tx->tx_list);
-                /* complete now */
-                tx->tx_status = -EHOSTUNREACH;
-                kibnal_tx_done (tx);
-        }
-}
-
-/* Finish a connection attempt on 'conn'.  'active' is non-zero when the
- * conn carries a connreq (asserted below), which is freed here; 'status' is
- * 0 on success or a negative errno.
- *
- * On success the conn becomes ESTABLISHED, is added to its peer's conn list,
- * takes over the peer's queued transmits and has all its receives posted.
- * On failure the conn is scheduled for close (if it got as far as
- * CONNECTING) or marked ZOMBIE, and the peer is told via
- * kibnal_peer_connect_failed(). */
-void
-kibnal_connreq_done (kib_conn_t *conn, int active, int status)
-{
-        int               state = conn->ibc_state;
-        kib_peer_t       *peer = conn->ibc_peer;
-        kib_tx_t         *tx;
-        unsigned long     flags;
-        int               rc;
-        int               i;
-
-        /* passive connection has no connreq & vice versa */
-        LASSERT (!active == !(conn->ibc_connreq != NULL));
-        if (active) {
-                PORTAL_FREE (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
-                conn->ibc_connreq = NULL;
-        }
-
-        if (state == IBNAL_CONN_CONNECTING) {
-                /* Install common (active/passive) callback for
-                 * disconnect/idle notification if I got as far as getting
-                 * a CM comm_id */
-                rc = tsIbCmCallbackModify(conn->ibc_comm_id, 
-                                          kibnal_conn_callback, conn);
-                LASSERT (rc == 0);
-        }
-        
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        LASSERT (peer->ibp_connecting != 0);
-        
-        if (status == 0) {                         
-                /* connection established... */
-                LASSERT (state == IBNAL_CONN_CONNECTING);
-                conn->ibc_state = IBNAL_CONN_ESTABLISHED;
-
-                if (!kibnal_peer_active(peer)) {
-                        /* ...but peer deleted meantime */
-                        /* NOTE(review): this falls through to the failure
-                         * path with ibc_state == ESTABLISHED, where
-                         * kibnal_close_conn_locked() does list_del() on
-                         * ibc_list although this function hasn't added the
-                         * conn to any list -- confirm that is safe */
-                        status = -ECONNABORTED;
-                }
-        } else {
-                LASSERT (state == IBNAL_CONN_INIT_QP ||
-                         state == IBNAL_CONN_CONNECTING);
-        }
-
-        if (status == 0) {
-                /* Everything worked! */
-
-                peer->ibp_connecting--;
-
-                /* +1 ref for ibc_list; caller(== CM)'s ref remains until
-                 * the IB_CM_IDLE callback */
-                CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_inc (&conn->ibc_refcount);
-                list_add (&conn->ibc_list, &peer->ibp_conns);
-                
-                /* reset reconnect interval for next attempt */
-                peer->ibp_reconnect_interval = IBNAL_MIN_RECONNECT_INTERVAL;
-
-                /* post blocked sends to the new connection */
-                spin_lock (&conn->ibc_lock);
-                
-                while (!list_empty (&peer->ibp_tx_queue)) {
-                        tx = list_entry (peer->ibp_tx_queue.next, 
-                                         kib_tx_t, tx_list);
-                        
-                        list_del (&tx->tx_list);
-
-                        /* +1 ref for each tx */
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-                        atomic_inc (&conn->ibc_refcount);
-                        kibnal_queue_tx_locked (tx, conn);
-                }
-                
-                spin_unlock (&conn->ibc_lock);
-
-                /* Nuke any dangling conns from a different peer instance... */
-                kibnal_close_stale_conns_locked (conn->ibc_peer,
-                                                 conn->ibc_incarnation);
-
-                write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-                /* queue up all the receives */
-                for (i = 0; i < IBNAL_RX_MSGS; i++) {
-                        /* +1 ref for rx desc */
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, conn->ibc_peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-                        atomic_inc (&conn->ibc_refcount);
-
-                        CDEBUG(D_NET, "RX[%d] %p->%p - "LPX64"\n",
-                               i, &conn->ibc_rxs[i], conn->ibc_rxs[i].rx_msg,
-                               conn->ibc_rxs[i].rx_vaddr);
-
-                        kibnal_post_rx (&conn->ibc_rxs[i], 0);
-                }
-
-                kibnal_check_sends (conn);
-                return;
-        }
-
-        /* connection failed */
-        /* NB 'state' is the conn's state on entry, not the possibly
-         * updated conn->ibc_state */
-        if (state == IBNAL_CONN_CONNECTING) {
-                /* schedule for connd to close */
-                kibnal_close_conn_locked (conn, status);
-        } else {
-                /* Don't have a CM comm_id; just wait for refs to drain */
-                conn->ibc_state = IBNAL_CONN_ZOMBIE;
-        } 
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        kibnal_peer_connect_failed (conn->ibc_peer, active, status);
-
-        if (state != IBNAL_CONN_CONNECTING) {
-                /* drop caller's ref if we're not waiting for the
-                 * IB_CM_IDLE callback */
-                kibnal_put_conn (conn);
-        }
-}
-
-/*
- * Set up a passive connection for an incoming request from 'nid'.
- *
- * connp        receives the new conn on success.
- * cid          CM communication id, stored in the conn.
- * nid          peer's NID; an existing peer is reused, otherwise a new one
- *              is added to the peer table.
- * incarnation  peer's incarnation, stored in the conn.
- * queue_depth  peer's message queue depth; must equal IBNAL_MSG_QUEUE_SIZE.
- *
- * Returns 0 with the conn in IBNAL_CONN_CONNECTING state and the peer's
- * ibp_connecting count bumped, -EPROTO on a queue depth mismatch, or
- * -ENOMEM if the conn or peer can't be allocated.
- */
-int
-kibnal_accept (kib_conn_t **connp, tTS_IB_CM_COMM_ID cid,
-                ptl_nid_t nid, __u64 incarnation, int queue_depth)
-{
-        kib_conn_t    *conn;
-        kib_peer_t    *peer;
-        kib_peer_t    *peer2;
-        unsigned long  flags;
-
-        /* Validate the request before allocating anything, so a bad queue
-         * depth can't leak a conn */
-        if (queue_depth != IBNAL_MSG_QUEUE_SIZE) {
-                CERROR("Can't accept "LPX64": bad queue depth %d (%d expected)\n",
-                       nid, queue_depth, IBNAL_MSG_QUEUE_SIZE);
-                return (-EPROTO);
-        }
-
-        conn = kibnal_create_conn();
-        if (conn == NULL)
-                return (-ENOMEM);
-
-        /* assume 'nid' is a new peer */
-        peer = kibnal_create_peer (nid);
-        if (peer == NULL) {
-                /* NB conn->ibc_peer hasn't been set yet, so report 'nid'
-                 * rather than dereferencing it */
-                CDEBUG(D_NET, "--conn[%p] state %d -> "LPX64" (%d)\n",
-                       conn, conn->ibc_state, nid,
-                       atomic_read (&conn->ibc_refcount));
-                atomic_dec (&conn->ibc_refcount);
-                kibnal_destroy_conn(conn);
-                return (-ENOMEM);
-        }
-
-        write_lock_irqsave (&kibnal_data.kib_global_lock, flags);
-
-        peer2 = kibnal_find_peer_locked(nid);
-        if (peer2 == NULL) {
-                /* peer table takes my ref on peer */
-                list_add_tail (&peer->ibp_list,
-                               kibnal_nid2peerlist(nid));
-        } else {
-                /* already known: discard the one I just created */
-                kibnal_put_peer (peer);
-                peer = peer2;
-        }
-
-        /* +1 ref for conn */
-        atomic_inc (&peer->ibp_refcount);
-        peer->ibp_connecting++;
-
-        write_unlock_irqrestore (&kibnal_data.kib_global_lock, flags);
-
-        conn->ibc_peer = peer;
-        conn->ibc_state = IBNAL_CONN_CONNECTING;
-        conn->ibc_comm_id = cid;
-        conn->ibc_incarnation = incarnation;
-        conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
-
-        *connp = conn;
-        return (0);
-}
-
-/* CM callback installed once a connection has gone idle.  It should never
- * run: any event is logged and treated as a bug. */
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_idle_conn_callback (tTS_IB_CM_EVENT event, tTS_IB_CM_COMM_ID cid,
-                            void *param, void *arg)
-{
-        /* No CM event is expected once TS_IB_CM_IDLE has been seen */
-        CERROR ("Unexpected event %d: conn %p\n", event, arg);
-        LBUG ();
-
-        return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-/*
- * CM callback for a connection that got as far as connecting ('arg' is the
- * kib_conn_t).
- *
- *  TS_IB_CM_DISCONNECTED  close the conn with no error.
- *  TS_IB_CM_IDLE          re-register kibnal_idle_conn_callback, fail all
- *                         queued transmits and passive RDMAs still awaiting
- *                         completion, then drop the CM's ref on the conn.
- *  anything else          close the conn with -ECONNABORTED.
- *
- * Always returns TS_IB_CM_CALLBACK_PROCEED.
- */
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_conn_callback (tTS_IB_CM_EVENT event,
-                       tTS_IB_CM_COMM_ID cid,
-                       void *param,
-                       void *arg)
-{
-        kib_conn_t       *conn = arg;
-        LIST_HEAD        (zombies);
-        struct list_head *tmp;
-        struct list_head *nxt;
-        kib_tx_t         *tx;
-        unsigned long     flags;
-        int               done;
-        int               rc;
-
-        /* Established Connection Notifier */
-
-        switch (event) {
-        default:
-                CERROR("Connection %p -> "LPX64" ERROR %d\n",
-                       conn, conn->ibc_peer->ibp_nid, event);
-                kibnal_close_conn (conn, -ECONNABORTED);
-                break;
-
-        case TS_IB_CM_DISCONNECTED:
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" DISCONNECTED.\n",
-                       conn, conn->ibc_peer->ibp_nid);
-                kibnal_close_conn (conn, 0);
-                break;
-
-        case TS_IB_CM_IDLE:
-                CDEBUG(D_NET, "Connection %p -> "LPX64" IDLE.\n",
-                       conn, conn->ibc_peer->ibp_nid);
-
-                /* LASSERT (no further callbacks) */
-                rc = tsIbCmCallbackModify(cid,
-                                          kibnal_idle_conn_callback, conn);
-                LASSERT (rc == 0);
-
-                /* NB we wait until the connection has closed before
-                 * completing outstanding passive RDMAs so we can be sure
-                 * the network can't touch the mapped memory any more. */
-
-                spin_lock_irqsave (&conn->ibc_lock, flags);
-
-                /* grab passive RDMAs not waiting for the tx callback */
-                list_for_each_safe (tmp, nxt, &conn->ibc_active_txs) {
-                        tx = list_entry (tmp, kib_tx_t, tx_list);
-
-                        LASSERT (tx->tx_passive_rdma ||
-                                 !tx->tx_passive_rdma_wait);
-
-                        LASSERT (tx->tx_passive_rdma_wait ||
-                                 tx->tx_sending != 0);
-
-                        /* still waiting for tx callback? */
-                        if (!tx->tx_passive_rdma_wait)
-                                continue;
-
-                        tx->tx_status = -ECONNABORTED;
-                        tx->tx_passive_rdma_wait = 0;
-                        done = (tx->tx_sending == 0);
-
-                        /* a send is still outstanding: leave the tx on the
-                         * active list */
-                        if (!done)
-                                continue;
-
-                        list_del (&tx->tx_list);
-                        list_add (&tx->tx_list, &zombies);
-                }
-
-                /* grab all blocked transmits */
-                list_for_each_safe (tmp, nxt, &conn->ibc_tx_queue) {
-                        tx = list_entry (tmp, kib_tx_t, tx_list);
-
-                        list_del (&tx->tx_list);
-                        list_add (&tx->tx_list, &zombies);
-                }
-
-                spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-                /* complete the collected txs outside ibc_lock */
-                while (!list_empty(&zombies)) {
-                        tx = list_entry (zombies.next, kib_tx_t, tx_list);
-
-                        list_del(&tx->tx_list);
-                        kibnal_tx_done (tx);
-                }
-
-                /* Lose CM's ref.  NB this must be the last thing done with
-                 * 'conn' here: it may be the final ref, so conn can't be
-                 * touched after it is dropped. */
-                kibnal_put_conn (conn);
-                break;
-        }
-
-        return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-/* CM callback for the passive (accepting) side of a connection.
- *
- * 'arg' is the conn associated with 'cid'.  It is NULL until a connection
- * request has been accepted by kibnal_accept(), at which point this callback
- * is re-registered with the new conn as its argument.
- *
- * 'param' is only interpreted for TS_IB_CM_REQ_RECEIVED, where it is the
- * struct ib_cm_req_received_param describing the request and receiving the
- * accept parameters.
- *
- * Returns TS_IB_CM_CALLBACK_ABORT when a request/event is rejected before
- * any conn exists, TS_IB_CM_CALLBACK_PROCEED otherwise. */
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_passive_conn_callback (tTS_IB_CM_EVENT event,
-                               tTS_IB_CM_COMM_ID cid,
-                               void *param,
-                               void *arg)
-{
-        kib_conn_t *conn = arg;
-        int          rc;
-
-        switch (event) {
-        default:
-                if (conn == NULL) {
-                        /* no connection yet */
-                        CERROR ("Unexpected event: %d\n", event);
-                        return TS_IB_CM_CALLBACK_ABORT;
-                }
-
-                CERROR ("Unexpected event %p -> "LPX64": %d\n",
-                        conn, conn->ibc_peer->ibp_nid, event);
-                kibnal_connreq_done (conn, 0, -ECONNABORTED);
-                break;
-
-        case TS_IB_CM_REQ_RECEIVED: {
-                struct ib_cm_req_received_param *req = param;
-                kib_wire_connreq_t             *wcr = req->remote_private_data;
-
-                LASSERT (conn == NULL);
-
-                /* Validate the length before reading any field of 'wcr' */
-                if (req->remote_private_data_len < sizeof (*wcr)) {
-                        CERROR("Connect from remote LID %04x: too short %d\n",
-                               req->dlid, req->remote_private_data_len);
-                        return TS_IB_CM_CALLBACK_ABORT;
-                }
-
-                CDEBUG(D_NET, "REQ from "LPX64"\n", le64_to_cpu(wcr->wcr_nid));
-
-                if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
-                        CERROR ("Can't accept LID %04x: bad magic %08x\n",
-                                req->dlid, le32_to_cpu(wcr->wcr_magic));
-                        return TS_IB_CM_CALLBACK_ABORT;
-                }
-
-                if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
-                        /* report the offending version, not the magic */
-                        CERROR ("Can't accept LID %04x: bad version %d\n",
-                                req->dlid, le16_to_cpu(wcr->wcr_version));
-                        return TS_IB_CM_CALLBACK_ABORT;
-                }
-
-                rc = kibnal_accept(&conn,
-                                   cid,
-                                   le64_to_cpu(wcr->wcr_nid),
-                                   le64_to_cpu(wcr->wcr_incarnation),
-                                   le16_to_cpu(wcr->wcr_queue_depth));
-                if (rc != 0) {
-                        CERROR ("Can't accept "LPX64": %d\n",
-                                le64_to_cpu(wcr->wcr_nid), rc);
-                        return TS_IB_CM_CALLBACK_ABORT;
-                }
-
-                /* update 'arg' for next callback */
-                rc = tsIbCmCallbackModify(cid,
-                                          kibnal_passive_conn_callback, conn);
-                LASSERT (rc == 0);
-
-                /* Reply with my own connreq.  NB wcr_queue_depth is read and
-                 * compared as a 16-bit little-endian value everywhere else
-                 * in this file, so it must be encoded with cpu_to_le16()
-                 * here too (a 32-bit swap truncates to 0 on big-endian). */
-                req->accept_param.qp                     = conn->ibc_qp;
-                *((kib_wire_connreq_t *)req->accept_param.reply_private_data)
-                        = (kib_wire_connreq_t) {
-                                .wcr_magic       = cpu_to_le32(IBNAL_MSG_MAGIC),
-                                .wcr_version     = cpu_to_le16(IBNAL_MSG_VERSION),
-                                .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE),
-                                .wcr_nid         = cpu_to_le64(kibnal_data.kib_nid),
-                                .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation),
-                        };
-                req->accept_param.reply_private_data_len = sizeof(kib_wire_connreq_t);
-                req->accept_param.responder_resources    = IBNAL_RESPONDER_RESOURCES;
-                req->accept_param.initiator_depth        = IBNAL_RESPONDER_RESOURCES;
-                req->accept_param.rnr_retry_count        = IBNAL_RNR_RETRY;
-                req->accept_param.flow_control           = IBNAL_FLOW_CONTROL;
-
-                CDEBUG(D_NET, "Proceeding\n");
-                break;
-        }
-
-        case TS_IB_CM_ESTABLISHED:
-                LASSERT (conn != NULL);
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" ESTABLISHED.\n",
-                       conn, conn->ibc_peer->ibp_nid);
-
-                kibnal_connreq_done (conn, 0, 0);
-                break;
-        }
-
-        /* NB if the connreq is done, we switch to kibnal_conn_callback */
-        return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-/* CM callback for the active (connecting) side of a connection.
- *
- * 'arg' is the conn passed to ib_cm_connect().  'param' is only interpreted
- * for TS_IB_CM_REP_RECEIVED, where it is the struct ib_cm_rep_received_param
- * carrying the peer's wire connreq.
- *
- * A reply that fails validation, an IDLE event or any unexpected event
- * completes the connection request with an error via kibnal_connreq_done();
- * TS_IB_CM_ESTABLISHED completes it successfully.
- *
- * Always returns TS_IB_CM_CALLBACK_PROCEED. */
-tTS_IB_CM_CALLBACK_RETURN
-kibnal_active_conn_callback (tTS_IB_CM_EVENT event,
-                              tTS_IB_CM_COMM_ID cid,
-                              void *param,
-                              void *arg)
-{
-        kib_conn_t *conn = arg;
-
-        switch (event) {
-        case TS_IB_CM_REP_RECEIVED: {
-                struct ib_cm_rep_received_param *rep = param;
-                kib_wire_connreq_t             *wcr = rep->remote_private_data;
-
-                /* Validate the length before reading any field of 'wcr' */
-                if (rep->remote_private_data_len < sizeof (*wcr)) {
-                        CERROR ("Short reply from "LPX64": %d\n",
-                                conn->ibc_peer->ibp_nid,
-                                rep->remote_private_data_len);
-                        kibnal_connreq_done (conn, 1, -EPROTO);
-                        break;
-                }
-
-                if (wcr->wcr_magic != cpu_to_le32(IBNAL_MSG_MAGIC)) {
-                        CERROR ("Can't connect "LPX64": bad magic %08x\n",
-                                conn->ibc_peer->ibp_nid, le32_to_cpu(wcr->wcr_magic));
-                        kibnal_connreq_done (conn, 1, -EPROTO);
-                        break;
-                }
-
-                if (wcr->wcr_version != cpu_to_le16(IBNAL_MSG_VERSION)) {
-                        /* report the offending version, not the magic */
-                        CERROR ("Can't connect "LPX64": bad version %d\n",
-                                conn->ibc_peer->ibp_nid, le16_to_cpu(wcr->wcr_version));
-                        kibnal_connreq_done (conn, 1, -EPROTO);
-                        break;
-                }
-
-                if (wcr->wcr_queue_depth != cpu_to_le16(IBNAL_MSG_QUEUE_SIZE)) {
-                        CERROR ("Can't connect "LPX64": bad queue depth %d\n",
-                                conn->ibc_peer->ibp_nid, le16_to_cpu(wcr->wcr_queue_depth));
-                        kibnal_connreq_done (conn, 1, -EPROTO);
-                        break;
-                }
-
-                /* the replier must be the peer I tried to connect to */
-                if (le64_to_cpu(wcr->wcr_nid) != conn->ibc_peer->ibp_nid) {
-                        CERROR ("Unexpected NID "LPX64" from "LPX64"\n",
-                                le64_to_cpu(wcr->wcr_nid), conn->ibc_peer->ibp_nid);
-                        kibnal_connreq_done (conn, 1, -EPROTO);
-                        break;
-                }
-
-                CDEBUG(D_NET, "Connection %p -> "LPX64" REP_RECEIVED.\n",
-                       conn, conn->ibc_peer->ibp_nid);
-
-                conn->ibc_incarnation = le64_to_cpu(wcr->wcr_incarnation);
-                conn->ibc_credits = IBNAL_MSG_QUEUE_SIZE;
-                break;
-        }
-
-        case TS_IB_CM_ESTABLISHED:
-                CDEBUG(D_WARNING, "Connection %p -> "LPX64" Established\n",
-                       conn, conn->ibc_peer->ibp_nid);
-
-                kibnal_connreq_done (conn, 1, 0);
-                break;
-
-        case TS_IB_CM_IDLE:
-                CERROR("Connection %p -> "LPX64" IDLE\n",
-                       conn, conn->ibc_peer->ibp_nid);
-                /* Back out state change: I'm disengaged from CM */
-                conn->ibc_state = IBNAL_CONN_INIT_QP;
-
-                kibnal_connreq_done (conn, 1, -ECONNABORTED);
-                break;
-
-        default:
-                CERROR("Connection %p -> "LPX64" ERROR %d\n",
-                       conn, conn->ibc_peer->ibp_nid, event);
-                kibnal_connreq_done (conn, 1, -ECONNABORTED);
-                break;
-        }
-
-        /* NB if the connreq is done, we switch to kibnal_conn_callback */
-        return TS_IB_CM_CALLBACK_PROCEED;
-}
-
-/* Completion callback for the path record request issued by
- * kibnal_service_get_callback().  On success, saves the path, builds my
- * wire connreq and CM parameters, and starts the CM connect.  Any failure
- * completes the connection request with the error status.
- *
- * Always returns 1 (non-zero prevents further callbacks). */
-int
-kibnal_pathreq_callback (tTS_IB_CLIENT_QUERY_TID tid, int status,
-                          struct ib_path_record *resp, int remaining,
-                          void *arg)
-{
-        kib_conn_t *conn = arg;
-
-        if (status != 0) {
-                CERROR ("status %d\n", status);
-                kibnal_connreq_done (conn, 1, status);
-                return 1;
-        }
-
-        /* Keep my own copy of the path... */
-        conn->ibc_connreq->cr_path = *resp;
-        /* XXX set timeout just like SDP!!!*/
-        conn->ibc_connreq->cr_path.packet_life = 13;
-
-        /* ...and describe myself to the peer */
-        conn->ibc_connreq->cr_wcr = (kib_wire_connreq_t) {
-                .wcr_magic       = cpu_to_le32(IBNAL_MSG_MAGIC),
-                .wcr_version     = cpu_to_le16(IBNAL_MSG_VERSION),
-                .wcr_queue_depth = cpu_to_le16(IBNAL_MSG_QUEUE_SIZE),
-                .wcr_nid         = cpu_to_le64(kibnal_data.kib_nid),
-                .wcr_incarnation = cpu_to_le64(kibnal_data.kib_incarnation),
-        };
-
-        conn->ibc_connreq->cr_connparam = (struct ib_cm_active_param) {
-                .qp                   = conn->ibc_qp,
-                .req_private_data     = &conn->ibc_connreq->cr_wcr,
-                .req_private_data_len = sizeof(conn->ibc_connreq->cr_wcr),
-                .responder_resources  = IBNAL_RESPONDER_RESOURCES,
-                .initiator_depth      = IBNAL_RESPONDER_RESOURCES,
-                .retry_count          = IBNAL_RETRY,
-                .rnr_retry_count      = IBNAL_RNR_RETRY,
-                .cm_response_timeout  = kibnal_tunables.kib_io_timeout,
-                .max_cm_retries       = IBNAL_CM_RETRY,
-                .flow_control         = IBNAL_FLOW_CONTROL,
-        };
-
-        /* Flag I'm getting involved with the CM... */
-        conn->ibc_state = IBNAL_CONN_CONNECTING;
-
-        CDEBUG(D_NET, "Connecting to, service id "LPX64", on "LPX64"\n",
-               conn->ibc_connreq->cr_service.service_id,
-               *kibnal_service_nid_field(&conn->ibc_connreq->cr_service));
-
-        /* kibnal_active_conn_callback gets my conn ref */
-        status = ib_cm_connect (&conn->ibc_connreq->cr_connparam,
-                                &conn->ibc_connreq->cr_path, NULL,
-                                conn->ibc_connreq->cr_service.service_id, 0,
-                                kibnal_active_conn_callback, conn,
-                                &conn->ibc_comm_id);
-        if (status == 0)
-                return 1;
-
-        CERROR ("Connect: %d\n", status);
-        /* Back out state change: I've not got a CM comm_id yet... */
-        conn->ibc_state = IBNAL_CONN_INIT_QP;
-        kibnal_connreq_done (conn, 1, status);
-
-        return 1;
-}
-
-/* Completion callback for the service lookup started by
- * kibnal_connect_peer().  On success, records the service, fetches my
- * cached GID and issues a path record request; any failure completes the
- * connection request with the error status. */
-void
-kibnal_service_get_callback (tTS_IB_CLIENT_QUERY_TID tid, int status,
-                             struct ib_common_attrib_service *resp, void *arg)
-{
-        kib_conn_t *conn = arg;
-        int         rc;
-
-        if (status != 0) {
-                CERROR ("status %d\n", status);
-                kibnal_connreq_done (conn, 1, status);
-                return;
-        }
-
-        CDEBUG(D_NET, "Got status %d, service id "LPX64", on "LPX64"\n",
-               status, resp->service_id,
-               *kibnal_service_nid_field(resp));
-
-        conn->ibc_connreq->cr_service = *resp;
-
-        rc = ib_cached_gid_get(kibnal_data.kib_device,
-                               kibnal_data.kib_port, 0,
-                               conn->ibc_connreq->cr_gid);
-        LASSERT (rc == 0);
-
-        /* kibnal_pathreq_callback gets my conn ref */
-        rc = tsIbPathRecordRequest (kibnal_data.kib_device,
-                                    kibnal_data.kib_port,
-                                    conn->ibc_connreq->cr_gid,
-                                    conn->ibc_connreq->cr_service.service_gid,
-                                    conn->ibc_connreq->cr_service.service_pkey,
-                                    0,
-                                    kibnal_tunables.kib_io_timeout * HZ,
-                                    0,
-                                    kibnal_pathreq_callback, conn,
-                                    &conn->ibc_connreq->cr_tid);
-        if (rc != 0) {
-                CERROR ("Path record request: %d\n", rc);
-                kibnal_connreq_done (conn, 1, rc);
-        }
-}
-
-/* Start an active connection attempt to 'peer': create a conn that takes a
- * ref on the peer, allocate its connreq and launch the service lookup.
- * Failure to create the conn is reported via kibnal_peer_connect_failed();
- * later failures go through kibnal_connreq_done(). */
-void
-kibnal_connect_peer (kib_peer_t *peer)
-{
-        kib_conn_t  *conn;
-        int          status;
-
-        conn = kibnal_create_conn();
-
-        LASSERT (peer->ibp_connecting != 0);
-
-        if (conn == NULL) {
-                CERROR ("Can't allocate conn\n");
-                kibnal_peer_connect_failed (peer, 1, -ENOMEM);
-                return;
-        }
-
-        /* conn holds its own ref on the peer */
-        conn->ibc_peer = peer;
-        atomic_inc (&peer->ibp_refcount);
-
-        PORTAL_ALLOC (conn->ibc_connreq, sizeof (*conn->ibc_connreq));
-        if (conn->ibc_connreq == NULL) {
-                CERROR ("Can't allocate connreq\n");
-                kibnal_connreq_done (conn, 1, -ENOMEM);
-                return;
-        }
-
-        memset(conn->ibc_connreq, 0, sizeof (*conn->ibc_connreq));
-
-        kibnal_set_service_keys(&conn->ibc_connreq->cr_service, peer->ibp_nid);
-
-        /* kibnal_service_get_callback gets my conn ref */
-        status = ib_service_get (kibnal_data.kib_device,
-                                 kibnal_data.kib_port,
-                                 &conn->ibc_connreq->cr_service,
-                                 KIBNAL_SERVICE_KEY_MASK,
-                                 kibnal_tunables.kib_io_timeout * HZ,
-                                 kibnal_service_get_callback, conn,
-                                 &conn->ibc_connreq->cr_tid);
-        if (status != 0) {
-                CERROR ("ib_service_get: %d\n", status);
-                kibnal_connreq_done (conn, 1, status);
-        }
-}
-
-/* Returns 1 if any tx on conn's ibc_tx_queue or ibc_active_txs list has
- * reached its deadline, 0 otherwise.  Both lists are scanned with
- * conn->ibc_lock held; the scan stops at the first expired tx. */
-int
-kibnal_conn_timed_out (kib_conn_t *conn)
-{
-        struct list_head  *pos;
-        kib_tx_t          *tx;
-        unsigned long      flags;
-        int                expired = 0;
-
-        spin_lock_irqsave (&conn->ibc_lock, flags);
-
-        /* transmits still on the tx queue */
-        list_for_each (pos, &conn->ibc_tx_queue) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                LASSERT (!tx->tx_passive_rdma_wait);
-                LASSERT (tx->tx_sending == 0);
-
-                if (time_after_eq (jiffies, tx->tx_deadline)) {
-                        expired = 1;
-                        goto out;
-                }
-        }
-
-        /* transmits on the active list */
-        list_for_each (pos, &conn->ibc_active_txs) {
-                tx = list_entry (pos, kib_tx_t, tx_list);
-
-                LASSERT (tx->tx_passive_rdma ||
-                         !tx->tx_passive_rdma_wait);
-
-                LASSERT (tx->tx_passive_rdma_wait ||
-                         tx->tx_sending != 0);
-
-                if (time_after_eq (jiffies, tx->tx_deadline)) {
-                        expired = 1;
-                        goto out;
-                }
-        }
-
- out:
-        spin_unlock_irqrestore (&conn->ibc_lock, flags);
-
-        return expired;
-}
-
-/* Scan the peers hashed at index 'idx' of kibnal_data.kib_peers.  For every
- * conn of every peer, call kibnal_check_sends() and, if
- * kibnal_conn_timed_out() reports an expired tx, close the conn with
- * -ETIMEDOUT.  The global lock is dropped to close a conn, after which the
- * whole scan of this hash chain restarts. */
-void
-kibnal_check_conns (int idx)
-{
-        struct list_head  *peers = &kibnal_data.kib_peers[idx];
-        struct list_head  *ptmp;
-        kib_peer_t        *peer;
-        kib_conn_t        *conn;
-        struct list_head  *ctmp;
-
- again:
-        /* NB. We expect to have a look at all the peers and not find any
-         * rdmas to time out, so we just use a shared lock while we
-         * take a look... */
-        read_lock (&kibnal_data.kib_global_lock);
-
-        list_for_each (ptmp, peers) {
-                peer = list_entry (ptmp, kib_peer_t, ibp_list);
-
-                list_for_each (ctmp, &peer->ibp_conns) {
-                        conn = list_entry (ctmp, kib_conn_t, ibc_list);
-
-                        LASSERT (conn->ibc_state == IBNAL_CONN_ESTABLISHED);
-
-
-                        /* In case we have enough credits to return via a
-                         * NOOP, but there were no non-blocking tx descs
-                         * free to do it last time... */
-                        kibnal_check_sends(conn);
-
-                        if (!kibnal_conn_timed_out(conn))
-                                continue;
-                        
-                        CDEBUG(D_NET, "++conn[%p] state %d -> "LPX64" (%d)\n",
-                               conn, conn->ibc_state, peer->ibp_nid,
-                               atomic_read (&conn->ibc_refcount));
-
-                        /* Take a conn ref before dropping the lock;
-                         * presumably released by kibnal_put_conn() below */
-                        atomic_inc (&conn->ibc_refcount);
-                        read_unlock (&kibnal_data.kib_global_lock);
-
-                        CERROR("Timed out RDMA with "LPX64"\n",
-                               peer->ibp_nid);
-
-                        kibnal_close_conn (conn, -ETIMEDOUT);
-                        kibnal_put_conn (conn);
-
-                        /* start again now I've dropped the lock */
-                        goto again;
-                }
-        }
-
-        read_unlock (&kibnal_data.kib_global_lock);
-}
-
-/* Move a conn from DEATHROW to ZOMBIE and ask the CM to disconnect it.
- * A disconnect failure is only logged. */
-void
-kibnal_terminate_conn (kib_conn_t *conn)
-{
-        int           status;
-
-        CDEBUG(D_NET, "conn %p\n", conn);
-
-        LASSERT (conn->ibc_state == IBNAL_CONN_DEATHROW);
-        conn->ibc_state = IBNAL_CONN_ZOMBIE;
-
-        status = ib_cm_disconnect (conn->ibc_comm_id);
-        if (status == 0)
-                return;
-
-        CERROR ("Error %d disconnecting conn %p -> "LPX64"\n",
-                status, conn, conn->ibc_peer->ibp_nid);
-}
-
-/* Connection daemon thread body ('arg' is unused).
- *
- * Loops until kib_shutdown is set and kib_nconns has dropped to zero, doing
- * three jobs:
- *  - conns queued on kib_connd_conns: DEATHROW conns are disconnected,
- *    ZOMBIE conns are destroyed;
- *  - peers queued on kib_connd_peers: a connection attempt is started;
- *  - every 'p' seconds: a chunk of the peer hash table is passed to
- *    kibnal_check_conns() to look for timeouts.
- *
- * kib_connd_lock is held while the queues are examined and dropped around
- * all other work.  Returns 0 when the thread exits. */
-int
-kibnal_connd (void *arg)
-{
-        wait_queue_t       wait;
-        unsigned long      flags;
-        kib_conn_t        *conn;
-        kib_peer_t        *peer;
-        int                timeout;
-        int                i;
-        int                peer_index = 0;
-        unsigned long      deadline = jiffies;
-        
-        kportal_daemonize ("kibnal_connd");
-        kportal_blockallsigs ();
-
-        init_waitqueue_entry (&wait, current);
-
-        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-
-        for (;;) {
-                /* conns take priority: handle one, then rescan from the top */
-                if (!list_empty (&kibnal_data.kib_connd_conns)) {
-                        conn = list_entry (kibnal_data.kib_connd_conns.next,
-                                           kib_conn_t, ibc_list);
-                        list_del (&conn->ibc_list);
-                        
-                        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-                        switch (conn->ibc_state) {
-                        case IBNAL_CONN_DEATHROW:
-                                LASSERT (conn->ibc_comm_id != TS_IB_CM_COMM_ID_INVALID);
-                                /* Disconnect: conn becomes a zombie in the
-                                 * callback and last ref reschedules it
-                                 * here... */
-                                kibnal_terminate_conn(conn);
-                                kibnal_put_conn (conn);
-                                break;
-                                
-                        case IBNAL_CONN_ZOMBIE:
-                                kibnal_destroy_conn (conn);
-                                break;
-                                
-                        default:
-                                CERROR ("Bad conn %p state: %d\n",
-                                        conn, conn->ibc_state);
-                                LBUG();
-                        }
-
-                        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-                        continue;
-                }
-
-                /* at most one peer per pass; NB no 'continue' here, so the
-                 * shutdown and timeout checks below still run */
-                if (!list_empty (&kibnal_data.kib_connd_peers)) {
-                        peer = list_entry (kibnal_data.kib_connd_peers.next,
-                                           kib_peer_t, ibp_connd_list);
-                        
-                        list_del_init (&peer->ibp_connd_list);
-                        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-                        kibnal_connect_peer (peer);
-                        kibnal_put_peer (peer);
-
-                        spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-                }
-
-                /* shut down and nobody left to reap... */
-                if (kibnal_data.kib_shutdown &&
-                    atomic_read(&kibnal_data.kib_nconns) == 0)
-                        break;
-
-                spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-                /* careful with the jiffy wrap... */
-                while ((timeout = (int)(deadline - jiffies)) <= 0) {
-                        const int n = 4;
-                        const int p = 1;
-                        int       chunk = kibnal_data.kib_peer_hash_size;
-                        
-                        /* Time to check for RDMA timeouts on a few more
-                         * peers: I do checks every 'p' seconds on a
-                         * proportion of the peer table and I need to check
-                         * every connection 'n' times within a timeout
-                         * interval, to ensure I detect a timeout on any
-                         * connection within (n+1)/n times the timeout
-                         * interval. */
-
-                        if (kibnal_tunables.kib_io_timeout > n * p)
-                                chunk = (chunk * n * p) / 
-                                        kibnal_tunables.kib_io_timeout;
-                        if (chunk == 0)
-                                chunk = 1;
-
-                        /* peer_index persists across passes, so successive
-                         * chunks walk round the whole hash table */
-                        for (i = 0; i < chunk; i++) {
-                                kibnal_check_conns (peer_index);
-                                peer_index = (peer_index + 1) % 
-                                             kibnal_data.kib_peer_hash_size;
-                        }
-
-                        deadline += p * HZ;
-                }
-
-                kibnal_data.kib_connd_waketime = jiffies + timeout;
-
-                /* Set the task state and join the waitq before re-checking
-                 * for work, then sleep only if there is none */
-                set_current_state (TASK_INTERRUPTIBLE);
-                add_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
-                if (!kibnal_data.kib_shutdown &&
-                    list_empty (&kibnal_data.kib_connd_conns) &&
-                    list_empty (&kibnal_data.kib_connd_peers))
-                        schedule_timeout (timeout);
-
-                set_current_state (TASK_RUNNING);
-                remove_wait_queue (&kibnal_data.kib_connd_waitq, &wait);
-
-                spin_lock_irqsave (&kibnal_data.kib_connd_lock, flags);
-        }
-
-        spin_unlock_irqrestore (&kibnal_data.kib_connd_lock, flags);
-
-        kibnal_thread_fini ();
-        return (0);
-}
-
-/* Scheduler thread body.  'arg' is the thread's numeric id, used only to
- * build its name ("kibnal_sd_NN").
- *
- * Loops until kib_shutdown is set and kib_nconns has dropped to zero.  Each
- * pass drains kib_sched_txq through kibnal_tx_done() and handles at most
- * one entry of kib_sched_rxq through kibnal_rx().  kib_sched_lock is held
- * while the queues are examined and dropped around each call.  With nothing
- * received the thread sleeps on kib_sched_waitq; after IBNAL_RESCHED busy
- * passes it calls our_cond_resched().  Returns 0 when the thread exits. */
-int
-kibnal_scheduler(void *arg)
-{
-        long            id = (long)arg;
-        char            name[16];
-        kib_rx_t       *rx;
-        kib_tx_t       *tx;
-        unsigned long   flags;
-        int             rc;
-        int             counter = 0;
-        int             did_something;
-
-        snprintf(name, sizeof(name), "kibnal_sd_%02ld", id);
-        kportal_daemonize(name);
-        kportal_blockallsigs();
-
-        spin_lock_irqsave(&kibnal_data.kib_sched_lock, flags);
-
-        for (;;) {
-                did_something = 0;
-
-                /* NB completing transmits does not set did_something; the
-                 * queue is simply drained on every pass */
-                while (!list_empty(&kibnal_data.kib_sched_txq)) {
-                        tx = list_entry(kibnal_data.kib_sched_txq.next,
-                                        kib_tx_t, tx_list);
-                        list_del(&tx->tx_list);
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-                        kibnal_tx_done(tx);
-
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-
-                if (!list_empty(&kibnal_data.kib_sched_rxq)) {
-                        rx = list_entry(kibnal_data.kib_sched_rxq.next,
-                                        kib_rx_t, rx_list);
-                        list_del(&rx->rx_list);
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-
-                        kibnal_rx(rx);
-
-                        did_something = 1;
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-
-                /* shut down and no receives to complete... */
-                if (kibnal_data.kib_shutdown &&
-                    atomic_read(&kibnal_data.kib_nconns) == 0)
-                        break;
-
-                /* nothing to do or hogging CPU */
-                if (!did_something || counter++ == IBNAL_RESCHED) {
-                        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock,
-                                               flags);
-                        counter = 0;
-
-                        if (!did_something) {
-                                /* NB rc is assigned but never examined */
-                                rc = wait_event_interruptible(
-                                        kibnal_data.kib_sched_waitq,
-                                        !list_empty(&kibnal_data.kib_sched_txq) || 
-                                        !list_empty(&kibnal_data.kib_sched_rxq) || 
-                                        (kibnal_data.kib_shutdown &&
-                                         atomic_read (&kibnal_data.kib_nconns) == 0));
-                        } else {
-                                our_cond_resched();
-                        }
-
-                        spin_lock_irqsave(&kibnal_data.kib_sched_lock,
-                                          flags);
-                }
-        }
-
-        spin_unlock_irqrestore(&kibnal_data.kib_sched_lock, flags);
-
-        kibnal_thread_fini();
-        return (0);
-}
-
-
-/* Method table binding this NAL's send/recv/dist entry points and its
- * private data to the portals library. */
-lib_nal_t kibnal_lib = {
-        .libnal_data       = &kibnal_data,      /* NAL private data */
-        .libnal_send       = kibnal_send,
-        .libnal_send_pages = kibnal_send_pages,
-        .libnal_recv       = kibnal_recv,
-        .libnal_recv_pages = kibnal_recv_pages,
-        .libnal_dist       = kibnal_dist
-};
diff --git a/lustre/portals/knals/qswnal/.cvsignore b/lustre/portals/knals/qswnal/.cvsignore
deleted file mode 100644 (file)
index 48b17e9..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.*.cmd
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/qswnal/Makefile.in b/lustre/portals/knals/qswnal/Makefile.in
deleted file mode 100644 (file)
index d27240c..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := kqswnal
-kqswnal-objs := qswnal.o qswnal_cb.o
-
-EXTRA_POST_CFLAGS := @QSWCPPFLAGS@ -I/usr/include
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/qswnal/autoMakefile.am b/lustre/portals/knals/qswnal/autoMakefile.am
deleted file mode 100644 (file)
index b5b2e07..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-if !CRAY_PORTALS
-if BUILD_QSWNAL
-modulenet_DATA = kqswnal$(KMODEXT)
-endif
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(kqswnal-objs:%.o=%.c) qswnal.h
diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c
deleted file mode 100644 (file)
index 5aff4e9..0000000
+++ /dev/null
@@ -1,800 +0,0 @@
-/*
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * W. Marcus Miller - Based on ksocknal
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswnal.h"
-
-nal_t                  kqswnal_api;
-kqswnal_data_t         kqswnal_data;
-ptl_handle_ni_t         kqswnal_ni;
-kqswnal_tunables_t      kqswnal_tunables;
-
-kpr_nal_interface_t kqswnal_router_interface = {
-       kprni_nalid:    QSWNAL,
-       kprni_arg:      NULL,
-       kprni_fwd:      kqswnal_fwd_packet,
-       kprni_notify:   NULL,                   /* we're connectionless */
-};
-
-#if CONFIG_SYSCTL
-#define QSWNAL_SYSCTL  201
-
-#define QSWNAL_SYSCTL_OPTIMIZED_GETS     1
-#define QSWNAL_SYSCTL_OPTIMIZED_PUTS     2
-
-static ctl_table kqswnal_ctl_table[] = {
-       {QSWNAL_SYSCTL_OPTIMIZED_PUTS, "optimized_puts",
-        &kqswnal_tunables.kqn_optimized_puts, sizeof (int),
-        0644, NULL, &proc_dointvec},
-       {QSWNAL_SYSCTL_OPTIMIZED_GETS, "optimized_gets",
-        &kqswnal_tunables.kqn_optimized_gets, sizeof (int),
-        0644, NULL, &proc_dointvec},
-       {0}
-};
-
-static ctl_table kqswnal_top_ctl_table[] = {
-       {QSWNAL_SYSCTL, "qswnal", NULL, 0, 0555, kqswnal_ctl_table},
-       {0}
-};
-#endif
-
-int
-kqswnal_get_tx_desc (struct portals_cfg *pcfg)
-{
-       unsigned long      flags;
-       struct list_head  *tmp;
-       kqswnal_tx_t      *ktx;
-       ptl_hdr_t         *hdr;
-       int                index = pcfg->pcfg_count;
-       int                rc = -ENOENT;
-
-       spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
-       list_for_each (tmp, &kqswnal_data.kqn_activetxds) {
-               if (index-- != 0)
-                       continue;
-
-               ktx = list_entry (tmp, kqswnal_tx_t, ktx_list);
-               hdr = (ptl_hdr_t *)ktx->ktx_buffer;
-
-               pcfg->pcfg_pbuf1 = (char *)ktx;
-               pcfg->pcfg_count = le32_to_cpu(hdr->type);
-               pcfg->pcfg_size  = le32_to_cpu(hdr->payload_length);
-               pcfg->pcfg_nid   = le64_to_cpu(hdr->dest_nid);
-               pcfg->pcfg_nid2  = ktx->ktx_nid;
-               pcfg->pcfg_misc  = ktx->ktx_launcher;
-               pcfg->pcfg_flags = (list_empty (&ktx->ktx_delayed_list) ? 0 : 1) |
-                                 (!ktx->ktx_isnblk                    ? 0 : 2) |
-                                 (ktx->ktx_state << 2);
-               rc = 0;
-               break;
-       }
-       
-       spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-       return (rc);
-}
-
-int
-kqswnal_cmd (struct portals_cfg *pcfg, void *private)
-{
-       LASSERT (pcfg != NULL);
-       
-       switch (pcfg->pcfg_command) {
-       case NAL_CMD_GET_TXDESC:
-               return (kqswnal_get_tx_desc (pcfg));
-
-       case NAL_CMD_REGISTER_MYNID:
-               CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n",
-                       pcfg->pcfg_nid - kqswnal_data.kqn_elanid,
-                       kqswnal_data.kqn_nid_offset);
-               kqswnal_data.kqn_nid_offset =
-                       pcfg->pcfg_nid - kqswnal_data.kqn_elanid;
-               kqswnal_lib.libnal_ni.ni_pid.nid = pcfg->pcfg_nid;
-               return (0);
-               
-       default:
-               return (-EINVAL);
-       }
-}
-
-static void
-kqswnal_shutdown(nal_t *nal)
-{
-       unsigned long flags;
-       kqswnal_tx_t *ktx;
-       kqswnal_rx_t *krx;
-       int           do_lib_fini = 0;
-
-       /* NB The first ref was this module! */
-       if (nal->nal_refct != 0) {
-               PORTAL_MODULE_UNUSE;
-               return;
-       }
-
-       CDEBUG (D_NET, "shutdown\n");
-       LASSERT (nal == &kqswnal_api);
-
-       switch (kqswnal_data.kqn_init)
-       {
-       default:
-               LASSERT (0);
-
-       case KQN_INIT_ALL:
-                libcfs_nal_cmd_unregister(QSWNAL);
-               /* fall through */
-
-       case KQN_INIT_LIB:
-               do_lib_fini = 1;
-               /* fall through */
-
-       case KQN_INIT_DATA:
-               break;
-
-       case KQN_INIT_NOTHING:
-               return;
-       }
-
-       /**********************************************************************/
-       /* Tell router we're shutting down.  Any router calls my threads
-        * make will now fail immediately and the router will stop calling
-        * into me. */
-       kpr_shutdown (&kqswnal_data.kqn_router);
-       
-       /**********************************************************************/
-       /* Signal the start of shutdown... */
-       spin_lock_irqsave(&kqswnal_data.kqn_idletxd_lock, flags);
-       kqswnal_data.kqn_shuttingdown = 1;
-       spin_unlock_irqrestore(&kqswnal_data.kqn_idletxd_lock, flags);
-
-       wake_up_all(&kqswnal_data.kqn_idletxd_waitq);
-
-       /**********************************************************************/
-       /* wait for sends that have allocated a tx desc to launch or give up */
-       while (atomic_read (&kqswnal_data.kqn_pending_txs) != 0) {
-               CDEBUG(D_NET, "waiting for %d pending sends\n",
-                      atomic_read (&kqswnal_data.kqn_pending_txs));
-               set_current_state (TASK_UNINTERRUPTIBLE);
-               schedule_timeout (HZ);
-       }
-
-       /**********************************************************************/
-       /* close elan comms */
-#if MULTIRAIL_EKC
-       /* Shut down receivers first; rx callbacks might try sending... */
-       if (kqswnal_data.kqn_eprx_small != NULL)
-               ep_free_rcvr (kqswnal_data.kqn_eprx_small);
-
-       if (kqswnal_data.kqn_eprx_large != NULL)
-               ep_free_rcvr (kqswnal_data.kqn_eprx_large);
-
-       /* NB ep_free_rcvr() returns only after we've freed off all receive
-        * buffers (see shutdown handling in kqswnal_requeue_rx()).  This
-        * means we must have completed any messages we passed to
-        * lib_parse() or kpr_fwd_start(). */
-
-       if (kqswnal_data.kqn_eptx != NULL)
-               ep_free_xmtr (kqswnal_data.kqn_eptx);
-
-       /* NB ep_free_xmtr() returns only after all outstanding transmits
-        * have called their callback... */
-       LASSERT(list_empty(&kqswnal_data.kqn_activetxds));
-#else
-       /* "Old" EKC just pretends to shutdown cleanly but actually
-        * provides no guarantees */
-       if (kqswnal_data.kqn_eprx_small != NULL)
-               ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small);
-
-       if (kqswnal_data.kqn_eprx_large != NULL)
-               ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large);
-
-       /* wait for transmits to complete */
-       while (!list_empty(&kqswnal_data.kqn_activetxds)) {
-               CWARN("waiting for active transmits to complete\n");
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(HZ);
-       }
-
-       if (kqswnal_data.kqn_eptx != NULL)
-               ep_free_large_xmtr (kqswnal_data.kqn_eptx);
-#endif
-       /**********************************************************************/
-       /* flag threads to terminate, wake them and wait for them to die */
-       kqswnal_data.kqn_shuttingdown = 2;
-       wake_up_all (&kqswnal_data.kqn_sched_waitq);
-
-       while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) {
-               CDEBUG(D_NET, "waiting for %d threads to terminate\n",
-                      atomic_read (&kqswnal_data.kqn_nthreads));
-               set_current_state (TASK_UNINTERRUPTIBLE);
-               schedule_timeout (HZ);
-       }
-
-       /**********************************************************************/
-       /* No more threads.  No more portals, router or comms callbacks!
-        * I control the horizontals and the verticals...
-        */
-
-#if MULTIRAIL_EKC
-       LASSERT (list_empty (&kqswnal_data.kqn_readyrxds));
-       LASSERT (list_empty (&kqswnal_data.kqn_delayedtxds));
-       LASSERT (list_empty (&kqswnal_data.kqn_delayedfwds));
-#endif
-
-       /**********************************************************************/
-       /* Complete any blocked forwarding packets, with error
-        */
-
-       while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq))
-       {
-               kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
-                                                 kpr_fwd_desc_t, kprfd_list);
-               list_del (&fwd->kprfd_list);
-               kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -ESHUTDOWN);
-       }
-
-       /**********************************************************************/
-       /* finalise router and portals lib */
-
-       kpr_deregister (&kqswnal_data.kqn_router);
-
-       if (do_lib_fini)
-               lib_fini (&kqswnal_lib);
-
-       /**********************************************************************/
-       /* Unmap message buffers and free all descriptors and buffers
-        */
-
-#if MULTIRAIL_EKC
-       /* FTTB, we need to unmap any remaining mapped memory.  When
-        * ep_dvma_release() get fixed (and releases any mappings in the
-        * region), we can delete all the code from here -------->  */
-
-       for (ktx = kqswnal_data.kqn_txds; ktx != NULL; ktx = ktx->ktx_alloclist) {
-               /* If ktx has a buffer, it got mapped; unmap now.  NB only
-                * the pre-mapped stuff is still mapped since all tx descs
-                * must be idle */
-
-               if (ktx->ktx_buffer != NULL)
-                       ep_dvma_unload(kqswnal_data.kqn_ep,
-                                      kqswnal_data.kqn_ep_tx_nmh,
-                                      &ktx->ktx_ebuffer);
-       }
-
-       for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
-               /* If krx_kiov[0].kiov_page got allocated, it got mapped.  
-                * NB subsequent pages get merged */
-
-               if (krx->krx_kiov[0].kiov_page != NULL)
-                       ep_dvma_unload(kqswnal_data.kqn_ep,
-                                      kqswnal_data.kqn_ep_rx_nmh,
-                                      &krx->krx_elanbuffer);
-       }
-       /* <----------- to here */
-
-       if (kqswnal_data.kqn_ep_rx_nmh != NULL)
-               ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_rx_nmh);
-
-       if (kqswnal_data.kqn_ep_tx_nmh != NULL)
-               ep_dvma_release(kqswnal_data.kqn_ep, kqswnal_data.kqn_ep_tx_nmh);
-#else
-       if (kqswnal_data.kqn_eprxdmahandle != NULL)
-       {
-               elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-                                 kqswnal_data.kqn_eprxdmahandle, 0,
-                                 KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-                                 KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE);
-
-               elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
-                                 kqswnal_data.kqn_eprxdmahandle);
-       }
-
-       if (kqswnal_data.kqn_eptxdmahandle != NULL)
-       {
-               elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-                                 kqswnal_data.kqn_eptxdmahandle, 0,
-                                 KQSW_NTXMSGPAGES * (KQSW_NTXMSGS +
-                                                     KQSW_NNBLK_TXMSGS));
-
-               elan3_dma_release(kqswnal_data.kqn_ep->DmaState,
-                                 kqswnal_data.kqn_eptxdmahandle);
-       }
-#endif
-
-       while (kqswnal_data.kqn_txds != NULL) {
-               ktx = kqswnal_data.kqn_txds;
-
-               if (ktx->ktx_buffer != NULL)
-                       PORTAL_FREE(ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-
-               kqswnal_data.kqn_txds = ktx->ktx_alloclist;
-               PORTAL_FREE(ktx, sizeof(*ktx));
-       }
-
-       while (kqswnal_data.kqn_rxds != NULL) {
-               int           i;
-
-               krx = kqswnal_data.kqn_rxds;
-               for (i = 0; i < krx->krx_npages; i++)
-                       if (krx->krx_kiov[i].kiov_page != NULL)
-                               __free_page (krx->krx_kiov[i].kiov_page);
-
-               kqswnal_data.kqn_rxds = krx->krx_alloclist;
-               PORTAL_FREE(krx, sizeof (*krx));
-       }
-
-       /* resets flags, pointers to NULL etc */
-       memset(&kqswnal_data, 0, sizeof (kqswnal_data));
-
-       CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory));
-
-       printk (KERN_INFO "Lustre: Routing QSW NAL unloaded (final mem %d)\n",
-                atomic_read(&portal_kmemory));
-}
-
-static int
-kqswnal_startup (nal_t *nal, ptl_pid_t requested_pid,
-                ptl_ni_limits_t *requested_limits, 
-                ptl_ni_limits_t *actual_limits)
-{
-#if MULTIRAIL_EKC
-       EP_RAILMASK       all_rails = EP_RAILMASK_ALL;
-#else
-       ELAN3_DMA_REQUEST dmareq;
-#endif
-       int               rc;
-       int               i;
-       kqswnal_rx_t     *krx;
-       kqswnal_tx_t     *ktx;
-       int               elan_page_idx;
-       ptl_process_id_t  my_process_id;
-       int               pkmem = atomic_read(&portal_kmemory);
-
-       LASSERT (nal == &kqswnal_api);
-
-       if (nal->nal_refct != 0) {
-               if (actual_limits != NULL)
-                       *actual_limits = kqswnal_lib.libnal_ni.ni_actual_limits;
-               /* This module got the first ref */
-               PORTAL_MODULE_USE;
-               return (PTL_OK);
-       }
-
-       LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING);
-
-       CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory));
-
-       /* ensure all pointers NULL etc */
-       memset (&kqswnal_data, 0, sizeof (kqswnal_data));
-
-       INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds);
-       INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds);
-       INIT_LIST_HEAD (&kqswnal_data.kqn_activetxds);
-       spin_lock_init (&kqswnal_data.kqn_idletxd_lock);
-       init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq);
-       INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq);
-
-       INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds);
-       INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds);
-       INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds);
-
-       spin_lock_init (&kqswnal_data.kqn_sched_lock);
-       init_waitqueue_head (&kqswnal_data.kqn_sched_waitq);
-
-       /* Leave kqn_rpc_success zeroed */
-#if MULTIRAIL_EKC
-       kqswnal_data.kqn_rpc_failed.Data[0] = -ECONNREFUSED;
-#else
-       kqswnal_data.kqn_rpc_failed.Status = -ECONNREFUSED;
-#endif
-
-       /* pointers/lists/locks initialised */
-       kqswnal_data.kqn_init = KQN_INIT_DATA;
-       
-#if MULTIRAIL_EKC
-       kqswnal_data.kqn_ep = ep_system();
-       if (kqswnal_data.kqn_ep == NULL) {
-               CERROR("Can't initialise EKC\n");
-               kqswnal_shutdown(nal);
-               return (PTL_IFACE_INVALID);
-       }
-
-       if (ep_waitfor_nodeid(kqswnal_data.kqn_ep) == ELAN_INVALID_NODE) {
-               CERROR("Can't get elan ID\n");
-               kqswnal_shutdown(nal);
-               return (PTL_IFACE_INVALID);
-       }
-#else
-       /**********************************************************************/
-       /* Find the first Elan device */
-
-       kqswnal_data.kqn_ep = ep_device (0);
-       if (kqswnal_data.kqn_ep == NULL)
-       {
-               CERROR ("Can't get elan device 0\n");
-               kqswnal_shutdown(nal);
-               return (PTL_IFACE_INVALID);
-       }
-#endif
-
-       kqswnal_data.kqn_nid_offset = 0;
-       kqswnal_data.kqn_nnodes     = ep_numnodes (kqswnal_data.kqn_ep);
-       kqswnal_data.kqn_elanid     = ep_nodeid (kqswnal_data.kqn_ep);
-       
-       /**********************************************************************/
-       /* Get the transmitter */
-
-       kqswnal_data.kqn_eptx = ep_alloc_xmtr (kqswnal_data.kqn_ep);
-       if (kqswnal_data.kqn_eptx == NULL)
-       {
-               CERROR ("Can't allocate transmitter\n");
-               kqswnal_shutdown (nal);
-               return (PTL_NO_SPACE);
-       }
-
-       /**********************************************************************/
-       /* Get the receivers */
-
-       kqswnal_data.kqn_eprx_small = ep_alloc_rcvr (kqswnal_data.kqn_ep,
-                                                    EP_MSG_SVC_PORTALS_SMALL,
-                                                    KQSW_EP_ENVELOPES_SMALL);
-       if (kqswnal_data.kqn_eprx_small == NULL)
-       {
-               CERROR ("Can't install small msg receiver\n");
-               kqswnal_shutdown (nal);
-               return (PTL_NO_SPACE);
-       }
-
-       kqswnal_data.kqn_eprx_large = ep_alloc_rcvr (kqswnal_data.kqn_ep,
-                                                    EP_MSG_SVC_PORTALS_LARGE,
-                                                    KQSW_EP_ENVELOPES_LARGE);
-       if (kqswnal_data.kqn_eprx_large == NULL)
-       {
-               CERROR ("Can't install large msg receiver\n");
-               kqswnal_shutdown (nal);
-               return (PTL_NO_SPACE);
-       }
-
-       /**********************************************************************/
-       /* Reserve Elan address space for transmit descriptors NB we may
-        * either send the contents of associated buffers immediately, or
-        * map them for the peer to suck/blow... */
-#if MULTIRAIL_EKC
-       kqswnal_data.kqn_ep_tx_nmh = 
-               ep_dvma_reserve(kqswnal_data.kqn_ep,
-                               KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
-                               EP_PERM_WRITE);
-       if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-               CERROR("Can't reserve tx dma space\n");
-               kqswnal_shutdown(nal);
-               return (PTL_NO_SPACE);
-       }
-#else
-        dmareq.Waitfn   = DDI_DMA_SLEEP;
-        dmareq.ElanAddr = (E3_Addr) 0;
-        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
-        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;
-
-       rc = elan3_dma_reserve(kqswnal_data.kqn_ep->DmaState,
-                             KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS),
-                             &dmareq, &kqswnal_data.kqn_eptxdmahandle);
-       if (rc != DDI_SUCCESS)
-       {
-               CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (nal);
-               return (PTL_NO_SPACE);
-       }
-#endif
-       /**********************************************************************/
-       /* Reserve Elan address space for receive buffers */
-#if MULTIRAIL_EKC
-       kqswnal_data.kqn_ep_rx_nmh =
-               ep_dvma_reserve(kqswnal_data.kqn_ep,
-                               KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-                               KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
-                               EP_PERM_WRITE);
-       if (kqswnal_data.kqn_ep_tx_nmh == NULL) {
-               CERROR("Can't reserve rx dma space\n");
-               kqswnal_shutdown(nal);
-               return (PTL_NO_SPACE);
-       }
-#else
-        dmareq.Waitfn   = DDI_DMA_SLEEP;
-        dmareq.ElanAddr = (E3_Addr) 0;
-        dmareq.Attr     = PTE_LOAD_LITTLE_ENDIAN;
-        dmareq.Perm     = ELAN_PERM_REMOTEWRITE;
-
-       rc = elan3_dma_reserve (kqswnal_data.kqn_ep->DmaState,
-                               KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL +
-                               KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE,
-                               &dmareq, &kqswnal_data.kqn_eprxdmahandle);
-       if (rc != DDI_SUCCESS)
-       {
-               CERROR ("Can't reserve rx dma space\n");
-               kqswnal_shutdown (nal);
-               return (PTL_NO_SPACE);
-       }
-#endif
-       /**********************************************************************/
-       /* Allocate/Initialise transmit descriptors */
-
-       kqswnal_data.kqn_txds = NULL;
-       for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++)
-       {
-               int           premapped_pages;
-               int           basepage = i * KQSW_NTXMSGPAGES;
-
-               PORTAL_ALLOC (ktx, sizeof(*ktx));
-               if (ktx == NULL) {
-                       kqswnal_shutdown (nal);
-                       return (PTL_NO_SPACE);
-               }
-
-               memset(ktx, 0, sizeof(*ktx));   /* NULL pointers; zero flags */
-               ktx->ktx_alloclist = kqswnal_data.kqn_txds;
-               kqswnal_data.kqn_txds = ktx;
-
-               PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE);
-               if (ktx->ktx_buffer == NULL)
-               {
-                       kqswnal_shutdown (nal);
-                       return (PTL_NO_SPACE);
-               }
-
-               /* Map pre-allocated buffer NOW, to save latency on transmit */
-               premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer,
-                                                       KQSW_TX_BUFFER_SIZE);
-#if MULTIRAIL_EKC
-               ep_dvma_load(kqswnal_data.kqn_ep, NULL, 
-                            ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, 
-                            kqswnal_data.kqn_ep_tx_nmh, basepage,
-                            &all_rails, &ktx->ktx_ebuffer);
-#else
-               elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-                                      kqswnal_data.kqn_eptxdmahandle,
-                                      ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE,
-                                      basepage, &ktx->ktx_ebuffer);
-#endif
-               ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */
-               ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */
-
-               INIT_LIST_HEAD (&ktx->ktx_delayed_list);
-
-               ktx->ktx_state = KTX_IDLE;
-#if MULTIRAIL_EKC
-               ktx->ktx_rail = -1;             /* unset rail */
-#endif
-               ktx->ktx_isnblk = (i >= KQSW_NTXMSGS);
-               list_add_tail (&ktx->ktx_list, 
-                              ktx->ktx_isnblk ? &kqswnal_data.kqn_nblk_idletxds :
-                                                &kqswnal_data.kqn_idletxds);
-       }
-
-       /**********************************************************************/
-       /* Allocate/Initialise receive descriptors */
-       kqswnal_data.kqn_rxds = NULL;
-       elan_page_idx = 0;
-       for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++)
-       {
-#if MULTIRAIL_EKC
-               EP_NMD        elanbuffer;
-#else
-               E3_Addr       elanbuffer;
-#endif
-               int           j;
-
-               PORTAL_ALLOC(krx, sizeof(*krx));
-               if (krx == NULL) {
-                       kqswnal_shutdown(nal);
-                       return (PTL_NO_SPACE);
-               }
-
-               memset(krx, 0, sizeof(*krx)); /* clear flags, null pointers etc */
-               krx->krx_alloclist = kqswnal_data.kqn_rxds;
-               kqswnal_data.kqn_rxds = krx;
-
-               if (i < KQSW_NRXMSGS_SMALL)
-               {
-                       krx->krx_npages = KQSW_NRXMSGPAGES_SMALL;
-                       krx->krx_eprx   = kqswnal_data.kqn_eprx_small;
-               }
-               else
-               {
-                       krx->krx_npages = KQSW_NRXMSGPAGES_LARGE;
-                       krx->krx_eprx   = kqswnal_data.kqn_eprx_large;
-               }
-
-               LASSERT (krx->krx_npages > 0);
-               for (j = 0; j < krx->krx_npages; j++)
-               {
-                       struct page *page = alloc_page(GFP_KERNEL);
-                       
-                       if (page == NULL) {
-                               kqswnal_shutdown (nal);
-                               return (PTL_NO_SPACE);
-                       }
-
-                       krx->krx_kiov[j].kiov_page = page;
-                       LASSERT(page_address(page) != NULL);
-
-#if MULTIRAIL_EKC
-                       ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                                    page_address(page),
-                                    PAGE_SIZE, kqswnal_data.kqn_ep_rx_nmh,
-                                    elan_page_idx, &all_rails, &elanbuffer);
-                       
-                       if (j == 0) {
-                               krx->krx_elanbuffer = elanbuffer;
-                       } else {
-                               rc = ep_nmd_merge(&krx->krx_elanbuffer,
-                                                 &krx->krx_elanbuffer, 
-                                                 &elanbuffer);
-                               /* NB contiguous mapping */
-                               LASSERT(rc);
-                       }
-#else
-                       elan3_dvma_kaddr_load(kqswnal_data.kqn_ep->DmaState,
-                                             kqswnal_data.kqn_eprxdmahandle,
-                                             page_address(page),
-                                             PAGE_SIZE, elan_page_idx,
-                                             &elanbuffer);
-                       if (j == 0)
-                               krx->krx_elanbuffer = elanbuffer;
-
-                       /* NB contiguous mapping */
-                       LASSERT (elanbuffer == krx->krx_elanbuffer + j * PAGE_SIZE);
-#endif
-                       elan_page_idx++;
-
-               }
-       }
-       LASSERT (elan_page_idx ==
-                (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) +
-                (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE));
-
-       /**********************************************************************/
-       /* Network interface ready to initialise */
-
-       my_process_id.nid = kqswnal_elanid2nid(kqswnal_data.kqn_elanid);
-       my_process_id.pid = requested_pid;
-
-       rc = lib_init(&kqswnal_lib, nal, my_process_id,
-                     requested_limits, actual_limits);
-        if (rc != PTL_OK)
-       {
-               CERROR ("lib_init failed %d\n", rc);
-               kqswnal_shutdown (nal);
-               return (rc);
-       }
-
-       kqswnal_data.kqn_init = KQN_INIT_LIB;
-
-       /**********************************************************************/
-       /* Queue receives, now that it's OK to run their completion callbacks */
-
-       for (krx = kqswnal_data.kqn_rxds; krx != NULL; krx = krx->krx_alloclist) {
-               /* NB this enqueue can allocate/sleep (attr == 0) */
-               krx->krx_state = KRX_POSTED;
-#if MULTIRAIL_EKC
-               rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-                                     &krx->krx_elanbuffer, 0);
-#else
-               rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx,
-                                     krx->krx_elanbuffer,
-                                     krx->krx_npages * PAGE_SIZE, 0);
-#endif
-               if (rc != EP_SUCCESS)
-               {
-                       CERROR ("failed ep_queue_receive %d\n", rc);
-                       kqswnal_shutdown (nal);
-                       return (PTL_FAIL);
-               }
-       }
-
-       /**********************************************************************/
-       /* Spawn scheduling threads */
-       for (i = 0; i < num_online_cpus(); i++) {
-               rc = kqswnal_thread_start (kqswnal_scheduler, NULL);
-               if (rc != 0)
-               {
-                       CERROR ("failed to spawn scheduling thread: %d\n", rc);
-                       kqswnal_shutdown (nal);
-                       return (PTL_FAIL);
-               }
-       }
-
-       /**********************************************************************/
-       /* Connect to the router */
-       rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface);
-       CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc);
-
-       rc = libcfs_nal_cmd_register (QSWNAL, &kqswnal_cmd, NULL);
-       if (rc != 0) {
-               CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-               kqswnal_shutdown (nal);
-               return (PTL_FAIL);
-       }
-
-       kqswnal_data.kqn_init = KQN_INIT_ALL;
-
-       printk(KERN_INFO "Lustre: Routing QSW NAL loaded on node %d of %d "
-              "(Routing %s, initial mem %d)\n", 
-              kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes,
-              kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled",
-              pkmem);
-
-       return (PTL_OK);
-}
-
-void __exit
-kqswnal_finalise (void)
-{
-#if CONFIG_SYSCTL
-       if (kqswnal_tunables.kqn_sysctl != NULL)
-               unregister_sysctl_table (kqswnal_tunables.kqn_sysctl);
-#endif
-       PtlNIFini(kqswnal_ni);
-
-       ptl_unregister_nal(QSWNAL);
-}
-
-static int __init
-kqswnal_initialise (void)
-{
-       int   rc;
-
-       kqswnal_api.nal_ni_init = kqswnal_startup;
-       kqswnal_api.nal_ni_fini = kqswnal_shutdown;
-
-       /* Initialise dynamic tunables to defaults once only */
-       kqswnal_tunables.kqn_optimized_puts = KQSW_OPTIMIZED_PUTS;
-       kqswnal_tunables.kqn_optimized_gets = KQSW_OPTIMIZED_GETS;
-       
-       rc = ptl_register_nal(QSWNAL, &kqswnal_api);
-       if (rc != PTL_OK) {
-               CERROR("Can't register QSWNAL: %d\n", rc);
-               return (-ENOMEM);               /* or something... */
-       }
-
-       /* Pure gateways, and the workaround for 'EKC blocks forever until
-        * the service is active' want the NAL started up at module load
-        * time... */
-       rc = PtlNIInit(QSWNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kqswnal_ni);
-       if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-               ptl_unregister_nal(QSWNAL);
-               return (-ENODEV);
-       }
-
-#if CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kqswnal_tunables.kqn_sysctl = 
-               register_sysctl_table (kqswnal_top_ctl_table, 0);
-#endif
-       return (0);
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel Quadrics/Elan NAL v1.01");
-MODULE_LICENSE("GPL");
-
-module_init (kqswnal_initialise);
-module_exit (kqswnal_finalise);
diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h
deleted file mode 100644 (file)
index 6e04752..0000000
+++ /dev/null
@@ -1,376 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Basic library routines. 
- *
- */
-
-#ifndef _QSWNAL_H
-#define _QSWNAL_H
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <qsnet/kernel.h>
-#undef printf                                   /* nasty QSW #define */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#if MULTIRAIL_EKC
-# include <elan/epcomms.h>
-#else
-# include <elan3/elanregs.h>
-# include <elan3/elandev.h>
-# include <elan3/elanvp.h>
-# include <elan3/elan3mmu.h>
-# include <elan3/elanctxt.h>
-# include <elan3/elandebug.h>
-# include <elan3/urom_addrs.h>
-# include <elan3/busops.h>
-# include <elan3/kcomm.h>
-#endif
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#include <linux/locks.h>        /* wait_on_buffer */
-#else
-#include <linux/buffer_head.h>  /* wait_on_buffer */
-#endif
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/sysctl.h>
-#include <asm/segment.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <linux/kpr.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-/* Optional checksum support; compiled out while KQSW_CHECKSUM is 0.
- * KQSW_CSUM_SIZE is the checksum space added to the portals header size
- * (two kqsw_csum_t when enabled, nothing otherwise); KQSW_HDR_SIZE is
- * sizeof (ptl_hdr_t) plus that allowance. */
-#define KQSW_CHECKSUM   0
-#if KQSW_CHECKSUM
-typedef unsigned long kqsw_csum_t;
-#define KQSW_CSUM_SIZE  (2 * sizeof (kqsw_csum_t))
-#else
-#define KQSW_CSUM_SIZE  0
-#endif
-#define KQSW_HDR_SIZE   (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE)
-
-/*
- * Performance Tuning defines
- * NB no mention of PAGE_SIZE for interoperability
- * NOTE(review): KQSW_NNBLK_TXMSGS below does depend on PAGE_SIZE; it is
- * described as a count of reserved transmit messages, so presumably it
- * does not affect interoperability -- confirm.
- */
-#define KQSW_MAXPAYLOAD                 PTL_MTU
-#define KQSW_SMALLPAYLOAD               ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */
-
-#define KQSW_TX_MAXCONTIG               (1<<10) /* largest payload that gets made contiguous on transmit */
-
-#define KQSW_NTXMSGS                    8       /* # normal transmit messages */
-#define KQSW_NNBLK_TXMSGS               (PAGE_SIZE == 4096 ? 512 : 256)     /* # reserved transmit messages if can't block */ /* avoid qsnet crash b=5291 */
-
-#define KQSW_NRXMSGS_LARGE              64      /* # large receive buffers */
-#define KQSW_EP_ENVELOPES_LARGE         256     /* # large ep envelopes */
-
-#define KQSW_NRXMSGS_SMALL              256     /* # small receive buffers */
-#define KQSW_EP_ENVELOPES_SMALL         2048    /* # small ep envelopes */
-
-#define KQSW_RESCHED                    100     /* # busy loops that forces scheduler to yield */
-
-#define KQSW_OPTIMIZED_GETS             1       /* optimize gets >= this size */
-#define KQSW_OPTIMIZED_PUTS            (32<<10) /* optimize puts >= this size */
-#define KQSW_COPY_SMALL_FWD             0       /* copy small fwd messages to pre-mapped buffer? */
-
-/*
- * derived constants
- * (each comment below describes the #define immediately above it)
- */
-
-#define KQSW_TX_BUFFER_SIZE     (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG)
-/* Size of the pre-allocated tx buffer (hdr + small payload) */
-
-#define KQSW_NTXMSGPAGES        (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1)
-/* Reserve elan address space for pre-allocated and pre-mapped transmit
- * buffer and a full payload too.  The two '+ 1' extra pages allow for
- * page alignment */
-
-#define KQSW_NRXMSGPAGES_SMALL  (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD))
-/* small receive: hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_SMALL  (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE)
-
-#define KQSW_NRXMSGPAGES_LARGE  (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD))
-/* large receive: hdr/payload always contiguous and page aligned */
-#define KQSW_NRXMSGBYTES_LARGE  (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE)
-/* biggest complete packet we can receive (or transmit) */
-
-/* Remote memory descriptor: a fragment count followed by a variable-length
- * array of that many fragments.  The fragment type depends on the EKC
- * flavour (EP_NMD for multirail, EP_IOVEC otherwise). */
-typedef struct
-{
-        __u32            kqrmd_nfrag;           /* # frags */
-#if MULTIRAIL_EKC
-        EP_NMD           kqrmd_frag[0];         /* actual frags */
-#else
-        EP_IOVEC         kqrmd_frag[0];         /* actual frags */
-#endif
-} kqswnal_remotemd_t;
-/* Receive descriptor: one pre-allocated receive buffer plus the state
- * needed to parse it, forward it, reply to it and repost it. */
-typedef struct kqswnal_rx
-{
-        struct list_head krx_list;              /* enqueue -> thread */
-        struct kqswnal_rx *krx_alloclist;       /* stack in kqn_rxds */
-        EP_RCVR         *krx_eprx;              /* port to post receives to */
-        EP_RXD          *krx_rxd;               /* receive descriptor (for repost) */
-#if MULTIRAIL_EKC
-        EP_NMD           krx_elanbuffer;        /* contiguous Elan buffer */
-#else
-        E3_Addr          krx_elanbuffer;        /* contiguous Elan buffer */
-#endif
-        int              krx_npages;            /* # pages in receive buffer */
-        int              krx_nob;               /* Number Of Bytes received into buffer */
-        int              krx_rpc_reply_needed;  /* peer waiting for EKC RPC reply */
-        int              krx_rpc_reply_status;  /* what status to send */
-        int              krx_state;             /* what this RX is doing (KRX_* below) */
-        atomic_t         krx_refcount;          /* how to tell when rpc is done; kqswnal_rx_done() runs when it hits 0 */
-        kpr_fwd_desc_t   krx_fwd;               /* embedded forwarding descriptor */
-        ptl_kiov_t       krx_kiov[KQSW_NRXMSGPAGES_LARGE]; /* buffer frags (sized for the largest receive) */
-}  kqswnal_rx_t;
-
-/* values for krx_state */
-#define KRX_POSTED       1                      /* receiving */
-#define KRX_PARSE        2                      /* ready to be parsed */
-#define KRX_COMPLETING   3                      /* waiting to be completed */
-
-/* Transmit descriptor: a pre-allocated buffer for the header and small
- * payloads, a range of reserved Elan pages for mapping larger payloads,
- * and the bookkeeping for the message currently using it. */
-typedef struct kqswnal_tx
-{
-        struct list_head  ktx_list;             /* enqueue idle/active */
-        struct list_head  ktx_delayed_list;     /* enqueue delayedtxds */
-        struct kqswnal_tx *ktx_alloclist;       /* stack in kqn_txds */
-        unsigned int      ktx_isnblk:1;         /* reserved descriptor? */
-        unsigned int      ktx_state:7;          /* What I'm doing (KTX_* below) */
-        unsigned int      ktx_firsttmpfrag:1;   /* ktx_frags[0] is in my ebuffer ? 0 : 1 */
-        uint32_t          ktx_basepage;         /* page offset in reserved elan tx vaddrs for mapping pages */
-        int               ktx_npages;           /* pages reserved for mapping messages */
-        int               ktx_nmappedpages;     /* # pages mapped for current message */
-        int               ktx_port;             /* destination ep port */
-        ptl_nid_t         ktx_nid;              /* destination node */
-        void             *ktx_args[3];          /* completion passthru */
-        char             *ktx_buffer;           /* pre-allocated contiguous buffer for hdr + small payloads */
-        unsigned long     ktx_launchtime;       /* when (in jiffies) the transmit was launched */
-
-        /* debug/info fields */
-        pid_t             ktx_launcher;         /* pid of launching process */
-
-        int               ktx_nfrag;            /* # message frags */
-#if MULTIRAIL_EKC
-        int               ktx_rail;             /* preferred rail (-1 while unset) */
-        EP_NMD            ktx_ebuffer;          /* elan mapping of ktx_buffer */
-        EP_NMD            ktx_frags[EP_MAXFRAG];/* elan mapping of msg frags */
-#else
-        E3_Addr           ktx_ebuffer;          /* elan address of ktx_buffer */
-        EP_IOVEC          ktx_frags[EP_MAXFRAG];/* msg frags (elan vaddrs) */
-#endif
-} kqswnal_tx_t;
-
-/* values for ktx_state */
-#define KTX_IDLE        0                       /* on kqn_(nblk_)idletxds */
-#define KTX_FORWARDING  1                       /* sending a forwarded packet */
-#define KTX_SENDING     2                       /* normal send */
-#define KTX_GETTING     3                       /* sending optimised get */
-#define KTX_PUTTING     4                       /* sending optimised put */
-#define KTX_RDMAING     5                       /* handling optimised put/get */
-/* Run-time tunables; the module init code seeds the two thresholds from
- * KQSW_OPTIMIZED_PUTS / KQSW_OPTIMIZED_GETS. */
-typedef struct
-{
-        /* dynamic tunables... */
-        int                      kqn_optimized_puts;  /* optimized PUTs? */
-        int                      kqn_optimized_gets;  /* optimized GETs? */
-#if CONFIG_SYSCTL
-        struct ctl_table_header *kqn_sysctl;          /* sysctl interface */
-#endif        
-} kqswnal_tunables_t;
-/* Global state of the QSW NAL (the single instance is kqswnal_data) */
-typedef struct
-{
-        char               kqn_init;            /* what's been initialised (KQN_INIT_* below) */
-        char               kqn_shuttingdown;    /* I'm trying to shut down */
-        atomic_t           kqn_nthreads;        /* # threads running */
-
-        kqswnal_rx_t      *kqn_rxds;            /* stack of all the receive descriptors */
-        kqswnal_tx_t      *kqn_txds;            /* stack of all the transmit descriptors */
-
-        struct list_head   kqn_idletxds;        /* transmit descriptors free to use */
-        struct list_head   kqn_nblk_idletxds;   /* reserved free transmit descriptors */
-        struct list_head   kqn_activetxds;      /* transmit descriptors being used */
-        spinlock_t         kqn_idletxd_lock;    /* serialise idle txd access */
-        wait_queue_head_t  kqn_idletxd_waitq;   /* sender blocks here waiting for idle txd */
-        struct list_head   kqn_idletxd_fwdq;    /* forwarded packets block here waiting for idle txd */
-        atomic_t           kqn_pending_txs;     /* # transmits being prepped */
-        
-        spinlock_t         kqn_sched_lock;      /* serialise packet schedulers */
-        wait_queue_head_t  kqn_sched_waitq;     /* scheduler blocks here */
-
-        struct list_head   kqn_readyrxds;       /* rxds full of data */
-        struct list_head   kqn_delayedfwds;     /* delayed forwards */
-        struct list_head   kqn_delayedtxds;     /* delayed transmits */
-
-#if MULTIRAIL_EKC
-        EP_SYS            *kqn_ep;              /* elan system */
-        EP_NMH            *kqn_ep_tx_nmh;       /* elan reserved tx vaddrs */
-        EP_NMH            *kqn_ep_rx_nmh;       /* elan reserved rx vaddrs */
-#else
-        EP_DEV            *kqn_ep;              /* elan device */
-        ELAN3_DMA_HANDLE  *kqn_eptxdmahandle;   /* elan reserved tx vaddrs */
-        ELAN3_DMA_HANDLE  *kqn_eprxdmahandle;   /* elan reserved rx vaddrs */
-#endif
-        EP_XMTR           *kqn_eptx;            /* elan transmitter */
-        EP_RCVR           *kqn_eprx_small;      /* elan receiver (small messages) */
-        EP_RCVR           *kqn_eprx_large;      /* elan receiver (large messages) */
-        kpr_router_t       kqn_router;          /* connection to Kernel Portals Router module */
-
-        ptl_nid_t          kqn_nid_offset;      /* this cluster's NID offset */
-        int                kqn_nnodes;          /* this cluster's size */
-        int                kqn_elanid;          /* this node's elan ID */
-
-        EP_STATUSBLK       kqn_rpc_success;     /* preset RPC reply status blocks */
-        EP_STATUSBLK       kqn_rpc_failed;
-}  kqswnal_data_t;
-
-/* kqn_init state */
-#define KQN_INIT_NOTHING        0               /* MUST BE ZERO so zeroed state is initialised OK */
-#define KQN_INIT_DATA           1
-#define KQN_INIT_LIB            2
-#define KQN_INIT_ALL            3
-/* Globals and entry points declared here for use by files that include this header */
-extern lib_nal_t           kqswnal_lib;
-extern nal_t               kqswnal_api;
-extern kqswnal_tunables_t  kqswnal_tunables;
-extern kqswnal_data_t      kqswnal_data;
-
-extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg);
-extern void kqswnal_rxhandler(EP_RXD *rxd);
-extern int kqswnal_scheduler (void *);
-extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kqswnal_rx_done (kqswnal_rx_t *krx);
-
-/* Convert an Elan node ID to a portals NID by adding this cluster's
- * NID offset. */
-static inline ptl_nid_t
-kqswnal_elanid2nid (int elanid)
-{
-        ptl_nid_t nid = kqswnal_data.kqn_nid_offset;
-
-        nid += elanid;
-        return (nid);
-}
-
-/* Convert a portals NID to an Elan node ID.  Returns -1 when the NID lies
- * outside [kqn_nid_offset, kqn_nid_offset + kqn_nnodes), i.e. it is not
- * in this cluster. */
-static inline int
-kqswnal_nid2elanid (ptl_nid_t nid)
-{
-        int in_cluster = (nid >= kqswnal_data.kqn_nid_offset &&
-                          nid < kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes);
-
-        if (in_cluster)
-                return (nid - kqswnal_data.kqn_nid_offset);
-
-        return (-1);
-}
-
-/* NID derived from the node that ep_rxd_node() reports for this
- * receive's descriptor. */
-static inline ptl_nid_t
-kqswnal_rx_nid(kqswnal_rx_t *krx)
-{
-        int elanid = ep_rxd_node(krx->krx_rxd);
-
-        return (kqswnal_elanid2nid(elanid));
-}
-
-/* Number of pages touched by the 'nob' bytes starting at 'base'.
- * Asserts that the range does not wrap the address space. */
-static inline int
-kqswnal_pages_spanned (void *base, int nob)
-{
-        unsigned long start      = (unsigned long)base;
-        unsigned long end        = start + (nob - 1);   /* last byte */
-        unsigned long first_page = start >> PAGE_SHIFT;
-        unsigned long last_page  = end >> PAGE_SHIFT;
-
-        LASSERT (last_page >= first_page);      /* can't wrap address space */
-        return (last_page - first_page + 1);
-}
-
-#if KQSW_CHECKSUM
-/* Add each of the 'nob' bytes at 'base' to the running checksum 'sum'
- * and return the result; a non-positive 'nob' returns 'sum' unchanged. */
-static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob)
-{
-        unsigned char *bytes = (unsigned char *)base;
-        int            i;
-
-        for (i = 0; i < nob; i++)
-                sum += bytes[i];
-
-        return (sum);
-}
-#endif
-
-/* Drop one reference on 'krx'; whoever drops the last one completes the
- * receive via kqswnal_rx_done(). */
-static inline void kqswnal_rx_decref (kqswnal_rx_t *krx)
-{
-        LASSERT (atomic_read (&krx->krx_refcount) > 0);
-
-        if (!atomic_dec_and_test (&krx->krx_refcount))
-                return;                         /* still referenced */
-
-        kqswnal_rx_done(krx);
-}
-
-#if MULTIRAIL_EKC
-# ifndef EP_RAILMASK_ALL
-#  error "old (unsupported) version of EKC headers"
-# endif
-#else
-/* multirail defines these in <elan/epcomms.h> */
-#define EP_MSG_SVC_PORTALS_SMALL      (0x10)  /* Portals over elan port number (small payloads) */
-#define EP_MSG_SVC_PORTALS_LARGE      (0x11)  /* Portals over elan port number (large payloads) */
-/* NB small/large message sizes are GLOBAL constants */
-
-/* A minimal attempt to minimise inline #ifdeffing: give the non-multirail
- * EKC the names the rest of the code uses unconditionally */
-
-#define EP_SUCCESS      ESUCCESS
-#define EP_ENOMEM      ENOMEM
-/* ep_alloc_xmtr(): wraps ep_alloc_large_xmtr() */
-static inline EP_XMTR *
-ep_alloc_xmtr(EP_DEV *e) 
-{
-        return (ep_alloc_large_xmtr(e));
-}
-/* ep_alloc_rcvr(): wraps ep_install_large_rcvr() */
-static inline EP_RCVR *
-ep_alloc_rcvr(EP_DEV *e, int svc, int nenv)
-{
-        return (ep_install_large_rcvr(e, svc, nenv));
-}
-/* ep_free_xmtr(): wraps ep_free_large_xmtr() */
-static inline void
-ep_free_xmtr(EP_XMTR *x) 
-{
-        ep_free_large_xmtr(x);
-}
-/* ep_free_rcvr(): wraps ep_remove_large_rcvr() */
-static inline void
-ep_free_rcvr(EP_RCVR *r)
-{
-        ep_remove_large_rcvr(r);
-}
-#endif
-
-#endif /* _QSWNAL_H */
diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c
deleted file mode 100644 (file)
index 7aee376..0000000
+++ /dev/null
@@ -1,2008 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * W. Marcus Miller - Based on ksocknal
- *
- * This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "qswnal.h"
-
-/*
- *  LIB functions follow
- *
- */
-/* Report the distance to 'nid' in '*dist': 0 for this node, 1 for a NID
- * that maps to an Elan ID in this cluster, 2 for anything else (reached
- * via a router).  Always returns 0. */
-static int
-kqswnal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        unsigned long hops;
-
-        if (nid == nal->libnal_ni.ni_pid.nid)
-                hops = 0;                       /* it's me */
-        else if (kqswnal_nid2elanid (nid) < 0)
-                hops = 2;                       /* via router */
-        else
-                hops = 1;                       /* it's my peer */
-
-        *dist = hops;
-        return (0);
-}
-
-/* Notify the router (kpr_notify) about the destination of 'ktx', passing
- * the wall-clock second at which the transmit was launched: the current
- * time minus the whole seconds elapsed since ktx_launchtime. */
-void
-kqswnal_notify_peer_down(kqswnal_tx_t *ktx)
-{
-        struct timeval     tv;
-        time_t             launched;
-
-        do_gettimeofday (&tv);
-
-        /* convert the jiffies since launch into seconds ago */
-        launched = tv.tv_sec - (jiffies - ktx->ktx_launchtime)/HZ;
-
-        kpr_notify(&kqswnal_data.kqn_router, ktx->ktx_nid, 0, launched);
-}
-/* Unload the temporary Elan mappings made for the current message on
- * 'ktx' and reset ktx_nmappedpages to 0.  A no-op when nothing is mapped,
- * except that multirail builds always clear the preferred rail. */
-void
-kqswnal_unmap_tx (kqswnal_tx_t *ktx)
-{
-#if MULTIRAIL_EKC
-        int      i;
-
-        ktx->ktx_rail = -1;                     /* unset rail */
-#endif
-
-        if (ktx->ktx_nmappedpages == 0)
-                return;
-        
-#if MULTIRAIL_EKC
-        CDEBUG(D_NET, "%p unloading %d frags starting at %d\n",
-               ktx, ktx->ktx_nfrag, ktx->ktx_firsttmpfrag);
-
-        /* frags below ktx_firsttmpfrag are skipped: they are not temporary */
-        for (i = ktx->ktx_firsttmpfrag; i < ktx->ktx_nfrag; i++)
-                ep_dvma_unload(kqswnal_data.kqn_ep,
-                               kqswnal_data.kqn_ep_tx_nmh,
-                               &ktx->ktx_frags[i]);
-#else
-        CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n",
-                ktx, ktx->ktx_nfrag, ktx->ktx_basepage, ktx->ktx_nmappedpages);
-
-        LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages);
-        LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <=
-                 kqswnal_data.kqn_eptxdmahandle->NumDvmaPages);
-
-        elan3_dvma_unload(kqswnal_data.kqn_ep->DmaState,
-                          kqswnal_data.kqn_eptxdmahandle,
-                          ktx->ktx_basepage, ktx->ktx_nmappedpages);
-#endif
-        ktx->ktx_nmappedpages = 0;
-}
-/* Map 'nob' bytes, starting 'offset' bytes into the page fragments
- * kiov[0..niov-1], into the Elan pages reserved for 'ktx', appending the
- * resulting frags to ktx_frags[].
- *
- * Returns 0 on success, -ENETDOWN if no rail is available (multirail
- * only), or -EMSGSIZE if the message needs more than ktx_npages pages or
- * more than EP_MAXFRAG frags.  ktx_nmappedpages is updated after every
- * successful load so a failed call leaves an accurate count behind;
- * ktx_nfrag is only updated on success. */
-int
-kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int offset, int nob, int niov, ptl_kiov_t *kiov)
-{
-        int       nfrags    = ktx->ktx_nfrag;
-        int       nmapped   = ktx->ktx_nmappedpages;
-        int       maxmapped = ktx->ktx_npages;
-        uint32_t  basepage  = ktx->ktx_basepage + nmapped;
-        char     *ptr;
-#if MULTIRAIL_EKC
-        EP_RAILMASK railmask;
-        int         rail;
-
-        if (ktx->ktx_rail < 0)
-                ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
-                                                 EP_RAILMASK_ALL,
-                                                 kqswnal_nid2elanid(ktx->ktx_nid));
-        rail = ktx->ktx_rail;
-        if (rail < 0) {
-                CERROR("No rails available for "LPX64"\n", ktx->ktx_nid);
-                return (-ENETDOWN);
-        }
-        railmask = 1 << rail;
-#endif
-        LASSERT (nmapped <= maxmapped);
-        LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-        LASSERT (nfrags <= EP_MAXFRAG);
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before 'offset' */
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                kiov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-        /* one page frag per iteration until all 'nob' bytes are mapped */
-        do {
-                int  fraglen = kiov->kiov_len - offset;
-
-                /* each page frag is contained in one page */
-                LASSERT (kiov->kiov_offset + kiov->kiov_len <= PAGE_SIZE);
-
-                if (fraglen > nob)
-                        fraglen = nob;
-                /* each kiov frag uses exactly one reserved page */
-                nmapped++;
-                if (nmapped > maxmapped) {
-                        CERROR("Can't map message in %d pages (max %d)\n",
-                               nmapped, maxmapped);
-                        return (-EMSGSIZE);
-                }
-
-                if (nfrags == EP_MAXFRAG) {
-                        CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-                               EP_MAXFRAG);
-                        return (-EMSGSIZE);
-                }
-
-                /* XXX this is really crap, but we'll have to kmap until
-                 * EKC has a page (rather than vaddr) mapping interface */
-
-                ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-
-                CDEBUG(D_NET,
-                       "%p[%d] loading %p for %d, page %d, %d total\n",
-                        ktx, nfrags, ptr, fraglen, basepage, nmapped);
-
-#if MULTIRAIL_EKC
-                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                             ptr, fraglen,
-                             kqswnal_data.kqn_ep_tx_nmh, basepage,
-                             &railmask, &ktx->ktx_frags[nfrags]);
-
-                if (nfrags == ktx->ktx_firsttmpfrag ||
-                    !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags])) {
-                        /* new frag if this is the first or can't merge */
-                        nfrags++;
-                }
-#else
-                elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-                                       kqswnal_data.kqn_eptxdmahandle,
-                                       ptr, fraglen,
-                                       basepage, &ktx->ktx_frags[nfrags].Base);
-
-                if (nfrags > 0 &&                /* previous frag mapped */
-                    ktx->ktx_frags[nfrags].Base == /* contiguous with this one */
-                    (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len))
-                        /* just extend previous */
-                        ktx->ktx_frags[nfrags - 1].Len += fraglen;
-                else {
-                        ktx->ktx_frags[nfrags].Len = fraglen;
-                        nfrags++;                /* new frag */
-                }
-#endif
-
-                kunmap (kiov->kiov_page);
-                
-                /* keep in loop for failure case */
-                ktx->ktx_nmappedpages = nmapped;
-
-                basepage++;
-                kiov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;                     /* only the first frag is entered part-way */
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        ktx->ktx_nfrag = nfrags;
-        CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-                ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
-        return (0);
-}
-/* Map 'nob' bytes, starting 'offset' bytes into the virtual address
- * fragments iov[0..niov-1], into the Elan pages reserved for 'ktx',
- * appending the resulting frags to ktx_frags[].  Unlike the kiov variant
- * a single iov entry may span several pages.
- *
- * Returns 0 on success, -ENETDOWN if no rail is available (multirail
- * only), or -EMSGSIZE if the message needs more than ktx_npages pages or
- * more than EP_MAXFRAG frags.  ktx_nmappedpages is updated after every
- * successful load; ktx_nfrag is only updated on success.
- *
- * NOTE(review): a zero-length iov entry after the first gives
- * fraglen == 0, which kqswnal_pages_spanned() does not obviously handle
- * -- confirm callers never pass one.
- * NOTE(review): the non-multirail merge test is 'nfrags > 0' where the
- * multirail one is 'nfrags == ktx_firsttmpfrag' -- confirm the difference
- * is intended. */
-int
-kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int offset, int nob, 
-                    int niov, struct iovec *iov)
-{
-        int       nfrags    = ktx->ktx_nfrag;
-        int       nmapped   = ktx->ktx_nmappedpages;
-        int       maxmapped = ktx->ktx_npages;
-        uint32_t  basepage  = ktx->ktx_basepage + nmapped;
-#if MULTIRAIL_EKC
-        EP_RAILMASK railmask;
-        int         rail;
-        
-        if (ktx->ktx_rail < 0)
-                ktx->ktx_rail = ep_xmtr_prefrail(kqswnal_data.kqn_eptx,
-                                                 EP_RAILMASK_ALL,
-                                                 kqswnal_nid2elanid(ktx->ktx_nid));
-        rail = ktx->ktx_rail;
-        if (rail < 0) {
-                CERROR("No rails available for "LPX64"\n", ktx->ktx_nid);
-                return (-ENETDOWN);
-        }
-        railmask = 1 << rail;
-#endif
-        LASSERT (nmapped <= maxmapped);
-        LASSERT (nfrags >= ktx->ktx_firsttmpfrag);
-        LASSERT (nfrags <= EP_MAXFRAG);
-        LASSERT (niov > 0);
-        LASSERT (nob > 0);
-
-        /* skip complete frags before offset */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-        /* one iov entry per iteration until all 'nob' bytes are mapped */
-        do {
-                int  fraglen = iov->iov_len - offset;
-                long npages;
-                
-                if (fraglen > nob)
-                        fraglen = nob;
-                npages = kqswnal_pages_spanned (iov->iov_base, fraglen);
-
-                nmapped += npages;
-                if (nmapped > maxmapped) {
-                        CERROR("Can't map message in %d pages (max %d)\n",
-                               nmapped, maxmapped);
-                        return (-EMSGSIZE);
-                }
-
-                if (nfrags == EP_MAXFRAG) {
-                        CERROR("Message too fragmented in Elan VM (max %d frags)\n",
-                               EP_MAXFRAG);
-                        return (-EMSGSIZE);
-                }
-
-                CDEBUG(D_NET,
-                       "%p[%d] loading %p for %d, pages %d for %ld, %d total\n",
-                       ktx, nfrags, iov->iov_base + offset, fraglen, 
-                       basepage, npages, nmapped);
-
-#if MULTIRAIL_EKC
-                ep_dvma_load(kqswnal_data.kqn_ep, NULL,
-                             iov->iov_base + offset, fraglen,
-                             kqswnal_data.kqn_ep_tx_nmh, basepage,
-                             &railmask, &ktx->ktx_frags[nfrags]);
-
-                if (nfrags == ktx->ktx_firsttmpfrag ||
-                    !ep_nmd_merge(&ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags - 1],
-                                  &ktx->ktx_frags[nfrags])) {
-                        /* new frag if this is the first or can't merge */
-                        nfrags++;
-                }
-#else
-                elan3_dvma_kaddr_load (kqswnal_data.kqn_ep->DmaState,
-                                       kqswnal_data.kqn_eptxdmahandle,
-                                       iov->iov_base + offset, fraglen,
-                                       basepage, &ktx->ktx_frags[nfrags].Base);
-
-                if (nfrags > 0 &&                /* previous frag mapped */
-                    ktx->ktx_frags[nfrags].Base == /* contiguous with this one */
-                    (ktx->ktx_frags[nfrags-1].Base + ktx->ktx_frags[nfrags-1].Len))
-                        /* just extend previous */
-                        ktx->ktx_frags[nfrags - 1].Len += fraglen;
-                else {
-                        ktx->ktx_frags[nfrags].Len = fraglen;
-                        nfrags++;                /* new frag */
-                }
-#endif
-
-                /* keep in loop for failure case */
-                ktx->ktx_nmappedpages = nmapped;
-
-                basepage += npages;
-                iov++;
-                niov--;
-                nob -= fraglen;
-                offset = 0;                     /* only the first frag is entered part-way */
-
-                /* iov must not run out before end of data */
-                LASSERT (nob == 0 || niov > 0);
-
-        } while (nob > 0);
-
-        ktx->ktx_nfrag = nfrags;
-        CDEBUG (D_NET, "%p got %d frags over %d pages\n",
-                ktx, ktx->ktx_nfrag, ktx->ktx_nmappedpages);
-
-        return (0);
-}
-
-/* Return 'ktx' to the idle pool it belongs to, after unloading its
- * temporary mappings and marking it KTX_IDLE.  Reserved (non-blocking)
- * descriptors go straight back on kqn_nblk_idletxds.  For a normal
- * descriptor, blocked senders are woken and, unless shutting down, the
- * first forwarded packet queued on kqn_idletxd_fwdq (if any) is moved to
- * kqn_delayedfwds and the scheduler is woken to retry it. */
-void
-kqswnal_put_idle_tx (kqswnal_tx_t *ktx)
-{
-        kpr_fwd_desc_t   *fwd = NULL;
-        unsigned long     flags;
-
-        kqswnal_unmap_tx (ktx);                 /* release temporary mappings */
-        ktx->ktx_state = KTX_IDLE;
-
-        spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
-        list_del (&ktx->ktx_list);              /* take off active list */
-
-        if (ktx->ktx_isnblk) {
-                /* reserved for non-blocking tx */
-                list_add (&ktx->ktx_list, &kqswnal_data.kqn_nblk_idletxds);
-                spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-                return;
-        }
-
-        list_add (&ktx->ktx_list, &kqswnal_data.kqn_idletxds);
-
-        /* anything blocking for a tx descriptor? */
-        if (!kqswnal_data.kqn_shuttingdown &&
-            !list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */
-        {
-                CDEBUG(D_NET,"wakeup fwd\n");
-
-                fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next,
-                                  kpr_fwd_desc_t, kprfd_list);
-                list_del (&fwd->kprfd_list);
-        }
-
-        wake_up (&kqswnal_data.kqn_idletxd_waitq);
-
-        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-
-        if (fwd == NULL)
-                return;
-
-        /* schedule packet for forwarding again; this is done under the
-         * scheduler lock, after the idle txd lock has been dropped */
-        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
-        list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds);
-        wake_up (&kqswnal_data.kqn_sched_waitq);
-
-        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-}
-/* Take an idle transmit descriptor, move it to the active list, record
- * the launching pid and count it in kqn_pending_txs.
- *
- * A free "normal" descriptor is always preferred.  If none is free:
- *  - fwd != NULL (forwarding): never blocks; 'fwd' is queued on
- *    kqn_idletxd_fwdq and NULL is returned;
- *  - fwd == NULL, !may_block: a reserved non-blocking descriptor is used,
- *    or NULL is returned if that pool is exhausted;
- *  - fwd == NULL, may_block: sleeps until a normal descriptor is idle or
- *    shutdown starts, then retries.
- * Returns NULL once kqn_shuttingdown is set (a non-NULL 'fwd' is still
- * queued in that case). */
-kqswnal_tx_t *
-kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block)
-{
-        unsigned long  flags;
-        kqswnal_tx_t  *ktx = NULL;
-
-        /* NB every 'break' below leaves the loop with kqn_idletxd_lock held */
-        for (;;) {
-                spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags);
-
-                if (kqswnal_data.kqn_shuttingdown)
-                        break;
-
-                /* "normal" descriptor is free */
-                if (!list_empty (&kqswnal_data.kqn_idletxds)) {
-                        ktx = list_entry (kqswnal_data.kqn_idletxds.next,
-                                          kqswnal_tx_t, ktx_list);
-                        break;
-                }
-
-                if (fwd != NULL)                /* forwarded packet? */
-                        break;
-
-                /* doing a local transmit */
-                if (!may_block) {
-                        if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) {
-                                CERROR ("intr tx desc pool exhausted\n");
-                                break;
-                        }
-
-                        ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next,
-                                          kqswnal_tx_t, ktx_list);
-                        break;
-                }
-
-                /* block for idle tx */
-
-                spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-
-                CDEBUG (D_NET, "blocking for tx desc\n");
-                wait_event (kqswnal_data.kqn_idletxd_waitq,
-                            !list_empty (&kqswnal_data.kqn_idletxds) ||
-                            kqswnal_data.kqn_shuttingdown);
-        }
-
-        if (ktx != NULL) {
-                list_del (&ktx->ktx_list);
-                list_add (&ktx->ktx_list, &kqswnal_data.kqn_activetxds);
-                ktx->ktx_launcher = current->pid;
-                atomic_inc(&kqswnal_data.kqn_pending_txs);
-        } else if (fwd != NULL) {
-                /* queue forwarded packet until idle txd available */
-                CDEBUG (D_NET, "blocked fwd [%p]\n", fwd);
-                list_add_tail (&fwd->kprfd_list,
-                               &kqswnal_data.kqn_idletxd_fwdq);
-        }
-
-        spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags);
-
-        /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */
-        LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0);
-
-        return (ktx);
-}
-
-void
-kqswnal_tx_done (kqswnal_tx_t *ktx, int error)
-{
-        switch (ktx->ktx_state) {
-        case KTX_FORWARDING:       /* router asked me to forward this packet */
-                kpr_fwd_done (&kqswnal_data.kqn_router,
-                              (kpr_fwd_desc_t *)ktx->ktx_args[0], error);
-                break;
-
-        case KTX_RDMAING:          /* optimized GET/PUT handled */
-        case KTX_PUTTING:          /* optimized PUT sent */
-        case KTX_SENDING:          /* normal send */
-                lib_finalize (&kqswnal_lib, NULL,
-                              (lib_msg_t *)ktx->ktx_args[1],
-                              (error == 0) ? PTL_OK : PTL_FAIL);
-                break;
-
-        case KTX_GETTING:          /* optimized GET sent & REPLY received */
-                /* Complete the GET with success since we can't avoid
-                 * delivering a REPLY event; we committed to it when we
-                 * launched the GET */
-                lib_finalize (&kqswnal_lib, NULL, 
-                              (lib_msg_t *)ktx->ktx_args[1], PTL_OK);
-                lib_finalize (&kqswnal_lib, NULL,
-                              (lib_msg_t *)ktx->ktx_args[2],
-                              (error == 0) ? PTL_OK : PTL_FAIL);
-                break;
-
-        default:
-                LASSERT (0);
-        }
-
-        kqswnal_put_idle_tx (ktx);
-}
-
-static void
-kqswnal_txhandler(EP_TXD *txd, void *arg, int status)
-{
-        kqswnal_tx_t      *ktx = (kqswnal_tx_t *)arg;
-
-        LASSERT (txd != NULL);
-        LASSERT (ktx != NULL);
-
-        CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status);
-
-        if (status != EP_SUCCESS) {
-
-                CERROR ("Tx completion to "LPX64" failed: %d\n", 
-                        ktx->ktx_nid, status);
-
-                kqswnal_notify_peer_down(ktx);
-                status = -EHOSTDOWN;
-
-        } else switch (ktx->ktx_state) {
-
-        case KTX_GETTING:
-        case KTX_PUTTING:
-                /* RPC completed OK; but what did our peer put in the status
-                 * block? */
-#if MULTIRAIL_EKC
-                status = ep_txd_statusblk(txd)->Data[0];
-#else
-                status = ep_txd_statusblk(txd)->Status;
-#endif
-                break;
-                
-        case KTX_FORWARDING:
-        case KTX_SENDING:
-                status = 0;
-                break;
-                
-        default:
-                LBUG();
-                break;
-        }
-
-        kqswnal_tx_done (ktx, status);
-}
-
-int
-kqswnal_launch (kqswnal_tx_t *ktx)
-{
-        /* Don't block for transmit descriptor if we're in interrupt context */
-        int   attr = in_interrupt() ? (EP_NO_SLEEP | EP_NO_ALLOC) : 0;
-        int   dest = kqswnal_nid2elanid (ktx->ktx_nid);
-        unsigned long flags;
-        int   rc;
-
-        ktx->ktx_launchtime = jiffies;
-
-        if (kqswnal_data.kqn_shuttingdown)
-                return (-ESHUTDOWN);
-
-        LASSERT (dest >= 0);                    /* must be a peer */
-
-#if MULTIRAIL_EKC
-        if (ktx->ktx_nmappedpages != 0)
-                attr = EP_SET_PREFRAIL(attr, ktx->ktx_rail);
-#endif
-
-        switch (ktx->ktx_state) {
-        case KTX_GETTING:
-        case KTX_PUTTING:
-                /* NB ktx_frag[0] is the GET/PUT hdr + kqswnal_remotemd_t.
-                 * The other frags are the payload, awaiting RDMA */
-                rc = ep_transmit_rpc(kqswnal_data.kqn_eptx, dest,
-                                     ktx->ktx_port, attr,
-                                     kqswnal_txhandler, ktx,
-                                     NULL, ktx->ktx_frags, 1);
-                break;
-
-        case KTX_FORWARDING:
-        case KTX_SENDING:
-#if MULTIRAIL_EKC
-                rc = ep_transmit_message(kqswnal_data.kqn_eptx, dest,
-                                         ktx->ktx_port, attr,
-                                         kqswnal_txhandler, ktx,
-                                         NULL, ktx->ktx_frags, ktx->ktx_nfrag);
-#else
-                rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest,
-                                       ktx->ktx_port, attr, 
-                                       kqswnal_txhandler, ktx, 
-                                       ktx->ktx_frags, ktx->ktx_nfrag);
-#endif
-                break;
-                
-        default:
-                LBUG();
-                rc = -EINVAL;                   /* no compiler warning please */
-                break;
-        }
-
-        switch (rc) {
-        case EP_SUCCESS: /* success */
-                return (0);
-
-        case EP_ENOMEM: /* can't allocate ep txd => queue for later */
-                spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
-                list_add_tail (&ktx->ktx_delayed_list, &kqswnal_data.kqn_delayedtxds);
-                wake_up (&kqswnal_data.kqn_sched_waitq);
-
-                spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-                return (0);
-
-        default: /* fatal error */
-                CERROR ("Tx to "LPX64" failed: %d\n", ktx->ktx_nid, rc);
-                kqswnal_notify_peer_down(ktx);
-                return (-EHOSTUNREACH);
-        }
-}
-
-#if 0
-/* Map the 'type' field of a portals header onto a printable name.
- * NB hdr->type is compared as-is, with no byte swapping. */
-static char *
-hdr_type_string (ptl_hdr_t *hdr)
-{
-        char *name;
-
-        switch (hdr->type) {
-        case PTL_MSG_ACK:
-                name = "ACK";
-                break;
-        case PTL_MSG_PUT:
-                name = "PUT";
-                break;
-        case PTL_MSG_GET:
-                name = "GET";
-                break;
-        case PTL_MSG_REPLY:
-                name = "REPLY";
-                break;
-        default:
-                name = "<UNKNOWN>";
-                break;
-        }
-
-        return (name);
-}
-
-static void
-kqswnal_cerror_hdr(ptl_hdr_t * hdr)
-{
-        char *type_str = hdr_type_string (hdr);
-
-        CERROR("P3 Header at %p of type %s length %d\n", hdr, type_str,
-               le32_to_cpu(hdr->payload_length));
-        CERROR("    From nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->src_nid),
-               le32_to_cpu(hdr->src_pid));
-        CERROR("    To nid/pid "LPU64"/%u\n", le64_to_cpu(hdr->dest_nid),
-               le32_to_cpu(hdr->dest_pid));
-
-        switch (le32_to_cpu(hdr->type)) {
-        case PTL_MSG_PUT:
-                CERROR("    Ptl index %d, ack md "LPX64"."LPX64", "
-                       "match bits "LPX64"\n",
-                       le32_to_cpu(hdr->msg.put.ptl_index),
-                       hdr->msg.put.ack_wmd.wh_interface_cookie,
-                       hdr->msg.put.ack_wmd.wh_object_cookie,
-                       le64_to_cpu(hdr->msg.put.match_bits));
-                CERROR("    offset %d, hdr data "LPX64"\n",
-                       le32_to_cpu(hdr->msg.put.offset),
-                       hdr->msg.put.hdr_data);
-                break;
-
-        case PTL_MSG_GET:
-                CERROR("    Ptl index %d, return md "LPX64"."LPX64", "
-                       "match bits "LPX64"\n",
-                       le32_to_cpu(hdr->msg.get.ptl_index),
-                       hdr->msg.get.return_wmd.wh_interface_cookie,
-                       hdr->msg.get.return_wmd.wh_object_cookie,
-                       hdr->msg.get.match_bits);
-                CERROR("    Length %d, src offset %d\n",
-                       le32_to_cpu(hdr->msg.get.sink_length),
-                       le32_to_cpu(hdr->msg.get.src_offset));
-                break;
-
-        case PTL_MSG_ACK:
-                CERROR("    dst md "LPX64"."LPX64", manipulated length %d\n",
-                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                       hdr->msg.ack.dst_wmd.wh_object_cookie,
-                       le32_to_cpu(hdr->msg.ack.mlength));
-                break;
-
-        case PTL_MSG_REPLY:
-                CERROR("    dst md "LPX64"."LPX64"\n",
-                       hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                       hdr->msg.reply.dst_wmd.wh_object_cookie);
-        }
-
-}                               /* end of print_hdr() */
-#endif
-
-#if !MULTIRAIL_EKC
-/* Log (at debug mask 'how') a title line "<str>: <n>" followed by the
- * Base/Len of each of the 'n' Elan iovec entries in 'iov'. */
-void
-kqswnal_print_eiov (int how, char *str, int n, EP_IOVEC *iov) 
-{
-        int          left;
-
-        CDEBUG (how, "%s: %d\n", str, n);
-
-        for (left = n; left > 0; left--, iov++)
-                CDEBUG (how, "   %08x for %d\n", iov->Base, iov->Len);
-}
-
-/* Build an RDMA data vector that copies the 'nsrc' source fragments onto
- * the 'ndst' destination fragments.
- *
- * Each entry written to 'dv' covers the largest run that is contiguous in
- * both the current source and the current destination fragment.  NB the
- * Base/Len of partially consumed 'src' and 'dst' entries are adjusted in
- * place, so the caller's iovecs are modified.
- *
- * Returns the number of 'dv' entries filled in, -EINVAL if one side runs
- * out of fragments before the other, or -E2BIG if more than 'ndv' entries
- * would be needed. */
-int
-kqswnal_eiovs2datav (int ndv, EP_DATAVEC *dv,
-                     int nsrc, EP_IOVEC *src,
-                     int ndst, EP_IOVEC *dst) 
-{
-        int        count;
-        int        nob;
-
-        LASSERT (ndv > 0);
-        LASSERT (nsrc > 0);
-        LASSERT (ndst > 0);
-
-        for (count = 0; ; count++, dv++) {
-
-                if (nsrc == 0 || ndst == 0) {
-                        if (nsrc != ndst) {
-                                /* For now I'll barf on any left over entries */
-                                CERROR ("mismatched src and dst iovs\n");
-                                return (-EINVAL);
-                        }
-                        return (count);
-                }
-
-                /* Only complain about space once there really is another
-                 * entry to write: a vector needing exactly 'ndv' entries
-                 * must succeed. */
-                if (count == ndv) {
-                        CERROR ("DATAVEC too small\n");
-                        return (-E2BIG);
-                }
-
-                nob = (src->Len < dst->Len) ? src->Len : dst->Len;
-                dv->Len    = nob;
-                dv->Source = src->Base;
-                dv->Dest   = dst->Base;
-
-                if (nob >= src->Len) {
-                        /* source fragment used up */
-                        src++;
-                        nsrc--;
-                } else {
-                        src->Len -= nob;
-                        src->Base += nob;
-                }
-                
-                if (nob >= dst->Len) {
-                        /* destination fragment used up */
-                        dst++;
-                        ndst--;
-                } else {
-                        /* advance within the DESTINATION fragment (this
-                         * used to adjust 'src' a second time) */
-                        dst->Len -= nob;
-                        dst->Base += nob;
-                }
-        }
-}
-#else
-/* Check that the local and remote RDMA descriptors line up: both sides
- * must have the same number of fragments and every pair of corresponding
- * fragments must have the same length.
- * Returns 0 if they match, -EINVAL (after logging why) if not. */
-int
-kqswnal_check_rdma (int nlfrag, EP_NMD *lfrag,
-                    int nrfrag, EP_NMD *rfrag)
-{
-        int  i;
-
-        if (nlfrag != nrfrag) {
-                CERROR("Can't cope with unequal # frags: %d local %d remote\n",
-                       nlfrag, nrfrag);
-                return (-EINVAL);
-        }
-
-        for (i = 0; i < nlfrag; i++) {
-                if (lfrag[i].nmd_len == rfrag[i].nmd_len)
-                        continue;
-
-                CERROR("Can't cope with unequal frags %d(%d):"
-                       " %d local %d remote\n",
-                       i, nlfrag, lfrag[i].nmd_len, rfrag[i].nmd_len);
-                return (-EINVAL);
-        }
-
-        return (0);
-}
-#endif
-
-/* Locate and sanity check the remote memory descriptor (RMD) that follows
- * the portals header in the first page of an incoming optimized GET/PUT.
- *
- * 'type' is the message type the caller expects and 'expected_nid' the NID
- * it believes sent the message.  Returns a pointer to the RMD inside the
- * receive buffer, or NULL (after logging why) if the header type or source
- * NID is wrong, or the message is too short to hold the whole RMD. */
-kqswnal_remotemd_t *
-kqswnal_parse_rmd (kqswnal_rx_t *krx, int type, ptl_nid_t expected_nid)
-{
-        char               *buffer = (char *)page_address(krx->krx_kiov[0].kiov_page);
-        ptl_hdr_t          *hdr = (ptl_hdr_t *)buffer;
-        kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(buffer + KQSW_HDR_SIZE);
-        ptl_nid_t           nid = kqswnal_rx_nid(krx);
-
-        /* Note (1) lib_parse has already flipped hdr.
-         *      (2) RDMA addresses are sent in native endian-ness.  When
-         *      EKC copes with different endian nodes, I'll fix this (and
-         *      eat my hat :) */
-
-        LASSERT (krx->krx_nob >= sizeof(*hdr));
-
-        if (hdr->type != type) {
-                /* NB the space before "from" was missing, which ran the
-                 * two halves of the message together */
-                CERROR ("Unexpected optimized get/put type %d (%d expected)"
-                        " from "LPX64"\n", hdr->type, type, nid);
-                return (NULL);
-        }
-        
-        if (hdr->src_nid != nid) {
-                CERROR ("Unexpected optimized get/put source NID "
-                        LPX64" from "LPX64"\n", hdr->src_nid, nid);
-                return (NULL);
-        }
-
-        LASSERT (nid == expected_nid);
-
-        if (buffer + krx->krx_nob < (char *)(rmd + 1)) {
-                /* msg too small to discover rmd size */
-                CERROR ("Incoming message [%d] too small for RMD (%d needed)\n",
-                        krx->krx_nob, (int)(((char *)(rmd + 1)) - buffer));
-                return (NULL);
-        }
-
-        /* NOTE(review): kqrmd_nfrag comes straight off the wire and only
-         * its upper extent is checked here; confirm that callers cope with
-         * a zero or otherwise nonsensical fragment count. */
-        if (buffer + krx->krx_nob < (char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) {
-                /* rmd doesn't fit in the incoming message */
-                CERROR ("Incoming message [%d] too small for RMD[%d] (%d needed)\n",
-                        krx->krx_nob, rmd->kqrmd_nfrag,
-                        (int)(((char *)&rmd->kqrmd_frag[rmd->kqrmd_nfrag]) - buffer));
-                return (NULL);
-        }
-
-        return (rmd);
-}
-
-/* EKC callback run when the RPC completion that stores GET data to the
- * peer has finished.  The callback argument is the ktx used to map the
- * local buffers; its ktx_args[0] is the krx of the RPC being completed. */
-void
-kqswnal_rdma_store_complete (EP_RXD *rxd) 
-{
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        int           status = ep_rxd_status(rxd);
-        int           rc = (status == EP_SUCCESS) ? 0 : -ECONNABORTED;
-
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
-        LASSERT (ktx->ktx_state == KTX_RDMAING);
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
-
-        /* the RPC has been replied to now: don't reply again when the
-         * receive descriptor is finally released */
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_rx_decref (krx);
-
-        /* free ktx & finalize() its lib_msg_t */
-        kqswnal_tx_done(ktx, rc);
-}
-
-/* EKC callback run when fetching the data of an optimized PUT from the
- * peer has finished.  The callback argument is the ktx used to map the
- * local buffers; its ktx_args[0] is the krx of the RPC.  The RPC itself is
- * completed when the last reference on the krx goes, which is deferred to
- * the scheduler if this runs in interrupt context. */
-void
-kqswnal_rdma_fetch_complete (EP_RXD *rxd) 
-{
-        kqswnal_tx_t *ktx = (kqswnal_tx_t *)ep_rxd_arg(rxd);
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)ktx->ktx_args[0];
-        int           status = ep_rxd_status(rxd);
-        unsigned long flags;
-        
-        CDEBUG((status == EP_SUCCESS) ? D_NET : D_ERROR,
-               "rxd %p, ktx %p, status %d\n", rxd, ktx, status);
-
-        LASSERT (ktx->ktx_state == KTX_RDMAING);
-        LASSERT (krx->krx_rxd == rxd);
-        /* RPC completes with failure by default */
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT (krx->krx_rpc_reply_status != 0);
-
-        if (status != EP_SUCCESS) {
-                /* Abandon RPC since get failed */
-                krx->krx_rpc_reply_needed = 0;
-                status = -ECONNABORTED;
-        } else {
-                /* data arrived: the RPC may now complete with success */
-                krx->krx_rpc_reply_status = 0;
-                status = 0;
-        }
-
-        /* free ktx & finalize() its lib_msg_t */
-        kqswnal_tx_done(ktx, status);
-
-        if (in_interrupt()) {
-                /* Can't complete the RPC here; pass the krx to the
-                 * scheduler so it happens in thread context */
-                LASSERT (krx->krx_state == KRX_PARSE);
-                krx->krx_state = KRX_COMPLETING;
-
-                spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
-                list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
-                wake_up (&kqswnal_data.kqn_sched_waitq);
-
-                spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-                return;
-        }
-
-        /* OK to complete the RPC now (iff I had the last ref) */
-        kqswnal_rx_decref (krx);
-}
-
-int
-kqswnal_rdma (kqswnal_rx_t *krx, lib_msg_t *libmsg, int type,
-              int niov, struct iovec *iov, ptl_kiov_t *kiov,
-              size_t offset, size_t len)
-{
-        kqswnal_remotemd_t *rmd;
-        kqswnal_tx_t       *ktx;
-        int                 eprc;
-        int                 rc;
-#if !MULTIRAIL_EKC
-        EP_DATAVEC          datav[EP_MAXFRAG];
-        int                 ndatav;
-#endif
-
-        LASSERT (type == PTL_MSG_GET || type == PTL_MSG_PUT);
-        /* Not both mapped and paged payload */
-        LASSERT (iov == NULL || kiov == NULL);
-        /* RPC completes with failure by default */
-        LASSERT (krx->krx_rpc_reply_needed);
-        LASSERT (krx->krx_rpc_reply_status != 0);
-
-        rmd = kqswnal_parse_rmd(krx, type, libmsg->ev.initiator.nid);
-        if (rmd == NULL)
-                return (-EPROTO);
-
-        if (len == 0) {
-                /* data got truncated to nothing. */
-                lib_finalize(&kqswnal_lib, krx, libmsg, PTL_OK);
-                /* Let kqswnal_rx_done() complete the RPC with success */
-                krx->krx_rpc_reply_status = 0;
-                return (0);
-        }
-        
-        /* NB I'm using 'ktx' just to map the local RDMA buffers; I'm not
-           actually sending a portals message with it */
-        ktx = kqswnal_get_idle_tx(NULL, 0);
-        if (ktx == NULL) {
-                CERROR ("Can't get txd for RDMA with "LPX64"\n",
-                        libmsg->ev.initiator.nid);
-                return (-ENOMEM);
-        }
-
-        ktx->ktx_state   = KTX_RDMAING;
-        ktx->ktx_nid     = libmsg->ev.initiator.nid;
-        ktx->ktx_args[0] = krx;
-        ktx->ktx_args[1] = libmsg;
-
-#if MULTIRAIL_EKC
-        /* Map on the rail the RPC prefers */
-        ktx->ktx_rail = ep_rcvr_prefrail(krx->krx_eprx,
-                                         ep_rxd_railmask(krx->krx_rxd));
-#endif
-
-        /* Start mapping at offset 0 (we're not mapping any headers) */
-        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 0;
-        
-        if (kiov != NULL)
-                rc = kqswnal_map_tx_kiov(ktx, offset, len, niov, kiov);
-        else
-                rc = kqswnal_map_tx_iov(ktx, offset, len, niov, iov);
-
-        if (rc != 0) {
-                CERROR ("Can't map local RDMA data: %d\n", rc);
-                goto out;
-        }
-
-#if MULTIRAIL_EKC
-        rc = kqswnal_check_rdma (ktx->ktx_nfrag, ktx->ktx_frags,
-                                 rmd->kqrmd_nfrag, rmd->kqrmd_frag);
-        if (rc != 0) {
-                CERROR ("Incompatible RDMA descriptors\n");
-                goto out;
-        }
-#else
-        switch (type) {
-        default:
-                LBUG();
-
-        case PTL_MSG_GET:
-                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
-                                             ktx->ktx_nfrag, ktx->ktx_frags,
-                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag);
-                break;
-
-        case PTL_MSG_PUT:
-                ndatav = kqswnal_eiovs2datav(EP_MAXFRAG, datav,
-                                             rmd->kqrmd_nfrag, rmd->kqrmd_frag,
-                                             ktx->ktx_nfrag, ktx->ktx_frags);
-                break;
-        }
-                
-        if (ndatav < 0) {
-                CERROR ("Can't create datavec: %d\n", ndatav);
-                rc = ndatav;
-                goto out;
-        }
-#endif
-
-        LASSERT (atomic_read(&krx->krx_refcount) > 0);
-        /* Take an extra ref for the completion callback */
-        atomic_inc(&krx->krx_refcount);
-
-        switch (type) {
-        default:
-                LBUG();
-
-        case PTL_MSG_GET:
-#if MULTIRAIL_EKC
-                eprc = ep_complete_rpc(krx->krx_rxd, 
-                                       kqswnal_rdma_store_complete, ktx, 
-                                       &kqswnal_data.kqn_rpc_success,
-                                       ktx->ktx_frags, rmd->kqrmd_frag, rmd->kqrmd_nfrag);
-#else
-                eprc = ep_complete_rpc (krx->krx_rxd, 
-                                        kqswnal_rdma_store_complete, ktx,
-                                        &kqswnal_data.kqn_rpc_success, 
-                                        datav, ndatav);
-                if (eprc != EP_SUCCESS) /* "old" EKC destroys rxd on failed completion */
-                        krx->krx_rxd = NULL;
-#endif
-                if (eprc != EP_SUCCESS) {
-                        CERROR("can't complete RPC: %d\n", eprc);
-                        /* don't re-attempt RPC completion */
-                        krx->krx_rpc_reply_needed = 0;
-                        rc = -ECONNABORTED;
-                }
-                break;
-                
-        case PTL_MSG_PUT:
-#if MULTIRAIL_EKC
-                eprc = ep_rpc_get (krx->krx_rxd, 
-                                   kqswnal_rdma_fetch_complete, ktx,
-                                   rmd->kqrmd_frag, ktx->ktx_frags, ktx->ktx_nfrag);
-#else
-                eprc = ep_rpc_get (krx->krx_rxd,
-                                   kqswnal_rdma_fetch_complete, ktx,
-                                   datav, ndatav);
-#endif
-                if (eprc != EP_SUCCESS) {
-                        CERROR("ep_rpc_get failed: %d\n", eprc);
-                        /* Don't attempt RPC completion: 
-                         * EKC nuked it when the get failed */
-                        krx->krx_rpc_reply_needed = 0;
-                        rc = -ECONNABORTED;
-                }
-                break;
-        }
-
- out:
-        if (rc != 0) {
-                kqswnal_rx_decref(krx);                 /* drop callback's ref */
-                kqswnal_put_idle_tx (ktx);
-        }
-
-        atomic_dec(&kqswnal_data.kqn_pending_txs);
-        return (rc);
-}
-
-static ptl_err_t
-kqswnal_sendmsg (lib_nal_t    *nal,
-                 void         *private,
-                 lib_msg_t    *libmsg,
-                 ptl_hdr_t    *hdr,
-                 int           type,
-                 ptl_nid_t     nid,
-                 ptl_pid_t     pid,
-                 unsigned int  payload_niov,
-                 struct iovec *payload_iov,
-                 ptl_kiov_t   *payload_kiov,
-                 size_t        payload_offset,
-                 size_t        payload_nob)
-{
-        kqswnal_tx_t      *ktx;
-        int                rc;
-        ptl_nid_t          targetnid;
-#if KQSW_CHECKSUM
-        int                i;
-        kqsw_csum_t        csum;
-        int                sumoff;
-        int                sumnob;
-#endif
-        /* NB 1. hdr is in network byte order */
-        /*    2. 'private' depends on the message type */
-        
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64
-               " pid %u\n", payload_nob, payload_niov, nid, pid);
-
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
-        /* It must be OK to kmap() if required */
-        LASSERT (payload_kiov == NULL || !in_interrupt ());
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-
-        if (payload_nob > KQSW_MAXPAYLOAD) {
-                CERROR ("request exceeds MTU size "LPSZ" (max %u).\n",
-                        payload_nob, KQSW_MAXPAYLOAD);
-                return (PTL_FAIL);
-        }
-
-        if (type == PTL_MSG_REPLY &&            /* can I look in 'private' */
-            ((kqswnal_rx_t *)private)->krx_rpc_reply_needed) { /* is it an RPC */
-                /* Must be a REPLY for an optimized GET */
-                rc = kqswnal_rdma ((kqswnal_rx_t *)private, libmsg, PTL_MSG_GET,
-                                   payload_niov, payload_iov, payload_kiov, 
-                                   payload_offset, payload_nob);
-                return ((rc == 0) ? PTL_OK : PTL_FAIL);
-        }
-
-        targetnid = nid;
-        if (kqswnal_nid2elanid (nid) < 0) {     /* Can't send direct: find gateway? */
-                rc = kpr_lookup (&kqswnal_data.kqn_router, nid, 
-                                 sizeof (ptl_hdr_t) + payload_nob, &targetnid);
-                if (rc != 0) {
-                        CERROR("Can't route to "LPX64": router error %d\n",
-                               nid, rc);
-                        return (PTL_FAIL);
-                }
-                if (kqswnal_nid2elanid (targetnid) < 0) {
-                        CERROR("Bad gateway "LPX64" for "LPX64"\n",
-                               targetnid, nid);
-                        return (PTL_FAIL);
-                }
-        }
-
-        /* I may not block for a transmit descriptor if I might block the
-         * receiver, or an interrupt handler. */
-        ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK ||
-                                          type == PTL_MSG_REPLY ||
-                                          in_interrupt()));
-        if (ktx == NULL) {
-                CERROR ("Can't get txd for msg type %d for "LPX64"\n",
-                        type, libmsg->ev.initiator.nid);
-                return (PTL_NO_SPACE);
-        }
-
-        ktx->ktx_state   = KTX_SENDING;
-        ktx->ktx_nid     = targetnid;
-        ktx->ktx_args[0] = private;
-        ktx->ktx_args[1] = libmsg;
-        ktx->ktx_args[2] = NULL;    /* set when a GET commits to REPLY */
-
-        memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */
-
-#if KQSW_CHECKSUM
-        csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr));
-        memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum));
-        for (csum = 0, i = 0, sumoff = payload_offset, sumnob = payload_nob; sumnob > 0; i++) {
-                LASSERT(i < niov);
-                if (payload_kiov != NULL) {
-                        ptl_kiov_t *kiov = &payload_kiov[i];
-
-                        if (sumoff >= kiov->kiov_len) {
-                                sumoff -= kiov->kiov_len;
-                        } else {
-                                char *addr = ((char *)kmap (kiov->kiov_page)) +
-                                             kiov->kiov_offset + sumoff;
-                                int   fragnob = kiov->kiov_len - sumoff;
-
-                                csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob));
-                                sumnob -= fragnob;
-                                sumoff = 0;
-                                kunmap(kiov->kiov_page);
-                        }
-                } else {
-                        struct iovec *iov = &payload_iov[i];
-
-                        if (sumoff > iov->iov_len) {
-                                sumoff -= iov->iov_len;
-                        } else {
-                                char *addr = iov->iov_base + sumoff;
-                                int   fragnob = iov->iov_len - sumoff;
-                                
-                                csum = kqsw_csum(csum, addr, MIN(sumnob, fragnob));
-                                sumnob -= fragnob;
-                                sumoff = 0;
-                        }
-                }
-        }
-        memcpy(ktx->ktx_buffer + sizeof(*hdr) + sizeof(csum), &csum, sizeof(csum));
-#endif
-
-        /* The first frag will be the pre-mapped buffer for (at least) the
-         * portals header. */
-        ktx->ktx_nfrag = ktx->ktx_firsttmpfrag = 1;
-
-        if (nid == targetnid &&                 /* not forwarding */
-            ((type == PTL_MSG_GET &&            /* optimize GET? */
-              kqswnal_tunables.kqn_optimized_gets != 0 &&
-              le32_to_cpu(hdr->msg.get.sink_length) >= kqswnal_tunables.kqn_optimized_gets) ||
-             (type == PTL_MSG_PUT &&            /* optimize PUT? */
-              kqswnal_tunables.kqn_optimized_puts != 0 &&
-              payload_nob >= kqswnal_tunables.kqn_optimized_puts))) {
-                lib_md_t           *md = libmsg->md;
-                kqswnal_remotemd_t *rmd = (kqswnal_remotemd_t *)(ktx->ktx_buffer + KQSW_HDR_SIZE);
-                
-                /* Optimised path: I send over the Elan vaddrs of the local
-                 * buffers, and my peer DMAs directly to/from them.
-                 *
-                 * First I set up ktx as if it was going to send this
-                 * payload, (it needs to map it anyway).  This fills
-                 * ktx_frags[1] and onward with the network addresses
-                 * of the GET sink frags.  I copy these into ktx_buffer,
-                 * immediately after the header, and send that as my
-                 * message. */
-
-                ktx->ktx_state = (type == PTL_MSG_PUT) ? KTX_PUTTING : KTX_GETTING;
-
-                if ((libmsg->md->options & PTL_MD_KIOV) != 0) 
-                        rc = kqswnal_map_tx_kiov (ktx, 0, md->length,
-                                                  md->md_niov, md->md_iov.kiov);
-                else
-                        rc = kqswnal_map_tx_iov (ktx, 0, md->length,
-                                                 md->md_niov, md->md_iov.iov);
-                if (rc != 0)
-                        goto out;
-
-                rmd->kqrmd_nfrag = ktx->ktx_nfrag - 1;
-
-                payload_nob = offsetof(kqswnal_remotemd_t,
-                                       kqrmd_frag[rmd->kqrmd_nfrag]);
-                LASSERT (KQSW_HDR_SIZE + payload_nob <= KQSW_TX_BUFFER_SIZE);
-
-#if MULTIRAIL_EKC
-                memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
-                       rmd->kqrmd_nfrag * sizeof(EP_NMD));
-
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, KQSW_HDR_SIZE + payload_nob);
-#else
-                memcpy(&rmd->kqrmd_frag[0], &ktx->ktx_frags[1],
-                       rmd->kqrmd_nfrag * sizeof(EP_IOVEC));
-                
-                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
-#endif
-                if (type == PTL_MSG_GET) {
-                        /* Allocate reply message now while I'm in thread context */
-                        ktx->ktx_args[2] = lib_create_reply_msg (&kqswnal_lib,
-                                                                 nid, libmsg);
-                        if (ktx->ktx_args[2] == NULL)
-                                goto out;
-
-                        /* NB finalizing the REPLY message is my
-                         * responsibility now, whatever happens. */
-                }
-                
-        } else if (payload_nob <= KQSW_TX_MAXCONTIG) {
-
-                /* small message: single frag copied into the pre-mapped buffer */
-
-#if MULTIRAIL_EKC
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, KQSW_HDR_SIZE + payload_nob);
-#else
-                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + payload_nob;
-#endif
-                if (payload_nob > 0) {
-                        if (payload_kiov != NULL)
-                                lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
-                                                   payload_niov, payload_kiov, 
-                                                   payload_offset, payload_nob);
-                        else
-                                lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE,
-                                                  payload_niov, payload_iov, 
-                                                  payload_offset, payload_nob);
-                }
-        } else {
-
-                /* large message: multiple frags: first is hdr in pre-mapped buffer */
-
-#if MULTIRAIL_EKC
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, KQSW_HDR_SIZE);
-#else
-                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
-#endif
-                if (payload_kiov != NULL)
-                        rc = kqswnal_map_tx_kiov (ktx, payload_offset, payload_nob, 
-                                                  payload_niov, payload_kiov);
-                else
-                        rc = kqswnal_map_tx_iov (ktx, payload_offset, payload_nob,
-                                                 payload_niov, payload_iov);
-                if (rc != 0)
-                        goto out;
-        }
-        
-        ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ?
-                        EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
-
-        rc = kqswnal_launch (ktx);
-
- out:
-        CDEBUG(rc == 0 ? D_NET : D_ERROR, 
-               "%s "LPSZ" bytes to "LPX64" via "LPX64": rc %d\n", 
-               rc == 0 ? "Sent" : "Failed to send",
-               payload_nob, nid, targetnid, rc);
-
-        if (rc != 0) {
-                if (ktx->ktx_state == KTX_GETTING &&
-                    ktx->ktx_args[2] != NULL) {
-                        /* We committed to reply, but there was a problem
-                         * launching the GET.  We can't avoid delivering a
-                         * REPLY event since we committed above, so we
-                         * pretend the GET succeeded but the REPLY
-                         * failed. */
-                        rc = 0;
-                        lib_finalize (&kqswnal_lib, private, libmsg, PTL_OK);
-                        lib_finalize (&kqswnal_lib, private,
-                                      (lib_msg_t *)ktx->ktx_args[2], PTL_FAIL);
-                }
-                
-                kqswnal_put_idle_tx (ktx);
-        }
-        
-        atomic_dec(&kqswnal_data.kqn_pending_txs);
-        return (rc == 0 ? PTL_OK : PTL_FAIL);
-}
-
-/* Send entry point for payloads described by virtual-address fragments
- * (struct iovec); installed as libnal_send in kqswnal_lib.  Everything is
- * delegated to kqswnal_sendmsg(), with a NULL page-fragment list to select
- * the iovec flavour.  Returns whatever kqswnal_sendmsg() returns. */
-static ptl_err_t
-kqswnal_send (lib_nal_t    *nal,
-              void         *private,
-              lib_msg_t    *libmsg,
-              ptl_hdr_t    *hdr,
-              int           type,
-              ptl_nid_t     nid,
-              ptl_pid_t     pid,
-              unsigned int  payload_niov,
-              struct iovec *payload_iov,
-              size_t        payload_offset,
-              size_t        payload_nob)
-{
-        ptl_kiov_t *no_kiov = NULL;     /* iovec send: no page fragments */
-
-        return kqswnal_sendmsg(nal, private, libmsg, hdr, type, nid, pid,
-                               payload_niov, payload_iov, no_kiov,
-                               payload_offset, payload_nob);
-}
-
-/* Send entry point for payloads described by page fragments (ptl_kiov_t);
- * installed as libnal_send_pages in kqswnal_lib.  Everything is delegated
- * to kqswnal_sendmsg(), with a NULL iovec list to select the page flavour.
- * Returns whatever kqswnal_sendmsg() returns. */
-static ptl_err_t
-kqswnal_send_pages (lib_nal_t    *nal,
-                    void         *private,
-                    lib_msg_t    *libmsg,
-                    ptl_hdr_t    *hdr,
-                    int           type,
-                    ptl_nid_t     nid,
-                    ptl_pid_t     pid,
-                    unsigned int  payload_niov,
-                    ptl_kiov_t   *payload_kiov,
-                    size_t        payload_offset,
-                    size_t        payload_nob)
-{
-        struct iovec *no_iov = NULL;    /* page send: no vaddr fragments */
-
-        return kqswnal_sendmsg(nal, private, libmsg, hdr, type, nid, pid,
-                               payload_niov, no_iov, payload_kiov,
-                               payload_offset, payload_nob);
-}
-
-/* Forward the routed packet described by 'fwd' over this NAL.
- *
- * 'arg' is not referenced in this function.  If no transmit descriptor can
- * be obtained the function returns immediately (see the comment at the
- * kqswnal_get_idle_tx() call).  Otherwise the header is copied into the
- * descriptor's pre-mapped buffer and the payload is either copied in
- * behind it (nob <= KQSW_TX_MAXCONTIG) or mapped for zero-copy, and the
- * descriptor is passed to kqswnal_launch().  Any failure after the
- * descriptor was obtained is completed through kqswnal_tx_done(). */
-void
-kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
-{
-        int             rc;
-        kqswnal_tx_t   *ktx;
-        ptl_kiov_t     *kiov = fwd->kprfd_kiov;
-        int             niov = fwd->kprfd_niov;
-        int             nob = fwd->kprfd_nob;
-        ptl_nid_t       nid = fwd->kprfd_gateway_nid;
-
-#if KQSW_CHECKSUM
-        CERROR ("checksums for forwarded packets not implemented\n");
-        LBUG ();
-#endif
-        /* The router wants this NAL to forward a packet */
-        CDEBUG (D_NET, "forwarding [%p] to "LPX64", payload: %d frags %d bytes\n",
-                fwd, nid, niov, nob);
-
-        ktx = kqswnal_get_idle_tx (fwd, 0);
-        if (ktx == NULL)        /* can't get txd right now */
-                return;         /* fwd will be scheduled when tx desc freed */
-
-        if (nid == kqswnal_lib.libnal_ni.ni_pid.nid) /* gateway is me */
-                nid = fwd->kprfd_target_nid;    /* target is final dest */
-
-        if (kqswnal_nid2elanid (nid) < 0) {
-                CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid);
-                rc = -EHOSTUNREACH;
-                /* NOTE(review): ktx_state and ktx_args[0] are only assigned
-                 * further down, so this path reaches kqswnal_tx_done()
-                 * without this function having set them -- confirm that
-                 * kqswnal_tx_done() copes with that */
-                goto out;
-        }
-
-        /* copy hdr into pre-mapped buffer */
-        memcpy(ktx->ktx_buffer, fwd->kprfd_hdr, sizeof(ptl_hdr_t));
-
-        /* small and large payloads are sent to different service ports */
-        ktx->ktx_port    = (nob <= KQSW_SMALLPAYLOAD) ?
-                           EP_MSG_SVC_PORTALS_SMALL : EP_MSG_SVC_PORTALS_LARGE;
-        ktx->ktx_nid     = nid;
-        ktx->ktx_state   = KTX_FORWARDING;
-        ktx->ktx_args[0] = fwd;
-        ktx->ktx_nfrag   = ktx->ktx_firsttmpfrag = 1;
-
-        if (nob <= KQSW_TX_MAXCONTIG) 
-        {
-                /* send payload from ktx's pre-mapped contiguous buffer */
-#if MULTIRAIL_EKC
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, KQSW_HDR_SIZE + nob);
-#else
-                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE + nob;
-#endif
-                if (nob > 0)
-                        lib_copy_kiov2buf(ktx->ktx_buffer + KQSW_HDR_SIZE,
-                                          niov, kiov, 0, nob);
-        }
-        else
-        {
-                /* zero copy payload */
-                /* frag 0 covers only the header; payload frags are added
-                 * by kqswnal_map_tx_kiov() below */
-#if MULTIRAIL_EKC
-                ep_nmd_subset(&ktx->ktx_frags[0], &ktx->ktx_ebuffer,
-                              0, KQSW_HDR_SIZE);
-#else
-                ktx->ktx_frags[0].Base = ktx->ktx_ebuffer;
-                ktx->ktx_frags[0].Len = KQSW_HDR_SIZE;
-#endif
-                rc = kqswnal_map_tx_kiov (ktx, 0, nob, niov, kiov);
-                if (rc != 0)
-                        goto out;
-        }
-
-        rc = kqswnal_launch (ktx);
- out:
-        if (rc != 0) {
-                CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc);
-
-                /* complete now (with failure) */
-                kqswnal_tx_done (ktx, rc);
-        }
-
-        /* NOTE(review): presumably balances an increment taken when the
-         * descriptor was obtained -- the increment is not visible here */
-        atomic_dec(&kqswnal_data.kqn_pending_txs);
-}
-
-/* Completion callback invoked once the router has finished forwarding the
- * packet held in the receive descriptor passed as 'arg'.  A non-zero
- * 'error' is logged together with the source and destination NIDs read
- * from the packet header; in every case the single remaining reference on
- * the receive descriptor is dropped. */
-void
-kqswnal_fwd_callback (void *arg, int error)
-{
-        kqswnal_rx_t *krx = arg;
-        ptl_hdr_t    *hdr;
-
-        if (error) {
-                /* the header sits at the start of the first receive page */
-                hdr = page_address(krx->krx_kiov[0].kiov_page);
-
-                CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n",
-                       le64_to_cpu(hdr->src_nid),
-                       le64_to_cpu(hdr->dest_nid),
-                       error);
-        }
-
-        LASSERT (atomic_read(&krx->krx_refcount) == 1);
-        kqswnal_rx_decref(krx);
-}
-
-/* Repost receive descriptor 'krx' so it can take another message.
- *
- * The caller must hold no references and must not owe an RPC reply (both
- * asserted).  The descriptor is marked KRX_POSTED and then handed back to
- * EKC; which EKC calls are used depends on MULTIRAIL_EKC.  During shutdown
- * the receive is not reposted. */
-void
-kqswnal_requeue_rx (kqswnal_rx_t *krx)
-{
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
-        LASSERT (!krx->krx_rpc_reply_needed);
-
-        krx->krx_state = KRX_POSTED;
-
-#if MULTIRAIL_EKC
-        if (kqswnal_data.kqn_shuttingdown) {
-                /* free EKC rxd on shutdown */
-                ep_complete_receive(krx->krx_rxd);
-        } else {
-                /* repost receive */
-                ep_requeue_receive(krx->krx_rxd, 
-                                   kqswnal_rxhandler, krx,
-                                   &krx->krx_elanbuffer, 0);
-        }
-#else                
-        /* shutting down: leave the descriptor unposted */
-        if (kqswnal_data.kqn_shuttingdown)
-                return;
-
-        if (krx->krx_rxd == NULL) {
-                /* We had a failed ep_complete_rpc() which nukes the
-                 * descriptor in "old" EKC */
-                int eprc = ep_queue_receive(krx->krx_eprx, 
-                                            kqswnal_rxhandler, krx,
-                                            krx->krx_elanbuffer, 
-                                            krx->krx_npages * PAGE_SIZE, 0);
-                LASSERT (eprc == EP_SUCCESS);
-                /* We don't handle failure here; it's incredibly rare
-                 * (never reported?) and only happens with "old" EKC */
-        } else {
-                ep_requeue_receive(krx->krx_rxd, kqswnal_rxhandler, krx,
-                                   krx->krx_elanbuffer, 
-                                   krx->krx_npages * PAGE_SIZE);
-        }
-#endif
-}
-
-/* Callback passed to ep_complete_rpc() by kqswnal_rx_done().  Logs the
- * completion status (at D_ERROR level unless it is EP_SUCCESS), clears the
- * reply-needed flag on the owning receive descriptor and reposts it. */
-void
-kqswnal_rpc_complete (EP_RXD *rxd)
-{
-        int           status = ep_rxd_status(rxd);
-        kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg(rxd);
-        int           dbg_mask;
-
-        if (status == EP_SUCCESS)
-                dbg_mask = D_NET;
-        else
-                dbg_mask = D_ERROR;
-
-        CDEBUG(dbg_mask, "rxd %p, krx %p, status %d\n", rxd, krx, status);
-
-        /* must be the descriptor's current rxd, with a reply outstanding */
-        LASSERT (krx->krx_rxd == rxd);
-        LASSERT (krx->krx_rpc_reply_needed);
-
-        krx->krx_rpc_reply_needed = 0;
-        kqswnal_requeue_rx(krx);
-}
-
-/* Finish with receive descriptor 'krx' once no references remain
- * (asserted).
- *
- * If the peer is still owed an RPC reply, the RPC is completed with the
- * success or failure status block chosen from krx_rpc_reply_status.  When
- * ep_complete_rpc() succeeds, kqswnal_rpc_complete() is its completion
- * callback and this function returns without requeueing.  If no reply is
- * owed, or completing the RPC failed, the descriptor is requeued here. */
-void
-kqswnal_rx_done (kqswnal_rx_t *krx) 
-{
-        int           rc;
-        EP_STATUSBLK *sblk;
-
-        LASSERT (atomic_read(&krx->krx_refcount) == 0);
-
-        if (krx->krx_rpc_reply_needed) {
-                /* We've not completed the peer's RPC yet... */
-                sblk = (krx->krx_rpc_reply_status == 0) ? 
-                       &kqswnal_data.kqn_rpc_success : 
-                       &kqswnal_data.kqn_rpc_failed;
-
-                LASSERT (!in_interrupt());
-#if MULTIRAIL_EKC
-                rc = ep_complete_rpc(krx->krx_rxd, 
-                                     kqswnal_rpc_complete, krx,
-                                     sblk, NULL, NULL, 0);
-                if (rc == EP_SUCCESS)
-                        return;
-#else
-                rc = ep_complete_rpc(krx->krx_rxd, 
-                                     kqswnal_rpc_complete, krx,
-                                     sblk, NULL, 0);
-                if (rc == EP_SUCCESS)
-                        return;
-
-                /* "old" EKC destroys rxd on failed completion */
-                krx->krx_rxd = NULL;
-#endif
-                CERROR("can't complete RPC: %d\n", rc);
-                /* give up on the reply so the requeue below is allowed */
-                krx->krx_rpc_reply_needed = 0;
-        }
-
-        kqswnal_requeue_rx(krx);
-}
-        
-/* Dispatch a received message whose header sits at the start of the first
- * receive page of 'krx'.  The caller holds the only reference (asserted).
- *
- * A message addressed to this node is passed to lib_parse() and the
- * reference is dropped.  Any other message is a forwarding candidate: it
- * is dropped with an error if its destination is a direct peer, otherwise
- * the kiov array is set up to describe the payload following the header
- * and the message is handed to the router, which calls
- * kqswnal_fwd_callback() when done. */
-void
-kqswnal_parse (kqswnal_rx_t *krx)
-{
-        ptl_hdr_t      *hdr = (ptl_hdr_t *) page_address(krx->krx_kiov[0].kiov_page);
-        ptl_nid_t       dest_nid = le64_to_cpu(hdr->dest_nid);
-        int             payload_nob;
-        int             nob;
-        int             niov;
-
-        LASSERT (atomic_read(&krx->krx_refcount) == 1);
-
-        if (dest_nid == kqswnal_lib.libnal_ni.ni_pid.nid) { /* It's for me :) */
-                /* I ignore parse errors since I'm not consuming a byte
-                 * stream */
-                (void)lib_parse (&kqswnal_lib, hdr, krx);
-
-                /* Drop my ref; any RDMA activity takes an additional ref */
-                kqswnal_rx_decref(krx);
-                return;
-        }
-
-#if KQSW_CHECKSUM
-        LASSERTF (0, "checksums for forwarded packets not implemented\n");
-#endif
-
-        if (kqswnal_nid2elanid (dest_nid) >= 0)  /* should have gone direct to peer */
-        {
-                CERROR("dropping packet from "LPX64" for "LPX64
-                       ": target is peer\n", le64_to_cpu(hdr->src_nid), dest_nid);
-
-                kqswnal_rx_decref (krx);
-                return;
-        }
-
-        /* describe the payload (everything after the header) page by page */
-        nob = payload_nob = krx->krx_nob - KQSW_HDR_SIZE;
-        niov = 0;
-        if (nob > 0) {
-                /* first fragment: the rest of page 0 after the header */
-                krx->krx_kiov[0].kiov_offset = KQSW_HDR_SIZE;
-                krx->krx_kiov[0].kiov_len = MIN(PAGE_SIZE - KQSW_HDR_SIZE, nob);
-                niov = 1;
-                nob -= PAGE_SIZE - KQSW_HDR_SIZE;
-                
-                /* remaining fragments: whole pages, the last possibly short */
-                while (nob > 0) {
-                        LASSERT (niov < krx->krx_npages);
-                        
-                        krx->krx_kiov[niov].kiov_offset = 0;
-                        krx->krx_kiov[niov].kiov_len = MIN(PAGE_SIZE, nob);
-                        niov++;
-                        nob -= PAGE_SIZE;
-                }
-        }
-
-        kpr_fwd_init (&krx->krx_fwd, dest_nid, 
-                      hdr, payload_nob, niov, krx->krx_kiov,
-                      kqswnal_fwd_callback, krx);
-
-        kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd);
-}
-
-/* Receive Interrupt Handler: posts to schedulers */
-void 
-kqswnal_rxhandler(EP_RXD *rxd)
-{
-        unsigned long flags;
-        int           nob    = ep_rxd_len (rxd);
-        int           status = ep_rxd_status (rxd);
-        kqswnal_rx_t *krx    = (kqswnal_rx_t *)ep_rxd_arg (rxd);
-
-        CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n",
-               rxd, krx, nob, status);
-
-        LASSERT (krx != NULL);
-        LASSERT (krx->krx_state = KRX_POSTED);
-        
-        krx->krx_state = KRX_PARSE;
-        krx->krx_rxd = rxd;
-        krx->krx_nob = nob;
-
-        /* RPC reply iff rpc request received without error */
-        krx->krx_rpc_reply_needed = ep_rxd_isrpc(rxd) &&
-                                    (status == EP_SUCCESS ||
-                                     status == EP_MSG_TOO_BIG);
-
-        /* Default to failure if an RPC reply is requested but not handled */
-        krx->krx_rpc_reply_status = -EPROTO;
-        atomic_set (&krx->krx_refcount, 1);
-
-        /* must receive a whole header to be able to parse */
-        if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t))
-        {
-                /* receives complete with failure when receiver is removed */
-#if MULTIRAIL_EKC
-                if (status == EP_SHUTDOWN)
-                        LASSERT (kqswnal_data.kqn_shuttingdown);
-                else
-                        CERROR("receive status failed with status %d nob %d\n",
-                               ep_rxd_status(rxd), nob);
-#else
-                if (!kqswnal_data.kqn_shuttingdown)
-                        CERROR("receive status failed with status %d nob %d\n",
-                               ep_rxd_status(rxd), nob);
-#endif
-                kqswnal_rx_decref(krx);
-                return;
-        }
-
-        if (!in_interrupt()) {
-                kqswnal_parse(krx);
-                return;
-        }
-
-        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
-        list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds);
-        wake_up (&kqswnal_data.kqn_sched_waitq);
-
-        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-}
-
-#if KQSW_CHECKSUM
-/* Report a checksum mismatch on the message held in 'krx'.
- *
- * 'ishdr' is non-zero when the header checksum failed and zero when the
- * payload checksum failed.  The destination/source NIDs and PIDs and the
- * message type are logged, followed by type-specific header fields for
- * ACK and PUT messages.  Only compiled when KQSW_CHECKSUM is enabled. */
-void
-kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr)
-{
-        ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_kiov[0].kiov_page);
-
-        /* NB the comma after the src_nid argument was missing, which made
-         * this call a syntax error whenever KQSW_CHECKSUM was enabled */
-        CERROR ("%s checksum mismatch %p: dnid "LPX64", snid "LPX64
-                ", dpid %d, spid %d, type %d\n",
-                ishdr ? "Header" : "Payload", krx,
-                le64_to_cpu(hdr->dest_nid), le64_to_cpu(hdr->src_nid),
-                le32_to_cpu(hdr->dest_pid), le32_to_cpu(hdr->src_pid),
-                le32_to_cpu(hdr->type));
-
-        switch (le32_to_cpu(hdr->type))
-        {
-        case PTL_MSG_ACK:
-                CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64
-                       " len %u\n",
-                       le32_to_cpu(hdr->msg.ack.mlength),
-                       hdr->msg.ack.dst_wmd.handle_cookie,
-                       hdr->msg.ack.dst_wmd.handle_idx,
-                       le64_to_cpu(hdr->msg.ack.match_bits),
-                       le32_to_cpu(hdr->msg.ack.length));
-                break;
-        case PTL_MSG_PUT:
-                CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64
-                       " len %u off %u data "LPX64"\n",
-                       le32_to_cpu(hdr->msg.put.ptl_index),
-                       hdr->msg.put.ack_wmd.handle_cookie,
-                       hdr->msg.put.ack_wmd.handle_idx,
-                       le64_to_cpu(hdr->msg.put.match_bits),
-                       le32_to_cpu(hdr->msg.put.length),
-                       le32_to_cpu(hdr->msg.put.offset),
-                       hdr->msg.put.hdr_data);
-                break;
-        case PTL_MSG_GET:
-                CERROR ("GET: <>\n");
-                break;
-        case PTL_MSG_REPLY:
-                CERROR ("REPLY: <>\n");
-                break;
-        default:
-                CERROR ("TYPE?: <>\n");
-        }
-}
-#endif
-
-/* Common receive path behind kqswnal_recv() and kqswnal_recv_pages().
- *
- * 'private' is the receive descriptor holding the message.  Exactly one
- * of 'iov' (virtual addresses) and 'kiov' (pages) describes the
- * destination; 'niov' is its fragment count and 'offset' the byte offset
- * into it at which to start.  'mlen' bytes are delivered out of the
- * 'rlen' bytes the sender offered.
- *
- * When the sender is waiting for an RPC reply and the message is a PUT,
- * the data is fetched by kqswnal_rdma() instead of being copied and this
- * function returns without calling lib_finalize().  Otherwise the payload
- * is copied out of the receive pages (it starts KQSW_HDR_SIZE bytes into
- * the first page) and lib_finalize() is called with PTL_OK.
- *
- * Returns PTL_OK on success, PTL_FAIL if the RDMA could not be started or
- * fewer than 'mlen' payload bytes were actually received. */
-static ptl_err_t
-kqswnal_recvmsg (lib_nal_t    *nal,
-                 void         *private,
-                 lib_msg_t    *libmsg,
-                 unsigned int  niov,
-                 struct iovec *iov,
-                 ptl_kiov_t   *kiov,
-                 size_t        offset,
-                 size_t        mlen,
-                 size_t        rlen)
-{
-        kqswnal_rx_t *krx = (kqswnal_rx_t *)private;
-        char         *buffer = page_address(krx->krx_kiov[0].kiov_page);
-        ptl_hdr_t    *hdr = (ptl_hdr_t *)buffer;
-        int           page;
-        char         *page_ptr;
-        int           page_nob;
-        char         *iov_ptr;
-        int           iov_nob;
-        int           frag;
-        int           rc;
-#if KQSW_CHECKSUM
-        kqsw_csum_t   senders_csum;
-        kqsw_csum_t   payload_csum = 0;
-        kqsw_csum_t   hdr_csum = kqsw_csum(0, hdr, sizeof(*hdr));
-        size_t        csum_len = mlen;
-        int           csum_frags = 0;
-        int           csum_nob = 0;
-        static atomic_t csum_counter;
-        int           csum_verbose = (atomic_read(&csum_counter)%1000001) == 0;
-
-        atomic_inc (&csum_counter);
-
-        memcpy (&senders_csum, buffer + sizeof (ptl_hdr_t), sizeof (kqsw_csum_t));
-        if (senders_csum != hdr_csum)
-                kqswnal_csum_error (krx, 1);
-#endif
-        /* NB lib_parse() has already flipped *hdr */
-
-        CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", mlen, rlen);
-
-        if (krx->krx_rpc_reply_needed &&
-            hdr->type == PTL_MSG_PUT) {
-                /* This must be an optimized PUT */
-                rc = kqswnal_rdma (krx, libmsg, PTL_MSG_PUT,
-                                   niov, iov, kiov, offset, mlen);
-                return (rc == 0 ? PTL_OK : PTL_FAIL);
-        }
-
-        /* What was actually received must be >= payload. */
-        LASSERT (mlen <= rlen);
-        if (krx->krx_nob < KQSW_HDR_SIZE + mlen) {
-                CERROR("Bad message size: have %d, need %d + %d\n",
-                       krx->krx_nob, (int)KQSW_HDR_SIZE, (int)mlen);
-                return (PTL_FAIL);
-        }
-
-        /* It must be OK to kmap() if required */
-        LASSERT (kiov == NULL || !in_interrupt ());
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        if (mlen != 0) {
-                /* source cursor: payload begins just after the header in
-                 * the first receive page */
-                page     = 0;
-                page_ptr = buffer + KQSW_HDR_SIZE;
-                page_nob = PAGE_SIZE - KQSW_HDR_SIZE;
-
-                LASSERT (niov > 0);
-
-                /* destination cursor: advance to the fragment containing
-                 * 'offset' */
-                if (kiov != NULL) {
-                        /* skip complete frags */
-                        while (offset >= kiov->kiov_len) {
-                                offset -= kiov->kiov_len;
-                                kiov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                        }
-                        iov_ptr = ((char *)kmap (kiov->kiov_page)) +
-                                kiov->kiov_offset + offset;
-                        iov_nob = kiov->kiov_len - offset;
-                } else {
-                        /* skip complete frags */
-                        while (offset >= iov->iov_len) {
-                                offset -= iov->iov_len;
-                                iov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                        }
-                        iov_ptr = iov->iov_base + offset;
-                        iov_nob = iov->iov_len - offset;
-                }
-                
-                for (;;)
-                {
-                        /* copy as much as fits in both the current source
-                         * page and the current destination fragment */
-                        frag = mlen;
-                        if (frag > page_nob)
-                                frag = page_nob;
-                        if (frag > iov_nob)
-                                frag = iov_nob;
-
-                        memcpy (iov_ptr, page_ptr, frag);
-#if KQSW_CHECKSUM
-                        payload_csum = kqsw_csum (payload_csum, iov_ptr, frag);
-                        csum_nob += frag;
-                        csum_frags++;
-#endif
-                        mlen -= frag;
-                        if (mlen == 0)
-                                break;
-
-                        /* advance the source, moving to the next receive
-                         * page once this one is used up */
-                        page_nob -= frag;
-                        if (page_nob != 0)
-                                page_ptr += frag;
-                        else
-                        {
-                                page++;
-                                LASSERT (page < krx->krx_npages);
-                                page_ptr = page_address(krx->krx_kiov[page].kiov_page);
-                                page_nob = PAGE_SIZE;
-                        }
-
-                        /* advance the destination, moving to the next
-                         * fragment (unmapping the finished page first) once
-                         * this one is full */
-                        iov_nob -= frag;
-                        if (iov_nob != 0)
-                                iov_ptr += frag;
-                        else if (kiov != NULL) {
-                                kunmap (kiov->kiov_page);
-                                kiov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                                iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset;
-                                iov_nob = kiov->kiov_len;
-                        } else {
-                                iov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                                iov_ptr = iov->iov_base;
-                                iov_nob = iov->iov_len;
-                        }
-                }
-
-                /* release the mapping of the last destination page used */
-                if (kiov != NULL)
-                        kunmap (kiov->kiov_page);
-        }
-
-#if KQSW_CHECKSUM
-        memcpy (&senders_csum, buffer + sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), 
-                sizeof(kqsw_csum_t));
-
-        if (csum_len != rlen)
-                CERROR("Unable to checksum data in user's buffer\n");
-        else if (senders_csum != payload_csum)
-                kqswnal_csum_error (krx, 0);
-
-        if (csum_verbose)
-                CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, "
-                       "csum_nob %d\n",
-                        hdr_csum, payload_csum, csum_frags, csum_nob);
-#endif
-        lib_finalize(nal, private, libmsg, PTL_OK);
-
-        return (PTL_OK);
-}
-
-/* Receive entry point for destinations described by virtual-address
- * fragments (struct iovec); installed as libnal_recv in kqswnal_lib.
- * Delegates to kqswnal_recvmsg() with a NULL page-fragment list and
- * returns its result. */
-static ptl_err_t
-kqswnal_recv(lib_nal_t    *nal,
-             void         *private,
-             lib_msg_t    *libmsg,
-             unsigned int  niov,
-             struct iovec *iov,
-             size_t        offset,
-             size_t        mlen,
-             size_t        rlen)
-{
-        ptl_kiov_t *no_kiov = NULL;     /* destination is vaddrs, not pages */
-
-        return kqswnal_recvmsg(nal, private, libmsg, niov, iov, no_kiov,
-                               offset, mlen, rlen);
-}
-
-/* Receive entry point for destinations described by page fragments
- * (ptl_kiov_t); installed as libnal_recv_pages in kqswnal_lib.  Delegates
- * to kqswnal_recvmsg() with a NULL iovec list and returns its result. */
-static ptl_err_t
-kqswnal_recv_pages (lib_nal_t    *nal,
-                    void         *private,
-                    lib_msg_t    *libmsg,
-                    unsigned int  niov,
-                    ptl_kiov_t   *kiov,
-                    size_t        offset,
-                    size_t        mlen,
-                    size_t        rlen)
-{
-        struct iovec *no_iov = NULL;    /* destination is pages, not vaddrs */
-
-        return kqswnal_recvmsg(nal, private, libmsg, niov, no_iov, kiov,
-                               offset, mlen, rlen);
-}
-
-/* Start a kernel thread running fn(arg).  On success the NAL's thread
- * count (kqn_nthreads) is incremented and 0 is returned; on failure the
- * negative value returned by kernel_thread() is passed back and the count
- * is left alone. */
-int
-kqswnal_thread_start (int (*fn)(void *arg), void *arg)
-{
-        long rc = kernel_thread(fn, arg, 0);
-
-        if (rc >= 0) {
-                /* thread is running: account for it */
-                atomic_inc(&kqswnal_data.kqn_nthreads);
-                return 0;
-        }
-
-        return (int)rc;
-}
-
-/* Undo the thread accounting done by kqswnal_thread_start(): decrement
- * the NAL's thread count.  kqswnal_scheduler() calls this just before it
- * returns. */
-void
-kqswnal_thread_fini (void)
-{
-        atomic_dec (&kqswnal_data.kqn_nthreads);
-}
-
-/* Body of the NAL's scheduler thread ('arg' is unused).
- *
- * Repeatedly services three queues, all protected by kqn_sched_lock:
- *   kqn_readyrxds   - receives queued by kqswnal_rxhandler() or awaiting
- *                     completion,
- *   kqn_delayedtxds - transmits to be (re)launched,
- *   kqn_delayedfwds - forwarding requests to be (re)tried.
- * At most one item is taken from each queue per pass, and the lock is
- * dropped while the item is processed.  When a pass found nothing to do
- * the thread sleeps on kqn_sched_waitq; after KQSW_RESCHED consecutive
- * busy passes it offers to reschedule.  The thread exits, returning 0,
- * once kqn_shuttingdown == 2 and a pass found nothing left to do. */
-int
-kqswnal_scheduler (void *arg)
-{
-        kqswnal_rx_t    *krx;
-        kqswnal_tx_t    *ktx;
-        kpr_fwd_desc_t  *fwd;
-        unsigned long    flags;
-        int              rc;
-        int              counter = 0;
-        int              did_something;
-
-        kportal_daemonize ("kqswnal_sched");
-        kportal_blockallsigs ();
-
-        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-
-        for (;;)
-        {
-                did_something = 0;
-
-                if (!list_empty (&kqswnal_data.kqn_readyrxds))
-                {
-                        krx = list_entry(kqswnal_data.kqn_readyrxds.next,
-                                         kqswnal_rx_t, krx_list);
-                        list_del (&krx->krx_list);
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
-
-                        switch (krx->krx_state) {
-                        case KRX_PARSE:
-                                kqswnal_parse (krx);
-                                break;
-                        case KRX_COMPLETING:
-                                kqswnal_rx_decref (krx);
-                                break;
-                        default:
-                                LBUG();
-                        }
-
-                        did_something = 1;
-                        spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags);
-                }
-
-                if (!list_empty (&kqswnal_data.kqn_delayedtxds))
-                {
-                        /* The entry must be recovered through the same
-                         * member it is unlinked by (ktx_delayed_list);
-                         * using a different member would yield a wrong
-                         * descriptor pointer. */
-                        ktx = list_entry(kqswnal_data.kqn_delayedtxds.next,
-                                         kqswnal_tx_t, ktx_delayed_list);
-                        list_del_init (&ktx->ktx_delayed_list);
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
-
-                        rc = kqswnal_launch (ktx);
-                        if (rc != 0) {
-                                CERROR("Failed delayed transmit to "LPX64
-                                       ": %d\n", ktx->ktx_nid, rc);
-                                kqswnal_tx_done (ktx, rc);
-                        }
-                        atomic_dec (&kqswnal_data.kqn_pending_txs);
-
-                        did_something = 1;
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-                }
-
-                if (!list_empty (&kqswnal_data.kqn_delayedfwds))
-                {
-                        fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list);
-                        list_del (&fwd->kprfd_list);
-                        spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags);
-
-                        /* If we're shutting down, this will just requeue fwd on kqn_idletxd_fwdq */
-                        kqswnal_fwd_packet (NULL, fwd);
-
-                        did_something = 1;
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-                }
-
-                /* nothing to do or hogging CPU */
-                if (!did_something || counter++ == KQSW_RESCHED) {
-                        spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock,
-                                               flags);
-
-                        counter = 0;
-
-                        if (!did_something) {
-                                if (kqswnal_data.kqn_shuttingdown == 2) {
-                                        /* We only exit in stage 2 of shutdown when
-                                         * there's nothing left to do */
-                                        break;
-                                }
-                                rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq,
-                                                               kqswnal_data.kqn_shuttingdown == 2 ||
-                                                               !list_empty(&kqswnal_data.kqn_readyrxds) ||
-                                                               !list_empty(&kqswnal_data.kqn_delayedtxds) ||
-                                                               !list_empty(&kqswnal_data.kqn_delayedfwds));
-                                LASSERT (rc == 0);
-                        } else if (need_resched())
-                                schedule ();
-
-                        spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags);
-                }
-        }
-
-        /* NB the lock was already dropped before breaking out of the loop */
-        kqswnal_thread_fini ();
-        return (0);
-}
-
-/* Method table through which the portals library drives this NAL: the
- * NAL's private data plus its send, receive and distance callbacks. */
-lib_nal_t kqswnal_lib =
-{
-        .libnal_data       = &kqswnal_data,       /* NAL private data */
-        .libnal_send       = kqswnal_send,        /* iovec payloads */
-        .libnal_send_pages = kqswnal_send_pages,  /* page payloads */
-        .libnal_recv       = kqswnal_recv,        /* iovec destinations */
-        .libnal_recv_pages = kqswnal_recv_pages,  /* page destinations */
-        .libnal_dist       = kqswnal_dist
-};
diff --git a/lustre/portals/knals/ranal/.cvsignore b/lustre/portals/knals/ranal/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/ranal/Makefile.in b/lustre/portals/knals/ranal/Makefile.in
deleted file mode 100644 (file)
index 1772cc2..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Module name handed to the shared rules pulled in at the bottom of this
-# file (presumably the kernel module to build -- confirm in those rules).
-MODULES := kranal
-# Objects that make up kranal.
-kranal-objs := ranal.o ranal_cb.o
-
-# @RACPPFLAGS@ is an autoconf-style placeholder filled in at configure
-# time; presumably the preprocessor flags for this NAL -- TODO confirm.
-EXTRA_POST_CFLAGS := @RACPPFLAGS@
-
-# Placeholder replaced at configure time; by its name it expands to the
-# common build rules.
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/ranal/autoMakefile.am b/lustre/portals/knals/ranal/autoMakefile.am
deleted file mode 100644 (file)
index f136aa5..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# List the kranal module as module data only when all three automake
-# conditionals hold: MODULES is set, CRAY_PORTALS is not, and BUILD_RANAL
-# is set.
-if MODULES
-if !CRAY_PORTALS
-if BUILD_RANAL
-modulenet_DATA = kranal$(KMODEXT)
-endif
-endif
-endif
-
-# Build products removed by 'make mostlyclean'.
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-# Sources to distribute: one .c per object named in kranal-objs, plus the
-# header.  NOTE(review): kranal-objs is not assigned in this file (it is
-# set in the sibling Makefile.in) -- confirm it is defined when this
-# expands.
-DIST_SOURCES = $(kranal-objs:%.o=%.c) ranal.h
diff --git a/lustre/portals/knals/ranal/ranal.c b/lustre/portals/knals/ranal/ranal.c
deleted file mode 100644 (file)
index c924827..0000000
+++ /dev/null
@@ -1,1983 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include "ranal.h"
-
-
-nal_t                   kranal_api;
-ptl_handle_ni_t         kranal_ni;
-kra_data_t              kranal_data;
-kra_tunables_t          kranal_tunables;
-
-#ifdef CONFIG_SYSCTL
-/* Binary sysctl ids of the individual tunables */
-#define RANAL_SYSCTL_TIMEOUT           1
-#define RANAL_SYSCTL_LISTENER_TIMEOUT  2
-#define RANAL_SYSCTL_BACKLOG           3
-#define RANAL_SYSCTL_PORT              4
-#define RANAL_SYSCTL_MAX_IMMEDIATE     5
-
-/* Binary sysctl id of the "ranal" directory */
-#define RANAL_SYSCTL                   202
-
-/* One entry per tunable in kranal_tunables.  "backlog" and "port" go
- * through kranal_listener_procint; the others use the stock integer
- * handler. */
-static ctl_table kranal_ctl_table[] = {
-        {
-                .ctl_name     = RANAL_SYSCTL_TIMEOUT,
-                .procname     = "timeout",
-                .data         = &kranal_tunables.kra_timeout,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .child        = NULL,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = RANAL_SYSCTL_LISTENER_TIMEOUT,
-                .procname     = "listener_timeout",
-                .data         = &kranal_tunables.kra_listener_timeout,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .child        = NULL,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name     = RANAL_SYSCTL_BACKLOG,
-                .procname     = "backlog",
-                .data         = &kranal_tunables.kra_backlog,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .child        = NULL,
-                .proc_handler = kranal_listener_procint
-        },
-        {
-                .ctl_name     = RANAL_SYSCTL_PORT,
-                .procname     = "port",
-                .data         = &kranal_tunables.kra_port,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .child        = NULL,
-                .proc_handler = kranal_listener_procint
-        },
-        {
-                .ctl_name     = RANAL_SYSCTL_MAX_IMMEDIATE,
-                .procname     = "max_immediate",
-                .data         = &kranal_tunables.kra_max_immediate,
-                .maxlen       = sizeof(int),
-                .mode         = 0644,
-                .child        = NULL,
-                .proc_handler = &proc_dointvec
-        },
-        { .ctl_name = 0 }                       /* terminator */
-};
-
-/* Top-level table: a single read-only "ranal" directory holding the
- * tunables above. */
-static ctl_table kranal_top_ctl_table[] = {
-        {
-                .ctl_name = RANAL_SYSCTL,
-                .procname = "ranal",
-                .data     = NULL,
-                .maxlen   = 0,
-                .mode     = 0555,
-                .child    = kranal_ctl_table
-        },
-        { .ctl_name = 0 }                       /* terminator */
-};
-#endif
-
-/*
- * Send 'nob' bytes from 'buffer' on 'sock' with one non-blocking sendmsg.
- *
- * Returns 0 when the whole buffer was accepted by the socket, -EAGAIN if
- * only part of it was, and the negative errno from sock_sendmsg()
- * otherwise.  Callers test the result against 0, so the byte count that
- * sock_sendmsg() returns on success must not be passed back to them.
- */
-int
-kranal_sock_write (struct socket *sock, void *buffer, int nob)
-{
-        int           rc;
-        mm_segment_t  oldmm = get_fs();
-        struct iovec  iov = {
-                .iov_base = buffer,
-                .iov_len  = nob
-        };
-        struct msghdr msg = {
-                .msg_name       = NULL,
-                .msg_namelen    = 0,
-                .msg_iov        = &iov,
-                .msg_iovlen     = 1,
-                .msg_control    = NULL,
-                .msg_controllen = 0,
-                .msg_flags      = MSG_DONTWAIT
-        };
-
-        /* We've set up the socket's send buffer to be large enough for
-         * everything we send, so a single non-blocking send should
-         * complete without error. */
-
-        /* 'buffer' is a kernel address: lift the user-space limit while
-         * the socket layer copies from it */
-        set_fs(KERNEL_DS);
-        rc = sock_sendmsg(sock, &msg, iov.iov_len);
-        set_fs(oldmm);
-
-        if (rc == nob)                          /* everything queued */
-                return 0;
-
-        if (rc >= 0)                            /* short send */
-                return -EAGAIN;
-
-        return rc;
-}
-
-/*
- * Read exactly 'nob' bytes from 'sock' into 'buffer', allowing 'timeout'
- * seconds in total.
- *
- * Returns 0 once all bytes have arrived, -ECONNABORTED if a receive
- * returns 0 bytes, -ETIMEDOUT when the time budget is used up before the
- * buffer is full, or the negative errno of the failing socket call.
- */
-int
-kranal_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
-{
-        mm_segment_t   oldmm = get_fs();
-        long           ticks = timeout * HZ;    /* jiffies still allowed */
-        unsigned long  then;
-        struct timeval tv;
-        struct iovec   iov;
-        struct msghdr  msg;
-        int            rc;
-
-        LASSERT (nob > 0);
-        LASSERT (ticks > 0);
-
-        do {
-                /* describe what is still missing */
-                iov.iov_base = buffer;
-                iov.iov_len  = nob;
-
-                memset(&msg, 0, sizeof(msg));
-                msg.msg_iov    = &iov;
-                msg.msg_iovlen = 1;
-
-                /* Set receive timeout to remaining time */
-                tv.tv_sec  = ticks / HZ;
-                tv.tv_usec = ((ticks % HZ) * 1000000) / HZ;
-
-                set_fs(KERNEL_DS);
-                rc = sock_setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
-                                     (char *)&tv, sizeof(tv));
-                set_fs(oldmm);
-                if (rc != 0) {
-                        CERROR("Can't set socket recv timeout %d: %d\n",
-                               timeout, rc);
-                        return rc;
-                }
-
-                /* receive, charging the elapsed jiffies to the budget */
-                set_fs(KERNEL_DS);
-                then = jiffies;
-                rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
-                ticks -= jiffies - then;
-                set_fs(oldmm);
-
-                if (rc < 0)
-                        return rc;
-
-                if (rc == 0)
-                        return -ECONNABORTED;
-
-                buffer = ((char *)buffer) + rc;
-                nob -= rc;
-
-                if (nob == 0)
-                        return 0;
-
-        } while (ticks > 0);
-
-        return -ETIMEDOUT;
-}
-
-/*
- * Create a TCP socket for exchanging connection requests.
- *
- * The send buffer is sized to 2 * sizeof(kra_connreq_t) and SO_REUSEADDR
- * is enabled.  On success the socket is stored in '*sockp' and 0 is
- * returned; on failure the socket (if any) is released and the negative
- * errno is returned.
- */
-int
-kranal_create_sock(struct socket **sockp)
-{
-        mm_segment_t         oldmm = get_fs();
-        struct socket       *sock;
-        int                  option;
-        int                  rc;
-
-        rc = sock_create(PF_INET, SOCK_STREAM, 0, &sock);
-        if (rc != 0) {
-                CERROR("Can't create socket: %d\n", rc);
-                return rc;
-        }
-
-        /* Ensure sending connection info doesn't block */
-        option = 2 * sizeof(kra_connreq_t);
-        set_fs(KERNEL_DS);
-        rc = sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-                             (char *)&option, sizeof(option));
-        set_fs(oldmm);
-        if (rc != 0) {
-                CERROR("Can't set send buffer %d: %d\n", option, rc);
-                sock_release(sock);
-                return rc;
-        }
-
-        option = 1;
-        set_fs(KERNEL_DS);
-        rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-                             (char *)&option, sizeof(option));
-        set_fs(oldmm);
-        if (rc != 0) {
-                CERROR("Can't set SO_REUSEADDR: %d\n", rc);
-                sock_release(sock);
-                return rc;
-        }
-
-        *sockp = sock;
-        return 0;
-}
-
-/* Put the calling thread to sleep in TASK_UNINTERRUPTIBLE state with a
- * timeout of 'ticks' (the value is handed straight to schedule_timeout()). */
-void
-kranal_pause(int ticks)
-{
-       set_current_state(TASK_UNINTERRUPTIBLE);
-       schedule_timeout(ticks);
-}
-
-/*
- * Fill in '*connreq' with the connection request describing 'conn':
- * protocol magic/version, the device and NID of this node, the timeout
- * and incarnation of the conn, and the RI parameters obtained from
- * RapkGetRiParams().
- */
-void
-kranal_pack_connreq(kra_connreq_t *connreq, kra_conn_t *conn)
-{
-        RAP_RETURN   rrc;
-
-        /* start from all zeroes so every unset byte goes out as 0 */
-        memset(connreq, 0, sizeof(*connreq));
-
-        /* protocol identification */
-        connreq->racr_magic   = RANAL_MSG_MAGIC;
-        connreq->racr_version = RANAL_MSG_VERSION;
-
-        /* who is sending */
-        connreq->racr_nid   = kranal_lib.libnal_ni.ni_pid.nid;
-        connreq->racr_devid = conn->rac_device->rad_id;
-
-        /* per-connection state */
-        connreq->racr_incarnation = conn->rac_my_incarnation;
-        connreq->racr_timeout     = conn->rac_timeout;
-
-        rrc = RapkGetRiParams(conn->rac_rihandle, &connreq->racr_riparams);
-        LASSERT(rrc == RAP_SUCCESS);
-}
-
-/*
- * Read a connection request from 'sock' into '*connreq' (allowing
- * 'timeout' seconds) and validate it.
- *
- * A request whose magic only matches after byte-swapping has all of its
- * fields swabbed in place.  Returns 0 if the request is acceptable, the
- * error from kranal_sock_read() if reading failed, -EPROTO for a bad
- * magic, version, NID or timeout, and -ENODEV if the requested device id
- * matches none of the local devices.
- */
-int
-kranal_recv_connreq(struct socket *sock, kra_connreq_t *connreq, int timeout)
-{
-        int         rc;
-        int         i;
-
-        rc = kranal_sock_read(sock, connreq, sizeof(*connreq), timeout);
-        if (rc != 0) {
-                CERROR("Read failed: %d\n", rc);
-                return rc;
-        }
-
-        /* neither byte order gives the expected magic */
-        if (connreq->racr_magic != RANAL_MSG_MAGIC &&
-            __swab32(connreq->racr_magic) != RANAL_MSG_MAGIC) {
-                CERROR("Unexpected magic %08x\n", connreq->racr_magic);
-                return -EPROTO;
-        }
-
-        if (connreq->racr_magic != RANAL_MSG_MAGIC) {
-                /* sender has the opposite endianness: flip everything */
-                __swab32s(&connreq->racr_magic);
-                __swab16s(&connreq->racr_version);
-                __swab16s(&connreq->racr_devid);
-                __swab64s(&connreq->racr_nid);
-                __swab64s(&connreq->racr_incarnation);
-                __swab32s(&connreq->racr_timeout);
-
-                __swab32s(&connreq->racr_riparams.FmaDomainHndl);
-                __swab32s(&connreq->racr_riparams.RcvCqHndl);
-                __swab32s(&connreq->racr_riparams.PTag);
-                __swab32s(&connreq->racr_riparams.CompletionCookie);
-        }
-
-        if (connreq->racr_version != RANAL_MSG_VERSION) {
-                CERROR("Unexpected version %d\n", connreq->racr_version);
-                return -EPROTO;
-        }
-
-        if (connreq->racr_nid == PTL_NID_ANY) {
-                CERROR("Received PTL_NID_ANY\n");
-                return -EPROTO;
-        }
-
-        if (connreq->racr_timeout < RANAL_MIN_TIMEOUT) {
-                CERROR("Received timeout %d < MIN %d\n",
-                       connreq->racr_timeout, RANAL_MIN_TIMEOUT);
-                return -EPROTO;
-        }
-
-        /* the requested device must be one of ours */
-        i = 0;
-        while (i < kranal_data.kra_ndevs &&
-               connreq->racr_devid != kranal_data.kra_devices[i].rad_id)
-                i++;
-
-        if (i == kranal_data.kra_ndevs) {
-                CERROR("Can't match device %d\n", connreq->racr_devid);
-                return -ENODEV;
-        }
-
-        return 0;
-}
-
-/*
- * Return 1 if 'peer' already has a conn that makes a new conn with peer
- * incarnation 'incarnation' a duplicate, 0 otherwise.
- *
- * Conns with an older incarnation are ignored.  When 'peer' is this node
- * itself, the first conn with a matching incarnation is tolerated.
- */
-int
-kranal_conn_isdup_locked(kra_peer_t *peer, __u64 incarnation)
-{
-        struct list_head *pos;
-        kra_conn_t       *conn;
-        int               seen_loopback = 0;
-
-        list_for_each(pos, &peer->rap_conns) {
-                conn = list_entry(pos, kra_conn_t, rac_list);
-
-                /* Conns with an older incarnation get culled later */
-                if (conn->rac_peer_incarnation < incarnation)
-                        continue;
-
-                /* anything other than the first loopback conn is a dup */
-                if (seen_loopback ||
-                    conn->rac_peer_incarnation != incarnation ||
-                    peer->rap_nid != kranal_lib.libnal_ni.ni_pid.nid)
-                        return 1;
-
-                /* loopback creates 2 conns */
-                seen_loopback = 1;
-        }
-
-        return 0;
-}
-
-/*
- * Give 'conn' its own incarnation number and a cqid that no conn found by
- * kranal_cqid2conn_locked() is using, both taken from the counters in
- * kranal_data under the global write lock.
- */
-void
-kranal_set_conn_uniqueness (kra_conn_t *conn)
-{
-        unsigned long  flags;
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        conn->rac_my_incarnation = kranal_data.kra_next_incarnation++;
-
-        /* allocate a unique cqid: keep drawing until one is free */
-        for (;;) {
-                conn->rac_cqid = kranal_data.kra_next_cqid++;
-
-                if (kranal_cqid2conn_locked(conn->rac_cqid) == NULL)
-                        break;
-        }
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-/*
- * Allocate and initialise a conn bound to device 'dev'.
- *
- * The new conn holds a single reference (the caller's), has a fresh
- * incarnation/cqid and an RI created on 'dev'.  Returns 0 and stores the
- * conn in '*connp' on success, -ENOMEM if allocation failed or -ENETDOWN
- * if RapkCreateRi() failed.
- */
-int
-kranal_alloc_conn(kra_conn_t **connp, kra_device_t *dev)
-{
-        kra_conn_t    *conn;
-        RAP_RETURN     rrc;
-
-        LASSERT (!in_interrupt());
-        PORTAL_ALLOC(conn, sizeof(*conn));
-
-        if (conn == NULL)
-                return -ENOMEM;
-
-        memset(conn, 0, sizeof(*conn));
-        atomic_set(&conn->rac_refcount, 1);
-        INIT_LIST_HEAD(&conn->rac_list);
-        INIT_LIST_HEAD(&conn->rac_hashlist);
-        INIT_LIST_HEAD(&conn->rac_fmaq);
-        INIT_LIST_HEAD(&conn->rac_rdmaq);
-        INIT_LIST_HEAD(&conn->rac_replyq);
-        spin_lock_init(&conn->rac_lock);
-
-        kranal_set_conn_uniqueness(conn);
-
-        /* Remember the device: kranal_pack_connreq() and
-         * __kranal_conn_decref() both dereference conn->rac_device, which
-         * the memset above left NULL. */
-        conn->rac_device = dev;
-
-        conn->rac_timeout = MAX(kranal_tunables.kra_timeout, RANAL_MIN_TIMEOUT);
-        kranal_update_reaper_timeout(conn->rac_timeout);
-
-        rrc = RapkCreateRi(dev->rad_handle, conn->rac_cqid,
-                           dev->rad_ptag,
-                           dev->rad_rdma_cq, dev->rad_fma_cq,
-                           &conn->rac_rihandle);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("RapkCreateRi failed: %d\n", rrc);
-                /* no RI yet, so free directly rather than via decref */
-                PORTAL_FREE(conn, sizeof(*conn));
-                return -ENETDOWN;
-        }
-
-        atomic_inc(&kranal_data.kra_nconns);
-        *connp = conn;
-        return 0;
-}
-
-/*
- * Destroy 'conn' once its refcount is zero.
- *
- * The conn must already be off the peer and hash lists and must not be
- * scheduled.  Txs still queued for FMA or awaiting a reply are completed
- * with -ECONNABORTED, the RI is destroyed, the ref on the peer (if any)
- * is dropped and the conn is freed.
- */
-void
-__kranal_conn_decref(kra_conn_t *conn) 
-{
-        RAP_RETURN         rrc;
-        kra_tx_t          *tx;
-
-        LASSERT (!in_interrupt());
-        LASSERT (!conn->rac_scheduled);
-        LASSERT (list_empty(&conn->rac_list));
-        LASSERT (list_empty(&conn->rac_hashlist));
-        LASSERT (atomic_read(&conn->rac_refcount) == 0);
-
-        /* abort whatever is still queued for FMA */
-        for (;;) {
-                if (list_empty(&conn->rac_fmaq))
-                        break;
-
-                tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
-                list_del(&tx->tx_list);
-                kranal_tx_done(tx, -ECONNABORTED);
-        }
-
-        /* We may not destroy this connection while it has RDMAs outstanding */
-        LASSERT (list_empty(&conn->rac_rdmaq));
-
-        /* abort whatever is still waiting for a reply */
-        for (;;) {
-                if (list_empty(&conn->rac_replyq))
-                        break;
-
-                tx = list_entry(conn->rac_replyq.next, kra_tx_t, tx_list);
-                list_del(&tx->tx_list);
-                kranal_tx_done(tx, -ECONNABORTED);
-        }
-
-        rrc = RapkDestroyRi(conn->rac_device->rad_handle,
-                            conn->rac_rihandle);
-        LASSERT (rrc == RAP_SUCCESS);
-
-        /* a conn that never got attached has no peer to release */
-        if (conn->rac_peer != NULL)
-                kranal_peer_decref(conn->rac_peer);
-
-        PORTAL_FREE(conn, sizeof(*conn));
-        atomic_dec(&kranal_data.kra_nconns);
-}
-
-/*
- * Final stage of closing 'conn': take it out of the conn hash table and
- * drop one reference on it.  The conn must already be marked closing and
- * be off its peer's conn list.
- */
-void
-kranal_terminate_conn_locked (kra_conn_t *conn)
-{
-        LASSERT (!in_interrupt());
-        LASSERT (conn->rac_closing);
-        LASSERT (!list_empty(&conn->rac_hashlist));
-        LASSERT (list_empty(&conn->rac_list));
-
-        /* Remove from conn hash table (no new callbacks) */
-        list_del_init(&conn->rac_hashlist);
-
-        /* Conn is now just waiting for remaining refs to go */
-        kranal_conn_decref(conn);
-}
-
-/*
- * Start closing 'conn' because of 'error' (0 for a clean close).
- *
- * The conn is unlinked from its peer, marked closing and scheduled; a
- * non-persistent peer left with no conns is unlinked as well.  The conn
- * must still be on both the peer list and the hash list and must not
- * already be closing.
- */
-void
-kranal_close_conn_locked (kra_conn_t *conn, int error)
-{
-        kra_peer_t        *peer = conn->rac_peer;
-
-        /* a clean close is only worth a debug message */
-        if (error == 0) {
-                CDEBUG(D_NET,
-                       "closing conn to "LPX64": error %d\n",
-                       peer->rap_nid, error);
-        } else {
-                CDEBUG(D_ERROR,
-                       "closing conn to "LPX64": error %d\n",
-                       peer->rap_nid, error);
-        }
-
-        LASSERT (!in_interrupt());
-        LASSERT (!conn->rac_closing);
-        LASSERT (!list_empty(&conn->rac_hashlist));
-        LASSERT (!list_empty(&conn->rac_list));
-
-        list_del_init(&conn->rac_list);
-
-        /* Non-persistent peer with no more conns... */
-        if (peer->rap_persistence == 0 &&
-            list_empty(&peer->rap_conns))
-                kranal_unlink_peer_locked(peer);
-
-        conn->rac_closing = 1;
-        kranal_schedule_conn(conn);
-
-        /* lose peer's ref */
-        kranal_conn_decref(conn);
-}
-
-/*
- * Close 'conn' with 'error' unless it is already closing; takes the
- * global write lock around kranal_close_conn_locked().
- */
-void
-kranal_close_conn (kra_conn_t *conn, int error)
-{
-        unsigned long    flags;
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        /* somebody else may have started the close already */
-        if (conn->rac_closing == 0)
-                kranal_close_conn_locked(conn, error);
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-}
-
-/*
- * Passive side of connection establishment on an accepted socket.
- *
- * Refuses peers that did not connect from a port below 1024, reads the
- * peer's connection request, creates a conn on the requested device,
- * applies the peer's RI parameters and sends our own request back.
- *
- * On success returns 0 with the new conn in '*connp' and the peer's NID
- * in '*peer_nidp'; otherwise returns a negative errno and no conn.
- */
-int
-kranal_passive_conn_handshake (struct socket *sock, 
-                               ptl_nid_t *peer_nidp, kra_conn_t **connp)
-{
-        struct sockaddr_in   addr;
-        int                  len = sizeof(addr);
-        kra_connreq_t        connreq;
-        kra_device_t        *dev;
-        kra_conn_t          *conn;
-        ptl_nid_t            peer_nid;
-        unsigned int         peer_port;
-        __u32                peer_ip;
-        RAP_RETURN           rrc;
-        int                  rc;
-        int                  i;
-
-        /* last argument 2: ask for the peer's address */
-        rc = sock->ops->getname(sock, (struct sockaddr *)&addr, &len, 2);
-        if (rc != 0) {
-                CERROR("Can't get peer's IP: %d\n", rc);
-                return rc;
-        }
-
-        peer_port = ntohs(addr.sin_port);
-        peer_ip   = ntohl(addr.sin_addr.s_addr);
-
-        if (peer_port >= 1024) {
-                CERROR("Refusing unprivileged connection from %u.%u.%u.%u/%d\n",
-                       HIPQUAD(peer_ip), peer_port);
-                return -ECONNREFUSED;
-        }
-
-        rc = kranal_recv_connreq(sock, &connreq, 
-                                 kranal_tunables.kra_listener_timeout);
-        if (rc != 0) {
-                CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", 
-                       HIPQUAD(peer_ip), peer_port, rc);
-                return rc;
-        }
-
-        peer_nid = connreq.racr_nid;
-        LASSERT (peer_nid != PTL_NID_ANY);
-
-        /* find the device the peer asked for; kranal_recv_connreq() has
-         * already checked that it exists */
-        i = 0;
-        for (;;) {
-                LASSERT(i < kranal_data.kra_ndevs);
-                dev = &kranal_data.kra_devices[i];
-                if (dev->rad_id == connreq.racr_devid)
-                        break;
-                i++;
-        }
-
-        rc = kranal_alloc_conn(&conn, dev);
-        if (rc != 0)
-                return rc;
-
-        conn->rac_peer_incarnation = connreq.racr_incarnation;
-        conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq.racr_timeout);
-        kranal_update_reaper_timeout(conn->rac_keepalive);
-
-        rrc = RapkSetRiParams(conn->rac_rihandle, &connreq.racr_riparams);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't set riparams for "LPX64": %d\n", peer_nid, rrc);
-                rc = -EPROTO;
-                goto failed;
-        }
-
-        /* reuse the request buffer for our reply */
-        kranal_pack_connreq(&connreq, conn);
-
-        rc = kranal_sock_write(sock, &connreq, sizeof(connreq));
-        if (rc != 0) {
-                CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", 
-                       HIPQUAD(peer_ip), peer_port, rc);
-                goto failed;
-        }
-
-        *peer_nidp = peer_nid;
-        *connp = conn;
-        return 0;
-
- failed:
-        kranal_conn_decref(conn);
-        return rc;
-}
-
-/*
- * Connect a new socket to 'peer' (peer->rap_ip / peer->rap_port) from a
- * local port in the range 1023 down to 512.
- *
- * Ports that are in use, or that cannot be used towards this peer, are
- * skipped.  Returns 0 with the connected socket in '*sockp', the
- * negative errno of the first hard failure, or -EHOSTUNREACH when every
- * port in the range has been tried.
- */
-int
-ranal_connect_sock(kra_peer_t *peer, struct socket **sockp)
-{
-        struct sockaddr_in  srvaddr;
-        struct sockaddr_in  locaddr;
-        struct socket      *sock;
-        unsigned int        port;
-        int                 rc;
-
-        for (port = 1023; port >= 512; port--) {
-
-                rc = kranal_create_sock(&sock);
-                if (rc != 0)
-                        return rc;
-
-                /* claim the local port */
-                memset(&locaddr, 0, sizeof(locaddr)); 
-                locaddr.sin_family      = AF_INET; 
-                locaddr.sin_port        = htons(port);
-                locaddr.sin_addr.s_addr = htonl(INADDR_ANY);
-
-                rc = sock->ops->bind(sock,
-                                     (struct sockaddr *)&locaddr, sizeof(locaddr));
-                if (rc == -EADDRINUSE) {
-                        sock_release(sock);
-                        CDEBUG(D_NET, "Port %d already in use\n", port);
-                        continue;
-                }
-
-                if (rc != 0) {
-                        sock_release(sock);
-                        CERROR("Can't bind to reserved port %d: %d\n", port, rc);
-                        return rc;
-                }
-
-                /* and try to reach the peer from it */
-                memset (&srvaddr, 0, sizeof (srvaddr));
-                srvaddr.sin_family      = AF_INET;
-                srvaddr.sin_port        = htons (peer->rap_port);
-                srvaddr.sin_addr.s_addr = htonl (peer->rap_ip);
-
-                rc = sock->ops->connect(sock,
-                                        (struct sockaddr *)&srvaddr, sizeof(srvaddr),
-                                        0);
-                if (rc == 0) {
-                        *sockp = sock;
-                        return 0;
-                }
-
-                sock_release(sock);
-
-                if (rc == -EADDRNOTAVAIL) {
-                        CDEBUG(D_NET, "Port %d not available for %u.%u.%u.%u/%d\n", 
-                               port, HIPQUAD(peer->rap_ip), peer->rap_port);
-                        continue;
-                }
-
-                CERROR("Can't connect port %d to %u.%u.%u.%u/%d: %d\n",
-                       port, HIPQUAD(peer->rap_ip), peer->rap_port, rc);
-                return rc;
-        }
-
-        /* all ports busy */
-        return -EHOSTUNREACH;
-}
-
-
-/*
- * Active side of connection establishment to 'peer'.
- *
- * Picks a device from the peer's NID, creates a conn on it, connects to
- * the peer, sends our connection request and checks the one that comes
- * back (NID and device id must match) before applying the peer's RI
- * parameters.
- *
- * Returns 0 with the conn in '*connp', or a negative errno with the conn
- * and socket released.
- */
-int
-kranal_active_conn_handshake(kra_peer_t *peer, kra_conn_t **connp)
-{
-        struct socket      *sock = NULL;        /* non-NULL while open */
-        kra_connreq_t       connreq;
-        kra_device_t       *dev;
-        kra_conn_t         *conn;
-        RAP_RETURN          rrc;
-        int                 idx;
-        int                 rc;
-
-        idx = peer->rap_nid & 0x7fffffff;
-        dev = &kranal_data.kra_devices[idx % kranal_data.kra_ndevs];
-
-        rc = kranal_alloc_conn(&conn, dev);
-        if (rc != 0)
-                return rc;
-
-        kranal_pack_connreq(&connreq, conn);
-
-        rc = ranal_connect_sock(peer, &sock);
-        if (rc != 0)
-                goto failed;
-
-        /* CAVEAT EMPTOR: the passive side receives with a SHORT rx timeout
-         * immediately after accepting a connection, so we connect and then
-         * send immediately. */
-
-        rc = kranal_sock_write(sock, &connreq, sizeof(connreq));
-        if (rc != 0) {
-                CERROR("Can't tx connreq to %u.%u.%u.%u/%d: %d\n", 
-                       HIPQUAD(peer->rap_ip), peer->rap_port, rc);
-                goto failed;
-        }
-
-        rc = kranal_recv_connreq(sock, &connreq, kranal_tunables.kra_timeout);
-        if (rc != 0) {
-                CERROR("Can't rx connreq from %u.%u.%u.%u/%d: %d\n", 
-                       HIPQUAD(peer->rap_ip), peer->rap_port, rc);
-                goto failed;
-        }
-
-        /* the socket has served its purpose */
-        sock_release(sock);
-        sock = NULL;
-
-        if (connreq.racr_nid != peer->rap_nid) {
-                CERROR("Unexpected nid from %u.%u.%u.%u/%d: "
-                       "received "LPX64" expected "LPX64"\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port, 
-                       connreq.racr_nid, peer->rap_nid);
-                rc = -EPROTO;
-                goto failed;
-        }
-
-        if (connreq.racr_devid != dev->rad_id) {
-                CERROR("Unexpected device id from %u.%u.%u.%u/%d: "
-                       "received %d expected %d\n",
-                       HIPQUAD(peer->rap_ip), peer->rap_port, 
-                       connreq.racr_devid, dev->rad_id);
-                rc = -EPROTO;
-                goto failed;
-        }
-
-        conn->rac_peer_incarnation = connreq.racr_incarnation; 
-        conn->rac_keepalive = RANAL_TIMEOUT2KEEPALIVE(connreq.racr_timeout);
-        kranal_update_reaper_timeout(conn->rac_keepalive);
-
-        rrc = RapkSetRiParams(conn->rac_rihandle, &connreq.racr_riparams);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't set riparams for "LPX64": %d\n",
-                       peer->rap_nid, rrc);
-                rc = -ENETDOWN;
-                goto failed;
-        }
-
-        *connp = conn;
-        return 0;
-
- failed:
-        if (sock != NULL)
-                sock_release(sock);
-        kranal_conn_decref(conn);
-        return rc;
-}
-
-/*
- * Establish a connection and attach it to its peer.
- *
- * Exactly one of 'sock' and 'peer' is non-NULL:
- *   - passive ('sock' set): the listener accepted 'sock'; the peer is
- *     looked up by the NID in the received request and created if it is
- *     not yet in the peer table.
- *   - active ('peer' set): the connection daemon wants to connect to
- *     'peer', which must be marked as connecting.
- *
- * On success the conn is added to the peer's conn list and the cqid hash
- * table, txs blocked on the peer are posted on it, and stale conns to the
- * same peer are closed.  A duplicate connection is dropped and reported
- * as success.
- *
- * Returns 0 on success (including the duplicate case), -ESTALE if the
- * peer was unlinked while connecting, -ENOMEM if no peer could be
- * allocated, or the negative errno of the failed handshake.
- */
-int
-kranal_conn_handshake (struct socket *sock, kra_peer_t *peer)
-{
-        kra_peer_t        *peer2;
-        kra_tx_t          *tx;
-        ptl_nid_t          peer_nid;
-        unsigned long      flags;
-        kra_conn_t        *conn;
-        int                rc;
-        int                nstale;
-
-        if (sock != NULL) {
-                /* passive: listener accepted sock */
-                LASSERT (peer == NULL);
-
-                rc = kranal_passive_conn_handshake(sock, &peer_nid, &conn);
-                if (rc != 0)
-                        return rc;
-
-                /* assume this is a new peer */
-                peer = kranal_create_peer(peer_nid);
-                if (peer == NULL) {
-                        CERROR("Can't allocate peer for "LPX64"\n", peer_nid);
-                        kranal_conn_decref(conn);
-                        return -ENOMEM;
-                }
-
-                write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                peer2 = kranal_find_peer_locked(peer_nid);
-                if (peer2 == NULL) {
-                        /* peer table takes my initial ref on peer */
-                        list_add_tail(&peer->rap_list,
-                                      kranal_nid2peerlist(peer_nid));
-                } else {
-                        /* peer_nid already in the peer table */
-                        kranal_peer_decref(peer);
-                        peer = peer2;
-                }
-                /* NB I may now have a non-persistent peer in the peer
-                 * table with no connections: I can't drop the global lock
-                 * until I've given it a connection or removed it, and when
-                 * I do 'peer' can disappear under me. */
-        } else {
-                /* active: connd wants to connect to peer */
-                LASSERT (peer != NULL);
-                LASSERT (peer->rap_connecting);
-
-                rc = kranal_active_conn_handshake(peer, &conn);
-                if (rc != 0)
-                        return rc;
-
-                write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                if (!kranal_peer_active(peer)) {
-                        /* raced with peer getting unlinked */
-                        write_unlock_irqrestore(&kranal_data.kra_global_lock, 
-                                                flags);
-                        kranal_conn_decref(conn);
-                        /* errors are negative errnos, like everywhere else */
-                        return -ESTALE;
-                }
-        }
-
-        LASSERT (kranal_peer_active(peer));     /* peer is in the peer table */
-        peer_nid = peer->rap_nid;
-
-        /* Refuse to duplicate an existing connection (both sides might try
-         * to connect at once).  NB we return success!  We _do_ have a
-         * connection (so we don't need to remove the peer from the peer
-         * table) and we _don't_ have any blocked txs to complete */
-        if (kranal_conn_isdup_locked(peer, conn->rac_peer_incarnation)) {
-                LASSERT (!list_empty(&peer->rap_conns));
-                LASSERT (list_empty(&peer->rap_tx_queue));
-                write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-                CWARN("Not creating duplicate connection to "LPX64"\n",
-                      peer_nid);
-                kranal_conn_decref(conn);
-                return 0;
-        }
-
-        kranal_peer_addref(peer);               /* +1 ref for conn */
-        conn->rac_peer = peer;
-        list_add_tail(&conn->rac_list, &peer->rap_conns);
-
-        kranal_conn_addref(conn);               /* +1 ref for conn table */
-        list_add_tail(&conn->rac_hashlist,
-                      kranal_cqid2connlist(conn->rac_cqid));
-
-        /* Schedule all packets blocking for a connection */
-        while (!list_empty(&peer->rap_tx_queue)) {
-                /* NB the first entry is rap_tx_queue.next itself, not the
-                 * address of that pointer (which is just the list head) */
-                tx = list_entry(peer->rap_tx_queue.next,
-                                kra_tx_t, tx_list);
-
-                list_del(&tx->tx_list);
-                kranal_post_fma(conn, tx);
-        }
-
-        nstale = kranal_close_stale_conns_locked(peer, conn->rac_peer_incarnation);
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        /* CAVEAT EMPTOR: passive peer can disappear NOW */
-
-        if (nstale != 0)
-                CWARN("Closed %d stale conns to "LPX64"\n", nstale, peer_nid);
-
-        /* Ensure conn gets checked.  Transmits may have been queued and an
-         * FMA event may have happened before it got in the cq hash table */
-        kranal_schedule_conn(conn);
-        return 0;
-}
-
-/*
- * Try to connect to 'peer' (which must be marked as connecting) and
- * clear its connecting flag.
- *
- * On success the reconnection interval is reset to its minimum.  On
- * failure the next reconnection attempt is pushed back by the current
- * interval, the interval is doubled (up to
- * RANAL_MAX_RECONNECT_INTERVAL) and every tx blocked on the peer is
- * completed with -EHOSTUNREACH.
- */
-void
-kranal_connect (kra_peer_t *peer)
-{
-        kra_tx_t          *tx;
-        unsigned long      flags;
-        struct list_head   zombies;
-        int                rc;
-
-        LASSERT (peer->rap_connecting);
-
-        rc = kranal_conn_handshake(NULL, peer);
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        LASSERT (peer->rap_connecting);
-        peer->rap_connecting = 0;
-
-        if (rc == 0) {
-                /* kranal_conn_handshake() queues blocked txs immediately on
-                 * success to avoid messages jumping the queue */
-                LASSERT (list_empty(&peer->rap_tx_queue));
-
-                /* reset reconnection timeouts */
-                peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL;
-                peer->rap_reconnect_time = CURRENT_TIME;
-
-                write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-                return;
-        }
-
-        LASSERT (peer->rap_reconnect_interval != 0);
-        peer->rap_reconnect_time = CURRENT_TIME + peer->rap_reconnect_interval;
-
-        /* Exponential backoff: double the interval, but never let it
-         * exceed the maximum */
-        peer->rap_reconnect_interval *= 2;
-        if (peer->rap_reconnect_interval > RANAL_MAX_RECONNECT_INTERVAL)
-                peer->rap_reconnect_interval = RANAL_MAX_RECONNECT_INTERVAL;
-
-        /* Grab all blocked packets while we have the global lock: link
-         * 'zombies' in as a second head, then unlink the original head so
-         * that 'zombies' owns the whole list */
-        list_add(&zombies, &peer->rap_tx_queue);
-        list_del_init(&peer->rap_tx_queue);
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        if (list_empty(&zombies))
-                return;
-
-        CWARN("Dropping packets for "LPX64": connection failed\n",
-              peer->rap_nid);
-
-        do {
-                tx = list_entry(zombies.next, kra_tx_t, tx_list);
-
-                list_del(&tx->tx_list);
-                kranal_tx_done(tx, -EHOSTUNREACH);
-
-        } while (!list_empty(&zombies));
-}
-
-/*
- * Listener thread: accepts incoming connections on the port given by
- * kranal_tunables.kra_port and runs the passive handshake on each.
- *
- * Startup status is handed to the parent through
- * kranal_data.kra_listener_shutdown (0 on success, negative errno on
- * failure) followed by an up() on kra_listener_signal.  The thread then
- * loops until kra_listener_shutdown becomes non-zero, cleans up and
- * signals again.  'arg' is unused; the thread function always returns 0.
- */
-int
-kranal_listener(void *arg)
-{
-        struct sockaddr_in addr;
-        wait_queue_t       wait;
-        struct socket     *sock;
-        struct socket     *newsock;
-        int                port;
-        kra_connreq_t     *connreqs;
-        char               name[16];
-        int                rc;
-
-        /* Parent thread holds kra_nid_mutex, and is, or is about to
-         * block on kra_listener_signal */
-
-        port = kranal_tunables.kra_port;
-        snprintf(name, sizeof(name), "kranal_lstn%03d", port);
-        kportal_daemonize(name);
-        kportal_blockallsigs();
-
-        init_waitqueue_entry(&wait, current);
-
-        /* NOTE(review): connreqs is allocated and freed here but not
-         * otherwise used in this function */
-        rc = -ENOMEM;
-        PORTAL_ALLOC(connreqs, 2 * sizeof(*connreqs));
-        if (connreqs == NULL)
-                goto out_0;
-
-        rc = kranal_create_sock(&sock);
-        if (rc != 0)
-                goto out_1;
-
-        memset(&addr, 0, sizeof(addr));
-        addr.sin_family      = AF_INET;
-        addr.sin_port        = htons(port);
-        addr.sin_addr.s_addr = htonl(INADDR_ANY);
-
-        rc = sock->ops->bind(sock, (struct sockaddr *)&addr, sizeof(addr));
-        if (rc != 0) {
-                CERROR("Can't bind to port %d\n", port);
-                goto out_2;
-        }
-
-        rc = sock->ops->listen(sock, kranal_tunables.kra_backlog);
-        if (rc != 0) {
-                CERROR("Can't set listen backlog %d: %d\n", 
-                       kranal_tunables.kra_backlog, rc);
-                goto out_2;
-        }
-
-        LASSERT (kranal_data.kra_listener_sock == NULL);
-        kranal_data.kra_listener_sock = sock;
-
-        /* unblock waiting parent */
-        LASSERT (kranal_data.kra_listener_shutdown == 0);
-        up(&kranal_data.kra_listener_signal);
-
-        /* Wake me any time something happens on my socket */
-        add_wait_queue(sock->sk->sk_sleep, &wait);
-
-        while (kranal_data.kra_listener_shutdown == 0) {
-
-                newsock = sock_alloc();
-                if (newsock == NULL) {
-                        CERROR("Can't allocate new socket for accept\n");
-                        kranal_pause(HZ);
-                        continue;
-                }
-
-                /* sock_alloc() returns a bare socket: it must inherit the
-                 * listener's type and ops before it can be accepted into,
-                 * since the handshake code calls through newsock->ops */
-                newsock->type = sock->type;
-                newsock->ops = sock->ops;
-
-                /* go to sleep only if there is nothing to accept; the wait
-                 * queue entry added above wakes us on socket activity */
-                set_current_state(TASK_INTERRUPTIBLE);
-
-                rc = sock->ops->accept(sock, newsock, O_NONBLOCK);
-
-                if (rc == -EAGAIN &&
-                    kranal_data.kra_listener_shutdown == 0)
-                        schedule();
-
-                set_current_state(TASK_RUNNING);
-
-                if (rc != 0) {
-                        sock_release(newsock);
-                        if (rc != -EAGAIN) {
-                                CERROR("Accept failed: %d\n", rc);
-                                kranal_pause(HZ);
-                        }
-                        continue;
-                } 
-
-                /* the socket is only needed for the handshake itself */
-                kranal_conn_handshake(newsock, NULL);
-                sock_release(newsock);
-        }
-
-        rc = 0;
-        remove_wait_queue(sock->sk->sk_sleep, &wait);
- out_2:
-        sock_release(sock);
-        kranal_data.kra_listener_sock = NULL;
- out_1:
-        PORTAL_FREE(connreqs, 2 * sizeof(*connreqs));
- out_0:
-        /* set completion status and unblock thread waiting for me 
-         * (parent on startup failure, executioner on normal shutdown) */
-        kranal_data.kra_listener_shutdown = rc;
-        up(&kranal_data.kra_listener_signal);
-
-        return 0;
-}
-
-/* Spawn the listener thread and wait for it to report its startup status.
- *
- * Called holding kra_nid_mutex with no listener running.  Returns 0 once
- * the listener has published its socket in kra_listener_sock, the negative
- * value returned by kernel_thread() if the thread could not be spawned, or
- * the startup error the listener left in kra_listener_shutdown. */
-int
-kranal_start_listener (void)
-{
-        long           pid;
-        int            rc;
-
-        CDEBUG(D_WARNING, "Starting listener\n");
-
-        /* Called holding kra_nid_mutex: listener stopped */
-        LASSERT (kranal_data.kra_listener_sock == NULL);
-
-        /* Reset the shutdown/status word.  This has to be an assignment:
-         * a previous failed startup leaves its error code in this word,
-         * and the listener asserts that it is 0 before it signals a
-         * successful startup. */
-        kranal_data.kra_listener_shutdown = 0;
-        pid = kernel_thread(kranal_listener, NULL, 0);
-        if (pid < 0) {
-                CERROR("Can't spawn listener: %ld\n", pid);
-                return (int)pid;
-        }
-
-        /* Block until listener has started up. */
-        down(&kranal_data.kra_listener_signal);
-
-        /* The listener stores its completion status in the shutdown word;
-         * it is non-zero exactly when no listening socket was published. */
-        rc = kranal_data.kra_listener_shutdown;
-        LASSERT ((rc != 0) == (kranal_data.kra_listener_sock == NULL));
-
-        /* Only claim success when the listener really came up */
-        if (rc == 0)
-                CDEBUG(D_WARNING, "Listener %ld started OK\n", pid);
-        return rc;
-}
-
-/* Ask the running listener thread to exit and wait until it has done so.
- * Called holding kra_nid_mutex with the listener running. */
-void
-kranal_stop_listener(void)
-{
-        CDEBUG(D_WARNING, "Stopping listener\n");
-
-       /* Called holding kra_nid_mutex: listener running */
-       LASSERT (kranal_data.kra_listener_sock != NULL);
-
-       /* Raise the shutdown flag first, then wake everything sleeping on
-        * the listening socket's wait queue so the flag gets noticed. */
-       kranal_data.kra_listener_shutdown = 1;
-       wake_up_all(kranal_data.kra_listener_sock->sk->sk_sleep);
-
-       /* Block until listener has torn down. */
-       down(&kranal_data.kra_listener_signal);
-
-       /* The listener clears kra_listener_sock before it signals us */
-       LASSERT (kranal_data.kra_listener_sock == NULL);
-        CDEBUG(D_WARNING, "Listener stopped\n");
-}
-
-/* sysctl handler for the listener's integer tunables (port and backlog).
- *
- * Lets proc_dointvec() perform the actual read/write, then, on a write that
- * changed the value (or when no listener is currently running), restarts the
- * listener so the new setting takes effect.  If the restart fails, the old
- * value is restored and a second start is attempted with it.
- *
- * Returns the proc_dointvec() result, or the result of the first restart
- * attempt when a restart was made.
- * NOTE(review): a proc_dointvec() error is overwritten by the restart
- * result when a restart happens - confirm this is intended. */
-int
-kranal_listener_procint(ctl_table *table, int write, struct file *filp,
-                        void *buffer, size_t *lenp)
-{
-        int   *tunable = (int *)table->data;
-        int    saved;
-        int    restart;
-        int    rc;
-
-        /* serialise against NID changes and other tunable updates */
-        down(&kranal_data.kra_nid_mutex);
-
-        LASSERT (tunable == &kranal_tunables.kra_port ||
-                 tunable == &kranal_tunables.kra_backlog);
-        saved = *tunable;
-
-        rc = proc_dointvec(table, write, filp, buffer, lenp);
-
-        /* Only a write can require a restart: either the value moved, or
-         * there is no listener at all right now. */
-        restart = write &&
-                  (*tunable != saved ||
-                   kranal_data.kra_listener_sock == NULL);
-
-        if (restart) {
-                if (kranal_data.kra_listener_sock != NULL)
-                        kranal_stop_listener();
-
-                rc = kranal_start_listener();
-                if (rc != 0) {
-                        /* new setting unusable: fall back to the old one */
-                        *tunable = saved;
-                        kranal_start_listener();
-                }
-        }
-
-        up(&kranal_data.kra_nid_mutex);
-        return rc;
-}
-
-/* Change this node's NID.
- *
- * A no-op if 'nid' equals the current NID.  Otherwise the listener (if any)
- * is stopped, the new NID is installed, all peers are deleted, and - unless
- * the new NID is PTL_NID_ANY - a fresh listener is started.
- *
- * Returns 0, or the error from kranal_start_listener(). */
-int
-kranal_set_mynid(ptl_nid_t nid)
-{
-        lib_ni_t      *libni = &kranal_lib.libnal_ni;
-        int            rc = 0;
-
-        CDEBUG(D_NET, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, libni->ni_pid.nid);
-
-        down(&kranal_data.kra_nid_mutex);
-
-        if (nid != libni->ni_pid.nid) {
-                /* NID really changes: nobody may connect while it does */
-                if (kranal_data.kra_listener_sock != NULL)
-                        kranal_stop_listener();
-
-                libni->ni_pid.nid = nid;
-
-                /* Drop every existing peer (and with it every connection)
-                 * now that the new NID is in place, so nothing belonging
-                 * to the old identity survives. */
-                kranal_del_peer(PTL_NID_ANY, 0);
-
-                if (nid != PTL_NID_ANY)
-                        rc = kranal_start_listener();
-        }
-
-        up(&kranal_data.kra_nid_mutex);
-        return rc;
-}
-
-/* Allocate and initialise a peer descriptor for 'nid'.
- *
- * The new peer carries a single reference owned by the caller, is not linked
- * into the peer table, and is counted in kra_npeers.  Returns NULL if the
- * allocation fails. */
-kra_peer_t *
-kranal_create_peer (ptl_nid_t nid)
-{
-        kra_peer_t *peer;
-
-        LASSERT (nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC(peer, sizeof(*peer));
-        if (peer != NULL) {
-                /* start from all-zero flags and counters */
-                memset(peer, 0, sizeof(*peer));
-
-                /* self-linked lists: not in the peer table, no conns,
-                 * nothing queued */
-                INIT_LIST_HEAD(&peer->rap_list);
-                INIT_LIST_HEAD(&peer->rap_conns);
-                INIT_LIST_HEAD(&peer->rap_tx_queue);
-
-                peer->rap_nid = nid;
-                /* the one and only ref belongs to the caller */
-                atomic_set(&peer->rap_refcount, 1);
-
-                peer->rap_reconnect_interval = RANAL_MIN_RECONNECT_INTERVAL;
-                peer->rap_reconnect_time = CURRENT_TIME;
-
-                atomic_inc(&kranal_data.kra_npeers);
-        }
-
-        return peer;
-}
-
-/* Final teardown of a peer: asserts that its refcount is zero and that it
- * holds no persistence, table linkage, pending connect, connections or
- * queued transmits, then frees it and decrements the global peer count. */
-void
-__kranal_peer_decref (kra_peer_t *peer)
-{
-        /* log before the free below: 'peer' is dereferenced here */
-        CDEBUG(D_NET, "peer "LPX64" %p deleted\n", peer->rap_nid, peer);
-
-        LASSERT (atomic_read(&peer->rap_refcount) == 0);
-        LASSERT (peer->rap_persistence == 0);
-        LASSERT (!kranal_peer_active(peer));
-        LASSERT (peer->rap_connecting == 0);
-        LASSERT (list_empty(&peer->rap_conns));
-        LASSERT (list_empty(&peer->rap_tx_queue));
-
-        PORTAL_FREE(peer, sizeof(*peer));
-
-        /* NB a peer's connections keep a reference on their peer until
-         * they are destroyed, so we can be assured that _all_ state to do
-         * with this peer has been cleaned up when its refcount drops to
-         * zero. */
-        atomic_dec(&kranal_data.kra_npeers);
-}
-
-/* Look up the peer with the given NID in its hash bucket.
- *
- * Takes no reference on the result.  The callers in this file invoke it
- * with kra_global_lock held.  Returns NULL when no peer matches. */
-kra_peer_t *
-kranal_find_peer_locked (ptl_nid_t nid)
-{
-        struct list_head *bucket = kranal_nid2peerlist(nid);
-        struct list_head *pos;
-        kra_peer_t       *peer;
-
-        list_for_each (pos, bucket) {
-
-                peer = list_entry(pos, kra_peer_t, rap_list);
-
-                /* anything in the table is persistent or has a conn */
-                LASSERT (peer->rap_persistence > 0 ||     /* persistent peer */
-                         !list_empty(&peer->rap_conns));  /* active conn */
-
-                if (peer->rap_nid == nid) {
-                        CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                               peer, nid, atomic_read(&peer->rap_refcount));
-                        return peer;
-                }
-        }
-
-        return NULL;
-}
-
-/* Find the peer for 'nid' and return it with an extra reference held on
- * behalf of the caller, or NULL if there is no such peer. */
-kra_peer_t *
-kranal_find_peer (ptl_nid_t nid)
-{
-        kra_peer_t     *found;
-
-        read_lock(&kranal_data.kra_global_lock);
-
-        found = kranal_find_peer_locked(nid);
-        if (found != NULL) {
-                /* take the caller's ref while the table can't change */
-                kranal_peer_addref(found);
-        }
-
-        read_unlock(&kranal_data.kra_global_lock);
-
-        return found;
-}
-
-/* Remove a peer from the peer table and drop the table's reference on it.
- * The peer must have no persistence and no connections left.
- * NOTE(review): the '_locked' suffix suggests the caller holds
- * kra_global_lock for writing - confirm against all callers. */
-void
-kranal_unlink_peer_locked (kra_peer_t *peer)
-{
-        LASSERT (peer->rap_persistence == 0);
-        LASSERT (list_empty(&peer->rap_conns));
-
-        /* must currently be linked; list_del_init leaves rap_list
-         * self-linked afterwards */
-        LASSERT (kranal_peer_active(peer));
-        list_del_init(&peer->rap_list);
-
-        /* lose peerlist's ref */
-        kranal_peer_decref(peer);
-}
-
-/* Report the NID, IP, port and persistence count of the index'th peer,
- * counting through the hash buckets in order.
- *
- * Returns 0 and fills the out-parameters on success, -ENOENT if there are
- * not more than 'index' peers. */
-int
-kranal_get_peer_info (int index, ptl_nid_t *nidp, __u32 *ipp, int *portp,
-                      int *persistencep)
-{
-        kra_peer_t        *peer;
-        struct list_head  *pos;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        read_lock(&kranal_data.kra_global_lock);
-
-        for (bucket = 0; bucket < kranal_data.kra_peer_hash_size; bucket++) {
-
-                list_for_each(pos, &kranal_data.kra_peers[bucket]) {
-
-                        peer = list_entry(pos, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !list_empty(&peer->rap_conns));
-
-                        if (index > 0) {
-                                /* not there yet: skip this one */
-                                index--;
-                                continue;
-                        }
-
-                        /* copy out while still holding the lock */
-                        *nidp = peer->rap_nid;
-                        *ipp = peer->rap_ip;
-                        *portp = peer->rap_port;
-                        *persistencep = peer->rap_persistence;
-
-                        rc = 0;
-                        goto out;
-                }
-        }
-
- out:
-        read_unlock(&kranal_data.kra_global_lock);
-        return rc;
-}
-
-/* Add (or add another share to) a persistent peer.
- *
- * A peer is pre-allocated outside the lock; if the table already holds a
- * peer for 'nid' the new one is dropped and the existing one updated.  In
- * both cases the peer's IP and port are overwritten and its persistence
- * count is bumped.
- *
- * Returns 0, -EINVAL for PTL_NID_ANY, or -ENOMEM. */
-int
-kranal_add_persistent_peer (ptl_nid_t nid, __u32 ip, int port)
-{
-        unsigned long      flags;
-        kra_peer_t        *peer;
-        kra_peer_t        *existing;
-
-        if (nid == PTL_NID_ANY)
-                return -EINVAL;
-
-        peer = kranal_create_peer(nid);
-        if (peer == NULL)
-                return -ENOMEM;
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        existing = kranal_find_peer_locked(nid);
-        if (existing == NULL) {
-                /* peer table takes existing ref on peer */
-                list_add_tail(&peer->rap_list,
-                              kranal_nid2peerlist(nid));
-        } else {
-                /* already known: discard the spare and update in place */
-                kranal_peer_decref(peer);
-                peer = existing;
-        }
-
-        peer->rap_persistence++;
-        peer->rap_port = port;
-        peer->rap_ip = ip;
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-        return 0;
-}
-
-/* Drop persistence from a peer and, once none is left, get rid of it.
- *
- * With 'single_share' set one share is removed (if any remain); otherwise
- * all persistence is cleared.  A peer left with zero persistence is unlinked
- * immediately if it has no connections, else all its connections are closed.
- * kranal_del_peer() calls this holding kra_global_lock for writing. */
-void
-kranal_del_peer_locked (kra_peer_t *peer, int single_share)
-{
-        struct list_head *pos;
-        struct list_head *nxt;
-
-        if (single_share) {
-                if (peer->rap_persistence > 0)
-                        peer->rap_persistence--;
-        } else {
-                peer->rap_persistence = 0;
-        }
-
-        /* other shares still pin this peer */
-        if (peer->rap_persistence != 0)
-                return;
-
-        if (list_empty(&peer->rap_conns)) {
-                kranal_unlink_peer_locked(peer);
-                return;
-        }
-
-        /* peer unlinks itself when last conn is closed */
-        list_for_each_safe(pos, nxt, &peer->rap_conns)
-                kranal_close_conn_locked(list_entry(pos, kra_conn_t,
-                                                    rac_list), 0);
-}
-
-/* Delete the peer(s) matching 'nid' (PTL_NID_ANY matches every peer).
- *
- * 'single_share' is passed through to kranal_del_peer_locked(); when set,
- * processing stops after the first matching peer.
- *
- * Returns 0 if at least one peer matched, -ENOENT otherwise. */
-int
-kranal_del_peer (ptl_nid_t nid, int single_share)
-{
-        unsigned long      flags;
-        struct list_head  *pos;
-        struct list_head  *nxt;
-        kra_peer_t        *peer;
-        int                first;
-        int                last;
-        int                bucket;
-        int                done = 0;
-        int                rc = -ENOENT;
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: walk the whole table */
-                first = 0;
-                last = kranal_data.kra_peer_hash_size - 1;
-        } else {
-                /* specific NID: only its own bucket can hold it */
-                first = last = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
-        }
-
-        for (bucket = first; bucket <= last && !done; bucket++) {
-                list_for_each_safe (pos, nxt, &kranal_data.kra_peers[bucket]) {
-                        peer = list_entry(pos, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !list_empty(&peer->rap_conns));
-
-                        if (nid != PTL_NID_ANY && peer->rap_nid != nid)
-                                continue;
-
-                        kranal_del_peer_locked(peer, single_share);
-                        rc = 0;         /* matched something */
-
-                        if (single_share) {
-                                /* one share only: stop after first match */
-                                done = 1;
-                                break;
-                        }
-                }
-        }
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        return rc;
-}
-
-/* Return the index'th connection, counting across every peer in hash-table
- * order, with an extra reference taken for the caller; NULL if there are
- * not that many connections. */
-kra_conn_t *
-kranal_get_conn_by_idx (int index)
-{
-        kra_peer_t        *peer;
-        struct list_head  *ppos;
-        struct list_head  *cpos;
-        kra_conn_t        *found = NULL;
-        int                bucket;
-
-        read_lock (&kranal_data.kra_global_lock);
-
-        for (bucket = 0; bucket < kranal_data.kra_peer_hash_size; bucket++) {
-                list_for_each (ppos, &kranal_data.kra_peers[bucket]) {
-
-                        peer = list_entry(ppos, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !list_empty(&peer->rap_conns));
-
-                        list_for_each (cpos, &peer->rap_conns) {
-                                if (index > 0) {
-                                        /* still counting down */
-                                        index--;
-                                        continue;
-                                }
-
-                                found = list_entry(cpos, kra_conn_t, rac_list);
-                                CDEBUG(D_NET, "++conn[%p] -> "LPX64" (%d)\n",
-                                       found, found->rac_peer->rap_nid,
-                                       atomic_read(&found->rac_refcount));
-                                /* ref for the caller, taken under the lock */
-                                atomic_inc(&found->rac_refcount);
-                                goto out;
-                        }
-                }
-        }
-
- out:
-        read_unlock(&kranal_data.kra_global_lock);
-        return found;
-}
-
-/* Close every connection of 'peer', passing 'why' to
- * kranal_close_conn_locked() for each.  Returns how many were closed.
- * kranal_close_matching_conns() calls this holding kra_global_lock for
- * writing. */
-int
-kranal_close_peer_conns_locked (kra_peer_t *peer, int why)
-{
-        struct list_head   *pos;
-        struct list_head   *nxt;
-        int                 nclosed = 0;
-
-        /* the _safe iterator fetches the next entry before the body runs */
-        list_for_each_safe (pos, nxt, &peer->rap_conns) {
-                kranal_close_conn_locked(list_entry(pos, kra_conn_t,
-                                                    rac_list), why);
-                nclosed++;
-        }
-
-        return nclosed;
-}
-
-/* Close every connection of 'peer' whose recorded peer incarnation differs
- * from 'incarnation' (asserted to be older), using -ESTALE as the reason.
- * Returns the number of connections closed.
- * NOTE(review): presumably called with kra_global_lock held for writing,
- * like the other '_locked' helpers - no caller is visible here. */
-int
-kranal_close_stale_conns_locked (kra_peer_t *peer, __u64 incarnation)
-{
-        kra_conn_t         *conn;
-        struct list_head   *pos;
-        struct list_head   *nxt;
-        int                 nstale = 0;
-
-        list_for_each_safe (pos, nxt, &peer->rap_conns) {
-                conn = list_entry(pos, kra_conn_t, rac_list);
-
-                if (conn->rac_peer_incarnation != incarnation) {
-                        CDEBUG(D_NET, "Closing stale conn nid:"LPX64" incarnation:"LPX64"("LPX64")\n",
-                               peer->rap_nid, conn->rac_peer_incarnation, incarnation);
-                        /* a differing incarnation must be an older one */
-                        LASSERT (conn->rac_peer_incarnation < incarnation);
-
-                        nstale++;
-                        kranal_close_conn_locked(conn, -ESTALE);
-                }
-        }
-
-        return nstale;
-}
-
-/* Close all connections to the peer(s) matching 'nid' (PTL_NID_ANY matches
- * every peer).
- *
- * Returns 0 for a wildcard, or when at least one connection was closed;
- * -ENOENT when a specific NID had no connections to close. */
-int
-kranal_close_matching_conns (ptl_nid_t nid)
-{
-        unsigned long       flags;
-        kra_peer_t         *peer;
-        struct list_head   *pos;
-        struct list_head   *nxt;
-        int                 first;
-        int                 last;
-        int                 bucket;
-        int                 nclosed = 0;
-
-        write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: every bucket */
-                first = 0;
-                last = kranal_data.kra_peer_hash_size - 1;
-        } else {
-                /* specific NID: just the bucket it hashes to */
-                first = last = kranal_nid2peerlist(nid) - kranal_data.kra_peers;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, nxt, &kranal_data.kra_peers[bucket]) {
-
-                        peer = list_entry(pos, kra_peer_t, rap_list);
-                        LASSERT (peer->rap_persistence > 0 ||
-                                 !list_empty(&peer->rap_conns));
-
-                        if (nid != PTL_NID_ANY && nid != peer->rap_nid)
-                                continue;
-
-                        nclosed += kranal_close_peer_conns_locked(peer, 0);
-                }
-        }
-
-        write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-        /* wildcards always succeed; a specific NID must have closed
-         * something */
-        return (nid == PTL_NID_ANY || nclosed != 0) ? 0 : -ENOENT;
-}
-
-/* NAL command dispatcher (registered with libcfs_nal_cmd_register()).
- *
- * Decodes pcfg->pcfg_command, runs the matching peer/connection/NID
- * operation and writes any results back into 'pcfg'.  'private' is unused.
- * Returns the operation's result, or -EINVAL for an unrecognised command. */
-int
-kranal_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int rc = -EINVAL;               /* result for unknown commands */
-
-        LASSERT (pcfg != NULL);
-
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t   peer_nid = 0;
-                __u32       peer_ip = 0;
-                int         peer_port = 0;
-                int         nshares = 0;
-
-                /* pcfg_count is the index of the peer being queried; the
-                 * outputs stay zero if the lookup fails */
-                rc = kranal_get_peer_info(pcfg->pcfg_count, &peer_nid,
-                                          &peer_ip, &peer_port, &nshares);
-                pcfg->pcfg_nid   = peer_nid;
-                pcfg->pcfg_id    = peer_ip;
-                pcfg->pcfg_misc  = peer_port;
-                pcfg->pcfg_wait  = nshares;
-                pcfg->pcfg_size  = 0;
-                pcfg->pcfg_count = 0;
-                break;
-        }
-
-        case NAL_CMD_ADD_PEER:
-                /* pcfg_id carries the IP, pcfg_misc the port */
-                rc = kranal_add_persistent_peer(pcfg->pcfg_nid,
-                                                pcfg->pcfg_id,
-                                                pcfg->pcfg_misc);
-                break;
-
-        case NAL_CMD_DEL_PEER:
-                /* non-zero flags == single_share */
-                rc = kranal_del_peer(pcfg->pcfg_nid, pcfg->pcfg_flags != 0);
-                break;
-
-        case NAL_CMD_GET_CONN: {
-                kra_conn_t *conn = kranal_get_conn_by_idx(pcfg->pcfg_count);
-
-                if (conn == NULL) {
-                        rc = -ENOENT;
-                        break;
-                }
-
-                rc = 0;
-                pcfg->pcfg_nid   = conn->rac_peer->rap_nid;
-                pcfg->pcfg_id    = 0;
-                pcfg->pcfg_misc  = 0;
-                pcfg->pcfg_flags = 0;
-                /* drop the ref the lookup took for us */
-                kranal_conn_decref(conn);
-                break;
-        }
-
-        case NAL_CMD_CLOSE_CONNECTION:
-                rc = kranal_close_matching_conns(pcfg->pcfg_nid);
-                break;
-
-        case NAL_CMD_REGISTER_MYNID:
-                /* the wildcard is not a NID a node can register */
-                rc = (pcfg->pcfg_nid == PTL_NID_ANY) ?
-                     -EINVAL : kranal_set_mynid(pcfg->pcfg_nid);
-                break;
-        }
-
-        return rc;
-}
-
-/* Free every tx descriptor on 'freelist', together with each descriptor's
- * tx_phys array, leaving the list empty. */
-void
-kranal_free_txdescs(struct list_head *freelist)
-{
-        struct list_head *pos;
-        struct list_head *nxt;
-        kra_tx_t         *tx;
-
-        list_for_each_safe (pos, nxt, freelist) {
-                tx = list_entry(pos, kra_tx_t, tx_list);
-
-                /* unlink first, then release the array and the descriptor */
-                list_del(&tx->tx_list);
-                PORTAL_FREE(tx->tx_phys, PTL_MD_MAX_IOV * sizeof(*tx->tx_phys));
-                PORTAL_FREE(tx, sizeof(*tx));
-        }
-}
-
-/* Populate one of the two idle tx lists with 'n' freshly allocated
- * descriptors, each with a tx_phys array of PTL_MD_MAX_IOV entries.
- *
- * 'freelist' must be kra_idle_txs or kra_idle_nblk_txs and must be empty.
- * On any allocation failure everything added so far is freed again.
- * Returns 0 or -ENOMEM. */
-int
-kranal_alloc_txdescs(struct list_head *freelist, int n)
-{
-        int            isnblk = (freelist == &kranal_data.kra_idle_nblk_txs);
-        const char    *kind = isnblk ? "nblk " : "";
-        kra_tx_t      *tx;
-        int            i;
-
-        LASSERT (freelist == &kranal_data.kra_idle_txs ||
-                 freelist == &kranal_data.kra_idle_nblk_txs);
-        LASSERT (list_empty(freelist));
-
-        for (i = 0; i < n; i++) {
-
-                PORTAL_ALLOC(tx, sizeof(*tx));
-                if (tx == NULL) {
-                        CERROR("Can't allocate %stx[%d]\n", kind, i);
-                        goto failed;
-                }
-
-                PORTAL_ALLOC(tx->tx_phys,
-                             PTL_MD_MAX_IOV * sizeof(*tx->tx_phys));
-                if (tx->tx_phys == NULL) {
-                        CERROR("Can't allocate %stx[%d]->tx_phys\n", kind, i);
-
-                        /* this one never made it onto the list */
-                        PORTAL_FREE(tx, sizeof(*tx));
-                        goto failed;
-                }
-
-                tx->tx_buftype = RANAL_BUF_NONE;
-                tx->tx_isnblk = isnblk;
-
-                list_add(&tx->tx_list, freelist);
-        }
-
-        return 0;
-
- failed:
-        /* undo everything that did make it onto the list */
-        kranal_free_txdescs(freelist);
-        return -ENOMEM;
-}
-
-/* Acquire one RapidArray device and the resources needed to use it: an RDMA
- * descriptor reservation, a ptag, an RDMA completion queue and an FMA
- * completion queue.
- *
- * On failure whatever was already acquired is released in reverse order.
- * Returns 0 on success, -ENODEV on any failure. */
-int
-kranal_device_init(int id, kra_device_t *dev)
-{
-        /* one RDMA descriptor per tx descriptor, blocking + non-blocking */
-        const int         ndesc = RANAL_NTX + RANAL_NTX_NBLK;
-        RAP_RETURN        rrc;
-
-        dev->rad_id = id;
-
-        rrc = RapkGetDeviceByIndex(id, kranal_device_callback,
-                                   &dev->rad_handle);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't get Rapidarray Device %d: %d\n", id, rrc);
-                goto out_nodev;
-        }
-
-        rrc = RapkReserveRdma(dev->rad_handle, ndesc);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't reserve %d RDMA descriptors for device %d: %d\n",
-                       ndesc, id, rrc);
-                goto out_release;
-        }
-
-        rrc = RapkCreatePtag(dev->rad_handle, &dev->rad_ptag);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't create ptag for device %d: %d\n", id, rrc);
-                goto out_release;
-        }
-
-        rrc = RapkCreateCQ(dev->rad_handle, ndesc, dev->rad_ptag,
-                           &dev->rad_rdma_cq);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't create rdma cq size %d for device %d: %d\n",
-                       ndesc, id, rrc);
-                goto out_ptag;
-        }
-
-        rrc = RapkCreateCQ(dev->rad_handle, RANAL_FMA_CQ_SIZE,
-                           dev->rad_ptag, &dev->rad_fma_cq);
-        if (rrc != RAP_SUCCESS) {
-                CERROR("Can't create fma cq size %d for device %d: %d\n",
-                       RANAL_FMA_CQ_SIZE, id, rrc);
-                goto out_rdma_cq;
-        }
-
-        return 0;
-
-        /* unwind: each label releases what was acquired before the jump */
- out_rdma_cq:
-        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cq, dev->rad_ptag);
- out_ptag:
-        RapkDestroyPtag(dev->rad_handle, dev->rad_ptag);
- out_release:
-        RapkReleaseDevice(dev->rad_handle);
- out_nodev:
-        return -ENODEV;
-}
-
-/* Release everything kranal_device_init() acquired for 'dev', in the
- * reverse order of acquisition: FMA CQ, RDMA CQ, ptag, then the device. */
-void
-kranal_device_fini(kra_device_t *dev)
-{
-        RapkDestroyCQ(dev->rad_handle, dev->rad_fma_cq, dev->rad_ptag);
-        RapkDestroyCQ(dev->rad_handle, dev->rad_rdma_cq, dev->rad_ptag);
-        RapkDestroyPtag(dev->rad_handle, dev->rad_ptag);
-        RapkReleaseDevice(dev->rad_handle);
-}
-
-/* NAL shutdown entry point (installed as kranal_api.nal_ni_fini).
- *
- * If 'nal' still has references this only drops the module use count.
- * Otherwise it unwinds as far as kra_init says startup got: unregisters the
- * command interface, resets the NID (stopping the listener and deleting all
- * peers), waits for peers and connections to drain, finalises the lib
- * interface, then stops all threads and frees the hash tables, devices and
- * tx descriptors.  It is also used by kranal_api_startup() to clean up after
- * a partial initialisation. */
-void
-kranal_api_shutdown (nal_t *nal)
-{
-        int           i;
-        unsigned long flags;
-
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
-        }
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read(&portal_kmemory));
-
-        LASSERT (nal == &kranal_api);
-
-        /* Each case undoes one initialisation stage and falls through to
-         * the stages that were completed before it. */
-        switch (kranal_data.kra_init) {
-        default:
-                CERROR("Unexpected state %d\n", kranal_data.kra_init);
-                LBUG();
-
-        case RANAL_INIT_ALL:
-                /* stop calls to nal_cmd */
-                libcfs_nal_cmd_unregister(RANAL);
-                /* No new persistent peers */
-
-                /* resetting my NID unadvertises me, removes my
-                 * listener and nukes all current peers */
-                kranal_set_mynid(PTL_NID_ANY);
-                /* no new peers or conns */
-
-                /* Wait for all peer/conn state to clean up */
-                i = 2;
-                while (atomic_read(&kranal_data.kra_nconns) != 0 ||
-                       atomic_read(&kranal_data.kra_npeers) != 0) {
-                        i++;
-                        /* warn at power-of-2 iterations only, so the log
-                         * is not flooded while waiting */
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers and %d conns to close down\n",
-                               atomic_read(&kranal_data.kra_npeers),
-                               atomic_read(&kranal_data.kra_nconns));
-                        kranal_pause(HZ);
-                }
-                /* fall through */
-
-        case RANAL_INIT_LIB:
-                lib_fini(&kranal_lib);
-                /* fall through */
-
-        case RANAL_INIT_DATA:
-                break;
-        }
-
-        /* flag threads to terminate; wake and wait for them to die */
-        kranal_data.kra_shutdown = 1;
-
-        for (i = 0; i < kranal_data.kra_ndevs; i++) {
-                kra_device_t *dev = &kranal_data.kra_devices[i];
-
-                LASSERT (list_empty(&dev->rad_connq));
-
-                spin_lock_irqsave(&dev->rad_lock, flags);
-                wake_up(&dev->rad_waitq);
-                spin_unlock_irqrestore(&dev->rad_lock, flags);
-        }
-
-        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-        wake_up_all(&kranal_data.kra_reaper_waitq);
-        spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
-        LASSERT (list_empty(&kranal_data.kra_connd_peers));
-        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-        wake_up_all(&kranal_data.kra_connd_waitq);
-        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-        i = 2;
-        while (atomic_read(&kranal_data.kra_nthreads) != 0) {
-                i++;
-                CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                       "Waiting for %d threads to terminate\n",
-                       atomic_read(&kranal_data.kra_nthreads));
-                kranal_pause(HZ);
-        }
-
-        /* The hash tables may be NULL if startup failed before allocating
-         * them, hence the checks. */
-        LASSERT (atomic_read(&kranal_data.kra_npeers) == 0);
-        if (kranal_data.kra_peers != NULL) {
-                for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
-                        LASSERT (list_empty(&kranal_data.kra_peers[i]));
-
-                PORTAL_FREE(kranal_data.kra_peers,
-                            sizeof (struct list_head) *
-                            kranal_data.kra_peer_hash_size);
-        }
-
-        LASSERT (atomic_read(&kranal_data.kra_nconns) == 0);
-        if (kranal_data.kra_conns != NULL) {
-                for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
-                        LASSERT (list_empty(&kranal_data.kra_conns[i]));
-
-                PORTAL_FREE(kranal_data.kra_conns,
-                            sizeof (struct list_head) *
-                            kranal_data.kra_conn_hash_size);
-        }
-
-        for (i = 0; i < kranal_data.kra_ndevs; i++)
-                kranal_device_fini(&kranal_data.kra_devices[i]);
-
-        kranal_free_txdescs(&kranal_data.kra_idle_txs);
-        kranal_free_txdescs(&kranal_data.kra_idle_nblk_txs);
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read(&portal_kmemory));
-        printk(KERN_INFO "Lustre: RapidArray NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
-
-        /* startup asserts this state before initialising again */
-        kranal_data.kra_init = RANAL_INIT_NOTHING;
-}
-
-/* NAL startup entry point (installed as kranal_api.nal_ni_init).
- *
- * If 'nal' is already referenced this only reports the actual limits and
- * takes a module use count.  Otherwise it initialises kranal_data, allocates
- * the peer/conn hash tables and tx descriptors, initialises the lib
- * interface, starts the reaper and connection daemons, brings up every
- * RapidArray device that can be initialised (with one scheduler thread per
- * working device) and registers the command interface.
- *
- * Returns PTL_OK on success.  On any failure everything done so far is
- * undone via kranal_api_shutdown() and PTL_FAIL is returned. */
-int
-kranal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                    ptl_ni_limits_t *requested_limits,
-                    ptl_ni_limits_t *actual_limits)
-{
-        static int        device_ids[] = {RAPK_MAIN_DEVICE_ID,
-                                          RAPK_EXPANSION_DEVICE_ID};
-        struct timeval    tv;
-        ptl_process_id_t  process_id;
-        int               pkmem = atomic_read(&portal_kmemory);
-        int               rc;
-        int               i;
-        kra_device_t     *dev;
-
-        LASSERT (nal == &kranal_api);
-
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = kranal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return PTL_OK;
-        }
-
-        LASSERT (kranal_data.kra_init == RANAL_INIT_NOTHING);
-
-        memset(&kranal_data, 0, sizeof(kranal_data)); /* zero pointers, flags etc */
-
-        /* CAVEAT EMPTOR: Every 'Fma' message includes the sender's NID and
-         * a unique (for all time) incarnation so we can uniquely identify
-         * the sender.  The incarnation is an incrementing counter
-         * initialised with seconds + microseconds at startup time.  So we
-         * rely on NOT creating connections more frequently on average than
-         * 1MHz to ensure we don't use old incarnations when we reboot. */
-        do_gettimeofday(&tv);
-        kranal_data.kra_next_incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;
-
-        init_MUTEX(&kranal_data.kra_nid_mutex);
-        init_MUTEX_LOCKED(&kranal_data.kra_listener_signal);
-
-        rwlock_init(&kranal_data.kra_global_lock);
-
-        /* Set up every device slot, whether or not a device turns up */
-        for (i = 0; i < RANAL_MAXDEVS; i++ ) {
-                dev = &kranal_data.kra_devices[i];
-
-                dev->rad_idx = i;
-                INIT_LIST_HEAD(&dev->rad_connq);
-                init_waitqueue_head(&dev->rad_waitq);
-                spin_lock_init(&dev->rad_lock);
-        }
-
-        init_waitqueue_head(&kranal_data.kra_reaper_waitq);
-        spin_lock_init(&kranal_data.kra_reaper_lock);
-
-        INIT_LIST_HEAD(&kranal_data.kra_connd_peers);
-        init_waitqueue_head(&kranal_data.kra_connd_waitq);
-        spin_lock_init(&kranal_data.kra_connd_lock);
-
-        INIT_LIST_HEAD(&kranal_data.kra_idle_txs);
-        INIT_LIST_HEAD(&kranal_data.kra_idle_nblk_txs);
-        init_waitqueue_head(&kranal_data.kra_idle_tx_waitq);
-        spin_lock_init(&kranal_data.kra_tx_lock);
-
-        /* OK to call kranal_api_shutdown() to cleanup now */
-        kranal_data.kra_init = RANAL_INIT_DATA;
-
-        kranal_data.kra_peer_hash_size = RANAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC(kranal_data.kra_peers,
-                     sizeof(struct list_head) * kranal_data.kra_peer_hash_size);
-        if (kranal_data.kra_peers == NULL)
-                goto failed;
-
-        for (i = 0; i < kranal_data.kra_peer_hash_size; i++)
-                INIT_LIST_HEAD(&kranal_data.kra_peers[i]);
-
-        kranal_data.kra_conn_hash_size = RANAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC(kranal_data.kra_conns,
-                     sizeof(struct list_head) * kranal_data.kra_conn_hash_size);
-        if (kranal_data.kra_conns == NULL)
-                goto failed;
-
-        for (i = 0; i < kranal_data.kra_conn_hash_size; i++)
-                INIT_LIST_HEAD(&kranal_data.kra_conns[i]);
-
-        rc = kranal_alloc_txdescs(&kranal_data.kra_idle_txs, RANAL_NTX);
-        if (rc != 0)
-                goto failed;
-
-        rc = kranal_alloc_txdescs(&kranal_data.kra_idle_nblk_txs,RANAL_NTX_NBLK);
-        if (rc != 0)
-                goto failed;
-
-        process_id.pid = requested_pid;
-        process_id.nid = PTL_NID_ANY;           /* don't know my NID yet */
-
-        rc = lib_init(&kranal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                goto failed;
-        }
-
-        /* lib interface initialised */
-        kranal_data.kra_init = RANAL_INIT_LIB;
-        /*****************************************************/
-
-        rc = kranal_thread_start(kranal_reaper, NULL);
-        if (rc != 0) {
-                CERROR("Can't spawn ranal reaper: %d\n", rc);
-                goto failed;
-        }
-
-        for (i = 0; i < RANAL_N_CONND; i++) {
-                /* pass the daemon index through the thread argument; go
-                 * via unsigned long so the int->pointer conversion is
-                 * well-defined on 64-bit builds too */
-                rc = kranal_thread_start(kranal_connd,
-                                         (void *)(unsigned long)i);
-                if (rc != 0) {
-                        CERROR("Can't spawn ranal connd[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        LASSERT(kranal_data.kra_ndevs == 0);
-        for (i = 0; i < sizeof(device_ids)/sizeof(device_ids[0]); i++) {
-                /* working devices are packed into the low slots */
-                dev = &kranal_data.kra_devices[kranal_data.kra_ndevs];
-
-                rc = kranal_device_init(device_ids[i], dev);
-                if (rc != 0) {
-                        /* Device absent or unusable.  Don't start a
-                         * scheduler on it: the slot is not counted in
-                         * kra_ndevs, so shutdown would never wake that
-                         * thread and would wait for it forever. */
-                        continue;
-                }
-
-                kranal_data.kra_ndevs++;
-
-                rc = kranal_thread_start(kranal_scheduler, dev);
-                if (rc != 0) {
-                        CERROR("Can't spawn ranal scheduler[%d]: %d\n",
-                               i, rc);
-                        goto failed;
-                }
-        }
-
-        if (kranal_data.kra_ndevs == 0) {
-                CERROR("Can't initialise any RapidArray devices\n");
-                goto failed;
-        }
-
-        rc = libcfs_nal_cmd_register(RANAL, &kranal_cmd, NULL);
-        if (rc != 0) {
-                CERROR("Can't initialise command interface (rc = %d)\n", rc);
-                goto failed;
-        }
-
-        /* flag everything initialised */
-        kranal_data.kra_init = RANAL_INIT_ALL;
-        /*****************************************************/
-
-        CDEBUG(D_MALLOC, "initial kmem %d\n", atomic_read(&portal_kmemory));
-        printk(KERN_INFO "Lustre: RapidArray NAL loaded "
-               "(initial mem %d)\n", pkmem);
-
-        return PTL_OK;
-
- failed:
-        /* shutdown unwinds according to how far kra_init got */
-        kranal_api_shutdown(&kranal_api);
-        return PTL_FAIL;
-}
-
-/* Module exit: unregister the sysctl table (if one was registered),
- * release the network interface obtained at load time and unregister the
- * NAL. */
-void __exit
-kranal_module_fini (void)
-{
-#ifdef CONFIG_SYSCTL
-        /* registration at load time may have failed and left this NULL */
-        if (kranal_tunables.kra_sysctl != NULL)
-                unregister_sysctl_table(kranal_tunables.kra_sysctl);
-#endif
-        /* drop the interface reference taken by PtlNIInit() at module init */
-        PtlNIFini(kranal_ni);
-
-        ptl_unregister_nal(RANAL);
-}
-
-/* Module init: install the NAL startup/shutdown methods, register the NAL,
- * start it up immediately via PtlNIInit() and register the sysctl tunables.
- *
- * Returns 0 on success, -ENOMEM if the NAL can't be registered, or -ENODEV
- * if the interface can't be initialised. */
-int __init
-kranal_module_init (void)
-{
-        int    rc;
-
-        /* the following must be sizeof(int) for
-         * proc_dointvec/kranal_listener_procint() */
-        LASSERT (sizeof(kranal_tunables.kra_timeout) == sizeof(int));
-        LASSERT (sizeof(kranal_tunables.kra_listener_timeout) == sizeof(int));
-        LASSERT (sizeof(kranal_tunables.kra_backlog) == sizeof(int));
-        LASSERT (sizeof(kranal_tunables.kra_port) == sizeof(int));
-        LASSERT (sizeof(kranal_tunables.kra_max_immediate) == sizeof(int));
-
-        kranal_api.nal_ni_init = kranal_api_startup;
-        kranal_api.nal_ni_fini = kranal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        /* NOTE(review): only kra_timeout is given a default here; the other
-         * tunables are presumably initialised statically - confirm. */
-        kranal_tunables.kra_timeout = RANAL_TIMEOUT;
-
-        rc = ptl_register_nal(RANAL, &kranal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register RANAL: %d\n", rc);
-                return -ENOMEM;               /* or something... */
-        }
-
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(RANAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &kranal_ni);
-        /* PTL_IFACE_DUP is tolerated as well as PTL_OK */
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(RANAL);
-                return -ENODEV;
-        }
-
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        kranal_tunables.kra_sysctl = 
-                register_sysctl_table(kranal_top_ctl_table, 0);
-#endif
-        return 0;
-}
-
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel RapidArray NAL v0.01");
-MODULE_LICENSE("GPL");
-
-module_init(kranal_module_init);
-module_exit(kranal_module_fini);
diff --git a/lustre/portals/knals/ranal/ranal.h b/lustre/portals/knals/ranal/ranal.h
deleted file mode 100644 (file)
index fe130b7..0000000
+++ /dev/null
@@ -1,477 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-
-#include <net/sock.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#include <rapl.h>
-
-#if CONFIG_SMP
-# define RANAL_N_SCHED      num_online_cpus()   /* # schedulers */
-#else
-# define RANAL_N_SCHED      1                   /* # schedulers */
-#endif
-
-#define RANAL_MAXDEVS       2                   /* max # devices RapidArray supports */
-
-#define RANAL_N_CONND       4                   /* # connection daemons */
-
-#define RANAL_MIN_RECONNECT_INTERVAL 1          /* first failed connection retry (seconds)... */
-#define RANAL_MAX_RECONNECT_INTERVAL 60         /* ...exponentially increasing to this */
-
-#define RANAL_FMA_PREFIX_LEN      232           /* size of FMA "Prefix" */
-#define RANAL_FMA_MAX_DATA_LEN    ((7<<10)-256) /* Max FMA MSG is 7K including prefix */
-
-#define RANAL_PEER_HASH_SIZE  101               /* # peer lists */
-#define RANAL_CONN_HASH_SIZE  101               /* # conn lists */
-
-#define RANAL_NTX             64                /* # tx descs */
-#define RANAL_NTX_NBLK        256               /* # reserved tx descs */
-
-#define RANAL_FMA_CQ_SIZE     8192              /* # entries in receive CQ 
-                                                 * (overflow is a performance hit) */
-
-#define RANAL_RESCHED         100               /* # scheduler loops before reschedule */
-
-#define RANAL_MIN_TIMEOUT     5                 /* minimum timeout interval (seconds) */
-#define RANAL_TIMEOUT2KEEPALIVE(t) (((t)+1)/2)  /* timeout -> keepalive interval */
-
-/* default vals for runtime tunables */
-#define RANAL_TIMEOUT           30              /* comms timeout (seconds) */
-#define RANAL_LISTENER_TIMEOUT   5              /* listener timeout (seconds) */
-#define RANAL_MAX_IMMEDIATE    (2<<10)          /* biggest immediate payload */
-
-typedef struct 
-{
-        int               kra_timeout;          /* comms timeout (seconds) */
-        int               kra_listener_timeout; /* max time the listener can block */
-       int               kra_backlog;          /* listener's backlog */
-       int               kra_port;             /* listener's TCP/IP port */
-        int               kra_max_immediate;    /* biggest immediate payload */
-        struct ctl_table_header *kra_sysctl;    /* sysctl interface */
-} kra_tunables_t;
-
-typedef struct
-{
-        RAP_PVOID               rad_handle;     /* device handle */
-        RAP_PROTECTION_HANDLE   rad_ptag;       /* protection tag */
-        RAP_CQ_HANDLE           rad_fma_cq;     /* FMA (small message) completion queue */
-        RAP_CQ_HANDLE           rad_rdma_cq;    /* rdma completion queue */
-        int                     rad_id;         /* device id */
-        int                     rad_idx;        /* index in kra_devices */
-        int                     rad_ready;      /* set by device callback */
-        struct list_head        rad_connq;      /* connections requiring attention */
-        wait_queue_head_t       rad_waitq;      /* scheduler waits here */
-        spinlock_t              rad_lock;       /* serialise */
-} kra_device_t;
-        
-typedef struct 
-{
-        int               kra_init;             /* initialisation state */
-        int               kra_shutdown;         /* shut down? */
-        atomic_t          kra_nthreads;         /* # live threads */
-
-        struct semaphore  kra_nid_mutex;        /* serialise NID/listener ops */
-       struct semaphore  kra_listener_signal;  /* block for listener startup/shutdown */
-       struct socket    *kra_listener_sock;    /* listener's socket */
-       int               kra_listener_shutdown; /* ask listener to close */
-
-        kra_device_t      kra_devices[RANAL_MAXDEVS]; /* device/ptag/cq etc */
-        int               kra_ndevs;            /* # devices */
-
-        rwlock_t          kra_global_lock;      /* stabilize peer/conn ops */
-
-        struct list_head *kra_peers;            /* hash table of all my known peers */
-        int               kra_peer_hash_size;   /* size of kra_peers */
-        atomic_t          kra_npeers;           /* # peers extant */
-
-        struct list_head *kra_conns;            /* conns hashed by cqid */
-        int               kra_conn_hash_size;   /* size of kra_conns */
-        __u64             kra_next_incarnation; /* conn incarnation # generator */
-        int               kra_next_cqid;        /* cqid generator */
-        atomic_t          kra_nconns;           /* # connections extant */
-
-        long              kra_new_min_timeout;  /* minimum timeout on any new conn */
-        wait_queue_head_t kra_reaper_waitq;     /* reaper sleeps here */
-        spinlock_t        kra_reaper_lock;      /* serialise */
-        
-        struct list_head  kra_connd_peers;      /* peers waiting for a connection */
-        wait_queue_head_t kra_connd_waitq;      /* connection daemons sleep here */
-        spinlock_t        kra_connd_lock;       /* serialise */
-
-        struct list_head  kra_idle_txs;         /* idle tx descriptors */
-        struct list_head  kra_idle_nblk_txs;    /* idle reserved tx descriptors */
-        __u64             kra_next_tx_cookie;   /* RDMA completion cookie */
-        wait_queue_head_t kra_idle_tx_waitq;    /* block here for tx descriptor */
-        spinlock_t        kra_tx_lock;          /* serialise */
-} kra_data_t;
-
-#define RANAL_INIT_NOTHING         0
-#define RANAL_INIT_DATA            1
-#define RANAL_INIT_LIB             2
-#define RANAL_INIT_ALL             3
-
-/************************************************************************
- * Wire message structs.  These are sent in sender's byte order
- * (i.e. receiver checks magic and flips if required).
- */
-
-typedef struct kra_connreq                     /* connection request/response */
-{                                              /* (sent via socket) */
-        __u32             racr_magic;          /* I'm an ranal connreq */
-        __u16             racr_version;                /* this is my version number */
-        __u16             racr_devid;           /* which device to connect on */
-        __u64             racr_nid;            /* my NID */
-        __u64             racr_incarnation;    /* my incarnation */
-        __u32             racr_timeout;         /* my timeout */
-       RAP_RI_PARAMETERS racr_riparams;        /* my endpoint info */
-} kra_connreq_t;
-
-typedef struct
-{
-        RAP_MEM_KEY       rard_key;
-        RAP_PVOID64       rard_addr;
-        RAP_UINT32        rard_nob;
-} kra_rdma_desc_t;
-
-typedef struct
-{
-        ptl_hdr_t         raim_hdr;             /* portals header */
-        /* Portals payload is in FMA "Message Data" */
-} kra_immediate_msg_t;
-
-typedef struct
-{
-        ptl_hdr_t         raprm_hdr;           /* portals header */
-        __u64             raprm_cookie;                /* opaque completion cookie */
-} kra_putreq_msg_t;
-
-typedef struct
-{
-       __u64             rapam_src_cookie;     /* reflected completion cookie */
-       __u64             rapam_dst_cookie;     /* opaque completion cookie */
-       kra_rdma_desc_t   rapam_desc;           /* sender's sink buffer */
-} kra_putack_msg_t;
-
-typedef struct
-{
-        ptl_hdr_t         ragm_hdr;             /* portals header */
-        __u64             ragm_cookie;          /* opaque completion cookie */
-        kra_rdma_desc_t   ragm_desc;            /* sender's sink buffer */
-} kra_get_msg_t;
-
-typedef struct
-{
-        __u64             racm_cookie;          /* reflected completion cookie */
-} kra_completion_msg_t;
-
-typedef struct                                  /* NB must fit in FMA "Prefix" */
-{
-        __u32             ram_magic;           /* I'm an ranal message */
-        __u16             ram_version;         /* this is my version number */
-        __u16             ram_type;            /* msg type */
-        __u64             ram_srcnid;           /* sender's NID */
-        __u64             ram_incarnation;      /* sender's connection incarnation */
-        union {
-                kra_immediate_msg_t   immediate;
-               kra_putreq_msg_t      putreq;
-               kra_putack_msg_t      putack;
-               kra_get_msg_t         get;
-                kra_completion_msg_t  completion;
-        }                    ram_u;
-        __u32             ram_seq;              /* incrementing sequence number */
-} kra_msg_t;
-
-#define RANAL_MSG_MAGIC       0x0be91b92        /* unique magic */
-#define RANAL_MSG_VERSION              1        /* current protocol version */
-
-#define RANAL_MSG_FENCE             0x80        /* fence RDMA (this bit is set in the PUT_DONE, GET_DONE and CLOSE values below) */
-
-#define RANAL_MSG_NONE              0x00        /* illegal message */
-#define RANAL_MSG_NOOP              0x01        /* empty ram_u (keepalive) */
-#define RANAL_MSG_IMMEDIATE         0x02        /* ram_u.immediate */
-#define RANAL_MSG_PUT_REQ           0x03       /* ram_u.putreq (src->sink) */
-#define RANAL_MSG_PUT_NAK           0x04       /* ram_u.completion (no PUT match: sink->src) */
-#define RANAL_MSG_PUT_ACK           0x05       /* ram_u.putack (PUT matched: sink->src) */
-#define RANAL_MSG_PUT_DONE          0x86       /* ram_u.completion (src->sink); 0x06 | RANAL_MSG_FENCE */
-#define RANAL_MSG_GET_REQ           0x07               /* ram_u.get (sink->src) */
-#define RANAL_MSG_GET_NAK           0x08        /* ram_u.completion (no GET match: src->sink) */
-#define RANAL_MSG_GET_DONE          0x89       /* ram_u.completion (src->sink); 0x09 | RANAL_MSG_FENCE */
-#define RANAL_MSG_CLOSE             0x8a        /* empty ram_u; 0x0a | RANAL_MSG_FENCE */
-
-/***********************************************************************/
-
-typedef struct kra_tx                           /* message descriptor */
-{
-        struct list_head          tx_list;      /* queue on idle_txs/rac_sendq/rac_waitq */
-        struct kra_conn          *tx_conn;      /* owning conn */
-        lib_msg_t                *tx_libmsg[2]; /* lib msgs to finalize on completion */
-        unsigned long             tx_qtime;     /* when tx started to wait for something */
-        int                       tx_isnblk;    /* I'm reserved for non-blocking sends */
-        int                       tx_nob;       /* # bytes of payload */
-        int                       tx_buftype;   /* payload buffer type */
-        void                     *tx_buffer;    /* source/sink buffer */
-        int                       tx_phys_offset; /* first page offset (if phys) */
-        int                       tx_phys_npages; /* # physical pages */
-        RAP_PHYS_REGION          *tx_phys;      /* page descriptors */
-        RAP_MEM_KEY               tx_map_key;   /* mapping key */
-        RAP_RDMA_DESCRIPTOR       tx_rdma_desc; /* rdma descriptor */
-        __u64                     tx_cookie;    /* identify this tx to peer */
-        kra_msg_t                 tx_msg;       /* FMA message buffer */
-} kra_tx_t;
-
-#define RANAL_BUF_NONE           0              /* buffer type not set */
-#define RANAL_BUF_IMMEDIATE      1              /* immediate data */
-#define RANAL_BUF_PHYS_UNMAPPED  2              /* physical: not mapped yet */
-#define RANAL_BUF_PHYS_MAPPED    3              /* physical: mapped already */
-#define RANAL_BUF_VIRT_UNMAPPED  4              /* virtual: not mapped yet */
-#define RANAL_BUF_VIRT_MAPPED    5              /* virtual: mapped already */
-
-#define RANAL_TX_IDLE            0x00           /* on freelist */
-#define RANAL_TX_SIMPLE          0x10           /* about to send a simple message */
-#define RANAL_TX_PUTI_REQ        0x20           /* PUT initiator about to send PUT_REQ */
-#define RANAL_TX_PUTI_WAIT_ACK   0x21           /* PUT initiator waiting for PUT_ACK */
-#define RANAL_TX_PUTI_RDMA       0x22           /* PUT initiator waiting for RDMA to complete */
-#define RANAL_TX_PUTI_DONE       0x23           /* PUT initiator about to send PUT_DONE */
-#define RANAL_TX_PUTT_NAK        0x30           /* PUT target about to send PUT_NAK */
-#define RANAL_TX_PUTT_ACK        0x30           /* PUT target about to send PUT_ACK; NOTE(review): same value as RANAL_TX_PUTT_NAK - confirm this is intended */
-#define RANAL_TX_PUTT_WAIT_DONE  0x31           /* PUT target waiting for PUT_DONE */
-#define RANAL_TX_GETI_REQ        0x40           /* GET initiator about to send GET_REQ */
-#define RANAL_TX_GETI_WAIT_DONE  0x41           /* GET initiator waiting for GET_DONE */
-#define RANAL_TX_GETT_NAK        0x50           /* GET target about to send GET_NAK */
-#define RANAL_TX_GETT_RDMA       0x51           /* GET target waiting for RDMA to complete */
-#define RANAL_TX_GETT_DONE       0x52           /* GET target about to send GET_DONE */
-
-typedef struct kra_conn
-{ 
-        struct kra_peer    *rac_peer;           /* owning peer */
-        struct list_head    rac_list;           /* stash on peer's conn list */
-        struct list_head    rac_hashlist;       /* stash in connection hash table */
-        struct list_head    rac_schedlist;      /* queue for scheduler */
-        struct list_head    rac_fmaq;           /* txs queued for FMA */
-        struct list_head    rac_rdmaq;          /* txs awaiting RDMA completion */
-        struct list_head    rac_replyq;         /* txs awaiting replies */
-        __u64               rac_peer_incarnation; /* peer's unique connection stamp */
-        __u64               rac_my_incarnation; /* my unique connection stamp */
-        unsigned long       rac_last_tx;        /* when I last sent an FMA message */
-        unsigned long       rac_last_rx;        /* when I last received an FMA message */
-        long                rac_keepalive;      /* keepalive interval */
-        long                rac_timeout;        /* infer peer death once (last_rx + timeout) has passed - TODO confirm against the reaper */
-        __u32               rac_cqid;           /* my completion callback id (non-unique) */
-        __u32               rac_tx_seq;         /* tx msg sequence number */
-        __u32               rac_rx_seq;         /* rx msg sequence number */
-        atomic_t            rac_refcount;       /* # users */
-        unsigned int        rac_close_sent;     /* I've sent CLOSE */
-        unsigned int        rac_close_recvd;    /* I've received CLOSE */
-        unsigned int        rac_closing;        /* connection being torn down */
-        unsigned int        rac_scheduled;      /* being attended to (set while queued on the device's rad_connq) */
-        spinlock_t          rac_lock;           /* serialise */
-        kra_device_t       *rac_device;         /* which device */
-       RAP_PVOID           rac_rihandle;       /* RA endpoint */
-        kra_msg_t          *rac_rxmsg;          /* incoming message (FMA prefix) */
-        kra_msg_t           rac_msg;            /* keepalive/CLOSE message buffer */
-} kra_conn_t;
-
-typedef struct kra_peer
-{
-        struct list_head    rap_list;           /* stash on global peer list */
-        struct list_head    rap_connd_list;     /* schedule on kra_connd_peers */
-        struct list_head    rap_conns;          /* all active connections */
-        struct list_head    rap_tx_queue;       /* msgs waiting for a conn */
-        ptl_nid_t           rap_nid;            /* who's on the other end(s) */
-        __u32               rap_ip;             /* IP address of peer */
-        int                 rap_port;           /* port on which peer listens */
-        atomic_t            rap_refcount;       /* # users */
-        int                 rap_persistence;    /* "known" peer refs */
-        int                 rap_connecting;     /* connection forming */
-        unsigned long       rap_reconnect_time; /* CURRENT_TIME when reconnect OK */
-        unsigned long       rap_reconnect_interval; /* exponential backoff */
-} kra_peer_t;
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
-# define sk_allocation  allocation
-# define sk_data_ready data_ready
-# define sk_write_space write_space
-# define sk_user_data   user_data
-# define sk_prot        prot
-# define sk_sndbuf      sndbuf
-# define sk_socket      socket
-# define sk_wmem_queued wmem_queued
-# define sk_err         err
-# define sk_sleep       sleep
-#endif
-
-extern lib_nal_t       kranal_lib;
-extern kra_data_t      kranal_data;
-extern kra_tunables_t  kranal_tunables;
-
-extern void __kranal_peer_decref(kra_peer_t *peer);
-extern void __kranal_conn_decref(kra_conn_t *conn);
-
-/* Take an extra reference on 'peer'.  The caller must already hold one,
- * which is what the assertion checks. */
-static inline void
-kranal_peer_addref(kra_peer_t *peer)
-{
-        atomic_t *refs = &peer->rap_refcount;
-
-        CDEBUG(D_NET, "%p->"LPX64"\n", peer, peer->rap_nid);
-        LASSERT(atomic_read(&peer->rap_refcount) > 0);
-        atomic_inc(refs);
-}
-
-/* Drop one reference on 'peer'; whoever drops the last one hands the
- * peer to __kranal_peer_decref(). */
-static inline void
-kranal_peer_decref(kra_peer_t *peer)
-{
-        CDEBUG(D_NET, "%p->"LPX64"\n", peer, peer->rap_nid);
-        LASSERT(atomic_read(&peer->rap_refcount) > 0);
-
-        if (!atomic_dec_and_test(&peer->rap_refcount))
-                return;                 /* other users remain */
-
-        __kranal_peer_decref(peer);
-}
-
-/* Map a NID to its bucket in the peer hash table: the NID truncated to
- * unsigned int, modulo the table size. */
-static inline struct list_head *
-kranal_nid2peerlist (ptl_nid_t nid)
-{
-        unsigned int bucket;
-
-        bucket = ((unsigned int)nid) % kranal_data.kra_peer_hash_size;
-        return kranal_data.kra_peers + bucket;
-}
-
-/* Return 1 if 'peer' is linked into the peer hash table (its rap_list
- * is non-empty), 0 otherwise. */
-static inline int
-kranal_peer_active(kra_peer_t *peer)
-{
-        if (list_empty(&peer->rap_list))
-                return 0;
-
-        return 1;
-}
-
-/* Take an extra reference on 'conn'.  The caller must already hold one,
- * which is what the assertion checks. */
-static inline void
-kranal_conn_addref(kra_conn_t *conn)
-{
-        atomic_t *refs = &conn->rac_refcount;
-
-        CDEBUG(D_NET, "%p->"LPX64"\n", conn, conn->rac_peer->rap_nid);
-        LASSERT(atomic_read(&conn->rac_refcount) > 0);
-        atomic_inc(refs);
-}
-
-/* Drop one reference on 'conn'; whoever drops the last one hands the
- * connection to __kranal_conn_decref(). */
-static inline void
-kranal_conn_decref(kra_conn_t *conn)
-{
-        CDEBUG(D_NET, "%p->"LPX64"\n", conn, conn->rac_peer->rap_nid);
-        LASSERT(atomic_read(&conn->rac_refcount) > 0);
-
-        if (!atomic_dec_and_test(&conn->rac_refcount))
-                return;                 /* other users remain */
-
-        __kranal_conn_decref(conn);
-}
-
-/* Map a completion-queue id to its bucket in the connection hash table. */
-static inline struct list_head *
-kranal_cqid2connlist (__u32 cqid)
-{
-        unsigned int bucket;
-
-        bucket = cqid % kranal_data.kra_conn_hash_size;
-        return kranal_data.kra_conns + bucket;
-}
-
-/* Look up the connection whose rac_cqid equals 'cqid'.  Returns the
- * first match in the hash bucket, or NULL if there is none.  No
- * reference is taken on the result. */
-static inline kra_conn_t *
-kranal_cqid2conn_locked (__u32 cqid)
-{
-        struct list_head *bucket = kranal_cqid2connlist(cqid);
-        struct list_head *pos;
-        kra_conn_t       *found = NULL;
-
-        list_for_each(pos, bucket) {
-                kra_conn_t *candidate;
-
-                candidate = list_entry(pos, kra_conn_t, rac_hashlist);
-                if (candidate->rac_cqid != cqid)
-                        continue;
-
-                found = candidate;
-                break;
-        }
-
-        return found;
-}
-
-/* Return 1 if the tx's payload buffer is in one of the two "mapped"
- * states, 0 otherwise. */
-static inline int
-kranal_tx_mapped (kra_tx_t *tx)
-{
-        switch (tx->tx_buftype) {
-        case RANAL_BUF_VIRT_MAPPED:
-        case RANAL_BUF_PHYS_MAPPED:
-                return 1;
-        default:
-                return 0;
-        }
-}
-
-#if !CONFIG_X86
-# error "no page->phys"
-#else
-/* Convert a struct page pointer to an address: the page's index within
- * mem_map, shifted up by PAGE_SHIFT. */
-static inline __u64
-kranal_page2phys (struct page *p)
-{
-        __u64 pfn = p - mem_map;
-
-        return pfn << PAGE_SHIFT;
-}
-#endif
-
-extern int kranal_listener_procint(ctl_table *table, 
-                                   int write, struct file *filp, 
-                                   void *buffer, size_t *lenp);
-extern int kranal_close_stale_conns_locked (kra_peer_t *peer, 
-                                            __u64 incarnation);
-extern void kranal_update_reaper_timeout(long timeout);
-extern void kranal_tx_done (kra_tx_t *tx, int completion);
-extern void kranal_unlink_peer_locked (kra_peer_t *peer);
-extern void kranal_schedule_conn(kra_conn_t *conn);
-extern kra_peer_t *kranal_create_peer (ptl_nid_t nid);
-extern kra_peer_t *kranal_find_peer_locked (ptl_nid_t nid);
-extern void kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx);
-extern int kranal_del_peer (ptl_nid_t nid, int single_share);
-extern void kranal_device_callback(RAP_INT32 devid);
-extern int kranal_thread_start (int(*fn)(void *arg), void *arg);
-extern int kranal_connd (void *arg);
-extern int kranal_reaper (void *arg);
-extern int kranal_scheduler (void *arg);
-extern void kranal_close_conn_locked (kra_conn_t *conn, int error);
-extern void kranal_terminate_conn_locked (kra_conn_t *conn);
-extern void kranal_connect (kra_peer_t *peer);
diff --git a/lustre/portals/knals/ranal/ranal_cb.c b/lustre/portals/knals/ranal/ranal_cb.c
deleted file mode 100644 (file)
index 9490b56..0000000
+++ /dev/null
@@ -1,1766 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "ranal.h"
-
-/* Report the "distance" to 'nid' in *dist: 0 when it is this
- * interface's own NID, 1 for anything else.  Always returns 0. */
-int
-kranal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        /* Everything that isn't me counts as one hop away.  A NID with
-         * no known peer is presumably much further when we're not
-         * routing, but no attempt is made to tell the difference. */
-        *dist = (nal->libnal_ni.ni_pid.nid == nid) ? 0 : 1;
-
-        return 0;
-}
-
-/* Callback for device 'devid': mark the first matching device ready
- * and wake whoever sleeps on its wait queue.  Warns if no configured
- * device has that id. */
-void
-kranal_device_callback(RAP_INT32 devid)
-{
-        kra_device_t *dev = NULL;
-        unsigned long flags;
-        int           idx;
-
-        /* find the (first) device with this id */
-        for (idx = 0; idx < kranal_data.kra_ndevs; idx++) {
-                if (kranal_data.kra_devices[idx].rad_id == devid) {
-                        dev = &kranal_data.kra_devices[idx];
-                        break;
-                }
-        }
-
-        if (dev == NULL) {
-                CWARN("callback for unknown device %d\n", devid);
-                return;
-        }
-
-        spin_lock_irqsave(&dev->rad_lock, flags);
-
-        /* only wake on the not-ready -> ready transition */
-        if (!dev->rad_ready) {
-                dev->rad_ready = 1;
-                wake_up(&dev->rad_waitq);
-        }
-
-        spin_unlock_irqrestore(&dev->rad_lock, flags);
-}
-
-/* Queue 'conn' on its device's rad_connq and wake the waiter on
- * rad_waitq, unless it is queued already.  A queued connection holds
- * one extra reference on behalf of the scheduler. */
-void
-kranal_schedule_conn(kra_conn_t *conn)
-{
-        kra_device_t    *device = conn->rac_device;
-        unsigned long    flags;
-
-        spin_lock_irqsave(&device->rad_lock, flags);
-
-        if (conn->rac_scheduled) {
-                /* already on the queue: nothing to do */
-                spin_unlock_irqrestore(&device->rad_lock, flags);
-                return;
-        }
-
-        kranal_conn_addref(conn);               /* +1 ref for scheduler */
-        conn->rac_scheduled = 1;
-        list_add_tail(&conn->rac_schedlist, &device->rad_connq);
-        wake_up(&device->rad_waitq);
-
-        spin_unlock_irqrestore(&device->rad_lock, flags);
-}
-
-/* Schedule the connection identified by completion-queue id 'cqid'.
- * The lookup and the scheduling both happen under a read lock on
- * kra_global_lock.  An unknown cqid is only warned about. */
-void
-kranal_schedule_cqid (__u32 cqid)
-{
-        kra_conn_t         *conn;
-
-        /* NB the hash bucket is found inside kranal_cqid2conn_locked(),
-         * so there is no need to compute it here as well */
-        read_lock(&kranal_data.kra_global_lock);
-
-        conn = kranal_cqid2conn_locked(cqid);
-
-        if (conn == NULL)
-                CWARN("no cqid %x\n", cqid);
-        else
-                kranal_schedule_conn(conn);
-
-        read_unlock(&kranal_data.kra_global_lock);
-}
-
-/* Schedule every connection in the connection hash table that uses
- * device 'dev'.  Must not be called from interrupt context. */
-void
-kranal_schedule_dev(kra_device_t *dev)
-{
-        struct list_head   *bucket;
-        struct list_head   *pos;
-        kra_conn_t         *conn;
-        int                 idx = 0;
-
-        /* Servers may have 1000s of clients: far too many to walk in
-         * IRQ context */
-        LASSERT (!in_interrupt()); 
-
-        CWARN("Scheduling ALL conns on device %d\n", dev->rad_id);
-
-        while (idx < kranal_data.kra_conn_hash_size) {
-
-                /* The lock is taken and released once per hash bucket so
-                 * that nobody on another CPU is held off for too long at
-                 * IRQ priority */
-                read_lock(&kranal_data.kra_global_lock);
-
-                bucket = &kranal_data.kra_conns[idx];
-
-                list_for_each (pos, bucket) {
-                        conn = list_entry(pos, kra_conn_t, rac_hashlist);
-
-                        if (conn->rac_device != dev)
-                                continue;
-
-                        kranal_schedule_conn(conn);
-                }
-
-                read_unlock(&kranal_data.kra_global_lock);
-                idx++;
-        }
-}
-
-/* Retire a tx descriptor: deregister its buffer if it was mapped,
- * finalize any attached lib messages (PTL_OK when 'completion' is 0,
- * PTL_FAIL otherwise), reset it and put it back on its idle list.
- * Must not be called from interrupt context. */
-void
-kranal_tx_done (kra_tx_t *tx, int completion)
-{
-        ptl_err_t         ptlrc = (completion == 0) ? PTL_OK : PTL_FAIL;
-        struct list_head *freelist;
-        kra_device_t     *dev;
-        void             *vaddr;
-        unsigned long     flags;
-        RAP_RETURN        rrc;
-        int               i;
-
-        LASSERT (!in_interrupt());
-
-        switch (tx->tx_buftype) {
-        default:
-                LBUG();
-
-        case RANAL_BUF_NONE:
-        case RANAL_BUF_IMMEDIATE:
-        case RANAL_BUF_PHYS_UNMAPPED:
-        case RANAL_BUF_VIRT_UNMAPPED:
-                /* nothing was registered */
-                break;
-
-        case RANAL_BUF_PHYS_MAPPED:
-        case RANAL_BUF_VIRT_MAPPED:
-                LASSERT (tx->tx_conn != NULL);
-                dev = tx->tx_conn->rac_device;
-                /* a virtual mapping is deregistered by its buffer address;
-                 * a physical one passes NULL */
-                vaddr = (tx->tx_buftype == RANAL_BUF_VIRT_MAPPED) ?
-                        tx->tx_buffer : NULL;
-                rrc = RapkDeregisterMemory(dev->rad_handle, vaddr,
-                                           dev->rad_ptag, &tx->tx_map_key);
-                LASSERT (rrc == RAP_SUCCESS);
-                break;
-        }
-
-        /* up to 2 lib messages may hang off a tx */
-        for (i = 0; i < 2; i++) {
-                if (tx->tx_libmsg[i] != NULL) {
-                        lib_finalize(&kranal_lib, NULL, tx->tx_libmsg[i],
-                                     ptlrc);
-                        tx->tx_libmsg[i] = NULL;
-                }
-        }
-
-        /* back to the pristine state kranal_get_idle_tx() asserts on */
-        tx->tx_conn = NULL;
-        tx->tx_msg.ram_type = RANAL_MSG_NONE;
-        tx->tx_buftype = RANAL_BUF_NONE;
-
-        freelist = tx->tx_isnblk ? &kranal_data.kra_idle_nblk_txs :
-                                   &kranal_data.kra_idle_txs;
-
-        spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
-        list_add_tail(&tx->tx_list, freelist);
-
-        /* only the normal pool has blocked waiters to wake */
-        if (!tx->tx_isnblk)
-                wake_up(&kranal_data.kra_idle_tx_waitq);
-
-        spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-}
-
-/* Take a tx descriptor off an idle list.
- *
- * The normal pool (kra_idle_txs) is tried first.  If it is empty, a
- * caller that may block sleeps until it refills; one that may not
- * takes from the reserved pool (kra_idle_nblk_txs) instead and gets
- * NULL if that is empty too. */
-kra_tx_t *
-kranal_get_idle_tx (int may_block) 
-{
-        struct list_head *pool = NULL;          /* list to dequeue from */
-        kra_tx_t         *tx = NULL;
-        unsigned long     flags;
-
-        /* NB every 'break' below leaves kra_tx_lock held */
-        for (;;) {
-                spin_lock_irqsave(&kranal_data.kra_tx_lock, flags);
-
-                if (!list_empty(&kranal_data.kra_idle_txs)) {
-                        /* "normal" descriptor is free */
-                        pool = &kranal_data.kra_idle_txs;
-                        break;
-                }
-
-                if (may_block) {
-                        /* sleep until a normal descriptor comes back,
-                         * then re-check under the lock */
-                        spin_unlock_irqrestore(&kranal_data.kra_tx_lock,
-                                               flags);
-
-                        wait_event(kranal_data.kra_idle_tx_waitq,
-                                   !list_empty(&kranal_data.kra_idle_txs));
-                        continue;
-                }
-
-                /* non-blocking callers may dip into the reserve pool */
-                if (!list_empty(&kranal_data.kra_idle_nblk_txs))
-                        pool = &kranal_data.kra_idle_nblk_txs;
-                else
-                        CERROR("reserved tx desc pool exhausted\n");
-                break;
-        }
-
-        if (pool != NULL) {
-                tx = list_entry(pool->next, kra_tx_t, tx_list);
-                list_del(&tx->tx_list);
-
-                /* The completion cookie might never be used, but the
-                 * lock is held right now, so allocate it here */
-                tx->tx_cookie = kranal_data.kra_next_tx_cookie++;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-                LASSERT (tx->tx_msg.ram_type == RANAL_MSG_NONE);
-                LASSERT (tx->tx_conn == NULL);
-                LASSERT (tx->tx_libmsg[0] == NULL);
-                LASSERT (tx->tx_libmsg[1] == NULL);
-        }
-
-        spin_unlock_irqrestore(&kranal_data.kra_tx_lock, flags);
-
-        return tx;
-}
-
-/* Fill in the fixed header fields of an outgoing message: magic,
- * protocol version, message type and this node's NID. */
-void
-kranal_init_msg(kra_msg_t *msg, int type)
-{
-        msg->ram_type    = type;
-        msg->ram_srcnid  = kranal_lib.libnal_ni.ni_pid.nid;
-        msg->ram_version = RANAL_MSG_VERSION;
-        msg->ram_magic   = RANAL_MSG_MAGIC;
-        /* NB ram_incarnation is deliberately left alone: it gets set
-         * when the FMA is sent */
-}
-
-/* Get an idle tx descriptor and initialise its message header for a
- * message of 'type'.  Returns NULL if no descriptor could be had. */
-kra_tx_t *
-kranal_new_tx_msg (int may_block, int type)
-{
-        kra_tx_t *tx;
-
-        tx = kranal_get_idle_tx(may_block);
-        if (tx != NULL)
-                kranal_init_msg(&tx->tx_msg, type);
-
-        return tx;
-}
-
-/* Common code for kranal_setup_immediate_buffer() and
- * kranal_setup_virt_buffer(), which differ only in the buffer type
- * they record.
- *
- * Skips 'offset' bytes into the iovec array and points the tx at the
- * 'nob' bytes that follow, recording 'buftype' as the buffer type.
- * The payload must lie within a single iovec fragment.
- *
- * Returns 0 on success or -EMSGSIZE if the payload spans fragments. */
-static int
-kranal_setup_vaddr_buffer (kra_tx_t *tx, int buftype, int niov,
-                           struct iovec *iov, int offset, int nob)
-{
-        LASSERT (nob > 0);
-        LASSERT (niov > 0);
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
-        /* skip the fragments that lie wholly before 'offset' */
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                niov--;
-                iov++;
-                LASSERT (niov > 0);
-        }
-
-        /* 'offset' now indexes into *iov; the payload must end in it */
-        if (nob > iov->iov_len - offset) {
-                CERROR("Can't handle multiple vaddr fragments\n");
-                return -EMSGSIZE;
-        }
-
-        tx->tx_buftype = buftype;
-        tx->tx_nob = nob;
-        tx->tx_buffer = (void *)(((unsigned long)iov->iov_base) + offset);
-        return 0;
-}
-
-/* Point 'tx' at 'nob' bytes of immediate payload starting 'offset'
- * bytes into the iovec array.  Returns 0, or -EMSGSIZE if the payload
- * is not contained in one fragment. */
-int
-kranal_setup_immediate_buffer (kra_tx_t *tx, int niov, struct iovec *iov, 
-                               int offset, int nob)
-{
-        return kranal_setup_vaddr_buffer(tx, RANAL_BUF_IMMEDIATE,
-                                         niov, iov, offset, nob);
-}
-
-/* Point 'tx' at 'nob' bytes of (not yet mapped) virtual payload
- * starting 'offset' bytes into the iovec array.  Returns 0, or
- * -EMSGSIZE if the payload is not contained in one fragment. */
-int
-kranal_setup_virt_buffer (kra_tx_t *tx, int niov, struct iovec *iov, 
-                          int offset, int nob)
-{
-        return kranal_setup_vaddr_buffer(tx, RANAL_BUF_VIRT_UNMAPPED,
-                                         niov, iov, offset, nob);
-}
-
-int
-kranal_setup_phys_buffer (kra_tx_t *tx, int nkiov, ptl_kiov_t *kiov,
-                          int offset, int nob)
-{
-        RAP_PHYS_REGION *phys = tx->tx_phys;
-        int              resid;
-
-        CDEBUG(D_NET, "niov %d offset %d nob %d\n", nkiov, offset, nob);
-
-        LASSERT (nob > 0);
-        LASSERT (nkiov > 0);
-        LASSERT (tx->tx_buftype == RANAL_BUF_NONE);
-
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                nkiov--;
-                kiov++;
-                LASSERT (nkiov > 0);
-        }
-
-        tx->tx_buftype = RANAL_BUF_PHYS_UNMAPPED;
-        tx->tx_nob = nob;
-        tx->tx_buffer = (void *)((unsigned long)(kiov->kiov_offset + offset));
-        
-        phys->Address = kranal_page2phys(kiov->kiov_page);
-        phys->Length  = PAGE_SIZE;
-        phys++;
-
-        resid = nob - (kiov->kiov_len - offset);
-        while (resid > 0) {
-                kiov++;
-                nkiov--;
-                LASSERT (nkiov > 0);
-
-                if (kiov->kiov_offset != 0 ||
-                    ((resid > PAGE_SIZE) && 
-                     kiov->kiov_len < PAGE_SIZE)) {
-                        int i;
-                        /* Can't have gaps */
-                        CERROR("Can't make payload contiguous in I/O VM:"
-                               "page %d, offset %d, len %d \n", 
-                               phys - tx->tx_phys, 
-                               kiov->kiov_offset, kiov->kiov_len);                        
-                        return -EINVAL;
-                }
-
-                if ((phys - tx->tx_phys) == PTL_MD_MAX_IOV) {
-                        CERROR ("payload too big (%d)\n", phys - tx->tx_phys);
-                        return -EMSGSIZE;
-                }
-
-                phys->Address = kranal_page2phys(kiov->kiov_page);
-                phys->Length  = PAGE_SIZE;
-                phys++;
-
-                resid -= PAGE_SIZE;
-        }
-
-        tx->tx_phys_npages = phys - tx->tx_phys;
-        return 0;
-}
-
-static inline int
-kranal_setup_buffer (kra_tx_t *tx, int niov, 
-                     struct iovec *iov, ptl_kiov_t *kiov,
-                     int offset, int nob)
-{
-        LASSERT ((iov == NULL) != (kiov == NULL));
-        
-        if (kiov != NULL)
-                return kranal_setup_phys_buffer(tx, niov, kiov, offset, nob);
-        
-        return kranal_setup_virt_buffer(tx, niov, iov, offset, nob);
-}
-
-void
-kranal_map_buffer (kra_tx_t *tx)
-{
-        kra_conn_t     *conn = tx->tx_conn;
-        kra_device_t   *dev = conn->rac_device;
-        RAP_RETURN      rrc;
-
-        switch (tx->tx_buftype) {
-        default:
-                
-        case RANAL_BUF_PHYS_UNMAPPED:
-                rrc = RapkRegisterPhys(conn->rac_device->rad_handle,
-                                       tx->tx_phys, tx->tx_phys_npages,
-                                       conn->rac_device->rad_ptag,
-                                       &tx->tx_map_key);
-                LASSERT (rrc == RAP_SUCCESS);
-                tx->tx_buftype = RANAL_BUF_PHYS_MAPPED;
-                return;
-
-        case RANAL_BUF_VIRT_UNMAPPED:
-                rrc = RapkRegisterMemory(conn->rac_device->rad_handle,
-                                         tx->tx_buffer, tx->tx_nob,
-                                         conn->rac_device->rad_ptag,
-                                         &tx->tx_map_key);
-                LASSERT (rrc == RAP_SUCCESS);
-                tx->tx_buftype = RANAL_BUF_VIRT_MAPPED;
-                return;
-        }
-}
-
-kra_conn_t *
-kranal_find_conn_locked (kra_peer_t *peer)
-{
-        struct list_head *tmp;
-
-        /* just return the first connection */
-        list_for_each (tmp, &peer->rap_conns) {
-                return list_entry(tmp, kra_conn_t, rac_list);
-        }
-
-        return NULL;
-}
-
-void
-kranal_post_fma (kra_conn_t *conn, kra_tx_t *tx)
-{
-        unsigned long    flags;
-
-        tx->tx_conn = conn;
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-        list_add_tail(&tx->tx_list, &conn->rac_fmaq);
-        tx->tx_qtime = jiffies;
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        kranal_schedule_conn(conn);
-}
-
-void
-kranal_launch_tx (kra_tx_t *tx, ptl_nid_t nid)
-{
-        unsigned long    flags;
-        kra_peer_t      *peer;
-        kra_conn_t      *conn;
-        unsigned long    now;
-        rwlock_t        *g_lock = &kranal_data.kra_global_lock;
-
-        /* If I get here, I've committed to send, so I complete the tx with
-         * failure on any problems */
-        
-        LASSERT (tx->tx_conn == NULL);          /* only set when assigned a conn */
-
-        read_lock(g_lock);
-        
-        peer = kranal_find_peer_locked(nid);
-        if (peer == NULL) {
-                read_unlock(g_lock);
-                kranal_tx_done(tx, -EHOSTUNREACH);
-                return;
-        }
-
-        conn = kranal_find_conn_locked(peer);
-        if (conn != NULL) {
-                kranal_post_fma(conn, tx);
-                read_unlock(g_lock);
-                return;
-        }
-        
-        /* Making one or more connections; I'll need a write lock... */
-        read_unlock(g_lock);
-        write_lock_irqsave(g_lock, flags);
-
-        peer = kranal_find_peer_locked(nid);
-        if (peer == NULL) {
-                write_unlock_irqrestore(g_lock, flags);
-                kranal_tx_done(tx, -EHOSTUNREACH);
-                return;
-        }
-
-        conn = kranal_find_conn_locked(peer);
-        if (conn != NULL) {
-                /* Connection exists; queue message on it */
-                kranal_post_fma(conn, tx);
-                write_unlock_irqrestore(g_lock, flags);
-                return;
-        }
-
-        LASSERT (peer->rap_persistence > 0);
-
-        if (!peer->rap_connecting) {
-                now = CURRENT_TIME;
-                if (now < peer->rap_reconnect_time) {
-                        write_unlock_irqrestore(g_lock, flags);
-                        kranal_tx_done(tx, -EHOSTUNREACH);
-                        return;
-                }
-        
-                peer->rap_connecting = 1;
-                kranal_peer_addref(peer); /* extra ref for connd */
-        
-                spin_lock(&kranal_data.kra_connd_lock);
-        
-                list_add_tail(&peer->rap_connd_list,
-                             &kranal_data.kra_connd_peers);
-                wake_up(&kranal_data.kra_connd_waitq);
-        
-                spin_unlock(&kranal_data.kra_connd_lock);
-        }
-        
-        /* A connection is being established; queue the message... */
-        list_add_tail(&tx->tx_list, &peer->rap_tx_queue);
-
-        write_unlock_irqrestore(g_lock, flags);
-}
-
-static void
-kranal_rdma(kra_tx_t *tx, int type, 
-            kra_rdma_desc_t *rard, int nob, __u64 cookie)
-{
-        kra_conn_t   *conn = tx->tx_conn;
-        RAP_RETURN    rrc;
-        unsigned long flags;
-
-        /* prep final completion message */
-        kranal_init_msg(&tx->tx_msg, type);
-        tx->tx_msg.ram_u.completion.racm_cookie = cookie;
-        
-        LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                 tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-        LASSERT (nob <= rard->rard_nob);
-
-        memset(&tx->tx_rdma_desc, 0, sizeof(tx->tx_rdma_desc));
-        tx->tx_rdma_desc.SrcPtr.AddressBits = (__u64)((unsigned long)tx->tx_buffer);
-        tx->tx_rdma_desc.SrcKey = tx->tx_map_key;
-        tx->tx_rdma_desc.DstPtr = rard->rard_addr;
-        tx->tx_rdma_desc.DstKey = rard->rard_key;
-        tx->tx_rdma_desc.Length = nob;
-        tx->tx_rdma_desc.AppPtr = tx;
-
-        if (nob == 0) { /* Immediate completion */
-                kranal_post_fma(conn, tx);
-                return;
-        }
-        
-        rrc = RapkPostRdma(conn->rac_rihandle, &tx->tx_rdma_desc);
-        LASSERT (rrc == RAP_SUCCESS);
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-        list_add_tail(&tx->tx_list, &conn->rac_rdmaq);
-        tx->tx_qtime = jiffies;
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-}
-
-int
-kranal_consume_rxmsg (kra_conn_t *conn, void *buffer, int nob)
-{
-        __u32      nob_received = nob;
-        RAP_RETURN rrc;
-
-        LASSERT (conn->rac_rxmsg != NULL);
-
-        rrc = RapkFmaCopyToUser(conn->rac_rihandle, buffer,
-                                &nob_received, sizeof(kra_msg_t));
-        LASSERT (rrc == RAP_SUCCESS);
-
-        conn->rac_rxmsg = NULL;
-
-        if (nob_received != nob) {
-                CWARN("Expected %d immediate bytes but got %d\n",
-                      nob, nob_received);
-                return -EPROTO;
-        }
-        
-        return 0;
-}
-
-ptl_err_t
-kranal_do_send (lib_nal_t    *nal, 
-                void         *private,
-                lib_msg_t    *libmsg,
-                ptl_hdr_t    *hdr, 
-                int           type, 
-                ptl_nid_t     nid, 
-                ptl_pid_t     pid,
-                unsigned int  niov, 
-                struct iovec *iov, 
-                ptl_kiov_t   *kiov,
-                size_t        offset,
-                size_t        nob)
-{
-        kra_conn_t *conn;
-        kra_tx_t   *tx;
-        int         rc;
-
-        /* NB 'private' is different depending on what we're sending.... */
-
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-               " pid %d\n", nob, niov, nid , pid);
-
-        LASSERT (nob == 0 || niov > 0);
-        LASSERT (niov <= PTL_MD_MAX_IOV);
-
-        LASSERT (!in_interrupt());
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        switch(type) {
-        default:
-                LBUG();
-                
-        case PTL_MSG_REPLY: {
-                /* reply's 'private' is the conn that received the GET_REQ */
-                conn = private;
-                LASSERT (conn->rac_rxmsg != NULL);
-
-                if (conn->rac_rxmsg->ram_type == RANAL_MSG_IMMEDIATE) {
-                        if (nob > RANAL_MAX_IMMEDIATE) {
-                                CERROR("Can't REPLY IMMEDIATE %d to "LPX64"\n",
-                                       nob, nid);
-                                return PTL_FAIL;
-                        }
-                        break;                  /* RDMA not expected */
-                }
-                
-                /* Incoming message consistent with immediate reply? */
-                if (conn->rac_rxmsg->ram_type != RANAL_MSG_GET_REQ) {
-                        CERROR("REPLY to "LPX64" bad msg type %x!!!\n",
-                              nid, conn->rac_rxmsg->ram_type);
-                        return PTL_FAIL;
-                }
-
-                tx = kranal_get_idle_tx(0);
-                if (tx == NULL)
-                        return PTL_FAIL;
-
-                rc = kranal_setup_buffer(tx, niov, iov, kiov, offset, nob);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return PTL_FAIL;
-                }
-
-                tx->tx_conn = conn;
-                tx->tx_libmsg[0] = libmsg;
-
-                kranal_map_buffer(tx);
-                kranal_rdma(tx, RANAL_MSG_GET_DONE,
-                            &conn->rac_rxmsg->ram_u.get.ragm_desc, nob,
-                            conn->rac_rxmsg->ram_u.get.ragm_cookie);
-                return PTL_OK;
-        }
-
-        case PTL_MSG_GET:
-                if (kiov == NULL &&             /* not paged */
-                    nob <= RANAL_MAX_IMMEDIATE && /* small enough */
-                    nob <= kranal_tunables.kra_max_immediate)
-                        break;                  /* send IMMEDIATE */
-
-                tx = kranal_new_tx_msg(0, RANAL_MSG_GET_REQ);
-                if (tx == NULL)
-                        return PTL_NO_SPACE;
-
-                rc = kranal_setup_buffer(tx, niov, iov, kiov, offset, nob);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return PTL_FAIL;
-                }
-
-                tx->tx_libmsg[1] = lib_create_reply_msg(&kranal_lib, nid, libmsg);
-                if (tx->tx_libmsg[1] == NULL) {
-                        CERROR("Can't create reply for GET to "LPX64"\n", nid);
-                        kranal_tx_done(tx, rc);
-                        return PTL_FAIL;
-                }
-
-                tx->tx_libmsg[0] = libmsg;
-                tx->tx_msg.ram_u.get.ragm_hdr = *hdr;
-                /* rest of tx_msg is setup just before it is sent */
-                kranal_launch_tx(tx, nid);
-                return PTL_OK;
-
-        case PTL_MSG_ACK:
-                LASSERT (nob == 0);
-                break;
-
-        case PTL_MSG_PUT:
-                if (kiov == NULL &&             /* not paged */
-                    nob <= RANAL_MAX_IMMEDIATE && /* small enough */
-                    nob <= kranal_tunables.kra_max_immediate)
-                        break;                  /* send IMMEDIATE */
-                
-                tx = kranal_new_tx_msg(!in_interrupt(), RANAL_MSG_PUT_REQ);
-                if (tx == NULL)
-                        return PTL_NO_SPACE;
-
-                rc = kranal_setup_buffer(tx, niov, iov, kiov, offset, nob);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return PTL_FAIL;
-                }
-
-                tx->tx_libmsg[0] = libmsg;
-                tx->tx_msg.ram_u.putreq.raprm_hdr = *hdr;
-                /* rest of tx_msg is setup just before it is sent */
-                kranal_launch_tx(tx, nid);
-                return PTL_OK;
-        }
-
-        LASSERT (kiov == NULL);
-        LASSERT (nob <= RANAL_MAX_IMMEDIATE);
-
-        tx = kranal_new_tx_msg(!(type == PTL_MSG_ACK ||
-                                 type == PTL_MSG_REPLY ||
-                                 in_interrupt()), 
-                               RANAL_MSG_IMMEDIATE);
-        if (tx == NULL)
-                return PTL_NO_SPACE;
-
-        rc = kranal_setup_immediate_buffer(tx, niov, iov, offset, nob);
-        if (rc != 0) {
-                kranal_tx_done(tx, rc);
-                return PTL_FAIL;
-        }
-                
-        tx->tx_msg.ram_u.immediate.raim_hdr = *hdr;
-        tx->tx_libmsg[0] = libmsg;
-        kranal_launch_tx(tx, nid);
-        return PTL_OK;
-}
-
-ptl_err_t
-kranal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-            ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-            unsigned int niov, struct iovec *iov,
-            size_t offset, size_t len)
-{
-        return kranal_do_send(nal, private, cookie,
-                             hdr, type, nid, pid,
-                             niov, iov, NULL,
-                             offset, len);
-}
-
-ptl_err_t
-kranal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, 
-                  ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                  unsigned int niov, ptl_kiov_t *kiov, 
-                  size_t offset, size_t len)
-{
-        return kranal_do_send(nal, private, cookie,
-                             hdr, type, nid, pid,
-                             niov, NULL, kiov,
-                             offset, len);
-}
-
-ptl_err_t
-kranal_recvmsg (lib_nal_t *nal, void *private, lib_msg_t *libmsg,
-               unsigned int niov, struct iovec *iov, ptl_kiov_t *kiov,
-               size_t offset, size_t mlen, size_t rlen)
-{
-        kra_conn_t  *conn = private;
-        kra_msg_t   *rxmsg = conn->rac_rxmsg;
-        kra_tx_t    *tx;
-        void        *buffer;
-        int          rc;
-        
-        LASSERT (mlen <= rlen);
-        LASSERT (!in_interrupt());
-        /* Either all pages or all vaddrs */
-        LASSERT (!(kiov != NULL && iov != NULL));
-
-        switch(rxmsg->ram_type) {
-        default:
-                LBUG();
-                return PTL_FAIL;
-                
-        case RANAL_MSG_IMMEDIATE:
-                if (mlen == 0) {
-                        buffer = NULL;
-                } else if (kiov != NULL) {
-                        CERROR("Can't recv immediate into paged buffer\n");
-                        return PTL_FAIL;
-                } else {
-                        LASSERT (niov > 0);
-                        while (offset >= iov->iov_len) {
-                                offset -= iov->iov_len;
-                                iov++;
-                                niov--;
-                                LASSERT (niov > 0);
-                        }
-                        if (mlen > iov->iov_len - offset) {
-                                CERROR("Can't handle immediate frags\n");
-                                return PTL_FAIL;
-                        }
-                        buffer = ((char *)iov->iov_base) + offset;
-                }
-                rc = kranal_consume_rxmsg(conn, buffer, mlen);
-                lib_finalize(nal, NULL, libmsg, (rc == 0) ? PTL_OK : PTL_FAIL);
-                return PTL_OK;
-
-        case RANAL_MSG_GET_REQ:
-                /* If the GET matched, we've already handled it in
-                 * kranal_do_send which is called to send the REPLY.  We're
-                 * only called here to complete the GET receive (if we needed
-                 * it which we don't, but I digress...) */
-                LASSERT (libmsg == NULL);
-                lib_finalize(nal, NULL, libmsg, PTL_OK);
-                return PTL_OK;
-
-        case RANAL_MSG_PUT_REQ:
-                if (libmsg == NULL) {           /* PUT didn't match... */
-                        lib_finalize(nal, NULL, libmsg, PTL_OK);
-                        return PTL_OK;
-                }
-                
-                tx = kranal_new_tx_msg(0, RANAL_MSG_PUT_ACK);
-                if (tx == NULL)
-                        return PTL_NO_SPACE;
-
-                rc = kranal_setup_buffer(tx, niov, iov, kiov, offset, mlen);
-                if (rc != 0) {
-                        kranal_tx_done(tx, rc);
-                        return PTL_FAIL;
-                }
-
-                kranal_map_buffer(tx);
-                
-                tx->tx_msg.ram_u.putack.rapam_src_cookie = 
-                        conn->rac_rxmsg->ram_u.putreq.raprm_cookie;
-                tx->tx_msg.ram_u.putack.rapam_dst_cookie = tx->tx_cookie;
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_key = tx->tx_map_key;
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_addr.AddressBits = 
-                        (__u64)((unsigned long)tx->tx_buffer);
-                tx->tx_msg.ram_u.putack.rapam_desc.rard_nob = mlen;
-
-                tx->tx_libmsg[0] = libmsg; /* finalize this on RDMA_DONE */
-
-                kranal_post_fma(conn, tx);
-                
-                /* flag matched by consuming rx message */
-                kranal_consume_rxmsg(conn, NULL, 0);
-                return PTL_OK;
-        }
-}
-
-ptl_err_t
-kranal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-            unsigned int niov, struct iovec *iov, 
-            size_t offset, size_t mlen, size_t rlen)
-{
-        return kranal_recvmsg(nal, private, msg, niov, iov, NULL,
-                             offset, mlen, rlen);
-}
-
-ptl_err_t
-kranal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                  unsigned int niov, ptl_kiov_t *kiov, 
-                  size_t offset, size_t mlen, size_t rlen)
-{
-        return kranal_recvmsg(nal, private, msg, niov, NULL, kiov,
-                             offset, mlen, rlen);
-}
-
-int
-kranal_thread_start (int(*fn)(void *arg), void *arg)
-{
-        long    pid = kernel_thread(fn, arg, 0);
-
-        if (pid < 0)
-                return(int)pid;
-
-        atomic_inc(&kranal_data.kra_nthreads);
-        return 0;
-}
-
-void
-kranal_thread_fini (void)
-{
-        atomic_dec(&kranal_data.kra_nthreads);
-}
-
-int
-kranal_check_conn (kra_conn_t *conn)
-{
-        kra_tx_t          *tx;
-        struct list_head  *ttmp;
-        unsigned long      flags;
-        long               timeout;
-        unsigned long      now = jiffies;
-
-        if (!conn->rac_closing &&
-            time_after_eq(now, conn->rac_last_tx + conn->rac_keepalive * HZ)) {
-                /* not sent in a while; schedule conn so scheduler sends a keepalive */
-                kranal_schedule_conn(conn);
-        }
-
-        /* wait twice as long for CLOSE to be sure peer is dead */
-        timeout = (conn->rac_closing ? 1 : 2) * conn->rac_timeout * HZ;
-
-        if (!conn->rac_close_recvd &&
-            time_after_eq(now, conn->rac_last_rx + timeout)) {
-                CERROR("Nothing received from "LPX64" within %lu seconds\n",
-                       conn->rac_peer->rap_nid, (now - conn->rac_last_rx)/HZ);
-                return -ETIMEDOUT;
-        }
-
-        if (conn->rac_closing)
-                return 0;
-        
-        /* Check the conn's queues are moving.  These are "belt+braces" checks,
-         * in case of hardware/software errors that make this conn seem
-         * responsive even though it isn't progressing its message queues. */
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-
-        list_for_each (ttmp, &conn->rac_fmaq) {
-                tx = list_entry(ttmp, kra_tx_t, tx_list);
-                
-                if (time_after_eq(now, tx->tx_qtime + timeout)) {
-                        spin_unlock_irqrestore(&conn->rac_lock, flags);
-                        CERROR("tx on fmaq for "LPX64" blocked %lu seconds\n",
-                               conn->rac_peer->rap_nid, (now - tx->tx_qtime)/HZ);
-                        return -ETIMEDOUT;
-                }
-        }
-        
-        list_for_each (ttmp, &conn->rac_rdmaq) {
-                tx = list_entry(ttmp, kra_tx_t, tx_list);
-                
-                if (time_after_eq(now, tx->tx_qtime + timeout)) {
-                        spin_unlock_irqrestore(&conn->rac_lock, flags);
-                        CERROR("tx on rdmaq for "LPX64" blocked %lu seconds\n",
-                               conn->rac_peer->rap_nid, (now - tx->tx_qtime)/HZ);
-                        return -ETIMEDOUT;
-                }
-        }
-        
-        list_for_each (ttmp, &conn->rac_replyq) {
-                tx = list_entry(ttmp, kra_tx_t, tx_list);
-                
-                if (time_after_eq(now, tx->tx_qtime + timeout)) {
-                        spin_unlock_irqrestore(&conn->rac_lock, flags);
-                        CERROR("tx on replyq for "LPX64" blocked %lu seconds\n",
-                               conn->rac_peer->rap_nid, (now - tx->tx_qtime)/HZ);
-                        return -ETIMEDOUT;
-                }
-        }
-        
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-        return 0;
-}
-
-void
-kranal_check_conns (int idx, unsigned long *min_timeoutp)
-{
-        struct list_head  *conns = &kranal_data.kra_conns[idx];
-        struct list_head  *ctmp;
-        kra_conn_t        *conn;
-        unsigned long      flags;
-        int                rc;
-
- again:
-        /* NB. We expect to check all the conns and not find any problems, so
-         * we just use a shared lock while we take a look... */
-        read_lock(&kranal_data.kra_global_lock);
-
-        list_for_each (ctmp, conns) {
-                conn = list_entry(ctmp, kra_conn_t, rac_hashlist);
-
-                if (conn->rac_timeout < *min_timeoutp )
-                        *min_timeoutp = conn->rac_timeout;
-                if (conn->rac_keepalive < *min_timeoutp )
-                        *min_timeoutp = conn->rac_keepalive;
-
-                rc = kranal_check_conn(conn);
-                if (rc == 0)
-                        continue;
-
-                kranal_conn_addref(conn);
-                read_unlock(&kranal_data.kra_global_lock);
-
-                CERROR("Check on conn to "LPX64"failed: %d\n",
-                       conn->rac_peer->rap_nid, rc);
-
-                write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                if (!conn->rac_closing)
-                        kranal_close_conn_locked(conn, -ETIMEDOUT);
-                else
-                        kranal_terminate_conn_locked(conn);
-                        
-                write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-
-                kranal_conn_decref(conn);
-
-                /* start again now I've dropped the lock */
-                goto again;
-        }
-
-        read_unlock(&kranal_data.kra_global_lock);
-}
-
-int
-kranal_connd (void *arg)
-{
-       char               name[16];
-        wait_queue_t       wait;
-        unsigned long      flags;
-        kra_peer_t        *peer;
-        int                i;
-
-       snprintf(name, sizeof(name), "kranal_connd_%02ld", (long)arg);
-        kportal_daemonize(name);
-        kportal_blockallsigs();
-
-        init_waitqueue_entry(&wait, current);
-
-        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-
-        while (!kranal_data.kra_shutdown) {
-                /* Safe: kra_shutdown only set when quiescent */
-
-                if (!list_empty(&kranal_data.kra_connd_peers)) {
-                        peer = list_entry(kranal_data.kra_connd_peers.next,
-                                         kra_peer_t, rap_connd_list);
-                        
-                        list_del_init(&peer->rap_connd_list);
-                        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-                        kranal_connect(peer);
-                        kranal_peer_decref(peer);
-
-                        spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-                       continue;
-                }
-
-                set_current_state(TASK_INTERRUPTIBLE);
-                add_wait_queue(&kranal_data.kra_connd_waitq, &wait);
-                
-                spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-                schedule ();
-                
-                set_current_state(TASK_RUNNING);
-                remove_wait_queue(&kranal_data.kra_connd_waitq, &wait);
-
-                spin_lock_irqsave(&kranal_data.kra_connd_lock, flags);
-        }
-
-        spin_unlock_irqrestore(&kranal_data.kra_connd_lock, flags);
-
-        kranal_thread_fini();
-        return 0;
-}
-
-void
-kranal_update_reaper_timeout(long timeout) 
-{
-        unsigned long   flags;
-
-        LASSERT (timeout > 0);
-        
-        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-        
-        if (timeout < kranal_data.kra_new_min_timeout)
-                kranal_data.kra_new_min_timeout = timeout;
-
-        spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-}
-
-int
-kranal_reaper (void *arg)
-{
-        wait_queue_t       wait;
-        unsigned long      flags;
-        kra_conn_t        *conn;
-        kra_peer_t        *peer;
-        long               timeout;
-        int                i;
-        int                conn_entries = kranal_data.kra_conn_hash_size;
-        int                conn_index = 0;
-        int                base_index = conn_entries - 1;
-        unsigned long      next_check_time = jiffies;
-        long               next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-        long               current_min_timeout = 1;
-        
-        kportal_daemonize("kranal_reaper");
-        kportal_blockallsigs();
-
-        init_waitqueue_entry(&wait, current);
-
-        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-        kranal_data.kra_new_min_timeout = 1;
-
-        while (!kranal_data.kra_shutdown) {
-
-                /* careful with the jiffy wrap... */
-                timeout = (long)(next_check_time - jiffies);
-                if (timeout <= 0) {
-                
-                        /* I wake up every 'p' seconds to check for
-                         * timeouts on some more peers.  I try to check
-                         * every connection 'n' times within the global
-                         * minimum of all keepalive and timeout intervals,
-                         * to ensure I attend to every connection within
-                         * (n+1)/n times its timeout intervals. */
-                
-                        const int     p = 1;
-                        const int     n = 3;
-                        unsigned long min_timeout;
-                        int           chunk;
-
-                        if (kranal_data.kra_new_min_timeout != MAX_SCHEDULE_TIMEOUT) {
-                                /* new min timeout set: restart min timeout scan */
-                                next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-                                base_index = conn_index - 1;
-                                if (base_index < 0)
-                                        base_index = conn_entries - 1;
-
-                                if (kranal_data.kra_new_min_timeout < current_min_timeout) {
-                                        current_min_timeout = kranal_data.kra_new_min_timeout;
-                                        CWARN("Set new min timeout %ld\n",
-                                              current_min_timeout);
-                                }
-
-                                kranal_data.kra_new_min_timeout = MAX_SCHEDULE_TIMEOUT;
-                        }
-                        min_timeout = current_min_timeout;
-
-                        spin_unlock_irqrestore(&kranal_data.kra_reaper_lock,
-                                               flags);
-
-                        LASSERT (min_timeout > 0);
-
-                        /* Compute how many table entries to check now so I
-                         * get round the whole table fast enough (NB I do
-                         * this at fixed intervals of 'p' seconds) */
-                       chunk = conn_entries;
-                        if (min_timeout > n * p)
-                                chunk = (chunk * n * p) / min_timeout;
-                        if (chunk == 0)
-                                chunk = 1;
-
-                        for (i = 0; i < chunk; i++) {
-                                kranal_check_conns(conn_index, 
-                                                   &next_min_timeout);
-                                conn_index = (conn_index + 1) % conn_entries;
-                        }
-
-                        next_check_time += p * HZ;
-
-                        spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
-                        if (((conn_index - chunk <= base_index &&
-                              base_index < conn_index) ||
-                             (conn_index - conn_entries - chunk <= base_index &&
-                              base_index < conn_index - conn_entries))) {
-
-                                /* Scanned all conns: set current_min_timeout... */
-                                if (current_min_timeout != next_min_timeout) {
-                                        current_min_timeout = next_min_timeout;                                        
-                                        CWARN("Set new min timeout %ld\n",
-                                              current_min_timeout);
-                                }
-
-                                /* ...and restart min timeout scan */
-                                next_min_timeout = MAX_SCHEDULE_TIMEOUT;
-                                base_index = conn_index - 1;
-                                if (base_index < 0)
-                                        base_index = conn_entries - 1;
-                        }
-                }
-
-                set_current_state(TASK_INTERRUPTIBLE);
-                add_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
-
-                spin_unlock_irqrestore(&kranal_data.kra_reaper_lock, flags);
-
-                schedule_timeout(timeout);
-
-                spin_lock_irqsave(&kranal_data.kra_reaper_lock, flags);
-
-                set_current_state(TASK_RUNNING);
-                remove_wait_queue(&kranal_data.kra_reaper_waitq, &wait);
-        }
-
-        kranal_thread_fini();
-        return 0;
-}
-
-/* Handle one completion event taken from the RDMA completion queue.
- *
- * Looks up the connection for 'cqid' (asserted to exist), retires the
- * completed RDMA with RapkRdmaDone() and moves the tx at the head of the
- * conn's rac_rdmaq onto the tail of its rac_fmaq, then schedules the conn
- * so that the queued PUT_DONE/GET_DONE message gets sent.
- *
- * kra_global_lock is held for reading across the whole operation. */
-void
-kranal_process_rdmaq (__u32 cqid)
-{
-        kra_conn_t          *conn;
-        kra_tx_t            *tx;
-        RAP_RETURN           rrc;
-        unsigned long        flags;
-        RAP_RDMA_DESCRIPTOR *desc;
-        
-        read_lock(&kranal_data.kra_global_lock);
-
-        conn = kranal_cqid2conn_locked(cqid);
-        LASSERT (conn != NULL);
-
-        /* 'desc' is filled in with the descriptor of the RDMA that finished */
-        rrc = RapkRdmaDone(conn->rac_rihandle, &desc);
-        LASSERT (rrc == RAP_SUCCESS);
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-
-        /* The completed RDMA must belong to the tx at the head of rac_rdmaq */
-        LASSERT (!list_empty(&conn->rac_rdmaq));
-        tx = list_entry(conn->rac_rdmaq.next, kra_tx_t, tx_list);
-        list_del(&tx->tx_list);
-
-        LASSERT(desc->AppPtr == (void *)tx);
-        LASSERT(tx->tx_msg.ram_type == RANAL_MSG_PUT_DONE ||
-                tx->tx_msg.ram_type == RANAL_MSG_GET_DONE);
-
-        /* Queue the completion message for sending and timestamp it */
-        list_add_tail(&tx->tx_list, &conn->rac_fmaq);
-        tx->tx_qtime = jiffies;
-        
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        /* Get conn's fmaq processed, now I've just put something there */
-        kranal_schedule_conn(conn);
-
-        read_unlock(&kranal_data.kra_global_lock);
-}
-
-/* Stamp 'msg' with this connection's incarnation and next tx sequence
- * number and hand it (plus any immediate payload) to the FMA layer.
- *
- * Messages whose type carries RANAL_MSG_FENCE go out via RapkFmaSyncSend();
- * everything else via RapkFmaSend().  Only RANAL_MSG_IMMEDIATE messages may
- * carry a payload.
- *
- * Returns 0 when the message was accepted (rac_last_tx and rac_tx_seq are
- * then updated), or -EAGAIN when the send returned RAP_NOT_DONE.  Any other
- * return code from the send is a bug. */
-int
-kranal_sendmsg(kra_conn_t *conn, kra_msg_t *msg,
-               void *immediate, int immediatenob)
-{
-        RAP_RETURN send_rc;
-
-        LASSERT (sizeof(*msg) <= RANAL_FMA_PREFIX_LEN);
-        LASSERT ((msg->ram_type == RANAL_MSG_IMMEDIATE) ?
-                 immediatenob <= RANAL_FMA_MAX_DATA_LEN :
-                 immediatenob == 0);
-
-        msg->ram_seq = conn->rac_tx_seq;
-        msg->ram_incarnation = conn->rac_my_incarnation;
-
-        if ((msg->ram_type & RANAL_MSG_FENCE) != 0) {
-                /* fenced message: synchronous send */
-                send_rc = RapkFmaSyncSend(conn->rac_device->rad_handle,
-                                          immediate, immediatenob,
-                                          msg, sizeof(*msg));
-        } else {
-                send_rc = RapkFmaSend(conn->rac_device->rad_handle,
-                                      immediate, immediatenob,
-                                      msg, sizeof(*msg));
-        }
-
-        if (send_rc == RAP_NOT_DONE)
-                return -EAGAIN;
-
-        if (send_rc != RAP_SUCCESS)
-                LBUG();
-
-        /* Sent: remember when, and consume the sequence number */
-        conn->rac_last_tx = jiffies;
-        conn->rac_tx_seq++;
-        return 0;
-}
-
-/* Send at most one message on 'conn'.
- *
- * On a closing connection this sends a NOOP while RDMAs are still
- * outstanding, or otherwise the CLOSE message (once).  On a live connection
- * it takes the tx at the head of rac_fmaq and sends it; if rac_fmaq is empty
- * it sends a NOOP keepalive when rac_keepalive has elapsed since the last
- * transmit.
- *
- * A tx whose send returns -EAGAIN is put back at the head of rac_fmaq.  A tx
- * that expects a reply moves to rac_replyq; any other tx is completed
- * immediately.
- *
- * Returns non-zero only when a tx was sent and rac_fmaq was still non-empty
- * after it was dequeued; 0 in every other case. */
-int
-kranal_process_fmaq (kra_conn_t *conn) 
-{
-        unsigned long flags;
-        int           more_to_do;
-        kra_tx_t     *tx;
-        int           rc;
-        int           expect_reply;
-
-        /* NB I will be rescheduled via a rad_fma_cq event if my FMA is
-         * out of credits when I try to send right now... */
-
-        if (conn->rac_closing) {
-
-                if (!list_empty(&conn->rac_rdmaq)) {
-                        /* Can't send CLOSE yet; I'm still waiting for RDMAs I
-                         * posted to finish */
-                        LASSERT (!conn->rac_close_sent);
-                        kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
-                        /* return code deliberately ignored for this NOOP */
-                        kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-                        return 0;
-                }
-
-                if (conn->rac_close_sent)
-                        return 0;
-                
-                kranal_init_msg(&conn->rac_msg, RANAL_MSG_CLOSE);
-                rc = kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-                /* only mark CLOSE as sent if the send was accepted */
-                conn->rac_close_sent = (rc == 0);
-                return 0;
-        }
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-
-        if (list_empty(&conn->rac_fmaq)) {
-
-                spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-                /* Nothing queued: send a keepalive if the link has been
-                 * idle for at least rac_keepalive */
-                if (time_after_eq(jiffies, 
-                                  conn->rac_last_tx + conn->rac_keepalive)) {
-                        kranal_init_msg(&conn->rac_msg, RANAL_MSG_NOOP);
-                        kranal_sendmsg(conn, &conn->rac_msg, NULL, 0);
-                }
-                return 0;
-        }
-        
-        /* Dequeue the first tx and note whether more remain behind it */
-        tx = list_entry(conn->rac_fmaq.next, kra_tx_t, tx_list);
-        list_del(&tx->tx_list);
-        more_to_do = !list_empty(&conn->rac_fmaq);
-
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        expect_reply = 0;
-        switch (tx->tx_msg.ram_type) {
-        default:
-                LBUG();
-                
-        case RANAL_MSG_IMMEDIATE:
-        case RANAL_MSG_PUT_NAK:
-        case RANAL_MSG_PUT_DONE:
-        case RANAL_MSG_GET_NAK:
-        case RANAL_MSG_GET_DONE:
-                /* self-contained messages: no reply expected */
-                rc = kranal_sendmsg(conn, &tx->tx_msg,
-                                    tx->tx_buffer, tx->tx_nob);
-                expect_reply = 0;
-                break;
-                
-        case RANAL_MSG_PUT_REQ:
-                tx->tx_msg.ram_u.putreq.raprm_cookie = tx->tx_cookie;
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                /* NOTE(review): the buffer is mapped after the send, and
-                 * also when the send returned -EAGAIN, so a retried tx
-                 * reaches kranal_map_buffer() again -- confirm that is
-                 * safe to call repeatedly. */
-                kranal_map_buffer(tx);
-                expect_reply = 1;
-                break;
-
-        case RANAL_MSG_PUT_ACK:
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                expect_reply = 1;
-                break;
-
-        case RANAL_MSG_GET_REQ:
-                /* map first: the request carries the mapping key, address
-                 * and length of the buffer */
-                kranal_map_buffer(tx);
-                tx->tx_msg.ram_u.get.ragm_cookie = tx->tx_cookie;
-                tx->tx_msg.ram_u.get.ragm_desc.rard_key = tx->tx_map_key;
-                tx->tx_msg.ram_u.get.ragm_desc.rard_addr.AddressBits = 
-                        (__u64)((unsigned long)tx->tx_buffer);
-                tx->tx_msg.ram_u.get.ragm_desc.rard_nob = tx->tx_nob;
-                rc = kranal_sendmsg(conn, &tx->tx_msg, NULL, 0);
-                expect_reply = 1;
-                break;
-        }
-
-        if (rc == -EAGAIN) {
-                /* replace at the head of the list for later */
-                spin_lock_irqsave(&conn->rac_lock, flags);
-                list_add(&tx->tx_list, &conn->rac_fmaq);
-                spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-                return 0;
-        }
-
-        LASSERT (rc == 0);
-        
-        if (!expect_reply) {
-                kranal_tx_done(tx, 0);
-        } else {
-                /* park the tx until the peer's reply is matched against it */
-                spin_lock_irqsave(&conn->rac_lock, flags);
-                list_add_tail(&tx->tx_list, &conn->rac_replyq);
-                tx->tx_qtime = jiffies;
-                spin_unlock_irqrestore(&conn->rac_lock, flags);
-        }
-
-        return more_to_do;
-}
-
-/* Byte-swap, in place, every field of an RDMA descriptor: the buffer
- * address, the byte count, and each member of the memory key. */
-static inline void
-kranal_swab_rdma_desc (kra_rdma_desc_t *desc)
-{
-        /* buffer address and length */
-        __swab64s(&desc->rard_addr.AddressBits);
-        __swab32s(&desc->rard_nob);
-
-        /* memory key */
-        __swab64s(&desc->rard_key.Key);
-        __swab16s(&desc->rard_key.Cookie);
-        __swab16s(&desc->rard_key.MdHandle);
-        __swab32s(&desc->rard_key.Flags);
-}
-
-/* Find the tx on conn's reply queue that a just-received reply answers.
- *
- * conn:   connection the reply arrived on
- * type:   message type the waiting tx must have been sent with
- * cookie: cookie carried by the reply; identifies the waiting tx
- *
- * On success the tx is unlinked from rac_replyq and returned; the caller
- * then owns it.  Returns NULL (after logging a warning) if no queued tx has
- * this cookie, or if the tx with this cookie was sent with a different
- * message type.
- *
- * rac_replyq is walked under rac_lock, the lock it is populated under. */
-kra_tx_t *
-kranal_match_reply(kra_conn_t *conn, int type, __u64 cookie)
-{
-        unsigned long     flags;
-        struct list_head *ttmp;
-        kra_tx_t         *tx;
-        int               found_type;
-
-        spin_lock_irqsave(&conn->rac_lock, flags);
-
-        list_for_each(ttmp, &conn->rac_replyq) {
-                tx = list_entry(ttmp, kra_tx_t, tx_list);
-
-                if (tx->tx_cookie != cookie)
-                        continue;
-
-                if (tx->tx_msg.ram_type != type) {
-                        /* sample the type before dropping the lock; the tx
-                         * stays queued and is not ours to touch afterwards */
-                        found_type = tx->tx_msg.ram_type;
-                        spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-                        CWARN("Unexpected type %x (%x expected) "
-                              "matched reply from "LPX64"\n",
-                              found_type, type,
-                              conn->rac_peer->rap_nid);
-                        return NULL;
-                }
-
-                /* Matched: take it off the reply queue for the caller */
-                list_del(&tx->tx_list);
-                spin_unlock_irqrestore(&conn->rac_lock, flags);
-                return tx;
-        }
-
-        spin_unlock_irqrestore(&conn->rac_lock, flags);
-
-        CWARN("Unmatched reply from "LPX64"\n", conn->rac_peer->rap_nid);
-        return NULL;
-}
-
-/* Receive and dispatch at most one FMA message on 'conn'.
- *
- * Fetches the next message prefix, byte-swaps it if the peer has the
- * opposite endianness, validates magic, version, source NID, incarnation
- * and sequence number, and then acts on the message type.
- *
- * Returns 0 if no message was available (RAP_NOT_DONE), otherwise 1, even
- * when the message was rejected. */
-int
-kranal_process_receives(kra_conn_t *conn)
-{
-        unsigned long flags;
-        __u32         seq;
-        __u32         nob;
-        kra_tx_t     *tx;
-        kra_msg_t    *msg;
-        void         *prefix;
-        RAP_RETURN    rrc = RapkFmaGetPrefix(conn->rac_rihandle, &prefix);
-        kra_peer_t   *peer = conn->rac_peer;
-
-        if (rrc == RAP_NOT_DONE)
-                return 0;
-        
-        LASSERT (rrc == RAP_SUCCESS);
-        conn->rac_last_rx = jiffies;
-        /* 'seq' is the sequence number this message is expected to carry */
-        seq = conn->rac_rx_seq++;
-        msg = (kra_msg_t *)prefix;
-
-        if (msg->ram_magic != RANAL_MSG_MAGIC) {
-                /* Not our magic as-is: accept only if it is our magic
-                 * byte-swapped, i.e. the peer has the other endianness */
-                if (__swab32(msg->ram_magic) != RANAL_MSG_MAGIC) {
-                        CERROR("Unexpected magic %08x from "LPX64"\n",
-                               msg->ram_magic, peer->rap_nid);
-                        goto out;
-                }
-
-                __swab32s(&msg->ram_magic);
-                __swab16s(&msg->ram_version);
-                __swab16s(&msg->ram_type);
-                __swab64s(&msg->ram_srcnid);
-                __swab64s(&msg->ram_incarnation);
-                __swab32s(&msg->ram_seq);
-
-                /* NB message type checked below; NOT here... */
-                switch (msg->ram_type) {
-                case RANAL_MSG_PUT_ACK:
-                        kranal_swab_rdma_desc(&msg->ram_u.putack.rapam_desc);
-                        break;
-
-                case RANAL_MSG_GET_REQ:
-                        kranal_swab_rdma_desc(&msg->ram_u.get.ragm_desc);
-                        break;
-                        
-                default:
-                        break;
-                }
-        }
-
-        if (msg->ram_version != RANAL_MSG_VERSION) {
-                CERROR("Unexpected protocol version %d from "LPX64"\n",
-                       msg->ram_version, peer->rap_nid);
-                goto out;
-        }
-
-        if (msg->ram_srcnid != peer->rap_nid) {
-                CERROR("Unexpected peer "LPX64" from "LPX64"\n",
-                       msg->ram_srcnid, peer->rap_nid);
-                goto out;
-        }
-        
-        if (msg->ram_incarnation != conn->rac_peer_incarnation) {
-                CERROR("Unexpected incarnation "LPX64"("LPX64
-                       " expected) from "LPX64"\n",
-                       msg->ram_incarnation, conn->rac_peer_incarnation,
-                       peer->rap_nid);
-                goto out;
-        }
-        
-        if (msg->ram_seq != seq) {
-                CERROR("Unexpected sequence number %d(%d expected) from "
-                       LPX64"\n", msg->ram_seq, seq, peer->rap_nid);
-                goto out;
-        }
-
-        if ((msg->ram_type & RANAL_MSG_FENCE) != 0) {
-                /* This message signals RDMA completion: wait now... */
-                rrc = RapkFmaSyncWait(conn->rac_rihandle);
-                LASSERT (rrc == RAP_SUCCESS);
-        }
-        if (msg->ram_type == RANAL_MSG_CLOSE) {
-                conn->rac_close_recvd = 1;
-                write_lock_irqsave(&kranal_data.kra_global_lock, flags);
-
-                /* Peer closed first: start closing our side.  If we were
-                 * already closing and our CLOSE has gone out, both sides
-                 * are done and the conn can be terminated. */
-                if (!conn->rac_closing)
-                        kranal_close_conn_locked(conn, -ETIMEDOUT);
-                else if (conn->rac_close_sent)
-                        kranal_terminate_conn_locked(conn);
-
-                write_unlock_irqrestore(&kranal_data.kra_global_lock, flags);
-                goto out;
-        }
-
-        /* Anything other than CLOSE is dropped once we are closing */
-        if (conn->rac_closing)
-                goto out;
-        
-        conn->rac_rxmsg = msg;                  /* stash message for portals callbacks */
-                                                /* they'll NULL rac_rxmsg if they consume it */
-        switch (msg->ram_type) {
-        case RANAL_MSG_NOOP:
-                /* Nothing to do; just a keepalive */
-                break;
-                
-        case RANAL_MSG_IMMEDIATE:
-                lib_parse(&kranal_lib, &msg->ram_u.immediate.raim_hdr, conn);
-                break;
-                
-        case RANAL_MSG_PUT_REQ:
-                lib_parse(&kranal_lib, &msg->ram_u.putreq.raprm_hdr, conn);
-
-                if (conn->rac_rxmsg == NULL)    /* lib_parse matched something */
-                        break;
-
-                /* No match: NAK the request, echoing the sender's cookie */
-                tx = kranal_new_tx_msg(0, RANAL_MSG_PUT_NAK);
-                if (tx == NULL)
-                        break;
-                
-                tx->tx_msg.ram_u.completion.racm_cookie = 
-                        msg->ram_u.putreq.raprm_cookie;
-                kranal_post_fma(conn, tx);
-                break;
-
-        case RANAL_MSG_PUT_NAK:
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-                
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, -ENOENT);    /* no match */
-                break;
-                
-        case RANAL_MSG_PUT_ACK:
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_REQ,
-                                        msg->ram_u.putack.rapam_src_cookie);
-                if (tx == NULL)
-                        break;
-
-                /* Peer accepted our PUT: RDMA the data using the descriptor
-                 * it supplied; PUT_DONE is the completion message */
-                kranal_rdma(tx, RANAL_MSG_PUT_DONE,
-                            &msg->ram_u.putack.rapam_desc, 
-                            msg->ram_u.putack.rapam_desc.rard_nob,
-                            msg->ram_u.putack.rapam_dst_cookie);
-                break;
-
-        case RANAL_MSG_PUT_DONE:
-                tx = kranal_match_reply(conn, RANAL_MSG_PUT_ACK,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, 0);
-                break;
-
-        case RANAL_MSG_GET_REQ:
-                lib_parse(&kranal_lib, &msg->ram_u.get.ragm_hdr, conn);
-                
-                if (conn->rac_rxmsg == NULL)    /* lib_parse matched something */
-                        break;
-
-                /* No match: NAK the request, echoing the sender's cookie */
-                tx = kranal_new_tx_msg(0, RANAL_MSG_GET_NAK);
-                if (tx == NULL)
-                        break;
-
-                tx->tx_msg.ram_u.completion.racm_cookie = msg->ram_u.get.ragm_cookie;
-                kranal_post_fma(conn, tx);
-                break;
-                
-        case RANAL_MSG_GET_NAK:
-                tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-                
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, -ENOENT);    /* no match */
-                break;
-                
-        case RANAL_MSG_GET_DONE:
-                tx = kranal_match_reply(conn, RANAL_MSG_GET_REQ,
-                                        msg->ram_u.completion.racm_cookie);
-                if (tx == NULL)
-                        break;
-                
-                LASSERT (tx->tx_buftype == RANAL_BUF_PHYS_MAPPED ||
-                         tx->tx_buftype == RANAL_BUF_VIRT_MAPPED);
-                kranal_tx_done(tx, 0);
-                break;
-        }
-
-        /* NOTE(review): every 'goto out' above happens before rac_rxmsg is
-         * assigned in this function, so whether a rejected message gets
-         * consumed here depends on what rac_rxmsg held on entry -- confirm
-         * that rejected messages are not left unconsumed. */
- out:
-        if (conn->rac_rxmsg != NULL)
-                kranal_consume_rxmsg(conn, NULL, 0);
-
-        return 1;
-}
-
-/* Per-device scheduler thread.
- *
- * arg: the kra_device_t this thread services.
- *
- * Loops until kra_shutdown is set.  Each pass:
- *   - if the device was flagged ready, polls the RDMA and FMA completion
- *     queues once each and dispatches any event found;
- *   - takes one connection off the device's rad_connq and runs its FMA
- *     send and receive processing, requeueing it if either asks for more.
- * When a pass did no work the thread sleeps on rad_waitq.  After
- * RANAL_RESCHED consecutive busy passes it yields the CPU.
- *
- * rad_lock is held at the top of every pass and dropped around all real
- * work.  Always returns 0. */
-int
-kranal_scheduler (void *arg)
-{
-        kra_device_t   *dev = (kra_device_t *)arg;
-        wait_queue_t    wait;
-        char            name[16];
-        kra_conn_t     *conn;
-        unsigned long   flags;
-        RAP_RETURN      rrc;
-        int             resched;
-        __u32           cqid;
-        __u32           event_type;
-        int             did_something;
-        int             busy_loops = 0;
-
-        snprintf(name, sizeof(name), "kranal_sd_%02d", dev->rad_idx);
-        kportal_daemonize(name);
-        kportal_blockallsigs();
-
-        init_waitqueue_entry(&wait, current);
-
-        spin_lock_irqsave(&dev->rad_lock, flags);
-
-        while (!kranal_data.kra_shutdown) {
-                /* Safe: kra_shutdown only set when quiescent */
-
-                if (busy_loops++ >= RANAL_RESCHED) {
-                        spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-                        our_cond_resched();
-                        busy_loops = 0;
-
-                        spin_lock_irqsave(&dev->rad_lock, flags);
-                }
-
-                did_something = 0;
-
-                if (dev->rad_ready) {
-                        dev->rad_ready = 0;
-                        spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-                        rrc = RapkCQDone(dev->rad_rdma_cq, &cqid, &event_type);
-                        LASSERT (rrc == RAP_SUCCESS || rrc == RAP_NOT_DONE);
-
-                        if (rrc == RAP_SUCCESS) {
-                                /* Only inspect event_type once an event has
-                                 * actually been returned; on RAP_NOT_DONE it
-                                 * may never have been written. */
-                                LASSERT ((event_type & RAPK_CQ_EVENT_OVERRUN) == 0);
-
-                                kranal_process_rdmaq(cqid);
-                                did_something = 1;
-                        }
-
-                        rrc = RapkCQDone(dev->rad_fma_cq, &cqid, &event_type);
-                        LASSERT (rrc == RAP_SUCCESS || rrc == RAP_NOT_DONE);
-
-                        if (rrc == RAP_SUCCESS) {
-                                /* On overrun the cqid can't be relied on, so
-                                 * schedule the whole device instead of one
-                                 * connection */
-                                if ((event_type & RAPK_CQ_EVENT_OVERRUN) != 0)
-                                        kranal_schedule_dev(dev);
-                                else
-                                        kranal_schedule_cqid(cqid);
-                                did_something = 1;
-                        }
-
-                        spin_lock_irqsave(&dev->rad_lock, flags);
-
-                        /* If there were no completions to handle, I leave
-                         * rad_ready clear.  NB I cleared it BEFORE I checked
-                         * the completion queues since I'm racing with the
-                         * device callback. */
-
-                        if (did_something)
-                                dev->rad_ready = 1;
-                }
-
-                if (!list_empty(&dev->rad_connq)) {
-                        /* Service the connection at the head of the queue */
-                        conn = list_entry(dev->rad_connq.next,
-                                          kra_conn_t, rac_schedlist);
-                        list_del(&conn->rac_schedlist);
-                        spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-                        LASSERT (conn->rac_scheduled);
-
-                        resched  = kranal_process_fmaq(conn);
-                        resched |= kranal_process_receives(conn);
-                        did_something = 1;
-
-                        spin_lock_irqsave(&dev->rad_lock, flags);
-                        if (resched)
-                                list_add_tail(&conn->rac_schedlist,
-                                              &dev->rad_connq);
-                }
-
-                if (did_something)
-                        continue;
-
-                /* Nothing to do: sleep until woken via rad_waitq */
-                add_wait_queue(&dev->rad_waitq, &wait);
-                set_current_state(TASK_INTERRUPTIBLE);
-
-                spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-                busy_loops = 0;
-                schedule();
-
-                set_current_state(TASK_RUNNING);
-                remove_wait_queue(&dev->rad_waitq, &wait);
-
-                spin_lock_irqsave(&dev->rad_lock, flags);
-        }
-
-        spin_unlock_irqrestore(&dev->rad_lock, flags);
-
-        kranal_thread_fini();
-        return 0;
-}
-
-
-/* NAL method table: the send/receive/distance entry points for this NAL,
- * plus its private data.  It is the lib_nal_t passed to lib_parse() for
- * incoming messages. */
-lib_nal_t kranal_lib = {
-        libnal_data:        &kranal_data,      /* NAL private data */
-        libnal_send:         kranal_send,
-        libnal_send_pages:   kranal_send_pages,
-        libnal_recv:         kranal_recv,
-        libnal_recv_pages:   kranal_recv_pages,
-        libnal_dist:         kranal_dist
-};
diff --git a/lustre/portals/knals/socknal/.cvsignore b/lustre/portals/knals/socknal/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/knals/socknal/Makefile.in b/lustre/portals/knals/socknal/Makefile.in
deleted file mode 100644 (file)
index 633b455..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-# Kernel module 'ksocknal' and the objects it is linked from.
-MODULES := ksocknal
-ksocknal-objs := socknal.o socknal_cb.o
-
-# If you don't build with -O2, your modules won't insert, because htonl is
-# just special that way.
-EXTRA_POST_CFLAGS := -O2
-
-# Placeholder filled in when this template is processed.
-@INCLUDE_RULES@
diff --git a/lustre/portals/knals/socknal/Makefile.mk b/lustre/portals/knals/socknal/Makefile.mk
deleted file mode 100644 (file)
index 5c1b366..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Shared kernel build settings, two directories up from this one.
-include $(src)/../../Kernelenv
-
-# Build ksocknal.o from the two socknal objects and add it to obj-y.
-obj-y += ksocknal.o
-ksocknal-objs    := socknal.o socknal_cb.o
-
diff --git a/lustre/portals/knals/socknal/autoMakefile.am b/lustre/portals/knals/socknal/autoMakefile.am
deleted file mode 100644 (file)
index 070b649..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# The ksocknal module is only listed for installation when modules are
-# being built and the build is not using Cray portals.
-if MODULES
-if !CRAY_PORTALS
-modulenet_DATA = ksocknal$(KMODEXT)
-endif
-endif
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-# Distribute one .c file per object named in ksocknal-objs, plus the header.
-# NOTE(review): ksocknal-objs is not assigned in this file -- confirm it is
-# provided by the surrounding build.
-DIST_SOURCES = $(ksocknal-objs:%.o=%.c) socknal.h
diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c
deleted file mode 100644 (file)
index 7642770..0000000
+++ /dev/null
@@ -1,2531 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "socknal.h"
-
-/* Module-wide state for the socket NAL */
-nal_t                   ksocknal_api;
-ksock_nal_data_t        ksocknal_data;
-ptl_handle_ni_t         ksocknal_ni;
-ksock_tunables_t        ksocknal_tunables;      /* values exposed via sysctl below */
-
-/* Router interface descriptor for this NAL: its NAL id, the private data
- * argument, and the packet-forwarding and notification entry points. */
-kpr_nal_interface_t ksocknal_router_interface = {
-        kprni_nalid:      SOCKNAL,
-        kprni_arg:        &ksocknal_data,
-        kprni_fwd:        ksocknal_fwd_packet,
-        kprni_notify:     ksocknal_notify,
-};
-
-#ifdef CONFIG_SYSCTL
-/* Identifier of the "socknal" sysctl directory */
-#define SOCKNAL_SYSCTL 200
-
-/* Identifiers of the individual entries inside that directory */
-#define SOCKNAL_SYSCTL_TIMEOUT          1
-#define SOCKNAL_SYSCTL_EAGER_ACK        2
-#define SOCKNAL_SYSCTL_ZERO_COPY        3
-#define SOCKNAL_SYSCTL_TYPED            4
-#define SOCKNAL_SYSCTL_MIN_BULK         5
-#define SOCKNAL_SYSCTL_BUFFER_SIZE      6
-#define SOCKNAL_SYSCTL_NAGLE            7
-#define SOCKNAL_SYSCTL_IRQ_AFFINITY     8
-#define SOCKNAL_SYSCTL_KEEPALIVE_IDLE   9
-#define SOCKNAL_SYSCTL_KEEPALIVE_COUNT 10
-#define SOCKNAL_SYSCTL_KEEPALIVE_INTVL 11
-
-/* One entry per tunable; each exposes an int member of ksocknal_tunables
- * with mode 0644 through proc_dointvec.  The initialisers are positional:
- * presumably id, name, data pointer, size, mode, child table, handler --
- * TODO confirm against this kernel's ctl_table definition.
- * The zero_copy and irq_affinity entries exist only when SOCKNAL_ZC and
- * CPU_AFFINITY respectively are enabled. */
-static ctl_table ksocknal_ctl_table[] = {
-        {SOCKNAL_SYSCTL_TIMEOUT, "timeout", 
-         &ksocknal_tunables.ksnd_io_timeout, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_EAGER_ACK, "eager_ack", 
-         &ksocknal_tunables.ksnd_eager_ack, sizeof (int),
-         0644, NULL, &proc_dointvec},
-#if SOCKNAL_ZC
-        {SOCKNAL_SYSCTL_ZERO_COPY, "zero_copy", 
-         &ksocknal_tunables.ksnd_zc_min_frag, sizeof (int),
-         0644, NULL, &proc_dointvec},
-#endif
-        {SOCKNAL_SYSCTL_TYPED, "typed", 
-         &ksocknal_tunables.ksnd_typed_conns, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_MIN_BULK, "min_bulk", 
-         &ksocknal_tunables.ksnd_min_bulk, sizeof (int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_BUFFER_SIZE, "buffer_size",
-         &ksocknal_tunables.ksnd_buffer_size, sizeof(int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_NAGLE, "nagle",
-         &ksocknal_tunables.ksnd_nagle, sizeof(int),
-         0644, NULL, &proc_dointvec},
-#if CPU_AFFINITY
-        {SOCKNAL_SYSCTL_IRQ_AFFINITY, "irq_affinity",
-         &ksocknal_tunables.ksnd_irq_affinity, sizeof(int),
-         0644, NULL, &proc_dointvec},
-#endif
-        {SOCKNAL_SYSCTL_KEEPALIVE_IDLE, "keepalive_idle",
-         &ksocknal_tunables.ksnd_keepalive_idle, sizeof(int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_KEEPALIVE_COUNT, "keepalive_count",
-         &ksocknal_tunables.ksnd_keepalive_count, sizeof(int),
-         0644, NULL, &proc_dointvec},
-        {SOCKNAL_SYSCTL_KEEPALIVE_INTVL, "keepalive_intvl",
-         &ksocknal_tunables.ksnd_keepalive_intvl, sizeof(int),
-         0644, NULL, &proc_dointvec},
-        { 0 }                                   /* terminator */
-};
-
-/* Top-level table: a single "socknal" directory (mode 0555) whose children
- * are the entries above. */
-static ctl_table ksocknal_top_ctl_table[] = {
-        {SOCKNAL_SYSCTL, "socknal", NULL, 0, 0555, ksocknal_ctl_table},
-        { 0 }
-};
-#endif
-
-/* Overwrite the NID recorded in this NAL's library network interface.
- * Logs the new and the previous NID.  Always returns 0. */
-int
-ksocknal_set_mynid(ptl_nid_t nid)
-{
-        lib_ni_t *libni = &ksocknal_lib.libnal_ni;
-
-        /* FIXME: lib_init() runs at module insertion time, before 'mynid'
-         * is known, and it is lib_init() that sets the NAL's nid (used to
-         * tell other nodes where packets come from).  Patching the nid in
-         * afterwards like this is not a graceful solution. */
-
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n",
-               nid, libni->ni_pid.nid);
-
-        libni->ni_pid.nid = nid;
-
-        return 0;
-}
-
-/* Bind 'irq' to the CPU of the scheduler that services it, once only.
- *
- * Compiled to a no-op unless CONFIG_SMP and CPU_AFFINITY are both enabled.
- * irq 0 means a software NIC or affinity disabled, and is ignored.  The
- * first call for a given irq marks it bound under ksnd_global_lock and then
- * runs a shell command via the usermode helper to write the CPU mask to
- * /proc/irq/<irq>/smp_affinity; later calls return early. */
-void
-ksocknal_bind_irq (unsigned int irq)
-{
-#if (defined(CONFIG_SMP) && CPU_AFFINITY)
-        int              bind;
-        int              cpu;
-        unsigned long    flags;
-        char             cmdline[64];
-        ksock_irqinfo_t *info;
-        char            *argv[] = {"/bin/sh",
-                                   "-c",
-                                   cmdline,
-                                   NULL};
-        char            *envp[] = {"HOME=/",
-                                   "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-                                   NULL};
-
-        LASSERT (irq < NR_IRQS);
-        if (irq == 0)              /* software NIC or affinity disabled */
-                return;
-
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        /* Test-and-set the 'bound' flag so only one caller does the bind */
-        LASSERT (info->ksni_valid);
-        bind = !info->ksni_bound;
-        info->ksni_bound = 1;
-
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        if (!bind)                              /* bound already */
-                return;
-
-        cpu = ksocknal_irqsched2cpu(info->ksni_sched);
-
-        /* smp_affinity takes a hexadecimal CPU bitmask, so the mask must be
-         * printed in hex: a decimal "16" for CPU 4 would be read as 0x16. */
-        snprintf (cmdline, sizeof (cmdline),
-                  "echo %x > /proc/irq/%u/smp_affinity", 1U << cpu, irq);
-
-        printk (KERN_INFO "Lustre: Binding irq %u to CPU %d with cmd: %s\n",
-                irq, cpu, cmdline);
-
-        /* FIXME: Find a better method of setting IRQ affinity...
-         */
-
-        USERMODEHELPER(argv[0], argv, envp);
-#endif
-}
-
-/* Return the entry of ksocknal_data.ksnd_interfaces whose address is 'ip',
- * or NULL if none of the ksnd_ninterfaces entries matches. */
-ksock_interface_t *
-ksocknal_ip2iface(__u32 ip)
-{
-        int idx;
-
-        for (idx = 0; idx < ksocknal_data.ksnd_ninterfaces; idx++) {
-                ksock_interface_t *candidate;
-
-                LASSERT(idx < SOCKNAL_MAX_INTERFACES);
-                candidate = &ksocknal_data.ksnd_interfaces[idx];
-
-                if (candidate->ksni_ipaddr == ip)
-                        return candidate;
-        }
-
-        return NULL;
-}
-
-/* Allocate a route to ipaddr/port.
- *
- * The new route holds a single reference (for the caller), is attached to
- * no peer, has all state flags and counters cleared, and starts with the
- * minimum reconnect interval.  Returns NULL if allocation fails. */
-ksock_route_t *
-ksocknal_create_route (__u32 ipaddr, int port)
-{
-        ksock_route_t *new_route;
-
-        PORTAL_ALLOC (new_route, sizeof (*new_route));
-        if (new_route == NULL)
-                return NULL;
-
-        /* identity */
-        new_route->ksnr_ipaddr = ipaddr;
-        new_route->ksnr_port = port;
-        new_route->ksnr_peer = NULL;
-
-        /* one reference, owned by the caller */
-        atomic_set (&new_route->ksnr_refcount, 1);
-
-        /* reconnect timing */
-        new_route->ksnr_timeout = jiffies;
-        new_route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
-
-        /* state flags and counters */
-        new_route->ksnr_connecting = 0;
-        new_route->ksnr_connected = 0;
-        new_route->ksnr_deleted = 0;
-        new_route->ksnr_conn_count = 0;
-        new_route->ksnr_share_count = 0;
-
-        return new_route;
-}
-
-/* Free a route, first dropping the reference it holds on its peer (if it
- * is attached to one). */
-void
-ksocknal_destroy_route (ksock_route_t *route)
-{
-        ksock_peer_t *owner = route->ksnr_peer;
-
-        if (owner != NULL)
-                ksocknal_put_peer (owner);
-
-        PORTAL_FREE (route, sizeof (*route));
-}
-
-/* Drop one reference on 'route'; the route is destroyed when the last
- * reference goes. */
-void
-ksocknal_put_route (ksock_route_t *route)
-{
-        CDEBUG (D_OTHER, "putting route[%p] (%d)\n",
-                route, atomic_read (&route->ksnr_refcount));
-
-        LASSERT (atomic_read (&route->ksnr_refcount) > 0);
-
-        if (atomic_dec_and_test (&route->ksnr_refcount))
-                ksocknal_destroy_route (route);
-}
-
-/* Allocate a zeroed peer for 'nid' (which must not be PTL_NID_ANY).
- *
- * The peer is returned with one reference for the caller and empty
- * connection, route and tx queues, and the global peer count is bumped.
- * Returns NULL if allocation fails. */
-ksock_peer_t *
-ksocknal_create_peer (ptl_nid_t nid)
-{
-        ksock_peer_t *new_peer;
-
-        LASSERT (nid != PTL_NID_ANY);
-
-        PORTAL_ALLOC (new_peer, sizeof (*new_peer));
-        if (new_peer == NULL)
-                return NULL;
-
-        /* start from all-zero: NULL pointers, cleared flags etc */
-        memset (new_peer, 0, sizeof (*new_peer));
-
-        INIT_LIST_HEAD (&new_peer->ksnp_conns);
-        INIT_LIST_HEAD (&new_peer->ksnp_routes);
-        INIT_LIST_HEAD (&new_peer->ksnp_tx_queue);
-
-        new_peer->ksnp_nid = nid;
-        new_peer->ksnp_closing = 0;
-        atomic_set (&new_peer->ksnp_refcount, 1);       /* caller's ref */
-
-        atomic_inc (&ksocknal_data.ksnd_npeers);
-
-        return new_peer;
-}
-
-/* Free a peer whose refcount has reached zero and which has no
- * connections, routes or queued transmits left, then decrement the global
- * peer count. */
-void
-ksocknal_destroy_peer (ksock_peer_t *peer)
-{
-        CDEBUG (D_NET, "peer "LPX64" %p deleted\n", peer->ksnp_nid, peer);
-
-        /* Nothing may still refer to, or hang off, this peer */
-        LASSERT (atomic_read (&peer->ksnp_refcount) == 0);
-        LASSERT (list_empty (&peer->ksnp_conns));
-        LASSERT (list_empty (&peer->ksnp_routes));
-        LASSERT (list_empty (&peer->ksnp_tx_queue));
-
-        PORTAL_FREE (peer, sizeof (*peer));
-
-        /* NB connections and autoconnect routes each hold a reference on
-         * their peer until they are themselves destroyed, so a refcount of
-         * zero guarantees that _all_ state belonging to this peer is
-         * already gone.  Only now is it dropped from the peer count. */
-        atomic_dec (&ksocknal_data.ksnd_npeers);
-}
-
-/* Drop one reference on 'peer'; the peer is destroyed when the last
- * reference goes. */
-void
-ksocknal_put_peer (ksock_peer_t *peer)
-{
-        CDEBUG (D_OTHER, "putting peer[%p] -> "LPX64" (%d)\n",
-                peer, peer->ksnp_nid,
-                atomic_read (&peer->ksnp_refcount));
-
-        LASSERT (atomic_read (&peer->ksnp_refcount) > 0);
-
-        if (atomic_dec_and_test (&peer->ksnp_refcount))
-                ksocknal_destroy_peer (peer);
-}
-
-/*
- * Look 'nid' up in its peer hash chain.
- *
- * Returns the matching peer WITHOUT taking a reference on it, or NULL if
- * there is none.  As the name says, the caller is expected to hold the
- * global lock.
- */
-ksock_peer_t *
-ksocknal_find_peer_locked (ptl_nid_t nid)
-{
-        struct list_head *chain = ksocknal_nid2peerlist (nid);
-        struct list_head *pos;
-        ksock_peer_t     *peer;
-
-        list_for_each (pos, chain) {
-                peer = list_entry (pos, ksock_peer_t, ksnp_list);
-
-                /* closing peers have been removed from the table */
-                LASSERT (!peer->ksnp_closing);
-
-                if (peer->ksnp_nid == nid) {
-                        CDEBUG(D_NET, "got peer [%p] -> "LPX64" (%d)\n",
-                               peer, nid, atomic_read (&peer->ksnp_refcount));
-                        return (peer);
-                }
-        }
-
-        return (NULL);
-}
-
-/*
- * Find the peer for 'nid' under the global read lock.
- *
- * Returns the peer with an extra reference taken for the caller, or NULL
- * if no such peer is in the table.
- */
-ksock_peer_t *
-ksocknal_get_peer (ptl_nid_t nid)
-{
-        ksock_peer_t *found;
-
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        found = ksocknal_find_peer_locked (nid);
-        if (found != NULL)
-                atomic_inc (&found->ksnp_refcount);     /* caller's ref */
-
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-        return (found);
-}
-
-/*
- * Take 'peer' out of the peer table: give back the per-interface peer
- * counts held for its passive IPs, mark it closing, unhash it and drop the
- * reference the peer table held on it.
- *
- * The peer must have no conns and no routes left.
- */
-void
-ksocknal_unlink_peer_locked (ksock_peer_t *peer)
-{
-        ksock_interface_t *iface;
-        int                i;
-        __u32              ip;
-
-        for (i = 0; i < peer->ksnp_n_passive_ips; i++) {
-                LASSERT (i < SOCKNAL_MAX_INTERFACES);
-                ip = peer->ksnp_passive_ips[i];
-
-                iface = ksocknal_ip2iface(ip);
-                /* peer passive ips are kept up to date, so the lookup must
-                 * succeed; assert rather than dereference NULL blindly */
-                LASSERT (iface != NULL);
-                iface->ksni_npeers--;
-        }
-
-        LASSERT (list_empty(&peer->ksnp_conns));
-        LASSERT (list_empty(&peer->ksnp_routes));
-        LASSERT (!peer->ksnp_closing);
-        peer->ksnp_closing = 1;
-        list_del (&peer->ksnp_list);
-        /* lose peerlist's ref */
-        ksocknal_put_peer (peer);
-}
-
-/*
- * Report the index'th entry of a flattened view of the peer table.
- *
- * Each peer contributes one entry per passive IP followed by one entry per
- * route; a peer with neither contributes a single all-zero entry.  On a
- * match the output parameters are filled in and 0 is returned; -ENOENT is
- * returned (outputs untouched) when 'index' is past the last entry.
- */
-int
-ksocknal_get_peer_info (int index, ptl_nid_t *nid,
-                        __u32 *myip, __u32 *peer_ip, int *port,
-                        int *conn_count, int *share_count)
-{
-        struct list_head  *ppos;
-        struct list_head  *rpos;
-        ksock_peer_t      *peer;
-        ksock_route_t     *route;
-        int                bucket;
-        int                n;
-        int                rc = -ENOENT;
-
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        for (bucket = 0;
-             bucket < ksocknal_data.ksnd_peer_hash_size;
-             bucket++) {
-                list_for_each (ppos, &ksocknal_data.ksnd_peers[bucket]) {
-                        peer = list_entry (ppos, ksock_peer_t, ksnp_list);
-
-                        /* "empty" peer: counts as exactly one entry */
-                        if (peer->ksnp_n_passive_ips == 0 &&
-                            list_empty(&peer->ksnp_routes)) {
-                                if (index-- > 0)
-                                        continue;
-
-                                *nid = peer->ksnp_nid;
-                                *myip = 0;
-                                *peer_ip = 0;
-                                *port = 0;
-                                *conn_count = 0;
-                                *share_count = 0;
-                                rc = 0;
-                                goto out;
-                        }
-
-                        /* one entry per passive IP... */
-                        for (n = 0; n < peer->ksnp_n_passive_ips; n++) {
-                                if (index-- <= 0) {
-                                        *nid = peer->ksnp_nid;
-                                        *myip = peer->ksnp_passive_ips[n];
-                                        *peer_ip = 0;
-                                        *port = 0;
-                                        *conn_count = 0;
-                                        *share_count = 0;
-                                        rc = 0;
-                                        goto out;
-                                }
-                        }
-
-                        /* ...then one entry per route */
-                        list_for_each (rpos, &peer->ksnp_routes) {
-                                if (index-- <= 0) {
-                                        route = list_entry(rpos, ksock_route_t,
-                                                           ksnr_list);
-
-                                        *nid = peer->ksnp_nid;
-                                        *myip = route->ksnr_myipaddr;
-                                        *peer_ip = route->ksnr_ipaddr;
-                                        *port = route->ksnr_port;
-                                        *conn_count = route->ksnr_conn_count;
-                                        *share_count = route->ksnr_share_count;
-                                        rc = 0;
-                                        goto out;
-                                }
-                        }
-                }
-        }
- out:
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-        return (rc);
-}
-
-void
-ksocknal_associate_route_conn_locked(ksock_route_t *route, ksock_conn_t *conn)
-{       /* Attach 'conn' to 'route': the conn's route pointer takes a
-         * reference on the route, the route's local address is made to
-         * follow the conn's (moving the per-interface route count with it),
-         * and the route's per-type connection bits, connection count and
-         * retry state are updated. */
-        ksock_peer_t      *peer = route->ksnr_peer;
-        int                type = conn->ksnc_type;
-        ksock_interface_t *iface;
-
-        conn->ksnc_route = route;
-        atomic_inc (&route->ksnr_refcount);     /* ref for conn->ksnc_route */
-
-        if (route->ksnr_myipaddr != conn->ksnc_myipaddr) {
-                if (route->ksnr_myipaddr == 0) {
-                        /* route wasn't bound locally yet (the initial route) */
-                        CWARN("Binding "LPX64" %u.%u.%u.%u to %u.%u.%u.%u\n",
-                              peer->ksnp_nid, 
-                              HIPQUAD(route->ksnr_ipaddr),
-                              HIPQUAD(conn->ksnc_myipaddr));
-                } else {
-                        CWARN("Rebinding "LPX64" %u.%u.%u.%u from "
-                              "%u.%u.%u.%u to %u.%u.%u.%u\n",
-                              peer->ksnp_nid, 
-                              HIPQUAD(route->ksnr_ipaddr),
-                              HIPQUAD(route->ksnr_myipaddr),
-                              HIPQUAD(conn->ksnc_myipaddr));
-                        
-                        /* the old local interface loses this route */
-                        iface = ksocknal_ip2iface(route->ksnr_myipaddr);
-                        if (iface != NULL) 
-                                iface->ksni_nroutes--;
-                }
-                /* the conn's local interface (if known) gains this route */
-                route->ksnr_myipaddr = conn->ksnc_myipaddr;
-                iface = ksocknal_ip2iface(route->ksnr_myipaddr);
-                if (iface != NULL) 
-                        iface->ksni_nroutes++;
-        }
-
-        /* one bit per connection type: now connected, no longer connecting */
-        route->ksnr_connected |= (1<<type);
-        route->ksnr_connecting &= ~(1<<type);
-        route->ksnr_conn_count++;
-
-        /* Successful connection => further attempts can
-         * proceed immediately */
-        route->ksnr_timeout = jiffies;
-        route->ksnr_retry_interval = SOCKNAL_MIN_RECONNECT_INTERVAL;
-}
-
-/*
- * Add the freshly created 'route' to 'peer'.
- *
- * The route takes a reference on the peer, and the peer's route list takes
- * over the caller's reference on the route.  Any existing conns of the
- * peer to the route's IP address are associated with it straight away.
- * Adding a second route to the same IP address is a bug (LBUG).
- */
-void
-ksocknal_add_route_locked (ksock_peer_t *peer, ksock_route_t *route)
-{
-        struct list_head  *tmp;
-        ksock_conn_t      *conn;
-        ksock_route_t     *route2;
-
-        LASSERT (route->ksnr_peer == NULL);
-        LASSERT (route->ksnr_connecting == 0);
-        LASSERT (route->ksnr_connected == 0);
-
-        /* LASSERT(unique) */
-        list_for_each(tmp, &peer->ksnp_routes) {
-                route2 = list_entry(tmp, ksock_route_t, ksnr_list);
-
-                if (route2->ksnr_ipaddr == route->ksnr_ipaddr) {
-                        CERROR ("Duplicate route "LPX64" %u.%u.%u.%u\n",
-                                peer->ksnp_nid, HIPQUAD(route->ksnr_ipaddr));
-                        LBUG();
-                }
-        }
-
-        route->ksnr_peer = peer;
-        atomic_inc (&peer->ksnp_refcount);      /* ref for route->ksnr_peer */
-        /* peer's routelist takes over my ref on 'route' */
-        list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-
-        list_for_each(tmp, &peer->ksnp_conns) {
-                conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_ipaddr != route->ksnr_ipaddr)
-                        continue;
-
-                ksocknal_associate_route_conn_locked(route, conn);
-                /* keep going (typed routes) */
-        }
-}
-
-void
-ksocknal_del_route_locked (ksock_route_t *route)
-{       /* Remove 'route' from its peer: close the conns using it, give
-         * back its per-interface route count, mark it deleted, unlist it
-         * and drop the peer's reference on it.  If that leaves the peer
-         * with neither routes nor conns, the peer is unlinked too. */
-        ksock_peer_t      *peer = route->ksnr_peer;
-        ksock_interface_t *iface;
-        ksock_conn_t      *conn;
-        struct list_head  *ctmp;
-        struct list_head  *cnxt;
-
-        LASSERT (!route->ksnr_deleted);
-
-        /* Close associated conns */
-        list_for_each_safe (ctmp, cnxt, &peer->ksnp_conns) {
-                conn = list_entry(ctmp, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_route != route)
-                        continue;
-                
-                ksocknal_close_conn_locked (conn, 0);
-        }
-
-        if (route->ksnr_myipaddr != 0) {        /* bound to a local address */
-                iface = ksocknal_ip2iface(route->ksnr_myipaddr);
-                if (iface != NULL)
-                        iface->ksni_nroutes--;
-        }
-
-        route->ksnr_deleted = 1;
-        list_del (&route->ksnr_list);
-        ksocknal_put_route (route);             /* drop peer's ref */
-
-        if (list_empty (&peer->ksnp_routes) &&
-            list_empty (&peer->ksnp_conns)) {
-                /* I've just removed the last autoconnect route of a peer
-                 * with no active connections */
-                ksocknal_unlink_peer_locked (peer);
-        }
-}
-
-/*
- * Explicitly add a route to 'ipaddr'/'port' for peer 'nid', creating the
- * peer if it isn't in the table yet.  If the peer already has a route to
- * 'ipaddr', only that route's share count is bumped.
- *
- * Returns 0 on success, -EINVAL for PTL_NID_ANY, or -ENOMEM.
- */
-int
-ksocknal_add_peer (ptl_nid_t nid, __u32 ipaddr, int port)
-{
-        unsigned long      flags;
-        struct list_head  *pos;
-        ksock_peer_t      *peer;
-        ksock_peer_t      *existing;
-        ksock_route_t     *route;
-        ksock_route_t     *match = NULL;
-
-        if (nid == PTL_NID_ANY)
-                return (-EINVAL);
-
-        /* Build both objects up front, before taking the lock */
-        peer = ksocknal_create_peer (nid);
-        if (peer == NULL)
-                return (-ENOMEM);
-
-        route = ksocknal_create_route (ipaddr, port);
-        if (route == NULL) {
-                ksocknal_put_peer (peer);
-                return (-ENOMEM);
-        }
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        existing = ksocknal_find_peer_locked (nid);
-        if (existing == NULL) {
-                /* peer table takes my ref on peer */
-                list_add_tail (&peer->ksnp_list,
-                               ksocknal_nid2peerlist (nid));
-        } else {
-                /* already known: discard the spare and use the original */
-                ksocknal_put_peer (peer);
-                peer = existing;
-        }
-
-        /* Does the peer have a route to this address already? */
-        list_for_each (pos, &peer->ksnp_routes) {
-                ksock_route_t *cur = list_entry(pos, ksock_route_t, ksnr_list);
-
-                if (cur->ksnr_ipaddr == ipaddr) {
-                        match = cur;
-                        break;
-                }
-        }
-
-        if (match != NULL) {
-                ksocknal_put_route(route);
-                match->ksnr_share_count++;
-        } else {
-                ksocknal_add_route_locked(peer, route);
-                route->ksnr_share_count++;
-        }
-
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        return (0);
-}
-
-/*
- * Delete routes of 'peer' matching 'ip' (0 matches every route).
- *
- * With 'single_share' set, one share is removed from the first matching
- * route that still has any, and the route is deleted only when its share
- * count drops to zero.  Otherwise every matching route has its share count
- * zeroed and is deleted.  If no shared routes remain afterwards, all
- * remaining routes and conns of the peer are torn down as well.
- */
-void
-ksocknal_del_peer_locked (ksock_peer_t *peer, __u32 ip, int single_share)
-{
-        ksock_conn_t     *conn;
-        ksock_route_t    *route;
-        struct list_head *tmp;
-        struct list_head *nxt;
-        int               nshared;
-
-        LASSERT (!peer->ksnp_closing);
-
-        /* Deleting the last route/conn unlinks the peer, which can drop its
-         * final reference and free it.  Hold an extra ref so 'peer' stays
-         * valid while its lists are still being walked below. */
-        atomic_inc (&peer->ksnp_refcount);
-
-        list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
-                route = list_entry(tmp, ksock_route_t, ksnr_list);
-
-                if (single_share && route->ksnr_share_count == 0)
-                        continue;
-
-                /* no match */
-                if (!(ip == 0 || route->ksnr_ipaddr == ip))
-                        continue;
-
-                if (!single_share)
-                        route->ksnr_share_count = 0;
-                else if (route->ksnr_share_count > 0)
-                        route->ksnr_share_count--;
-
-                if (route->ksnr_share_count == 0) {
-                        /* This deletes associated conns too */
-                        ksocknal_del_route_locked (route);
-                }
-
-                if (single_share)
-                        break;
-        }
-
-        /* count the explicit (shared) entries that are left */
-        nshared = 0;
-        list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
-                route = list_entry(tmp, ksock_route_t, ksnr_list);
-                nshared += route->ksnr_share_count;
-        }
-
-        if (nshared == 0) {
-                /* remove everything else if there are no explicit entries
-                 * left */
-
-                list_for_each_safe (tmp, nxt, &peer->ksnp_routes) {
-                        route = list_entry(tmp, ksock_route_t, ksnr_list);
-
-                        /* we should only be removing auto-entries */
-                        LASSERT(route->ksnr_share_count == 0);
-                        ksocknal_del_route_locked (route);
-                }
-
-                list_for_each_safe (tmp, nxt, &peer->ksnp_conns) {
-                        conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
-                        ksocknal_close_conn_locked(conn, 0);
-                }
-        }
-
-        /* NB peer unlinks itself when last conn/route is removed; dropping
-         * my temporary ref may be what finally destroys it */
-        ksocknal_put_peer (peer);
-}
-
-/*
- * Delete routes to 'ip' from the peer 'nid', or from every peer when 'nid'
- * is PTL_NID_ANY.  'ip' and 'single_share' are passed straight on to
- * ksocknal_del_peer_locked().
- *
- * Returns 0 if at least one peer matched, -ENOENT otherwise.
- */
-int
-ksocknal_del_peer (ptl_nid_t nid, __u32 ip, int single_share)
-{
-        unsigned long      flags;
-        struct list_head  *pos;
-        struct list_head  *next;
-        ksock_peer_t      *peer;
-        int                first;
-        int                last;
-        int                bucket;
-        int                rc = -ENOENT;
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: scan the whole hash table */
-                first = 0;
-                last = ksocknal_data.ksnd_peer_hash_size - 1;
-        } else {
-                /* specific NID: only its own hash chain can hold it */
-                first = last = ksocknal_nid2peerlist(nid) -
-                               ksocknal_data.ksnd_peers;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, next,
-                                    &ksocknal_data.ksnd_peers[bucket]) {
-                        peer = list_entry (pos, ksock_peer_t, ksnp_list);
-
-                        if (nid != PTL_NID_ANY && peer->ksnp_nid != nid)
-                                continue;
-
-                        ksocknal_del_peer_locked (peer, ip, single_share);
-                        rc = 0;                 /* matched! */
-
-                        if (single_share)
-                                break;
-                }
-        }
-
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        return (rc);
-}
-
-/*
- * Return the index'th connection, counting across all peers in hash table
- * order, with an extra reference taken for the caller; NULL if 'index' is
- * past the last connection.
- */
-ksock_conn_t *
-ksocknal_get_conn_by_idx (int index)
-{
-        struct list_head  *ppos;
-        struct list_head  *cpos;
-        ksock_peer_t      *peer;
-        ksock_conn_t      *found = NULL;
-        int                bucket;
-
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        for (bucket = 0;
-             bucket < ksocknal_data.ksnd_peer_hash_size;
-             bucket++) {
-                list_for_each (ppos, &ksocknal_data.ksnd_peers[bucket]) {
-                        peer = list_entry (ppos, ksock_peer_t, ksnp_list);
-
-                        LASSERT (!peer->ksnp_closing);
-
-                        list_for_each (cpos, &peer->ksnp_conns) {
-                                if (index-- > 0)
-                                        continue;
-
-                                found = list_entry (cpos, ksock_conn_t,
-                                                    ksnc_list);
-                                /* caller's ref */
-                                atomic_inc (&found->ksnc_refcount);
-                                goto out;
-                        }
-                }
-        }
- out:
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-        return (found);
-}
-
-/*
- * Record the socket's remote IP address and port and its local IP address
- * in 'conn', all in host byte order.
- *
- * Returns 0 on success or the error returned by the socket's getname op.
- */
-int
-ksocknal_get_conn_addrs (ksock_conn_t *conn)
-{
-        struct socket     *sock = conn->ksnc_sock;
-        struct sockaddr_in sin;
-        int                len = sizeof (sin);
-        int                rc;
-
-        /* final argument 2: ask for the peer's address */
-        rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len, 2);
-        /* Didn't need the {get,put}connsock dance to deref ksnc_sock... */
-        LASSERT (!conn->ksnc_closing);
-
-        if (rc != 0) {
-                CERROR ("Error %d getting sock peer IP\n", rc);
-                return rc;
-        }
-
-        conn->ksnc_ipaddr = ntohl (sin.sin_addr.s_addr);
-        conn->ksnc_port   = ntohs (sin.sin_port);
-
-        /* final argument 0: ask for the local address */
-        rc = sock->ops->getname (sock, (struct sockaddr *)&sin, &len, 0);
-        if (rc != 0) {
-                CERROR ("Error %d getting sock local IP\n", rc);
-                return rc;
-        }
-
-        conn->ksnc_myipaddr = ntohl (sin.sin_addr.s_addr);
-
-        return 0;
-}
-
-/*
- * Return the IRQ of the network device behind 'sock's cached route.
- *
- * 0 is returned when IRQ affinity is disabled by tunable, when the socket
- * has no cached dst entry or device, or when the device reports an IRQ
- * number of NR_IRQS or more.
- */
-unsigned int
-ksocknal_sock_irq (struct socket *sock)
-{
-        struct dst_entry  *dst;
-        int                irq = 0;
-
-        if (!ksocknal_tunables.ksnd_irq_affinity)
-                return 0;
-
-        dst = sk_dst_get (sock->sk);
-        if (dst == NULL)
-                return (irq);
-
-        if (dst->dev != NULL) {
-                irq = dst->dev->irq;
-                if (irq >= NR_IRQS) {
-                        CERROR ("Unexpected IRQ %x\n", irq);
-                        irq = 0;
-                }
-        }
-
-        /* give back the reference sk_dst_get() took */
-        dst_release (dst);
-
-        return (irq);
-}
-
-/*
- * Pick the scheduler that should service a connection arriving on 'irq'.
- *
- * A non-zero IRQ that already has a scheduler recorded keeps it.  Otherwise
- * the scheduler with the fewest connections is chosen (the lowest-numbered
- * one on a tie) and, for a non-zero IRQ, remembered for next time.
- */
-ksock_sched_t *
-ksocknal_choose_scheduler_locked (unsigned int irq)
-{
-        ksock_sched_t    *sched;
-        ksock_sched_t    *other;
-        ksock_irqinfo_t  *info;
-        int               n;
-
-        LASSERT (irq < NR_IRQS);
-        info = &ksocknal_data.ksnd_irqinfo[irq];
-
-        /* hardware NIC whose scheduler is already set up */
-        if (irq != 0 && info->ksni_valid)
-                return (&ksocknal_data.ksnd_schedulers[info->ksni_sched]);
-
-        /* software NIC (irq == 0) || not associated with a scheduler yet.
-         * Choose the CPU with the fewest connections... */
-        sched = &ksocknal_data.ksnd_schedulers[0];
-        for (n = 1; n < ksocknal_data.ksnd_nschedulers; n++) {
-                other = &ksocknal_data.ksnd_schedulers[n];
-
-                if (other->kss_nconns < sched->kss_nconns)
-                        sched = other;
-        }
-
-        if (irq != 0) {                         /* Hardware NIC */
-                info->ksni_valid = 1;
-                info->ksni_sched = sched - ksocknal_data.ksnd_schedulers;
-
-                /* no overflow... */
-                LASSERT (info->ksni_sched == sched - ksocknal_data.ksnd_schedulers);
-        }
-
-        return (sched);
-}
-
-/*
- * Copy the IP addresses of all configured local interfaces into 'ipaddrs'
- * and return how many were copied.  The caller's array must have room for
- * SOCKNAL_MAX_INTERFACES entries.
- */
-int
-ksocknal_local_ipvec (__u32 *ipaddrs)
-{
-        int                i;
-        int                count;
-
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        count = ksocknal_data.ksnd_ninterfaces;
-        for (i = 0; i < count; i++) {
-                LASSERT (i < SOCKNAL_MAX_INTERFACES);
-
-                ipaddrs[i] = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr;
-                /* a configured interface always has an address */
-                LASSERT (ipaddrs[i] != 0);
-        }
-
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-        return (count);
-}
-
-/*
- * Return the index in 'ips' of the peer address that best matches 'iface'.
- *
- * Entries equal to 0 are skipped (callers zero an entry to mark it used).
- * An address on the interface's own subnet beats one that isn't; among
- * equals, the address with the smallest XOR distance from the interface
- * address wins.  At least one non-zero entry must be present.
- */
-int
-ksocknal_match_peerip (ksock_interface_t *iface, __u32 *ips, int nips)
-{
-        int   best_netmatch = 0;
-        /* XOR distances are 32-bit address values: keep them unsigned so
-         * an address differing in the top bit compares as far away rather
-         * than as a negative (i.e. "closest") distance */
-        __u32 best_xor      = 0;
-        int   best          = -1;
-        __u32 this_xor;
-        int   this_netmatch;
-        int   i;
-
-        for (i = 0; i < nips; i++) {
-                if (ips[i] == 0)
-                        continue;
-
-                this_xor = (ips[i] ^ iface->ksni_ipaddr);
-                this_netmatch = ((this_xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
-                /* skip unless this is the first candidate or it improves
-                 * on the best one seen so far */
-                if (!(best < 0 ||
-                      best_netmatch < this_netmatch ||
-                      (best_netmatch == this_netmatch &&
-                       best_xor > this_xor)))
-                        continue;
-
-                best = i;
-                best_netmatch = this_netmatch;
-                best_xor = this_xor;
-        }
-
-        LASSERT (best >= 0);
-        return (best);
-}
-
-/*
- * Choose which local interfaces 'peer' should connect to, given the
- * 'n_peerips' addresses in 'peerips' that the peer offered.
- *
- * The peer's passive IP list is extended until it holds
- * MIN(n_peerips, number of local interfaces) entries.  Each new entry is
- * an interface not yet in the list, preferring one on the same subnet as
- * its best-matching peer address and, among equals, the one serving the
- * fewest peers.
- *
- * On return 'peerips' has been overwritten with the selected local
- * addresses; the number of addresses written is returned.
- */
-int
-ksocknal_select_ips(ksock_peer_t *peer, __u32 *peerips, int n_peerips)
-{
-        rwlock_t           *global_lock = &ksocknal_data.ksnd_global_lock;
-        unsigned long       flags;
-        ksock_interface_t  *iface;
-        ksock_interface_t  *best_iface;
-        int                 n_ips;
-        int                 i;
-        int                 j;
-        int                 k;
-        __u32               ip;
-        __u32               xor;
-        int                 this_netmatch;
-        int                 best_netmatch;
-        int                 best_npeers;
-
-        /* CAVEAT EMPTOR: We do all our interface matching with an
-         * exclusive hold of global lock at IRQ priority.  We're only
-         * expecting to be dealing with small numbers of interfaces, so the
-         * O(n**3)-ness shouldn't matter */
-
-        /* Also note that I'm not going to return more than n_peerips
-         * interfaces, even if I have more myself */
-
-        write_lock_irqsave(global_lock, flags);
-
-        LASSERT (n_peerips <= SOCKNAL_MAX_INTERFACES);
-        LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES);
-
-        n_ips = MIN(n_peerips, ksocknal_data.ksnd_ninterfaces);
-
-        for (i = 0; peer->ksnp_n_passive_ips < n_ips; i++) {
-                /*              ^ yes really... */
-
-                /* If we have any new interfaces, first tick off all the
-                 * peer IPs that match old interfaces, then choose new
-                 * interfaces to match the remaining peer IPS.
-                 * We don't forget interfaces we've stopped using; we might
-                 * start using them again... */
-
-                if (i < peer->ksnp_n_passive_ips) {
-                        /* Old interface. */
-                        ip = peer->ksnp_passive_ips[i];
-                        best_iface = ksocknal_ip2iface(ip);
-
-                        /* peer passive ips are kept up to date */
-                        LASSERT(best_iface != NULL);
-                } else {
-                        /* choose a new interface */
-                        LASSERT (i == peer->ksnp_n_passive_ips);
-
-                        best_iface = NULL;
-                        best_netmatch = 0;
-                        best_npeers = 0;
-
-                        for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) {
-                                iface = &ksocknal_data.ksnd_interfaces[j];
-                                ip = iface->ksni_ipaddr;
-
-                                for (k = 0; k < peer->ksnp_n_passive_ips; k++)
-                                        if (peer->ksnp_passive_ips[k] == ip)
-                                                break;
-
-                                if (k < peer->ksnp_n_passive_ips) /* using it already */
-                                        continue;
-
-                                k = ksocknal_match_peerip(iface, peerips, n_peerips);
-                                xor = (ip ^ peerips[k]);
-                                this_netmatch = ((xor & iface->ksni_netmask) == 0) ? 1 : 0;
-
-                                if (!(best_iface == NULL ||
-                                      best_netmatch < this_netmatch ||
-                                      (best_netmatch == this_netmatch &&
-                                       best_npeers > iface->ksni_npeers)))
-                                        continue;
-
-                                best_iface = iface;
-                                best_netmatch = this_netmatch;
-                                best_npeers = iface->ksni_npeers;
-                        }
-
-                        /* The loop condition guarantees an unused interface
-                         * exists; check that _before_ dereferencing it */
-                        LASSERT (best_iface != NULL);
-
-                        best_iface->ksni_npeers++;
-                        ip = best_iface->ksni_ipaddr;
-                        peer->ksnp_passive_ips[i] = ip;
-                        peer->ksnp_n_passive_ips = i+1;
-                }
-
-                /* mark the best matching peer IP used */
-                j = ksocknal_match_peerip(best_iface, peerips, n_peerips);
-                peerips[j] = 0;
-        }
-
-        /* Overwrite input peer IP addresses */
-        memcpy(peerips, peer->ksnp_passive_ips, n_ips * sizeof(*peerips));
-
-        write_unlock_irqrestore(global_lock, flags);
-
-        return (n_ips);
-}
-
-void
-ksocknal_create_routes(ksock_peer_t *peer, int port, 
-                       __u32 *peer_ipaddrs, int npeer_ipaddrs)
-{       /* For every address in 'peer_ipaddrs' that 'peer' has no route to
-         * yet, add a route to that address on 'port', bound to a local
-         * interface the peer's existing routes aren't using.  Returns
-         * silently (doing nothing more) if a route can't be allocated. */
-        ksock_route_t      *newroute = NULL;
-        rwlock_t           *global_lock = &ksocknal_data.ksnd_global_lock;
-        unsigned long       flags;
-        struct list_head   *rtmp;
-        ksock_route_t      *route;
-        ksock_interface_t  *iface;
-        ksock_interface_t  *best_iface;
-        int                 best_netmatch;
-        int                 this_netmatch;
-        int                 best_nroutes;
-        int                 i;
-        int                 j;
-
-        /* CAVEAT EMPTOR: We do all our interface matching with an
-         * exclusive hold of global lock at IRQ priority.  We're only
-         * expecting to be dealing with small numbers of interfaces, so the
-         * O(n**3)-ness here shouldn't matter */
-
-        write_lock_irqsave(global_lock, flags);
-
-        LASSERT (npeer_ipaddrs <= SOCKNAL_MAX_INTERFACES);
-        
-        for (i = 0; i < npeer_ipaddrs; i++) {
-                if (newroute != NULL) {
-                        /* spare route left over from an earlier iteration:
-                         * just retarget it */
-                        newroute->ksnr_ipaddr = peer_ipaddrs[i];
-                } else {
-                        /* the route is created with the lock dropped */
-                        write_unlock_irqrestore(global_lock, flags);
-
-                        newroute = ksocknal_create_route(peer_ipaddrs[i], port);
-                        if (newroute == NULL)
-                                return;         /* NB lock not held here */
-
-                        write_lock_irqsave(global_lock, flags);
-                }
-                
-                /* Already got a route? */
-                route = NULL;
-                list_for_each(rtmp, &peer->ksnp_routes) {
-                        route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
-                        if (route->ksnr_ipaddr == newroute->ksnr_ipaddr)
-                                break;
-                        
-                        route = NULL;
-                }
-                if (route != NULL)
-                        continue;       /* 'newroute' is kept as the spare */
-
-                best_iface = NULL;
-                best_nroutes = 0;
-                best_netmatch = 0;
-
-                LASSERT (ksocknal_data.ksnd_ninterfaces <= SOCKNAL_MAX_INTERFACES);
-
-                /* Select interface to connect from */
-                for (j = 0; j < ksocknal_data.ksnd_ninterfaces; j++) {
-                        iface = &ksocknal_data.ksnd_interfaces[j];
-
-                        /* Using this interface already? */
-                        list_for_each(rtmp, &peer->ksnp_routes) {
-                                route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
-                                if (route->ksnr_myipaddr == iface->ksni_ipaddr)
-                                        break;
-
-                                route = NULL;
-                        }
-                        if (route != NULL)
-                                continue;
-
-                        /* 1 if the peer address is on this interface's
-                         * subnet, else 0 */
-                        this_netmatch = (((iface->ksni_ipaddr ^ 
-                                           newroute->ksnr_ipaddr) & 
-                                           iface->ksni_netmask) == 0) ? 1 : 0;
-                        
-                        /* skip unless this is the first candidate, a better
-                         * subnet match, or an equal match with fewer routes */
-                        if (!(best_iface == NULL ||
-                              best_netmatch < this_netmatch ||
-                              (best_netmatch == this_netmatch &&
-                               best_nroutes > iface->ksni_nroutes)))
-                                continue;
-                        
-                        best_iface = iface;
-                        best_netmatch = this_netmatch;
-                        best_nroutes = iface->ksni_nroutes;
-                }
-                
-                if (best_iface == NULL)
-                        continue;       /* no unused interface; keep the spare */
-
-                newroute->ksnr_myipaddr = best_iface->ksni_ipaddr;
-                best_iface->ksni_nroutes++;
-
-                ksocknal_add_route_locked(peer, newroute);
-                newroute = NULL;        /* now owned by the peer's route list */
-        }
-        
-        write_unlock_irqrestore(global_lock, flags);
-        if (newroute != NULL)
-                ksocknal_put_route(newroute);   /* discard the unused spare */
-}
-
-int
-ksocknal_create_conn (ksock_route_t *route, struct socket *sock, int type)
-{
-        int                passive = (type == SOCKNAL_CONN_NONE);
-        rwlock_t          *global_lock = &ksocknal_data.ksnd_global_lock;
-        __u32              ipaddrs[SOCKNAL_MAX_INTERFACES];
-        int                nipaddrs;
-        ptl_nid_t          nid;
-        struct list_head  *tmp;
-        __u64              incarnation;
-        unsigned long      flags;
-        ksock_conn_t      *conn;
-        ksock_conn_t      *conn2;
-        ksock_peer_t      *peer = NULL;
-        ksock_peer_t      *peer2;
-        ksock_sched_t     *sched;
-        unsigned int       irq;
-        ksock_tx_t        *tx;
-        int                rc;
-
-        /* NB, sock has an associated file since (a) this connection might
-         * have been created in userland and (b) we need to refcount the
-         * socket so that we don't close it while I/O is being done on
-         * it, and sock->file has that pre-cooked... */
-        LASSERT (sock->file != NULL);
-        LASSERT (file_count(sock->file) > 0);
-        LASSERT (route == NULL || !passive);
-
-        rc = ksocknal_setup_sock (sock);
-        if (rc != 0)
-                return (rc);
-
-        irq = ksocknal_sock_irq (sock);
-
-        PORTAL_ALLOC(conn, sizeof(*conn));
-        if (conn == NULL)
-                return (-ENOMEM);
-
-        memset (conn, 0, sizeof (*conn));
-        conn->ksnc_peer = NULL;
-        conn->ksnc_route = NULL;
-        conn->ksnc_sock = sock;
-        conn->ksnc_type = type;
-        conn->ksnc_saved_data_ready = sock->sk->sk_data_ready;
-        conn->ksnc_saved_write_space = sock->sk->sk_write_space;
-        atomic_set (&conn->ksnc_refcount, 1);    /* 1 ref for me */
-
-        conn->ksnc_rx_ready = 0;
-        conn->ksnc_rx_scheduled = 0;
-        ksocknal_new_packet (conn, 0);
-
-        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
-        conn->ksnc_tx_ready = 0;
-        conn->ksnc_tx_scheduled = 0;
-        atomic_set (&conn->ksnc_tx_nob, 0);
-
-        /* stash conn's local and remote addrs */
-        rc = ksocknal_get_conn_addrs (conn);
-        if (rc != 0)
-                goto failed_0;
-
-        if (!passive) {
-                /* Active connection sends HELLO eagerly */
-                rc = ksocknal_local_ipvec(ipaddrs);
-                if (rc < 0)
-                        goto failed_0;
-                nipaddrs = rc;
-
-                rc = ksocknal_send_hello (conn, ipaddrs, nipaddrs);
-                if (rc != 0)
-                        goto failed_0;
-        }
-
-        /* Find out/confirm peer's NID and connection type and get the
-         * vector of interfaces she's willing to let me connect to */
-        nid = (route == NULL) ? PTL_NID_ANY : route->ksnr_peer->ksnp_nid;
-        rc = ksocknal_recv_hello (conn, &nid, &incarnation, ipaddrs);
-        if (rc < 0)
-                goto failed_0;
-        nipaddrs = rc;
-        LASSERT (nid != PTL_NID_ANY);
-
-        if (route != NULL) {
-                peer = route->ksnr_peer;
-                atomic_inc(&peer->ksnp_refcount);
-        } else {
-                peer = ksocknal_create_peer(nid);
-                if (peer == NULL) {
-                        rc = -ENOMEM;
-                        goto failed_0;
-                }
-
-                write_lock_irqsave(global_lock, flags);
-
-                peer2 = ksocknal_find_peer_locked(nid);
-                if (peer2 == NULL) {
-                        /* NB this puts an "empty" peer in the peer
-                         * table (which takes my ref) */
-                        list_add_tail(&peer->ksnp_list,
-                                      ksocknal_nid2peerlist(nid));
-                } else  {
-                        ksocknal_put_peer(peer);
-                        peer = peer2;
-                }
-                /* +1 ref for me */
-                atomic_inc(&peer->ksnp_refcount);
-
-                write_unlock_irqrestore(global_lock, flags);
-        }
-        
-        if (!passive) {
-                ksocknal_create_routes(peer, conn->ksnc_port, 
-                                       ipaddrs, nipaddrs);
-                rc = 0;
-        } else {
-                rc = ksocknal_select_ips(peer, ipaddrs, nipaddrs);
-                LASSERT (rc >= 0);
-                rc = ksocknal_send_hello (conn, ipaddrs, rc);
-        }
-        if (rc < 0)
-                goto failed_1;
-        
-        write_lock_irqsave (global_lock, flags);
-
-        if (peer->ksnp_closing ||
-            (route != NULL && route->ksnr_deleted)) {
-                /* route/peer got closed under me */
-                rc = -ESTALE;
-                goto failed_2;
-        }
-
-        /* Refuse to duplicate an existing connection (both sides might
-         * autoconnect at once), unless this is a loopback connection */
-        if (conn->ksnc_ipaddr != conn->ksnc_myipaddr) {
-                list_for_each(tmp, &peer->ksnp_conns) {
-                        conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-
-                        if (conn2->ksnc_ipaddr != conn->ksnc_ipaddr ||
-                            conn2->ksnc_myipaddr != conn->ksnc_myipaddr ||
-                            conn2->ksnc_type != conn->ksnc_type ||
-                            conn2->ksnc_incarnation != incarnation)
-                                continue;
-
-                        CWARN("Not creating duplicate connection to "
-                              "%u.%u.%u.%u type %d\n",
-                              HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_type);
-                        rc = -EALREADY;
-                        goto failed_2;
-                }
-        }
-
-        /* If the connection created by this route didn't bind to the IP
-         * address the route connected to, the connection/route matching
-         * code below probably isn't going to work. */
-        if (route != NULL &&
-            route->ksnr_ipaddr != conn->ksnc_ipaddr) {
-                CERROR("Route "LPX64" %u.%u.%u.%u connected to %u.%u.%u.%u\n",
-                       peer->ksnp_nid,
-                       HIPQUAD(route->ksnr_ipaddr),
-                       HIPQUAD(conn->ksnc_ipaddr));
-        }
-
-        /* Search for a route corresponding to the new connection and
-         * create an association.  This allows incoming connections created
-         * by routes in my peer to match my own route entries so I don't
-         * continually create duplicate routes. */
-        list_for_each (tmp, &peer->ksnp_routes) {
-                route = list_entry(tmp, ksock_route_t, ksnr_list);
-
-                if (route->ksnr_ipaddr != conn->ksnc_ipaddr)
-                        continue;
-                
-                ksocknal_associate_route_conn_locked(route, conn);
-                break;
-        }
-
-        /* Give conn a ref on sock->file since we're going to return success */
-        get_file(sock->file);
-
-        conn->ksnc_peer = peer;                 /* conn takes my ref on peer */
-        conn->ksnc_incarnation = incarnation;
-        peer->ksnp_last_alive = jiffies;
-        peer->ksnp_error = 0;
-
-        sched = ksocknal_choose_scheduler_locked (irq);
-        sched->kss_nconns++;
-        conn->ksnc_scheduler = sched;
-
-        /* Set the deadline for the outgoing HELLO to drain */
-        conn->ksnc_tx_bufnob = sock->sk->sk_wmem_queued;
-        conn->ksnc_tx_deadline = jiffies +
-                                 ksocknal_tunables.ksnd_io_timeout * HZ;
-        mb();       /* order with adding to peer's conn list */
-
-        list_add (&conn->ksnc_list, &peer->ksnp_conns);
-        atomic_inc (&conn->ksnc_refcount);
-
-        /* NB my callbacks block while I hold ksnd_global_lock */
-        sock->sk->sk_user_data = conn;
-        sock->sk->sk_data_ready = ksocknal_data_ready;
-        sock->sk->sk_write_space = ksocknal_write_space;
-
-        /* Take all the packets blocking for a connection.
-         * NB, it might be nicer to share these blocked packets among any
-         * other connections that are becoming established. */
-        while (!list_empty (&peer->ksnp_tx_queue)) {
-                tx = list_entry (peer->ksnp_tx_queue.next,
-                                 ksock_tx_t, tx_list);
-
-                list_del (&tx->tx_list);
-                ksocknal_queue_tx_locked (tx, conn);
-        }
-
-        rc = ksocknal_close_stale_conns_locked(peer, incarnation);
-        if (rc != 0)
-                CERROR ("Closed %d stale conns to nid "LPX64" ip %d.%d.%d.%d\n",
-                        rc, conn->ksnc_peer->ksnp_nid,
-                        HIPQUAD(conn->ksnc_ipaddr));
-
-        write_unlock_irqrestore (global_lock, flags);
-
-        ksocknal_bind_irq (irq);
-
-        /* Call the callbacks right now to get things going. */
-        if (ksocknal_getconnsock(conn) == 0) {
-                ksocknal_data_ready (sock->sk, 0);
-                ksocknal_write_space (sock->sk);
-                ksocknal_putconnsock(conn);
-        }
-
-        CWARN("New conn nid:"LPX64" %u.%u.%u.%u -> %u.%u.%u.%u/%d"
-              " incarnation:"LPX64" sched[%d]/%d\n",
-              nid, HIPQUAD(conn->ksnc_myipaddr), 
-              HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port, incarnation,
-              (int)(conn->ksnc_scheduler - ksocknal_data.ksnd_schedulers), irq);
-
-        ksocknal_put_conn (conn);
-        return (0);
-
- failed_2:
-        if (!peer->ksnp_closing &&
-            list_empty (&peer->ksnp_conns) &&
-            list_empty (&peer->ksnp_routes))
-                ksocknal_unlink_peer_locked(peer);
-        write_unlock_irqrestore(global_lock, flags);
-
- failed_1:
-        ksocknal_put_peer (peer);
-
- failed_0:
-        PORTAL_FREE (conn, sizeof(*conn));
-
-        LASSERT (rc != 0);
-        return (rc);
-}
-
-void
-ksocknal_close_conn_locked (ksock_conn_t *conn, int error)
-{
-        /* This just does the immediate housekeeping, and queues the
-         * connection for the reaper to terminate.
-         * Caller holds ksnd_global_lock exclusively in irq context.
-         *
-         * 'error' is only recorded (in the peer's ksnp_error) if this turns
-         * out to be the peer's last connection. */
-        ksock_peer_t      *peer = conn->ksnc_peer;
-        ksock_route_t     *route;
-        ksock_conn_t      *conn2;
-        struct list_head  *tmp;
-
-        LASSERT (peer->ksnp_error == 0);
-        LASSERT (!conn->ksnc_closing);
-        conn->ksnc_closing = 1;
-        atomic_inc (&ksocknal_data.ksnd_nclosing_conns);
-        
-        /* ksnd_deathrow_conns takes over peer's ref */
-        list_del (&conn->ksnc_list);
-
-        route = conn->ksnc_route;
-        if (route != NULL) {
-                /* dissociate conn from route...
-                 * NB conn is already off the peer's conn list, so the scan
-                 * below only sees the peer's other conns.  The route's
-                 * 'connected' bit for this conn type is cleared only if
-                 * none of them uses the same route with the same type. */
-                LASSERT (!route->ksnr_deleted);
-                LASSERT ((route->ksnr_connecting & (1 << conn->ksnc_type)) == 0);
-                LASSERT ((route->ksnr_connected & (1 << conn->ksnc_type)) != 0);
-
-                conn2 = NULL;
-                list_for_each(tmp, &peer->ksnp_conns) {
-                        conn2 = list_entry(tmp, ksock_conn_t, ksnc_list);
-                        
-                        if (conn2->ksnc_route == route &&
-                            conn2->ksnc_type == conn->ksnc_type)
-                                break;
-                        
-                        conn2 = NULL;
-                }
-                if (conn2 == NULL)
-                        route->ksnr_connected &= ~(1 << conn->ksnc_type);
-
-                conn->ksnc_route = NULL;
-
-#if 0           /* irrelevant with only eager routes */
-                list_del (&route->ksnr_list);   /* make route least favourite */
-                list_add_tail (&route->ksnr_list, &peer->ksnp_routes);
-#endif
-                ksocknal_put_route (route);     /* drop conn's ref on route */
-        }
-
-        if (list_empty (&peer->ksnp_conns)) {
-                /* No more connections to this peer */
-
-                peer->ksnp_error = error;       /* stash last conn close reason */
-
-                if (list_empty (&peer->ksnp_routes)) {
-                        /* I've just closed last conn belonging to a
-                         * non-autoconnecting peer */
-                        ksocknal_unlink_peer_locked (peer);
-                }
-        }
-
-        spin_lock (&ksocknal_data.ksnd_reaper_lock);    /* hand conn over to the reaper */
-
-        list_add_tail (&conn->ksnc_list, &ksocknal_data.ksnd_deathrow_conns);
-        wake_up (&ksocknal_data.ksnd_reaper_waitq);
-                
-        spin_unlock (&ksocknal_data.ksnd_reaper_lock);
-}
-
-void
-ksocknal_terminate_conn (ksock_conn_t *conn)
-{
-        /* This gets called by the reaper (guaranteed thread context) to
-         * disengage the socket from its callbacks and close it.
-         * ksnc_refcount will eventually hit zero, and then the reaper will
-         * destroy it.
-         *
-         * NB if the peer's last conn closed in error (ksnp_error != 0), the
-         * router is told via kpr_notify() once ksnd_global_lock has been
-         * dropped again. */
-        unsigned long   flags;
-        ksock_peer_t   *peer = conn->ksnc_peer;
-        ksock_sched_t  *sched = conn->ksnc_scheduler;
-        struct timeval  now;
-        time_t          then = 0;
-        int             notify = 0;
-
-        LASSERT(conn->ksnc_closing);
-
-        /* wake up the scheduler to "send" all remaining packets to /dev/null */
-        spin_lock_irqsave(&sched->kss_lock, flags);
-
-        if (!conn->ksnc_tx_scheduled &&
-            !list_empty(&conn->ksnc_tx_queue)){
-                list_add_tail (&conn->ksnc_tx_list,
-                               &sched->kss_tx_conns);
-                /* a closing conn is always ready to tx */
-                conn->ksnc_tx_ready = 1;
-                conn->ksnc_tx_scheduled = 1;
-                /* extra ref for scheduler */
-                atomic_inc (&conn->ksnc_refcount);
-
-                wake_up (&sched->kss_waitq);
-        }
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
-        /* serialise with callbacks */
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        /* Remove conn's network callbacks.
-         * NB I _have_ to restore the callback, rather than storing a noop,
-         * since the socket could survive past this module being unloaded!! */
-        conn->ksnc_sock->sk->sk_data_ready = conn->ksnc_saved_data_ready;
-        conn->ksnc_sock->sk->sk_write_space = conn->ksnc_saved_write_space;
-
-        /* A callback could be in progress already; they hold a read lock
-         * on ksnd_global_lock (to serialise with me) and NOOP if
-         * sk_user_data is NULL. */
-        conn->ksnc_sock->sk->sk_user_data = NULL;
-
-        /* OK, so this conn may not be completely disengaged from its
-         * scheduler yet, but it _has_ committed to terminate... */
-        conn->ksnc_scheduler->kss_nconns--;
-
-        if (peer->ksnp_error != 0) {
-                /* peer's last conn closed in error */
-                LASSERT (list_empty (&peer->ksnp_conns));
-                
-                /* convert peer's last-known-alive timestamp from jiffies:
-                 * wall-clock 'now' less the seconds elapsed since
-                 * ksnp_last_alive */
-                do_gettimeofday (&now);
-                then = now.tv_sec - (jiffies - peer->ksnp_last_alive)/HZ;
-                notify = 1;
-        }
-        
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        /* The socket is closed on the final put; either here, or in
-         * ksocknal_{send,recv}msg().  Since we set up the linger2 option
-         * when the connection was established, this will close the socket
-         * immediately, aborting anything buffered in it. Any hung
-         * zero-copy transmits will therefore complete in finite time. */
-        ksocknal_putconnsock (conn);
-
-        if (notify)
-                kpr_notify (&ksocknal_data.ksnd_router, peer->ksnp_nid,
-                            0, then);
-}
-
-void
-ksocknal_destroy_conn (ksock_conn_t *conn)
-{
-        /* The reaper's last act on a conn: complete (with an error) any
-         * receive that was still in progress, drop the conn's ref on its
-         * peer and free the conn itself. */
-        ksock_peer_t *peer = conn->ksnc_peer;
-
-        CDEBUG (D_NET, "connection %p\n", conn);
-
-        /* by now nothing may reference, or be queued on, this conn */
-        LASSERT (atomic_read (&conn->ksnc_refcount) == 0);
-        LASSERT (conn->ksnc_route == NULL);
-        LASSERT (!conn->ksnc_tx_scheduled);
-        LASSERT (!conn->ksnc_rx_scheduled);
-        LASSERT (list_empty(&conn->ksnc_tx_queue));
-
-        if (conn->ksnc_rx_state == SOCKNAL_RX_BODY) {
-                /* part-way through a message body: finalize it as failed */
-                CERROR("Completing partial receive from "LPX64
-                       ", ip %d.%d.%d.%d:%d, with error\n",
-                       peer->ksnp_nid,
-                       HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
-                lib_finalize (&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_FAIL);
-        } else if (conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD) {
-                /* part-way through a forwarded body: abort the forward */
-                ksocknal_fmb_callback (conn->ksnc_cookie, -ECONNABORTED);
-        } else if (conn->ksnc_rx_state != SOCKNAL_RX_HEADER &&
-                   conn->ksnc_rx_state != SOCKNAL_RX_SLOP) {
-                /* header and slop need no completion; any other state
-                 * is a bug */
-                LBUG ();
-        }
-
-        ksocknal_put_peer (peer);
-
-        PORTAL_FREE (conn, sizeof (*conn));
-        atomic_dec (&ksocknal_data.ksnd_nclosing_conns);
-}
-
-void
-ksocknal_put_conn (ksock_conn_t *conn)
-{
-        /* Drop one ref on 'conn'.  Whoever drops the last one parks the
-         * conn on ksnd_zombie_conns and wakes the reaper. */
-        unsigned long irqflags;
-
-        CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n",
-                conn, conn->ksnc_peer->ksnp_nid,
-                atomic_read (&conn->ksnc_refcount));
-
-        LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
-
-        if (atomic_dec_and_test (&conn->ksnc_refcount)) {
-                /* that was the final ref */
-                spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, irqflags);
-
-                list_add (&conn->ksnc_list, &ksocknal_data.ksnd_zombie_conns);
-                wake_up (&ksocknal_data.ksnd_reaper_waitq);
-
-                spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock,
-                                        irqflags);
-        }
-}
-
-int
-ksocknal_close_peer_conns_locked (ksock_peer_t *peer, __u32 ipaddr, int why)
-{
-        /* Close each of peer's conns whose remote IP is 'ipaddr' (0 matches
-         * them all), passing 'why' on as the close reason.
-         * Returns the number of conns closed. */
-        struct list_head   *pos;
-        struct list_head   *next;
-        ksock_conn_t       *conn;
-        int                 nclosed = 0;
-
-        /* NB closing a conn unlinks it from ksnp_conns, hence the "safe"
-         * iterator */
-        list_for_each_safe (pos, next, &peer->ksnp_conns) {
-                conn = list_entry (pos, ksock_conn_t, ksnc_list);
-
-                if (ipaddr != 0 &&
-                    conn->ksnc_ipaddr != ipaddr)
-                        continue;
-
-                nclosed++;
-                ksocknal_close_conn_locked (conn, why);
-        }
-
-        return (nclosed);
-}
-
-int
-ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation)
-{
-        /* Close (with -ESTALE) each of peer's conns whose incarnation
-         * differs from 'incarnation'.  Returns the number closed. */
-        struct list_head   *pos;
-        struct list_head   *next;
-        ksock_conn_t       *conn;
-        int                 nstale = 0;
-
-        list_for_each_safe (pos, next, &peer->ksnp_conns) {
-                conn = list_entry (pos, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_incarnation != incarnation) {
-                        CWARN("Closing stale conn nid:"LPX64" ip:%08x/%d "
-                              "incarnation:"LPX64"("LPX64")\n",
-                              peer->ksnp_nid, conn->ksnc_ipaddr, conn->ksnc_port,
-                              conn->ksnc_incarnation, incarnation);
-
-                        nstale++;
-                        ksocknal_close_conn_locked (conn, -ESTALE);
-                }
-        }
-
-        return (nstale);
-}
-
-int
-ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why)
-{
-        /* Close 'conn' together with every other conn its peer has to the
-         * same remote IP.  Takes ksnd_global_lock itself.
-         * Returns the number of conns closed. */
-        unsigned long     flags;
-        int               nclosed;
-        __u32             remote_ip = conn->ksnc_ipaddr;
-        ksock_peer_t     *owner = conn->ksnc_peer;
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-        nclosed = ksocknal_close_peer_conns_locked (owner, remote_ip, why);
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        return (nclosed);
-}
-
-int
-ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr)
-{
-        /* Close every conn to peer 'nid' (PTL_NID_ANY matches all peers)
-         * whose remote IP is 'ipaddr' (0 matches any IP).
-         * Returns 0 if either argument is a wildcard or at least one conn
-         * was closed; -ENOENT if a fully specified match closed nothing. */
-        unsigned long       flags;
-        struct list_head   *pos;
-        struct list_head   *next;
-        ksock_peer_t       *peer;
-        int                 first;
-        int                 last;
-        int                 bucket;
-        int                 nclosed = 0;
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        if (nid == PTL_NID_ANY) {
-                /* wildcard: visit the whole peer hash table */
-                first = 0;
-                last = ksocknal_data.ksnd_peer_hash_size - 1;
-        } else {
-                /* just the one chain this NID hashes to */
-                first = ksocknal_nid2peerlist(nid) - ksocknal_data.ksnd_peers;
-                last = first;
-        }
-
-        for (bucket = first; bucket <= last; bucket++) {
-                list_for_each_safe (pos, next,
-                                    &ksocknal_data.ksnd_peers[bucket]) {
-
-                        peer = list_entry (pos, ksock_peer_t, ksnp_list);
-
-                        if (nid != PTL_NID_ANY && nid != peer->ksnp_nid)
-                                continue;
-
-                        nclosed += ksocknal_close_peer_conns_locked (peer,
-                                                                     ipaddr, 0);
-                }
-        }
-
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        /* only a fully specified request can fail to match */
-        if (nid != PTL_NID_ANY && ipaddr != 0 && nclosed == 0)
-                return (-ENOENT);
-
-        return (0);
-}
-
-void
-ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive)
-{
-        /* Router callback: the state of gateway 'gw_nid' has changed.
-         * 'arg' is not used. */
-
-        CDEBUG (D_NET, "gw "LPX64" %s\n", gw_nid, alive ? "up" : "down");
-
-        /* Nothing to do when a gateway comes up.  We can only establish
-         * new connections if we have autoroutes, and these connect on
-         * demand. */
-        if (alive)
-                return;
-
-        /* The gateway went down: close all open connections to it */
-        ksocknal_close_matching_conns (gw_nid, 0);
-}
-
-/* Return the TCP option block of 'sk'; where it lives depends on the
- * kernel version being built against. */
-struct tcp_opt *sock2tcp_opt(struct sock *sk)
-{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        return &(sk->tp_pinfo.af_tcp);
-#else
-        struct tcp_sock *tsk = (struct tcp_sock *)sk;
-
-        return &tsk->tcp;
-#endif
-}
-
-void
-ksocknal_push_conn (ksock_conn_t *conn)
-{
-        /* Issue a TCP_NODELAY setsockopt on conn's socket while leaving the
-         * socket's 'nonagle' setting as it was found: nonagle is saved and
-         * forced to 1 before the call and put back afterwards.
-         * (Presumably the point is to flush whatever the socket has
-         * buffered - confirm against the kernel's TCP_NODELAY handling.)
-         * Does nothing if the conn's socket can no longer be referenced. */
-        struct sock    *sk;
-        struct tcp_opt *tp;
-        mm_segment_t    saved_fs;
-        int             saved_nonagle;
-        int             enable = 1;
-        int             rc;
-
-        if (ksocknal_getconnsock (conn) != 0)   /* being shut down */
-                return;
-
-        sk = conn->ksnc_sock->sk;
-        tp = sock2tcp_opt(sk);
-
-        /* remember the current nagle setting and force it on */
-        lock_sock (sk);
-        saved_nonagle = tp->nonagle;
-        tp->nonagle = 1;
-        release_sock (sk);
-
-        /* the option value lives in kernel memory, so lift the user-space
-         * address limit around the call */
-        saved_fs = get_fs ();
-        set_fs (KERNEL_DS);
-
-        rc = sk->sk_prot->setsockopt (sk, SOL_TCP, TCP_NODELAY,
-                                      (char *)&enable, sizeof (enable));
-        LASSERT (rc == 0);
-
-        set_fs (saved_fs);
-
-        /* put the nagle setting back the way it was */
-        lock_sock (sk);
-        tp->nonagle = saved_nonagle;
-        release_sock (sk);
-
-        ksocknal_putconnsock (conn);
-}
-
-void
-ksocknal_push_peer (ksock_peer_t *peer)
-{
-        /* Push each of peer's conns in turn.  The global lock is dropped
-         * before a conn is pushed, so the conn list is re-scanned from the
-         * start to find the next ('target'th) conn every time round. */
-        struct list_head *pos;
-        ksock_conn_t     *conn;
-        int               target = 0;
-        int               seen;
-
-        for (;;) {
-                seen = 0;
-                conn = NULL;
-
-                read_lock (&ksocknal_data.ksnd_global_lock);
-
-                list_for_each (pos, &peer->ksnp_conns) {
-                        if (seen++ != target)
-                                continue;
-
-                        /* found it; take a ref so it survives the unlock */
-                        conn = list_entry (pos, ksock_conn_t, ksnc_list);
-                        atomic_inc (&conn->ksnc_refcount);
-                        break;
-                }
-
-                read_unlock (&ksocknal_data.ksnd_global_lock);
-
-                if (conn == NULL)       /* ran off the end of the list */
-                        return;
-
-                ksocknal_push_conn (conn);
-                ksocknal_put_conn (conn);
-                target++;
-        }
-}
-
-int
-ksocknal_push (ptl_nid_t nid)
-{
-        /* Push all the conns of the peer with the given NID, or of every
-         * peer in the peer table if 'nid' is PTL_NID_ANY.
-         * Returns 0 if at least one peer was found and pushed, -ENOENT
-         * otherwise. */
-        ksock_peer_t      *peer;
-        struct list_head  *tmp;
-        int                index;
-        int                i;
-        int                j;
-        int                rc = -ENOENT;
-
-        if (nid != PTL_NID_ANY) {
-                peer = ksocknal_get_peer (nid);
-
-                if (peer != NULL) {
-                        rc = 0;
-                        ksocknal_push_peer (peer);
-                        ksocknal_put_peer (peer);
-                }
-                return (rc);
-        }
-
-        for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                /* The global lock is dropped before each push, so this
-                 * hash chain is re-scanned for its j'th peer every time. */
-                for (j = 0; ; j++) {
-                        read_lock (&ksocknal_data.ksnd_global_lock);
-
-                        index = 0;
-                        peer = NULL;
-
-                        list_for_each (tmp, &ksocknal_data.ksnd_peers[i]) {
-                                if (index++ == j) {
-                                        peer = list_entry(tmp, ksock_peer_t,
-                                                          ksnp_list);
-                                        /* ref keeps peer alive after unlock */
-                                        atomic_inc (&peer->ksnp_refcount);
-                                        break;
-                                }
-                        }
-
-                        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-                        /* No j'th peer: this chain is exhausted, so move on
-                         * to the next one.  Without this exit the loop
-                         * would spin here forever. */
-                        if (peer == NULL)
-                                break;
-
-                        rc = 0;
-                        ksocknal_push_peer (peer);
-                        ksocknal_put_peer (peer);
-                }
-
-        }
-
-        return (rc);
-}
-
-int
-ksocknal_add_interface(__u32 ipaddress, __u32 netmask)
-{
-        /* Add a local interface (IP address + netmask) to the interface
-         * table and initialise its peer/route usage counts from the peers
-         * and routes that already refer to its IP address.
-         * Returns 0 on success (an already-known address is silently
-         * accepted), -EINVAL if either argument is 0, or -ENOSPC if the
-         * table already holds SOCKNAL_MAX_INTERFACES entries. */
-        unsigned long      flags;
-        ksock_interface_t *iface;
-        int                rc;
-        int                i;
-        int                j;
-        struct list_head  *ptmp;
-        ksock_peer_t      *peer;
-        struct list_head  *rtmp;
-        ksock_route_t     *route;
-
-        if (ipaddress == 0 ||
-            netmask == 0)
-                return (-EINVAL);
-
-        write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
-
-        iface = ksocknal_ip2iface(ipaddress);
-        if (iface != NULL) {
-                /* silently ignore dups */
-                rc = 0;
-        } else if (ksocknal_data.ksnd_ninterfaces == SOCKNAL_MAX_INTERFACES) {
-                rc = -ENOSPC;
-        } else {
-                iface = &ksocknal_data.ksnd_interfaces[ksocknal_data.ksnd_ninterfaces++];
-
-                iface->ksni_ipaddr = ipaddress;
-                iface->ksni_netmask = netmask;
-                iface->ksni_nroutes = 0;
-                iface->ksni_npeers = 0;
-
-                for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                        list_for_each(ptmp, &ksocknal_data.ksnd_peers[i]) {
-                                peer = list_entry(ptmp, ksock_peer_t, ksnp_list);
-
-                                /* count peers with this IP among their
-                                 * passive IPs.  NB the loop must be bounded
-                                 * by j (the passive IP index), not i (the
-                                 * hash bucket). */
-                                for (j = 0; j < peer->ksnp_n_passive_ips; j++)
-                                        if (peer->ksnp_passive_ips[j] == ipaddress)
-                                                iface->ksni_npeers++;
-
-                                /* count routes bound to this local IP */
-                                list_for_each(rtmp, &peer->ksnp_routes) {
-                                        route = list_entry(rtmp, ksock_route_t, ksnr_list);
-
-                                        if (route->ksnr_myipaddr == ipaddress)
-                                                iface->ksni_nroutes++;
-                                }
-                        }
-                }
-
-                rc = 0;
-                /* NB only new connections will pay attention to the new interface! */
-        }
-
-        write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-
-        return (rc);
-}
-
-void
-ksocknal_peer_del_interface_locked(ksock_peer_t *peer, __u32 ipaddr)
-{
-        /* Purge local interface 'ipaddr' from 'peer': forget it as a
-         * passive IP, unbind or delete the routes bound to it, and close
-         * the conns that use it as their local address. */
-        struct list_head   *pos;
-        struct list_head   *next;
-        ksock_route_t      *route;
-        ksock_conn_t       *conn;
-        int                 slot;
-        int                 k;
-
-        /* drop the first matching passive IP, closing the gap it leaves */
-        for (slot = 0; slot < peer->ksnp_n_passive_ips; slot++) {
-                if (peer->ksnp_passive_ips[slot] != ipaddr)
-                        continue;
-
-                for (k = slot; k + 1 < peer->ksnp_n_passive_ips; k++)
-                        peer->ksnp_passive_ips[k] =
-                                peer->ksnp_passive_ips[k + 1];
-                peer->ksnp_n_passive_ips--;
-                break;
-        }
-
-        list_for_each_safe(pos, next, &peer->ksnp_routes) {
-                route = list_entry (pos, ksock_route_t, ksnr_list);
-
-                if (route->ksnr_myipaddr != ipaddr)
-                        continue;
-
-                if (route->ksnr_share_count == 0) {
-                        ksocknal_del_route_locked(route);
-                } else {
-                        /* Manually created; keep, but unbind */
-                        route->ksnr_myipaddr = 0;
-                }
-        }
-
-        list_for_each_safe(pos, next, &peer->ksnp_conns) {
-                conn = list_entry(pos, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_myipaddr != ipaddr)
-                        continue;
-
-                ksocknal_close_conn_locked (conn, 0);
-        }
-}
-
-int
-ksocknal_del_interface(__u32 ipaddress)
-{
-        /* Remove the local interface with IP 'ipaddress' (0 removes every
-         * interface) from the interface table, and purge it from all peers.
-         * Returns 0 if at least one interface was removed, -ENOENT if none
-         * matched. */
-        int                rc = -ENOENT;
-        unsigned long      flags;
-        struct list_head  *tmp;
-        struct list_head  *nxt;
-        ksock_peer_t      *peer;
-        __u32              this_ip;
-        int                i;
-        int                j;
-
-        write_lock_irqsave(&ksocknal_data.ksnd_global_lock, flags);
-
-        i = 0;
-        while (i < ksocknal_data.ksnd_ninterfaces) {
-                this_ip = ksocknal_data.ksnd_interfaces[i].ksni_ipaddr;
-
-                if (!(ipaddress == 0 ||
-                      ipaddress == this_ip)) {
-                        i++;
-                        continue;
-                }
-
-                rc = 0;
-
-                /* close the gap left by the interface being removed */
-                for (j = i+1; j < ksocknal_data.ksnd_ninterfaces; j++)
-                        ksocknal_data.ksnd_interfaces[j-1] =
-                                ksocknal_data.ksnd_interfaces[j];
-
-                ksocknal_data.ksnd_ninterfaces--;
-
-                for (j = 0; j < ksocknal_data.ksnd_peer_hash_size; j++) {
-                        list_for_each_safe(tmp, nxt, &ksocknal_data.ksnd_peers[j]) {
-                                peer = list_entry(tmp, ksock_peer_t, ksnp_list);
-
-                                ksocknal_peer_del_interface_locked(peer, this_ip);
-                        }
-                }
-
-                /* NB 'i' is deliberately not advanced: the following
-                 * interface (if any) has just been shifted into slot i and
-                 * must be examined too, or a wildcard delete would skip
-                 * every other interface. */
-        }
-
-        write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, flags);
-
-        return (rc);
-}
-
-int
-ksocknal_cmd(struct portals_cfg *pcfg, void * private)
-{       /* Dispatch one NAL configuration command.
-         * pcfg->pcfg_command selects the operation; the other pcfg fields
-         * carry its arguments and, for the GET commands, are overwritten
-         * with the results.  Returns the rc produced by the selected
-         * operation, or -EINVAL for an unrecognised command.
-         * 'private' is not used here. */
-        int rc;
-
-        switch(pcfg->pcfg_command) {
-        case NAL_CMD_GET_INTERFACE: {
-                ksock_interface_t *iface;
-
-                read_lock (&ksocknal_data.ksnd_global_lock);
-
-                if (pcfg->pcfg_count < 0 ||
-                    pcfg->pcfg_count >= ksocknal_data.ksnd_ninterfaces) {
-                        rc = -ENOENT;   /* pcfg_count is not a valid interface index */
-                } else {
-                        rc = 0;
-                        iface = &ksocknal_data.ksnd_interfaces[pcfg->pcfg_count];
-
-                        pcfg->pcfg_id    = iface->ksni_ipaddr;
-                        pcfg->pcfg_misc  = iface->ksni_netmask;
-                        pcfg->pcfg_fd    = iface->ksni_npeers;
-                        pcfg->pcfg_count = iface->ksni_nroutes;
-                }
-                
-                read_unlock (&ksocknal_data.ksnd_global_lock);
-                break;
-        }
-        case NAL_CMD_ADD_INTERFACE: {
-                rc = ksocknal_add_interface(pcfg->pcfg_id, /* IP address */
-                                            pcfg->pcfg_misc); /* net mask */
-                break;
-        }
-        case NAL_CMD_DEL_INTERFACE: {
-                rc = ksocknal_del_interface(pcfg->pcfg_id); /* IP address */
-                break;
-        }
-        case NAL_CMD_GET_PEER: {
-                ptl_nid_t    nid = 0;
-                __u32        myip = 0;
-                __u32        ip = 0;
-                int          port = 0;
-                int          conn_count = 0;
-                int          share_count = 0;
-                
-                rc = ksocknal_get_peer_info(pcfg->pcfg_count, &nid,
-                                            &myip, &ip, &port,
-                                            &conn_count,  &share_count);
-                pcfg->pcfg_nid   = nid;         /* NB results are copied out whatever rc is */
-                pcfg->pcfg_size  = myip;
-                pcfg->pcfg_id    = ip;
-                pcfg->pcfg_misc  = port;
-                pcfg->pcfg_count = conn_count;
-                pcfg->pcfg_wait  = share_count;
-                break;
-        }
-        case NAL_CMD_ADD_PEER: {
-                rc = ksocknal_add_peer (pcfg->pcfg_nid, 
-                                        pcfg->pcfg_id, /* IP */
-                                        pcfg->pcfg_misc); /* port */
-                break;
-        }
-        case NAL_CMD_DEL_PEER: {
-                rc = ksocknal_del_peer (pcfg->pcfg_nid, 
-                                        pcfg->pcfg_id, /* IP */
-                                        pcfg->pcfg_flags); /* single_share? */
-                break;
-        }
-        case NAL_CMD_GET_CONN: {
-                ksock_conn_t *conn = ksocknal_get_conn_by_idx (pcfg->pcfg_count);
-
-                if (conn == NULL)
-                        rc = -ENOENT;
-                else {
-                        int   txmem;
-                        int   rxmem;
-                        int   nagle;
-
-                        ksocknal_get_conn_tunables(conn, &txmem, &rxmem, &nagle);
-
-                        rc = 0;
-                        pcfg->pcfg_nid    = conn->ksnc_peer->ksnp_nid;
-                        pcfg->pcfg_id     = conn->ksnc_ipaddr;
-                        pcfg->pcfg_misc   = conn->ksnc_port;
-                        pcfg->pcfg_fd     = conn->ksnc_myipaddr;
-                        pcfg->pcfg_flags  = conn->ksnc_type;
-                        pcfg->pcfg_gw_nal = conn->ksnc_scheduler - 
-                                            ksocknal_data.ksnd_schedulers;      /* index of conn's scheduler */
-                        pcfg->pcfg_count  = txmem;
-                        pcfg->pcfg_size   = rxmem;
-                        pcfg->pcfg_wait   = nagle;
-                        ksocknal_put_conn (conn);       /* presumably drops a ref taken by the lookup - confirm */
-                }
-                break;
-        }
-        case NAL_CMD_REGISTER_PEER_FD: {
-                struct socket *sock = sockfd_lookup (pcfg->pcfg_fd, &rc);
-                int            type = pcfg->pcfg_misc;
-
-                if (sock == NULL)
-                        break;          /* rc was handed to sockfd_lookup() to fill in */
-
-                switch (type) {
-                case SOCKNAL_CONN_NONE:
-                case SOCKNAL_CONN_ANY:
-                case SOCKNAL_CONN_CONTROL:
-                case SOCKNAL_CONN_BULK_IN:
-                case SOCKNAL_CONN_BULK_OUT:
-                        rc = ksocknal_create_conn(NULL, sock, type);
-                        break;
-                default:
-                        rc = -EINVAL;   /* not a known connection type */
-                        break;
-                }
-                fput (sock->file);      /* done with the file obtained via sockfd_lookup() */
-                break;
-        }
-        case NAL_CMD_CLOSE_CONNECTION: {
-                rc = ksocknal_close_matching_conns (pcfg->pcfg_nid, 
-                                                    pcfg->pcfg_id);
-                break;
-        }
-        case NAL_CMD_REGISTER_MYNID: {
-                rc = ksocknal_set_mynid (pcfg->pcfg_nid);
-                break;
-        }
-        case NAL_CMD_PUSH_CONNECTION: {
-                rc = ksocknal_push (pcfg->pcfg_nid);
-                break;
-        }
-        default:
-                rc = -EINVAL;
-                break;
-        }
-
-        return rc;
-}
-
-/* Release every idle forwarding buffer in pool 'p', together with the
- * pages attached to it.  The pool must be quiescent: no connection may be
- * blocked waiting for a buffer and no buffer may be in use. */
-void
-ksocknal_free_fmbs (ksock_fmb_pool_t *p)
-{
-        int          npages = p->fmp_buff_pages;
-        ksock_fmb_t *fmb;
-        int          frag;
-
-        LASSERT (list_empty(&p->fmp_blocked_conns));
-        LASSERT (p->fmp_nactive_fmbs == 0);
-
-        for (;;) {
-                if (list_empty(&p->fmp_idle_fmbs))
-                        break;
-
-                /* take the buffer at the head of the idle list */
-                fmb = list_entry(p->fmp_idle_fmbs.next,
-                                 ksock_fmb_t, fmb_list);
-
-                /* only frags that actually have a page are released */
-                for (frag = 0; frag < npages; frag++) {
-                        if (fmb->fmb_kiov[frag].kiov_page == NULL)
-                                continue;
-
-                        __free_page(fmb->fmb_kiov[frag].kiov_page);
-                }
-
-                list_del(&fmb->fmb_list);
-                /* the descriptor's size depends on its pool's frag count */
-                PORTAL_FREE(fmb, offsetof(ksock_fmb_t, fmb_kiov[npages]));
-        }
-}
-
-/* Free the memory hanging off ksocknal_data: both forwarding buffer pools,
- * the scheduler array (if it was allocated) and the peer hash table.
- * All locally originated transmit descriptors must already be gone. */
-void
-ksocknal_free_buffers (void)
-{
-        ksocknal_free_fmbs(&ksocknal_data.ksnd_small_fmp);
-        ksocknal_free_fmbs(&ksocknal_data.ksnd_large_fmp);
-
-        LASSERT (atomic_read(&ksocknal_data.ksnd_nactive_ltxs) == 0);
-
-        /* the scheduler array is missing if startup failed to allocate it */
-        if (ksocknal_data.ksnd_schedulers != NULL)
-                PORTAL_FREE (ksocknal_data.ksnd_schedulers,
-                             sizeof (ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-
-        /* NB no NULL check here: ksnd_peers is freed unconditionally */
-        PORTAL_FREE (ksocknal_data.ksnd_peers,
-                     sizeof (struct list_head) * 
-                     ksocknal_data.ksnd_peer_hash_size);
-}
-
-/* Tear down the socket NAL; installed as ksocknal_api.nal_ni_fini.
- *
- * While 'nal' is still referenced (nal_refct != 0) this only drops the
- * module use count.  Otherwise initialisation is unwound in reverse: the
- * switch is entered at the stage recorded in ksocknal_data.ksnd_init and
- * falls through every earlier stage.  ksocknal_api_startup() also calls
- * this to clean up after a failed startup, so each stage has to cope with
- * state that was only partly set up. */
-void
-ksocknal_api_shutdown (nal_t *nal)
-{
-        ksock_sched_t *sched;
-        int            i;
-
-        if (nal->nal_refct != 0) {
-                /* This module got the first ref */
-                PORTAL_MODULE_UNUSE;
-                return;
-        }
-
-        CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-
-        LASSERT(nal == &ksocknal_api);
-
-        switch (ksocknal_data.ksnd_init) {
-        default:
-                LASSERT (0);
-
-        case SOCKNAL_INIT_ALL:
-                libcfs_nal_cmd_unregister(SOCKNAL);
-
-                ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB;
-                /* fall through */
-
-        case SOCKNAL_INIT_LIB:
-                /* No more calls to ksocknal_cmd() to create new
-                 * autoroutes/connections since we're being unloaded. */
-
-                /* Delete all peers */
-                ksocknal_del_peer(PTL_NID_ANY, 0, 0);
-
-                /* Wait for all peer state to clean up; the message is
-                 * promoted to a warning each time the poll count hits a
-                 * power of 2 */
-                i = 2;
-                while (atomic_read (&ksocknal_data.ksnd_npeers) != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d peers to disconnect\n",
-                               atomic_read (&ksocknal_data.ksnd_npeers));
-                        set_current_state (TASK_UNINTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                }
-
-                /* Tell lib we've stopped calling into her. */
-                lib_fini(&ksocknal_lib);
-
-                ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-                /* fall through */
-
-        case SOCKNAL_INIT_DATA:
-                LASSERT (atomic_read (&ksocknal_data.ksnd_npeers) == 0);
-                LASSERT (ksocknal_data.ksnd_peers != NULL);
-                for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++) {
-                        LASSERT (list_empty (&ksocknal_data.ksnd_peers[i]));
-                }
-                LASSERT (list_empty (&ksocknal_data.ksnd_enomem_conns));
-                LASSERT (list_empty (&ksocknal_data.ksnd_zombie_conns));
-                LASSERT (list_empty (&ksocknal_data.ksnd_autoconnectd_routes));
-                LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns));
-                LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns));
-
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                ksock_sched_t *kss =
-                                        &ksocknal_data.ksnd_schedulers[i];
-
-                                LASSERT (list_empty (&kss->kss_tx_conns));
-                                LASSERT (list_empty (&kss->kss_rx_conns));
-                                LASSERT (kss->kss_nconns == 0);
-                        }
-
-                /* stop router calling me */
-                kpr_shutdown (&ksocknal_data.ksnd_router);
-
-                /* flag threads to terminate; wake and wait for them to die */
-                ksocknal_data.ksnd_shuttingdown = 1;
-                wake_up_all (&ksocknal_data.ksnd_autoconnectd_waitq);
-                wake_up_all (&ksocknal_data.ksnd_reaper_waitq);
-
-                /* NB ksnd_schedulers is NULL (with ksnd_nschedulers already
-                 * set) when startup failed to allocate the scheduler array,
-                 * so the array must not be walked unconditionally */
-                if (ksocknal_data.ksnd_schedulers != NULL)
-                        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                                sched = &ksocknal_data.ksnd_schedulers[i];
-                                wake_up_all(&sched->kss_waitq);
-                        }
-
-                /* poll the live thread count once a second, dropping the
-                 * global lock while asleep */
-                i = 4;
-                read_lock(&ksocknal_data.ksnd_global_lock);
-                while (ksocknal_data.ksnd_nthreads != 0) {
-                        i++;
-                        CDEBUG(((i & (-i)) == i) ? D_WARNING : D_NET, /* power of 2? */
-                               "waiting for %d threads to terminate\n",
-                                ksocknal_data.ksnd_nthreads);
-                        read_unlock(&ksocknal_data.ksnd_global_lock);
-                        set_current_state (TASK_UNINTERRUPTIBLE);
-                        schedule_timeout (HZ);
-                        read_lock(&ksocknal_data.ksnd_global_lock);
-                }
-                read_unlock(&ksocknal_data.ksnd_global_lock);
-
-                kpr_deregister (&ksocknal_data.ksnd_router);
-
-                ksocknal_free_buffers();
-
-                ksocknal_data.ksnd_init = SOCKNAL_INIT_NOTHING;
-                /* fall through */
-
-        case SOCKNAL_INIT_NOTHING:
-                break;
-        }
-
-        CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n",
-               atomic_read (&portal_kmemory));
-
-        printk(KERN_INFO "Lustre: Routing socket NAL unloaded (final mem %d)\n",
-               atomic_read(&portal_kmemory));
-}
-
-
-/* Record the incarnation number of this socknal instance in
- * ksocknal_data.ksnd_incarnation. */
-void
-ksocknal_init_incarnation (void)
-{
-        struct timeval now;
-        __u64          usecs;
-
-        /* The incarnation is simply the current time of day expressed in
-         * microseconds, which identifies this particular instance of the
-         * socknal.  Hopefully we won't be able to reboot more frequently
-         * than 1MHz for the foreseeable future :) */
-        do_gettimeofday(&now);
-
-        usecs  = ((__u64)now.tv_sec) * 1000000;
-        usecs += now.tv_usec;
-
-        ksocknal_data.ksnd_incarnation = usecs;
-}
-
-int
-ksocknal_api_startup (nal_t *nal, ptl_pid_t requested_pid,
-                      ptl_ni_limits_t *requested_limits,
-                      ptl_ni_limits_t *actual_limits)
-{
-        ptl_process_id_t  process_id;
-        int               pkmem = atomic_read(&portal_kmemory);
-        int               rc;
-        int               i;
-        int               j;
-
-        LASSERT (nal == &ksocknal_api);
-
-        if (nal->nal_refct != 0) {
-                if (actual_limits != NULL)
-                        *actual_limits = ksocknal_lib.libnal_ni.ni_actual_limits;
-                /* This module got the first ref */
-                PORTAL_MODULE_USE;
-                return (PTL_OK);
-        }
-
-        LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING);
-
-        memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */
-
-        ksocknal_init_incarnation();
-        
-        ksocknal_data.ksnd_peer_hash_size = SOCKNAL_PEER_HASH_SIZE;
-        PORTAL_ALLOC (ksocknal_data.ksnd_peers,
-                      sizeof (struct list_head) * ksocknal_data.ksnd_peer_hash_size);
-        if (ksocknal_data.ksnd_peers == NULL)
-                return (-ENOMEM);
-
-        for (i = 0; i < ksocknal_data.ksnd_peer_hash_size; i++)
-                INIT_LIST_HEAD(&ksocknal_data.ksnd_peers[i]);
-
-        rwlock_init(&ksocknal_data.ksnd_global_lock);
-
-        spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns);
-        ksocknal_data.ksnd_small_fmp.fmp_buff_pages = SOCKNAL_SMALL_FWD_PAGES;
-
-        spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs);
-        INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns);
-        ksocknal_data.ksnd_large_fmp.fmp_buff_pages = SOCKNAL_LARGE_FWD_PAGES;
-
-        spin_lock_init (&ksocknal_data.ksnd_reaper_lock);
-        INIT_LIST_HEAD (&ksocknal_data.ksnd_enomem_conns);
-        INIT_LIST_HEAD (&ksocknal_data.ksnd_zombie_conns);
-        INIT_LIST_HEAD (&ksocknal_data.ksnd_deathrow_conns);
-        init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq);
-
-        spin_lock_init (&ksocknal_data.ksnd_autoconnectd_lock);
-        INIT_LIST_HEAD (&ksocknal_data.ksnd_autoconnectd_routes);
-        init_waitqueue_head(&ksocknal_data.ksnd_autoconnectd_waitq);
-
-        /* NB memset above zeros whole of ksocknal_data, including
-         * ksocknal_data.ksnd_irqinfo[all].ksni_valid */
-
-        /* flag lists/ptrs/locks initialised */
-        ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA;
-
-        ksocknal_data.ksnd_nschedulers = ksocknal_nsched();
-        PORTAL_ALLOC(ksocknal_data.ksnd_schedulers,
-                     sizeof(ksock_sched_t) * ksocknal_data.ksnd_nschedulers);
-        if (ksocknal_data.ksnd_schedulers == NULL) {
-                ksocknal_api_shutdown (nal);
-                return (-ENOMEM);
-        }
-
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i];
-
-                spin_lock_init (&kss->kss_lock);
-                INIT_LIST_HEAD (&kss->kss_rx_conns);
-                INIT_LIST_HEAD (&kss->kss_tx_conns);
-#if SOCKNAL_ZC
-                INIT_LIST_HEAD (&kss->kss_zctxdone_list);
-#endif
-                init_waitqueue_head (&kss->kss_waitq);
-        }
-
-        /* NB we have to wait to be told our true NID... */
-        process_id.pid = requested_pid; 
-        process_id.nid = 0;
-        
-        rc = lib_init(&ksocknal_lib, nal, process_id,
-                      requested_limits, actual_limits);
-        if (rc != PTL_OK) {
-                CERROR("lib_init failed: error %d\n", rc);
-                ksocknal_api_shutdown (nal);
-                return (rc);
-        }
-
-        ksocknal_data.ksnd_init = SOCKNAL_INIT_LIB; // flag lib_init() called
-
-        for (i = 0; i < ksocknal_data.ksnd_nschedulers; i++) {
-                rc = ksocknal_thread_start (ksocknal_scheduler,
-                                            &ksocknal_data.ksnd_schedulers[i]);
-                if (rc != 0) {
-                        CERROR("Can't spawn socknal scheduler[%d]: %d\n",
-                               i, rc);
-                        ksocknal_api_shutdown (nal);
-                        return (rc);
-                }
-        }
-
-        for (i = 0; i < SOCKNAL_N_AUTOCONNECTD; i++) {
-                rc = ksocknal_thread_start (ksocknal_autoconnectd, (void *)((long)i));
-                if (rc != 0) {
-                        CERROR("Can't spawn socknal autoconnectd: %d\n", rc);
-                        ksocknal_api_shutdown (nal);
-                        return (rc);
-                }
-        }
-
-        rc = ksocknal_thread_start (ksocknal_reaper, NULL);
-        if (rc != 0) {
-                CERROR ("Can't spawn socknal reaper: %d\n", rc);
-                ksocknal_api_shutdown (nal);
-                return (rc);
-        }
-
-        rc = kpr_register(&ksocknal_data.ksnd_router,
-                          &ksocknal_router_interface);
-        if (rc != 0) {
-                CDEBUG(D_NET, "Can't initialise routing interface "
-                       "(rc = %d): not routing\n", rc);
-        } else {
-                /* Only allocate forwarding buffers if there's a router */
-
-                for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS +
-                                 SOCKNAL_LARGE_FWD_NMSGS); i++) {
-                        ksock_fmb_t      *fmb;
-                        ksock_fmb_pool_t *pool;
-                        
-
-                        if (i < SOCKNAL_SMALL_FWD_NMSGS)
-                                pool = &ksocknal_data.ksnd_small_fmp;
-                        else
-                                pool = &ksocknal_data.ksnd_large_fmp;
-                        
-                        PORTAL_ALLOC(fmb, offsetof(ksock_fmb_t, 
-                                                   fmb_kiov[pool->fmp_buff_pages]));
-                        if (fmb == NULL) {
-                                ksocknal_api_shutdown(nal);
-                                return (-ENOMEM);
-                        }
-
-                        fmb->fmb_pool = pool;
-                        
-                        for (j = 0; j < pool->fmp_buff_pages; j++) {
-                                fmb->fmb_kiov[j].kiov_page = alloc_page(GFP_KERNEL);
-
-                                if (fmb->fmb_kiov[j].kiov_page == NULL) {
-                                        ksocknal_api_shutdown (nal);
-                                        return (-ENOMEM);
-                                }
-
-                                LASSERT(page_address(fmb->fmb_kiov[j].kiov_page) != NULL);
-                        }
-
-                        list_add(&fmb->fmb_list, &pool->fmp_idle_fmbs);
-                }
-        }
-
-        rc = libcfs_nal_cmd_register(SOCKNAL, &ksocknal_cmd, NULL);
-        if (rc != 0) {
-                CERROR ("Can't initialise command interface (rc = %d)\n", rc);
-                ksocknal_api_shutdown (nal);
-                return (rc);
-        }
-
-        /* flag everything initialised */
-        ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL;
-
-        printk(KERN_INFO "Lustre: Routing socket NAL loaded "
-               "(Routing %s, initial mem %d, incarnation "LPX64")\n",
-               kpr_routing (&ksocknal_data.ksnd_router) ?
-               "enabled" : "disabled", pkmem, ksocknal_data.ksnd_incarnation);
-
-        return (0);
-}
-
-/* Module unload: undo ksocknal_module_init() in reverse order. */
-void __exit
-ksocknal_module_fini (void)
-{
-#ifdef CONFIG_SYSCTL
-        /* the sysctl table may never have been registered */
-        if (ksocknal_tunables.ksnd_sysctl != NULL) {
-                unregister_sysctl_table (ksocknal_tunables.ksnd_sysctl);
-        }
-#endif
-        /* release the interface taken at module load... */
-        PtlNIFini(ksocknal_ni);
-
-        /* ...and withdraw the NAL itself */
-        ptl_unregister_nal(SOCKNAL);
-}
-
-int __init
-ksocknal_module_init (void)
-{
-        int    rc;
-
-        /* packet descriptor must fit in a router descriptor's scratchpad */
-        LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t));
-        /* the following must be sizeof(int) for proc_dointvec() */
-        LASSERT(sizeof (ksocknal_tunables.ksnd_io_timeout) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_eager_ack) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_typed_conns) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_min_bulk) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_buffer_size) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_nagle) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_idle) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_count) == sizeof (int));
-        LASSERT(sizeof (ksocknal_tunables.ksnd_keepalive_intvl) == sizeof (int));
-#if CPU_AFFINITY
-        LASSERT(sizeof (ksocknal_tunables.ksnd_irq_affinity) == sizeof (int));
-#endif
-#if SOCKNAL_ZC
-        LASSERT(sizeof (ksocknal_tunables.ksnd_zc_min_frag) == sizeof (int));
-#endif
-        /* check ksnr_connected/connecting field large enough */
-        LASSERT(SOCKNAL_CONN_NTYPES <= 4);
-        
-        ksocknal_api.nal_ni_init = ksocknal_api_startup;
-        ksocknal_api.nal_ni_fini = ksocknal_api_shutdown;
-
-        /* Initialise dynamic tunables to defaults once only */
-        ksocknal_tunables.ksnd_io_timeout      = SOCKNAL_IO_TIMEOUT;
-        ksocknal_tunables.ksnd_eager_ack       = SOCKNAL_EAGER_ACK;
-        ksocknal_tunables.ksnd_typed_conns     = SOCKNAL_TYPED_CONNS;
-        ksocknal_tunables.ksnd_min_bulk        = SOCKNAL_MIN_BULK;
-        ksocknal_tunables.ksnd_buffer_size     = SOCKNAL_BUFFER_SIZE;
-        ksocknal_tunables.ksnd_nagle           = SOCKNAL_NAGLE;
-        ksocknal_tunables.ksnd_keepalive_idle  = SOCKNAL_KEEPALIVE_IDLE;
-        ksocknal_tunables.ksnd_keepalive_count = SOCKNAL_KEEPALIVE_COUNT;
-        ksocknal_tunables.ksnd_keepalive_intvl = SOCKNAL_KEEPALIVE_INTVL;
-#if CPU_AFFINITY
-        ksocknal_tunables.ksnd_irq_affinity = SOCKNAL_IRQ_AFFINITY;
-#endif
-#if SOCKNAL_ZC
-        ksocknal_tunables.ksnd_zc_min_frag  = SOCKNAL_ZC_MIN_FRAG;
-#endif
-
-        rc = ptl_register_nal(SOCKNAL, &ksocknal_api);
-        if (rc != PTL_OK) {
-                CERROR("Can't register SOCKNAL: %d\n", rc);
-                return (-ENOMEM);               /* or something... */
-        }
-
-        /* Pure gateways want the NAL started up at module load time... */
-        rc = PtlNIInit(SOCKNAL, LUSTRE_SRV_PTL_PID, NULL, NULL, &ksocknal_ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                ptl_unregister_nal(SOCKNAL);
-                return (-ENODEV);
-        }
-        
-#ifdef CONFIG_SYSCTL
-        /* Press on regardless even if registering sysctl doesn't work */
-        ksocknal_tunables.ksnd_sysctl = 
-                register_sysctl_table (ksocknal_top_ctl_table, 0);
-#endif
-        return (0);
-}
-
-/* Module metadata */
-MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
-MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01");
-MODULE_LICENSE("GPL");
-
-/* Module load/unload entry points */
-module_init(ksocknal_module_init);
-module_exit(ksocknal_module_fini);
-
diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h
deleted file mode 100644 (file)
index 9cfe858..0000000
+++ /dev/null
@@ -1,526 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_PORTAL_ALLOC
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/irq.h>
-
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <linux/kmod.h>
-#include <linux/sysctl.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <asm/div64.h>
-
-#define DEBUG_SUBSYSTEM S_NAL
-
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/kpr.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-#include <portals/socknal.h>
-
-/* Compile-time configuration of the socket NAL */
-
-#define SOCKNAL_N_AUTOCONNECTD  4               /* # socknal autoconnect daemons */
-
-#define SOCKNAL_MIN_RECONNECT_INTERVAL HZ      /* first failed connection retry... */
-#define SOCKNAL_MAX_RECONNECT_INTERVAL (60*HZ) /* ...exponentially increasing to this */
-
-/* default vals for runtime tunables */
-#define SOCKNAL_IO_TIMEOUT       50             /* default comms timeout (seconds) */
-#define SOCKNAL_EAGER_ACK        0              /* default eager ack (boolean) */
-#define SOCKNAL_TYPED_CONNS      1              /* unidirectional large, bidirectional small? */
-#define SOCKNAL_ZC_MIN_FRAG     (2<<10)         /* default smallest zerocopy fragment */
-#define SOCKNAL_MIN_BULK        (1<<10)         /* smallest "large" message */
-#define SOCKNAL_BUFFER_SIZE     (8<<20)         /* default socket buffer size */
-#define SOCKNAL_NAGLE            0              /* enable/disable NAGLE? */
-#define SOCKNAL_IRQ_AFFINITY     1              /* enable/disable IRQ affinity? */
-#define SOCKNAL_KEEPALIVE_IDLE   0              /* # seconds idle before 1st probe */
-#define SOCKNAL_KEEPALIVE_COUNT  10             /* # unanswered probes to determine peer death */
-#define SOCKNAL_KEEPALIVE_INTVL  1              /* seconds between probes */
-
-#define SOCKNAL_PEER_HASH_SIZE   101            /* # peer lists */
-
-/* forwarding buffer pool geometry */
-#define SOCKNAL_SMALL_FWD_NMSGS        128             /* # small messages I can be forwarding at any time */
-#define SOCKNAL_LARGE_FWD_NMSGS 64              /* # large messages I can be forwarding at any time */
-
-#define SOCKNAL_SMALL_FWD_PAGES        1               /* # pages in a small message fwd buffer */
-
-#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN(PTL_MTU) >> PAGE_SHIFT)
-                                               /* # pages in a large message fwd buffer */
-
-#define SOCKNAL_RESCHED         100             /* # scheduler loops before reschedule */
-#define SOCKNAL_ENOMEM_RETRY    1               /* jiffies between retries */
-
-#define SOCKNAL_MAX_INTERFACES  16              /* Largest number of interfaces we bind */
-
-#define SOCKNAL_ROUND_ROBIN     0               /* round robin / load balance */
-
-/* 80% of the socket's send buffer size */
-#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sk_sndbuf*8)/10)
-
-#define SOCKNAL_SINGLE_FRAG_TX      0           /* disable multi-fragment sends */
-#define SOCKNAL_SINGLE_FRAG_RX      0           /* disable multi-fragment receives */
-#define SOCKNAL_RISK_KMAP_DEADLOCK  0           /* risk kmap deadlock on multi-frag I/O 
-                                                 * (backs off to single-frag if disabled) */
-                                                
-/* Kernel compatibility: on kernels older than the versions tested below,
- * map the sk_-prefixed socket member names used by this code onto the
- * unprefixed names. */
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,72))
-# define sk_allocation  allocation
-# define sk_data_ready data_ready
-# define sk_write_space write_space
-# define sk_user_data   user_data
-# define sk_prot        prot
-# define sk_sndbuf      sndbuf
-# define sk_socket      socket
-#endif
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
-# define sk_wmem_queued wmem_queued
-# define sk_err         err
-#endif
-
-/* ksock_nal_data_t holds two of these, ksnd_small_fmp and ksnd_large_fmp;
- * every buffer in a pool has the same number of pages (fmp_buff_pages). */
-typedef struct                                  /* pool of forwarding buffers */
-{
-        spinlock_t        fmp_lock;             /* serialise */
-        struct list_head  fmp_idle_fmbs;        /* free buffers */
-        struct list_head  fmp_blocked_conns;    /* connections waiting for a buffer */
-        int               fmp_nactive_fmbs;     /* # buffers in use */
-        int               fmp_buff_pages;       /* # pages per buffer */
-} ksock_fmb_pool_t;
-
-
-/* Elements of the ksock_nal_data_t::ksnd_schedulers array, which has
- * ksnd_nschedulers entries. */
-typedef struct                                  /* per scheduler state */
-{
-        spinlock_t        kss_lock;             /* serialise */
-        struct list_head  kss_rx_conns;         /* conn waiting to be read */
-        struct list_head  kss_tx_conns;         /* conn waiting to be written */
-#if SOCKNAL_ZC
-        struct list_head  kss_zctxdone_list;    /* completed ZC transmits */
-#endif
-        wait_queue_head_t kss_waitq;            /* where scheduler sleeps */
-        int               kss_nconns;           /* # connections assigned to this scheduler */
-} ksock_sched_t;
-
-/* Per-IRQ scheduler binding; ksock_nal_data_t::ksnd_irqinfo holds one per
- * IRQ.  The bit-fields are explicitly unsigned: a signed 1-bit field can
- * only hold 0 and -1 (so it never compares equal to 1), and a signed 6-bit
- * field tops out at 31 rather than the 63 that "assumes < 64" needs. */
-typedef struct
-{
-        unsigned int      ksni_valid:1;         /* been set yet? */
-        unsigned int      ksni_bound:1;         /* bound to a cpu yet? */
-        unsigned int      ksni_sched:6;         /* which scheduler (assumes < 64) */
-} ksock_irqinfo_t;
-
-/* One local network interface; elements of the
- * ksock_nal_data_t::ksnd_interfaces array. */
-typedef struct
-{
-        __u32             ksni_ipaddr;          /* interface's IP address */
-        __u32             ksni_netmask;         /* interface's network mask */
-        int               ksni_nroutes;         /* # routes using (active) */
-        int               ksni_npeers;          /* # peers using (passive) */
-} ksock_interface_t;
-
-/* Runtime tunables.  The module init code asserts that each tunable is
- * exactly sizeof(int), as proc_dointvec() requires, and sets the defaults
- * from the SOCKNAL_* constants. */
-typedef struct
-{
-        int               ksnd_io_timeout;      /* "stuck" socket timeout (seconds) */
-        int               ksnd_eager_ack;       /* make TCP ack eagerly? */
-        int               ksnd_typed_conns;     /* drive sockets by type? */
-        int               ksnd_min_bulk;        /* smallest "large" message */
-        int               ksnd_buffer_size;     /* socket buffer size */
-        int               ksnd_nagle;           /* enable NAGLE? */
-        int               ksnd_irq_affinity;    /* enable IRQ affinity? */
-        int               ksnd_keepalive_idle;  /* # idle secs before 1st probe */
-        int               ksnd_keepalive_count; /* # probes */
-        int               ksnd_keepalive_intvl; /* time between probes */
-#if SOCKNAL_ZC
-        unsigned int      ksnd_zc_min_frag;     /* minimum zero copy frag size */
-#endif
-        struct ctl_table_header *ksnd_sysctl;   /* sysctl interface */
-} ksock_tunables_t;
-
-/* Global state of the socket NAL.  ksnd_init records how far
- * initialisation has progressed (one of the SOCKNAL_INIT_* values). */
-typedef struct
-{
-        int               ksnd_init;            /* initialisation state */
-        __u64             ksnd_incarnation;     /* my epoch */
-        
-        rwlock_t          ksnd_global_lock;     /* stabilize peer/conn ops */
-        struct list_head *ksnd_peers;           /* hash table of all my known peers */
-        int               ksnd_peer_hash_size;  /* size of ksnd_peers */
-
-        int               ksnd_nthreads;        /* # live threads */
-        int               ksnd_shuttingdown;    /* tell threads to exit */
-        int               ksnd_nschedulers;     /* # schedulers */
-        ksock_sched_t    *ksnd_schedulers;      /* their state */
-
-        atomic_t          ksnd_npeers;          /* total # peers extant */
-        atomic_t          ksnd_nclosing_conns;  /* # closed conns extant */
-
-        kpr_router_t      ksnd_router;          /* THE router */
-
-        ksock_fmb_pool_t  ksnd_small_fmp;       /* small message forwarding buffers */
-        ksock_fmb_pool_t  ksnd_large_fmp;       /* large message forwarding buffers */
-
-        atomic_t          ksnd_nactive_ltxs;    /* #active ltxs */
-
-        struct list_head  ksnd_deathrow_conns;  /* conns to be closed */
-        struct list_head  ksnd_zombie_conns;    /* conns to be freed */
-        struct list_head  ksnd_enomem_conns;    /* conns to be retried */
-        wait_queue_head_t ksnd_reaper_waitq;    /* reaper sleeps here */
-        unsigned long     ksnd_reaper_waketime; /* when reaper will wake */
-        spinlock_t        ksnd_reaper_lock;     /* serialise */
-
-        int               ksnd_enomem_tx;       /* test ENOMEM sender */
-        int               ksnd_stall_tx;        /* test sluggish sender */
-        int               ksnd_stall_rx;        /* test sluggish receiver */
-
-        struct list_head  ksnd_autoconnectd_routes; /* routes waiting to be connected */
-        wait_queue_head_t ksnd_autoconnectd_waitq; /* autoconnectds sleep here */
-        spinlock_t        ksnd_autoconnectd_lock; /* serialise */
-
-        ksock_irqinfo_t   ksnd_irqinfo[NR_IRQS];/* irq->scheduler lookup */
-
-        int               ksnd_ninterfaces;     /* presumably # entries in use below -- TODO confirm */
-        ksock_interface_t ksnd_interfaces[SOCKNAL_MAX_INTERFACES]; /* published interfaces */
-} ksock_nal_data_t;
-
-/* Values of ksock_nal_data_t::ksnd_init, in the order startup reaches them */
-#define SOCKNAL_INIT_NOTHING    0               /* nothing initialised */
-#define SOCKNAL_INIT_DATA       1               /* lists/ptrs/locks initialised */
-#define SOCKNAL_INIT_LIB        2               /* lib_init() called */
-#define SOCKNAL_INIT_ALL        3               /* everything initialised */
-
-/* A packet just assembled for transmission is represented by 1 or more
- * struct iovec fragments (the first frag contains the portals header),
- * followed by 0 or more ptl_kiov_t fragments.
- *
- * On the receive side, initially 1 struct iovec fragment is posted for
- * receive (the header).  Once the header has been received, the payload is
- * received into either struct iovec or ptl_kiov_t fragments, depending on
- * what the header matched or whether the message needs forwarding. */
-
-struct ksock_conn;                              /* forward ref */
-struct ksock_peer;                              /* forward ref */
-struct ksock_route;                             /* forward ref */
-
-/* Common transmit descriptor, embedded as the first member of both
- * ksock_ftx_t (forwarded) and ksock_ltx_t (locally sourced). */
-typedef struct                                  /* transmit packet */
-{
-        struct list_head        tx_list;        /* queue on conn for transmission etc */
-        char                    tx_isfwd;       /* forwarding / sourced here */
-        int                     tx_nob;         /* # packet bytes */
-        int                     tx_resid;       /* residual bytes */
-        int                     tx_niov;        /* # packet iovec frags */
-        struct iovec           *tx_iov;         /* packet iovec frags */
-        int                     tx_nkiov;       /* # packet page frags */
-        ptl_kiov_t             *tx_kiov;        /* packet page frags */
-        struct ksock_conn      *tx_conn;        /* owning conn */
-        ptl_hdr_t              *tx_hdr;         /* packet header (for debug only) */
-#if SOCKNAL_ZC        
-        zccd_t                  tx_zccd;        /* zero copy callback descriptor */
-#endif
-} ksock_tx_t;
-
-/* Transmit descriptor for a forwarded packet: the common descriptor plus a
- * single iovec for the header. */
-typedef struct                                  /* forwarded packet */
-{
-        ksock_tx_t             ftx_tx;          /* send info */
-        struct iovec           ftx_iov;         /* hdr iovec */
-} ksock_ftx_t;
-
-/* NB ksock_tx_t::tx_zccd only exists when SOCKNAL_ZC is set, so this macro
- * can only be expanded in SOCKNAL_ZC builds. */
-#define KSOCK_ZCCD_2_TX(ptr)   list_entry (ptr, ksock_tx_t, tx_zccd)
-/* network zero copy callback descriptor embedded in ksock_tx_t */
-
-/* Variable-sized descriptor: ltx_desc_size records how many bytes were
- * allocated for it, and the zero-length ltx_kiov array at the end marks
- * where the payload frags start. */
-typedef struct                                  /* locally transmitted packet */
-{
-        ksock_tx_t              ltx_tx;         /* send info */
-        void                   *ltx_private;    /* lib_finalize() callback arg */
-        void                   *ltx_cookie;     /* lib_finalize() callback arg */
-        ptl_hdr_t               ltx_hdr;        /* buffer for packet header */
-        int                     ltx_desc_size;  /* bytes allocated for this desc */
-        struct iovec            ltx_iov[1];     /* iov for hdr + payload */
-        ptl_kiov_t              ltx_kiov[0];    /* kiov for payload */
-} ksock_ltx_t;
-
-#define KSOCK_TX_2_KPR_FWD_DESC(ptr)    list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch)
-/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */
-
-#define KSOCK_TX_2_KSOCK_LTX(ptr)       list_entry (ptr, ksock_ltx_t, ltx_tx)
-/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */
-
-/* NB list_entry() is used here as a convenient macro for calculating a
- * pointer to a struct from the address of a member. */
-
-typedef struct                                  /* Kernel portals Socket Forwarding message buffer */
-{                                               /* (socknal->router) */
-        struct list_head        fmb_list;       /* queue idle */
-        kpr_fwd_desc_t          fmb_fwd;        /* router's descriptor */
-        ksock_fmb_pool_t       *fmb_pool;       /* owning pool */
-        struct ksock_peer      *fmb_peer;       /* peer received from */
-        ptl_hdr_t               fmb_hdr;        /* message header */
-        ptl_kiov_t              fmb_kiov[0];    /* payload frags (zero-length trailing array;
-                                                 * storage is allocated beyond the struct) */
-} ksock_fmb_t;
-
-/* Space for the rx frag descriptors; we either read a single contiguous
- * header, or up to PTL_MD_MAX_IOV frags of payload of either type.
- * Only one of the two arrays is needed for any given receive, hence
- * the union. */
-typedef union {
-        struct iovec    iov[PTL_MD_MAX_IOV];    /* virtual-address frags */
-        ptl_kiov_t      kiov[PTL_MD_MAX_IOV];   /* page frags */
-} ksock_rxiovspace_t;
-
-#define SOCKNAL_RX_HEADER       1               /* reading header */
-#define SOCKNAL_RX_BODY         2               /* reading body (to deliver here) */
-#define SOCKNAL_RX_BODY_FWD     3               /* reading body (to forward) */
-#define SOCKNAL_RX_SLOP         4               /* skipping body */
-#define SOCKNAL_RX_GET_FMB      5               /* scheduled for forwarding */
-#define SOCKNAL_RX_FMB_SLEEP    6               /* blocked waiting for a fwd desc */
-
-typedef struct ksock_conn                       /* one socket connection to a peer, with its
-                                                 * receive and transmit state */
-{ 
-        struct ksock_peer  *ksnc_peer;          /* owning peer */
-        struct ksock_route *ksnc_route;         /* owning route */
-        struct list_head    ksnc_list;          /* stash on peer's conn list */
-        struct socket      *ksnc_sock;          /* actual socket */
-        void               *ksnc_saved_data_ready; /* socket's original data_ready() callback */
-        void               *ksnc_saved_write_space; /* socket's original write_space() callback */
-        atomic_t            ksnc_refcount;      /* # users */
-        ksock_sched_t     *ksnc_scheduler;     /* who schedules this connection */
-        __u32               ksnc_myipaddr;      /* my IP */
-        __u32               ksnc_ipaddr;        /* peer's IP */
-        int                 ksnc_port;          /* peer's port */
-        int                 ksnc_closing;       /* being shut down (checked under
-                                                 * ksnd_global_lock by ksocknal_getconnsock()) */
-        int                 ksnc_type;          /* type of connection */
-        __u64               ksnc_incarnation;   /* peer's incarnation */
-        
-        /* READER */
-        struct list_head    ksnc_rx_list;       /* where I enq waiting input or a forwarding descriptor */
-        unsigned long       ksnc_rx_deadline;   /* when (in jiffies) receive times out */
-        int                 ksnc_rx_started;    /* started receiving a message */
-        int                 ksnc_rx_ready;      /* data ready to read */
-        int                 ksnc_rx_scheduled;  /* being progressed */
-        int                 ksnc_rx_state;      /* what is being read (a SOCKNAL_RX_* value) */
-        int                 ksnc_rx_nob_left;   /* # bytes to next hdr/body  */
-        int                 ksnc_rx_nob_wanted; /* bytes actually wanted */
-        int                 ksnc_rx_niov;       /* # iovec frags */
-        struct iovec       *ksnc_rx_iov;        /* the iovec frags */
-        int                 ksnc_rx_nkiov;      /* # page frags */
-        ptl_kiov_t         *ksnc_rx_kiov;       /* the page frags */
-        ksock_rxiovspace_t  ksnc_rx_iov_space;  /* space for frag descriptors */
-        void               *ksnc_cookie;        /* rx lib_finalize passthru arg */
-        ptl_hdr_t           ksnc_hdr;           /* where I read headers into */
-
-        /* WRITER */
-        struct list_head    ksnc_tx_list;       /* where I enq waiting for output space */
-        struct list_head    ksnc_tx_queue;      /* packets waiting to be sent */
-        unsigned long       ksnc_tx_deadline;   /* when (in jiffies) tx times out */
-        int                 ksnc_tx_bufnob;     /* send buffer marker: bytes sent, clamped down to
-                                                 * sk_wmem_queued by ksocknal_transmit() */
-        atomic_t            ksnc_tx_nob;        /* # bytes queued */
-        int                 ksnc_tx_ready;      /* write space */
-        int                 ksnc_tx_scheduled;  /* being progressed */
-
-#if !SOCKNAL_SINGLE_FRAG_RX
-        struct iovec        ksnc_rx_scratch_iov[PTL_MD_MAX_IOV]; /* copy of rx frags handed to sock_recvmsg() */
-#endif
-#if !SOCKNAL_SINGLE_FRAG_TX
-        struct iovec        ksnc_tx_scratch_iov[PTL_MD_MAX_IOV]; /* copy of tx frags handed to sock_sendmsg() */
-#endif
-} ksock_conn_t;
-
-#define KSNR_TYPED_ROUTES   ((1 << SOCKNAL_CONN_CONTROL) |      \
-                             (1 << SOCKNAL_CONN_BULK_IN) |      \
-                             (1 << SOCKNAL_CONN_BULK_OUT))
-
-typedef struct ksock_route                      /* a way of reaching a peer (address + port) */
-{
-        struct list_head    ksnr_list;          /* chain on peer route list */
-        struct list_head    ksnr_connect_list;  /* chain on autoconnect list */
-        struct ksock_peer  *ksnr_peer;          /* owning peer */
-        atomic_t            ksnr_refcount;      /* # users */
-        unsigned long       ksnr_timeout;       /* when (in jiffies) reconnection can happen next */
-        unsigned int        ksnr_retry_interval; /* how long between retries */
-        __u32               ksnr_myipaddr;      /* my IP */
-        __u32               ksnr_ipaddr;        /* IP address to connect to */
-        int                 ksnr_port;          /* port to connect to */
-        unsigned int        ksnr_connecting:4;  /* autoconnects in progress by type (presumably a
-                                                 * (1 << SOCKNAL_CONN_*) bitmask like
-                                                 * KSNR_TYPED_ROUTES -- TODO confirm) */
-        unsigned int        ksnr_connected:4;   /* connections established by type (same encoding) */
-        unsigned int        ksnr_deleted:1;     /* been removed from peer? */
-        unsigned int        ksnr_share_count;   /* created explicitly? */
-        int                 ksnr_conn_count;    /* # conns established by this route */
-} ksock_route_t;
-
-typedef struct ksock_peer                       /* a remote node, identified by NID */
-{
-        struct list_head    ksnp_list;          /* stash on global peer list */
-        ptl_nid_t           ksnp_nid;           /* who's on the other end(s) */
-        atomic_t            ksnp_refcount;      /* # users */
-        int                 ksnp_sharecount;    /* lconf usage counter */
-        int                 ksnp_closing;       /* being closed */
-        int                 ksnp_error;         /* errno on closing last conn */
-        struct list_head    ksnp_conns;         /* all active connections */
-        struct list_head    ksnp_routes;        /* routes */
-        struct list_head    ksnp_tx_queue;      /* waiting packets */
-        unsigned long       ksnp_last_alive;    /* when (in jiffies) I was last alive; refreshed
-                                                 * on successful receive and on inferred ACKs */
-        int                 ksnp_n_passive_ips; /* # of entries in use in ksnp_passive_ips
-                                                 * (presumably -- TODO confirm) */
-        __u32               ksnp_passive_ips[SOCKNAL_MAX_INTERFACES]; /* preferred local interfaces */
-} ksock_peer_t;
-
-
-extern lib_nal_t        ksocknal_lib;
-extern ksock_nal_data_t ksocknal_data;
-extern ksock_tunables_t ksocknal_tunables;
-
-/* Map a NID to the hash chain in ksocknal_data.ksnd_peers on which its
- * peer is (or would be) stashed.  The NID is truncated to unsigned int
- * and reduced modulo the table size to pick the bucket. */
-static inline struct list_head *
-ksocknal_nid2peerlist (ptl_nid_t nid)
-{
-        unsigned int bucket = (unsigned int)nid;
-
-        bucket %= ksocknal_data.ksnd_peer_hash_size;
-        return (ksocknal_data.ksnd_peers + bucket);
-}
-
-/* Pin conn's socket for I/O.  ksnc_closing is sampled under the global
- * read lock; if the connection is not closing, a reference is taken on
- * the socket's file and 0 is returned.  Otherwise no reference is taken
- * and -ESHUTDOWN is returned.  Pair with ksocknal_putconnsock(). */
-static inline int
-ksocknal_getconnsock (ksock_conn_t *conn)
-{
-        int   closing;
-
-        read_lock (&ksocknal_data.ksnd_global_lock);
-        closing = conn->ksnc_closing;
-        if (!closing)
-                get_file (conn->ksnc_sock->file);
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-        return (closing ? -ESHUTDOWN : 0);
-}
-
-static inline void
-ksocknal_putconnsock (ksock_conn_t *conn)
-{       /* Drop the socket file reference taken by a successful
-         * ksocknal_getconnsock(). */
-        fput (conn->ksnc_sock->file);
-}
-
-#ifndef CONFIG_SMP                      /* uniprocessor build */
-static inline
-int ksocknal_nsched(void)
-{       /* always exactly one scheduler */
-        return 1;
-}
-#else                                   /* SMP build */
-#include <linux/lustre_version.h>
-# if !(defined(CONFIG_X86) && (LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,21))) || defined(CONFIG_X86_64) || (LUSTRE_KERNEL_VERSION < 39) || ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) && !defined(CONFIG_X86_HT))
-static inline int
-ksocknal_nsched(void)
-{       /* one scheduler per online CPU */
-        return num_online_cpus();
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{       /* scheduler index maps 1:1 onto a CPU number */
-        return i;
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{       /* same identity mapping for the IRQ-side CPU */
-        return i;
-}
-# else                                  /* sibling-aware variant (uses smp_num_siblings) */
-static inline int
-ksocknal_nsched(void)
-{       /* one scheduler per online CPU, or per pair of online CPUs
-         * when smp_num_siblings == 2 */
-        if (smp_num_siblings == 1)
-                return (num_online_cpus());
-
-        /* We need to know if this assumption is crap */
-        LASSERT (smp_num_siblings == 2);
-        return (num_online_cpus()/2);
-}
-
-static inline int
-ksocknal_sched2cpu(int i)
-{       /* with 2 siblings, scheduler i maps to even CPU 2*i */
-        if (smp_num_siblings == 1)
-                return i;
-
-        return (i * 2);
-}
-
-static inline int
-ksocknal_irqsched2cpu(int i)
-{       /* the CPU after ksocknal_sched2cpu(i).
-         * NOTE(review): when smp_num_siblings == 1 this yields i + 1,
-         * unlike the identity mapping in the other variant -- confirm
-         * this is intended and in range for the last scheduler. */
-        return (ksocknal_sched2cpu(i) + 1);
-}
-# endif
-#endif
-
-extern void ksocknal_put_route (ksock_route_t *route);
-extern void ksocknal_put_peer (ksock_peer_t *peer);
-extern ksock_peer_t *ksocknal_find_peer_locked (ptl_nid_t nid);
-extern ksock_peer_t *ksocknal_get_peer (ptl_nid_t nid);
-extern int ksocknal_del_route (ptl_nid_t nid, __u32 ipaddr,
-                               int single, int keep_conn);
-extern int ksocknal_create_conn (ksock_route_t *route,
-                                 struct socket *sock, int type);
-extern void ksocknal_close_conn_locked (ksock_conn_t *conn, int why);
-extern void ksocknal_terminate_conn (ksock_conn_t *conn);
-extern void ksocknal_destroy_conn (ksock_conn_t *conn);
-extern void ksocknal_put_conn (ksock_conn_t *conn);
-extern int ksocknal_close_stale_conns_locked (ksock_peer_t *peer, __u64 incarnation);
-extern int ksocknal_close_conn_and_siblings (ksock_conn_t *conn, int why);
-extern int ksocknal_close_matching_conns (ptl_nid_t nid, __u32 ipaddr);
-
-extern void ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn);
-extern void ksocknal_tx_done (ksock_tx_t *tx, int asynch);
-extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void ksocknal_fmb_callback (void *arg, int error);
-extern void ksocknal_notify (void *arg, ptl_nid_t gw_nid, int alive);
-extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg);
-extern int ksocknal_new_packet (ksock_conn_t *conn, int skip);
-extern int ksocknal_scheduler (void *arg);
-extern void ksocknal_data_ready(struct sock *sk, int n);
-extern void ksocknal_write_space(struct sock *sk);
-extern int ksocknal_autoconnectd (void *arg);
-extern int ksocknal_reaper (void *arg);
-extern int ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, 
-                                       int *rxmem, int *nagle);
-extern int ksocknal_setup_sock (struct socket *sock);
-extern int ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs);
-extern int ksocknal_recv_hello (ksock_conn_t *conn,
-                                ptl_nid_t *nid, __u64 *incarnation, __u32 *ipaddrs);
diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c
deleted file mode 100644 (file)
index ed91f94..0000000
+++ /dev/null
@@ -1,2934 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include "socknal.h"
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-# include <linux/syscalls.h>
-#endif
-
-/*
- *  LIB functions follow
- *
- */
-/* Report the "distance" to 'nid' in *dist: 0 when it is this NAL's own
- * NID, 1 for anything else.  Always returns 0. */
-int
-ksocknal_dist(lib_nal_t *nal, ptl_nid_t nid, unsigned long *dist)
-{
-        /* I would guess that if ksocknal_get_peer (nid) == NULL,
-           and we're not routing, then 'nid' is very distant :) */
-        *dist = (nal->libnal_ni.ni_pid.nid == nid) ? 0 : 1;
-
-        return 0;
-}
-
-void
-ksocknal_free_ltx (ksock_ltx_t *ltx)
-{       /* Release a local tx descriptor: drop the count of active ltxs,
-         * then free the descriptor using the allocation size recorded
-         * in the descriptor itself. */
-        atomic_dec(&ksocknal_data.ksnd_nactive_ltxs);
-        PORTAL_FREE(ltx, ltx->ltx_desc_size);
-}
-
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-/* Find the struct page backing a kernel virtual address.  Addresses in
- * the vmalloc range (and, with CONFIG_HIGHMEM, the pkmap range) are
- * resolved with vmalloc_to_page(); everything else with virt_to_page().
- * Returns NULL if no valid page was found. */
-struct page *
-ksocknal_kvaddr_to_page (unsigned long vaddr)
-{
-        struct page *pg;
-
-        if (vaddr >= VMALLOC_START && vaddr < VMALLOC_END) {
-                pg = vmalloc_to_page ((void *)vaddr);
-        }
-#if CONFIG_HIGHMEM
-        else if (vaddr >= PKMAP_BASE &&
-                 vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) {
-                /* in 2.4 vmalloc_to_page() just walks the page tables */
-                pg = vmalloc_to_page ((void *)vaddr);
-        }
-#endif
-        else {
-                pg = virt_to_page (vaddr);
-        }
-
-        if (pg != NULL && VALID_PAGE (pg))
-                return (pg);
-
-        return (NULL);
-}
-#endif
-
-/* Send as much as the socket will take of tx's remaining iovec frags.
- *
- * With SOCKNAL_ZC && SOCKNAL_VADDR_ZC, a first frag that is large enough
- * and whose route supports scatter/gather plus checksum offload is sent
- * zero-copy with tcp_sendpage_zccd(); otherwise the frags are copied
- * into a scratch iovec and sent with sock_sendmsg().
- *
- * Returns the socket op's result: <= 0 if nothing was sent (0 or a
- * negative errno), otherwise the number of bytes sent, in which case
- * tx_resid, tx_iov and tx_niov have been advanced past those bytes. */
-int
-ksocknal_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
-        struct socket *sock = conn->ksnc_sock;
-        struct iovec  *iov = tx->tx_iov;
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-        unsigned long  vaddr = (unsigned long)iov->iov_base;
-        int            offset = vaddr & (PAGE_SIZE - 1);
-        int            zcsize = MIN (iov->iov_len, PAGE_SIZE - offset);
-        struct page   *page;
-#endif
-        int            nob;
-        int            rc;
-
-        /* NB we can't trust socket ops to either consume our iovs
-         * or leave them alone. */
-        LASSERT (tx->tx_niov > 0);
-        
-#if (SOCKNAL_ZC && SOCKNAL_VADDR_ZC)
-        /* zc_min_frag is read from ksocknal_tunables, as in
-         * ksocknal_send_kiov() */
-        if (zcsize >= ksocknal_tunables.ksnd_zc_min_frag &&
-            (sock->sk->route_caps & NETIF_F_SG) &&
-            (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) &&
-            (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) {
-                int msgflg = MSG_DONTWAIT;
-                
-                CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n",
-                       (void *)vaddr, page, page_address(page), offset, zcsize);
-
-                /* more to come if other packets are queued or this
-                 * chunk doesn't finish the packet */
-                if (!list_empty (&conn->ksnc_tx_queue) ||
-                    zcsize < tx->tx_resid)
-                        msgflg |= MSG_MORE;
-                
-                rc = tcp_sendpage_zccd(sock, page, offset, zcsize, msgflg, &tx->tx_zccd);
-        } else
-#endif
-        {
-#if SOCKNAL_SINGLE_FRAG_TX
-                struct iovec    scratch;
-                struct iovec   *scratchiov = &scratch;
-                int             niov = 1;
-#else
-                struct iovec   *scratchiov = conn->ksnc_tx_scratch_iov;
-                int             niov = tx->tx_niov;
-#endif
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = scratchiov,
-                        .msg_iovlen     = niov,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = MSG_DONTWAIT
-                };
-                mm_segment_t oldmm = get_fs();
-                int  i;
-
-                /* send from a copy so the socket op can't disturb
-                 * the tx's own frag descriptors */
-                for (nob = i = 0; i < niov; i++) {
-                        scratchiov[i] = tx->tx_iov[i];
-                        nob += scratchiov[i].iov_len;
-                }
-
-                if (!list_empty(&conn->ksnc_tx_queue) ||
-                    nob < tx->tx_resid)
-                        msg.msg_flags |= MSG_MORE;
-                
-                /* the iovec holds kernel addresses */
-                set_fs (KERNEL_DS);
-                rc = sock_sendmsg(sock, &msg, nob);
-                set_fs (oldmm);
-        } 
-
-        if (rc <= 0)                            /* sent nothing? */
-                return (rc);
-
-        nob = rc;
-        LASSERT (nob <= tx->tx_resid);
-        tx->tx_resid -= nob;
-
-        /* "consume" iov */
-        do {
-                LASSERT (tx->tx_niov > 0);
-                
-                if (nob < iov->iov_len) {
-                        /* partially sent frag: trim it in place */
-                        iov->iov_base = (void *)(((unsigned long)(iov->iov_base)) + nob);
-                        iov->iov_len -= nob;
-                        return (rc);
-                }
-
-                nob -= iov->iov_len;
-                tx->tx_iov = ++iov;
-                tx->tx_niov--;
-        } while (nob != 0);
-        
-        return (rc);
-}
-
-int
-ksocknal_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
-{
-        struct socket *sock = conn->ksnc_sock;
-        ptl_kiov_t    *kiov = tx->tx_kiov;
-        int            rc;
-        int            nob;
-        
-        /* NB we can't trust socket ops to either consume our iovs
-         * or leave them alone. */
-        LASSERT (tx->tx_niov == 0);
-        LASSERT (tx->tx_nkiov > 0);
-
-#if SOCKNAL_ZC
-        if (kiov->kiov_len >= ksocknal_tunables.ksnd_zc_min_frag &&
-            (sock->sk->route_caps & NETIF_F_SG) &&
-            (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM))) {
-                struct page   *page = kiov->kiov_page;
-                int            offset = kiov->kiov_offset;
-                int            fragsize = kiov->kiov_len;
-                int            msgflg = MSG_DONTWAIT;
-
-                CDEBUG(D_NET, "page %p + offset %x for %d\n",
-                               page, offset, kiov->kiov_len);
-
-                if (!list_empty(&conn->ksnc_tx_queue) ||
-                    fragsize < tx->tx_resid)
-                        msgflg |= MSG_MORE;
-
-                rc = tcp_sendpage_zccd(sock, page, offset, fragsize, msgflg,
-                                       &tx->tx_zccd);
-        } else
-#endif
-        {
-#if SOCKNAL_SINGLE_FRAG_TX || !SOCKNAL_RISK_KMAP_DEADLOCK
-                struct iovec  scratch;
-                struct iovec *scratchiov = &scratch;
-                int           niov = 1;
-#else
-#warning "XXX risk of kmap deadlock on multiple frags..."
-                struct iovec *scratchiov = conn->ksnc_tx_scratch_iov;
-                int           niov = tx->tx_nkiov;
-#endif
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = scratchiov,
-                        .msg_iovlen     = niov,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = MSG_DONTWAIT
-                };
-                mm_segment_t  oldmm = get_fs();
-                int           i;
-                
-                for (nob = i = 0; i < niov; i++) {
-                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
-                                                 kiov[i].kiov_offset;
-                        nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-                }
-
-                if (!list_empty(&conn->ksnc_tx_queue) ||
-                    nob < tx->tx_resid)
-                        msg.msg_flags |= MSG_DONTWAIT;
-
-                set_fs (KERNEL_DS);
-                rc = sock_sendmsg(sock, &msg, nob);
-                set_fs (oldmm);
-
-                for (i = 0; i < niov; i++)
-                        kunmap(kiov[i].kiov_page);
-        }
-
-        if (rc <= 0)                            /* sent nothing? */
-                return (rc);
-
-        nob = rc;
-        LASSERT (nob <= tx->tx_resid);
-        tx->tx_resid -= nob;
-
-        do {
-                LASSERT(tx->tx_nkiov > 0);
-                
-                if (nob < kiov->kiov_len) {
-                        kiov->kiov_offset += nob;
-                        kiov->kiov_len -= nob;
-                        return rc;
-                }
-                
-                nob -= kiov->kiov_len;
-                tx->tx_kiov = ++kiov;
-                tx->tx_nkiov--;
-        } while (nob != 0);
-
-        return (rc);
-}
-
-int
-ksocknal_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
-{       /* Push tx out on conn until it is completely sent or the socket
-         * stops accepting data.  Returns 0 once tx_resid reaches 0,
-         * -ESHUTDOWN if conn is closing, -EAGAIN if nothing could be
-         * sent, -ENOMEM if that looks like memory pressure (see below),
-         * or another negative errno from the send. */
-        int      rc;
-        int      bufnob;
-        
-        if (ksocknal_data.ksnd_stall_tx != 0) { /* optional delay of ksnd_stall_tx seconds */
-                set_current_state (TASK_UNINTERRUPTIBLE);
-                schedule_timeout (ksocknal_data.ksnd_stall_tx * HZ);
-        }
-
-        LASSERT (tx->tx_resid != 0);
-
-        rc = ksocknal_getconnsock (conn);
-        if (rc != 0) {
-                LASSERT (conn->ksnc_closing);
-                return (-ESHUTDOWN);
-        }
-
-        do {
-                if (ksocknal_data.ksnd_enomem_tx > 0) {
-                        /* testing... (simulate a send that made no progress) */
-                        ksocknal_data.ksnd_enomem_tx--;
-                        rc = -EAGAIN;
-                } else if (tx->tx_niov != 0) {
-                        rc = ksocknal_send_iov (conn, tx);
-                } else {
-                        rc = ksocknal_send_kiov (conn, tx);
-                }
-
-                bufnob = conn->ksnc_sock->sk->sk_wmem_queued;
-                if (rc > 0)                     /* sent something? */
-                        conn->ksnc_tx_bufnob += rc; /* account it */
-                
-                if (bufnob < conn->ksnc_tx_bufnob) {
-                        /* allocated send buffer bytes < computed; infer
-                         * something got ACKed */
-                        conn->ksnc_tx_deadline = jiffies + 
-                                                 ksocknal_tunables.ksnd_io_timeout * HZ;
-                        conn->ksnc_peer->ksnp_last_alive = jiffies;
-                        conn->ksnc_tx_bufnob = bufnob;
-                        mb();
-                }
-
-                if (rc <= 0) { /* Didn't write anything? */
-                        unsigned long  flags;
-                        ksock_sched_t *sched;
-
-                        if (rc == 0) /* some stacks return 0 instead of -EAGAIN */
-                                rc = -EAGAIN;
-
-                        if (rc != -EAGAIN) /* real error: hand it to the caller */
-                                break;
-
-                        /* Check if EAGAIN is due to memory pressure */
-
-                        sched = conn->ksnc_scheduler;
-                        spin_lock_irqsave(&sched->kss_lock, flags);
-                                
-                        if (!test_bit(SOCK_NOSPACE, &conn->ksnc_sock->flags) &&
-                            !conn->ksnc_tx_ready) {
-                                /* SOCK_NOSPACE is set when the socket fills
-                                 * and cleared in the write_space callback
-                                 * (which also sets ksnc_tx_ready).  If
-                                 * SOCK_NOSPACE and ksnc_tx_ready are BOTH
-                                 * zero, I didn't fill the socket and
-                                 * write_space won't reschedule me, so I
-                                 * return -ENOMEM to get my caller to retry
-                                 * after a timeout */
-                                rc = -ENOMEM;
-                        }
-
-                        spin_unlock_irqrestore(&sched->kss_lock, flags);
-                        break;
-                }
-
-                /* socket's wmem_queued now includes 'rc' bytes */
-                atomic_sub (rc, &conn->ksnc_tx_nob);
-                rc = 0;
-
-        } while (tx->tx_resid != 0);
-
-        ksocknal_putconnsock (conn);    /* drop the ref taken by ksocknal_getconnsock() */
-        return (rc);
-}
-
-/* Set TCP_QUICKACK on conn's socket.  The option value lives in kernel
- * memory, so the address limit is switched to KERNEL_DS around the
- * setsockopt call and restored afterwards.  The setsockopt result is
- * not checked. */
-void
-ksocknal_eager_ack (ksock_conn_t *conn)
-{
-        struct socket *sock = conn->ksnc_sock;
-        mm_segment_t   saved_fs;
-        int            quickack = 1;
-        
-        /* Remind the socket to ACK eagerly.  If I don't, the socket might
-         * think I'm about to send something it could piggy-back the ACK
-         * on, introducing delay in completing zero-copy sends in my
-         * peer. */
-
-        saved_fs = get_fs();
-        set_fs(KERNEL_DS);
-        sock->ops->setsockopt (sock, SOL_TCP, TCP_QUICKACK,
-                               (char *)&quickack, sizeof (quickack));
-        set_fs(saved_fs);
-}
-
-int
-ksocknal_recv_iov (ksock_conn_t *conn)
-{       /* Non-blocking read into conn's current iovec frags.
-         * Returns <= 0 straight from sock_recvmsg() if nothing was read
-         * (0 or a negative errno), -EAGAIN if data arrived but a frag
-         * is only partly filled, or the byte count (> 0) if the read
-         * ended exactly on a frag boundary. */
-#if SOCKNAL_SINGLE_FRAG_RX
-        struct iovec  scratch;
-        struct iovec *scratchiov = &scratch;
-        int           niov = 1;
-#else
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
-        int           niov = conn->ksnc_rx_niov;
-#endif
-        struct iovec *iov = conn->ksnc_rx_iov;
-        struct msghdr msg = {
-                .msg_name       = NULL,
-                .msg_namelen    = 0,
-                .msg_iov        = scratchiov,
-                .msg_iovlen     = niov,
-                .msg_control    = NULL,
-                .msg_controllen = 0,
-                .msg_flags      = 0
-        };
-        mm_segment_t oldmm = get_fs();
-        int          nob;
-        int          i;
-        int          rc;
-
-        /* NB we can't trust socket ops to either consume our iovs
-         * or leave them alone. */
-        LASSERT (niov > 0);
-
-        for (nob = i = 0; i < niov; i++) {      /* receive via a copy of the frags */
-                scratchiov[i] = iov[i];
-                nob += scratchiov[i].iov_len;
-        }
-        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
-        set_fs (KERNEL_DS);                     /* the iovec holds kernel addresses */
-        rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-        /* NB this is just a boolean..........................^ */
-        set_fs (oldmm);
-
-        if (rc <= 0)
-                return (rc);
-
-        /* received something... */
-        nob = rc;
-
-        conn->ksnc_peer->ksnp_last_alive = jiffies;
-        conn->ksnc_rx_deadline = jiffies + 
-                                 ksocknal_tunables.ksnd_io_timeout * HZ;
-        mb();                           /* order with setting rx_started */
-        conn->ksnc_rx_started = 1;
-
-        conn->ksnc_rx_nob_wanted -= nob;
-        conn->ksnc_rx_nob_left -= nob;
-
-        do {                            /* "consume" the frags that were filled */
-                LASSERT (conn->ksnc_rx_niov > 0);
-                
-                if (nob < iov->iov_len) {       /* partly filled frag: trim it in place */
-                        iov->iov_len -= nob;
-                        iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob);
-                        return (-EAGAIN);
-                }
-                
-                nob -= iov->iov_len;
-                conn->ksnc_rx_iov = ++iov;
-                conn->ksnc_rx_niov--;
-        } while (nob != 0);
-
-        return (rc);
-}
-
-int
-ksocknal_recv_kiov (ksock_conn_t *conn)
-{       /* Non-blocking read into conn's current page frags, which are
-         * kmap()ed for the duration of the read.
-         * Returns <= 0 straight from sock_recvmsg() if nothing was read
-         * (0 or a negative errno), -EAGAIN if data arrived but a frag
-         * is only partly filled, or 1 if the read ended exactly on a
-         * frag boundary. */
-#if SOCKNAL_SINGLE_FRAG_RX || !SOCKNAL_RISK_KMAP_DEADLOCK
-        struct iovec  scratch;
-        struct iovec *scratchiov = &scratch;
-        int           niov = 1;
-#else
-#warning "XXX risk of kmap deadlock on multiple frags..."
-        struct iovec *scratchiov = conn->ksnc_rx_scratch_iov;
-        int           niov = conn->ksnc_rx_nkiov;
-#endif   
-        ptl_kiov_t   *kiov = conn->ksnc_rx_kiov;
-        struct msghdr msg = {
-                .msg_name       = NULL,
-                .msg_namelen    = 0,
-                .msg_iov        = scratchiov,
-                .msg_iovlen     = niov,
-                .msg_control    = NULL,
-                .msg_controllen = 0,
-                .msg_flags      = 0
-        };
-        mm_segment_t oldmm = get_fs();
-        int          nob;
-        int          i;
-        int          rc;
-
-        LASSERT (conn->ksnc_rx_nkiov > 0);
-
-        /* NB we can't trust socket ops to either consume our iovs
-         * or leave them alone. */
-        for (nob = i = 0; i < niov; i++) {      /* map each page into a scratch iovec */
-                scratchiov[i].iov_base = kmap(kiov[i].kiov_page) + kiov[i].kiov_offset;
-                nob += scratchiov[i].iov_len = kiov[i].kiov_len;
-        }
-        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
-
-        set_fs (KERNEL_DS);                     /* the iovec holds kernel addresses */
-        rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-        /* NB this is just a boolean..........................^ */
-        set_fs (oldmm);
-
-        for (i = 0; i < niov; i++)              /* undo the kmap()s above */
-                kunmap(kiov[i].kiov_page);
-
-        if (rc <= 0)
-                return (rc);
-        
-        /* received something... */
-        nob = rc;
-
-        conn->ksnc_peer->ksnp_last_alive = jiffies;
-        conn->ksnc_rx_deadline = jiffies + 
-                                 ksocknal_tunables.ksnd_io_timeout * HZ;
-        mb();                           /* order with setting rx_started */
-        conn->ksnc_rx_started = 1;
-
-        conn->ksnc_rx_nob_wanted -= nob;
-        conn->ksnc_rx_nob_left -= nob;
-
-        do {                            /* "consume" the frags that were filled */
-                LASSERT (conn->ksnc_rx_nkiov > 0);
-                
-                if (nob < kiov->kiov_len) {     /* partly filled frag: trim it in place */
-                        kiov->kiov_offset += nob;
-                        kiov->kiov_len -= nob;
-                        return -EAGAIN;
-                }
-                
-                nob -= kiov->kiov_len;
-                conn->ksnc_rx_kiov = ++kiov;
-                conn->ksnc_rx_nkiov--;
-        } while (nob != 0);
-
-        return 1;
-}
-
-/* Read from conn until the currently wanted bytes (ksnc_rx_nob_wanted)
- * have all arrived or the socket has no more data for now.  The socket
- * is pinned with ksocknal_getconnsock() for the duration. */
-int
-ksocknal_receive (ksock_conn_t *conn) 
-{
-        /* Return 1 on success, 0 on EOF, < 0 on error.
-         * Caller checks ksnc_rx_nob_wanted to determine
-         * progress/completion. */
-        int     rc;
-        ENTRY;
-        
-        if (ksocknal_data.ksnd_stall_rx != 0) {
-                /* optional delay of ksnd_stall_rx seconds */
-                set_current_state (TASK_UNINTERRUPTIBLE);
-                schedule_timeout (ksocknal_data.ksnd_stall_rx * HZ);
-        }
-
-        rc = ksocknal_getconnsock (conn);
-        if (rc != 0) {
-                LASSERT (conn->ksnc_closing);
-                /* leave via RETURN so the ENTRY above is balanced */
-                RETURN (-ESHUTDOWN);
-        }
-
-        for (;;) {
-                if (conn->ksnc_rx_niov != 0)
-                        rc = ksocknal_recv_iov (conn);
-                else
-                        rc = ksocknal_recv_kiov (conn);
-
-                if (rc <= 0) {
-                        /* error/EOF or partial receive */
-                        if (rc == -EAGAIN) {
-                                /* partial frag: progress was made */
-                                rc = 1;
-                        } else if (rc == 0 && conn->ksnc_rx_started) {
-                                /* EOF in the middle of a message */
-                                rc = -EPROTO;
-                        }
-                        break;
-                }
-
-                /* Completed a fragment */
-
-                if (conn->ksnc_rx_nob_wanted == 0) {
-                        /* Completed a message segment (header or payload) */
-                        if ((ksocknal_tunables.ksnd_eager_ack & conn->ksnc_type) != 0 &&
-                            (conn->ksnc_rx_state ==  SOCKNAL_RX_BODY ||
-                             conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD)) {
-                                /* Remind the socket to ack eagerly... */
-                                ksocknal_eager_ack(conn);
-                        }
-                        rc = 1;
-                        break;
-                }
-        }
-
-        ksocknal_putconnsock (conn);
-        RETURN (rc);
-}
-
-#if SOCKNAL_ZC
-void
-ksocknal_zc_callback (zccd_t *zcd)
-{       /* Zero-copy completion callback: recover the tx that embeds
-         * 'zcd' and hand it to its connection's scheduler for cleanup. */
-        ksock_tx_t    *tx = KSOCK_ZCCD_2_TX(zcd);
-        ksock_sched_t *sched = tx->tx_conn->ksnc_scheduler;
-        unsigned long  flags;
-        ENTRY;
-
-        /* Schedule tx for cleanup (can't do it now due to lock conflicts) */
-
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list);
-        wake_up (&sched->kss_waitq);    /* wake whoever waits on the scheduler's queue */
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-        EXIT;
-}
-#endif
-
-/* Complete the transmit descriptor 'tx'.
- *
- * A forwarded packet (tx_isfwd) is handed back to the router with
- * kpr_fwd_done(); a local send is completed with lib_finalize() and its
- * ltx is freed.  In both cases success means tx_resid has reached 0.
- *
- * 'asynch' must be 0 unless SOCKNAL_ZC is enabled; with SOCKNAL_ZC a
- * non-zero 'asynch' drops a ref on tx_conn. */
-void
-ksocknal_tx_done (ksock_tx_t *tx, int asynch)
-{
-        ksock_ltx_t   *ltx;
-        ENTRY;
-
-        if (tx->tx_conn != NULL) {
-#if SOCKNAL_ZC
-                /* zero copy completion isn't always from
-                 * process_transmit() so it needs to keep a ref on
-                 * tx_conn... */
-                if (asynch)
-                        ksocknal_put_conn (tx->tx_conn);
-#else
-                LASSERT (!asynch);
-#endif
-        }
-
-        if (tx->tx_isfwd) {             /* was a forwarded packet? */
-                /* any unsent residue is reported as -ECONNABORTED */
-                kpr_fwd_done (&ksocknal_data.ksnd_router,
-                              KSOCK_TX_2_KPR_FWD_DESC (tx), 
-                              (tx->tx_resid == 0) ? 0 : -ECONNABORTED);
-                EXIT;
-                return;
-        }
-
-        /* local send */
-        ltx = KSOCK_TX_2_KSOCK_LTX (tx);
-
-        /* any unsent residue is reported as PTL_FAIL */
-        lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie,
-                      (tx->tx_resid == 0) ? PTL_OK : PTL_FAIL);
-
-        ksocknal_free_ltx (ltx);
-        EXIT;
-}
-
-/* Finish with 'tx' once ksocknal_process_transmit() is through with it.
- *
- * With SOCKNAL_ZC, if the tx's zccd count is not 1, completion is left to
- * the zero-copy callback: a conn ref is taken first and the initial zccd
- * ref is dropped.  Otherwise the tx is completed right away with
- * ksocknal_tx_done (tx, 0). */
-void
-ksocknal_tx_launched (ksock_tx_t *tx) 
-{
-#if SOCKNAL_ZC
-        if (atomic_read (&tx->tx_zccd.zccd_count) != 1) {
-                ksock_conn_t  *conn = tx->tx_conn;
-                
-                /* zccd skbufs are still in-flight.  First take a ref on
-                 * conn, so it hangs about for ksocknal_tx_done... */
-                atomic_inc (&conn->ksnc_refcount);
-
-                /* ...then drop the initial ref on zccd, so the zero copy
-                 * callback can occur */
-                zccd_put (&tx->tx_zccd);
-                return;
-        }
-#endif
-        /* Any zero-copy-ness (if any) has completed; I can complete the
-         * transmit now, avoiding an extra schedule */
-        ksocknal_tx_done (tx, 0);
-}
-
-/* Try to push 'tx' out on 'conn' and deal with the outcome.
- *
- * Returns 0 when the whole tx was sent (tx_resid == 0).
- * Returns -EAGAIN unchanged when ksocknal_transmit() reports it.
- * Returns -ENOMEM after queueing conn on ksnd_enomem_conns for a retry.
- * Any other (negative) result closes conn and its siblings, completes the
- * tx via ksocknal_tx_launched() and returns the error. */
-int
-ksocknal_process_transmit (ksock_conn_t *conn, ksock_tx_t *tx)
-{
-        unsigned long  flags;
-        int            rc;
-       
-        rc = ksocknal_transmit (conn, tx);
-
-        CDEBUG (D_NET, "send(%d) %d\n", tx->tx_resid, rc);
-
-        if (tx->tx_resid == 0) {
-                /* Sent everything OK */
-                LASSERT (rc == 0);
-
-                ksocknal_tx_launched (tx);
-                return (0);
-        }
-
-        if (rc == -EAGAIN)
-                return (rc);
-
-        if (rc == -ENOMEM) {
-                /* NOTE(review): function-static counter shared by every
-                 * caller; no locking is visible around it here */
-                static int counter;
-
-                counter++;   /* exponential backoff warnings */
-                /* (x & -x) == x holds for positive x only when x is a
-                 * power of 2, so warn on the 1st, 2nd, 4th, 8th... ENOMEM */
-                if ((counter & (-counter)) == counter)
-                        /* NOTE(review): the message says "tx" but the
-                         * pointer printed is conn - confirm which is meant */
-                        CWARN("%d ENOMEM tx %p\n", counter, conn);
-
-                /* Queue on ksnd_enomem_conns for retry after a timeout */
-                spin_lock_irqsave(&ksocknal_data.ksnd_reaper_lock, flags);
-
-                /* enomem list takes over scheduler's ref... */
-                LASSERT (conn->ksnc_tx_scheduled);
-                list_add_tail(&conn->ksnc_tx_list,
-                              &ksocknal_data.ksnd_enomem_conns);
-                /* wake the reaper only if my retry time falls before the
-                 * time it is already due to wake */
-                if (!time_after_eq(jiffies + SOCKNAL_ENOMEM_RETRY,
-                                   ksocknal_data.ksnd_reaper_waketime))
-                        wake_up (&ksocknal_data.ksnd_reaper_waitq);
-                
-                spin_unlock_irqrestore(&ksocknal_data.ksnd_reaper_lock, flags);
-                return (rc);
-        }
-
-        /* Actual error */
-        LASSERT (rc < 0);
-
-        /* stay quiet if the conn is already being closed */
-        if (!conn->ksnc_closing)
-                CERROR("[%p] Error %d on write to "LPX64
-                       " ip %d.%d.%d.%d:%d\n", conn, rc,
-                       conn->ksnc_peer->ksnp_nid,
-                       HIPQUAD(conn->ksnc_ipaddr),
-                       conn->ksnc_port);
-
-        ksocknal_close_conn_and_siblings (conn, rc);
-        ksocknal_tx_launched (tx);
-
-        return (rc);
-}
-
-/* Mark 'route' as connecting and queue it on ksnd_autoconnectd_routes,
- * waking ksnd_autoconnectd_waitq.
- *
- * The route must not be deleted, must not already be connecting, and must
- * still lack either an untyped connection or at least one of the typed
- * connections (all asserted below).  An extra ref is taken on the route
- * for the duration of the connection attempt. */
-void
-ksocknal_launch_autoconnect_locked (ksock_route_t *route)
-{
-        unsigned long     flags;
-
-        /* called holding write lock on ksnd_global_lock */
-
-        LASSERT (!route->ksnr_deleted);
-        LASSERT ((route->ksnr_connected & (1 << SOCKNAL_CONN_ANY)) == 0);
-        LASSERT ((route->ksnr_connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES);
-        LASSERT (route->ksnr_connecting == 0);
-        
-        /* typed conns: request every typed connection not yet established;
-         * otherwise request a single untyped connection */
-        if (ksocknal_tunables.ksnd_typed_conns)
-                route->ksnr_connecting = 
-                        KSNR_TYPED_ROUTES & ~route->ksnr_connected;
-        else
-                route->ksnr_connecting = (1 << SOCKNAL_CONN_ANY);
-
-        atomic_inc (&route->ksnr_refcount);     /* extra ref for asynchd */
-        
-        spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-        
-        list_add_tail (&route->ksnr_connect_list,
-                       &ksocknal_data.ksnd_autoconnectd_routes);
-        wake_up (&ksocknal_data.ksnd_autoconnectd_waitq);
-        
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-}
-
-/* Find the peer that 'tx' should be sent to in order to reach 'nid'.
- *
- * 'nid' itself is tried first.  Failing that, a forwarded tx gives up
- * (a routed target has to be a peer), while a local tx asks the router
- * for the next hop and looks that NID up instead.
- * Returns the peer, or NULL (after logging) if there is none. */
-ksock_peer_t *
-ksocknal_find_target_peer_locked (ksock_tx_t *tx, ptl_nid_t nid)
-{
-        char          nidstr[PTL_NALFMT_SIZE];
-        ptl_nid_t     hop_nid;
-        ksock_peer_t *found;
-        int           err;
-
-        /* direct peer? */
-        found = ksocknal_find_peer_locked (nid);
-        if (found != NULL)
-                return found;
-
-        if (tx->tx_isfwd) {
-                CERROR ("Can't send packet to "LPX64
-                       " %s: routed target is not a peer\n",
-                        nid, portals_nid2str(SOCKNAL, nid, nidstr));
-                return NULL;
-        }
-
-        /* ask the router where to send it */
-        err = kpr_lookup (&ksocknal_data.ksnd_router, nid, tx->tx_nob,
-                          &hop_nid);
-        if (err != 0) {
-                CERROR ("Can't route to "LPX64" %s: router error %d\n",
-                        nid, portals_nid2str(SOCKNAL, nid, nidstr), err);
-                return NULL;
-        }
-
-        found = ksocknal_find_peer_locked (hop_nid);
-        if (found == NULL)
-                CERROR ("Can't send packet to "LPX64" %s: no peer entry\n",
-                        hop_nid, portals_nid2str(SOCKNAL, hop_nid, nidstr));
-
-        return found;
-}
-
-/* Choose a connection of 'peer' on which to send 'tx'.
- *
- * Every conn is a candidate "fallback"; when ksnd_typed_conns is set, conns
- * whose type suits the size of 'tx' are also candidate "typed" conns.
- * Within each class the conn with the smallest 'nob' wins, and a typed
- * winner is preferred over the fallback.
- *
- * Without SOCKNAL_ROUND_ROBIN, 'nob' is the conn's ksnc_tx_nob plus its
- * socket's sk_wmem_queued.  With SOCKNAL_ROUND_ROBIN, 'nob' is always 0, so
- * the first eligible conn wins and is then moved to the tail of the peer's
- * conn list.
- *
- * Returns NULL if the peer has no conns. */
-ksock_conn_t *
-ksocknal_find_conn_locked (ksock_tx_t *tx, ksock_peer_t *peer)
-{
-        struct list_head *tmp;
-        ksock_conn_t     *typed = NULL;
-        int               tnob  = 0;
-        ksock_conn_t     *fallback = NULL;
-        int               fnob     = 0;
-        ksock_conn_t     *conn;
-
-        list_for_each (tmp, &peer->ksnp_conns) {
-                ksock_conn_t *c = list_entry(tmp, ksock_conn_t, ksnc_list);
-#if SOCKNAL_ROUND_ROBIN
-                const int     nob = 0;
-#else
-                int           nob = atomic_read(&c->ksnc_tx_nob) +
-                                        c->ksnc_sock->sk->sk_wmem_queued;
-#endif
-                LASSERT (!c->ksnc_closing);
-
-                if (fallback == NULL || nob < fnob) {
-                        fallback = c;
-                        fnob     = nob;
-                }
-
-                if (!ksocknal_tunables.ksnd_typed_conns)
-                        continue;
-
-                /* 'continue' rejects c as a typed candidate; 'break' falls
-                 * out of the switch to consider it below */
-                switch (c->ksnc_type) {
-                default:
-                        /* NOTE(review): textually falls through to the next
-                         * case; presumably LBUG() never returns - confirm */
-                        LBUG();
-                case SOCKNAL_CONN_ANY:
-                        break;
-                case SOCKNAL_CONN_BULK_IN:
-                        continue;
-                case SOCKNAL_CONN_BULK_OUT:
-                        if (tx->tx_nob < ksocknal_tunables.ksnd_min_bulk)
-                                continue;
-                        break;
-                case SOCKNAL_CONN_CONTROL:
-                        if (tx->tx_nob >= ksocknal_tunables.ksnd_min_bulk)
-                                continue;
-                        break;
-                }
-
-                if (typed == NULL || nob < tnob) {
-                        typed = c;
-                        tnob  = nob;
-                }
-        }
-
-        /* prefer the typed selection */
-        conn = (typed != NULL) ? typed : fallback;
-
-#if SOCKNAL_ROUND_ROBIN
-        if (conn != NULL) {
-                /* round-robin all else being equal */
-                list_del (&conn->ksnc_list);
-                list_add_tail (&conn->ksnc_list, &peer->ksnp_conns);
-        }
-#endif
-        return conn;
-}
-
-/* Queue 'tx' for transmission on 'conn'.
- *
- * Adds tx_nob to the conn's ksnc_tx_nob, binds the tx to the conn and
- * appends it to ksnc_tx_queue under the scheduler's kss_lock.  If the conn
- * is ready to send and not yet scheduled, it is put on the scheduler's
- * kss_tx_conns list (with an extra conn ref) and the scheduler is woken.
- *
- * The conn must not be closing and the tx must be completely unsent
- * (tx_resid == tx_nob). */
-void
-ksocknal_queue_tx_locked (ksock_tx_t *tx, ksock_conn_t *conn)
-{
-        unsigned long  flags;
-        ksock_sched_t *sched = conn->ksnc_scheduler;
-
-        /* called holding global lock (read or irq-write) and caller may
-         * not have dropped this lock between finding conn and calling me,
-         * so we don't need the {get,put}connsock dance to deref
-         * ksnc_sock... */
-        LASSERT(!conn->ksnc_closing);
-        LASSERT(tx->tx_resid == tx->tx_nob);
-        
-        CDEBUG (D_NET, "Sending to "LPX64" ip %d.%d.%d.%d:%d\n", 
-                conn->ksnc_peer->ksnp_nid,
-                HIPQUAD(conn->ksnc_ipaddr),
-                conn->ksnc_port);
-
-        atomic_add (tx->tx_nob, &conn->ksnc_tx_nob);
-        tx->tx_conn = conn;
-
-#if SOCKNAL_ZC
-        zccd_init (&tx->tx_zccd, ksocknal_zc_callback);
-        /* NB this sets 1 ref on zccd, so the callback can only occur after
-         * I've released this ref. */
-#endif
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        /* nothing queued here and nothing queued in the socket either */
-        if (list_empty(&conn->ksnc_tx_queue) &&
-            conn->ksnc_sock->sk->sk_wmem_queued == 0) {
-                /* First packet starts the timeout */
-                conn->ksnc_tx_deadline = jiffies +
-                                         ksocknal_tunables.ksnd_io_timeout * HZ;
-                conn->ksnc_tx_bufnob = 0;
-                mb();    /* order with adding to tx_queue */
-        }
-
-        list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue);
-                
-        if (conn->ksnc_tx_ready &&      /* able to send */
-            !conn->ksnc_tx_scheduled) { /* not scheduled to send */
-                /* +1 ref for scheduler */
-                atomic_inc (&conn->ksnc_refcount);
-                list_add_tail (&conn->ksnc_tx_list, 
-                               &sched->kss_tx_conns);
-                conn->ksnc_tx_scheduled = 1;
-                wake_up (&sched->kss_waitq);
-        }
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-}
-
-/* Return the first route of 'peer' on which a connection attempt could be
- * started right now, or NULL if there is none.
- *
- * A route qualifies when it has neither all of its typed connections nor
- * an untyped connection, no attempt is already in progress on it, and its
- * retry timeout has been reached. */
-ksock_route_t *
-ksocknal_find_connectable_route_locked (ksock_peer_t *peer)
-{
-        struct list_head  *pos;
-
-        list_for_each (pos, &peer->ksnp_routes) {
-                ksock_route_t *candidate =
-                        list_entry (pos, ksock_route_t, ksnr_list);
-                int            connected = candidate->ksnr_connected;
-
-                if ((connected & KSNR_TYPED_ROUTES) != KSNR_TYPED_ROUTES &&
-                    (connected & (1 << SOCKNAL_CONN_ANY)) == 0 &&
-                    candidate->ksnr_connecting == 0 &&
-                    time_after_eq (jiffies, candidate->ksnr_timeout))
-                        return candidate;
-        }
-
-        return NULL;
-}
-
-/* Return the first route of 'peer' with a connection attempt in progress
- * (ksnr_connecting != 0), or NULL if no route is connecting. */
-ksock_route_t *
-ksocknal_find_connecting_route_locked (ksock_peer_t *peer)
-{
-        struct list_head  *pos;
-
-        list_for_each (pos, &peer->ksnp_routes) {
-                ksock_route_t *candidate =
-                        list_entry (pos, ksock_route_t, ksnr_list);
-
-                if (candidate->ksnr_connecting == 0)
-                        continue;
-
-                return candidate;
-        }
-
-        return NULL;
-}
-
-/* Start sending 'tx' towards 'nid'.
- *
- * Returns 0 if the tx was queued, either on an existing connection or on
- * the peer's ksnp_tx_queue while a connection is being established.
- * Returns -EHOSTUNREACH if no target peer is found, or if the peer has
- * neither a usable connection nor a route that is connecting.
- *
- * Unless SOCKNAL_ROUND_ROBIN is set, a first pass is made under the read
- * lock; the write lock is only taken if autoconnects must be launched or
- * no connection was found. */
-int
-ksocknal_launch_packet (ksock_tx_t *tx, ptl_nid_t nid)
-{
-        unsigned long     flags;
-        ksock_peer_t     *peer;
-        ksock_conn_t     *conn;
-        ksock_route_t    *route;
-        rwlock_t         *g_lock;
-        
-        /* Ensure the frags we've been given EXACTLY match the number of
-         * bytes we want to send.  Many TCP/IP stacks disregard any total
-         * size parameters passed to them and just look at the frags. 
-         *
-         * We always expect at least 1 mapped fragment containing the
-         * complete portals header. */
-        LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) +
-                 lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob);
-        LASSERT (tx->tx_niov >= 1);
-        LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t));
-
-        CDEBUG (D_NET, "packet %p type %d, nob %d niov %d nkiov %d\n",
-                tx, ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, 
-                tx->tx_nob, tx->tx_niov, tx->tx_nkiov);
-
-        tx->tx_conn = NULL;                     /* only set when assigned a conn */
-        tx->tx_resid = tx->tx_nob;
-        tx->tx_hdr = (ptl_hdr_t *)tx->tx_iov[0].iov_base;
-
-        g_lock = &ksocknal_data.ksnd_global_lock;
-#if !SOCKNAL_ROUND_ROBIN
-        /* fast path: try to queue on an existing conn under the read lock */
-        read_lock (g_lock);
-
-        peer = ksocknal_find_target_peer_locked (tx, nid);
-        if (peer == NULL) {
-                read_unlock (g_lock);
-                return (-EHOSTUNREACH);
-        }
-
-        if (ksocknal_find_connectable_route_locked(peer) == NULL) {
-                conn = ksocknal_find_conn_locked (tx, peer);
-                if (conn != NULL) {
-                        /* I've got no autoconnect routes that need to be
-                         * connecting and I do have an actual connection... */
-                        ksocknal_queue_tx_locked (tx, conn);
-                        read_unlock (g_lock);
-                        return (0);
-                }
-        }
-        /* I'll need a write lock... */
-        read_unlock (g_lock);
-#endif
-        write_lock_irqsave(g_lock, flags);
-
-        /* the peer is looked up again now that the write lock is held */
-        peer = ksocknal_find_target_peer_locked (tx, nid);
-        if (peer == NULL) {
-                write_unlock_irqrestore(g_lock, flags);
-                return (-EHOSTUNREACH);
-        }
-
-        for (;;) {
-                /* launch any/all autoconnections that need it */
-                route = ksocknal_find_connectable_route_locked (peer);
-                if (route == NULL)
-                        break;
-
-                /* this sets ksnr_connecting, so the route is not returned
-                 * by the next search and the loop terminates */
-                ksocknal_launch_autoconnect_locked (route);
-        }
-
-        conn = ksocknal_find_conn_locked (tx, peer);
-        if (conn != NULL) {
-                /* Connection exists; queue message on it */
-                ksocknal_queue_tx_locked (tx, conn);
-                write_unlock_irqrestore (g_lock, flags);
-                return (0);
-        }
-
-        route = ksocknal_find_connecting_route_locked (peer);
-        if (route != NULL) {
-                /* At least 1 connection is being established; queue the
-                 * message... */
-                list_add_tail (&tx->tx_list, &peer->ksnp_tx_queue);
-                write_unlock_irqrestore (g_lock, flags);
-                return (0);
-        }
-        
-        /* no connection and none on the way */
-        write_unlock_irqrestore (g_lock, flags);
-        return (-EHOSTUNREACH);
-}
-
-/* Common send path behind ksocknal_send() and ksocknal_send_pages().
- *
- * Allocates a local tx descriptor (ltx) sized for the header fragment plus
- * the payload fragments, copies '*hdr' into it, extracts the
- * payload_offset/payload_nob window of the payload into the descriptor's
- * iov or kiov array, and launches it towards 'nid'.
- *
- * The payload is described by exactly one of 'payload_iov' (mapped
- * addresses) or 'payload_kiov' (pages); the other must be NULL.
- *
- * Returns PTL_OK once the packet has been launched, PTL_NO_SPACE if the
- * descriptor can't be allocated, and PTL_FAIL (after freeing the
- * descriptor) if the launch fails. */
-ptl_err_t
-ksocknal_sendmsg(lib_nal_t     *nal, 
-                 void         *private, 
-                 lib_msg_t    *cookie,
-                 ptl_hdr_t    *hdr, 
-                 int           type, 
-                 ptl_nid_t     nid, 
-                 ptl_pid_t     pid,
-                 unsigned int  payload_niov, 
-                 struct iovec *payload_iov, 
-                 ptl_kiov_t   *payload_kiov,
-                 size_t        payload_offset,
-                 size_t        payload_nob)
-{
-        ksock_ltx_t  *ltx;
-        int           desc_size;
-        int           rc;
-
-        /* NB 'private' is different depending on what we're sending.
-         * Just ignore it... */
-
-        CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid:"LPX64
-               " pid %d\n", payload_nob, payload_niov, nid , pid);
-
-        LASSERT (payload_nob == 0 || payload_niov > 0);
-        LASSERT (payload_niov <= PTL_MD_MAX_IOV);
-
-        /* It must be OK to kmap() if required */
-        LASSERT (payload_kiov == NULL || !in_interrupt ());
-        /* payload is either all vaddrs or all pages */
-        LASSERT (!(payload_kiov != NULL && payload_iov != NULL));
-        
-        /* size the descriptor up to the end of the last fragment entry
-         * needed; the iov case needs 1 extra entry for the header */
-        if (payload_iov != NULL)
-                desc_size = offsetof(ksock_ltx_t, ltx_iov[1 + payload_niov]);
-        else
-                desc_size = offsetof(ksock_ltx_t, ltx_kiov[payload_niov]);
-        
-        if (in_interrupt() ||
-            type == PTL_MSG_ACK ||
-            type == PTL_MSG_REPLY) {
-                /* Can't block if in interrupt or responding to an incoming
-                 * message */
-                PORTAL_ALLOC_ATOMIC(ltx, desc_size);
-        } else {
-                PORTAL_ALLOC(ltx, desc_size);
-        }
-        
-        if (ltx == NULL) {
-                CERROR("Can't allocate tx desc type %d size %d %s\n",
-                       type, desc_size, in_interrupt() ? "(intr)" : "");
-                return (PTL_NO_SPACE);
-        }
-
-        atomic_inc(&ksocknal_data.ksnd_nactive_ltxs);
-
-        /* remember the allocation size in the descriptor itself */
-        ltx->ltx_desc_size = desc_size;
-        
-        /* We always have 1 mapped frag for the header */
-        ltx->ltx_tx.tx_iov = ltx->ltx_iov;
-        ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr;
-        ltx->ltx_iov[0].iov_len = sizeof(*hdr);
-        ltx->ltx_hdr = *hdr;
-        
-        /* stashed for lib_finalize() when the tx completes */
-        ltx->ltx_private = private;
-        ltx->ltx_cookie = cookie;
-        
-        ltx->ltx_tx.tx_isfwd = 0;
-        ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_nob;
-
-        if (payload_iov != NULL) {
-                /* payload is all mapped */
-                ltx->ltx_tx.tx_kiov  = NULL;
-                ltx->ltx_tx.tx_nkiov = 0;
-
-                /* payload frags follow the header frag in ltx_iov */
-                ltx->ltx_tx.tx_niov = 
-                        1 + lib_extract_iov(payload_niov, &ltx->ltx_iov[1],
-                                            payload_niov, payload_iov,
-                                            payload_offset, payload_nob);
-        } else {
-                /* payload is all pages */
-                ltx->ltx_tx.tx_niov = 1;
-
-                ltx->ltx_tx.tx_kiov = ltx->ltx_kiov;
-                ltx->ltx_tx.tx_nkiov =
-                        lib_extract_kiov(payload_niov, ltx->ltx_kiov,
-                                         payload_niov, payload_kiov,
-                                         payload_offset, payload_nob);
-        }
-
-        rc = ksocknal_launch_packet(&ltx->ltx_tx, nid);
-        if (rc == 0)
-                return (PTL_OK);
-        
-        /* launch failed: the descriptor was never queued, so free it here */
-        ksocknal_free_ltx(ltx);
-        return (PTL_FAIL);
-}
-
-/* Send a message whose payload is described by mapped addresses
- * ('payload_iov').  Thin wrapper: passes everything to ksocknal_sendmsg()
- * with a NULL page vector and returns its result. */
-ptl_err_t
-ksocknal_send (lib_nal_t *nal, void *private, lib_msg_t *cookie,
-               ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-               unsigned int payload_niov, struct iovec *payload_iov,
-               size_t payload_offset, size_t payload_len)
-{
-        ptl_err_t status;
-
-        status = ksocknal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                                  payload_niov, payload_iov, NULL,
-                                  payload_offset, payload_len);
-        return status;
-}
-
-/* Send a message whose payload is described by pages ('payload_kiov').
- * Thin wrapper: passes everything to ksocknal_sendmsg() with a NULL iovec
- * and returns its result. */
-ptl_err_t
-ksocknal_send_pages (lib_nal_t *nal, void *private, lib_msg_t *cookie, 
-                     ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-                     unsigned int payload_niov, ptl_kiov_t *payload_kiov, 
-                     size_t payload_offset, size_t payload_len)
-{
-        ptl_err_t status;
-
-        status = ksocknal_sendmsg(nal, private, cookie, hdr, type, nid, pid,
-                                  payload_niov, NULL, payload_kiov,
-                                  payload_offset, payload_len);
-        return status;
-}
-
-/* Transmit a packet on behalf of the router.
- *
- * 'fwd' describes the packet; its scratch area (kprfd_scratch) is used as
- * the forwarding tx descriptor.  The packet is sent to the gateway NID in
- * 'fwd', unless that is this node's own NID, in which case it goes
- * straight to the target NID.  If the packet can't be launched, the router
- * is told right away via kpr_fwd_done() with the error.
- *
- * 'arg' is not used. */
-void
-ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd)
-{
-        ptl_nid_t     nid = fwd->kprfd_gateway_nid;
-        ksock_ftx_t  *ftx = (ksock_ftx_t *)&fwd->kprfd_scratch;
-        int           rc;
-        
-        /* NB parentheses balanced in this message; it used to end "))" */
-        CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64")\n", fwd,
-                fwd->kprfd_gateway_nid, fwd->kprfd_target_nid);
-
-        /* I'm the gateway; must be the last hop */
-        if (nid == ksocknal_lib.libnal_ni.ni_pid.nid)
-                nid = fwd->kprfd_target_nid;
-
-        /* setup iov for hdr */
-        ftx->ftx_iov.iov_base = fwd->kprfd_hdr;
-        ftx->ftx_iov.iov_len = sizeof(ptl_hdr_t);
-
-        /* 1 mapped frag for the header; the payload is the router's kiov */
-        ftx->ftx_tx.tx_isfwd = 1;                  /* This is a forwarding packet */
-        ftx->ftx_tx.tx_nob   = sizeof(ptl_hdr_t) + fwd->kprfd_nob;
-        ftx->ftx_tx.tx_niov  = 1;
-        ftx->ftx_tx.tx_iov   = &ftx->ftx_iov;
-        ftx->ftx_tx.tx_nkiov = fwd->kprfd_niov;
-        ftx->ftx_tx.tx_kiov  = fwd->kprfd_kiov;
-
-        rc = ksocknal_launch_packet (&ftx->ftx_tx, nid);
-        if (rc != 0)
-                kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, rc);
-}
-
-/* Start a kernel thread running fn(arg) and account for it in
- * ksnd_nthreads.
- * Returns 0 on success, or the negative value returned by kernel_thread()
- * (in which case the thread count is left untouched). */
-int
-ksocknal_thread_start (int (*fn)(void *arg), void *arg)
-{
-        unsigned long irq_flags;
-        long          child;
-
-        child = kernel_thread (fn, arg, 0);
-        if (child < 0)
-                return (int)child;
-
-        /* the thread count is protected by the global lock */
-        write_lock_irqsave(&ksocknal_data.ksnd_global_lock, irq_flags);
-        ksocknal_data.ksnd_nthreads += 1;
-        write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, irq_flags);
-
-        return 0;
-}
-
-/* Account for a thread going away: decrement ksnd_nthreads under the
- * global lock. */
-void
-ksocknal_thread_fini (void)
-{
-        unsigned long irq_flags;
-
-        write_lock_irqsave(&ksocknal_data.ksnd_global_lock, irq_flags);
-        ksocknal_data.ksnd_nthreads -= 1;
-        write_unlock_irqrestore(&ksocknal_data.ksnd_global_lock, irq_flags);
-}
-
-/* Completion callback for a forwarding message buffer (fmb); registered
- * via kpr_fwd_init() in ksocknal_init_fmb().
- *
- * 'arg' is the fmb and 'error' the outcome of the forwarding (0 == OK).
- * Logs the outcome, drops the peer ref taken when the fmb was set up,
- * returns the fmb to its pool's idle list and, if a conn is blocked
- * waiting for an fmb from this pool, hands that conn back to its scheduler
- * in state SOCKNAL_RX_GET_FMB. */
-void
-ksocknal_fmb_callback (void *arg, int error)
-{
-        ksock_fmb_t       *fmb = (ksock_fmb_t *)arg;
-        ksock_fmb_pool_t  *fmp = fmb->fmb_pool;
-        ptl_hdr_t         *hdr = &fmb->fmb_hdr;
-        ksock_conn_t      *conn = NULL;
-        ksock_sched_t     *sched;
-        unsigned long      flags;
-        char               ipbuf[PTL_NALFMT_SIZE];
-        char               ipbuf2[PTL_NALFMT_SIZE];
-
-        if (error != 0)
-                CERROR("Failed to route packet from "
-                       LPX64" %s to "LPX64" %s: %d\n",
-                       le64_to_cpu(hdr->src_nid),
-                       portals_nid2str(SOCKNAL, le64_to_cpu(hdr->src_nid), ipbuf),
-                       le64_to_cpu(hdr->dest_nid),
-                       portals_nid2str(SOCKNAL, le64_to_cpu(hdr->dest_nid), ipbuf2),
-                       error);
-        else
-                CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n",
-                        le64_to_cpu(hdr->src_nid), le64_to_cpu(hdr->dest_nid));
-
-        /* drop peer ref taken on init */
-        ksocknal_put_peer (fmb->fmb_peer);
-
-        spin_lock_irqsave (&fmp->fmp_lock, flags);
-
-        /* fmb is idle again */
-        list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs);
-        fmp->fmp_nactive_fmbs--;
-
-        /* take the first conn (if any) blocked on this pool;
-         * NB fmb->fmb_pool is the same pool as fmp */
-        if (!list_empty (&fmp->fmp_blocked_conns)) {
-                conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next,
-                                   ksock_conn_t, ksnc_rx_list);
-                list_del (&conn->ksnc_rx_list);
-        }
-
-        spin_unlock_irqrestore (&fmp->fmp_lock, flags);
-
-        if (conn == NULL)
-                return;
-
-        CDEBUG (D_NET, "Scheduling conn %p\n", conn);
-        LASSERT (conn->ksnc_rx_scheduled);
-        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP);
-
-        /* conn gets to try for an fmb again */
-        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;
-
-        sched = conn->ksnc_scheduler;
-
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns);
-        wake_up (&sched->kss_waitq);
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-}
-
-/* Get an idle forwarding message buffer (fmb) for the packet being
- * received on 'conn'.
- *
- * The small pool is used if the payload still to come (ksnc_rx_nob_left)
- * fits in SOCKNAL_SMALL_FWD_PAGES pages, otherwise the large pool.
- *
- * Returns the fmb, removed from the pool's idle list and counted as
- * active.  If the pool has no idle fmb, returns NULL with the conn put in
- * state SOCKNAL_RX_FMB_SLEEP and queued on the pool's fmp_blocked_conns
- * list. */
-ksock_fmb_t *
-ksocknal_get_idle_fmb (ksock_conn_t *conn)
-{
-        int               payload_nob = conn->ksnc_rx_nob_left;
-        unsigned long     flags;
-        ksock_fmb_pool_t *pool;
-        ksock_fmb_t      *fmb;
-
-        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-        LASSERT (kpr_routing(&ksocknal_data.ksnd_router));
-
-        if (payload_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE)
-                pool = &ksocknal_data.ksnd_small_fmp;
-        else
-                pool = &ksocknal_data.ksnd_large_fmp;
-
-        spin_lock_irqsave (&pool->fmp_lock, flags);
-
-        if (!list_empty (&pool->fmp_idle_fmbs)) {
-                /* take the first idle fmb */
-                fmb = list_entry(pool->fmp_idle_fmbs.next,
-                                 ksock_fmb_t, fmb_list);
-                list_del (&fmb->fmb_list);
-                pool->fmp_nactive_fmbs++;
-                spin_unlock_irqrestore (&pool->fmp_lock, flags);
-
-                return (fmb);
-        }
-
-        /* deschedule until fmb free */
-
-        /* NB state change and queueing both happen under the pool lock,
-         * which ksocknal_fmb_callback() takes before it looks for blocked
-         * conns */
-        conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP;
-
-        list_add_tail (&conn->ksnc_rx_list,
-                       &pool->fmp_blocked_conns);
-
-        spin_unlock_irqrestore (&pool->fmp_lock, flags);
-        return (NULL);
-}
-
-/* Set up 'fmb' to forward the packet whose header has just been read on
- * 'conn'.
- *
- * Takes a ref on the conn's peer, copies the header into the fmb, sizes
- * the fmb's kiov for the payload and initialises the forwarding
- * descriptor with ksocknal_fmb_callback() as its completion callback.
- *
- * Returns 1 if there is no payload: forwarding has been started and the
- * conn is set up for the next packet.
- * Returns 0 if there is payload: the conn is set up (state
- * SOCKNAL_RX_BODY_FWD) to read it into the fmb's pages. */
-int
-ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb)
-{
-        int       payload_nob = conn->ksnc_rx_nob_left;
-        ptl_nid_t dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
-        int       niov = 0;
-        int       nob = payload_nob;
-
-        LASSERT (conn->ksnc_rx_scheduled);
-        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB);
-        LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left);
-        LASSERT (payload_nob >= 0);
-        LASSERT (payload_nob <= fmb->fmb_pool->fmp_buff_pages * PAGE_SIZE);
-        LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE);
-        LASSERT (fmb->fmb_kiov[0].kiov_offset == 0);
-
-        /* Take a ref on the conn's peer to prevent module unload before
-         * forwarding completes. */
-        fmb->fmb_peer = conn->ksnc_peer;
-        atomic_inc (&conn->ksnc_peer->ksnp_refcount);
-
-        /* Copy the header we just read into the forwarding buffer.  If
-         * there's payload, start reading it into the buffer,
-         * otherwise the forwarding buffer can be kicked off
-         * immediately. */
-        fmb->fmb_hdr = conn->ksnc_hdr;
-
-        /* 1 kiov entry per page of payload; every entry is a full page
-         * except possibly the last */
-        while (nob > 0) {
-                LASSERT (niov < fmb->fmb_pool->fmp_buff_pages);
-                LASSERT (fmb->fmb_kiov[niov].kiov_offset == 0);
-                fmb->fmb_kiov[niov].kiov_len = MIN (PAGE_SIZE, nob);
-                nob -= PAGE_SIZE;
-                niov++;
-        }
-
-        kpr_fwd_init(&fmb->fmb_fwd, dest_nid, &fmb->fmb_hdr,
-                     payload_nob, niov, fmb->fmb_kiov,
-                     ksocknal_fmb_callback, fmb);
-
-        if (payload_nob == 0) {         /* got complete packet already */
-                CDEBUG (D_NET, "%p "LPX64"->"LPX64" fwd_start (immediate)\n",
-                        conn, le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid);
-
-                kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
-
-                ksocknal_new_packet (conn, 0);  /* on to next packet */
-                return (1);
-        }
-
-        conn->ksnc_cookie = fmb;                /* stash fmb for later */
-        conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */
-        
-        /* Set up conn->ksnc_rx_kiov to read the payload into fmb's kiov-ed
-         * buffer */
-        LASSERT (niov <= sizeof(conn->ksnc_rx_iov_space)/sizeof(ptl_kiov_t));
-
-        /* the conn works on its own copy of the kiov, taken from
-         * ksnc_rx_iov_space */
-        conn->ksnc_rx_niov = 0;
-        conn->ksnc_rx_nkiov = niov;
-        conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov;
-        memcpy(conn->ksnc_rx_kiov, fmb->fmb_kiov, niov * sizeof(ptl_kiov_t));
-        
-        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn,
-                le64_to_cpu(conn->ksnc_hdr.src_nid), dest_nid, payload_nob);
-        return (0);
-}
-
-/* Check the header just read into conn->ksnc_hdr to decide whether the
- * packet can be forwarded.
- *
- * The packet is dropped, with an error logged, if its payload length is
- * negative, if the router isn't forwarding, if the payload is bigger than
- * PTL_MTU, or if the destination is one of my own peers (the sender should
- * have sent it there directly).  Dropping sets the conn up to skip the
- * payload, except for a negative length, where it goes straight on to the
- * next header.
- *
- * Otherwise the conn is put in state SOCKNAL_RX_GET_FMB with the payload
- * length stashed in ksnc_rx_nob_left and ksnc_rx_nob_wanted. */
-void
-ksocknal_fwd_parse (ksock_conn_t *conn)
-{
-        ksock_peer_t *peer;
-        /* header fields are converted from little-endian to host order */
-        ptl_nid_t     dest_nid = le64_to_cpu(conn->ksnc_hdr.dest_nid);
-        ptl_nid_t     src_nid = le64_to_cpu(conn->ksnc_hdr.src_nid);
-        int           body_len = le32_to_cpu(conn->ksnc_hdr.payload_length);
-        char str[PTL_NALFMT_SIZE];
-        char str2[PTL_NALFMT_SIZE];
-
-        CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn,
-                src_nid, dest_nid, conn->ksnc_rx_nob_left);
-
-        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER);
-        LASSERT (conn->ksnc_rx_scheduled);
-
-        if (body_len < 0) {                 /* length corrupt (overflow) */
-                CERROR("dropping packet from "LPX64" (%s) for "LPX64" (%s): "
-                       "packet size %d illegal\n",
-                       src_nid, portals_nid2str(TCPNAL, src_nid, str),
-                       dest_nid, portals_nid2str(TCPNAL, dest_nid, str2),
-                       body_len);
-
-                /* NB nothing is skipped: the length can't be trusted */
-                ksocknal_new_packet (conn, 0);  /* on to new packet */
-                return;
-        }
-
-        if (!kpr_routing(&ksocknal_data.ksnd_router)) {    /* not forwarding */
-                CERROR("dropping packet from "LPX64" (%s) for "LPX64
-                       " (%s): not forwarding\n",
-                       src_nid, portals_nid2str(TCPNAL, src_nid, str),
-                       dest_nid, portals_nid2str(TCPNAL, dest_nid, str2));
-                /* on to new packet (skip this one's body) */
-                ksocknal_new_packet (conn, body_len);
-                return;
-        }
-
-        if (body_len > PTL_MTU) {      /* too big to forward */
-                CERROR ("dropping packet from "LPX64" (%s) for "LPX64
-                        "(%s): packet size %d too big\n",
-                        src_nid, portals_nid2str(TCPNAL, src_nid, str),
-                        dest_nid, portals_nid2str(TCPNAL, dest_nid, str2),
-                        body_len);
-                /* on to new packet (skip this one's body) */
-                ksocknal_new_packet (conn, body_len);
-                return;
-        }
-
-        /* should have gone direct */
-        /* NB the peer is looked up by the host-order NID; the raw header
-         * field is little-endian, which only matches on little-endian
-         * hosts */
-        peer = ksocknal_get_peer (dest_nid);
-        if (peer != NULL) {
-                CERROR ("dropping packet from "LPX64" (%s) for "LPX64
-                        "(%s): target is a peer\n",
-                        src_nid, portals_nid2str(TCPNAL, src_nid, str),
-                        dest_nid, portals_nid2str(TCPNAL, dest_nid, str2));
-                ksocknal_put_peer (peer);  /* drop ref from get above */
-
-                /* on to next packet (skip this one's body) */
-                ksocknal_new_packet (conn, body_len);
-                return;
-        }
-
-        conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB;       /* Getting FMB now */
-        conn->ksnc_rx_nob_left = body_len;              /* stash packet size */
-        conn->ksnc_rx_nob_wanted = body_len;            /* (no slop) */
-}
-
-/* Set 'conn' up for what it has to receive next.
- *
- * If 'nob_to_skip' is 0, the conn is set up to read a new header into
- * ksnc_hdr (state SOCKNAL_RX_HEADER) and 1 is returned.
- *
- * Otherwise the conn is set up (state SOCKNAL_RX_SLOP) to read and discard
- * as many of the 'nob_to_skip' bytes as its rx iov space has entries for,
- * and 0 is returned.  ksnc_rx_nob_left holds the total to skip and
- * ksnc_rx_nob_wanted the number of bytes this set-up will skip. */
-int
-ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip)
-{
-        /* every skip iov entry points at this 1 buffer; whatever lands in
-         * it is never looked at in this function */
-        static char ksocknal_slop_buffer[4096];
-
-        int   nob;
-        int   niov;
-        int   skipped;
-
-        if (nob_to_skip == 0) {         /* right at next packet boundary now */
-                conn->ksnc_rx_started = 0;
-                mb ();                          /* racing with timeout thread */
-                
-                conn->ksnc_rx_state = SOCKNAL_RX_HEADER;
-                conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t);
-                conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t);
-
-                /* 1 mapped frag covering the conn's header buffer */
-                conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-                conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr;
-                conn->ksnc_rx_iov[0].iov_len  = sizeof (ptl_hdr_t);
-                conn->ksnc_rx_niov = 1;
-
-                conn->ksnc_rx_kiov = NULL;
-                conn->ksnc_rx_nkiov = 0;
-                return (1);
-        }
-
-        /* Set up to skip as much as possible now.  If there's more left
-         * (ran out of iov entries) we'll get called again */
-
-        conn->ksnc_rx_state = SOCKNAL_RX_SLOP;
-        conn->ksnc_rx_nob_left = nob_to_skip;
-        conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space;
-        skipped = 0;
-        niov = 0;
-
-        do {
-                /* each entry skips at most 1 slop buffer's worth */
-                nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer));
-
-                conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer;
-                conn->ksnc_rx_iov[niov].iov_len  = nob;
-                niov++;
-                skipped += nob;
-                nob_to_skip -=nob;
-
-        } while (nob_to_skip != 0 &&    /* mustn't overflow conn's rx iov */
-                 niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec));
-
-        conn->ksnc_rx_niov = niov;
-        conn->ksnc_rx_kiov = NULL;
-        conn->ksnc_rx_nkiov = 0;
-        conn->ksnc_rx_nob_wanted = skipped;
-        return (0);
-}
-
-/* Drive the receive state machine of 'conn' for as long as reads complete.
- *
- * Returns:
- *   0           stop for now with nothing outstanding on this read: the
- *               conn is waiting for a forwarding buffer, the packet was
- *               handed to the forwarder, or the next packet boundary was
- *               reached;
- *   -EAGAIN     short read (ksnc_rx_nob_wanted is still non-zero);
- *   -ESHUTDOWN  ksocknal_receive() reported EOF (conn has been closed);
- *   -EPROTO     lib_parse() rejected the header (conn has been closed);
- *   other < 0   error from ksocknal_receive() (conn has been closed).
- */
-int
-ksocknal_process_receive (ksock_conn_t *conn)
-{
-        ksock_fmb_t  *fmb;
-        int           rc;
-        
-        LASSERT (atomic_read (&conn->ksnc_refcount) > 0);
-
-        /* doesn't need a forwarding buffer */
-        if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB)
-                goto try_read;
-
- get_fmb:
-        /* Reached on entry in state GET_FMB, or from the header case below
-         * once ksocknal_fwd_parse() has decided to forward the packet */
-        fmb = ksocknal_get_idle_fmb (conn);
-        if (fmb == NULL) {
-                /* conn descheduled waiting for idle fmb */
-                return (0);
-        }
-
-        if (ksocknal_init_fmb (conn, fmb)) {
-                /* packet forwarded */
-                return (0);
-        }
-
- try_read:
-        /* NB: sched lock NOT held */
-        LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER ||
-                 conn->ksnc_rx_state == SOCKNAL_RX_BODY ||
-                 conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD ||
-                 conn->ksnc_rx_state == SOCKNAL_RX_SLOP);
-
-        LASSERT (conn->ksnc_rx_nob_wanted > 0);
-
-        rc = ksocknal_receive(conn);
-
-        if (rc <= 0) {
-                /* EOF or error: log it (errors are not logged once the conn
-                 * is already closing), then close this conn and its
-                 * siblings */
-                LASSERT (rc != -EAGAIN);
-
-                if (rc == 0)
-                        CWARN ("[%p] EOF from "LPX64" ip %d.%d.%d.%d:%d\n",
-                               conn, conn->ksnc_peer->ksnp_nid,
-                               HIPQUAD(conn->ksnc_ipaddr),
-                               conn->ksnc_port);
-                else if (!conn->ksnc_closing)
-                        CERROR ("[%p] Error %d on read from "LPX64
-                                " ip %d.%d.%d.%d:%d\n",
-                                conn, rc, conn->ksnc_peer->ksnp_nid,
-                                HIPQUAD(conn->ksnc_ipaddr),
-                                conn->ksnc_port);
-
-                ksocknal_close_conn_and_siblings (conn, rc);
-                return (rc == 0 ? -ESHUTDOWN : rc);
-        }
-
-        if (conn->ksnc_rx_nob_wanted != 0) {
-                /* short read */
-                return (-EAGAIN);
-        }
-        
-        /* Everything wanted in the current state has arrived: advance */
-        switch (conn->ksnc_rx_state) {
-        case SOCKNAL_RX_HEADER:
-                /* HELLO headers are always parsed locally; anything else
-                 * addressed to a different NID goes to the forwarding path */
-                if (conn->ksnc_hdr.type != cpu_to_le32(PTL_MSG_HELLO) &&
-                    le64_to_cpu(conn->ksnc_hdr.dest_nid) != 
-                    ksocknal_lib.libnal_ni.ni_pid.nid) {
-                        /* This packet isn't for me */
-                        ksocknal_fwd_parse (conn);
-                        switch (conn->ksnc_rx_state) {
-                        case SOCKNAL_RX_HEADER: /* skipped (zero payload) */
-                                return (0);     /* => come back later */
-                        case SOCKNAL_RX_SLOP:   /* skipping packet's body */
-                                goto try_read;  /* => go read it */
-                        case SOCKNAL_RX_GET_FMB: /* forwarding */
-                                goto get_fmb;   /* => go get a fwd msg buffer */
-                        default:
-                                LBUG ();
-                        }
-                        /* Not Reached */
-                }
-
-                /* sets wanted_len, iovs etc */
-                rc = lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn);
-
-                if (rc != PTL_OK) {
-                        /* I just received garbage: give up on this conn */
-                        /* NOTE(review): 'rc' here is a ptl_err_t from
-                         * lib_parse(), whereas the close above is given an
-                         * errno-style value - confirm what
-                         * ksocknal_close_conn_and_siblings() expects */
-                        ksocknal_close_conn_and_siblings (conn, rc);
-                        return (-EPROTO);
-                }
-
-                if (conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */
-                        conn->ksnc_rx_state = SOCKNAL_RX_BODY;
-                        goto try_read;          /* go read the payload */
-                }
-                /* Fall through (completed packet for me) */
-
-        case SOCKNAL_RX_BODY:
-                /* payload all received */
-                lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie, PTL_OK);
-                /* Fall through */
-
-        case SOCKNAL_RX_SLOP:
-                /* starting new packet? */
-                /* ksnc_rx_nob_left is what remains to be skipped; a return
-                 * of 1 means the conn is now set up for the next header */
-                if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left))
-                        return (0);     /* come back later */
-                goto try_read;          /* try to finish reading slop now */
-
-        case SOCKNAL_RX_BODY_FWD:
-                /* payload all received */
-                CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n",
-                        conn, le64_to_cpu(conn->ksnc_hdr.src_nid),
-                        le64_to_cpu(conn->ksnc_hdr.dest_nid),
-                        conn->ksnc_rx_nob_left);
-
-                /* forward the packet. NB ksocknal_init_fmb() put fmb into
-                 * conn->ksnc_cookie */
-                fmb = (ksock_fmb_t *)conn->ksnc_cookie;
-                kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd);
-
-                /* no slop in forwarded packets */
-                LASSERT (conn->ksnc_rx_nob_left == 0);
-
-                ksocknal_new_packet (conn, 0);  /* on to next packet */
-                return (0);                     /* (later) */
-
-        default:
-                break;
-        }
-
-        /* Not Reached */
-        LBUG ();
-        return (-EINVAL);                       /* keep gcc happy */
-}
-
-/* Receive callback for iovec-described buffers.
- *
- * 'private' is the conn the header arrived on.  Nothing is read here: the
- * conn is merely primed so that subsequent reads land 'mlen' bytes in the
- * caller's iovecs (starting 'offset' bytes in), with 'rlen' recorded as the
- * number of bytes of this packet still to come off the wire.
- * Always returns PTL_OK. */
-ptl_err_t
-ksocknal_recv (lib_nal_t *nal, void *private, lib_msg_t *msg,
-               unsigned int niov, struct iovec *iov,
-               size_t offset, size_t mlen, size_t rlen)
-{
-        ksock_conn_t *conn = private;
-        struct iovec *rx_iov;
-
-        LASSERT (mlen <= rlen);
-        LASSERT (niov <= PTL_MD_MAX_IOV);
-
-        /* remember the message being received and how much is wanted/left */
-        conn->ksnc_cookie = msg;
-        conn->ksnc_rx_nob_left   = rlen;
-        conn->ksnc_rx_nob_wanted = mlen;
-
-        /* no page fragments on this path */
-        conn->ksnc_rx_kiov = NULL;
-        conn->ksnc_rx_nkiov = 0;
-
-        /* copy the relevant window of the caller's iovecs into the conn */
-        rx_iov = conn->ksnc_rx_iov_space.iov;
-        conn->ksnc_rx_iov = rx_iov;
-        conn->ksnc_rx_niov = lib_extract_iov(PTL_MD_MAX_IOV, rx_iov,
-                                             niov, iov, offset, mlen);
-
-        /* what was set up must describe exactly mlen bytes */
-        LASSERT (mlen ==
-                 lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
-                 lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
-        return (PTL_OK);
-}
-
-/* Receive callback for page-described (kiov) buffers.
- *
- * 'private' is the conn the header arrived on.  Nothing is read here: the
- * conn is merely primed so that subsequent reads land 'mlen' bytes in the
- * caller's page fragments (starting 'offset' bytes in), with 'rlen'
- * recorded as the number of bytes of this packet still to come off the wire.
- * Always returns PTL_OK. */
-ptl_err_t
-ksocknal_recv_pages (lib_nal_t *nal, void *private, lib_msg_t *msg,
-                     unsigned int niov, ptl_kiov_t *kiov,
-                     size_t offset, size_t mlen, size_t rlen)
-{
-        ksock_conn_t *conn = private;
-        ptl_kiov_t   *rx_kiov;
-
-        LASSERT (mlen <= rlen);
-        LASSERT (niov <= PTL_MD_MAX_IOV);
-
-        /* remember the message being received and how much is wanted/left */
-        conn->ksnc_cookie = msg;
-        conn->ksnc_rx_nob_left   = rlen;
-        conn->ksnc_rx_nob_wanted = mlen;
-
-        /* no plain iovecs on this path */
-        conn->ksnc_rx_iov  = NULL;
-        conn->ksnc_rx_niov = 0;
-
-        /* copy the relevant window of the caller's fragments into the conn */
-        rx_kiov = conn->ksnc_rx_iov_space.kiov;
-        conn->ksnc_rx_kiov = rx_kiov;
-        conn->ksnc_rx_nkiov = lib_extract_kiov(PTL_MD_MAX_IOV, rx_kiov,
-                                               niov, kiov, offset, mlen);
-
-        /* what was set up must describe exactly mlen bytes */
-        LASSERT (mlen ==
-                 lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) +
-                 lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov));
-
-        return (PTL_OK);
-}
-
-/* Returns non-zero iff scheduler 'sched' has no reason to stay awake: no
- * shutdown in progress and all of its work lists empty.  The lists are
- * sampled under kss_lock. */
-static inline int
-ksocknal_sched_cansleep(ksock_sched_t *sched)
-{
-        unsigned long flags;
-        int           must_run;
-
-        spin_lock_irqsave(&sched->kss_lock, flags);
-
-        must_run = (ksocknal_data.ksnd_shuttingdown ||
-#if SOCKNAL_ZC
-                    !list_empty(&sched->kss_zctxdone_list) ||
-#endif
-                    !list_empty(&sched->kss_rx_conns) ||
-                    !list_empty(&sched->kss_tx_conns));
-
-        spin_unlock_irqrestore(&sched->kss_lock, flags);
-
-        return (!must_run);
-}
-
-int ksocknal_scheduler (void *arg)
-{
-        ksock_sched_t     *sched = (ksock_sched_t *)arg;
-        ksock_conn_t      *conn;
-        ksock_tx_t        *tx;
-        unsigned long      flags;
-        int                rc;
-        int                nloops = 0;
-        int                id = sched - ksocknal_data.ksnd_schedulers;
-        char               name[16];
-
-        snprintf (name, sizeof (name),"ksocknald_%02d", id);
-        kportal_daemonize (name);
-        kportal_blockallsigs ();
-
-#if (CONFIG_SMP && CPU_AFFINITY)
-        id = ksocknal_sched2cpu(id);
-        if (cpu_online(id)) {
-                cpumask_t m;
-                cpu_set(id, m);
-                set_cpus_allowed(current, m);
-        } else {
-                CERROR ("Can't set CPU affinity for %s to %d\n", name, id);
-        }
-#endif /* CONFIG_SMP && CPU_AFFINITY */
-        
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        while (!ksocknal_data.ksnd_shuttingdown) {
-                int did_something = 0;
-
-                /* Ensure I progress everything semi-fairly */
-
-                if (!list_empty (&sched->kss_rx_conns)) {
-                        conn = list_entry(sched->kss_rx_conns.next,
-                                          ksock_conn_t, ksnc_rx_list);
-                        list_del(&conn->ksnc_rx_list);
-
-                        LASSERT(conn->ksnc_rx_scheduled);
-                        LASSERT(conn->ksnc_rx_ready);
-
-                        /* clear rx_ready in case receive isn't complete.
-                         * Do it BEFORE we call process_recv, since
-                         * data_ready can set it any time after we release
-                         * kss_lock. */
-                        conn->ksnc_rx_ready = 0;
-                        spin_unlock_irqrestore(&sched->kss_lock, flags);
-                        
-                        rc = ksocknal_process_receive(conn);
-                        
-                        spin_lock_irqsave(&sched->kss_lock, flags);
-
-                        /* I'm the only one that can clear this flag */
-                        LASSERT(conn->ksnc_rx_scheduled);
-
-                        /* Did process_receive get everything it wanted? */
-                        if (rc == 0)
-                                conn->ksnc_rx_ready = 1;
-                        
-                        if (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP ||
-                            conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB) {
-                                /* Conn blocked for a forwarding buffer.
-                                 * It will get queued for my attention when
-                                 * one becomes available (and it might just
-                                 * already have been!).  Meanwhile my ref
-                                 * on it stays put. */
-                        } else if (conn->ksnc_rx_ready) {
-                                /* reschedule for rx */
-                                list_add_tail (&conn->ksnc_rx_list,
-                                               &sched->kss_rx_conns);
-                        } else {
-                                conn->ksnc_rx_scheduled = 0;
-                                /* drop my ref */
-                                ksocknal_put_conn(conn);
-                        }
-
-                        did_something = 1;
-                }
-
-                if (!list_empty (&sched->kss_tx_conns)) {
-                        conn = list_entry(sched->kss_tx_conns.next,
-                                          ksock_conn_t, ksnc_tx_list);
-                        list_del (&conn->ksnc_tx_list);
-                        
-                        LASSERT(conn->ksnc_tx_scheduled);
-                        LASSERT(conn->ksnc_tx_ready);
-                        LASSERT(!list_empty(&conn->ksnc_tx_queue));
-                        
-                        tx = list_entry(conn->ksnc_tx_queue.next,
-                                        ksock_tx_t, tx_list);
-                        /* dequeue now so empty list => more to send */
-                        list_del(&tx->tx_list);
-                        
-                        /* Clear tx_ready in case send isn't complete.  Do
-                         * it BEFORE we call process_transmit, since
-                         * write_space can set it any time after we release
-                         * kss_lock. */
-                        conn->ksnc_tx_ready = 0;
-                        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
-                        rc = ksocknal_process_transmit(conn, tx);
-
-                        spin_lock_irqsave (&sched->kss_lock, flags);
-
-                        if (rc == -ENOMEM || rc == -EAGAIN) {
-                                /* Incomplete send: replace tx on HEAD of tx_queue */
-                                list_add (&tx->tx_list, &conn->ksnc_tx_queue);
-                        } else {
-                                /* Complete send; assume space for more */
-                                conn->ksnc_tx_ready = 1;
-                        }
-
-                        if (rc == -ENOMEM) {
-                                /* Do nothing; after a short timeout, this
-                                 * conn will be reposted on kss_tx_conns. */
-                        } else if (conn->ksnc_tx_ready &&
-                                   !list_empty (&conn->ksnc_tx_queue)) {
-                                /* reschedule for tx */
-                                list_add_tail (&conn->ksnc_tx_list, 
-                                               &sched->kss_tx_conns);
-                        } else {
-                                conn->ksnc_tx_scheduled = 0;
-                                /* drop my ref */
-                                ksocknal_put_conn (conn);
-                        }
-                                
-                        did_something = 1;
-                }
-#if SOCKNAL_ZC
-                if (!list_empty (&sched->kss_zctxdone_list)) {
-                        ksock_tx_t *tx =
-                                list_entry(sched->kss_zctxdone_list.next,
-                                           ksock_tx_t, tx_list);
-                        did_something = 1;
-
-                        list_del (&tx->tx_list);
-                        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
-                        ksocknal_tx_done (tx, 1);
-
-                        spin_lock_irqsave (&sched->kss_lock, flags);
-                }
-#endif
-                if (!did_something ||           /* nothing to do */
-                    ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? */
-                        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
-                        nloops = 0;
-
-                        if (!did_something) {   /* wait for something to do */
-                                rc = wait_event_interruptible (sched->kss_waitq,
-                                                               !ksocknal_sched_cansleep(sched));
-                                LASSERT (rc == 0);
-                        } else
-                               our_cond_resched();
-
-                        spin_lock_irqsave (&sched->kss_lock, flags);
-                }
-        }
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-        ksocknal_thread_fini ();
-        return (0);
-}
-
-/* Socket data_ready callback: 'sk' has something to read.
- *
- * Marks the owning conn rx-ready and, unless a scheduler is already
- * progressing it, queues it on its scheduler's rx list (taking a conn ref
- * on the scheduler's behalf) and wakes the scheduler.  If the socket no
- * longer has a conn attached, the call is passed to the callback now
- * installed on the socket instead. */
-void
-ksocknal_data_ready (struct sock *sk, int n)
-{
-        unsigned long  flags;
-        ksock_conn_t  *conn;
-        ksock_sched_t *sched;
-        ENTRY;
-
-        /* interleave correctly with closing sockets... */
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        conn = sk->sk_user_data;
-
-        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
-                LASSERT (sk->sk_data_ready != &ksocknal_data_ready);
-                sk->sk_data_ready (sk, n);
-
-                read_unlock (&ksocknal_data.ksnd_global_lock);
-                EXIT;
-                return;
-        }
-
-        sched = conn->ksnc_scheduler;
-
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        conn->ksnc_rx_ready = 1;
-
-        if (!conn->ksnc_rx_scheduled) {  /* nobody is progressing it */
-                /* the scheduler gets its own ref on the conn */
-                atomic_inc (&conn->ksnc_refcount);
-                conn->ksnc_rx_scheduled = 1;
-                list_add_tail(&conn->ksnc_rx_list, &sched->kss_rx_conns);
-
-                wake_up (&sched->kss_waitq);
-        }
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-        EXIT;
-}
-
-/* Socket write_space callback: 'sk' may have room to send again.
- *
- * Once the free send space reaches SOCKNAL_TX_LOW_WATER the owning conn is
- * marked tx-ready and, if it has packets queued and no scheduler is already
- * progressing it, it is queued on its scheduler's tx list (taking a conn ref
- * on the scheduler's behalf) and the scheduler is woken.  If the socket no
- * longer has a conn attached, the call is passed to the callback now
- * installed on the socket instead. */
-void
-ksocknal_write_space (struct sock *sk)
-{
-        unsigned long  flags;
-        ksock_conn_t  *conn;
-        ksock_sched_t *sched;
-
-        /* interleave correctly with closing sockets... */
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        conn = sk->sk_user_data;
-
-        CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n",
-               sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn,
-               (conn != NULL) ?
-               (conn->ksnc_tx_ready ? " ready" : " blocked") : "",
-               (conn != NULL) ?
-               (conn->ksnc_tx_scheduled ? " scheduled" : " idle") : "",
-               (conn != NULL) ?
-               (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued") : "");
-
-        if (conn == NULL) {             /* raced with ksocknal_terminate_conn */
-                LASSERT (sk->sk_write_space != &ksocknal_write_space);
-                sk->sk_write_space (sk);
-                goto out;
-        }
-
-        if (tcp_wspace(sk) < SOCKNAL_TX_LOW_WATER(sk))
-                goto out;               /* not enough space yet */
-
-        sched = conn->ksnc_scheduler;
-
-        spin_lock_irqsave (&sched->kss_lock, flags);
-
-        clear_bit (SOCK_NOSPACE, &sk->sk_socket->flags);
-        conn->ksnc_tx_ready = 1;
-
-        if (!conn->ksnc_tx_scheduled &&          /* nobody is progressing it */
-            !list_empty(&conn->ksnc_tx_queue)) { /* and there's work queued */
-                /* the scheduler gets its own ref on the conn */
-                atomic_inc (&conn->ksnc_refcount);
-                conn->ksnc_tx_scheduled = 1;
-                list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
-
-                wake_up (&sched->kss_waitq);
-        }
-
-        spin_unlock_irqrestore (&sched->kss_lock, flags);
-
- out:
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-}
-
-/* Send all 'nob' bytes at 'buffer' (a kernel address) down 'sock', looping
- * over partial sends.
- *
- * Returns 0 once everything has gone, the negative error from
- * sock_sendmsg() if a send fails, or -ECONNABORTED if a send reports that
- * it moved zero bytes. */
-int
-ksocknal_sock_write (struct socket *sock, void *buffer, int nob)
-{
-        mm_segment_t  saved_fs = get_fs();
-        char         *cursor = buffer;
-        int           sent;
-
-        for (; nob > 0; cursor += sent, nob -= sent) {
-                struct iovec  iov = {
-                        .iov_base = cursor,
-                        .iov_len  = nob
-                };
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = &iov,
-                        .msg_iovlen     = 1,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = 0
-                };
-
-                /* the iovec names kernel memory: lift the user-address
-                 * limit just for the duration of the call */
-                set_fs (KERNEL_DS);
-                sent = sock_sendmsg (sock, &msg, iov.iov_len);
-                set_fs (saved_fs);
-
-                if (sent > 0)
-                        continue;       /* made progress; go round again */
-
-                if (sent < 0)
-                        return (sent);
-
-                CERROR ("Unexpected zero rc\n");
-                return (-ECONNABORTED);
-        }
-
-        return (0);
-}
-
-/* Read exactly 'nob' bytes from 'sock' into 'buffer' (a kernel address),
- * looping over partial reads.
- *
- * Returns 0 once everything has arrived, the negative error from
- * sock_recvmsg() if a read fails, or -ECONNABORTED if a read returns zero
- * bytes before the buffer is full. */
-int
-ksocknal_sock_read (struct socket *sock, void *buffer, int nob)
-{
-        mm_segment_t  saved_fs = get_fs();
-        char         *cursor = buffer;
-        int           got;
-
-        for (; nob > 0; cursor += got, nob -= got) {
-                struct iovec  iov = {
-                        .iov_base = cursor,
-                        .iov_len  = nob
-                };
-                struct msghdr msg = {
-                        .msg_name       = NULL,
-                        .msg_namelen    = 0,
-                        .msg_iov        = &iov,
-                        .msg_iovlen     = 1,
-                        .msg_control    = NULL,
-                        .msg_controllen = 0,
-                        .msg_flags      = 0
-                };
-
-                /* the iovec names kernel memory: lift the user-address
-                 * limit just for the duration of the call */
-                set_fs (KERNEL_DS);
-                got = sock_recvmsg (sock, &msg, iov.iov_len, 0);
-                set_fs (saved_fs);
-
-                if (got > 0)
-                        continue;       /* made progress; go round again */
-
-                if (got < 0)
-                        return (got);
-
-                return (-ECONNABORTED);
-        }
-
-        return (0);
-}
-
-/* Send a HELLO on 'conn': a header carrying the protocol magic/version (in
- * the dest_nid slot), my NID, the conn type and my incarnation, followed by
- * 'nipaddrs' interface addresses as payload.
- *
- * CAVEAT EMPTOR: 'ipaddrs' is converted to little-endian IN PLACE before
- * being sent.
- *
- * Returns 0 on success or the error from ksocknal_sock_write(). */
-int
-ksocknal_send_hello (ksock_conn_t *conn, __u32 *ipaddrs, int nipaddrs)
-{
-        struct socket      *sock = conn->ksnc_sock;
-        /* NOTE(review): 'hdr' is not cleared, yet all sizeof(hdr) bytes are
-         * sent; any part of ptl_hdr_t not assigned below goes out as
-         * whatever was on the stack - confirm and consider zeroing it. */
-        ptl_hdr_t           hdr;
-        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
-        int                 rc;
-        int                 n;
-
-        LASSERT (conn->ksnc_type != SOCKNAL_CONN_NONE);
-        LASSERT (nipaddrs <= SOCKNAL_MAX_INTERFACES);
-
-        /* No need for getconnsock/putconnsock */
-        LASSERT (!conn->ksnc_closing);
-
-        /* magic + version exactly overlay the dest_nid field */
-        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
-        hmv->magic         = cpu_to_le32 (PORTALS_PROTO_MAGIC);
-        hmv->version_major = cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR);
-        hmv->version_minor = cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR);
-
-        hdr.src_nid        = cpu_to_le64 (ksocknal_lib.libnal_ni.ni_pid.nid);
-        hdr.type           = cpu_to_le32 (PTL_MSG_HELLO);
-        hdr.payload_length = cpu_to_le32 (nipaddrs * sizeof(*ipaddrs));
-
-        hdr.msg.hello.type        = cpu_to_le32 (conn->ksnc_type);
-        hdr.msg.hello.incarnation = cpu_to_le64 (ksocknal_data.ksnd_incarnation);
-
-        /* Receiver is eager */
-        rc = ksocknal_sock_write (sock, &hdr, sizeof(hdr));
-        if (rc != 0) {
-                CERROR ("Error %d sending HELLO hdr to %u.%u.%u.%u/%d\n",
-                        rc, HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
-                return (rc);
-        }
-
-        if (nipaddrs != 0) {
-                /* put the addresses into wire byte order, then send them */
-                for (n = 0; n < nipaddrs; n++)
-                        ipaddrs[n] = __cpu_to_le32 (ipaddrs[n]);
-
-                rc = ksocknal_sock_write (sock, ipaddrs,
-                                          nipaddrs * sizeof(*ipaddrs));
-                if (rc != 0)
-                        CERROR ("Error %d sending HELLO payload (%d)"
-                                " to %u.%u.%u.%u/%d\n", rc, nipaddrs,
-                                HIPQUAD(conn->ksnc_ipaddr), conn->ksnc_port);
-        }
-
-        return (rc);
-}
-
-/* Map a connection type as seen from one end of a connection to the type
- * seen from the other end: ANY and CONTROL map to themselves, BULK_IN and
- * BULK_OUT swap, and anything else yields SOCKNAL_CONN_NONE. */
-int
-ksocknal_invert_type(int type)
-{
-        if (type == SOCKNAL_CONN_ANY ||
-            type == SOCKNAL_CONN_CONTROL)
-                return (type);
-
-        if (type == SOCKNAL_CONN_BULK_IN)
-                return SOCKNAL_CONN_BULK_OUT;
-
-        if (type == SOCKNAL_CONN_BULK_OUT)
-                return SOCKNAL_CONN_BULK_IN;
-
-        /* not a type I know about */
-        return (SOCKNAL_CONN_NONE);
-}
-
-int
-ksocknal_recv_hello (ksock_conn_t *conn, ptl_nid_t *nid,
-                     __u64 *incarnation, __u32 *ipaddrs)
-{
-        struct socket      *sock = conn->ksnc_sock;
-        int                 rc;
-        int                 nips;
-        int                 i;
-        int                 type;
-        ptl_hdr_t           hdr;
-        ptl_magicversion_t *hmv;
-
-        hmv = (ptl_magicversion_t *)&hdr.dest_nid;
-        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
-
-        rc = ksocknal_sock_read (sock, hmv, sizeof (*hmv));
-        if (rc != 0) {
-                CERROR ("Error %d reading HELLO from %u.%u.%u.%u\n",
-                        rc, HIPQUAD(conn->ksnc_ipaddr));
-                return (rc);
-        }
-
-        if (hmv->magic != le32_to_cpu (PORTALS_PROTO_MAGIC)) {
-                CERROR ("Bad magic %#08x (%#08x expected) from %u.%u.%u.%u\n",
-                        __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC,
-                        HIPQUAD(conn->ksnc_ipaddr));
-                return (-EPROTO);
-        }
-
-        if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
-            hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
-                CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-                        " from %u.%u.%u.%u\n",
-                        le16_to_cpu (hmv->version_major),
-                        le16_to_cpu (hmv->version_minor),
-                        PORTALS_PROTO_VERSION_MAJOR,
-                        PORTALS_PROTO_VERSION_MINOR,
-                        HIPQUAD(conn->ksnc_ipaddr));
-                return (-EPROTO);
-        }
-
-#if (PORTALS_PROTO_VERSION_MAJOR != 1)
-# error "This code only understands protocol version 1.x"
-#endif
-        /* version 1 sends magic/version as the dest_nid of a 'hello'
-         * header, followed by payload full of interface IP addresses.
-         * Read the rest of it in now... */
-
-        rc = ksocknal_sock_read (sock, hmv + 1, sizeof (hdr) - sizeof (*hmv));
-        if (rc != 0) {
-                CERROR ("Error %d reading rest of HELLO hdr from %u.%u.%u.%u\n",
-                        rc, HIPQUAD(conn->ksnc_ipaddr));
-                return (rc);
-        }
-
-        /* ...and check we got what we expected */
-        if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
-                CERROR ("Expecting a HELLO hdr,"
-                        " but got type %d from %u.%u.%u.%u\n",
-                        le32_to_cpu (hdr.type),
-                        HIPQUAD(conn->ksnc_ipaddr));
-                return (-EPROTO);
-        }
-
-        if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
-                CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY"
-                       "from %u.%u.%u.%u\n", HIPQUAD(conn->ksnc_ipaddr));
-                return (-EPROTO);
-        }
-
-        if (*nid == PTL_NID_ANY) {              /* don't know peer's nid yet */
-                *nid = le64_to_cpu(hdr.src_nid);
-        } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-                CERROR ("Connected to nid "LPX64"@%u.%u.%u.%u "
-                        "but expecting "LPX64"\n",
-                        le64_to_cpu (hdr.src_nid),
-                        HIPQUAD(conn->ksnc_ipaddr), *nid);
-                return (-EPROTO);
-        }
-
-        type = __le32_to_cpu(hdr.msg.hello.type);
-
-        if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
-                /* I've accepted this connection; peer determines type */
-                conn->ksnc_type = ksocknal_invert_type(type);
-                if (conn->ksnc_type == SOCKNAL_CONN_NONE) {
-                        CERROR ("Unexpected type %d from "LPX64"@%u.%u.%u.%u\n",
-                                type, *nid, HIPQUAD(conn->ksnc_ipaddr));
-                        return (-EPROTO);
-                }
-        } else if (ksocknal_invert_type(type) != conn->ksnc_type) {
-                CERROR ("Mismatched types: me %d, "LPX64"@%u.%u.%u.%u %d\n",
-                        conn->ksnc_type, *nid, HIPQUAD(conn->ksnc_ipaddr),
-                        le32_to_cpu(hdr.msg.hello.type));
-                return (-EPROTO);
-        }
-
-        *incarnation = le64_to_cpu(hdr.msg.hello.incarnation);
-
-        nips = __le32_to_cpu (hdr.payload_length) / sizeof (__u32);
-
-        if (nips > SOCKNAL_MAX_INTERFACES ||
-            nips * sizeof(__u32) != __le32_to_cpu (hdr.payload_length)) {
-                CERROR("Bad payload length %d from "LPX64"@%u.%u.%u.%u\n",
-                       __le32_to_cpu (hdr.payload_length),
-                       *nid, HIPQUAD(conn->ksnc_ipaddr));
-        }
-
-        if (nips == 0)
-                return (0);
-        
-        rc = ksocknal_sock_read (sock, ipaddrs, nips * sizeof(*ipaddrs));
-        if (rc != 0) {
-                CERROR ("Error %d reading IPs from "LPX64"@%u.%u.%u.%u\n",
-                        rc, *nid, HIPQUAD(conn->ksnc_ipaddr));
-                return (rc);
-        }
-
-        for (i = 0; i < nips; i++) {
-                ipaddrs[i] = __le32_to_cpu(ipaddrs[i]);
-                
-                if (ipaddrs[i] == 0) {
-                        CERROR("Zero IP[%d] from "LPX64"@%u.%u.%u.%u\n",
-                               i, *nid, HIPQUAD(conn->ksnc_ipaddr));
-                        return (-EPROTO);
-                }
-        }
-
-        return (nips);
-}
-
-/* Query the socket behind 'conn' for its send buffer size (*txmem),
- * receive buffer size (*rxmem) and whether nagle is enabled (*nagle, the
- * inverse of TCP_NODELAY).
- *
- * Returns 0 on success.  On failure all three outputs are zeroed and the
- * result is -ESHUTDOWN if the conn's socket could not be pinned, otherwise
- * the error from the failing getsockopt call. */
-int
-ksocknal_get_conn_tunables (ksock_conn_t *conn, int *txmem, int *rxmem, int *nagle)
-{
-        mm_segment_t   saved_fs = get_fs ();
-        struct socket *sock = conn->ksnc_sock;
-        int            optlen;
-        int            rc;
-
-        if (ksocknal_getconnsock (conn) != 0) {
-                LASSERT (conn->ksnc_closing);
-                *txmem = *rxmem = *nagle = 0;
-                return (-ESHUTDOWN);
-        }
-
-        /* option values are returned into kernel memory */
-        set_fs (KERNEL_DS);
-
-        optlen = sizeof(*txmem);
-        rc = sock_getsockopt(sock, SOL_SOCKET, SO_SNDBUF,
-                             (char *)txmem, &optlen);
-        if (rc != 0)
-                goto done;
-
-        optlen = sizeof(*rxmem);
-        rc = sock_getsockopt(sock, SOL_SOCKET, SO_RCVBUF,
-                             (char *)rxmem, &optlen);
-        if (rc != 0)
-                goto done;
-
-        optlen = sizeof(*nagle);
-        rc = sock->ops->getsockopt(sock, SOL_TCP, TCP_NODELAY,
-                                   (char *)nagle, &optlen);
-
- done:
-        set_fs (saved_fs);
-        ksocknal_putconnsock (conn);
-
-        if (rc != 0) {
-                *txmem = *rxmem = *nagle = 0;
-                return (rc);
-        }
-
-        /* what was fetched is "no delay"; callers want "nagle" */
-        *nagle = !*nagle;
-        return (0);
-}
-
-int
-ksocknal_setup_sock (struct socket *sock)
-{
-        mm_segment_t    oldmm = get_fs ();
-        int             rc;
-        int             option;
-        int             keep_idle;
-        int             keep_intvl;
-        int             keep_count;
-        int             do_keepalive;
-        struct linger   linger;
-
-        sock->sk->sk_allocation = GFP_NOFS;
-
-        /* Ensure this socket aborts active sends immediately when we close
-         * it. */
-
-        linger.l_onoff = 0;
-        linger.l_linger = 0;
-
-        set_fs (KERNEL_DS);
-        rc = sock_setsockopt (sock, SOL_SOCKET, SO_LINGER,
-                              (char *)&linger, sizeof (linger));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set SO_LINGER: %d\n", rc);
-                return (rc);
-        }
-
-        option = -1;
-        set_fs (KERNEL_DS);
-        rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_LINGER2,
-                                    (char *)&option, sizeof (option));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set SO_LINGER2: %d\n", rc);
-                return (rc);
-        }
-
-        if (!ksocknal_tunables.ksnd_nagle) {
-                option = 1;
-                
-                set_fs (KERNEL_DS);
-                rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_NODELAY,
-                                            (char *)&option, sizeof (option));
-                set_fs (oldmm);
-                if (rc != 0) {
-                        CERROR ("Can't disable nagle: %d\n", rc);
-                        return (rc);
-                }
-        }
-        
-        if (ksocknal_tunables.ksnd_buffer_size > 0) {
-                option = ksocknal_tunables.ksnd_buffer_size;
-                
-                set_fs (KERNEL_DS);
-                rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDBUF,
-                                      (char *)&option, sizeof (option));
-                set_fs (oldmm);
-                if (rc != 0) {
-                        CERROR ("Can't set send buffer %d: %d\n",
-                                option, rc);
-                        return (rc);
-                }
-
-                set_fs (KERNEL_DS);
-                rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
-                                      (char *)&option, sizeof (option));
-                set_fs (oldmm);
-                if (rc != 0) {
-                        CERROR ("Can't set receive buffer %d: %d\n",
-                                option, rc);
-                        return (rc);
-                }
-        }
-
-        /* snapshot tunables */
-        keep_idle  = ksocknal_tunables.ksnd_keepalive_idle;
-        keep_count = ksocknal_tunables.ksnd_keepalive_count;
-        keep_intvl = ksocknal_tunables.ksnd_keepalive_intvl;
-        
-        do_keepalive = (keep_idle > 0 && keep_count > 0 && keep_intvl > 0);
-
-        option = (do_keepalive ? 1 : 0);
-        set_fs (KERNEL_DS);
-        rc = sock_setsockopt (sock, SOL_SOCKET, SO_KEEPALIVE, 
-                              (char *)&option, sizeof (option));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set SO_KEEPALIVE: %d\n", rc);
-                return (rc);
-        }
-
-        if (!do_keepalive)
-                return (0);
-
-        set_fs (KERNEL_DS);
-        rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPIDLE,
-                                    (char *)&keep_idle, sizeof (keep_idle));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPIDLE: %d\n", rc);
-                return (rc);
-        }
-
-        set_fs (KERNEL_DS);
-        rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPINTVL,
-                                    (char *)&keep_intvl, sizeof (keep_intvl));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPINTVL: %d\n", rc);
-                return (rc);
-        }
-
-        set_fs (KERNEL_DS);
-        rc = sock->ops->setsockopt (sock, SOL_TCP, TCP_KEEPCNT,
-                                    (char *)&keep_count, sizeof (keep_count));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set TCP_KEEPCNT: %d\n", rc);
-                return (rc);
-        }
-
-        return (0);
-}
-/* Create a TCP socket bound to local_port and connect it to route's peer; *may_retry says whether another local port is worth trying. */
-static int
-ksocknal_connect_sock(struct socket **sockp, int *may_retry, 
-                      ksock_route_t *route, int local_port)
-{
-        struct sockaddr_in  locaddr;
-        struct sockaddr_in  srvaddr;
-        struct socket      *sock;
-        int                 rc;
-        int                 option;
-        mm_segment_t        oldmm = get_fs();
-        struct timeval      tv;
-        /* local end: the route's own address if it has one, else any; remote end: the route's peer address/port */
-        memset(&locaddr, 0, sizeof(locaddr)); 
-        locaddr.sin_family = AF_INET; 
-        locaddr.sin_port = htons(local_port);
-        locaddr.sin_addr.s_addr = 
-                (route->ksnr_myipaddr != 0) ? htonl(route->ksnr_myipaddr) 
-                                            : INADDR_ANY;
-        memset (&srvaddr, 0, sizeof (srvaddr));
-        srvaddr.sin_family = AF_INET;
-        srvaddr.sin_port = htons (route->ksnr_port);
-        srvaddr.sin_addr.s_addr = htonl (route->ksnr_ipaddr);
-
-        *may_retry = 0;
-
-        rc = sock_create (PF_INET, SOCK_STREAM, 0, &sock);
-        *sockp = sock;          /* NOTE(review): stored before rc is checked; only meaningful when 0 is returned */
-        if (rc != 0) {
-                CERROR ("Can't create autoconnect socket: %d\n", rc);
-                return (rc);
-        }
-
-        /* Ugh; have to map_fd for compatibility with sockets passed in
-         * from userspace.  And we actually need the sock->file refcounting
-         * that this gives you :) */
-
-        rc = sock_map_fd (sock);
-        if (rc < 0) {
-                sock_release (sock);
-                CERROR ("sock_map_fd error %d\n", rc);
-                return (rc);
-        }
-
-        /* NB the file descriptor (rc) now owns the ref on sock->file */
-        LASSERT (sock->file != NULL);
-        LASSERT (file_count(sock->file) == 1);
-
-        get_file(sock->file);                /* extra ref makes sock->file */
-        sys_close(rc);                       /* survive this close */
-
-        /* Still got a single ref on sock->file */
-        LASSERT (file_count(sock->file) == 1);
-
-        /* Set the socket timeouts, so our connection attempt completes in
-         * finite time */
-        tv.tv_sec = ksocknal_tunables.ksnd_io_timeout;
-        tv.tv_usec = 0;
-
-        set_fs (KERNEL_DS);
-        rc = sock_setsockopt (sock, SOL_SOCKET, SO_SNDTIMEO,
-                              (char *)&tv, sizeof (tv));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set send timeout %d: %d\n", 
-                        ksocknal_tunables.ksnd_io_timeout, rc);
-                goto failed;
-        }
-        
-        set_fs (KERNEL_DS);
-        rc = sock_setsockopt (sock, SOL_SOCKET, SO_RCVTIMEO,
-                              (char *)&tv, sizeof (tv));
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR ("Can't set receive timeout %d: %d\n",
-                        ksocknal_tunables.ksnd_io_timeout, rc);
-                goto failed;
-        }
-
-        set_fs (KERNEL_DS);
-        option = 1;
-        rc = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, 
-                             (char *)&option, sizeof (option)); 
-        set_fs (oldmm);
-        if (rc != 0) {
-                CERROR("Can't set SO_REUSEADDR for socket: %d\n", rc);
-                goto failed;
-        }
-        /* a port already in use is the one bind failure the caller may retry on another port */
-        rc = sock->ops->bind(sock, 
-                             (struct sockaddr *)&locaddr, sizeof(locaddr));
-        if (rc == -EADDRINUSE) {
-                CDEBUG(D_NET, "Port %d already in use\n", local_port);
-                *may_retry = 1;
-                goto failed;
-        }
-        if (rc != 0) {
-                CERROR("Error trying to bind to reserved port %d: %d\n",
-                       local_port, rc);
-                goto failed;
-        }
-        /* on success the single sock->file ref asserted above is left for the caller to drop */
-        rc = sock->ops->connect(sock,
-                                (struct sockaddr *)&srvaddr, sizeof(srvaddr),
-                                sock->file->f_flags);
-        if (rc == 0)
-                return 0;
-
-        /* EADDRNOTAVAIL probably means we're already connected to the same
-         * peer/port on the same local port on a differently typed
-         * connection.  Let our caller retry with a different local
-         * port... */
-        *may_retry = (rc == -EADDRNOTAVAIL);
-        /* retryable failures are logged at D_NET only, anything else at D_ERROR */
-        CDEBUG(*may_retry ? D_NET : D_ERROR,
-               "Error %d connecting %u.%u.%u.%u/%d -> %u.%u.%u.%u/%d\n", rc,
-               HIPQUAD(route->ksnr_myipaddr), local_port,
-               HIPQUAD(route->ksnr_ipaddr), route->ksnr_port);
-
- failed:
-        fput(sock->file);       /* drop the ref taken with get_file() above */
-        return rc;
-}
-/* Connect to route's peer from a reserved local port and create a conn of the given type; returns 0 or a negative errno. */
-int
-ksocknal_connect_peer (ksock_route_t *route, int type)
-{
-        struct socket *new_sock;
-        int            retry_ok;
-        int            local_port = 1023;
-        int            err;
-
-        /* Walk the reserved ports from 1023 down to 513.  Typed connections
-         * may each need their own local port, which is only discovered when
-         * connecting; bind() has been called by then, so every attempt
-         * starts over with a fresh socket. */
-        while (local_port > 512) {
-                err = ksocknal_connect_sock(&new_sock, &retry_ok, route,
-                                            local_port);
-                if (err != 0) {
-                        if (!retry_ok)
-                                return err;
-                        local_port--;           /* try the next port down */
-                        continue;
-                }
-                /* connected: hand the socket over, then drop our file ref */
-                err = ksocknal_create_conn(route, new_sock, type);
-                fput(new_sock->file);
-                return err;
-        }
-
-        CERROR("Out of ports trying to bind to a reserved port\n");
-        return (-EADDRINUSE);
-}
-
-void
-ksocknal_autoconnect (ksock_route_t *route)
-{
-        LIST_HEAD        (zombies);
-        ksock_tx_t       *tx;
-        ksock_peer_t     *peer;
-        unsigned long     flags;
-        int               rc;
-        int               type;
-        
-        for (;;) {
-                for (type = 0; type < SOCKNAL_CONN_NTYPES; type++)
-                        if ((route->ksnr_connecting & (1 << type)) != 0)
-                                break;
-                LASSERT (type < SOCKNAL_CONN_NTYPES);
-
-                rc = ksocknal_connect_peer (route, type);
-                if (rc != 0)
-                        break;
-                
-                /* successfully autoconnected: create_conn did the
-                 * route/conn binding and scheduled any blocked packets */
-
-                if (route->ksnr_connecting == 0) {
-                        /* No more connections required */
-                        return;
-                }
-        }
-
-        /* Connection attempt failed */
-
-        write_lock_irqsave (&ksocknal_data.ksnd_global_lock, flags);
-
-        peer = route->ksnr_peer;
-        route->ksnr_connecting = 0;
-
-        /* This is a retry rather than a new connection */
-        LASSERT (route->ksnr_retry_interval != 0);
-        route->ksnr_timeout = jiffies + route->ksnr_retry_interval;
-        route->ksnr_retry_interval = MIN (route->ksnr_retry_interval * 2,
-                                          SOCKNAL_MAX_RECONNECT_INTERVAL);
-
-        if (!list_empty (&peer->ksnp_tx_queue) &&
-            ksocknal_find_connecting_route_locked (peer) == NULL) {
-                LASSERT (list_empty (&peer->ksnp_conns));
-
-                /* None of the connections that the blocked packets are
-                 * waiting for have been successful.  Complete them now... */
-                do {
-                        tx = list_entry (peer->ksnp_tx_queue.next,
-                                         ksock_tx_t, tx_list);
-                        list_del (&tx->tx_list);
-                        list_add_tail (&tx->tx_list, &zombies);
-                } while (!list_empty (&peer->ksnp_tx_queue));
-        }
-
-#if 0           /* irrelevent with only eager routes */
-        if (!route->ksnr_deleted) {
-                /* make this route least-favourite for re-selection */
-                list_del(&route->ksnr_list);
-                list_add_tail(&route->ksnr_list, &peer->ksnp_routes);
-        }
-#endif        
-        write_unlock_irqrestore (&ksocknal_data.ksnd_global_lock, flags);
-
-        while (!list_empty (&zombies)) {
-                char ipbuf[PTL_NALFMT_SIZE];
-                char ipbuf2[PTL_NALFMT_SIZE];
-                tx = list_entry (zombies.next, ksock_tx_t, tx_list);
-
-                CERROR ("Deleting packet type %d len %d ("LPX64" %s->"LPX64" %s)\n",
-                        le32_to_cpu (tx->tx_hdr->type),
-                        le32_to_cpu (tx->tx_hdr->payload_length),
-                        le64_to_cpu (tx->tx_hdr->src_nid),
-                        portals_nid2str(SOCKNAL,
-                                        le64_to_cpu(tx->tx_hdr->src_nid),
-                                        ipbuf),
-                        le64_to_cpu (tx->tx_hdr->dest_nid),
-                        portals_nid2str(SOCKNAL,
-                                        le64_to_cpu(tx->tx_hdr->src_nid),
-                                        ipbuf2));
-
-                list_del (&tx->tx_list);
-                /* complete now */
-                ksocknal_tx_done (tx, 0);
-        }
-}
-/* Daemon thread body: takes routes off ksnd_autoconnectd_routes and connects them until ksnd_shuttingdown is set. */
-int
-ksocknal_autoconnectd (void *arg)
-{
-        long               id = (long)arg;
-        char               name[16];
-        unsigned long      flags;
-        ksock_route_t     *route;
-        int                rc;
-        /* arg carries this thread's index, used only to build its name */
-        snprintf (name, sizeof (name), "ksocknal_ad%02ld", id);
-        kportal_daemonize (name);
-        kportal_blockallsigs ();
-
-        spin_lock_irqsave (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-        /* the lock is held at the top of every iteration and dropped around connecting and sleeping */
-        while (!ksocknal_data.ksnd_shuttingdown) {
-
-                if (!list_empty (&ksocknal_data.ksnd_autoconnectd_routes)) {
-                        route = list_entry (ksocknal_data.ksnd_autoconnectd_routes.next,
-                                            ksock_route_t, ksnr_connect_list);
-
-                        list_del (&route->ksnr_connect_list);
-                        spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-
-                        ksocknal_autoconnect (route);
-                        ksocknal_put_route (route);     /* presumably drops the ref held for the queue entry — confirm */
-
-                        spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock,
-                                          flags);
-                        continue;
-                }
-
-                spin_unlock_irqrestore(&ksocknal_data.ksnd_autoconnectd_lock,
-                                       flags);
-                /* NOTE(review): rc is assigned here but never examined */
-                rc = wait_event_interruptible(ksocknal_data.ksnd_autoconnectd_waitq,
-                                              ksocknal_data.ksnd_shuttingdown ||
-                                              !list_empty(&ksocknal_data.ksnd_autoconnectd_routes));
-
-                spin_lock_irqsave(&ksocknal_data.ksnd_autoconnectd_lock, flags);
-        }
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_autoconnectd_lock, flags);
-
-        ksocknal_thread_fini ();
-        return (0);
-}
-/* Return peer's first conn with a socket error or an expired RX/TX deadline, with ksnc_refcount bumped; NULL if none. */
-ksock_conn_t *
-ksocknal_find_timed_out_conn (ksock_peer_t *peer) 
-{
-        /* We're called with a shared lock on ksnd_global_lock */
-        ksock_conn_t      *conn;
-        struct list_head  *ctmp;
-
-        list_for_each (ctmp, &peer->ksnp_conns) {
-                conn = list_entry (ctmp, ksock_conn_t, ksnc_list);
-
-                /* Don't need the {get,put}connsock dance to deref ksnc_sock... */
-                LASSERT (!conn->ksnc_closing);
-                /* check 1: an error already recorded on the socket */
-                if (conn->ksnc_sock->sk->sk_err != 0) {
-                        /* Something (e.g. failed keepalive) set the socket error */
-                        atomic_inc (&conn->ksnc_refcount);
-                        CERROR ("Socket error %d: "LPX64" %p %d.%d.%d.%d\n",
-                                conn->ksnc_sock->sk->sk_err, peer->ksnp_nid,
-                                conn, HIPQUAD(conn->ksnc_ipaddr));
-                        return (conn);
-                }
-                /* check 2: a receive was started and its deadline has passed */
-                if (conn->ksnc_rx_started &&
-                    time_after_eq (jiffies, conn->ksnc_rx_deadline)) {
-                        /* Timed out incomplete incoming message */
-                        atomic_inc (&conn->ksnc_refcount);
-                        CERROR ("Timed out RX from "LPX64" %p %d.%d.%d.%d\n",
-                                peer->ksnp_nid,conn,HIPQUAD(conn->ksnc_ipaddr));
-                        return (conn);
-                }
-                /* check 3: data still queued or buffered for sending past the TX deadline */
-                if ((!list_empty (&conn->ksnc_tx_queue) ||
-                     conn->ksnc_sock->sk->sk_wmem_queued != 0) &&
-                    time_after_eq (jiffies, conn->ksnc_tx_deadline)) {
-                        /* Timed out messages queued for sending or
-                         * buffered in the socket's send buffer */
-                        atomic_inc (&conn->ksnc_refcount);
-                        CERROR ("Timed out TX to "LPX64" %s%d %p %d.%d.%d.%d\n",
-                                peer->ksnp_nid,
-                                list_empty (&conn->ksnc_tx_queue) ? "" : "Q ",
-                                conn->ksnc_sock->sk->sk_wmem_queued, conn,
-                                HIPQUAD(conn->ksnc_ipaddr));
-                        return (conn);
-                }
-        }
-
-        return (NULL);
-}
-/* Scan peer hash bucket idx and close every timed-out connection, together with its siblings, with -ETIMEDOUT. */
-void
-ksocknal_check_peer_timeouts (int idx)
-{
-        struct list_head *peers = &ksocknal_data.ksnd_peers[idx];
-        struct list_head *ptmp;
-        ksock_peer_t     *peer;
-        ksock_conn_t     *conn;
-
- again:
-        /* NB. We expect to have a look at all the peers and not find any
-         * connections to time out, so we just use a shared lock while we
-         * take a look... */
-        read_lock (&ksocknal_data.ksnd_global_lock);
-
-        list_for_each (ptmp, peers) {
-                peer = list_entry (ptmp, ksock_peer_t, ksnp_list);
-                conn = ksocknal_find_timed_out_conn (peer);
-                /* a non-NULL conn comes back with an extra ref, dropped by the put below */
-                if (conn != NULL) {
-                        read_unlock (&ksocknal_data.ksnd_global_lock);
-
-                        CERROR ("Timeout out conn->"LPX64" ip %d.%d.%d.%d:%d\n",
-                                peer->ksnp_nid,
-                                HIPQUAD(conn->ksnc_ipaddr),
-                                conn->ksnc_port);
-                        ksocknal_close_conn_and_siblings (conn, -ETIMEDOUT);
-                        
-                        /* NB we won't find this one again, but we can't
-                         * just proceed with the next peer, since we dropped
-                         * ksnd_global_lock and it might be dead already! */
-                        ksocknal_put_conn (conn);
-                        goto again;
-                }
-        }
-
-        read_unlock (&ksocknal_data.ksnd_global_lock);
-}
-/* Reaper thread body: terminates deathrow conns, destroys zombies, reschedules ENOMEM-stalled conns and polls peers for timeouts. */
-int
-ksocknal_reaper (void *arg)
-{
-        wait_queue_t       wait;
-        unsigned long      flags;
-        ksock_conn_t      *conn;
-        ksock_sched_t     *sched;
-        struct list_head   enomem_conns;
-        int                nenomem_conns;
-        int                timeout;
-        int                i;
-        int                peer_index = 0;
-        unsigned long      deadline = jiffies;
-        
-        kportal_daemonize ("ksocknal_reaper");
-        kportal_blockallsigs ();
-
-        INIT_LIST_HEAD(&enomem_conns);
-        init_waitqueue_entry (&wait, current);
-
-        spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-        /* ksnd_reaper_lock is held at the top of every iteration and dropped around the actual work */
-        while (!ksocknal_data.ksnd_shuttingdown) {
-
-                if (!list_empty (&ksocknal_data.ksnd_deathrow_conns)) {
-                        conn = list_entry (ksocknal_data.ksnd_deathrow_conns.next,
-                                           ksock_conn_t, ksnc_list);
-                        list_del (&conn->ksnc_list);
-                        
-                        spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
-
-                        ksocknal_terminate_conn (conn);
-                        ksocknal_put_conn (conn);
-
-                        spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-                        continue;
-                }
-
-                if (!list_empty (&ksocknal_data.ksnd_zombie_conns)) {
-                        conn = list_entry (ksocknal_data.ksnd_zombie_conns.next,
-                                           ksock_conn_t, ksnc_list);
-                        list_del (&conn->ksnc_list);
-                        
-                        spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
-
-                        ksocknal_destroy_conn (conn);
-
-                        spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-                        continue;
-                }
-                /* move the whole ENOMEM list onto the private head: link it in, then unlink the global head */
-                if (!list_empty (&ksocknal_data.ksnd_enomem_conns)) {
-                        list_add(&enomem_conns, &ksocknal_data.ksnd_enomem_conns);
-                        list_del_init(&ksocknal_data.ksnd_enomem_conns);
-                }
-
-                spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
-
-                /* reschedule all the connections that stalled with ENOMEM... */
-                nenomem_conns = 0;
-                while (!list_empty (&enomem_conns)) {
-                        conn = list_entry (enomem_conns.next,
-                                           ksock_conn_t, ksnc_tx_list);
-                        list_del (&conn->ksnc_tx_list);
-
-                        sched = conn->ksnc_scheduler;
-
-                        spin_lock_irqsave (&sched->kss_lock, flags);
-                        /* put the conn back on its scheduler's TX list and wake the scheduler */
-                        LASSERT (conn->ksnc_tx_scheduled);
-                        conn->ksnc_tx_ready = 1;
-                        list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns);
-                        wake_up (&sched->kss_waitq);
-
-                        spin_unlock_irqrestore (&sched->kss_lock, flags);
-                        nenomem_conns++;
-                }
-                
-                /* careful with the jiffy wrap... */
-                while ((timeout = (int)(deadline - jiffies)) <= 0) {
-                        const int n = 4;
-                        const int p = 1;
-                        int       chunk = ksocknal_data.ksnd_peer_hash_size;
-                        
-                        /* Time to check for timeouts on a few more peers: I do
-                         * checks every 'p' seconds on a proportion of the peer
-                         * table and I need to check every connection 'n' times
-                         * within a timeout interval, to ensure I detect a
-                         * timeout on any connection within (n+1)/n times the
-                         * timeout interval. */
-
-                        if (ksocknal_tunables.ksnd_io_timeout > n * p)
-                                chunk = (chunk * n * p) / 
-                                        ksocknal_tunables.ksnd_io_timeout;
-                        if (chunk == 0)
-                                chunk = 1;
-                        /* peer_index persists across passes, so successive passes cover successive buckets */
-                        for (i = 0; i < chunk; i++) {
-                                ksocknal_check_peer_timeouts (peer_index);
-                                peer_index = (peer_index + 1) % 
-                                             ksocknal_data.ksnd_peer_hash_size;
-                        }
-
-                        deadline += p * HZ;
-                }
-
-                if (nenomem_conns != 0) {
-                        /* Reduce my timeout if I rescheduled ENOMEM conns.
-                         * This also prevents me getting woken immediately
-                         * if any go back on my enomem list. */
-                        timeout = SOCKNAL_ENOMEM_RETRY;
-                }
-                ksocknal_data.ksnd_reaper_waketime = jiffies + timeout;
-                /* join the wait queue before the final checks so a wake-up in between is not lost */
-                set_current_state (TASK_INTERRUPTIBLE);
-                add_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-                if (!ksocknal_data.ksnd_shuttingdown &&
-                    list_empty (&ksocknal_data.ksnd_deathrow_conns) &&
-                    list_empty (&ksocknal_data.ksnd_zombie_conns))
-                        schedule_timeout (timeout);
-
-                set_current_state (TASK_RUNNING);
-                remove_wait_queue (&ksocknal_data.ksnd_reaper_waitq, &wait);
-
-                spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags);
-        }
-
-        spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags);
-
-        ksocknal_thread_fini ();
-        return (0);
-}
-/* Method table for this NAL: its private data plus the send, recv and dist callbacks defined in this file. */
-lib_nal_t ksocknal_lib = {
-        .libnal_data       = &ksocknal_data,      /* NAL private data */
-        .libnal_send       = ksocknal_send,
-        .libnal_send_pages = ksocknal_send_pages,
-        .libnal_recv       = ksocknal_recv,
-        .libnal_recv_pages = ksocknal_recv_pages,
-        .libnal_dist       = ksocknal_dist
-};
diff --git a/lustre/portals/libcfs/.cvsignore b/lustre/portals/libcfs/.cvsignore
deleted file mode 100644 (file)
index c6f0aa4..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-.deps
-Makefile
-link-stamp
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/libcfs/Makefile.in b/lustre/portals/libcfs/Makefile.in
deleted file mode 100644 (file)
index 15fff12..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-MODULES = libcfs
-libcfs-objs := debug.o lwt.o module.o proc.o tracefile.o watchdog.o
-# NOTE(review): @INCLUDE_RULES@ is presumably replaced with the shared build rules when this .in file is processed - confirm
-@INCLUDE_RULES@
diff --git a/lustre/portals/libcfs/Makefile.mk b/lustre/portals/libcfs/Makefile.mk
deleted file mode 100644 (file)
index 8ecf3c9..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include fs/lustre/portals/Kernelenv
-# NOTE(review): this directory's Makefile.in also lists watchdog.o in libcfs-objs; confirm its absence here is intended
-obj-y += libcfs.o
-libcfs-objs    := module.o proc.o debug.o lwt.o tracefile.o
diff --git a/lustre/portals/libcfs/autoMakefile.am b/lustre/portals/libcfs/autoMakefile.am
deleted file mode 100644 (file)
index 9c27693..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-# Copyright (C) 2001, 2002 Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-if MODULES
-modulenet_DATA := libcfs$(KMODEXT)
-endif
-# Built objects are removed by mostlyclean; DIST_SOURCES maps each libcfs-objs entry to its .c and adds tracefile.h
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(libcfs-objs:%.o=%.c) tracefile.h
diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c
deleted file mode 100644 (file)
index b5286fc..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *   Author: Phil Schwan <phil@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/notifier.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <linux/completion.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-#include <linux/version.h>
-
-# define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/libcfs.h>
-
-#include "tracefile.h"
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/kallsyms.h>
-#endif
-/* Debug masks and tunables; the EXPORT_SYMBOL'd ones are available to other modules. */
-unsigned int portal_subsystem_debug = ~0 - (S_PORTALS | S_NAL);   /* every bit set except S_PORTALS and S_NAL */
-EXPORT_SYMBOL(portal_subsystem_debug);
-/* initial portal_debug mask: the D_* bits OR'd together below */
-unsigned int portal_debug = (D_WARNING | D_DLMTRACE | D_ERROR | D_EMERG | D_HA |
-                             D_RPCTRACE | D_VFSTRACE);
-EXPORT_SYMBOL(portal_debug);
-
-unsigned int portal_printk;
-EXPORT_SYMBOL(portal_printk);
-
-unsigned int portal_stack;
-EXPORT_SYMBOL(portal_stack);
-
-#ifdef __KERNEL__
-atomic_t portal_kmemory = ATOMIC_INIT(0);
-EXPORT_SYMBOL(portal_kmemory);
-#endif
-/* woken by the log dump thread when it has finished; portals_debug_dumplog() waits on it */
-static DECLARE_WAIT_QUEUE_HEAD(debug_ctlwq);
-/* debug_file_path is the prefix from which each dump's file name is built into debug_file_name */
-char debug_file_path[1024] = "/tmp/lustre-log";
-static char debug_file_name[1024];
-static int handled_panic; /* to avoid recursive calls to notifiers */
-char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall";     /* program run by portals_run_upcall() */
-/* Dump all trace pages to "<debug_file_path>.<CURRENT_SECONDS>.<arg>"; current->journal_info is cleared for the duration. */
-void portals_debug_dumplog_internal(void *arg)
-{
-        void *journal_info = current->journal_info;
-        current->journal_info = NULL;
-        /* the length bound comes from the buffer being written, debug_file_name */
-        snprintf(debug_file_name, sizeof(debug_file_name) - 1,
-                 "%s.%ld.%ld", debug_file_path, CURRENT_SECONDS, (long)arg);
-        printk(KERN_ALERT "LustreError: dumping log to %s\n", debug_file_name);
-        tracefile_dump_all_pages(debug_file_name);
-        /* restore the journal_info saved on entry */
-        current->journal_info = journal_info;
-}
-/* Kernel thread body: performs the log dump, then wakes whoever is waiting on debug_ctlwq. */
-int portals_debug_dumplog_thread(void *arg)
-{
-        kportal_daemonize("");
-        reparent_to_init();
-        portals_debug_dumplog_internal(arg);    /* arg ends up as the last component of the dump file name */
-        wake_up(&debug_ctlwq);
-        return 0;
-}
-/* Start a kernel thread that dumps the debug log, and sleep until it signals completion on debug_ctlwq. */
-void portals_debug_dumplog(void)
-{
-        int rc;
-        DECLARE_WAITQUEUE(wait, current);
-        ENTRY;
-
-        /* we're being careful to ensure that the kernel thread is
-         * able to set our state to running as it exits before we
-         * get to schedule() */
-        set_current_state(TASK_INTERRUPTIBLE);
-        add_wait_queue(&debug_ctlwq, &wait);
-        /* the caller's pid is passed as the thread argument */
-        rc = kernel_thread(portals_debug_dumplog_thread,
-                           (void *)(long)current->pid,
-                           CLONE_VM | CLONE_FS | CLONE_FILES);
-        if (rc < 0)
-                printk(KERN_ERR "LustreError: cannot start log dump thread: "
-                       "%d\n", rc);
-        else
-                schedule();     /* sleep until the dump thread's wake_up() */
-
-        /* be sure to teardown if kernel_thread() failed */
-        remove_wait_queue(&debug_ctlwq, &wait);
-        set_current_state(TASK_RUNNING);
-}
-/* Panic notifier callback: dumps the debug log the first time it runs; always returns 0. */
-static int panic_dumplog(struct notifier_block *self, unsigned long unused1,
-                         void *unused2)
-{
-        /* handled_panic makes every call after the first a no-op */
-        if (!handled_panic) {
-                handled_panic = 1;
-
-                if (in_interrupt()) {
-                        /* portals_debug_dumplog() sleeps, so only print here */
-                        trace_debug_print();
-                } else {
-                        while (current->lock_depth >= 0)
-                                unlock_kernel();
-                        portals_debug_dumplog();
-                }
-        }
-        return 0;
-}
-/* Notifier block that routes panic notifications to panic_dumplog(). */
-static struct notifier_block lustre_panic_notifier = {
-        .notifier_call = panic_dumplog,
-        .next          = NULL,
-        .priority      = 10000
-};
-/* Register the panic notifier and initialise the trace file layer; returns tracefile_init()'s result. */
-int portals_debug_init(unsigned long bufsize)   /* bufsize is not used in this function */
-{
-        notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier);
-        return tracefile_init();        /* NOTE(review): the notifier stays registered if this fails - confirm callers clean up */
-}
-/* Undo portals_debug_init() in reverse order: shut down tracing, then unregister the panic notifier. Always returns 0. */
-int portals_debug_cleanup(void)
-{
-        tracefile_exit();
-        notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier);
-        return 0;
-}
-/* Clear the debug buffer by calling trace_flush_pages(); always returns 0. */
-int portals_debug_clear_buffer(void)
-{
-        trace_flush_pages();
-        return 0;
-}
-
-/* Debug markers, although printed by S_PORTALS
- * should not be marked as such. */
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_UNDEFINED
-int portals_debug_mark_buffer(char *text)       /* logs text between two separator lines; always returns 0 */
-{
-        CDEBUG(D_TRACE,"***************************************************\n");
-        CDEBUG(D_WARNING, "DEBUG MARKER: %s\n", text);
-        CDEBUG(D_TRACE,"***************************************************\n");
-
-        return 0;
-}
-#undef DEBUG_SUBSYSTEM
-#define DEBUG_SUBSYSTEM S_PORTALS
-/* Log the requested level, then install it as the portal_debug mask. */
-void portals_debug_set_level(unsigned int debug_level)
-{
-        printk(KERN_WARNING "Lustre: Setting portals debug level to %08x\n",
-               debug_level);
-        portal_debug = debug_level;
-}
-
-/* Run the user-mode helper named by portals_upcall and log the outcome.
- *
- * argv - NULL-terminated argument vector.  Slot 0 is overwritten with
- *        portals_upcall; at least argv[1] must be present (asserted).
- *
- * The helper gets a fixed minimal environment (HOME and PATH).  The log
- * message echoes argv[1]..argv[4] at most; ",..." stands for any further
- * arguments. */
-void portals_run_upcall(char **argv)
-{
-        int   rc;
-        int   argc;
-        char *envp[] = {
-                "HOME=/",
-                "PATH=/sbin:/bin:/usr/sbin:/usr/bin",
-                NULL};
-        ENTRY;
-
-        argv[0] = portals_upcall;
-        /* count the arguments up to the terminating NULL */
-        argc = 1;
-        while (argv[argc] != NULL)
-                argc++;
-
-        LASSERT(argc >= 2);
-
-        rc = USERMODEHELPER(argv[0], argv, envp);
-        /* each optional argument contributes a "," separator and its text,
-         * or two empty strings when it is absent, so the number of %s
-         * conversions stays constant */
-        if (rc < 0) {
-                CERROR("Error %d invoking portals upcall %s %s%s%s%s%s%s%s%s; "
-                       "check /proc/sys/portals/upcall\n",
-                       rc, argv[0], argv[1],
-                       argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-                       argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-                       argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-                       argc < 6 ? "" : ",...");
-        } else {
-                /* NOTE(review): success is also reported through CERROR */
-                CERROR("Invoked portals upcall %s %s%s%s%s%s%s%s%s\n",
-                       argv[0], argv[1],
-                       argc < 3 ? "" : ",", argc < 3 ? "" : argv[2],
-                       argc < 4 ? "" : ",", argc < 4 ? "" : argv[3],
-                       argc < 5 ? "" : ",", argc < 5 ? "" : argv[4],
-                       argc < 6 ? "" : ",...");
-        }
-}
-
-/* Invoke the portals upcall to report an LBUG.  The helper is passed
- * "LBUG", the source file, the function name and the line number (as
- * decimal text), in that order. */
-void portals_run_lbug_upcall(char *file, const char *fn, const int line)
-{
-        char  lineno[32];
-        char *args[6];
-
-        ENTRY;
-        snprintf (lineno, sizeof lineno, "%d", line);
-
-        /* args[0] is filled in by portals_run_upcall() */
-        args[5] = NULL;
-        args[4] = lineno;
-        args[3] = (char *)fn;
-        args[2] = file;
-        args[1] = "LBUG";
-
-        portals_run_upcall (args);
-}
-
-/* Format nid into str using a notation chosen by the NAL type, and
- * return str.  At most PTL_NALFMT_SIZE bytes are written.
- *
- *   PTL_NID_ANY                      -> "PTL_NID_ANY"
- *   TCP, IIB, OPENIB, RA, SOCK NALs  -> "<high 32 bits>:<a.b.c.d>"
- *   QSW, GM, LO NALs                 -> "<high 32 bits>:<low 32 bits>"
- *   anything else                    -> "?<nal in hex>? <nid in hex>"
- *
- * When CRAY_PORTALS is set only the PTL_NID_ANY and fallback forms are
- * compiled in. */
-char *portals_nid2str(int nal, ptl_nid_t nid, char *str)
-{
-        if (nid == PTL_NID_ANY) {
-                snprintf(str, PTL_NALFMT_SIZE, "%s", "PTL_NID_ANY");
-                return str;
-        }
-
-        switch(nal){
-/* XXX this could be a nal method of some sort, 'cept it's config
- * dependent whether (say) socknal NIDs are actually IP addresses... */
-#if !CRAY_PORTALS 
-        case TCPNAL:
-                /* userspace NAL */
-        case IIBNAL:
-        case OPENIBNAL:
-        case RANAL:
-        case SOCKNAL:
-                /* dotted-quad form; HIPQUAD supplies the four byte values */
-                snprintf(str, PTL_NALFMT_SIZE, "%u:%u.%u.%u.%u",
-                         (__u32)(nid >> 32), HIPQUAD(nid));
-                break;
-        case QSWNAL:
-        case GMNAL:
-        case LONAL:
-                snprintf(str, PTL_NALFMT_SIZE, "%u:%u",
-                         (__u32)(nid >> 32), (__u32)nid);
-                break;
-#endif
-        default:
-                snprintf(str, PTL_NALFMT_SIZE, "?%x? %llx",
-                         nal, (long long)nid);
-                break;
-        }
-        return str;
-}
-
-/* Format a process id as "<nid>-<pid>" into str and return str.  The nid
- * part comes from portals_nid2str(); the whole string is limited to
- * PTL_NALFMT_SIZE bytes. */
-char *portals_id2str(int nal, ptl_process_id_t id, char *str)
-{
-        int   used;
-
-        /* nid text first, then append the pid after it */
-        portals_nid2str(nal, id.nid, str);
-        used = strlen(str);
-
-        snprintf(&str[used], PTL_NALFMT_SIZE - used, "-%u", id.pid);
-        return str;
-}
-
-#ifdef __KERNEL__
-
-/* Show the stack of tsk, by whichever means the build supports:
- *  - under __arch_um__: log a warning (if tsk is given) and execute a
- *    breakpoint instruction;
- *  - with HAVE_SHOW_TASK: call show_task(), using the current task when
- *    tsk is NULL;
- *  - otherwise: only warn that no stack can be shown. */
-void portals_debug_dumpstack(struct task_struct *tsk)
-{
-#if defined(__arch_um__)
-        if (tsk != NULL)
-                CWARN("stack dump for pid %d (%d) requested; wake up gdb.\n",
-                      tsk->pid, UML_PID(tsk));
-        asm("int $3");
-#elif defined(HAVE_SHOW_TASK)
-        /* this is exported by lustre kernel version 42 */
-        extern void show_task(struct task_struct *);
-
-        if (tsk == NULL)
-                tsk = current;
-        CWARN("showing stack for process %d\n", tsk->pid);
-        show_task(tsk);
-#else
-        CWARN("can't show stack: kernel doesn't export show_task\n");
-#endif
-}
-
-/* Log the address of the current task struct and return it. */
-struct task_struct *portals_current(void)
-{
-        CWARN("current task struct is %p\n", current);
-        return current;
-}
-
-EXPORT_SYMBOL(portals_debug_dumpstack);
-EXPORT_SYMBOL(portals_current);
-#endif /* __KERNEL__ */
-
-EXPORT_SYMBOL(portals_debug_dumplog);
-EXPORT_SYMBOL(portals_debug_set_level);
-EXPORT_SYMBOL(portals_run_upcall);
-EXPORT_SYMBOL(portals_run_lbug_upcall);
-EXPORT_SYMBOL(portals_nid2str);
-EXPORT_SYMBOL(portals_id2str);
diff --git a/lustre/portals/libcfs/lwt.c b/lustre/portals/libcfs/lwt.c
deleted file mode 100644 (file)
index 3f6a9c2..0000000
+++ /dev/null
@@ -1,268 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2003 Cluster File Systems, Inc.
- *   Author: Eric Barton <eeb@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/kernel.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <linux/interrupt.h>
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/kp30.h>
-
-#if LWT_SUPPORT
-
-#if !KLWT_SUPPORT
-int         lwt_enabled;
-lwt_cpu_t   lwt_cpus[NR_CPUS];
-#endif
-
-int         lwt_pages_per_cpu;
-
-/* NB only root is allowed to retrieve LWT info; it's an open door into the
- * kernel... */
-
-/* Copy the NUL-terminated kernel string at knl_ptr (an address taken from
- * an LWT snapshot) out to user space.
- *
- * size      - out: bytes examined including the terminator; capped at 128,
- *             or at user_size when that is positive and smaller
- * knl_ptr   - kernel address of the string; it is NOT validated
- * user_ptr  - user buffer to receive the string, or NULL to get only *size
- * user_size - size of the user buffer in bytes
- *
- * A string that does not fit is truncated and the last four bytes of the
- * copy are replaced with "..." plus a NUL.
- *
- * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -EINVAL if a user
- * buffer smaller than 4 bytes is supplied, -EFAULT if either copy to user
- * space fails. */
-int
-lwt_lookup_string (int *size, char *knl_ptr,
-                   char *user_ptr, int user_size)
-{
-        int   maxsize = 128;
-        
-        /* knl_ptr was retrieved from an LWT snapshot and the caller wants to
-         * turn it into a string.  NB we can crash with an access violation
-         * trying to determine the string length, so we're trusting our
-         * caller... */
-
-        if (!capable(CAP_SYS_ADMIN))
-                return (-EPERM);
-
-        if (user_size > 0 && 
-            maxsize > user_size)
-                maxsize = user_size;
-
-        *size = strnlen (knl_ptr, maxsize - 1) + 1;
-        
-        if (user_ptr != NULL) {
-                /* need room for at least the "..." truncation marker */
-                if (user_size < 4)
-                        return (-EINVAL);
-                
-                if (copy_to_user (user_ptr, knl_ptr, *size))
-                        return (-EFAULT);
-
-                /* Did I truncate the string?  If so mark the tail of the
-                 * user copy; a failure here must be reported too, or the
-                 * caller is left with an unterminated buffer. */
-                if (knl_ptr[*size - 1] != 0 &&
-                    copy_to_user (user_ptr + *size - 4, "...", 4))
-                        return (-EFAULT);
-        }
-
-        return (0);
-}
-
-/* Enable or disable light-weight tracing, optionally clearing the buffers.
- *
- * enable - non-zero turns tracing on after the buffers have been visited,
- *          zero turns it off before they are visited
- * clear  - non-zero zeroes every trace page of every CPU
- *
- * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -ENODATA if a CPU
- * has no trace pages. */
-int
-lwt_control (int enable, int clear)
-{
-        lwt_page_t  *p;
-        int          i;
-        int          j;
-
-        if (!capable(CAP_SYS_ADMIN))
-                return (-EPERM);
-
-        if (!enable) {
-                LWT_EVENT(0,0,0,0);
-                lwt_enabled = 0;
-                mb();
-                /* give people some time to stop adding traces.  The task
-                 * state has to be set first: called in TASK_RUNNING,
-                 * schedule_timeout() returns without waiting. */
-                set_current_state(TASK_UNINTERRUPTIBLE);
-                schedule_timeout(10);
-        }
-
-        for (i = 0; i < num_online_cpus(); i++) {
-                p = lwt_cpus[i].lwtc_current_page;
-
-                if (p == NULL)
-                        return (-ENODATA);
-
-                if (!clear)
-                        continue;
-
-                /* walk this CPU's ring of pages, zeroing each one */
-                for (j = 0; j < lwt_pages_per_cpu; j++) {
-                        memset (p->lwtp_events, 0, PAGE_SIZE);
-
-                        p = list_entry (p->lwtp_list.next,
-                                        lwt_page_t, lwtp_list);
-                }
-        }
-
-        if (enable) {
-                lwt_enabled = 1;
-                mb();
-                LWT_EVENT(0,0,0,0);
-        }
-
-        return (0);
-}
-
-/* Report the size of the LWT trace buffers and optionally copy them out.
- *
- * now        - out: current cycle counter
- * ncpu       - out: number of online CPUs
- * total_size - out: bytes needed to hold every CPU's events
- * user_ptr   - user buffer to receive the events, or NULL to only query
- *              the three values above
- * user_size  - size of the user buffer
- *
- * Only whole events are copied from each page (bytes_per_page).
- *
- * Returns 0 on success, -EPERM without CAP_SYS_ADMIN, -ENODATA if a CPU
- * has no trace pages, -EFAULT if a copy to user space fails.
- *
- * NOTE(review): user_size is never compared with *total_size, so the
- * caller must supply a buffer of at least *total_size bytes. */
-int
-lwt_snapshot (cycles_t *now, int *ncpu, int *total_size, 
-              void *user_ptr, int user_size)
-{
-        const int    events_per_page = PAGE_SIZE / sizeof(lwt_event_t);
-        const int    bytes_per_page = events_per_page * sizeof(lwt_event_t);
-        lwt_page_t  *p;
-        int          i;
-        int          j;
-
-        if (!capable(CAP_SYS_ADMIN))
-                return (-EPERM);
-
-        *ncpu = num_online_cpus();
-        *total_size = num_online_cpus() * lwt_pages_per_cpu * bytes_per_page;
-        *now = get_cycles();
-        
-        /* size query only */
-        if (user_ptr == NULL)
-                return (0);
-
-        for (i = 0; i < num_online_cpus(); i++) {
-                p = lwt_cpus[i].lwtc_current_page;
-
-                if (p == NULL)
-                        return (-ENODATA);
-                
-                /* copy this CPU's ring of pages, starting at the current
-                 * page, back to back into the user buffer */
-                for (j = 0; j < lwt_pages_per_cpu; j++) {
-                        if (copy_to_user(user_ptr, p->lwtp_events,
-                                         bytes_per_page))
-                                return (-EFAULT);
-
-                        user_ptr = ((char *)user_ptr) + bytes_per_page;
-                        p = list_entry(p->lwtp_list.next,
-                                       lwt_page_t, lwtp_list);
-                        
-                }
-        }
-
-        return (0);
-}
-
-/* Allocate the LWT trace buffers and switch tracing on.
- *
- * LWT_MEMORY bytes are divided evenly between the online CPUs; each CPU's
- * pages are linked into a ring whose first page becomes that CPU's
- * current page.
- *
- * Returns 0 on success, -EALREADY if any CPU already has trace pages,
- * -ENOMEM if an allocation fails (everything allocated so far is released
- * through lwt_fini()). */
-int
-lwt_init () 
-{
-       int     i;
-        int     j;
-
-        for (i = 0; i < num_online_cpus(); i++)
-                if (lwt_cpus[i].lwtc_current_page != NULL)
-                        return (-EALREADY);
-        
-        LASSERT (!lwt_enabled);
-
-       /* NULL pointers, zero scalars */
-       memset (lwt_cpus, 0, sizeof (lwt_cpus));
-        lwt_pages_per_cpu = LWT_MEMORY / (num_online_cpus() * PAGE_SIZE);
-
-       for (i = 0; i < num_online_cpus(); i++)
-               for (j = 0; j < lwt_pages_per_cpu; j++) {
-                       struct page *page = alloc_page (GFP_KERNEL);
-                       lwt_page_t  *lwtp;
-
-                       if (page == NULL) {
-                               CERROR ("Can't allocate page\n");
-                                lwt_fini ();
-                               return (-ENOMEM);
-                       }
-
-                        PORTAL_ALLOC(lwtp, sizeof (*lwtp));
-                       if (lwtp == NULL) {
-                               CERROR ("Can't allocate lwtp\n");
-                                /* page is not on any list yet, so
-                                 * lwt_fini() would not find it */
-                                __free_page(page);
-                               lwt_fini ();
-                               return (-ENOMEM);
-                       }
-
-                        lwtp->lwtp_page = page;
-                        lwtp->lwtp_events = page_address(page);
-                       memset (lwtp->lwtp_events, 0, PAGE_SIZE);
-
-                       /* the first page starts the ring, the rest are
-                        * linked in after it */
-                       if (j == 0) {
-                               INIT_LIST_HEAD (&lwtp->lwtp_list);
-                               lwt_cpus[i].lwtc_current_page = lwtp;
-                       } else {
-                               list_add (&lwtp->lwtp_list,
-                                   &lwt_cpus[i].lwtc_current_page->lwtp_list);
-                       }
-                }
-
-        lwt_enabled = 1;
-        mb();
-
-        LWT_EVENT(0,0,0,0);
-
-        return (0);
-}
-
-/* Switch tracing off and free every CPU's trace pages together with their
- * lwt_page_t descriptors.  Also used by lwt_init() to unwind a partially
- * completed allocation. */
-void
-lwt_fini () 
-{
-        int    i;
-
-        /* disable tracing without clearing; the return value is ignored */
-        lwt_control(0, 0);
-        
-        for (i = 0; i < num_online_cpus(); i++)
-                while (lwt_cpus[i].lwtc_current_page != NULL) {
-                        lwt_page_t *lwtp = lwt_cpus[i].lwtc_current_page;
-                        
-                        /* an empty list means lwtp is the last page left
-                         * in this CPU's ring */
-                        if (list_empty (&lwtp->lwtp_list)) {
-                                lwt_cpus[i].lwtc_current_page = NULL;
-                        } else {
-                                /* advance to the next page before
-                                 * unlinking this one */
-                                lwt_cpus[i].lwtc_current_page =
-                                        list_entry (lwtp->lwtp_list.next,
-                                                    lwt_page_t, lwtp_list);
-
-                                list_del (&lwtp->lwtp_list);
-                        }
-                        
-                        __free_page (lwtp->lwtp_page);
-                        PORTAL_FREE (lwtp, sizeof (*lwtp));
-                }
-}
-
-EXPORT_SYMBOL(lwt_enabled);
-EXPORT_SYMBOL(lwt_cpus);
-
-EXPORT_SYMBOL(lwt_init);
-EXPORT_SYMBOL(lwt_fini);
-EXPORT_SYMBOL(lwt_lookup_string);
-EXPORT_SYMBOL(lwt_control);
-EXPORT_SYMBOL(lwt_snapshot);
-#endif
diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c
deleted file mode 100644 (file)
index 2a8e6f6..0000000
+++ /dev/null
@@ -1,608 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-
-#include <portals/lib-p30.h>
-#include <portals/p30.h>
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-
-#define PORTAL_MINOR 240
-
-/* One slot of the NAL command-handler table (nal_cmd[]).  A slot is free
- * while nch_handler is NULL. */
-struct nal_cmd_handler {
-        int                  nch_number;   /* NAL number the slot serves */
-        nal_cmd_handler_fn  *nch_handler;  /* command callback */
-        void                *nch_private;  /* passed back to nch_handler */
-};
-
-static struct nal_cmd_handler nal_cmd[16];
-static DECLARE_MUTEX(nal_cmd_sem);
-
-#ifdef PORTAL_DEBUG
-/* Log a failed assertion at D_EMERG level and raise an LBUG attributed to
- * the failing location (file, func, line).
- *
- * NOTE(review): this function only exists under PORTAL_DEBUG, but
- * EXPORT_SYMBOL(kportal_assertion_failed) at the end of the file is
- * unconditional. */
-void kportal_assertion_failed(char *expr, char *file, const char *func,
-                              const int line)
-{
-        portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK,
-                          "ASSERTION(%s) failed\n", expr);
-        LBUG_WITH_LOC(file, func, line);
-}
-#endif
-
-/* Daemonize the calling kernel thread and name it str.  Kernels from
- * 2.5.63 on take the name as an argument to daemonize(); on older kernels
- * the name is written into current->comm afterwards. */
-void
-kportal_daemonize (char *str) 
-{
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63))
-        daemonize(str);
-#else
-        daemonize();
-        snprintf (current->comm, sizeof (current->comm), "%s", str);
-#endif
-}
-
-/* Free every page held by pdu's memory hog.
- *
- * The pages form a tree: the root page holds an array of pointers to
- * level-1 pages, and each level-1 page holds an array of pointers to the
- * pages below it.  Both arrays are NULL-terminated unless completely full.
- * pdu_memhog_pages counts every page in the tree, index pages included,
- * and must reach zero once the tree is gone (asserted). */
-void
-kportal_memhog_free (struct portals_device_userstate *pdu)
-{
-        struct page **level0p = &pdu->pdu_memhog_root_page;
-        struct page **level1p;
-        struct page **level2p;
-        int           count1;
-        int           count2;
-        
-        if (*level0p != NULL) {
-
-                level1p = (struct page **)page_address(*level0p);
-                count1 = 0;
-                
-                while (count1 < PAGE_SIZE/sizeof(struct page *) &&
-                       *level1p != NULL) {
-
-                        level2p = (struct page **)page_address(*level1p);
-                        count2 = 0;
-                        
-                        /* free the pages this level-1 page points to */
-                        while (count2 < PAGE_SIZE/sizeof(struct page *) &&
-                               *level2p != NULL) {
-                                
-                                __free_page(*level2p);
-                                pdu->pdu_memhog_pages--;
-                                level2p++;
-                                count2++;
-                        }
-                        
-                        /* then the level-1 page itself */
-                        __free_page(*level1p);
-                        pdu->pdu_memhog_pages--;
-                        level1p++;
-                        count1++;
-                }
-                
-                /* and finally the root page */
-                __free_page(*level0p);
-                pdu->pdu_memhog_pages--;
-
-                *level0p = NULL;
-        }
-        
-        LASSERT (pdu->pdu_memhog_pages == 0);
-}
-
-/* Allocate pages for pdu's memory hog until npages pages are held.
- *
- * pdu    - per-open state; must not hold any pages on entry (asserted)
- * npages - total pages to hold, counting the index pages of the tree
- * flags  - allocation flags handed to alloc_page()
- *
- * The pages are arranged in the tree that kportal_memhog_free() walks.
- *
- * Returns 0 on success (also if the tree fills up before npages is
- * reached), -EINVAL for a negative npages, -ENOMEM if an allocation
- * fails, -EINTR if a signal is pending.  On error the pages allocated so
- * far stay attached to pdu; the caller releases them with
- * kportal_memhog_free(). */
-int
-kportal_memhog_alloc (struct portals_device_userstate *pdu, int npages, int flags)
-{
-        struct page **level0p;
-        struct page **level1p;
-        struct page **level2p;
-        int           count1;
-        int           count2;
-        
-        LASSERT (pdu->pdu_memhog_pages == 0);
-        LASSERT (pdu->pdu_memhog_root_page == NULL);
-
-        if (npages < 0)
-                return -EINVAL;
-
-        if (npages == 0)
-                return 0;
-
-        /* root page: an array of pointers to level-1 pages */
-        level0p = &pdu->pdu_memhog_root_page;
-        *level0p = alloc_page(flags);
-        if (*level0p == NULL)
-                return -ENOMEM;
-        pdu->pdu_memhog_pages++;
-
-        level1p = (struct page **)page_address(*level0p);
-        count1 = 0;
-        /* zeroed so that unused slots read as NULL terminators */
-        memset(level1p, 0, PAGE_SIZE);
-        
-        while (pdu->pdu_memhog_pages < npages &&
-               count1 < PAGE_SIZE/sizeof(struct page *)) {
-
-                if (signal_pending(current))
-                        return (-EINTR);
-                
-                *level1p = alloc_page(flags);
-                if (*level1p == NULL)
-                        return -ENOMEM;
-                pdu->pdu_memhog_pages++;
-
-                level2p = (struct page **)page_address(*level1p);
-                count2 = 0;
-                memset(level2p, 0, PAGE_SIZE);
-                
-                /* fill this level-1 page with pointers to new pages */
-                while (pdu->pdu_memhog_pages < npages &&
-                       count2 < PAGE_SIZE/sizeof(struct page *)) {
-                        
-                        if (signal_pending(current))
-                                return (-EINTR);
-
-                        *level2p = alloc_page(flags);
-                        if (*level2p == NULL)
-                                return (-ENOMEM);
-                        pdu->pdu_memhog_pages++;
-                        
-                        level2p++;
-                        count2++;
-                }
-                
-                level1p++;
-                count1++;
-        }
-
-        return 0;
-}
-
-/* Block every signal for the current task: fill its blocked set and
- * recalculate the pending state, all under the signal mask lock. */
-void
-kportal_blockallsigs ()
-{
-        unsigned long  flags;
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-}
-
-/* called when opening /dev/device
- *
- * Takes a module reference and attaches a fresh, empty
- * portals_device_userstate to the file as private_data.
- *
- * Returns 0, or -EINVAL if inode is NULL.  A failed allocation is not an
- * error here: private_data is left NULL, which the IOC_PORTAL_MEMHOG
- * ioctl and libcfs_psdev_release() both check for. */
-static int libcfs_psdev_open(struct inode * inode, struct file * file)
-{
-        struct portals_device_userstate *pdu;
-        ENTRY;
-        
-        if (!inode)
-                RETURN(-EINVAL);
-
-        PORTAL_MODULE_USE;
-
-        PORTAL_ALLOC(pdu, sizeof(*pdu));
-        if (pdu != NULL) {
-                pdu->pdu_memhog_pages = 0;
-                pdu->pdu_memhog_root_page = NULL;
-        }
-        file->private_data = pdu;
-        
-        RETURN(0);
-}
-
-/* called when closing /dev/device
- *
- * Frees any memory-hog pages and the per-open state attached by
- * libcfs_psdev_open(), then drops the module reference.
- *
- * Returns 0, or -EINVAL if inode is NULL. */
-static int libcfs_psdev_release(struct inode * inode, struct file * file)
-{
-        struct portals_device_userstate *pdu;
-        ENTRY;
-
-        if (!inode)
-                RETURN(-EINVAL);
-
-        pdu = file->private_data;
-        /* pdu is NULL when the allocation in open failed */
-        if (pdu != NULL) {
-                kportal_memhog_free(pdu);
-                PORTAL_FREE(pdu, sizeof(*pdu));
-        }
-        
-        PORTAL_MODULE_UNUSE;
-        RETURN(0);
-}
-
-/* Thin wrapper that releases len bytes at data through PORTAL_FREE. */
-static inline void freedata(void *data, int len)
-{
-        PORTAL_FREE(data, len);
-}
-
-/* Look up the occupied nal_cmd[] slot registered for NAL number nal.
- * Returns the slot, or NULL if no handler is registered for it.  The
- * callers in this file hold nal_cmd_sem around the call. */
-struct nal_cmd_handler *
-libcfs_find_nal_cmd_handler(int nal)
-{
-        struct nal_cmd_handler *slot = nal_cmd;
-        struct nal_cmd_handler *end =
-                nal_cmd + sizeof(nal_cmd)/sizeof(nal_cmd[0]);
-
-        for (; slot < end; slot++) {
-                /* free slots never match */
-                if (slot->nch_handler == NULL)
-                        continue;
-
-                if (slot->nch_number == nal)
-                        return (slot);
-        }
-
-        return (NULL);
-}
-
-/* Register handler (with its private argument) as the command handler for
- * NAL number nal, using the first free slot of nal_cmd[].
- *
- * Returns 0 on success, -EBUSY if the NAL already has a handler or the
- * table has no free slot. */
-int
-libcfs_nal_cmd_register(int nal, nal_cmd_handler_fn *handler, void *private)
-{
-        const int  nslots = sizeof(nal_cmd)/sizeof(nal_cmd[0]);
-        int        slot;
-        int        rc = -EBUSY;
-
-        CDEBUG(D_IOCTL, "Register NAL %x, handler: %p\n", nal, handler);
-
-        down(&nal_cmd_sem);
-
-        if (libcfs_find_nal_cmd_handler(nal) == NULL) {
-                /* not registered yet: claim the first free slot, if any */
-                for (slot = 0; slot < nslots; slot++) {
-                        if (nal_cmd[slot].nch_handler != NULL)
-                                continue;
-
-                        nal_cmd[slot].nch_number = nal;
-                        nal_cmd[slot].nch_handler = handler;
-                        nal_cmd[slot].nch_private = private;
-                        rc = 0;
-                        break;
-                }
-        }
-
-        up(&nal_cmd_sem);
-
-        return rc;
-}
-EXPORT_SYMBOL(libcfs_nal_cmd_register);
-
-/* Remove the command handler registered for NAL number nal by clearing
- * its slot.  The NAL must currently be registered (asserted). */
-void
-libcfs_nal_cmd_unregister(int nal)
-{
-        struct nal_cmd_handler *cmd;
-
-        CDEBUG(D_IOCTL, "Unregister NAL %x\n", nal);
-
-        down(&nal_cmd_sem);
-        cmd = libcfs_find_nal_cmd_handler(nal);
-        LASSERT (cmd != NULL);
-        /* a NULL nch_handler marks the slot as free again */
-        cmd->nch_handler = NULL;
-        cmd->nch_private = NULL;
-        up(&nal_cmd_sem);
-}
-EXPORT_SYMBOL(libcfs_nal_cmd_unregister);
-
-/* Dispatch a NAL configuration command to the handler registered for
- * pcfg->pcfg_nal.  The handler runs with nal_cmd_sem held.
- *
- * Returns the handler's result, or -EINVAL if no handler is registered
- * for that NAL.  Under CRAY_PORTALS nothing is dispatched and 0 is
- * returned. */
-int
-libcfs_nal_cmd(struct portals_cfg *pcfg)
-{
-#if CRAY_PORTALS
-        /* pretend success */
-        RETURN(0);
-#else
-        struct nal_cmd_handler *cmd;
-        __u32 nal = pcfg->pcfg_nal;
-        int   rc = -EINVAL;
-        ENTRY;
-
-        down(&nal_cmd_sem);
-        cmd = libcfs_find_nal_cmd_handler(nal);
-        if (cmd != NULL) {
-                CDEBUG(D_IOCTL, "calling handler nal: %x, cmd: %d\n", nal, 
-                       pcfg->pcfg_command);
-                rc = cmd->nch_handler(pcfg, cmd->nch_private);
-        } else {
-                CERROR("invalid nal: %x, cmd: %d\n", nal, pcfg->pcfg_command);
-        }
-        up(&nal_cmd_sem);
-
-        RETURN(rc);
-#endif
-}
-EXPORT_SYMBOL(libcfs_nal_cmd);
-
-static DECLARE_RWSEM(ioctl_list_sem);
-static LIST_HEAD(ioctl_list);
-
-/* Add hand to the list of extra ioctl handlers that libcfs_ioctl()
- * consults for commands it does not handle itself.
- *
- * hand->item must be an initialised, empty list head.
- *
- * Returns 0 on success, -EBUSY if hand->item is already linked into a
- * list.  The check and the insertion happen under a single write lock so
- * that two concurrent registrations cannot both pass the check. */
-int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
-{
-        int rc = 0;
-
-        down_write(&ioctl_list_sem);
-        if (!list_empty(&hand->item))
-                rc = -EBUSY;
-        else
-                list_add_tail(&hand->item, &ioctl_list);
-        up_write(&ioctl_list_sem);
-
-        RETURN(rc);
-}
-EXPORT_SYMBOL(libcfs_register_ioctl);
-
-/* Remove hand from the list of extra ioctl handlers and leave hand->item
- * re-initialised as an empty list head.
- *
- * Returns 0 on success, -ENOENT if hand->item is not linked into a list.
- * The check and the removal happen under a single write lock so that two
- * concurrent deregistrations cannot both pass the check. */
-int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
-{
-        int rc = 0;
-
-        down_write(&ioctl_list_sem);
-        if (list_empty(&hand->item))
-                rc = -ENOENT;
-        else
-                list_del_init(&hand->item);
-        up_write(&ioctl_list_sem);
-
-        RETURN(rc);
-}
-EXPORT_SYMBOL(libcfs_deregister_ioctl);
-
-static int libcfs_ioctl(struct inode *inode, struct file *file,
-                        unsigned int cmd, unsigned long arg)
-{
-        int err = -EINVAL;
-        char buf[1024];
-        struct portal_ioctl_data *data;
-        ENTRY;
-
-        if (current->fsuid != 0)
-                RETURN(err = -EACCES);
-
-        if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE ||
-             _IOC_NR(cmd) < IOC_PORTAL_MIN_NR  ||
-             _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) {
-                CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n",
-                                _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
-                RETURN(-EINVAL);
-        }
-
-        if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) {
-                CERROR("PORTALS ioctl: data error\n");
-                RETURN(-EINVAL);
-        }
-
-        data = (struct portal_ioctl_data *)buf;
-
-        switch (cmd) {
-        case IOC_PORTAL_CLEAR_DEBUG:
-                portals_debug_clear_buffer();
-                RETURN(0);
-        case IOC_PORTAL_PANIC:
-                if (!capable (CAP_SYS_BOOT))
-                        RETURN (-EPERM);
-                panic("debugctl-invoked panic");
-                RETURN(0);
-        case IOC_PORTAL_MARK_DEBUG:
-                if (data->ioc_inlbuf1 == NULL ||
-                    data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
-                        RETURN(-EINVAL);
-                portals_debug_mark_buffer(data->ioc_inlbuf1);
-                RETURN(0);
-#if LWT_SUPPORT
-        case IOC_PORTAL_LWT_CONTROL:
-                err = lwt_control (data->ioc_flags, data->ioc_misc);
-                break;
-
-        case IOC_PORTAL_LWT_SNAPSHOT: {
-                cycles_t   now;
-                int        ncpu;
-                int        total_size;
-
-                err = lwt_snapshot (&now, &ncpu, &total_size,
-                                    data->ioc_pbuf1, data->ioc_plen1);
-                data->ioc_nid = now;
-                data->ioc_count = ncpu;
-                data->ioc_misc = total_size;
-
-                /* Hedge against broken user/kernel typedefs (e.g. cycles_t) */
-                data->ioc_nid2 = sizeof(lwt_event_t);
-                data->ioc_nid3 = offsetof(lwt_event_t, lwte_where);
-
-                if (err == 0 &&
-                    copy_to_user((char *)arg, data, sizeof (*data)))
-                        err = -EFAULT;
-                break;
-        }
-
-        case IOC_PORTAL_LWT_LOOKUP_STRING:
-                err = lwt_lookup_string (&data->ioc_count, data->ioc_pbuf1,
-                                         data->ioc_pbuf2, data->ioc_plen2);
-                if (err == 0 &&
-                    copy_to_user((char *)arg, data, sizeof (*data)))
-                        err = -EFAULT;
-                break;
-#endif
-        case IOC_PORTAL_NAL_CMD: {
-                struct portals_cfg pcfg;
-
-                if (data->ioc_plen1 != sizeof(pcfg)) {
-                        CERROR("Bad ioc_plen1 %d (wanted %d)\n",
-                               data->ioc_plen1, sizeof(pcfg));
-                        err = -EINVAL;
-                        break;
-                }
-
-                if (copy_from_user(&pcfg, (void *)data->ioc_pbuf1,
-                                   sizeof(pcfg))) {
-                        err = -EFAULT;
-                        break;
-                }
-
-                CDEBUG (D_IOCTL, "nal command nal %x cmd %d\n", pcfg.pcfg_nal,
-                        pcfg.pcfg_command);
-                err = libcfs_nal_cmd(&pcfg);
-
-                if (err == 0 &&
-                    copy_to_user((char *)data->ioc_pbuf1, &pcfg,
-                                 sizeof (pcfg)))
-                        err = -EFAULT;
-                break;
-        }
-
-        case IOC_PORTAL_MEMHOG:
-                if (!capable (CAP_SYS_ADMIN))
-                        err = -EPERM;
-                else if (file->private_data == NULL) {
-                        err = -EINVAL;
-                } else {
-                        kportal_memhog_free(file->private_data);
-                        err = kportal_memhog_alloc(file->private_data,
-                                                   data->ioc_count,
-                                                   data->ioc_flags);
-                        if (err != 0)
-                                kportal_memhog_free(file->private_data);
-                }
-                break;
-
-        default: {
-                struct libcfs_ioctl_handler *hand;
-                err = -EINVAL;
-                down_read(&ioctl_list_sem);
-                list_for_each_entry(hand, &ioctl_list, item) {
-                        err = hand->handle_ioctl(data, cmd, arg);
-                        if (err != -EINVAL)
-                                break;
-                }
-                up_read(&ioctl_list_sem);
-                } break;
-        }
-
-        RETURN(err);
-}
-
-
-/* File operations of the portals misc device: only ioctl, open and
- * release are provided. */
-static struct file_operations libcfs_fops = {
-        ioctl:   libcfs_ioctl,
-        open:    libcfs_psdev_open,
-        release: libcfs_psdev_release
-};
-
-
-/* Misc device "portals" on minor PORTAL_MINOR, served by libcfs_fops.
- * The initialiser is positional. */
-static struct miscdevice libcfs_dev = {
-        PORTAL_MINOR,
-        "portals",
-        &libcfs_fops
-};
-
-extern int insert_proc(void);
-extern void remove_proc(void);
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Portals v3.1");
-MODULE_LICENSE("GPL");
-
-/* Module entry point.  Brings up, in order: the debug subsystem, LWT
- * tracing (only with LWT_SUPPORT), the misc device and the proc entries.
- * If a step fails the steps already completed are undone in reverse order
- * through the cleanup labels.
- *
- * Returns 0 on success or the error of the step that failed. */
-static int init_libcfs_module(void)
-{
-        int rc;
-
-        rc = portals_debug_init(5 * 1024 * 1024);
-        if (rc < 0) {
-                printk(KERN_ERR "LustreError: portals_debug_init: %d\n", rc);
-                return (rc);
-        }
-
-#if LWT_SUPPORT
-        rc = lwt_init();
-        if (rc != 0) {
-                CERROR("lwt_init: error %d\n", rc);
-                goto cleanup_debug;
-        }
-#endif
-        rc = misc_register(&libcfs_dev);
-        if (rc) {
-                CERROR("misc_register: error %d\n", rc);
-                goto cleanup_lwt;
-        }
-
-        rc = insert_proc();
-        if (rc) {
-                CERROR("insert_proc: error %d\n", rc);
-                goto cleanup_deregister;
-        }
-
-        CDEBUG (D_OTHER, "portals setup OK\n");
-        return (0);
-
- cleanup_deregister:
-        misc_deregister(&libcfs_dev);
- cleanup_lwt:
-        /* cleanup_debug exists only with LWT_SUPPORT, where its sole user
-         * (the lwt_init failure path) is compiled */
-#if LWT_SUPPORT
-        lwt_fini();
- cleanup_debug:
-#endif
-        portals_debug_cleanup();
-        return rc;
-}
-
-/* Module exit point.  Undoes init_libcfs_module() in reverse order: proc
- * entries, misc device, LWT tracing (only with LWT_SUPPORT), debug
- * subsystem.  Before the debug subsystem goes down, any memory still
- * accounted in portal_kmemory is reported as a leak. */
-static void exit_libcfs_module(void)
-{
-        int rc;
-
-        remove_proc();
-
-        CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
-               atomic_read(&portal_kmemory));
-
-        rc = misc_deregister(&libcfs_dev);
-        if (rc)
-                CERROR("misc_deregister error %d\n", rc);
-
-#if LWT_SUPPORT
-        lwt_fini();
-#endif
-
-        if (atomic_read(&portal_kmemory) != 0)
-                CERROR("Portals memory leaked: %d bytes\n",
-                       atomic_read(&portal_kmemory));
-
-        /* printk rather than CERROR: this is the cleanup of the debug
-         * subsystem itself */
-        rc = portals_debug_cleanup();
-        if (rc)
-                printk(KERN_ERR "LustreError: portals_debug_cleanup: %d\n", rc);
-}
-
-EXPORT_SYMBOL(kportal_daemonize);
-EXPORT_SYMBOL(kportal_blockallsigs);
-EXPORT_SYMBOL(kportal_assertion_failed);
-
-module_init(init_libcfs_module);
-module_exit(exit_libcfs_module);
diff --git a/lustre/portals/libcfs/proc.c b/lustre/portals/libcfs/proc.c
deleted file mode 100644 (file)
index 08446a0..0000000
+++ /dev/null
@@ -1,321 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *   Author: Zach Brown <zab@zabbo.net>
- *   Author: Peter J. Braam <braam@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-#include <net/sock.h>
-#include <linux/uio.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-
-#include <linux/proc_fs.h>
-#include <linux/sysctl.h>
-
-# define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/kp30.h>
-#include <asm/div64.h>
-#include "tracefile.h"
-
-/* handle returned by register_sysctl_table(); NULL while not registered */
-static struct ctl_table_header *portals_table_header = NULL;
-extern char debug_file_path[1024];
-extern char portals_upcall[1024];
-
-/* sysctl binary id of the "portals" directory */
-#define PSDEV_PORTALS  (0x100)
-/* sysctl binary ids of the entries inside the "portals" directory */
-enum {
-        PSDEV_DEBUG = 1,          /* control debugging */
-        PSDEV_SUBSYSTEM_DEBUG,    /* control debugging */
-        PSDEV_PRINTK,             /* force all errors to console */
-        PSDEV_CONSOLE,            /* allow _any_ messages to console */
-        PSDEV_DEBUG_PATH,         /* crashdump log location */
-        PSDEV_DEBUG_DUMP_PATH,    /* crashdump tracelog location */
-        PSDEV_PORTALS_UPCALL,     /* User mode upcall script  */
-        PSDEV_PORTALS_MEMUSED,    /* bytes currently PORTAL_ALLOCated */
-};
-
-/*
- * Entries of the "portals" sysctl directory.  Initialisers are positional:
- * id, name, data, size, mode, child table, proc handler[, sysctl strategy].
- */
-static struct ctl_table portals_table[] = {
-        {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL,
-         &proc_dointvec},
-        {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug,
-         sizeof(int), 0644, NULL, &proc_dointvec},
-        {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL,
-         &proc_dointvec},
-        {PSDEV_DEBUG_PATH, "debug_path", debug_file_path,
-         sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string},
-        {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall,
-         sizeof(portals_upcall), 0644, NULL, &proc_dostring,
-         &sysctl_string},
-        /* read-only: this is the live allocation counter that is checked
-         * for leaks at module unload; a write through sysctl would corrupt
-         * that accounting */
-        {PSDEV_PORTALS_MEMUSED, "memused", (int *)&portal_kmemory.counter,
-         sizeof(int), 0444, NULL, &proc_dointvec},
-        {0}
-};
-
-/* top-level table: a single "portals" directory holding the entries above */
-static struct ctl_table top_table[2] = {
-        {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table},
-        {0}
-};
-
-
-#ifdef PORTALS_PROFILING
-/*
- * Profiling table.  It is built statically for now because that is simple,
- * but we could do some tricks with ELF sections to have this array
- * built automatically.
- */
-/* one designated initialiser per profiling point: the slot index is
- * PROF__<name> and the first field is the name as a string */
-#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, }
-
-/* insert_proc() refuses to register the profiling file unless this array
- * has exactly MAX_PROFS entries */
-struct prof_ent prof_ents[] = {
-        def_prof(our_recvmsg),
-        def_prof(our_sendmsg),
-        def_prof(socknal_recv),
-        def_prof(lib_parse),
-        def_prof(conn_list_walk),
-        def_prof(memcpy),
-        def_prof(lib_finalize),
-        def_prof(pingcli_time),
-        def_prof(gmnal_send),
-        def_prof(gmnal_recv),
-};
-
-EXPORT_SYMBOL(prof_ents);
-
-/*
- * this function is as crazy as the proc filling api
- * requires.
- *
- * buffer: page allocated for us to scribble in.  the
- *  data returned to the user will be taken from here.
- * *start: address of the pointer that will tell the 
- *  caller where in buffer the data the user wants is.
- * ppos: offset in the entire /proc file that the user
- *  currently wants.
- * wanted: the amount of data the user wants.
- *
- * while going, 'curpos' is the offset in the entire
- * file where we currently are.  We only actually
- * start filling buffer when we get to a place in
- * the file that the user cares about.
- *
- * we take care to only sprintf when the user cares because
- * we're holding a lock while we do this.
- *
- * we're smart and know that we generate fixed size lines.
- * we only start writing to the buffer when the user cares.
- * This is unpredictable because we don't snapshot the
- * list between calls that are filling in a file from
- * the list.  The list could change mid read and the
- * output will look very weird indeed.  oh well.
- */
-
-/*
- * read_proc callback for the profiling file: emits a header line followed
- * by one fixed-width line per prof_ents[] slot.  See the description of
- * the buffer/start/ppos/wanted protocol in the comment above.
- * Returns the number of bytes made available at *start (at most 'wanted')
- * and sets *eof once the last slot has been visited.
- */
-static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted,
-                          int *eof, void *data)
-{
-        int len = 0, i;
-        int curpos;
-        char *header = "Interval        Cycles_per (Starts Finishes Total)\n";
-        int header_len = strlen(header);
-        char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)";
-        /* width of one formatted line: the fields and separators of
-         * 'format' (..., closing ')') plus the trailing '\n' appended
-         * below.  The newline used to be left out, which made curpos drift
-         * by one byte per line relative to what is actually emitted. */
-        int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1 + 1);
-
-        *start = buffer;
-
-        if (ppos < header_len) {
-                /* only the part of the header from ppos onwards is left to
-                 * copy; copying header_len bytes from header + ppos would
-                 * read past the end of the string */
-                int diff = MIN(header_len - (int)ppos, wanted);
-                memcpy(buffer, header + ppos, diff);
-                len += diff;
-                ppos += diff;
-        }
-
-        if (len >= wanted)
-                goto out;
-
-        /* curpos tracks the file offset of the line about to be produced */
-        curpos = header_len;
-
-        for ( i = 0; i < MAX_PROFS ; i++) {
-                int copied;
-                struct prof_ent *pe = &prof_ents[i];
-                long long cycles_per;
-                /*
-                 * find the part of the array that the buffer wants
-                 */
-                if (ppos >= (curpos + line_len))  {
-                        curpos += line_len;
-                        continue;
-                }
-                /* the clever caller split a line */
-                if (ppos > curpos) {
-                        *start = buffer + (ppos - curpos);
-                }
-
-                /* average cycles per finished interval; avoid dividing by 0 */
-                if (pe->finishes == 0)
-                        cycles_per = 0;
-                else
-                {
-                        cycles_per = pe->total_cycles;
-                        do_div (cycles_per, pe->finishes);
-                }
-
-                copied = sprintf(buffer + len, format, pe->str, cycles_per,
-                                 pe->starts, pe->finishes, pe->total_cycles);
-
-                len += copied;
-
-                /* pad to line len, -1 for \n */
-                if ((copied < line_len-1)) {
-                        int diff = (line_len-1) - copied;
-                        memset(buffer + len, ' ', diff);
-                        len += diff;
-                        copied += diff;
-                }
-
-                buffer[len++]= '\n';
-
-                /* bail if we have enough */
-                if (((buffer + len) - *start) >= wanted)
-                        break;
-
-                curpos += line_len;
-        }
-
-        /* lameness */
-        if (i == MAX_PROFS)
-                *eof = 1;
- out:
-
-        return MIN(((buffer + len) - *start), wanted);
-}
-
-/*
- * all kids love /proc :/
- *
- * /proc-relative directory under which the profiling "cycles" entry is
- * created by insert_proc() and removed by remove_proc().
- */
-static unsigned char basedir[]="net/portals";
-#endif /* PORTALS_PROFILING */
-
-/* defined below; needed here to undo a partial registration */
-void remove_proc(void);
-
-/*
- * Register the libcfs /proc interface: the optional profiling file, the
- * "portals" sysctl table (when CONFIG_SYSCTL is set) and the dump_kernel,
- * daemon_file and debug_mb entries under sys/portals.
- *
- * Returns 0 on success and -1 on failure.  On failure everything that was
- * registered by this call is removed again, so the caller does not have to
- * (and the module is not left with /proc entries pointing into it if the
- * load is aborted).
- */
-int insert_proc(void)
-{
-        struct proc_dir_entry *ent;
-#if PORTALS_PROFILING
-        unsigned char dir[128];
-
-        if (ARRAY_SIZE(prof_ents) != MAX_PROFS) {
-                CERROR("profiling enum and array are out of sync.\n");
-                return -1;
-        }
-
-        /*
-         * This is pretty lame.  assuming that failure just
-         * means that they already existed.
-         */
-        /* dir is an automatic array: it must be terminated before strcat()
-         * can append to it */
-        dir[0] = '\0';
-        strcat(dir, basedir);
-        create_proc_entry(dir, S_IFDIR, 0);
-
-        strcat(dir, "/cycles");
-        ent = create_proc_entry(dir, 0, 0);
-        if (!ent) {
-                CERROR("couldn't register %s?\n", dir);
-                goto failed;
-        }
-
-        ent->data = NULL;
-        ent->read_proc = prof_read_proc;
-#endif /* PORTALS_PROFILING */
-
-#ifdef CONFIG_SYSCTL
-        if (!portals_table_header)
-                portals_table_header = register_sysctl_table(top_table, 0);
-#endif
-
-        ent = create_proc_entry("sys/portals/dump_kernel", 0, NULL);
-        if (ent == NULL) {
-                CERROR("couldn't register dump_kernel\n");
-                goto failed;
-        }
-        ent->write_proc = trace_dk;
-
-        ent = create_proc_entry("sys/portals/daemon_file", 0, NULL);
-        if (ent == NULL) {
-                CERROR("couldn't register daemon_file\n");
-                goto failed;
-        }
-        ent->write_proc = trace_write_daemon_file;
-        ent->read_proc = trace_read_daemon_file;
-
-        ent = create_proc_entry("sys/portals/debug_mb", 0, NULL);
-        if (ent == NULL) {
-                CERROR("couldn't register debug_mb\n");
-                goto failed;
-        }
-        ent->write_proc = trace_write_debug_mb;
-        ent->read_proc = trace_read_debug_mb;
-
-        return 0;
-
- failed:
-        /* remove_proc() removes every entry by name and unregisters the
-         * sysctl table only if it was registered, so it is used here to
-         * undo a partial setup */
-        remove_proc();
-        return -1;
-}
-
-/*
- * Undo insert_proc(): remove the profiling entries (when built in), the
- * three sys/portals files and, with CONFIG_SYSCTL, the sysctl table.
- */
-void remove_proc(void)
-{
-#if PORTALS_PROFILING
-        unsigned char dir[128];
-        int base_len;
-
-        /* build "<basedir>/cycles", remove it, then cut the name back to
-         * "<basedir>" and remove the directory itself */
-        strcpy(dir, basedir);
-        base_len = strlen(dir);
-
-        strcat(dir, "/cycles");
-        remove_proc_entry(dir, 0);
-
-        dir[base_len] = '\0';
-        remove_proc_entry(dir, 0);
-#endif /* PORTALS_PROFILING */
-
-        remove_proc_entry("sys/portals/dump_kernel", NULL);
-        remove_proc_entry("sys/portals/daemon_file", NULL);
-        remove_proc_entry("sys/portals/debug_mb", NULL);
-
-#ifdef CONFIG_SYSCTL
-        /* only unregister what was actually registered, and forget the
-         * handle so a later insert_proc() registers again */
-        if (portals_table_header != NULL) {
-                unregister_sysctl_table(portals_table_header);
-                portals_table_header = NULL;
-        }
-#endif
-}
diff --git a/lustre/portals/libcfs/tracefile.c b/lustre/portals/libcfs/tracefile.c
deleted file mode 100644 (file)
index f0c06e5..0000000
+++ /dev/null
@@ -1,876 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Zach Brown <zab@clusterfs.com>
- *   Author: Phil Schwan <phil@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/rwsem.h>
-#include <linux/proc_fs.h>
-#include <linux/file.h>
-#include <linux/smp.h>
-#include <linux/ctype.h>
-#include <asm/uaccess.h>
-#ifdef HAVE_MM_INLINE
-#include <linux/mm_inline.h>
-#endif
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/kp30.h>
-#include <linux/portals_compat25.h>
-#include <linux/libcfs.h>
-
-/* number of pages that add up to 5MB (5 << 20 bytes) */
-#define TCD_MAX_PAGES (5 << (20 - PAGE_SHIFT))
-
-/* XXX move things up to the top, comment */
-
-/* Per-CPU trace state, one array slot per possible CPU.  The union with
- * __pad makes every slot at least SMP_CACHE_BYTES large. */
-static union {
-        struct trace_cpu_data {
-                /* pages currently being filled with trace records, and how
-                 * many of them there are */
-                struct list_head        tcd_pages;
-                unsigned long           tcd_cur_pages;
-
-                /* pages kept around after the debug daemon has handled
-                 * them, and how many of them there are */
-                struct list_head        tcd_daemon_pages;
-                unsigned long           tcd_cur_daemon_pages;
-
-                /* upper bound applied to each of the two lists above */
-                unsigned long           tcd_max_pages;
-                /* when set, portals_debug_msg() stops recording on this CPU */
-                int                     tcd_shutting_down;
-        } tcd;
-        char __pad[SMP_CACHE_BYTES];
-} trace_data[NR_CPUS] __cacheline_aligned;
-
-/* A temporary bag of trace pages gathered from (or handed back to) the
- * per-CPU lists. */
-struct page_collection {
-        struct list_head        pc_pages;
-        /* taken by the per-CPU callbacks around every access to pc_pages */
-        spinlock_t              pc_lock;
-        /* non-zero: collect_pages_on_cpu() also takes tcd_daemon_pages */
-        int                     pc_want_daemon_pages;
-};
-
-/* Control block shared between the debug daemon thread and the code that
- * starts and stops it. */
-struct tracefiled_ctl {
-        struct completion        tctl_start;    /* completed by the thread once it runs */
-        struct completion        tctl_stop;     /* completed by the thread on exit */
-        wait_queue_head_t        tctl_waitq;    /* the thread sleeps here between passes */
-        pid_t                    tctl_pid;
-        atomic_t                 tctl_shutdown; /* set to ask the thread to exit */
-};
-
-/* default wrap-around point (500MB) for the daemon's output file */
-#define TRACEFILE_SIZE (500 << 20)
-/* protects 'tracefile'; also held for writing while dumping all pages */
-static DECLARE_RWSEM(tracefile_sem);
-/* path the daemon writes to, or NULL when no daemon file is configured */
-static char *tracefile = NULL;
-static long long tracefile_size = TRACEFILE_SIZE;
-static struct tracefiled_ctl trace_tctl;
-/* serialises trace_start_thread() and trace_stop_thread() */
-static DECLARE_MUTEX(trace_thread_sem);
-static int thread_running = 0;
-
-/* fallback for kernels that do not provide get_cpu()/put_cpu() */
-#ifndef get_cpu
-#define get_cpu() smp_processor_id()
-#define put_cpu() do { } while (0)
-#endif
-
-/* Pin the caller to its CPU, disable local interrupts (saving the previous
- * state in FLAGS) and evaluate to that CPU's trace_cpu_data. */
-#define trace_get_tcd(FLAGS) ({                 \
-        struct trace_cpu_data *__ret;           \
-        int __cpu = get_cpu();                  \
-        local_irq_save(FLAGS);                  \
-        __ret = &trace_data[__cpu].tcd;         \
-        __ret;                                  \
-})
-
-/* Counterpart of trace_get_tcd(); TCD itself is not used. */
-#define trace_put_tcd(TCD, FLAGS) do {          \
-        local_irq_restore(FLAGS);               \
-        put_cpu();                              \
-} while (0)
-
-static void put_pages_on_daemon_list_on_cpu(void *info);
-
-/*
- * Return a page of this CPU's trace list that has 'len' bytes left at the
- * end, or NULL if no such page can be provided.
- *
- * The last page of the list is used if it has room; otherwise a new page
- * is allocated while the list is below tcd_max_pages.  When the list is
- * full or the allocation fails, the oldest page is emptied and recycled
- * (after first moving about 10% of the pages to the daemon list if the
- * debug daemon is running).
- *
- * NULL is returned when 'len' exceeds PAGE_SIZE, and when there is no page
- * to recycle because the very first allocation on this CPU failed.
- */
-static struct page *trace_get_page(struct trace_cpu_data *tcd,
-                                   unsigned long len)
-{
-        struct page *page = NULL;
-
-        if (len > PAGE_SIZE) {
-                printk(KERN_ERR "cowardly refusing to write %lu bytes in a "
-                       "page\n", len);
-                return NULL;
-        }
-
-        /* page->index holds the number of bytes already used in the page */
-        if (!list_empty(&tcd->tcd_pages)) {
-                page = list_entry(tcd->tcd_pages.prev, struct page,
-                                  PAGE_LIST_ENTRY);
-                if (page->index + len <= PAGE_SIZE)
-                        return page;
-        }
-
-        if (tcd->tcd_cur_pages < tcd->tcd_max_pages) {
-                page = alloc_page(GFP_ATOMIC);
-                if (page == NULL) {
-                        /* the kernel should print a message for us.  fall back
-                         * to using the last page in the ring buffer. */
-                        goto ring_buffer;
-                }
-                page->index = 0;
-                /* remember the owning CPU so the page can be routed back */
-                page->mapping = (void *)(long)smp_processor_id();
-                list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
-                tcd->tcd_cur_pages++;
-
-                if (tcd->tcd_cur_pages > 8 && thread_running) {
-                        struct tracefiled_ctl *tctl = &trace_tctl;
-                        wake_up(&tctl->tctl_waitq);
-                }
-                return page;
-        }
-
- ring_buffer:
-        if (thread_running) {
-                int pgcount = tcd->tcd_cur_pages / 10;
-                struct page_collection pc;
-                struct list_head *pos, *tmp;
-                printk(KERN_WARNING "debug daemon buffer overflowed; discarding"
-                       " 10%% of pages (%d)\n", pgcount + 1);
-
-                INIT_LIST_HEAD(&pc.pc_pages);
-                spin_lock_init(&pc.pc_lock);
-
-                list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-                        struct page *page;
-
-                        if (pgcount-- == 0)
-                                break;
-
-                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                        list_del(&PAGE_LIST(page));
-                        list_add_tail(&PAGE_LIST(page), &pc.pc_pages);
-                        tcd->tcd_cur_pages--;
-                }
-                put_pages_on_daemon_list_on_cpu(&pc);
-        }
-
-        /* We can get here with an empty list when the first alloc_page()
-         * on this CPU fails.  There is nothing to recycle then; report that
-         * to the caller (which copes with NULL) instead of asserting from
-         * inside the debug path. */
-        if (list_empty(&tcd->tcd_pages))
-                return NULL;
-
-        /* recycle the oldest page: empty it and move it to the tail */
-        page = list_entry(tcd->tcd_pages.next, struct page, PAGE_LIST_ENTRY);
-        page->index = 0;
-
-        list_del(&PAGE_LIST(page));
-        list_add_tail(&PAGE_LIST(page), &tcd->tcd_pages);
-        return page;
-}
-
-/*
- * Print one debug message on the console.  The printk level and the
- * "Lustre"/"LustreError" prefix are chosen from the most severe bit set in
- * 'mask'; messages without a severity bit are printed at KERN_INFO.
- * 'buf'/'len' is the message text, which need not be NUL terminated.
- */
-static void print_to_console(struct ptldebug_header *hdr, int mask, char *buf,
-                             int len, char *file, const char *fn)
-{
-        char *prefix = NULL, *ptype = NULL;
-
-        if ((mask & D_EMERG) != 0) {
-                prefix = "LustreError";
-                ptype = KERN_EMERG;
-        } else if ((mask & D_ERROR) != 0) {
-                prefix = "LustreError";
-                ptype = KERN_ERR;
-        } else if ((mask & D_WARNING) != 0) {
-                prefix = "Lustre";
-                ptype = KERN_WARNING;
-        } else {
-                /* Callers only get here without a severity bit when
-                 * portal_printk is set, but do not rely on that: leaving
-                 * prefix/ptype NULL would hand NULL to the two %s below. */
-                prefix = "Lustre";
-                ptype = KERN_INFO;
-        }
-
-        printk("%s%s: %d:%d:(%s:%d:%s()) %.*s", ptype, prefix, hdr->ph_pid,
-               hdr->ph_extern_pid, file, hdr->ph_line_num, fn, len, buf);
-}
-
-void portals_debug_msg(int subsys, int mask, char *file, const char *fn,
-                       const int line, unsigned long stack, char *format, ...)
-{
-        struct trace_cpu_data *tcd;
-        struct ptldebug_header header;
-        struct page *page;
-        char *debug_buf = format;
-        int known_size, needed = 85 /* average message length */, max_nob;
-        va_list       ap;
-        unsigned long flags;
-        struct timeval tv;
-
-        if (*(format + strlen(format) - 1) != '\n')
-                printk(KERN_INFO "format at %s:%d:%s doesn't end in newline\n",
-                       file, line, fn);
-
-        tcd = trace_get_tcd(flags);
-        if (tcd->tcd_shutting_down)
-                goto out;
-
-        do_gettimeofday(&tv);
-
-        header.ph_subsys = subsys;
-        header.ph_mask = mask;
-        header.ph_cpu_id = smp_processor_id();
-        header.ph_sec = (__u32)tv.tv_sec;
-        header.ph_usec = tv.tv_usec;
-        header.ph_stack = stack;
-        header.ph_pid = current->pid;
-        header.ph_line_num = line;
-
-#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
-        header.ph_extern_pid = current->thread.extern_pid;
-#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        header.ph_extern_pid = current->thread.mode.tt.extern_pid;
-#else
-        header.ph_extern_pid = 0;
-#endif
-
-        known_size = sizeof(header) + strlen(file) + strlen(fn) + 2; // nulls
-
- retry:
-        page = trace_get_page(tcd, needed + known_size);
-        if (page == NULL) {
-                debug_buf = format;
-                if (needed + known_size > PAGE_SIZE)
-                        mask |= D_ERROR;
-                needed = strlen(format);
-                goto out;
-        }
-
-        debug_buf = page_address(page) + page->index + known_size;
-
-        max_nob = PAGE_SIZE - page->index - known_size;
-        LASSERT(max_nob > 0);
-        va_start(ap, format);
-        needed = vsnprintf(debug_buf, max_nob, format, ap);
-        va_end(ap);
-
-        if (needed > max_nob) /* overflow.  oh poop. */
-                goto retry;
-
-        header.ph_len = known_size + needed;
-        debug_buf = page_address(page) + page->index;
-
-        memcpy(debug_buf, &header, sizeof(header));
-        page->index += sizeof(header);
-        debug_buf += sizeof(header);
-
-        strcpy(debug_buf, file);
-        page->index += strlen(file) + 1;
-        debug_buf += strlen(file) + 1;
-
-        strcpy(debug_buf, fn);
-        page->index += strlen(fn) + 1;
-        debug_buf += strlen(fn) + 1;
-
-        page->index += needed;
-        if (page->index > PAGE_SIZE)
-                printk(KERN_EMERG "page->index == %lu in portals_debug_msg\n",
-                       page->index);
-
- out:
-        if ((mask & (D_EMERG | D_ERROR | D_WARNING)) || portal_printk)
-                print_to_console(&header, mask, debug_buf, needed, file, fn);
-
-        trace_put_tcd(tcd, flags);
-}
-EXPORT_SYMBOL(portals_debug_msg);
-
-/*
- * Move the calling CPU's trace pages into the page_collection passed as
- * 'info' and reset the per-CPU list and counter.  The daemon pages are
- * taken as well when pc_want_daemon_pages is set.
- */
-static void collect_pages_on_cpu(void *info)
-{
-        struct page_collection *coll = info;
-        struct trace_cpu_data *cpu_data;
-        unsigned long irq_flags;
-
-        cpu_data = trace_get_tcd(irq_flags);
-        spin_lock(&coll->pc_lock);
-
-        /* hand over the live trace pages */
-        list_splice(&cpu_data->tcd_pages, &coll->pc_pages);
-        INIT_LIST_HEAD(&cpu_data->tcd_pages);
-        cpu_data->tcd_cur_pages = 0;
-
-        /* and, on request, the pages parked on the daemon list */
-        if (coll->pc_want_daemon_pages != 0) {
-                list_splice(&cpu_data->tcd_daemon_pages, &coll->pc_pages);
-                INIT_LIST_HEAD(&cpu_data->tcd_daemon_pages);
-                cpu_data->tcd_cur_daemon_pages = 0;
-        }
-
-        spin_unlock(&coll->pc_lock);
-        trace_put_tcd(cpu_data, irq_flags);
-}
-
-/* Gather the trace pages of every CPU into 'pc'.  pc->pc_lock and
- * pc->pc_want_daemon_pages must have been set up by the caller; only the
- * page list is initialised here. */
-static void collect_pages(struct page_collection *pc)
-{
-        /* needs to be fixed up for preempt */
-        INIT_LIST_HEAD(&pc->pc_pages);
-        /* this CPU first, then the same callback on the others */
-        collect_pages_on_cpu(pc);
-        smp_call_function(collect_pages_on_cpu, pc, 0, 1);
-}
-
-/* Return to the calling CPU's trace list every page of the collection
- * ('info') that belongs to this CPU; pages of other CPUs are left alone. */
-static void put_pages_back_on_cpu(void *info)
-{
-        struct page_collection *pc = info;
-        struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp, *cur_head;
-        unsigned long flags;
-
-        tcd = trace_get_tcd(flags);
-
-        /* first entry currently on the list (the list head itself if the
-         * list is empty); returned pages are inserted in front of it, so
-         * they end up before anything recorded since the collection */
-        cur_head = tcd->tcd_pages.next;
-
-        spin_lock(&pc->pc_lock);
-        list_for_each_safe(pos, tmp, &pc->pc_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                /* page->mapping holds the id of the CPU that owns the page */
-                if ((unsigned long)page->mapping != smp_processor_id())
-                        continue;
-
-                list_del(&PAGE_LIST(page));
-                list_add_tail(&PAGE_LIST(page), cur_head);
-                tcd->tcd_cur_pages++;
-        }
-        spin_unlock(&pc->pc_lock);
-
-        trace_put_tcd(tcd, flags);
-}
-
-/* Give the pages in 'pc' back to the trace lists of the CPUs they came
- * from: run the per-CPU helper here, then on the other CPUs. */
-static void put_pages_back(struct page_collection *pc)
-{
-        /* needs to be fixed up for preempt */
-        put_pages_back_on_cpu(pc);
-        smp_call_function(put_pages_back_on_cpu, pc, 0, 1);
-}
-
-/* Add pages to a per-cpu debug daemon ringbuffer.  This buffer makes sure that
- * we have a good amount of data at all times for dumping during an LBUG, even
- * if we have been steadily writing (and otherwise discarding) pages via the
- * debug daemon.
- *
- * Only the pages of the collection ('info') that belong to the calling CPU
- * are moved.  Whenever the daemon list grows beyond tcd_max_pages its
- * oldest page is freed. */
-static void put_pages_on_daemon_list_on_cpu(void *info)
-{
-        struct page_collection *pc = info;
-        struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp;
-        unsigned long flags;
-
-        tcd = trace_get_tcd(flags);
-
-        spin_lock(&pc->pc_lock);
-        list_for_each_safe(pos, tmp, &pc->pc_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-                /* page->mapping holds the id of the CPU that owns the page */
-                if ((unsigned long)page->mapping != smp_processor_id())
-                        continue;
-
-                list_del(&PAGE_LIST(page));
-                list_add_tail(&PAGE_LIST(page), &tcd->tcd_daemon_pages);
-                tcd->tcd_cur_daemon_pages++;
-
-                /* over the limit: drop the page at the head of the daemon
-                 * list, i.e. the one that has been on it the longest */
-                if (tcd->tcd_cur_daemon_pages > tcd->tcd_max_pages) {
-                        LASSERT(!list_empty(&tcd->tcd_daemon_pages));
-                        page = list_entry(tcd->tcd_daemon_pages.next,
-                                          struct page, PAGE_LIST_ENTRY);
-
-                        LASSERT(page->index <= PAGE_SIZE);
-                        LASSERT(page_count(page) > 0);
-
-                        page->index = 0;
-                        list_del(&PAGE_LIST(page));
-                        /* clear the borrowed fields before freeing the page */
-                        page->mapping = NULL;
-                        __free_page(page);
-                        tcd->tcd_cur_daemon_pages--;
-                }
-        }
-        spin_unlock(&pc->pc_lock);
-
-        trace_put_tcd(tcd, flags);
-}
-
-/* Move the pages in 'pc' onto the daemon lists of the CPUs they came
- * from: run the per-CPU helper here, then on the other CPUs. */
-static void put_pages_on_daemon_list(struct page_collection *pc)
-{
-        put_pages_on_daemon_list_on_cpu(pc);
-        smp_call_function(put_pages_on_daemon_list_on_cpu, pc, 0, 1);
-}
-
-/*
- * Print every buffered trace record of every CPU (including the pages kept
- * on the daemon lists) on the console at emergency level, freeing the
- * pages as it goes.
- */
-void trace_debug_print(void)
-{
-        struct page_collection pc;
-        struct list_head *pos, *tmp;
-
-        spin_lock_init(&pc.pc_lock);
-
-        /* collect_pages_on_cpu() reads this flag, so it must be set */
-        pc.pc_want_daemon_pages = 1;
-        collect_pages(&pc);
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
-                char *p, *end, *file, *fn;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                /* only the first page->index bytes of a page hold records;
-                 * what follows is whatever the page contained before */
-                p = page_address(page);
-                end = p + page->index;
-                while (p < end) {
-                        struct ptldebug_header *hdr;
-                        int len;
-                        /* record layout: header, file\0, fn\0, text */
-                        hdr = (void *)p;
-                        p += sizeof(*hdr);
-                        file = p;
-                        p += strlen(file) + 1;
-                        fn = p;
-                        p += strlen(fn) + 1;
-                        len = hdr->ph_len - (p - (char *)hdr);
-                        if (len < 0) /* corrupt record: give up on this page */
-                                break;
-
-                        print_to_console(hdr, D_EMERG, p, len, file, fn);
-
-                        /* step over the text to the next record's header */
-                        p += len;
-                }
-
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
-        }
-}
-
-/*
- * Write every buffered trace page of every CPU (daemon pages included) to
- * a newly created file 'filename' and free the pages written.
- *
- * The file must not exist yet (O_EXCL).  Returns 0 on success or a
- * negative errno: the open error, the error of a failed write (-EIO for a
- * short write), or the fsync error.  After a failed write the pages not
- * yet written are given back to their CPUs.
- */
-int tracefile_dump_all_pages(char *filename)
-{
-        struct page_collection pc;
-        struct file *filp;
-        struct list_head *pos, *tmp;
-        mm_segment_t oldfs;
-        int rc;
-        int sync_rc;
-
-        down_write(&tracefile_sem);
-
-        filp = filp_open(filename, O_CREAT|O_EXCL|O_WRONLY|O_LARGEFILE, 0600);
-        if (IS_ERR(filp)) {
-                rc = PTR_ERR(filp);
-                printk(KERN_ERR "LustreError: can't open %s for dump: rc %d\n",
-                       filename, rc);
-                goto out;
-        }
-
-        spin_lock_init(&pc.pc_lock);
-        pc.pc_want_daemon_pages = 1;
-        collect_pages(&pc);
-        rc = 0;
-        if (list_empty(&pc.pc_pages))
-                goto close;
-
-        /* ok, for now, just write the pages.  in the future we'll be building
-         * iobufs with the pages and calling generic_direct_IO */
-        /* the buffers passed to ->write() are kernel addresses */
-        oldfs = get_fs();
-        set_fs(get_ds());
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
-                int nob;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                nob = filp->f_op->write(filp, page_address(page), page->index,
-                                        &filp->f_pos);
-                if (nob < 0 || (unsigned long)nob != page->index) {
-                        printk(KERN_WARNING "wanted to write %lu but wrote "
-                               "%d\n", page->index, nob);
-                        put_pages_back(&pc);
-                        /* remember the failure; it must survive the fsync
-                         * below */
-                        rc = (nob < 0) ? nob : -EIO;
-                        break;
-                }
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
-        }
-        set_fs(oldfs);
-        sync_rc = filp->f_op->fsync(filp, filp->f_dentry, 1);
-        if (sync_rc) {
-                printk(KERN_ERR "sync returns %d\n", sync_rc);
-                /* report the first error that occurred */
-                if (rc == 0)
-                        rc = sync_rc;
-        }
- close:
-        filp_close(filp, 0);
- out:
-        up_write(&tracefile_sem);
-        return rc;
-}
-
-/*
- * Discard all buffered trace data: collect the trace pages of every CPU,
- * including those kept on the daemon lists, and free them.
- */
-void trace_flush_pages(void)
-{
-        struct page_collection pc;
-        struct list_head *pos, *tmp;
-
-        spin_lock_init(&pc.pc_lock);
-
-        /* collect_pages_on_cpu() reads this flag; leaving it uninitialised
-         * made it a matter of stack contents whether the daemon pages were
-         * flushed too */
-        pc.pc_want_daemon_pages = 1;
-        collect_pages(&pc);
-        list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
-        }
-}
-
-/*
- * write_proc handler: the data written is the absolute path of a file to
- * dump the whole trace buffer into (see tracefile_dump_all_pages()).
- * Trailing whitespace is stripped from the path.
- *
- * Returns 'count' on success, or a negative errno: -ENOMEM, -EFAULT,
- * -EINVAL when the path does not start with '/', or the error of the dump
- * itself.
- */
-int trace_dk(struct file *file, const char *buffer, unsigned long count,
-             void *data)
-{
-        char *name;
-        unsigned long off;
-        int rc;
-
-        /* nothing written: nothing to do, and name[0] below would be
-         * uninitialised */
-        if (count == 0)
-                return 0;
-
-        /* + 1 for the terminating NUL added below */
-        name = kmalloc(count + 1, GFP_KERNEL);
-        if (name == NULL)
-                return -ENOMEM;
-
-        if (copy_from_user(name, buffer, count)) {
-                rc = -EFAULT;
-                goto out;
-        }
-
-        if (name[0] != '/') {
-                rc = -EINVAL;
-                goto out;
-        }
-
-        /* be nice and strip out trailing '\n' */
-        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
-                ;
-
-        name[off] = '\0';
-        rc = tracefile_dump_all_pages(name);
-out:
-        kfree(name);
-        /* let the writer see failures instead of always claiming success */
-        return (rc < 0) ? rc : count;
-}
-EXPORT_SYMBOL(trace_dk);
-
-/*
- * Body of the debug daemon thread ("ktracefiled"); 'arg' is its
- * struct tracefiled_ctl.
- *
- * Until tctl_shutdown is set the thread sleeps on tctl_waitq (for at most
- * HZ jiffies), collects the trace pages of all CPUs, appends them to the
- * configured tracefile and then parks them on the per-CPU daemon lists.
- * The write position wraps to 0 once it reaches tracefile_size.  Without a
- * usable tracefile the pages go straight to the daemon lists.
- * Completes tctl_start once running and tctl_stop on exit; returns 0.
- */
-static int tracefiled(void *arg)
-{
-        struct page_collection pc;
-        struct tracefiled_ctl *tctl = arg;
-        struct list_head *pos, *tmp;
-        struct ptldebug_header *hdr;
-        struct file *filp;
-        struct page *page;
-        mm_segment_t oldfs;
-        int rc;
-
-        /* we're started late enough that we pick up init's fs context */
-        /* this is so broken in uml?  what on earth is going on? */
-        kportal_daemonize("ktracefiled");
-        reparent_to_init();
-
-        spin_lock_init(&pc.pc_lock);
-        complete(&tctl->tctl_start);
-
-        while (1) {
-                wait_queue_t __wait;
-
-                /* sleep until woken through tctl_waitq or for HZ jiffies */
-                init_waitqueue_entry(&__wait, current);
-                add_wait_queue(&tctl->tctl_waitq, &__wait);
-                set_current_state(TASK_INTERRUPTIBLE);
-                schedule_timeout(HZ);
-                remove_wait_queue(&tctl->tctl_waitq, &__wait);
-
-                if (atomic_read(&tctl->tctl_shutdown))
-                        break;
-
-                pc.pc_want_daemon_pages = 0;
-                collect_pages(&pc);
-                if (list_empty(&pc.pc_pages))
-                        continue;
-
-                /* 'tracefile' may be changed under us: only use it while
-                 * holding tracefile_sem */
-                filp = NULL;
-                down_read(&tracefile_sem);
-                if (tracefile != NULL) {
-                        filp = filp_open(tracefile, O_CREAT|O_RDWR|O_LARGEFILE,
-                                         0600);
-                        if (IS_ERR(filp)) {
-                                printk("couldn't open %s: %ld\n", tracefile,
-                                       PTR_ERR(filp));
-                                filp = NULL;
-                        }
-                }
-                up_read(&tracefile_sem);
-                if (filp == NULL) {
-                        put_pages_on_daemon_list(&pc);
-                        continue;
-                }
-
-                /* the buffers passed to ->write() are kernel addresses */
-                oldfs = get_fs();
-                set_fs(get_ds());
-
-                /* mark the first header, so we can sort in chunks */
-                page = list_entry(pc.pc_pages.next, struct page,
-                                  PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                hdr = page_address(page);
-                hdr->ph_flags |= PH_FLAG_FIRST_RECORD;
-
-                list_for_each_safe(pos, tmp, &pc.pc_pages) {
-                        /* static: the write position is kept across passes
-                         * and across re-opens of the file */
-                        static loff_t f_pos;
-                        page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                        LASSERT(page->index <= PAGE_SIZE);
-                        LASSERT(page_count(page) > 0);
-
-                        /* wrap at the size limit; never seek past EOF */
-                        if (f_pos >= tracefile_size)
-                                f_pos = 0;
-                        else if (f_pos > filp->f_dentry->d_inode->i_size)
-                                f_pos = filp->f_dentry->d_inode->i_size;
-
-                        rc = filp->f_op->write(filp, page_address(page),
-                                               page->index, &f_pos);
-                        if (rc != page->index) {
-                                printk(KERN_WARNING "wanted to write %lu but "
-                                       "wrote %d\n", page->index, rc);
-                                put_pages_back(&pc);
-                                /* put_pages_back() has moved the pages,
-                                 * 'tmp' included, off pc.pc_pages and onto
-                                 * the per-CPU lists; carrying on would walk
-                                 * a list we no longer own */
-                                break;
-                        }
-                }
-                set_fs(oldfs);
-                filp_close(filp, 0);
-
-                /* whatever is still in pc (nothing after a failed write)
-                 * is kept for crash dumps */
-                put_pages_on_daemon_list(&pc);
-        }
-        complete(&tctl->tctl_stop);
-        return 0;
-}
-
-int trace_start_thread(void)
-{
-        struct tracefiled_ctl *tctl = &trace_tctl;
-        int rc = 0;
-
-        down(&trace_thread_sem);
-        if (thread_running)
-                goto out;
-
-        init_completion(&tctl->tctl_start);
-        init_completion(&tctl->tctl_stop);
-        init_waitqueue_head(&tctl->tctl_waitq);
-        atomic_set(&tctl->tctl_shutdown, 0);
-
-        if (kernel_thread(tracefiled, tctl, 0) < 0) {
-                rc = -ECHILD;
-                goto out;
-        }
-
-        wait_for_completion(&tctl->tctl_start);
-        thread_running = 1;
-out:
-        up(&trace_thread_sem);
-        return rc;
-}
-
-void trace_stop_thread(void)
-{
-        struct tracefiled_ctl *tctl = &trace_tctl;
-
-        down(&trace_thread_sem);
-        if (thread_running) {
-                printk(KERN_INFO "Shutting down debug daemon thread...\n");
-                atomic_set(&tctl->tctl_shutdown, 1);
-                wait_for_completion(&tctl->tctl_stop);
-                thread_running = 0;
-        }
-        up(&trace_thread_sem);
-}
-
-int trace_write_daemon_file(struct file *file, const char *buffer,
-                            unsigned long count, void *data)
-{
-        char *name;
-        unsigned long off;
-        int rc;
-
-        name = kmalloc(count + 1, GFP_KERNEL);
-        if (name == NULL)
-                return -ENOMEM;
-
-        if (copy_from_user(name, buffer, count)) {
-                rc = -EFAULT;
-                goto out;
-        }
-
-        /* be nice and strip out trailing '\n' */
-        for (off = count ; off > 2 && isspace(name[off - 1]); off--)
-                ;
-
-        name[off] = '\0';
-
-        down_write(&tracefile_sem);
-        if (strcmp(name, "stop") == 0) {
-                tracefile = NULL;
-                trace_stop_thread();
-                goto out_sem;
-        } else if (strncmp(name, "size=", 5) == 0) {
-                tracefile_size = simple_strtoul(name + 5, NULL, 0);
-                if (tracefile_size < 10 || tracefile_size > 20480)
-                        tracefile_size = TRACEFILE_SIZE;
-                else
-                        tracefile_size <<= 20;
-                goto out_sem;
-        }
-
-        if (name[0] != '/') {
-                rc = -EINVAL;
-                goto out_sem;
-        }
-
-        if (tracefile != NULL)
-                kfree(tracefile);
-
-        tracefile = name;
-        name = NULL;
-
-        printk(KERN_INFO "Lustre: debug daemon will attempt to start writing "
-               "to %s (%lukB max)\n", tracefile, (long)(tracefile_size >> 10));
-
-        trace_start_thread();
-
- out_sem:
-        up_write(&tracefile_sem);
-
- out:
-        kfree(name);
-        return count;
-}
-
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
-                           int *eof, void *data)
-{
-        int rc;
-
-        down_read(&tracefile_sem);
-        rc = snprintf(page, count, "%s", tracefile);
-        up_read(&tracefile_sem);
-
-        return rc;
-}
-
-int trace_write_debug_mb(struct file *file, const char *buffer,
-                         unsigned long count, void *data)
-{
-        char string[32];
-        int i;
-        unsigned max;
-
-        if (count >= sizeof(string)) {
-                printk(KERN_ERR "Lustre: value too large (length %lu bytes)\n",
-                       count);
-                return -EOVERFLOW;
-        }
-
-        if (copy_from_user(string, buffer, count))
-                return -EFAULT;
-
-        max = simple_strtoul(string, NULL, 0);
-        if (max == 0)
-                return -EINVAL;
-
-        if (max > (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5 || max >= 512) {
-                printk(KERN_ERR "Lustre: Refusing to set debug buffer size to "
-                       "%dMB, which is more than 80%% of available RAM (%lu)\n",
-                       max, (num_physpages >> (20 - 2 - PAGE_SHIFT)) / 5);
-                return -EINVAL;
-        }
-
-        max /= smp_num_cpus;
-
-        for (i = 0; i < NR_CPUS; i++) {
-                struct trace_cpu_data *tcd;
-                tcd = &trace_data[i].tcd;
-                tcd->tcd_max_pages = max << (20 - PAGE_SHIFT);
-        }
-        return count;
-}
-
-int trace_read_debug_mb(char *page, char **start, off_t off, int count,
-                        int *eof, void *data)
-{
-        struct trace_cpu_data *tcd;
-        unsigned long flags;
-        int rc;
-
-        tcd = trace_get_tcd(flags);
-        rc = snprintf(page, count, "%lu\n",
-                      (tcd->tcd_max_pages >> (20 - PAGE_SHIFT)) * smp_num_cpus);
-        trace_put_tcd(tcd, flags);
-
-        return rc;
-}
-
-int tracefile_init(void)
-{
-        struct trace_cpu_data *tcd;
-        int i;
-
-        for (i = 0; i < NR_CPUS; i++) {
-                tcd = &trace_data[i].tcd;
-                INIT_LIST_HEAD(&tcd->tcd_pages);
-                INIT_LIST_HEAD(&tcd->tcd_daemon_pages);
-                tcd->tcd_cur_pages = 0;
-                tcd->tcd_cur_daemon_pages = 0;
-                tcd->tcd_max_pages = TCD_MAX_PAGES;
-                tcd->tcd_shutting_down = 0;
-        }
-        return 0;
-}
-
-static void trace_cleanup_on_cpu(void *info)
-{
-        struct trace_cpu_data *tcd;
-        struct list_head *pos, *tmp;
-        unsigned long flags;
-
-        tcd = trace_get_tcd(flags);
-
-        tcd->tcd_shutting_down = 1;
-
-        list_for_each_safe(pos, tmp, &tcd->tcd_pages) {
-                struct page *page;
-
-                page = list_entry(pos, struct page, PAGE_LIST_ENTRY);
-                LASSERT(page->index <= PAGE_SIZE);
-                LASSERT(page_count(page) > 0);
-
-                list_del(&PAGE_LIST(page));
-                page->mapping = NULL;
-                __free_page(page);
-        }
-        tcd->tcd_cur_pages = 0;
-
-        trace_put_tcd(tcd, flags);
-}
-
-static void trace_cleanup(void)
-{
-        struct page_collection pc;
-
-        INIT_LIST_HEAD(&pc.pc_pages);
-        spin_lock_init(&pc.pc_lock);
-
-        trace_cleanup_on_cpu(&pc);
-        smp_call_function(trace_cleanup_on_cpu, &pc, 0, 1);
-}
-
-void tracefile_exit(void)
-{
-        trace_stop_thread();
-        trace_cleanup();
-}
diff --git a/lustre/portals/libcfs/tracefile.h b/lustre/portals/libcfs/tracefile.h
deleted file mode 100644 (file)
index f581257..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __PORTALS_TRACEFILE_H
-#define __PORTALS_TRACEFILE_H
-
-int tracefile_dump_all_pages(char *filename);
-void trace_debug_print(void);
-void trace_flush_pages(void);
-int trace_start_thread(void);
-void trace_stop_thread(void);
-int tracefile_init(void);
-void tracefile_exit(void);
-int trace_write_daemon_file(struct file *file, const char *buffer,
-                           unsigned long count, void *data);
-int trace_read_daemon_file(char *page, char **start, off_t off, int count,
-                          int *eof, void *data);
-int trace_write_debug_mb(struct file *file, const char *buffer,
-                        unsigned long count, void *data);
-int trace_read_debug_mb(char *page, char **start, off_t off, int count,
-                       int *eof, void *data);
-int trace_dk(struct file *file, const char *buffer, unsigned long count,
-             void *data);
-
-#endif /* __PORTALS_TRACEFILE_H */
diff --git a/lustre/portals/libcfs/watchdog.c b/lustre/portals/libcfs/watchdog.c
deleted file mode 100644 (file)
index 844845a..0000000
+++ /dev/null
@@ -1,402 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2004 Cluster File Systems, Inc.
- *   Author: Jacob Berkman <jacob@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/kp30.h>
-#include <linux/libcfs.h>
-#include <linux/portals_compat25.h>
-
-
-
-struct lc_watchdog {
-        struct timer_list lcw_timer; /* kernel timer */
-        struct list_head  lcw_list;
-        struct timeval    lcw_last_touched;
-        struct task_struct *lcw_task;
-
-        void (*lcw_callback)(struct lc_watchdog *,
-                            struct task_struct *,
-                            void *data);
-        void *lcw_data;
-
-        int lcw_pid;
-        int lcw_time; /* time until watchdog fires, in ms */
-
-        enum {
-                LC_WATCHDOG_DISABLED,
-                LC_WATCHDOG_ENABLED,
-                LC_WATCHDOG_EXPIRED
-        } lcw_state;
-};
-
-/*
- * The dispatcher will complete lcw_start_completion when it starts,
- * and lcw_stop_completion when it exits.
- * Wake lcw_event_waitq to signal timer callback dispatches.
- */
-static struct completion lcw_start_completion;
-static struct completion lcw_stop_completion;
-static wait_queue_head_t lcw_event_waitq;
-
-/*
- * Set this and wake lcw_event_waitq to stop the dispatcher.
- */
-enum {
-        LCW_FLAG_STOP = 0
-};
-static unsigned long lcw_flags = 0;
-
-/*
- * Number of outstanding watchdogs.
- * When it hits 1, we start the dispatcher.
- * When it hits 0, we stop the distpatcher.
- */
-static __u32         lcw_refcount = 0;
-static DECLARE_MUTEX(lcw_refcount_sem);
-
-/* 
- * List of timers that have fired that need their callbacks run by the
- * dispatcher.
- */
-static spinlock_t       lcw_pending_timers_lock = SPIN_LOCK_UNLOCKED;
-static struct list_head lcw_pending_timers = \
-        LIST_HEAD_INIT(lcw_pending_timers);
-
-static struct task_struct *lcw_lookup_task(struct lc_watchdog *lcw)
-{
-        struct task_struct *tsk;
-        unsigned long flags;
-        ENTRY;
-
-        read_lock_irqsave(&tasklist_lock, flags);
-        tsk = find_task_by_pid(lcw->lcw_pid);
-        read_unlock_irqrestore(&tasklist_lock, flags);
-        if (!tsk) {
-                CWARN("Process %d was not found in the task list; "
-                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
-        } else if (tsk != lcw->lcw_task) {
-                tsk = NULL;
-                CWARN("The current process %d did not set the watchdog; "
-                      "watchdog callback may be incomplete\n", lcw->lcw_pid);
-        }
-
-        RETURN(tsk);
-}
-
-static void lcw_cb(unsigned long data)
-{
-        struct lc_watchdog *lcw = (struct lc_watchdog *)data;
-        struct task_struct *tsk;
-        unsigned long flags;
-
-        ENTRY;
-
-        if (lcw->lcw_state != LC_WATCHDOG_ENABLED) {
-                EXIT;
-                return;
-        }
-
-        lcw->lcw_state = LC_WATCHDOG_EXPIRED;
-
-        CWARN("Watchdog triggered for pid %d: it was inactive for %dus\n",
-              lcw->lcw_pid, (lcw->lcw_time * 1000) / HZ);
-
-        tsk = lcw_lookup_task(lcw);
-        if (tsk != NULL)
-                portals_debug_dumpstack(tsk);
-
-        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-        if (list_empty(&lcw->lcw_list)) {
-                list_add(&lcw->lcw_list, &lcw_pending_timers);
-                wake_up(&lcw_event_waitq);
-        }
-        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-
-        EXIT;
-}
-
-static int is_watchdog_fired(void)
-{
-        unsigned long flags;
-        int rc;
-
-        if (test_bit(LCW_FLAG_STOP, &lcw_flags))
-                return 1;
-
-        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-        rc = !list_empty(&lcw_pending_timers);
-        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-        return rc;
-}
-
-static int lcw_dispatch_main(void *data)
-{
-        int rc = 0;
-        unsigned long flags;
-        struct lc_watchdog *lcw;
-        struct task_struct *tsk;
-
-        ENTRY;
-
-        kportal_daemonize("lc_watchdogd");
-
-        SIGNAL_MASK_LOCK(current, flags);
-        sigfillset(&current->blocked);
-        RECALC_SIGPENDING;
-        SIGNAL_MASK_UNLOCK(current, flags);
-
-        complete(&lcw_start_completion);
-
-        while (1) {
-                wait_event_interruptible(lcw_event_waitq, is_watchdog_fired());
-                CDEBUG(D_INFO, "Watchdog got woken up...\n");
-                if (test_bit(LCW_FLAG_STOP, &lcw_flags)) {
-                        CDEBUG(D_INFO, "LCW_FLAG_STOP was set, shutting down...\n");
-
-                        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-                        rc = !list_empty(&lcw_pending_timers);
-                        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-                        if (rc) {
-                                CERROR("pending timers list was not empty at "
-                                       "time of watchdog dispatch shutdown\n");
-                        }
-                        break;
-                }
-
-                spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-                while (!list_empty(&lcw_pending_timers)) {
-
-                        lcw = list_entry(lcw_pending_timers.next,
-                                         struct lc_watchdog,
-                                         lcw_list);
-                        list_del_init(&lcw->lcw_list);
-                        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-
-                        CDEBUG(D_INFO, "found lcw for pid %d\n", lcw->lcw_pid);
-
-                        if (lcw->lcw_state != LC_WATCHDOG_DISABLED) {
-                                /* 
-                                 * sanity check the task against our
-                                 * watchdog 
-                                 */
-                                tsk = lcw_lookup_task(lcw);
-                                lcw->lcw_callback(lcw, tsk, lcw->lcw_data);
-                        }
-
-                        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-                }
-                spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-        }
-
-        complete(&lcw_stop_completion);
-
-        RETURN(rc);
-}
-
-static void lcw_dispatch_start(void)
-{
-        int rc;
-
-        ENTRY;
-        LASSERT(lcw_refcount == 1);
-
-        init_completion(&lcw_stop_completion);
-        init_completion(&lcw_start_completion);
-        init_waitqueue_head(&lcw_event_waitq);
-
-        CDEBUG(D_INFO, "starting dispatch thread\n");
-        rc = kernel_thread(lcw_dispatch_main, NULL, 0);
-        if (rc < 0) {
-                CERROR("error spawning watchdog dispatch thread: %d\n", rc);
-                EXIT;
-                return;
-        }
-        wait_for_completion(&lcw_start_completion);
-        CDEBUG(D_INFO, "watchdog dispatcher initialization complete.\n");
-
-        EXIT;
-}
-
-static void lcw_dispatch_stop(void)
-{
-        ENTRY;
-        LASSERT(lcw_refcount == 0);
-
-        CDEBUG(D_INFO, "trying to stop watchdog dispatcher.\n");
-
-        set_bit(LCW_FLAG_STOP, &lcw_flags);
-        wake_up(&lcw_event_waitq);
-
-        wait_for_completion(&lcw_stop_completion);
-
-        CDEBUG(D_INFO, "watchdog dispatcher has shut down.\n");
-
-        EXIT;
-}
-
-struct lc_watchdog *lc_watchdog_add(int time, 
-                                    void (*callback)(struct lc_watchdog *,
-                                                     struct task_struct *,
-                                                     void *),
-                                    void *data)
-{
-        struct lc_watchdog *lcw = NULL;
-        ENTRY;
-
-        PORTAL_ALLOC(lcw, sizeof(*lcw));
-        if (!lcw) {
-                CDEBUG(D_INFO, "Could not allocate new lc_watchdog\n");
-                RETURN(ERR_PTR(-ENOMEM));
-        }
-
-        lcw->lcw_task = current;
-        lcw->lcw_pid = current->pid;
-        lcw->lcw_time = (time * HZ) / 1000;
-        lcw->lcw_callback = callback ? callback : lc_watchdog_dumplog;
-        lcw->lcw_data = data;
-        lcw->lcw_state = LC_WATCHDOG_DISABLED;
-
-        INIT_LIST_HEAD(&lcw->lcw_list);
-
-        lcw->lcw_timer.function = lcw_cb;
-        lcw->lcw_timer.data = (unsigned long)lcw;
-        lcw->lcw_timer.expires = jiffies + lcw->lcw_time;
-        init_timer(&lcw->lcw_timer);
-
-        down(&lcw_refcount_sem);
-        if (++lcw_refcount == 1)
-                lcw_dispatch_start();
-        up(&lcw_refcount_sem);
-
-        /* Keep this working in case we enable them by default */
-        if (lcw->lcw_state == LC_WATCHDOG_ENABLED) {
-                do_gettimeofday(&lcw->lcw_last_touched);
-                add_timer(&lcw->lcw_timer);
-        }
-
-        RETURN(lcw);
-}
-EXPORT_SYMBOL(lc_watchdog_add);
-
-static long
-timeval_sub(struct timeval *large, struct timeval *small)
-{
-        return (large->tv_sec - small->tv_sec) * 1000000 +
-                (large->tv_usec - small->tv_usec);
-}
-
-static void lcw_update_time(struct lc_watchdog *lcw, const char *message)
-{
-        struct timeval newtime;
-        unsigned long timediff;
-
-        do_gettimeofday(&newtime);
-        if (lcw->lcw_state == LC_WATCHDOG_EXPIRED) {
-                timediff = timeval_sub(&newtime, &lcw->lcw_last_touched);
-                CWARN("Expired watchdog for pid %d %s after %lu.%.4lus\n",
-                      lcw->lcw_pid,
-                      message,
-                      timediff / 1000000,
-                      (timediff % 1000000) / 100);
-        }
-        lcw->lcw_last_touched = newtime;
-}
-
-void lc_watchdog_touch(struct lc_watchdog *lcw)
-{
-        unsigned long flags;
-        ENTRY;
-        LASSERT(lcw != NULL);
-
-        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-        if (!list_empty(&lcw->lcw_list))
-                list_del_init(&lcw->lcw_list);
-        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-
-        lcw_update_time(lcw, "touched");
-        lcw->lcw_state = LC_WATCHDOG_ENABLED;
-
-        mod_timer(&lcw->lcw_timer, jiffies + lcw->lcw_time);
-
-        EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_touch);
-
-void lc_watchdog_disable(struct lc_watchdog *lcw)
-{
-        unsigned long flags;
-        ENTRY;
-        LASSERT(lcw != NULL);
-
-        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-        if (!list_empty(&lcw->lcw_list))
-                list_del_init(&lcw->lcw_list);
-        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-
-        lcw_update_time(lcw, "disabled");
-        lcw->lcw_state = LC_WATCHDOG_DISABLED;
-
-        EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_disable);
-
-void lc_watchdog_delete(struct lc_watchdog *lcw)
-{
-        unsigned long flags;
-        ENTRY;
-        LASSERT(lcw != NULL);
-
-        del_timer(&lcw->lcw_timer);
-
-        lcw_update_time(lcw, "deleted");
-
-        spin_lock_irqsave(&lcw_pending_timers_lock, flags);
-        if (!list_empty(&lcw->lcw_list))
-                list_del_init(&lcw->lcw_list);
-        spin_unlock_irqrestore(&lcw_pending_timers_lock, flags);
-
-        down(&lcw_refcount_sem);
-        if (--lcw_refcount == 0)
-                lcw_dispatch_stop();
-        up(&lcw_refcount_sem);
-
-        PORTAL_FREE(lcw, sizeof(*lcw));
-
-        EXIT;
-}
-EXPORT_SYMBOL(lc_watchdog_delete);
-
-/*
- * Provided watchdog handlers
- */
-
-extern void portals_debug_dumplog_internal(void *arg);
-
-void lc_watchdog_dumplog(struct lc_watchdog *lcw,
-                         struct task_struct *tsk,
-                         void               *data)
-{
-        tsk = tsk ? tsk : current;
-        portals_debug_dumplog_internal((void *)(long)tsk->pid);
-}
-EXPORT_SYMBOL(lc_watchdog_dumplog);
diff --git a/lustre/portals/packaging/.cvsignore b/lustre/portals/packaging/.cvsignore
deleted file mode 100644 (file)
index fd1d56a..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-Makefile
-Makefile.in
-aclocal.m4
-config.log
-config.status
-config.cache
-configure
-portals.spec
diff --git a/lustre/portals/packaging/Makefile.am b/lustre/portals/packaging/Makefile.am
deleted file mode 100644 (file)
index 126bc69..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Copyright (C) 2002  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-EXTRA_DIST = portals.spec
\ No newline at end of file
diff --git a/lustre/portals/packaging/portals.spec.in b/lustre/portals/packaging/portals.spec.in
deleted file mode 100644 (file)
index e196b3f..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-%define kversion @RELEASE@
-%define linuxdir @LINUX@
-%define version HEAD
-
-Summary: Sandia Portals Message Passing - utilities 
-Name: portals
-Version: %{version}
-Release: 0210101748uml
-Copyright: LGPL
-Group: Utilities/System
-BuildRoot: /var/tmp/portals-%{version}-root
-Source: http://sandiaportals.org/portals-%{version}.tar.gz
-
-%description
-Sandia Portals message passing package.  Contains kernel modules, libraries and utilities. 
-
-%package -n portals-modules
-Summary: Kernel modules and NAL's for portals
-Group: Development/Kernel
-
-%description -n portals-modules
-Object-Based Disk storage drivers for Linux %{kversion}.
-
-%package -n portals-source
-Summary: Portals kernel source for rebuilding with other kernels
-Group: Development/Kernel
-
-%description -n portals-source
-Portals kernel source for rebuilding with other kernels
-
-%prep
-%setup -n portals-%{version}
-
-%build
-rm -rf $RPM_BUILD_ROOT
-
-# Create the pristine source directory.
-srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version}
-mkdir -p $srcdir
-find . -name CVS -prune -o -print | cpio -ap $srcdir
-
-# Set an explicit path to our Linux tree, if we can.
-conf_flag=
-linuxdir=%{linuxdir}
-test -d $linuxdir && conf_flag=--with-linux=$linuxdir
-./configure $conf_flag
-make 
-
-%install
-make install prefix=$RPM_BUILD_ROOT
-
-%ifarch alpha
-# this hurts me
-  conf_flag=
-  linuxdir=%{linuxdir}
-  test -d $linuxdir && conf_flag=--with-linux=$linuxdir
-  make clean
-  ./configure --enable-rtscts-myrinet $conf_flag
-  make
-  cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o
-  cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload
-%endif
-
-
-%files
-%attr(-, root, root) %doc COPYING
-%attr(-, root, root) /usr/sbin/acceptor
-%attr(-, root, root) /usr/sbin/ptlctl
-%attr(-, root, root) /usr/sbin/debugctl
-%ifarch alpha
-%attr(-, root, root) /usr/sbin/mcpload
-%endif
-%attr(-, root, root) /lib/libmyrnal.a
-%attr(-, root, root) /lib/libptlapi.a
-%attr(-, root, root) /lib/libptlctl.a
-%attr(-, root, root) /lib/libprocbridge.a
-%attr(-, root, root) /lib/libptllib.a
-%attr(-, root, root) /lib/libtcpnal.a 
-%attr(-, root, root) /lib/libtcpnalutil.a
-%attr(-, root, root) /usr/include/portals/*.h
-%attr(-, root, root) /usr/include/portals/base/*.h
-%attr(-, root, root) /usr/include/linux/*.h
-
-%files -n portals-modules
-%attr(-, root, root) %doc COPYING
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o
-%ifarch alpha
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o
-%endif
-%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o
-
-%files -n portals-source
-%attr(-, root, root) /usr/src/portals-%{version}
-
-%post
-if [ ! -e /dev/portals ]; then
-   mknod /dev/portals c 10 240
-fi
-depmod -ae || exit 0
-
-grep -q portals /etc/modules.conf || \
-       echo 'alias char-major-10-240 portals' >> /etc/modules.conf
-
-grep -q '/dev/portals' /etc/modules.conf || \
-       echo 'alias /dev/portals portals' >> /etc/modules.conf
-
-%postun
-depmod -ae || exit 0
-
-%clean
-#rm -rf $RPM_BUILD_ROOT
-
-# end of file
diff --git a/lustre/portals/portals/.cvsignore b/lustre/portals/portals/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/portals/Makefile.in b/lustre/portals/portals/Makefile.in
deleted file mode 100644 (file)
index c0f2e71..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-MODULES := portals
-portals-objs := api-errno.o api-ni.o api-wrap.o
-portals-objs += lib-init.o lib-me.o lib-msg.o lib-eq.o lib-md.o
-portals-objs += lib-move.o lib-ni.o lib-pid.o module.o
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/portals/Makefile.mk b/lustre/portals/portals/Makefile.mk
deleted file mode 100644 (file)
index 088902a..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-include $(src)/../Kernelenv
-
-obj-y += portals.o
-portals-objs    :=     lib-eq.o lib-init.o lib-md.o lib-me.o \
-                       lib-move.o lib-msg.o lib-ni.o lib-pid.o \
-                       api-errno.o api-ni.o api-wrap.o \
-                       module.o
diff --git a/lustre/portals/portals/api-errno.c b/lustre/portals/portals/api-errno.c
deleted file mode 100644 (file)
index 9a4e5ac..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-errno.c
- * Instantiate the string table of errors
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- */
-
-/* If you change these, you must update the number table in portals/errno.h */
-const char *ptl_err_str[] = {
-        "PTL_OK",
-        "PTL_SEGV",
-
-        "PTL_NO_SPACE",
-        "PTL_ME_IN_USE",
-        "PTL_VAL_FAILED",
-
-        "PTL_NAL_FAILED",
-        "PTL_NO_INIT",
-        "PTL_IFACE_DUP",
-        "PTL_IFACE_INVALID",
-
-        "PTL_HANDLE_INVALID",
-        "PTL_MD_INVALID",
-        "PTL_ME_INVALID",
-/* If you change these, you must update the number table in portals/errno.h */
-        "PTL_PROCESS_INVALID",
-        "PTL_PT_INDEX_INVALID",
-
-        "PTL_SR_INDEX_INVALID",
-        "PTL_EQ_INVALID",
-        "PTL_EQ_DROPPED",
-
-        "PTL_EQ_EMPTY",
-        "PTL_MD_NO_UPDATE",
-        "PTL_FAIL",
-
-        "PTL_IOV_INVALID",
-
-        "PTL_EQ_IN_USE",
-
-        "PTL_NI_INVALID",
-        "PTL_MD_ILLEGAL",
-
-        "PTL_MAX_ERRNO"
-};
-/* If you change these, you must update the number table in portals/errno.h */
diff --git a/lustre/portals/portals/api-ni.c b/lustre/portals/portals/api-ni.c
deleted file mode 100644 (file)
index 7e92256..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-ni.c
- * Network Interface code
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-int ptl_init;
-
-/* Put some magic in the NI handle so uninitialised/zeroed handles are easy
- * to spot */
-#define NI_HANDLE_MAGIC  0xebc0de00
-#define NI_HANDLE_MASK   0x000000ff
-
-static struct nal_t *ptl_nal_table[NAL_MAX_NR + 1];
-
-#ifdef __KERNEL__
-DECLARE_MUTEX(ptl_mutex);
-
-static void ptl_mutex_enter (void) 
-{
-        down (&ptl_mutex);
-}
-
-static void ptl_mutex_exit (void)
-{
-        up (&ptl_mutex);
-}
-#else
-static void ptl_mutex_enter (void)
-{
-}
-
-static void ptl_mutex_exit (void) 
-{
-}
-#endif
-
-nal_t *ptl_hndl2nal(ptl_handle_any_t *handle)
-{
-        unsigned int idx = handle->nal_idx;
-
-        /* XXX we really rely on the caller NOT racing with interface
-         * setup/teardown.  That ensures her NI handle can't get
-         * invalidated out from under her (or worse, swapped for a
-         * completely different interface!) */
-
-        LASSERT (ptl_init);
-
-        if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0)
-                return NULL;
-
-        idx &= NI_HANDLE_MASK;
-        
-        if (idx > NAL_MAX_NR ||
-            ptl_nal_table[idx] == NULL ||
-            ptl_nal_table[idx]->nal_refct == 0)
-                return NULL;
-
-        return ptl_nal_table[idx];
-}
-
-int ptl_register_nal (ptl_interface_t interface, nal_t *nal)
-{
-        int    rc;
-        
-        ptl_mutex_enter();
-        
-        if (interface < 0 || interface > NAL_MAX_NR)
-                rc = PTL_IFACE_INVALID;
-        else if (ptl_nal_table[interface] != NULL)
-                rc = PTL_IFACE_DUP;
-        else {
-                rc = PTL_OK;
-                ptl_nal_table[interface] = nal;
-                LASSERT(nal->nal_refct == 0);
-        }
-
-        ptl_mutex_exit();
-        return (rc);
-}
-
-void ptl_unregister_nal (ptl_interface_t interface)
-{
-        LASSERT(interface >= 0 && interface <= NAL_MAX_NR);
-        LASSERT(ptl_nal_table[interface] != NULL);
-        LASSERT(ptl_nal_table[interface]->nal_refct == 0);
-        
-        ptl_mutex_enter();
-        
-        ptl_nal_table[interface] = NULL;
-
-        ptl_mutex_exit();
-}
-
-int PtlInit(int *max_interfaces)
-{
-        LASSERT(!strcmp(ptl_err_str[PTL_MAX_ERRNO], "PTL_MAX_ERRNO"));
-
-        /* If this assertion fails, we need more bits in NI_HANDLE_MASK and
-         * to shift NI_HANDLE_MAGIC left appropriately */
-        LASSERT (NAL_MAX_NR < (NI_HANDLE_MASK + 1));
-        
-        if (max_interfaces != NULL)
-                *max_interfaces = NAL_MAX_NR + 1;
-
-        ptl_mutex_enter();
-
-        if (!ptl_init) {
-                /* NULL pointers, clear flags */
-                memset(ptl_nal_table, 0, sizeof(ptl_nal_table));
-#ifndef __KERNEL__
-                /* Kernel NALs register themselves when their module loads,
-                 * and unregister themselves when their module is unloaded.
-                 * Userspace NALs, are plugged in explicitly here... */
-                {
-                        extern nal_t procapi_nal;
-
-                        /* XXX pretend it's socknal to keep liblustre happy... */
-                        ptl_nal_table[SOCKNAL] = &procapi_nal;
-                        LASSERT (procapi_nal.nal_refct == 0);
-                }
-#endif
-                ptl_init = 1;
-        }
-
-        ptl_mutex_exit();
-        
-        return PTL_OK;
-}
-
-void PtlFini(void)
-{
-        nal_t  *nal;
-        int     i;
-
-        ptl_mutex_enter();
-
-        if (ptl_init) {
-                for (i = 0; i <= NAL_MAX_NR; i++) {
-
-                        nal = ptl_nal_table[i];
-                        if (nal == NULL)
-                                continue;
-                        
-                        if (nal->nal_refct != 0) {
-                                CWARN("NAL %x has outstanding refcount %d\n",
-                                      i, nal->nal_refct);
-                                nal->nal_ni_fini(nal);
-                        }
-                        
-                        ptl_nal_table[i] = NULL;
-                }
-
-                ptl_init = 0;
-        }
-        
-        ptl_mutex_exit();
-}
-
-int PtlNIInit(ptl_interface_t interface, ptl_pid_t requested_pid,
-              ptl_ni_limits_t *desired_limits, ptl_ni_limits_t *actual_limits,
-              ptl_handle_ni_t *handle)
-{
-        nal_t *nal;
-        int    i;
-        int    rc;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        ptl_mutex_enter ();
-
-        if (interface == PTL_IFACE_DEFAULT) {
-                for (i = 0; i <= NAL_MAX_NR; i++)
-                        if (ptl_nal_table[i] != NULL) {
-                                interface = i;
-                                break;
-                        }
-                /* NB if no interfaces are registered, 'interface' will
-                 * fail the valid test below */
-        }
-        
-        if (interface < 0 || 
-            interface > NAL_MAX_NR ||
-            ptl_nal_table[interface] == NULL) {
-                GOTO(out, rc = PTL_IFACE_INVALID);
-        }
-
-        nal = ptl_nal_table[interface];
-        nal->nal_handle.nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | interface;
-        nal->nal_handle.cookie = 0;
-        
-        CDEBUG(D_OTHER, "Starting up NAL (%x) refs %d\n", interface, nal->nal_refct);
-        rc = nal->nal_ni_init(nal, requested_pid, desired_limits, actual_limits);
-
-        if (rc != PTL_OK) {
-                CERROR("Error %d starting up NAL %x, refs %d\n", rc,
-                       interface, nal->nal_refct);
-                GOTO(out, rc);
-        }
-        
-        if (nal->nal_refct != 0) {
-                /* Caller gets to know if this was the first ref or not */
-                rc = PTL_IFACE_DUP;
-        }
-        
-        nal->nal_refct++;
-        *handle = nal->nal_handle;
-
- out:
-        ptl_mutex_exit ();
-
-        return rc;
-}
-
-int PtlNIFini(ptl_handle_ni_t ni)
-{
-        nal_t *nal;
-        int    idx;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        ptl_mutex_enter ();
-
-        nal = ptl_hndl2nal (&ni);
-        if (nal == NULL) {
-                ptl_mutex_exit ();
-                return PTL_HANDLE_INVALID;
-        }
-
-        idx = ni.nal_idx & NI_HANDLE_MASK;
-
-        LASSERT(nal->nal_refct > 0);
-
-        nal->nal_refct--;
-
-        /* nal_refct == 0 tells nal->shutdown to really shut down */
-        nal->nal_ni_fini(nal);
-
-        ptl_mutex_exit ();
-        return PTL_OK;
-}
diff --git a/lustre/portals/portals/api-wrap.c b/lustre/portals/portals/api-wrap.c
deleted file mode 100644 (file)
index 37f6c0b..0000000
+++ /dev/null
@@ -1,366 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * api/api-wrap.c
- * User-level wrappers that dispatch across the protection boundaries
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-# define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/api-support.h>
-
-/* Render handle 'h' into 'str' (bounded by 'len' through snprintf) as
- * "0x<nal_idx in hex>." followed by the cookie in LPX64 format. */
-void PtlSnprintHandle(char *str, int len, ptl_handle_any_t h)
-{
-        snprintf(str, len,
-                 "0x%lx."LPX64,
-                 h.nal_idx, h.cookie);
-}
-
-/* Copy 'handle_in' to '*ni_out' provided it resolves to a NAL.
- * Returns PTL_NO_INIT when ptl_init is unset, PTL_HANDLE_INVALID when
- * the handle does not resolve, PTL_OK on success. */
-int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t *ni_out)
-{
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        if (ptl_hndl2nal(&handle_in) != NULL) {
-                *ni_out = handle_in;
-                return PTL_OK;
-        }
-
-        return PTL_HANDLE_INVALID;
-}
-
-/* Dispatch to the owning NAL's nal_get_id method.
- * Returns PTL_NO_INIT when ptl_init is unset and PTL_NI_INVALID when
- * 'ni_handle' does not resolve to a NAL; otherwise the method's result. */
-int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&ni_handle);
-        if (owner != NULL)
-                return owner->nal_get_id(owner, id);
-
-        return PTL_NI_INVALID;
-}
-
-/* Report the uid associated with 'ni_handle'.  Only uid 0 is ever
- * reported.  Returns PTL_NO_INIT when ptl_init is unset, PTL_NI_INVALID
- * when the handle does not resolve to a NAL, PTL_OK otherwise. */
-int PtlGetUid(ptl_handle_ni_t ni_handle, ptl_uid_t *uid)
-{
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        /* the NAL itself is not consulted; the lookup only validates */
-        if (ptl_hndl2nal(&ni_handle) == NULL)
-                return PTL_NI_INVALID;
-
-        /* We don't support different uids yet */
-        *uid = 0;
-
-        return PTL_OK;
-}
-
-/* Dispatch to the owning NAL's nal_fail_nid method with 'nid' and
- * 'threshold'.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_NI_INVALID when 'interface' does not resolve to a NAL. */
-int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&interface);
-        if (owner != NULL)
-                return owner->nal_fail_nid(owner, nid, threshold);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the owning NAL's nal_ni_status method for register
- * 'register_in'.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_NI_INVALID when 'interface_in' does not resolve to a NAL. */
-int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in,
-                ptl_sr_value_t *status_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&interface_in);
-        if (owner != NULL)
-                return owner->nal_ni_status(owner, register_in, status_out);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the owning NAL's nal_ni_dist method, passing the address
- * of the by-value 'process_in'.  Returns PTL_NO_INIT when ptl_init is
- * unset and PTL_NI_INVALID when 'interface_in' does not resolve. */
-int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in,
-              unsigned long *distance_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&interface_in);
-        if (owner != NULL)
-                return owner->nal_ni_dist(owner, &process_in, distance_out);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the owning NAL's nal_me_attach method with all match
- * entry parameters forwarded unchanged.  Returns PTL_NO_INIT when
- * ptl_init is unset and PTL_NI_INVALID when 'interface_in' does not
- * resolve to a NAL. */
-int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in,
-                ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in,
-                ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in,
-                ptl_ins_pos_t pos_in, ptl_handle_me_t *handle_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&interface_in);
-        if (owner != NULL)
-                return owner->nal_me_attach(owner, index_in, match_id_in,
-                                            match_bits_in, ignore_bits_in,
-                                            unlink_in, pos_in, handle_out);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'current_in', calling its nal_me_insert
- * method.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_ME_INVALID when 'current_in' does not resolve to a NAL. */
-int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in,
-                ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in,
-                ptl_unlink_t unlink_in, ptl_ins_pos_t position_in,
-                ptl_handle_me_t * handle_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&current_in);
-        if (owner != NULL)
-                return owner->nal_me_insert(owner, &current_in, match_id_in,
-                                            match_bits_in, ignore_bits_in,
-                                            unlink_in, position_in,
-                                            handle_out);
-
-        return PTL_ME_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'current_in', calling its nal_me_unlink
- * method.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_ME_INVALID when the handle does not resolve to a NAL. */
-int PtlMEUnlink(ptl_handle_me_t current_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&current_in);
-        if (owner != NULL)
-                return owner->nal_me_unlink(owner, &current_in);
-
-        return PTL_ME_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'me_in', calling its nal_md_attach
- * method.  Returns PTL_NO_INIT when ptl_init is unset, PTL_ME_INVALID
- * when 'me_in' does not resolve to a NAL, and PTL_MD_ILLEGAL when the
- * MD names an event queue (other than PTL_EQ_NONE) that resolves to a
- * different NAL. */
-int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in,
-                ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&me_in);
-        if (owner == NULL)
-                return PTL_ME_INVALID;
-
-        if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE)) {
-                /* an EQ was supplied: it must live on the same NAL */
-                if (ptl_hndl2nal(&md_in.eq_handle) != owner)
-                        return PTL_MD_ILLEGAL;
-        }
-
-        return owner->nal_md_attach(owner, &me_in, &md_in,
-                                    unlink_in, handle_out);
-}
-
-/* Dispatch to the NAL named by 'ni_in', calling its nal_md_bind method.
- * Returns PTL_NO_INIT when ptl_init is unset, PTL_NI_INVALID when
- * 'ni_in' does not resolve to a NAL, and PTL_MD_ILLEGAL when the MD
- * names an event queue (other than PTL_EQ_NONE) that resolves to a
- * different NAL. */
-int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in,
-              ptl_unlink_t unlink_in, ptl_handle_md_t *handle_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&ni_in);
-        if (owner == NULL)
-                return PTL_NI_INVALID;
-
-        if (!PtlHandleIsEqual(md_in.eq_handle, PTL_EQ_NONE)) {
-                /* an EQ was supplied: it must live on the same NAL */
-                if (ptl_hndl2nal(&md_in.eq_handle) != owner)
-                        return PTL_MD_ILLEGAL;
-        }
-
-        return owner->nal_md_bind(owner, &md_in, unlink_in, handle_out);
-}
-
-/* Dispatch to the NAL that owns 'md_in', calling its nal_md_update
- * method.  Returns PTL_NO_INIT when ptl_init is unset, PTL_MD_INVALID
- * when 'md_in' does not resolve to a NAL, and PTL_EQ_INVALID when
- * 'testq_in' (other than PTL_EQ_NONE) resolves to a different NAL. */
-int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout,
-                ptl_md_t *new_inout, ptl_handle_eq_t testq_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&md_in);
-        if (owner == NULL)
-                return PTL_MD_INVALID;
-
-        if (!PtlHandleIsEqual(testq_in, PTL_EQ_NONE)) {
-                /* a test queue was supplied: it must live on the same NAL */
-                if (ptl_hndl2nal(&testq_in) != owner)
-                        return PTL_EQ_INVALID;
-        }
-
-        return owner->nal_md_update(owner, &md_in,
-                                    old_inout, new_inout, &testq_in);
-}
-
-/* Dispatch to the NAL that owns 'md_in', calling its nal_md_unlink
- * method.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_MD_INVALID when the handle does not resolve to a NAL. */
-int PtlMDUnlink(ptl_handle_md_t md_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&md_in);
-        if (owner != NULL)
-                return owner->nal_md_unlink(owner, &md_in);
-
-        return PTL_MD_INVALID;
-}
-
-/* Dispatch to the NAL named by 'interface', calling its nal_eq_alloc
- * method with 'count', 'callback' and 'handle_out'.  Returns
- * PTL_NO_INIT when ptl_init is unset and PTL_NI_INVALID when
- * 'interface' does not resolve to a NAL. */
-int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count,
-               ptl_eq_handler_t callback,
-               ptl_handle_eq_t *handle_out)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&interface);
-        if (owner != NULL)
-                return owner->nal_eq_alloc(owner, count, callback, handle_out);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'eventq', calling its nal_eq_free
- * method.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_EQ_INVALID when the handle does not resolve to a NAL. */
-int PtlEQFree(ptl_handle_eq_t eventq)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&eventq);
-        if (owner != NULL)
-                return owner->nal_eq_free(owner, &eventq);
-
-        return PTL_EQ_INVALID;
-}
-
-/* Poll the single queue 'eventq' with a timeout of 0 via PtlEQPoll;
- * the queue index PtlEQPoll reports is discarded. */
-int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t *ev)
-{
-        int ignored_idx;
-        int rc;
-
-        rc = PtlEQPoll(&eventq, 1, 0, ev, &ignored_idx);
-        return rc;
-}
-
-/* Poll the single queue 'eventq_in' with timeout PTL_TIME_FOREVER via
- * PtlEQPoll; the queue index PtlEQPoll reports is discarded. */
-int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out)
-{
-        int ignored_idx;
-        int rc;
-
-        rc = PtlEQPoll(&eventq_in, 1, PTL_TIME_FOREVER,
-                       event_out, &ignored_idx);
-        return rc;
-}
-
-/* Poll 'neq_in' event queues through the nal_eq_poll method of the NAL
- * they belong to.  Returns PTL_NO_INIT when ptl_init is unset, and
- * PTL_EQ_INVALID when neq_in < 1, when the first handle does not
- * resolve to a NAL, or when any later handle resolves to a different
- * NAL than the first. */
-int PtlEQPoll(ptl_handle_eq_t *eventqs_in, int neq_in, int timeout,
-              ptl_event_t *event_out, int *which_out)
-{
-        nal_t *owner;
-        int    idx;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        if (neq_in < 1)
-                return PTL_EQ_INVALID;
-
-        /* the first queue decides which NAL services the call */
-        owner = ptl_hndl2nal(&eventqs_in[0]);
-        if (owner == NULL)
-                return PTL_EQ_INVALID;
-
-        /* every remaining queue must resolve to that same NAL */
-        idx = 1;
-        while (idx < neq_in) {
-                if (ptl_hndl2nal(&eventqs_in[idx]) != owner)
-                        return PTL_EQ_INVALID;
-                idx++;
-        }
-
-        return owner->nal_eq_poll(owner, eventqs_in, neq_in, timeout,
-                                  event_out, which_out);
-}
-
-
-/* Dispatch to the NAL named by 'ni_in', calling its nal_ace_entry
- * method.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_NI_INVALID when 'ni_in' does not resolve to a NAL. */
-int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in,
-               ptl_process_id_t match_id_in, ptl_pt_index_t portal_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&ni_in);
-        if (owner != NULL)
-                return owner->nal_ace_entry(owner, index_in,
-                                            match_id_in, portal_in);
-
-        return PTL_NI_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'md_in', calling its nal_put method.
- * 'md_in' and 'target_in' are passed by address; everything else is
- * forwarded by value.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_MD_INVALID when 'md_in' does not resolve to a NAL. */
-int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in,
-           ptl_process_id_t target_in, ptl_pt_index_t portal_in,
-           ptl_ac_index_t ac_in, ptl_match_bits_t match_bits_in,
-           ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&md_in);
-        if (owner != NULL)
-                return owner->nal_put(owner, &md_in, ack_req_in,
-                                      &target_in, portal_in, ac_in,
-                                      match_bits_in, offset_in, hdr_data_in);
-
-        return PTL_MD_INVALID;
-}
-
-/* Dispatch to the NAL that owns 'md_in', calling its nal_get method.
- * 'md_in' and 'target_in' are passed by address; everything else is
- * forwarded by value.  Returns PTL_NO_INIT when ptl_init is unset and
- * PTL_MD_INVALID when 'md_in' does not resolve to a NAL. */
-int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in,
-           ptl_pt_index_t portal_in, ptl_ac_index_t ac_in,
-           ptl_match_bits_t match_bits_in, ptl_size_t offset_in)
-{
-        nal_t *owner;
-
-        if (!ptl_init)
-                return PTL_NO_INIT;
-
-        owner = ptl_hndl2nal(&md_in);
-        if (owner != NULL)
-                return owner->nal_get(owner, &md_in,
-                                      &target_in, portal_in, ac_in,
-                                      match_bits_in, offset_in);
-
-        return PTL_MD_INVALID;
-}
-
diff --git a/lustre/portals/portals/autoMakefile.am b/lustre/portals/portals/autoMakefile.am
deleted file mode 100644 (file)
index 285f8fe..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-# Copyright (C) 2002  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# C sources that make up the portals library; used below as the source
-# list of libportals.a.
-my_sources =    api-errno.c api-ni.c api-wrap.c \
-               lib-init.c lib-me.c lib-msg.c lib-eq.c \
-               lib-md.c lib-move.c lib-ni.c lib-pid.c
-
-# Nothing below is built when CRAY_PORTALS is set.
-if !CRAY_PORTALS
-
-# liblustre build: a non-installed static library built from my_sources
-# with the LL* preprocessor and compiler flags.
-if LIBLUSTRE
-noinst_LIBRARIES= libportals.a
-libportals_a_SOURCES= $(my_sources)
-libportals_a_CPPFLAGS = $(LLCPPFLAGS)
-libportals_a_CFLAGS = $(LLCFLAGS)
-endif # LIBLUSTRE
-
-# Module build: list the portals module (with suffix KMODEXT) as
-# modulenet data.
-if MODULES
-modulenet_DATA = portals$(KMODEXT)
-endif # MODULES
-
-endif # !CRAY_PORTALS
-
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-# NOTE(review): portals-objs is not defined in this file; presumably it
-# comes from the companion Makefile.in -- confirm, otherwise this expands
-# to nothing.
-DIST_SOURCES = $(portals-objs:%.o=%.c)
diff --git a/lustre/portals/portals/lib-eq.c b/lustre/portals/portals/lib-eq.c
deleted file mode 100644 (file)
index 8ea6fdd..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-eq.c
- * Library level Event queue management routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-
-/*
- * Allocate an event queue with room for at least 'count' events and
- * return a handle to it in '*handle'.
- *
- * 'count' is rounded up to a power of 2.  Returns PTL_VAL_FAILED if the
- * resulting count is 0, PTL_NO_SPACE if either the queue descriptor or
- * the event array cannot be allocated, the error from the NAL's
- * libnal_map method if mapping the event array fails, and PTL_OK on
- * success.  On every failure path all memory allocated here is released.
- */
-int 
-lib_api_eq_alloc (nal_t *apinal, ptl_size_t count,
-                  ptl_eq_handler_t callback, 
-                  ptl_handle_eq_t *handle)
-{
-        lib_nal_t     *nal = apinal->nal_data;
-        lib_eq_t      *eq;
-        unsigned long  flags;
-        int            rc;
-
-        /* We need count to be a power of 2 so that when eq_{enq,deq}_seq
-         * overflow, they don't skip entries, so the queue has the same
-         * apparent capacity at all times */
-
-        if (count != LOWEST_BIT_SET(count)) {   /* not a power of 2 already */
-                do {                    /* knock off all but the top bit... */
-                        count &= ~LOWEST_BIT_SET (count);
-                } while (count != LOWEST_BIT_SET(count));
-
-                count <<= 1;                             /* ...and round up */
-        }
-
-        if (count == 0)        /* catch bad parameter / overflow on roundup */
-                return (PTL_VAL_FAILED);
-        
-        eq = lib_eq_alloc (nal);
-        if (eq == NULL)
-                return (PTL_NO_SPACE);
-
-        PORTAL_ALLOC(eq->eq_events, count * sizeof(ptl_event_t));
-        if (eq->eq_events == NULL) {
-                LIB_LOCK(nal, flags);
-                lib_eq_free (nal, eq);
-                LIB_UNLOCK(nal, flags);
-                /* must bail out here: 'eq' is gone and there is no
-                 * event array to map or clear */
-                return (PTL_NO_SPACE);
-        }
-
-        if (nal->libnal_map != NULL) {
-                struct iovec iov = {
-                        .iov_base = eq->eq_events,
-                        .iov_len = count * sizeof(ptl_event_t)};
-
-                rc = nal->libnal_map(nal, 1, &iov, &eq->eq_addrkey);
-                if (rc != PTL_OK) {
-                        /* release the event array before its owner,
-                         * otherwise it is leaked */
-                        PORTAL_FREE(eq->eq_events,
-                                    count * sizeof(ptl_event_t));
-                        LIB_LOCK(nal, flags);
-                        lib_eq_free (nal, eq);
-                        LIB_UNLOCK(nal, flags);
-                        return (rc);
-                }
-        }
-
-        /* NB this resets all event sequence numbers to 0, to be earlier
-         * than eq_deq_seq */
-        memset(eq->eq_events, 0, count * sizeof(ptl_event_t));
-
-        eq->eq_deq_seq = 1;
-        eq->eq_enq_seq = 1;
-        eq->eq_size = count;
-        eq->eq_refcount = 0;
-        eq->eq_callback = callback;
-
-        LIB_LOCK(nal, flags);
-
-        /* publish the queue: make its handle resolvable and track it on
-         * the interface's list of active EQs */
-        lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ);
-        list_add (&eq->eq_list, &nal->libnal_ni.ni_active_eqs);
-
-        LIB_UNLOCK(nal, flags);
-
-        ptl_eq2handle(handle, nal, eq);
-        return (PTL_OK);
-}
-
-/*
- * Destroy the event queue named by 'eqh'.
- *
- * Returns PTL_EQ_INVALID if the handle does not resolve to a queue and
- * PTL_EQ_IN_USE if the queue's refcount is non-zero.  Otherwise the
- * queue is unhooked and its descriptor freed under the lock, after
- * which the event array is unmapped (when the NAL provides
- * libnal_unmap) and freed, and PTL_OK is returned.
- */
-int 
-lib_api_eq_free(nal_t *apinal, ptl_handle_eq_t *eqh)
-{
-        lib_nal_t     *nal = apinal->nal_data;
-        lib_eq_t      *eq;
-        ptl_event_t   *saved_events = NULL;
-        void          *saved_key = NULL;
-        int            saved_size = 0;
-        int            rc = PTL_OK;
-        unsigned long  flags;
-
-        LIB_LOCK(nal, flags);
-
-        eq = ptl_handle2eq(eqh, nal);
-        if (eq == NULL) {
-                rc = PTL_EQ_INVALID;
-        } else if (eq->eq_refcount != 0) {
-                rc = PTL_EQ_IN_USE;
-        } else {
-                /* remember what has to be released once the lock is
-                 * dropped; 'eq' itself is freed right here */
-                saved_events = eq->eq_events;
-                saved_size   = eq->eq_size;
-                saved_key    = eq->eq_addrkey;
-
-                lib_invalidate_handle (nal, &eq->eq_lh);
-                list_del (&eq->eq_list);
-                lib_eq_free (nal, eq);
-        }
-
-        LIB_UNLOCK(nal, flags);
-
-        if (rc != PTL_OK)
-                return (rc);
-
-        if (nal->libnal_unmap != NULL) {
-                struct iovec iov = {
-                        .iov_base = saved_events,
-                        .iov_len = saved_size * sizeof(ptl_event_t)};
-
-                nal->libnal_unmap(nal, 1, &iov, &saved_key);
-        }
-
-        PORTAL_FREE(saved_events, saved_size * sizeof (ptl_event_t));
-
-        return (PTL_OK);
-}
-
-/*
- * Try to dequeue the next event from 'eq' into '*ev'.
- *
- * Returns PTL_EQ_EMPTY (leaving '*ev' untouched) when the slot indexed
- * by eq_deq_seq holds a sequence number older than eq_deq_seq.
- * Otherwise the slot is copied out and eq_deq_seq advances to one past
- * the copied event's sequence; the result is PTL_OK when the sequence
- * matched exactly and PTL_EQ_DROPPED when it did not.
- */
-int
-lib_get_event (lib_eq_t *eq, ptl_event_t *ev)
-{
-        int          slot = eq->eq_deq_seq & (eq->eq_size - 1);
-        ptl_event_t *candidate = &eq->eq_events[slot];
-        int          rc = PTL_OK;
-        ENTRY;
-
-        CDEBUG(D_INFO, "event: %p, sequence: %lu, eq->size: %u\n",
-               candidate, eq->eq_deq_seq, eq->eq_size);
-
-        /* nothing new has been written into this slot yet */
-        if (PTL_SEQ_GT (eq->eq_deq_seq, candidate->sequence)) {
-                RETURN(PTL_EQ_EMPTY);
-        }
-
-        /* hand the event to the caller */
-        *ev = *candidate;
-
-        /* a sequence mismatch means unseen events were overwritten */
-        if (eq->eq_deq_seq != candidate->sequence) {
-                CERROR("Event Queue Overflow: eq seq %lu ev seq %lu\n",
-                       eq->eq_deq_seq, candidate->sequence);
-                rc = PTL_EQ_DROPPED;
-        }
-
-        eq->eq_deq_seq = candidate->sequence + 1;
-        RETURN(rc);
-}
-
-
-int
-lib_api_eq_poll (nal_t *apinal, 
-                 ptl_handle_eq_t *eventqs, int neq, int timeout_ms,
-                 ptl_event_t *event, int *which)
-{
-        lib_nal_t       *nal = apinal->nal_data;
-        lib_ni_t        *ni = &nal->libnal_ni;
-        unsigned long    flags;
-        int              i;
-        int              rc;
-#ifdef __KERNEL__
-        wait_queue_t     wq;
-        unsigned long    now;
-#else
-        struct timeval   then;
-        struct timeval   now;
-        struct timespec  ts;
-#endif
-        ENTRY;
-
-        LIB_LOCK(nal, flags);
-
-        for (;;) {
-                for (i = 0; i < neq; i++) {
-                        lib_eq_t *eq = ptl_handle2eq(&eventqs[i], nal);
-
-                        rc = lib_get_event (eq, event);
-                        if (rc != PTL_EQ_EMPTY) {
-                                LIB_UNLOCK(nal, flags);
-                                *which = i;
-                                RETURN(rc);
-                        }
-                }
-                
-                if (timeout_ms == 0) {
-                        LIB_UNLOCK (nal, flags);
-                        RETURN (PTL_EQ_EMPTY);
-                }
-
-                /* Some architectures force us to do spin locking/unlocking
-                 * in the same stack frame, means we can abstract the
-                 * locking here */
-#ifdef __KERNEL__
-                init_waitqueue_entry(&wq, current);
-                set_current_state(TASK_INTERRUPTIBLE);
-                add_wait_queue(&ni->ni_waitq, &wq);
-
-                LIB_UNLOCK(nal, flags);
-
-                if (timeout_ms < 0) {
-                        schedule ();
-                } else {
-                        now = jiffies;
-                        schedule_timeout((timeout_ms * HZ)/1000);
-                        timeout_ms -= ((jiffies - now) * 1000)/HZ;
-                        if (timeout_ms < 0)
-                                timeout_ms = 0;
-                }
-                
-                LIB_LOCK(nal, flags);
-#else
-                if (timeout_ms < 0) {
-                        pthread_cond_wait(&ni->ni_cond, &ni->ni_mutex);
-                } else {
-                        gettimeofday(&then, NULL);
-                        
-                        ts.tv_sec = then.tv_sec + timeout_ms/1000;
-                        ts.tv_nsec = then.tv_usec * 1000 + 
-                                     (timeout_ms%1000) * 1000000;
-                        if (ts.tv_nsec >= 1000000000) {
-                                ts.tv_sec++;
-                                ts.tv_nsec -= 1000000000;
-                        }
-                        
-                        pthread_cond_timedwait(&ni->ni_cond,
-                                               &ni->ni_mutex, &ts);
-                        
-                        gettimeofday(&now, NULL);
-                        timeout_ms -= (now.tv_sec - then.tv_sec) * 1000 +
-                                      (now.tv_usec - then.tv_usec) / 1000;
-                        
-                        if (timeout_ms < 0)
-                                timeout_ms = 0;
-                }
-#endif
-        }
-}
diff --git a/lustre/portals/portals/lib-init.c b/lustre/portals/portals/lib-init.c
deleted file mode 100644 (file)
index 9d97bc1..0000000
+++ /dev/null
@@ -1,434 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-init.c
- * Start up the internal library and clear all structures
- * Called by the NAL when it initializes.  Safe to call multiple times.
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-# define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-
-#ifdef __KERNEL__
-# include <linux/string.h>      /* for memset() */
-# include <linux/kp30.h>
-# ifdef KERNEL_ADDR_CACHE
-#  include <compute/OS/addrCache/cache.h>
-# endif
-#else
-# include <string.h>
-# include <sys/time.h>
-#endif
-
-#ifndef PTL_USE_LIB_FREELIST
-
-/* Variant used when PTL_USE_LIB_FREELIST is not defined: nothing is
- * allocated.  'requested_limits' is not consulted; the ME, MD and EQ
- * limits are all reported as INT_MAX.  Always returns PTL_OK. */
-int
-kportal_descriptor_setup (lib_nal_t *nal,
-                          ptl_ni_limits_t *requested_limits,
-                          ptl_ni_limits_t *actual_limits)
-{
-        /* Ignore requested limits! */
-        actual_limits->max_eqs = INT_MAX;
-        actual_limits->max_mds = INT_MAX;
-        actual_limits->max_mes = INT_MAX;
-
-        return PTL_OK;
-}
-
-/* Counterpart of kportal_descriptor_setup() for builds without
- * PTL_USE_LIB_FREELIST. */
-void
-kportal_descriptor_cleanup (lib_nal_t *nal)
-{
-        /* nothing was set up, so there is nothing to release */
-}
-#else
-
-/*
- * Populate freelist 'fl' with 'n' zeroed objects that each have room
- * for 'size' bytes of contents.
- *
- * All objects are carved out of one PORTAL_ALLOC'd region; every object
- * starts with the list linkage that chains it onto fl->fl_list.
- * Returns PTL_NO_SPACE if the region cannot be allocated, else PTL_OK.
- */
-int
-lib_freelist_init (lib_nal_t *nal, lib_freelist_t *fl, int n, int size)
-{
-        char *cursor;
-        int   remaining;
-
-        LASSERT (n > 0);
-
-        /* each object is prefixed by the freeobj header */
-        size += offsetof (lib_freeobj_t, fo_contents);
-
-        PORTAL_ALLOC(cursor, n * size);
-        if (cursor == NULL)
-                return (PTL_NO_SPACE);
-
-        INIT_LIST_HEAD (&fl->fl_list);
-        fl->fl_objs = cursor;
-        fl->fl_nobjs = n;
-        fl->fl_objsize = size;
-
-        /* walk the region one object at a time, chaining each on */
-        remaining = n;
-        for (;;) {
-                memset (cursor, 0, size);
-                list_add ((struct list_head *)cursor, &fl->fl_list);
-                cursor += size;
-
-                if (--remaining == 0)
-                        break;
-        }
-
-        return (PTL_OK);
-}
-
-/*
- * Release the storage behind freelist 'fl' and reset the freelist
- * structure to all-zero.
- *
- * Does nothing when fl_nobjs is 0.  Asserts that every object is back
- * on the free list before the backing region is freed.
- */
-void
-lib_freelist_fini (lib_nal_t *nal, lib_freelist_t *fl)
-{
-        struct list_head *el;
-        int               count;
-
-        if (fl->fl_nobjs == 0)
-                return;
-
-        /* count the objects currently on the free list */
-        count = 0;
-        for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next)
-                count++;
-
-        LASSERT (count == fl->fl_nobjs);
-
-        PORTAL_FREE(fl->fl_objs, fl->fl_nobjs * fl->fl_objsize);
-
-        /* clear the whole structure, not just sizeof(pointer) bytes, so
-         * that fl_nobjs reads 0 and a repeated fini is a no-op */
-        memset (fl, 0, sizeof (*fl));
-}
-
-/*
- * Variant used when PTL_USE_LIB_FREELIST is defined: pre-allocate the
- * ME, message, MD and EQ freelists of the interface.
- *
- * 'requested_limits' is not consulted; the reported limits are MAX_MES,
- * MAX_MDS and MAX_EQS.  Returns the first non-PTL_OK result of
- * lib_freelist_init(), or PTL_OK.
- *
- * NB on failure the caller must STILL call kportal_descriptor_cleanup
- * to release the freelists that were set up before the failure.
- */
-int
-kportal_descriptor_setup (lib_nal_t *nal,
-                          ptl_ni_limits_t *requested_limits,
-                          ptl_ni_limits_t *actual_limits)
-{
-        lib_ni_t  *ni = &nal->libnal_ni;
-        int        rc;
-
-        /* start from empty freelists so that cleanup is safe on any of
-         * them, whether or not its init was reached */
-        memset (&ni->ni_free_mes,  0, sizeof (ni->ni_free_mes));
-        memset (&ni->ni_free_msgs, 0, sizeof (ni->ni_free_msgs));
-        memset (&ni->ni_free_mds,  0, sizeof (ni->ni_free_mds));
-        memset (&ni->ni_free_eqs,  0, sizeof (ni->ni_free_eqs));
-
-        /* Ignore requested limits!  NB the limits structure has no
-         * field for the maximum number of messages in flight. */
-        actual_limits->max_mes = MAX_MES;
-        actual_limits->max_mds = MAX_MDS;
-        actual_limits->max_eqs = MAX_EQS;
-
-        rc = lib_freelist_init (nal, &ni->ni_free_mes,
-                                MAX_MES, sizeof (lib_me_t));
-
-        if (rc == PTL_OK)
-                rc = lib_freelist_init (nal, &ni->ni_free_msgs,
-                                        MAX_MSGS, sizeof (lib_msg_t));
-
-        if (rc == PTL_OK)
-                rc = lib_freelist_init (nal, &ni->ni_free_mds,
-                                        MAX_MDS, sizeof (lib_md_t));
-
-        if (rc == PTL_OK)
-                rc = lib_freelist_init (nal, &ni->ni_free_eqs,
-                                        MAX_EQS, sizeof (lib_eq_t));
-
-        return (rc);
-}
-
-/* Tear down the ME, message, MD and EQ freelists of the interface, in
- * that order, via lib_freelist_fini(). */
-void
-kportal_descriptor_cleanup (lib_nal_t *nal)
-{
-        lib_freelist_fini (nal, &nal->libnal_ni.ni_free_mes);
-        lib_freelist_fini (nal, &nal->libnal_ni.ni_free_msgs);
-        lib_freelist_fini (nal, &nal->libnal_ni.ni_free_mds);
-        lib_freelist_fini (nal, &nal->libnal_ni.ni_free_eqs);
-}
-
-#endif
-
-/*
- * Build an interface cookie from the current time of day, expressed as
- * seconds * 1000000 + microseconds.
- *
- * NB the interface cookie in wire handles guards against delayed
- * replies and ACKs appearing valid in a new instance of the same
- * interface; initialisation time is considered unique enough for that.
- */
-__u64
-lib_create_interface_cookie (lib_nal_t *nal)
-{
-        struct timeval tv;
-#ifndef __KERNEL__
-        int            rc = gettimeofday (&tv, NULL);
-
-        LASSERT (rc == 0);
-#else
-        do_gettimeofday(&tv);
-#endif
-
-        return ((__u64)tv.tv_sec * 1000000 + tv.tv_usec);
-}
-
-/*
- * Allocate the interface's handle hash table and initialise every
- * bucket as an empty list.  The object cookie counter is then set to
- * PTL_COOKIE_TYPES.
- *
- * Returns PTL_NO_SPACE if the table cannot be allocated, else PTL_OK.
- */
-int
-lib_setup_handle_hash (lib_nal_t *nal) 
-{
-        lib_ni_t *ni = &nal->libnal_ni;
-        int       slot;
-
-        /* Arbitrary choice of hash table size */
-#ifdef __KERNEL__
-        ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head);
-#else
-        ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4;
-#endif
-        PORTAL_ALLOC(ni->ni_lh_hash_table,
-                     ni->ni_lh_hash_size * sizeof (struct list_head));
-        if (ni->ni_lh_hash_table == NULL)
-                return (PTL_NO_SPACE);
-
-        slot = 0;
-        while (slot < ni->ni_lh_hash_size) {
-                INIT_LIST_HEAD (&ni->ni_lh_hash_table[slot]);
-                slot++;
-        }
-
-        ni->ni_next_object_cookie = PTL_COOKIE_TYPES;
-
-        return (PTL_OK);
-}
-
-/* Free the interface's handle hash table if one is present.  The table
- * pointer itself is left unchanged. */
-void
-lib_cleanup_handle_hash (lib_nal_t *nal)
-{
-        lib_ni_t *ni = &nal->libnal_ni;
-
-        if (ni->ni_lh_hash_table != NULL) {
-                PORTAL_FREE(ni->ni_lh_hash_table,
-                            ni->ni_lh_hash_size * sizeof (struct list_head));
-        }
-}
-
-/*
- * Find the handle whose cookie equals 'cookie' and whose type bits
- * (the low bits masked by PTL_COOKIE_TYPES - 1) equal 'type'.
- * Returns NULL on a type mismatch or when no such handle is hashed.
- *
- * ALWAYS called with statelock held.
- */
-lib_handle_t *
-lib_lookup_cookie (lib_nal_t *nal, __u64 cookie, int type) 
-{
-        lib_ni_t            *ni = &nal->libnal_ni;
-        struct list_head    *bucket;
-        struct list_head    *pos;
-        unsigned int         slot;
-
-        /* reject cookies of the wrong object type straight away */
-        if ((cookie & (PTL_COOKIE_TYPES - 1)) != type)
-                return (NULL);
-
-        slot = ((unsigned int)cookie) % ni->ni_lh_hash_size;
-        bucket = &ni->ni_lh_hash_table[slot];
-
-        for (pos = bucket->next; pos != bucket; pos = pos->next) {
-                lib_handle_t *lh = list_entry (pos, lib_handle_t,
-                                               lh_hash_chain);
-
-                if (lh->lh_cookie == cookie)
-                        return (lh);
-        }
-
-        return (NULL);
-}
-
-/*
- * Give 'lh' a fresh cookie and hash it.  The cookie is the interface's
- * next object cookie with 'type' OR-ed into it; the counter then
- * advances by PTL_COOKIE_TYPES.
- *
- * ALWAYS called with statelock held.
- */
-void
-lib_initialise_handle (lib_nal_t *nal, lib_handle_t *lh, int type) 
-{
-        lib_ni_t       *ni = &nal->libnal_ni;
-        unsigned int    slot;
-
-        LASSERT (type >= 0 && type < PTL_COOKIE_TYPES);
-
-        lh->lh_cookie = ni->ni_next_object_cookie | type;
-        ni->ni_next_object_cookie += PTL_COOKIE_TYPES;
-
-        /* same hash function as lib_lookup_cookie() */
-        slot = ((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size;
-        list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[slot]);
-}
-
-/* Take 'lh' off its hash chain. */
-void
-lib_invalidate_handle (lib_nal_t *nal, lib_handle_t *lh)
-{
-        /* 'nal' is not needed: the chain linkage is self-contained */
-        list_del (&lh->lh_hash_chain);
-}
-
-/*
- * Initialise the library-level state of 'libnal' and bind it to the
- * API-level NAL 'apinal'.
- *
- * Installs the lib_api_* handlers into 'apinal', sets up descriptor
- * storage, the lists of active objects, the locking primitives, the
- * handle hash and the portal table, then fills in the actual limits
- * (copied to '*actual_limits' when that pointer is non-NULL).
- *
- * 'requested_limits' may be NULL; within this function only its
- * max_pt_index is read (default portal table size: 64 entries).
- *
- * Returns PTL_OK on success, the error from kportal_descriptor_setup()
- * or lib_setup_handle_hash() if either fails, or PTL_NO_SPACE if the
- * portal table cannot be allocated.
- *
- * NOTE(review): the 'out' error path calls lib_cleanup_handle_hash()
- * even when kportal_descriptor_setup() failed, i.e. before
- * lib_setup_handle_hash() has assigned ni_lh_hash_table.  This
- * presumably relies on the caller passing a zeroed 'libnal' -- confirm.
- */
-int
-lib_init(lib_nal_t *libnal, nal_t *apinal, 
-         ptl_process_id_t process_id,
-         ptl_ni_limits_t *requested_limits,
-         ptl_ni_limits_t *actual_limits)
-{
-        int       rc = PTL_OK;
-        lib_ni_t *ni = &libnal->libnal_ni;
-        int       ptl_size;
-        int       i;
-        ENTRY;
-
-        /* NB serialised in PtlNIInit() */
-
-        lib_assert_wire_constants ();
-
-        /* Setup the API nal with the lib API handling functions */
-        apinal->nal_get_id    = lib_api_get_id;
-        apinal->nal_ni_status = lib_api_ni_status;
-        apinal->nal_ni_dist   = lib_api_ni_dist;
-        apinal->nal_fail_nid  = lib_api_fail_nid;
-        apinal->nal_me_attach = lib_api_me_attach;
-        apinal->nal_me_insert = lib_api_me_insert;
-        apinal->nal_me_unlink = lib_api_me_unlink;
-        apinal->nal_md_attach = lib_api_md_attach;
-        apinal->nal_md_bind   = lib_api_md_bind;
-        apinal->nal_md_unlink = lib_api_md_unlink;
-        apinal->nal_md_update = lib_api_md_update;
-        apinal->nal_eq_alloc  = lib_api_eq_alloc;
-        apinal->nal_eq_free   = lib_api_eq_free;
-        apinal->nal_eq_poll   = lib_api_eq_poll;
-        apinal->nal_put       = lib_api_put;
-        apinal->nal_get       = lib_api_get;
-
-        /* cross-link the API-level and library-level NAL structures */
-        apinal->nal_data      = libnal;
-        ni->ni_api            = apinal;
-
-        /* fills in max_mes/max_mds/max_eqs of ni_actual_limits */
-        rc = kportal_descriptor_setup (libnal, requested_limits, 
-                                       &ni->ni_actual_limits);
-        if (rc != PTL_OK)
-                goto out;
-
-        memset(&ni->ni_counters, 0, sizeof(lib_counters_t));
-
-        INIT_LIST_HEAD (&ni->ni_active_msgs);
-        INIT_LIST_HEAD (&ni->ni_active_mds);
-        INIT_LIST_HEAD (&ni->ni_active_eqs);
-        INIT_LIST_HEAD (&ni->ni_test_peers);
-
-#ifdef __KERNEL__
-        spin_lock_init (&ni->ni_lock);
-        init_waitqueue_head (&ni->ni_waitq);
-#else
-        pthread_mutex_init(&ni->ni_mutex, NULL);
-        pthread_cond_init(&ni->ni_cond, NULL);
-#endif
-
-        ni->ni_interface_cookie = lib_create_interface_cookie (libnal);
-        /* NB lib_setup_handle_hash() below overwrites this with
-         * PTL_COOKIE_TYPES on success */
-        ni->ni_next_object_cookie = 0;
-        rc = lib_setup_handle_hash (libnal);
-        if (rc != PTL_OK)
-                goto out;
-        
-        ni->ni_pid = process_id;
-
-        /* portal table size: one entry per portal index 0..max_pt_index */
-        if (requested_limits != NULL)
-                ptl_size = requested_limits->max_pt_index + 1;
-        else
-                ptl_size = 64;
-
-        ni->ni_portals.size = ptl_size;
-        PORTAL_ALLOC(ni->ni_portals.tbl,
-                     ptl_size * sizeof(struct list_head));
-        if (ni->ni_portals.tbl == NULL) {
-                rc = PTL_NO_SPACE;
-                goto out;
-        }
-
-        for (i = 0; i < ptl_size; i++)
-                INIT_LIST_HEAD(&(ni->ni_portals.tbl[i]));
-
-        /* max_{mes,mds,eqs} set in kportal_descriptor_setup */
-
-        /* We don't have an access control table! */
-        ni->ni_actual_limits.max_ac_index = -1;
-
-        ni->ni_actual_limits.max_pt_index = ptl_size - 1;
-        ni->ni_actual_limits.max_md_iovecs = PTL_MD_MAX_IOV;
-        ni->ni_actual_limits.max_me_list = INT_MAX;
-
-        /* We don't support PtlGetPut! */
-        ni->ni_actual_limits.max_getput_md = 0;
-
-        if (actual_limits != NULL)
-                *actual_limits = ni->ni_actual_limits;
-
- out:
-        /* on any failure undo the handle hash and descriptor storage;
-         * the success path falls through here with rc == PTL_OK */
-        if (rc != PTL_OK) {
-                lib_cleanup_handle_hash (libnal);
-                kportal_descriptor_cleanup (libnal);
-        }
-
-        RETURN (rc);
-}
-
-int
-lib_fini(lib_nal_t *nal)
-{
-        lib_ni_t *ni = &nal->libnal_ni;
-        int       idx;
-
-        /* NB no state_lock() since this is the last reference.  The NAL
-         * should have shut down already, so it should be safe to unlink
-         * and free all descriptors, even those that appear committed to a
-         * network op (eg MD with non-zero pending count)
-         */
-
-        for (idx = 0; idx < ni->ni_portals.size; idx++)
-                while (!list_empty (&ni->ni_portals.tbl[idx])) {
-                        lib_me_t *me = list_entry (ni->ni_portals.tbl[idx].next,
-                                                   lib_me_t, me_list);
-
-                        CERROR ("Active me %p on exit\n", me);
-                        list_del (&me->me_list);
-                        lib_me_free (nal, me);
-                }
-
-        while (!list_empty (&ni->ni_active_mds)) {
-                lib_md_t *md = list_entry (ni->ni_active_mds.next,
-                                           lib_md_t, md_list);
-
-                CERROR ("Active md %p on exit\n", md);
-                list_del (&md->md_list);
-                lib_md_free (nal, md);
-        }
-
-        while (!list_empty (&ni->ni_active_eqs)) {
-                lib_eq_t *eq = list_entry (ni->ni_active_eqs.next,
-                                           lib_eq_t, eq_list);
-
-                CERROR ("Active eq %p on exit\n", eq);
-                list_del (&eq->eq_list);
-                lib_eq_free (nal, eq);
-        }
-
-        while (!list_empty (&ni->ni_active_msgs)) {
-                lib_msg_t *msg = list_entry (ni->ni_active_msgs.next,
-                                             lib_msg_t, msg_list);
-
-                CERROR ("Active msg %p on exit\n", msg);
-                list_del (&msg->msg_list);
-                lib_msg_free (nal, msg);
-        }
-
-        PORTAL_FREE(ni->ni_portals.tbl,  
-                    ni->ni_portals.size * sizeof(struct list_head));
-
-        lib_cleanup_handle_hash (nal);
-        kportal_descriptor_cleanup (nal);
-
-#ifndef __KERNEL__
-        pthread_mutex_destroy(&ni->ni_mutex);
-        pthread_cond_destroy(&ni->ni_cond);
-#endif
-
-        return (PTL_OK);
-}
diff --git a/lustre/portals/portals/lib-md.c b/lustre/portals/portals/lib-md.c
deleted file mode 100644 (file)
index 6deadb8..0000000
+++ /dev/null
@@ -1,426 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-md.c
- * Memory Descriptor management routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __KERNEL__
-# include <stdio.h>
-#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
-#endif
-
-#include <portals/lib-p30.h>
-
-/* Unlink 'md'.  On the first attempt the MD is flagged as a zombie, detached
- * from its ME (which is unlinked too if it was created with PTL_UNLINK) and
- * its handle is invalidated.  If operations are still pending on the MD
- * (md->pending != 0) nothing more happens on this call; otherwise its
- * buffers are unmapped through the NAL callbacks (when set), the reference
- * on its EQ is dropped, and the MD is removed from its list and freed.
- * must be called with state lock held */
-void
-lib_md_unlink(lib_nal_t *nal, lib_md_t *md)
-{
-        if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) == 0) {
-                /* first unlink attempt... */
-                lib_me_t *me = md->me;
-
-                md->md_flags |= PTL_MD_FLAG_ZOMBIE;
-
-                /* Disassociate from ME (if any), and unlink it if it was created
-                 * with PTL_UNLINK */
-                if (me != NULL) {
-                        me->md = NULL;
-                        if (me->unlink == PTL_UNLINK)
-                                lib_me_unlink(nal, me);
-                }
-
-                /* ensure all future handle lookups fail */
-                lib_invalidate_handle(nal, &md->md_lh);
-        }
-
-        if (md->pending != 0) { /* still busy: leave it as a zombie for now */
-                CDEBUG(D_NET, "Queueing unlink of md %p\n", md);
-                return;
-        }
-
-        CDEBUG(D_NET, "Unlinking md %p\n", md);
-
-        if ((md->options & PTL_MD_KIOV) != 0) { /* page fragments */
-                if (nal->libnal_unmap_pages != NULL)
-                        nal->libnal_unmap_pages (nal, 
-                                                 md->md_niov, 
-                                                 md->md_iov.kiov, 
-                                                 &md->md_addrkey);
-        } else if (nal->libnal_unmap != NULL) { /* iovec or contiguous */
-                nal->libnal_unmap (nal, 
-                                   md->md_niov, md->md_iov.iov, 
-                                   &md->md_addrkey);
-        }
-
-        if (md->eq != NULL) { /* drop this MD's reference on its EQ */
-                md->eq->eq_refcount--;
-                LASSERT (md->eq->eq_refcount >= 0);
-        }
-
-        list_del (&md->md_list);
-        lib_md_free(nal, md);
-}
-
-/* Initialise the freshly allocated 'lmd' from the user's descriptor 'umd':
- * resolve the EQ handle, copy the user-visible fields, import the fragment
- * list (iovec, kiov or one contiguous buffer), validate max_size, let the
- * NAL map the buffers if it provides a map callback, and finally take the EQ
- * reference, initialise the handle and add the MD to ni_active_mds.
- * 'unlink' == PTL_UNLINK sets PTL_MD_FLAG_AUTO_UNLINK on the MD.
- * Returns PTL_OK on success or an error code (see the NB below).
- * must be called with state lock held */
-static int
-lib_md_build(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd, int unlink)
-{
-        lib_eq_t     *eq = NULL;
-        int           rc;
-        int           i;
-        int           niov;
-        int           total_length = 0;
-
-        /* NB we are passed an allocated, but uninitialised/active md.
-         * if we return success, caller may lib_md_unlink() it.
-         * otherwise caller may only lib_md_free() it.
-         */
-
-        if (!PtlHandleIsEqual (umd->eq_handle, PTL_EQ_NONE)) {
-                eq = ptl_handle2eq(&umd->eq_handle, nal);
-                if (eq == NULL)
-                        return PTL_EQ_INVALID;
-        }
-
-        /* This implementation doesn't know how to create START events or
-         * disable END events.  Best to LASSERT our caller is compliant so
-         * we find out quickly...  */
-        LASSERT (eq == NULL ||
-                 ((umd->options & PTL_MD_EVENT_START_DISABLE) != 0 &&
-                  (umd->options & PTL_MD_EVENT_END_DISABLE) == 0));
-
-        lmd->me = NULL;
-        lmd->start = umd->start;
-        lmd->offset = 0;
-        lmd->max_size = umd->max_size;
-        lmd->options = umd->options;
-        lmd->user_ptr = umd->user_ptr;
-        lmd->eq = eq;
-        lmd->threshold = umd->threshold;
-        lmd->pending = 0;
-        lmd->md_flags = (unlink == PTL_UNLINK) ? PTL_MD_FLAG_AUTO_UNLINK : 0;
-
-        if ((umd->options & PTL_MD_IOVEC) != 0) {
-
-                if ((umd->options & PTL_MD_KIOV) != 0) /* Can't specify both */
-                        return PTL_MD_ILLEGAL; 
-
-                lmd->md_niov = niov = umd->length; /* length counts fragments here */
-                memcpy(lmd->md_iov.iov, umd->start,
-                       niov * sizeof (lmd->md_iov.iov[0]));
-
-                for (i = 0; i < niov; i++) {
-                        /* We take the base address on trust.
-                         * NOTE(review): iov_len is presumably an unsigned
-                         * type, in which case the test below only rejects
-                         * zero-length fragments - confirm. */
-                        if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
-                                return PTL_MD_ILLEGAL;
-
-                        total_length += lmd->md_iov.iov[i].iov_len;
-                }
-
-                lmd->length = total_length;
-
-                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
-                    (umd->max_size < 0 || 
-                     umd->max_size > total_length)) // illegal max_size
-                        return PTL_MD_ILLEGAL;
-
-                if (nal->libnal_map != NULL) {
-                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
-                                              &lmd->md_addrkey);
-                        if (rc != PTL_OK)
-                                return (rc);
-                }
-        } else if ((umd->options & PTL_MD_KIOV) != 0) {
-#ifndef __KERNEL__
-                return PTL_MD_ILLEGAL;
-#else                
-                /* Trap attempt to use paged I/O if unsupported early.
-                 * NOTE(review): this returns PTL_MD_INVALID whereas the
-                 * other validation failures in this function return
-                 * PTL_MD_ILLEGAL - confirm that is intended. */
-                if (nal->libnal_send_pages == NULL ||
-                    nal->libnal_recv_pages == NULL)
-                        return PTL_MD_INVALID;
-
-                lmd->md_niov = niov = umd->length; /* length counts fragments here */
-                memcpy(lmd->md_iov.kiov, umd->start,
-                       niov * sizeof (lmd->md_iov.kiov[0]));
-
-                for (i = 0; i < niov; i++) {
-                        /* We take the page pointer on trust; each fragment
-                         * must lie within one page.
-                         * NOTE(review): PTL_VAL_FAILED is returned here but
-                         * PTL_MD_ILLEGAL for a bad iovec length above -
-                         * confirm that is intended. */
-                        if (lmd->md_iov.kiov[i].kiov_offset + 
-                            lmd->md_iov.kiov[i].kiov_len > PAGE_SIZE )
-                                return PTL_VAL_FAILED; /* invalid length */
-
-                        total_length += lmd->md_iov.kiov[i].kiov_len;
-                }
-
-                lmd->length = total_length;
-
-                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
-                    (umd->max_size < 0 || 
-                     umd->max_size > total_length)) // illegal max_size
-                        return PTL_MD_ILLEGAL;
-
-                if (nal->libnal_map_pages != NULL) {
-                        rc = nal->libnal_map_pages (nal, niov, lmd->md_iov.kiov, 
-                                                    &lmd->md_addrkey);
-                        if (rc != PTL_OK)
-                                return (rc);
-                }
-#endif
-        } else {   /* contiguous: stored as a single-entry iov */
-                lmd->length = umd->length;
-                lmd->md_niov = niov = 1;
-                lmd->md_iov.iov[0].iov_base = umd->start;
-                lmd->md_iov.iov[0].iov_len = umd->length;
-
-                if ((umd->options & PTL_MD_MAX_SIZE) != 0 && /* max size used */
-                    (umd->max_size < 0 || 
-                     umd->max_size > umd->length)) // illegal max_size
-                        return PTL_MD_ILLEGAL;
-
-                if (nal->libnal_map != NULL) {
-                        rc = nal->libnal_map (nal, niov, lmd->md_iov.iov, 
-                                              &lmd->md_addrkey);
-                        if (rc != PTL_OK)
-                                return (rc);
-                }
-        } 
-
-        if (eq != NULL) /* only taken once nothing below can fail */
-                eq->eq_refcount++;
-
-        /* It's good; let handle2md succeed and add to active mds */
-        lib_initialise_handle (nal, &lmd->md_lh, PTL_COOKIE_TYPE_MD);
-        list_add (&lmd->md_list, &nal->libnal_ni.ni_active_mds);
-
-        return PTL_OK;
-}
-
-/* Export the user-visible description of 'lmd' into 'umd'.
- * The state lock must be held by the caller. */
-void
-lib_md_deconstruct(lib_nal_t *nal, lib_md_t *lmd, ptl_md_t *umd)
-{
-        int fragmented = (lmd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0;
-
-        umd->user_ptr  = lmd->user_ptr;
-        umd->options   = lmd->options;
-        umd->max_size  = lmd->max_size;
-        umd->threshold = lmd->threshold;
-
-        /* NB a discontiguous MD is only described by its original iov
-         * pointer (in start) and the number of entries it had; the iov
-         * entries themselves are not copied out. */
-        umd->start  = lmd->start;
-        umd->length = fragmented ? lmd->md_niov : lmd->length;
-
-        ptl_eq2handle(&umd->eq_handle, nal, lmd->eq);
-}
-
-/* Build a new MD from 'umd' and attach it to the match entry named by
- * 'meh'.  On success the MD's handle is stored through 'handle' and PTL_OK
- * is returned; on any failure after allocation the new MD is freed again
- * and the error code is returned. */
-int
-lib_api_md_attach(nal_t *apinal, ptl_handle_me_t *meh,
-                  ptl_md_t *umd, ptl_unlink_t unlink,
-                  ptl_handle_md_t *handle)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_md_t     *md;
-        lib_me_t     *me;
-        unsigned long flags;
-        int           rc;
-
-        /* a fragmented MD may not have too many fragments */
-        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0) {
-                if (umd->length > PTL_MD_MAX_IOV)
-                        return PTL_IOV_INVALID;
-        }
-
-        md = lib_md_alloc(nal, umd);
-        if (md == NULL)
-                return PTL_NO_SPACE;
-
-        LIB_LOCK(nal, flags);
-
-        me = ptl_handle2me(meh, nal);
-        if (me == NULL) {
-                rc = PTL_ME_INVALID;
-                goto failed;
-        }
-
-        if (me->md != NULL) {
-                rc = PTL_ME_IN_USE;
-                goto failed;
-        }
-
-        rc = lib_md_build(nal, md, umd, unlink);
-        if (rc != PTL_OK)
-                goto failed;
-
-        /* cross-link the ME with its new MD, then hand out the handle */
-        me->md = md;
-        md->me = me;
-
-        ptl_md2handle(handle, nal, md);
-
-        LIB_UNLOCK(nal, flags);
-        return (PTL_OK);
-
- failed:
-        lib_md_free (nal, md);
-
-        LIB_UNLOCK(nal, flags);
-        return (rc);
-}
-
-/* Build a free-floating MD (not attached to any match entry) from 'umd'.
- * On success its handle is stored through 'handle'; on failure the newly
- * allocated MD is freed again.  Returns PTL_OK or the error code. */
-int
-lib_api_md_bind(nal_t *apinal,
-                ptl_md_t *umd, ptl_unlink_t unlink,
-                ptl_handle_md_t *handle)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_md_t     *md;
-        unsigned long flags;
-        int           rc;
-
-        /* a fragmented MD may not have too many fragments */
-        if ((umd->options & (PTL_MD_KIOV | PTL_MD_IOVEC)) != 0) {
-                if (umd->length > PTL_MD_MAX_IOV)
-                        return PTL_IOV_INVALID;
-        }
-
-        md = lib_md_alloc(nal, umd);
-        if (md == NULL)
-                return PTL_NO_SPACE;
-
-        LIB_LOCK(nal, flags);
-
-        rc = lib_md_build(nal, md, umd, unlink);
-        if (rc != PTL_OK)
-                lib_md_free (nal, md);          /* never became active */
-        else
-                ptl_md2handle(handle, nal, md);
-
-        LIB_UNLOCK(nal, flags);
-        return (rc);
-}
-
-/* Unlink the MD named by 'mdh'.  Returns PTL_MD_INVALID if the handle does
- * not resolve to an MD, PTL_OK otherwise. */
-int
-lib_api_md_unlink (nal_t *apinal, ptl_handle_md_t *mdh)
-{
-        lib_nal_t       *nal = apinal->nal_data;
-        lib_md_t        *md;
-        unsigned long    flags;
-        int              rc;
-
-        LIB_LOCK(nal, flags);
-
-        md = ptl_handle2md(mdh, nal);
-        if (md == NULL) {
-                rc = PTL_MD_INVALID;
-        } else {
-                /* An idle MD that has an EQ gets its unlink event enqueued
-                 * right here.  If the MD is busy, lib_md_unlink just marks
-                 * it for deletion, and when the NAL is done, the completion
-                 * event flags that the MD was unlinked. */
-                if (md->pending == 0 &&
-                    md->eq != NULL) {
-                        ptl_event_t ev;
-
-                        memset(&ev, 0, sizeof(ev));
-
-                        ev.unlinked = 1;
-                        ev.ni_fail_type = PTL_OK;
-                        ev.type = PTL_EVENT_UNLINK;
-                        lib_md_deconstruct(nal, md, &ev.md);
-                        ptl_md2handle(&ev.md_handle, nal, md);
-
-                        lib_enq_event_locked(nal, NULL, md->eq, &ev);
-                }
-
-                lib_md_unlink(nal, md);
-                rc = PTL_OK;
-        }
-
-        LIB_UNLOCK(nal, flags);
-        return rc;
-}
-/* Optionally copy the current description of the MD named by 'mdh' out to
- * 'oldumd' and, if 'newumd' is given, rebuild the MD from it.  The rebuild
- * is only attempted when the new descriptor has the same fragmentation
- * "shape", the MD has no pending operations and either no test EQ was given
- * or the test EQ's eq_deq_seq equals its eq_enq_seq (presumably meaning it
- * holds no undelivered events); otherwise PTL_MD_NO_UPDATE is returned.
- * NOTE(review): 'testqh' is dereferenced unconditionally once 'newumd' is
- * non-NULL - callers presumably always pass a valid pointer; confirm. */
-int
-lib_api_md_update (nal_t *apinal,
-                   ptl_handle_md_t *mdh,
-                   ptl_md_t *oldumd, ptl_md_t *newumd,
-                   ptl_handle_eq_t *testqh)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_md_t     *md;
-        lib_eq_t     *test_eq = NULL;
-        unsigned long flags;
-        int           rc;
-
-        LIB_LOCK(nal, flags);
-
-        md = ptl_handle2md(mdh, nal);
-        if (md == NULL) {
-                 rc = PTL_MD_INVALID;
-                 goto out;
-        }
-
-        if (oldumd != NULL) /* caller wants the current state copied out */
-                lib_md_deconstruct(nal, md, oldumd);
-
-        if (newumd == NULL) { /* query only, nothing to change */
-                rc = PTL_OK;
-                goto out;
-        }
-
-        /* XXX fttb, the new MD must be the same "shape" wrt fragmentation,
-         * since we simply overwrite the old lib-md */
-        if ((((newumd->options ^ md->options) & 
-              (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0) ||
-            ((newumd->options & (PTL_MD_IOVEC | PTL_MD_KIOV)) != 0 && 
-             newumd->length != md->md_niov)) {
-                rc = PTL_IOV_INVALID;
-                goto out;
-        } 
-
-        if (!PtlHandleIsEqual (*testqh, PTL_EQ_NONE)) {
-                test_eq = ptl_handle2eq(testqh, nal);
-                if (test_eq == NULL) {
-                        rc = PTL_EQ_INVALID;
-                        goto out;
-                }
-        }
-
-        if (md->pending != 0) { /* busy MDs are never updated */
-                rc = PTL_MD_NO_UPDATE;
-                goto out;
-        }
-
-        if (test_eq == NULL ||
-            test_eq->eq_deq_seq == test_eq->eq_enq_seq) {
-                lib_me_t *me = md->me;
-                int       unlink = (md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) ?
-                                   PTL_UNLINK : PTL_RETAIN;
-
-                /* #warning this does not track eq refcounts properly
-                 * NOTE(review): on success lib_md_build() also initialises
-                 * the handle again and list_add()s md_list again, although
-                 * this MD is already active; on failure the MD is left
-                 * partially overwritten.  This looks unsafe - confirm. */
-                rc = lib_md_build(nal, md, newumd, unlink);
-
-                md->me = me; /* lib_md_build() reset md->me to NULL */
-        } else {
-                rc = PTL_MD_NO_UPDATE;
-        }
-
- out:
-        LIB_UNLOCK(nal, flags);
-
-        return rc;
-}
diff --git a/lustre/portals/portals/lib-me.c b/lustre/portals/portals/lib-me.c
deleted file mode 100644 (file)
index 9665b4f..0000000
+++ /dev/null
@@ -1,185 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-me.c
- * Match Entry management routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __KERNEL__
-# include <stdio.h>
-#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
-#endif
-
-#include <portals/lib-p30.h>
-
-/* Create a new match entry on portal 'portal' and return its handle through
- * 'handle'.  With PTL_INS_AFTER the entry is appended to the portal's match
- * list, otherwise it is put at the front.  Returns PTL_PT_INDEX_INVALID for
- * an out-of-range portal, PTL_NO_SPACE if allocation fails, else PTL_OK. */
-int
-lib_api_me_attach(nal_t *apinal,
-                  ptl_pt_index_t portal,
-                  ptl_process_id_t match_id,
-                  ptl_match_bits_t match_bits,
-                  ptl_match_bits_t ignore_bits,
-                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
-                  ptl_handle_me_t *handle)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_ptl_t    *tbl = &nal->libnal_ni.ni_portals;
-        lib_me_t     *me;
-        unsigned long flags;
-
-        if (portal >= tbl->size)
-                return PTL_PT_INDEX_INVALID;
-
-        /* Should check for valid matchid, but not yet */
-
-        me = lib_me_alloc (nal);
-        if (me == NULL)
-                return PTL_NO_SPACE;
-
-        LIB_LOCK(nal, flags);
-
-        me->md = NULL;
-        me->unlink = unlink;
-        me->ignore_bits = ignore_bits;
-        me->match_bits = match_bits;
-        me->match_id = match_id;
-
-        lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME);
-
-        /* the anchor is the list head: list_add() => first entry,
-         * list_add_tail() => last entry */
-        if (pos != PTL_INS_AFTER)
-                list_add(&me->me_list, &(tbl->tbl[portal]));
-        else
-                list_add_tail(&me->me_list, &(tbl->tbl[portal]));
-
-        ptl_me2handle(handle, nal, me);
-
-        LIB_UNLOCK(nal, flags);
-
-        return PTL_OK;
-}
-
-/* Create a new match entry and link it into the match list directly before
- * or after the existing entry named by 'current_meh', as selected by 'pos'.
- * The new entry's handle is returned through 'handle'.
- * Returns PTL_NO_SPACE if allocation fails, PTL_ME_INVALID if 'current_meh'
- * does not resolve to a match entry (the new entry is freed again), else
- * PTL_OK. */
-int
-lib_api_me_insert(nal_t *apinal,
-                  ptl_handle_me_t *current_meh,
-                  ptl_process_id_t match_id, 
-                  ptl_match_bits_t match_bits, 
-                  ptl_match_bits_t ignore_bits,
-                  ptl_unlink_t unlink, ptl_ins_pos_t pos,
-                  ptl_handle_me_t *handle)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_me_t     *current_me;
-        lib_me_t     *new_me;
-        unsigned long flags;
-
-        new_me = lib_me_alloc (nal);
-        if (new_me == NULL)
-                return PTL_NO_SPACE;
-
-        /* Should check for valid matchid, but not yet */
-
-        LIB_LOCK(nal, flags);
-
-        current_me = ptl_handle2me(current_meh, nal);
-        if (current_me == NULL) {
-                lib_me_free (nal, new_me);
-
-                LIB_UNLOCK(nal, flags);
-                return PTL_ME_INVALID;
-        }
-
-        new_me->match_id = match_id;
-        new_me->match_bits = match_bits;
-        new_me->ignore_bits = ignore_bits;
-        new_me->unlink = unlink;
-        new_me->md = NULL;
-
-        lib_initialise_handle (nal, &new_me->me_lh, PTL_COOKIE_TYPE_ME);
-
-        /* The anchor here is an existing entry, not the list head:
-         * list_add() links the new entry immediately after the anchor and
-         * list_add_tail() links it immediately before the anchor. */
-        if (pos == PTL_INS_AFTER)
-                list_add(&new_me->me_list, &current_me->me_list);
-        else
-                list_add_tail(&new_me->me_list, &current_me->me_list);
-
-        ptl_me2handle(handle, nal, new_me);
-
-        LIB_UNLOCK(nal, flags);
-
-        return PTL_OK;
-}
-
-/* Unlink the match entry named by 'meh'.  Returns PTL_ME_INVALID if the
- * handle does not resolve to a match entry, PTL_OK otherwise. */
-int
-lib_api_me_unlink (nal_t *apinal, ptl_handle_me_t *meh)
-{
-        lib_nal_t    *nal = apinal->nal_data;
-        lib_me_t     *me;
-        unsigned long flags;
-        int           rc = PTL_ME_INVALID;
-
-        LIB_LOCK(nal, flags);
-
-        me = ptl_handle2me(meh, nal);
-        if (me != NULL) {
-                lib_me_unlink(nal, me);
-                rc = PTL_OK;
-        }
-
-        LIB_UNLOCK(nal, flags);
-
-        return (rc);
-}
-
-/* Remove 'me' from its match list, unlink the MD attached to it (if any),
- * invalidate its handle and free it.
- * call with state_lock please */
-void
-lib_me_unlink(lib_nal_t *nal, lib_me_t *me)
-{
-        lib_md_t *md;
-
-        list_del (&me->me_list);
-
-        md = me->md;
-        if (md) {
-                /* break the MD's back pointer before unlinking it */
-                md->me = NULL;
-                lib_md_unlink(nal, md);
-        }
-
-        lib_invalidate_handle (nal, &me->me_lh);
-        lib_me_free(nal, me);
-}
-
-#if 0
-/* Debug helper: log a match entry's address and cookie, its match/ignore
- * bits, its attached MD and its neighbours on the match list. */
-static void 
-lib_me_dump(lib_nal_t *nal, lib_me_t * me)
-{
-        CWARN("Match Entry %p ("LPX64")\n", me, 
-              me->me_lh.lh_cookie);
-
-        /* Print the match/ignore bits with LPX64, as match bits are printed
-         * elsewhere in this library, instead of %lx, which is wrong where
-         * long is narrower than the match bits. */
-        CWARN("\tMatch/Ignore\t= "LPX64" / "LPX64"\n",
-              me->match_bits, me->ignore_bits);
-
-        CWARN("\tMD\t= %p\n", me->md);
-        CWARN("\tprev\t= %p\n",
-              list_entry(me->me_list.prev, lib_me_t, me_list));
-        CWARN("\tnext\t= %p\n",
-              list_entry(me->me_list.next, lib_me_t, me_list));
-}
-#endif
diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c
deleted file mode 100644 (file)
index d584f1c..0000000
+++ /dev/null
@@ -1,1427 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-move.c
- * Data movement routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __KERNEL__
-# include <stdio.h>
-#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
-#endif
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-
-/* forward ref */
-static void lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg);
-/* Walk the match list of portal 'index' and pick the first ME whose attached
- * MD accepts this operation (op_mask) from src_nid/src_pid with the given
- * match bits.  On a match lib_commit_md() is called for the MD and 'msg',
- * msg->ev is filled in, the matched length and offset are passed back
- * through mlength_out/offset_out and the MD is returned.  Otherwise the
- * request is logged as dropped and NULL is returned. */
-static lib_md_t *
-lib_match_md(lib_nal_t *nal, int index, int op_mask, 
-             ptl_nid_t src_nid, ptl_pid_t src_pid, 
-             ptl_size_t rlength, ptl_size_t roffset,
-             ptl_match_bits_t match_bits, lib_msg_t *msg,
-             ptl_size_t *mlength_out, ptl_size_t *offset_out)
-{
-        lib_ni_t         *ni = &nal->libnal_ni;
-        struct list_head *match_list = &ni->ni_portals.tbl[index];
-        struct list_head *tmp;
-        lib_me_t         *me;
-        lib_md_t         *md;
-        ptl_size_t        mlength;
-        ptl_size_t        offset;
-        ENTRY;
-
-        CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d "
-                "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits);
-
-        /* NOTE(review): match_list above is computed from 'index' before
-         * this range check; it is only an address computation and is not
-         * dereferenced until the loop below. */
-        if (index < 0 || index >= ni->ni_portals.size) {
-                CERROR("Invalid portal %d not in [0-%d]\n",
-                       index, ni->ni_portals.size);
-                goto failed;
-        }
-
-        list_for_each (tmp, match_list) {
-                me = list_entry(tmp, lib_me_t, me_list);
-                md = me->md;
-
-                 /* ME attached but MD not attached yet */
-                if (md == NULL)
-                        continue;
-
-                LASSERT (me == md->me);
-
-                /* mismatched MD op */
-                if ((md->options & op_mask) == 0)
-                        continue;
-
-                /* MD exhausted */
-                if (lib_md_exhausted(md))
-                        continue;
-
-                /* mismatched ME nid/pid? */
-                if (me->match_id.nid != PTL_NID_ANY &&
-                    me->match_id.nid != src_nid)
-                        continue;
-                
-                CDEBUG(D_NET, "match_id.pid [%x], src_pid [%x]\n",
-                       me->match_id.pid, src_pid);
-
-                if (me->match_id.pid != PTL_PID_ANY &&
-                    me->match_id.pid != src_pid)
-                        continue;
-
-                /* mismatched ME matchbits? (bits set in ignore_bits are
-                 * not compared) */
-                if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0)
-                        continue;
-
-                /* Hurrah! This _is_ a match; check it out...
-                 * The offset comes from the MD itself unless the MD has
-                 * PTL_MD_MANAGE_REMOTE set, in which case the requested
-                 * offset is used. */
-
-                if ((md->options & PTL_MD_MANAGE_REMOTE) == 0)
-                        offset = md->offset;
-                else
-                        offset = roffset;
-
-                if ((md->options & PTL_MD_MAX_SIZE) != 0) {
-                        mlength = md->max_size;
-                        LASSERT (md->offset + mlength <= md->length);
-                } else {
-                        mlength = md->length - offset;
-                }
-
-                if (rlength <= mlength) {        /* fits in allowed space */
-                        mlength = rlength;
-                } else if ((md->options & PTL_MD_TRUNCATE) == 0) {
-                        /* this packet _really_ is too big
-                         * NOTE(review): this path also ends at 'failed',
-                         * whose message reports "no match". */
-                        CERROR("Matching packet %d too big: %d left, "
-                               "%d allowed\n", rlength, md->length - offset,
-                               mlength);
-                        goto failed;
-                }
-
-                /* Commit to this ME/MD */
-                CDEBUG(D_NET, "Incoming %s index %x from "LPU64"/%u of "
-                       "length %d/%d into md "LPX64" [%d] + %d\n", 
-                       (op_mask == PTL_MD_OP_PUT) ? "put" : "get",
-                       index, src_nid, src_pid, mlength, rlength, 
-                       md->md_lh.lh_cookie, md->md_niov, offset);
-
-                lib_commit_md(nal, md, msg);
-                md->offset = offset + mlength;
-
-                /* NB Caller sets ev.type and ev.hdr_data */
-                msg->ev.initiator.nid = src_nid;
-                msg->ev.initiator.pid = src_pid;
-                msg->ev.pt_index = index;
-                msg->ev.match_bits = match_bits;
-                msg->ev.rlength = rlength;
-                msg->ev.mlength = mlength;
-                msg->ev.offset = offset;
-
-                lib_md_deconstruct(nal, md, &msg->ev.md);
-                ptl_md2handle(&msg->ev.md_handle, nal, md);
-
-                *offset_out = offset;
-                *mlength_out = mlength;
-
-                /* Auto-unlink NOW, so the ME gets unlinked if required.
-                 * We bumped md->pending above so the MD just gets flagged
-                 * for unlink when it is finalized. */
-                if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) != 0 &&
-                    lib_md_exhausted(md))
-                        lib_md_unlink(nal, md);
-
-                RETURN (md);
-        }
-
- failed:
-        CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64
-                " offset %d length %d: no match\n",
-                ni->ni_pid.nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT",
-                src_nid, src_pid, index, match_bits, roffset, rlength);
-        RETURN(NULL);
-}
-
-/* Maintain the list of test peers (ni_test_peers).  A non-zero 'threshold'
- * adds an entry for 'nid' with that threshold.  A zero 'threshold' removes
- * entries: those whose threshold is already 0, those for 'nid', or all of
- * them when 'nid' is PTL_NID_ANY.  Returns PTL_OK, or PTL_NO_SPACE if a new
- * entry cannot be allocated. */
-int lib_api_fail_nid (nal_t *apinal, ptl_nid_t nid, unsigned int threshold)
-{
-        lib_nal_t         *nal = apinal->nal_data;
-        lib_test_peer_t   *tp;
-        unsigned long      flags;
-        struct list_head  *pos;
-        struct list_head  *nxt;
-        struct list_head   cull;
-
-        if (threshold == 0) {
-                /* removing entries: collect them on a private list under
-                 * the lock and free them once it has been dropped */
-                INIT_LIST_HEAD (&cull);
-
-                LIB_LOCK(nal, flags);
-
-                list_for_each_safe (pos, nxt, &nal->libnal_ni.ni_test_peers) {
-                        tp = list_entry (pos, lib_test_peer_t, tp_list);
-
-                        /* keep live entries that belong to other nids */
-                        if (tp->tp_threshold != 0 &&
-                            nid != PTL_NID_ANY &&
-                            tp->tp_nid != nid)
-                                continue;
-
-                        list_del (&tp->tp_list);
-                        list_add (&tp->tp_list, &cull);
-                }
-
-                LIB_UNLOCK(nal, flags);
-
-                while (!list_empty (&cull)) {
-                        tp = list_entry (cull.next, lib_test_peer_t, tp_list);
-
-                        list_del (&tp->tp_list);
-                        PORTAL_FREE(tp, sizeof (*tp));
-                }
-                return PTL_OK;
-        }
-
-        /* Adding a new entry */
-        PORTAL_ALLOC(tp, sizeof(*tp));
-        if (tp == NULL)
-                return PTL_NO_SPACE;
-
-        tp->tp_threshold = threshold;
-        tp->tp_nid = nid;
-
-        LIB_LOCK(nal, flags);
-        list_add_tail (&tp->tp_list, &nal->libnal_ni.ni_test_peers);
-        LIB_UNLOCK(nal, flags);
-        return PTL_OK;
-}
-/* Decide whether traffic with peer 'nid' should be failed according to the
- * ni_test_peers list.  Returns 1 if a live entry for 'nid' (or for
- * PTL_NID_ANY) was found, 0 otherwise.  A finite threshold on the matching
- * entry is decremented.  Entries whose threshold is 0 are only removed and
- * freed when 'outgoing' is set. */
-static int
-fail_peer (lib_nal_t *nal, ptl_nid_t nid, int outgoing) 
-{
-        lib_test_peer_t  *tp;
-        struct list_head *el;
-        struct list_head *next;
-        unsigned long     flags;
-        struct list_head  cull;
-        int               fail = 0;
-
-        INIT_LIST_HEAD (&cull);
-        
-        LIB_LOCK (nal, flags);
-
-        list_for_each_safe (el, next, &nal->libnal_ni.ni_test_peers) {
-                tp = list_entry (el, lib_test_peer_t, tp_list);
-
-                if (tp->tp_threshold == 0) {
-                        /* zombie entry: its threshold is used up, so it
-                         * never fails a peer again */
-                        if (outgoing) {
-                                /* only cull zombies on outgoing tests,
-                                 * since we may be at interrupt priority on
-                                 * incoming messages. */
-                                list_del (&tp->tp_list);
-                                list_add (&tp->tp_list, &cull);
-                        }
-                        continue;
-                }
-                        
-                if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */
-                    nid == tp->tp_nid) {        /* fail this peer */
-                        fail = 1;
-                        
-                        if (tp->tp_threshold != PTL_MD_THRESH_INF) {
-                                tp->tp_threshold--;
-                                if (outgoing &&
-                                    tp->tp_threshold == 0) {
-                                        /* see above: entries are only
-                                         * culled on outgoing tests */
-                                        list_del (&tp->tp_list);
-                                        list_add (&tp->tp_list, &cull);
-                                }
-                        }
-                        break; /* only the first matching entry is used */
-                }
-        }
-        
-        LIB_UNLOCK (nal, flags);
-
-        while (!list_empty (&cull)) { /* free culled entries, lock dropped */
-                tp = list_entry (cull.next, lib_test_peer_t, tp_list);
-                list_del (&tp->tp_list);
-                
-                PORTAL_FREE(tp, sizeof (*tp));
-        }
-
-        return (fail);
-}
-
-/* Return the total number of bytes described by the first 'niov' entries
- * of 'iov' (0 when niov <= 0). */
-ptl_size_t
-lib_iov_nob (int niov, struct iovec *iov)
-{
-        ptl_size_t total = 0;
-        int        i;
-
-        for (i = 0; i < niov; i++)
-                total += iov[i].iov_len;
-
-        return (total);
-}
-
-/* Copy 'len' bytes into the flat buffer 'dest' from the data described by
- * the 'niov' fragments of 'iov', starting 'offset' bytes into that data.
- * Nothing is touched when 'len' is 0.  The fragments must cover
- * offset + len bytes; running out of fragments trips an LASSERT. */
-void
-lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, 
-                  ptl_size_t offset, ptl_size_t len)
-{
-        ptl_size_t nob;
-
-        if (len == 0)
-                return;
-        
-        /* skip complete frags before 'offset' */
-        LASSERT (niov > 0);
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-                
-        do {
-                LASSERT (niov > 0);
-                /* copy no more than what is left of this fragment */
-                nob = MIN (iov->iov_len - offset, len);
-                /* cast before offsetting, as lib_extract_iov() does:
-                 * arithmetic on a void pointer is a GNU extension */
-                memcpy (dest, ((char *)iov->iov_base) + offset, nob);
-
-                len -= nob;
-                dest += nob;
-                niov--;
-                iov++;
-                offset = 0;     /* later fragments are read from their start */
-        } while (len > 0);
-}
-
-/* Copy 'len' bytes from the flat buffer 'src' into the memory described by
- * the 'niov' fragments of 'iov', starting 'offset' bytes into that memory.
- * Nothing is touched when 'len' is 0.  The fragments must cover
- * offset + len bytes; running out of fragments trips an LASSERT. */
-void
-lib_copy_buf2iov (int niov, struct iovec *iov, ptl_size_t offset, 
-                  char *src, ptl_size_t len)
-{
-        ptl_size_t nob;
-
-        if (len == 0)
-                return;
-
-        /* skip complete frags before 'offset' */
-        LASSERT (niov > 0);
-        while (offset >= iov->iov_len) {
-                offset -= iov->iov_len;
-                iov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-        
-        do {
-                LASSERT (niov > 0);
-                /* copy no more than what is left of this fragment */
-                nob = MIN (iov->iov_len - offset, len);
-                /* cast before offsetting, as lib_extract_iov() does:
-                 * arithmetic on a void pointer is a GNU extension */
-                memcpy (((char *)iov->iov_base) + offset, src, nob);
-                
-                len -= nob;
-                src += nob;
-                niov--;
-                iov++;
-                offset = 0;     /* later fragments are filled from their start */
-        } while (len > 0);
-}
-
-/* Fill 'dst' with the sub-range of 'src' that starts 'offset' bytes in
- * and covers exactly 'len' bytes.  Returns the number of 'dst' entries
- * written (0 when 'len' is 0).  'src' itself is left unmodified.
- * Asserts if 'src' is too short or 'dst' has fewer than the required
- * number of entries. */
-int
-lib_extract_iov (int dst_niov, struct iovec *dst,
-                 int src_niov, struct iovec *src,
-                 ptl_size_t offset, ptl_size_t len)
-{
-        ptl_size_t      avail;
-        int             count;
-
-        if (len == 0)
-                return (0);
-
-        /* step over source frags that lie wholly before 'offset' */
-        LASSERT (src_niov > 0);
-        while (offset >= src->iov_len) {
-                offset -= (src++)->iov_len;
-                src_niov--;
-                LASSERT (src_niov > 0);
-        }
-
-        for (count = 1; ; count++, dst++, src++, src_niov--) {
-                LASSERT (src_niov > 0);
-                LASSERT (count <= dst_niov);
-
-                avail = src->iov_len - offset;
-                dst->iov_base = ((char *)src->iov_base) + offset;
-
-                if (len <= avail) {
-                        /* last frag: take only what remains of 'len' */
-                        dst->iov_len = len;
-                        return (count);
-                }
-
-                dst->iov_len = avail;
-                len -= avail;
-                offset = 0;     /* only the first frag is entered part-way */
-        }
-}
-
-#ifndef __KERNEL__
-/* Stub used when __KERNEL__ is not defined: asserts unconditionally if
- * it is ever called, and returns 0. */
-ptl_size_t
-lib_kiov_nob (int niov, ptl_kiov_t *kiov) 
-{
-        LASSERT (0);
-        return (0);
-}
-
-/* Stub used when __KERNEL__ is not defined: asserts unconditionally if
- * it is ever called. */
-void
-lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, 
-                   ptl_size_t offset, ptl_size_t len)
-{
-        LASSERT (0);
-}
-
-/* Stub used when __KERNEL__ is not defined: asserts unconditionally if
- * it is ever called. */
-void
-lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-                   char *src, ptl_size_t len)
-{
-        LASSERT (0);
-}
-
-/* Stub used when __KERNEL__ is not defined: asserts unconditionally if
- * it is ever called.  Returns 0 (no fragments) so that the function has
- * a defined return value even if LASSERT() does not abort. */
-int
-lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-                  int src_niov, ptl_kiov_t *src,
-                  ptl_size_t offset, ptl_size_t len)
-{
-        LASSERT (0);
-        return (0);
-}
-
-#else
-
-/* Return the total number of bytes described by the first 'niov' page
- * fragments of 'kiov' (0 if 'niov' is not positive). */
-ptl_size_t
-lib_kiov_nob (int niov, ptl_kiov_t *kiov)
-{
-        ptl_size_t  total = 0;
-        int         i;
-
-        for (i = 0; i < niov; i++)
-                total += kiov[i].kiov_len;
-
-        return (total);
-}
-
-/* Copy 'len' bytes out of the page fragment list 'kiov' (of 'niov'
- * entries), starting 'offset' bytes into it, to the contiguous buffer
- * 'dest'.  Each page is kmap()ed only for the duration of its copy.
- * Must not be called in interrupt context (asserted below); asserts if
- * the fragment list is too short. */
-void
-lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov,
-                   ptl_size_t offset, ptl_size_t len)
-{
-        ptl_size_t  nob;
-        char       *addr;
-
-        if (len == 0)
-                return;
-
-        LASSERT (!in_interrupt ());
-
-        /* skip complete frags before 'offset'.  NB '>=' (as in
-         * lib_copy_buf2kiov()) so a frag that ends exactly at 'offset' is
-         * skipped here rather than mapped below for a zero-byte copy */
-        LASSERT (niov > 0);
-        while (offset >= kiov->kiov_len) {
-                offset -= kiov->kiov_len;
-                kiov++;
-                niov--;
-                LASSERT (niov > 0);
-        }
-
-        do {
-                LASSERT (niov > 0);
-                nob = MIN (kiov->kiov_len - offset, len);
-
-                addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset + offset;
-                memcpy (dest, addr, nob);
-                kunmap (kiov->kiov_page);
-
-                len -= nob;
-                dest += nob;
-                niov--;
-                kiov++;
-                offset = 0;     /* later frags are read from their start */
-        } while (len > 0);
-}
-
-/* Copy 'len' bytes from the contiguous buffer 'src' into the page
- * fragment list 'kiov' (of 'niov' entries), starting 'offset' bytes
- * into it.  Must not be called in interrupt context (asserted below);
- * asserts if the fragment list is too short. */
-void
-lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, ptl_size_t offset,
-                   char *src, ptl_size_t len)
-{
-        ptl_size_t  chunk;
-        char       *page_addr;
-
-        if (len == 0)                   /* nothing to copy */
-                return;
-
-        LASSERT (!in_interrupt ());
-
-        /* advance to the fragment that contains 'offset' */
-        LASSERT (niov > 0);
-        while (offset >= kiov->kiov_len) {
-                offset -= (kiov++)->kiov_len;
-                niov--;
-                LASSERT (niov > 0);
-        }
-
-        /* 'len' is known to be non-zero on entry to this loop */
-        while (len > 0) {
-                LASSERT (niov > 0);
-                chunk = MIN (kiov->kiov_len - offset, len);
-
-                /* the page stays mapped only while it is being written */
-                page_addr = (char *)kmap (kiov->kiov_page);
-                memcpy (page_addr + kiov->kiov_offset + offset, src, chunk);
-                kunmap (kiov->kiov_page);
-
-                src += chunk;
-                len -= chunk;
-                offset = 0;     /* later frags are filled from their start */
-                kiov++;
-                niov--;
-        }
-}
-
-/* Fill 'dst' with the sub-range of 'src' that starts 'offset' bytes in
- * and covers exactly 'len' bytes.  Returns the number of 'dst' entries
- * written (0 when 'len' is 0).  'src' itself is left unmodified.
- * Asserts if 'src' is too short, if 'dst' has too few entries, or if a
- * resulting fragment would extend beyond PAGE_SIZE. */
-int
-lib_extract_kiov (int dst_niov, ptl_kiov_t *dst,
-                  int src_niov, ptl_kiov_t *src,
-                  ptl_size_t offset, ptl_size_t len)
-{
-        ptl_size_t      avail;
-        int             count;
-
-        if (len == 0)
-                return (0);
-
-        /* step over source frags that lie wholly before 'offset' */
-        LASSERT (src_niov > 0);
-        while (offset >= src->kiov_len) {
-                offset -= (src++)->kiov_len;
-                src_niov--;
-                LASSERT (src_niov > 0);
-        }
-
-        for (count = 1; ; count++, dst++, src++, src_niov--) {
-                LASSERT (src_niov > 0);
-                LASSERT (count <= dst_niov);
-
-                avail = src->kiov_len - offset;
-                dst->kiov_page = src->kiov_page;
-                dst->kiov_offset = src->kiov_offset + offset;
-
-                if (len <= avail) {
-                        /* last frag: take only what remains of 'len' */
-                        dst->kiov_len = len;
-                        LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
-                        return (count);
-                }
-
-                dst->kiov_len = avail;
-                LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE);
-
-                len -= avail;
-                offset = 0;     /* only the first frag is entered part-way */
-        }
-}
-#endif
-
-/* Hand an incoming message to the NAL's receive method, choosing between
- * the iovec and page (kiov) variants according to the MD's options.
- * 'mlen' bytes are to be delivered at 'offset' in 'md'; 'rlen' is passed
- * through to the NAL unchanged.  Returns whatever the NAL method returns. */
-ptl_err_t
-lib_recv (lib_nal_t *nal, void *private, lib_msg_t *msg, lib_md_t *md,
-          ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen)
-{
-        /* NB this test comes first: 'md' is not dereferenced when there is
-         * nothing to deliver (callers in this file pass md == NULL then) */
-        if (mlen == 0)
-                return (nal->libnal_recv(nal, private, msg, 0, NULL,
-                                         offset, mlen, rlen));
-
-        if ((md->options & PTL_MD_KIOV) != 0)
-                return (nal->libnal_recv_pages(nal, private, msg,
-                                               md->md_niov, md->md_iov.kiov,
-                                               offset, mlen, rlen));
-
-        return (nal->libnal_recv(nal, private, msg,
-                                 md->md_niov, md->md_iov.iov,
-                                 offset, mlen, rlen));
-}
-
-/* Hand an outgoing message to the NAL's send method, choosing between
- * the iovec and page (kiov) variants according to the MD's options.
- * 'len' bytes starting at 'offset' in 'md' form the payload.  Returns
- * whatever the NAL method returns. */
-ptl_err_t
-lib_send (lib_nal_t *nal, void *private, lib_msg_t *msg,
-          ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid,
-          lib_md_t *md, ptl_size_t offset, ptl_size_t len)
-{
-        /* NB this test comes first: 'md' is not dereferenced for a
-         * header-only message */
-        if (len == 0)
-                return (nal->libnal_send(nal, private, msg,
-                                         hdr, type, nid, pid,
-                                         0, NULL, offset, len));
-
-        if ((md->options & PTL_MD_KIOV) != 0)
-                return (nal->libnal_send_pages(nal, private, msg,
-                                               hdr, type, nid, pid,
-                                               md->md_niov, md->md_iov.kiov,
-                                               offset, len));
-
-        return (nal->libnal_send(nal, private, msg,
-                                 hdr, type, nid, pid,
-                                 md->md_niov, md->md_iov.iov,
-                                 offset, len));
-}
-
-/* Bind 'md' to the network operation described by 'msg'.
- * ALWAYS called holding the LIB_LOCK. */
-static void
-lib_commit_md (lib_nal_t *nal, lib_md_t *md, lib_msg_t *msg)
-{
-        lib_ni_t *ni = &nal->libnal_ni;
-
-        /* Committing marks the MD busy and uses up one unit of its
-         * threshold.  From here on the network "owns" the MD, whatever
-         * happens, until lib_finalize() is called to signal completion. */
-        msg->md = md;
-        md->pending++;
-
-        if (md->threshold != PTL_MD_THRESH_INF) {
-                LASSERT (md->threshold > 0);
-                md->threshold--;
-        }
-
-        /* account for the message and track the high-water mark */
-        ni->ni_counters.msgs_alloc++;
-        if (ni->ni_counters.msgs_max < ni->ni_counters.msgs_alloc)
-                ni->ni_counters.msgs_max = ni->ni_counters.msgs_alloc;
-
-        list_add (&msg->msg_list, &ni->ni_active_msgs);
-}
-
-/* Account for a dropped incoming message and ask the NAL to discard its
- * payload.
- *
- * CAVEAT EMPTOR: only use this for messages that we have not committed
- * to receive (init_msg() not called), which therefore cannot cause an
- * event. */
-static void
-lib_drop_message (lib_nal_t *nal, void *private, ptl_hdr_t *hdr)
-{
-        lib_counters_t *counters = &nal->libnal_ni.ni_counters;
-        unsigned long   flags;
-
-        LIB_LOCK(nal, flags);
-        counters->drop_count++;
-        counters->drop_length += hdr->payload_length;
-        LIB_UNLOCK(nal, flags);
-
-        /* Zero-length receive of the whole payload.  With a NULL msg, a
-         * call to lib_finalize by the NAL will be a noop */
-        (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
-}
-
-/*
- * Incoming messages have a lib_msg_t object associated with them
- * by the library.  This object encapsulates the state of the
- * message and allows the NAL to do non-blocking receives or sends
- * of long messages.
- *
- */
-/* Handle an incoming PUT: look for an MD matching the header and, if one
- * is found, receive the payload into it.
- *
- * The PUT-specific fields of 'hdr' are converted to host byte order in
- * place.  Returns PTL_FAIL if no MD matches, otherwise the result of
- * lib_recv(). */
-static ptl_err_t
-parse_put(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-{
-        lib_ni_t        *ni = &nal->libnal_ni;
-        ptl_size_t       mlength = 0;
-        ptl_size_t       offset = 0;
-        ptl_err_t        rc;
-        lib_md_t        *md;
-        unsigned long    flags;
-                
-        /* Convert put fields to host byte order */
-        hdr->msg.put.match_bits = le64_to_cpu(hdr->msg.put.match_bits);
-        hdr->msg.put.ptl_index = le32_to_cpu(hdr->msg.put.ptl_index);
-        hdr->msg.put.offset = le32_to_cpu(hdr->msg.put.offset);
-
-        LIB_LOCK(nal, flags);
-
-        /* on success 'mlength' and 'offset' are set by lib_match_md() and
-         * used for the receive below */
-        md = lib_match_md(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT,
-                          hdr->src_nid, hdr->src_pid,
-                          hdr->payload_length, hdr->msg.put.offset,
-                          hdr->msg.put.match_bits, msg,
-                          &mlength, &offset);
-        if (md == NULL) {
-                LIB_UNLOCK(nal, flags);
-                return (PTL_FAIL);
-        }
-
-        msg->ev.type = PTL_EVENT_PUT_END;
-        /* NB hdr_data is copied as-is (no byte order conversion) */
-        msg->ev.hdr_data = hdr->msg.put.hdr_data;
-
-        /* remember where to send the ACK only if the sender supplied an
-         * ack MD handle and the matched MD has not disabled ACKs */
-        if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) &&
-            !(md->options & PTL_MD_ACK_DISABLE)) {
-                msg->ack_wmd = hdr->msg.put.ack_wmd;
-        }
-
-        ni->ni_counters.recv_count++;
-        ni->ni_counters.recv_length += mlength;
-
-        LIB_UNLOCK(nal, flags);
-
-        /* the payload is received with the lock dropped */
-        rc = lib_recv(nal, private, msg, md, offset, mlength,
-                      hdr->payload_length);
-        if (rc != PTL_OK)
-                CERROR(LPU64": error on receiving PUT from "LPU64": %d\n",
-                       ni->ni_pid.nid, hdr->src_nid, rc);
-
-        return (rc);
-}
-
-/* Handle an incoming GET: look for an MD matching the header and, if one
- * is found, send a REPLY carrying 'mlength' bytes of it back to the
- * requester.
- *
- * The GET-specific fields of 'hdr' are converted to host byte order in
- * place.  Returns PTL_FAIL if no MD matches, otherwise the result of
- * lib_send(). */
-static ptl_err_t
-parse_get(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-{
-        lib_ni_t        *ni = &nal->libnal_ni;
-        ptl_size_t       mlength = 0;
-        ptl_size_t       offset = 0;
-        lib_md_t        *md;
-        ptl_hdr_t        reply;
-        unsigned long    flags;
-        int              rc;
-
-        /* Convert get fields to host byte order */
-        hdr->msg.get.match_bits = le64_to_cpu(hdr->msg.get.match_bits);
-        hdr->msg.get.ptl_index = le32_to_cpu(hdr->msg.get.ptl_index);
-        hdr->msg.get.sink_length = le32_to_cpu(hdr->msg.get.sink_length);
-        hdr->msg.get.src_offset = le32_to_cpu(hdr->msg.get.src_offset);
-
-        LIB_LOCK(nal, flags);
-
-        /* on success 'mlength' and 'offset' are set by lib_match_md() and
-         * describe the data sent in the REPLY below */
-        md = lib_match_md(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET,
-                          hdr->src_nid, hdr->src_pid,
-                          hdr->msg.get.sink_length, hdr->msg.get.src_offset,
-                          hdr->msg.get.match_bits, msg,
-                          &mlength, &offset);
-        if (md == NULL) {
-                LIB_UNLOCK(nal, flags);
-                return (PTL_FAIL);
-        }
-
-        msg->ev.type = PTL_EVENT_GET_END;
-        msg->ev.hdr_data = 0;
-
-        /* serving a GET is accounted as a send */
-        ni->ni_counters.send_count++;
-        ni->ni_counters.send_length += mlength;
-
-        LIB_UNLOCK(nal, flags);
-
-        /* build the REPLY header; multi-byte fields go out little-endian */
-        memset (&reply, 0, sizeof (reply));
-        reply.type     = cpu_to_le32(PTL_MSG_REPLY);
-        reply.dest_nid = cpu_to_le64(hdr->src_nid);
-        reply.dest_pid = cpu_to_le32(hdr->src_pid);
-        reply.src_nid  = cpu_to_le64(ni->ni_pid.nid);
-        reply.src_pid  = cpu_to_le32(ni->ni_pid.pid);
-        reply.payload_length = cpu_to_le32(mlength);
-
-        /* the requester's return MD handle is copied back unchanged */
-        reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd;
-
-        /* NB call lib_send() _BEFORE_ lib_recv() completes the incoming
-         * message.  Some NALs _require_ this to implement optimized GET */
-
-        rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, 
-                       hdr->src_nid, hdr->src_pid, md, offset, mlength);
-        if (rc != PTL_OK)
-                CERROR(LPU64": Unable to send REPLY for GET from "LPU64": %d\n",
-                       ni->ni_pid.nid, hdr->src_nid, rc);
-
-        /* Discard any junk after the hdr */
-        (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
-
-        return (rc);
-}
-
-/* Handle an incoming REPLY: look up the MD named by the wire handle in
- * the header, commit it to 'msg' and receive the payload into it at
- * offset 0.
- *
- * Returns PTL_FAIL (with 'msg' left uncommitted) if the MD is invalid or
- * inactive, or if the payload is longer than the MD and the MD does not
- * allow truncation; otherwise returns the result of lib_recv(). */
-static ptl_err_t
-parse_reply(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-{
-        lib_ni_t        *ni = &nal->libnal_ni;
-        lib_md_t        *md;
-        int              rlength;
-        int              length;
-        unsigned long    flags;
-        ptl_err_t        rc;
-
-        LIB_LOCK(nal, flags);
-
-        /* NB handles only looked up by creator (no flips) */
-        md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal);
-        if (md == NULL || md->threshold == 0) {
-                CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD "LPX64"."LPX64"\n",
-                        ni->ni_pid.nid, hdr->src_nid,
-                        md == NULL ? "invalid" : "inactive",
-                        hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                        hdr->msg.reply.dst_wmd.wh_object_cookie);
-
-                LIB_UNLOCK(nal, flags);
-                return (PTL_FAIL);
-        }
-
-        LASSERT (md->offset == 0);
-
-        /* 'rlength' is what was sent; 'length' is what we will deliver */
-        length = rlength = hdr->payload_length;
-
-        if (length > md->length) {
-                /* too big for the MD: drop unless truncation is allowed */
-                if ((md->options & PTL_MD_TRUNCATE) == 0) {
-                        CERROR (LPU64": Dropping REPLY from "LPU64
-                                " length %d for MD "LPX64" would overflow (%d)\n",
-                                ni->ni_pid.nid, hdr->src_nid, length,
-                                hdr->msg.reply.dst_wmd.wh_object_cookie,
-                                md->length);
-                        LIB_UNLOCK(nal, flags);
-                        return (PTL_FAIL);
-                }
-                length = md->length;
-        }
-
-        CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n",
-               hdr->src_nid, length, rlength, 
-               hdr->msg.reply.dst_wmd.wh_object_cookie);
-
-        /* sets msg->md, so from here the message counts as committed */
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_REPLY_END;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.rlength = rlength;
-        msg->ev.mlength = length;
-        msg->ev.offset = 0;
-
-        lib_md_deconstruct(nal, md, &msg->ev.md);
-        ptl_md2handle(&msg->ev.md_handle, nal, md);
-
-        ni->ni_counters.recv_count++;
-        ni->ni_counters.recv_length += length;
-
-        LIB_UNLOCK(nal, flags);
-
-        /* the payload is received with the lock dropped */
-        rc = lib_recv(nal, private, msg, md, 0, length, rlength);
-        if (rc != PTL_OK)
-                CERROR(LPU64": error on receiving REPLY from "LPU64": %d\n",
-                       ni->ni_pid.nid, hdr->src_nid, rc);
-
-        return (rc);
-}
-
-/* Handle an incoming ACK: look up the MD named by the wire handle in the
- * header, commit it to 'msg', and complete the message immediately with
- * lib_finalize().
- *
- * The ACK-specific fields of 'hdr' are converted to host byte order in
- * place.  Returns PTL_FAIL (with 'msg' left uncommitted) if the MD is
- * invalid or inactive, PTL_OK otherwise. */
-static ptl_err_t
-parse_ack(lib_nal_t *nal, ptl_hdr_t *hdr, void *private, lib_msg_t *msg)
-{
-        lib_ni_t      *ni = &nal->libnal_ni;
-        lib_md_t      *md;
-        unsigned long  flags;
-
-        /* Convert ack fields to host byte order */
-        hdr->msg.ack.match_bits = le64_to_cpu(hdr->msg.ack.match_bits);
-        hdr->msg.ack.mlength = le32_to_cpu(hdr->msg.ack.mlength);
-
-        LIB_LOCK(nal, flags);
-
-        /* NB handles only looked up by creator (no flips) */
-        md = ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal);
-        if (md == NULL || md->threshold == 0) {
-                CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD "
-                       LPX64"."LPX64"\n", ni->ni_pid.nid, hdr->src_nid, 
-                       (md == NULL) ? "invalid" : "inactive",
-                       hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                       hdr->msg.ack.dst_wmd.wh_object_cookie);
-
-                LIB_UNLOCK(nal, flags);
-                return (PTL_FAIL);
-        }
-
-        CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n",
-               ni->ni_pid.nid, hdr->src_nid, 
-               hdr->msg.ack.dst_wmd.wh_object_cookie);
-
-        /* sets msg->md, so from here the message counts as committed */
-        lib_commit_md(nal, md, msg);
-
-        msg->ev.type = PTL_EVENT_ACK;
-        msg->ev.initiator.nid = hdr->src_nid;
-        msg->ev.initiator.pid = hdr->src_pid;
-        msg->ev.mlength = hdr->msg.ack.mlength;
-        msg->ev.match_bits = hdr->msg.ack.match_bits;
-
-        lib_md_deconstruct(nal, md, &msg->ev.md);
-        ptl_md2handle(&msg->ev.md_handle, nal, md);
-
-        /* NB only the message count is bumped; no length is accounted */
-        ni->ni_counters.recv_count++;
-
-        LIB_UNLOCK(nal, flags);
-        
-        /* We have received and matched up the ack OK, create the
-         * completion event now... */
-        lib_finalize(nal, private, msg, PTL_OK);
-
-        /* ...and now discard any junk after the hdr */
-        (void) lib_recv(nal, private, NULL, NULL, 0, 0, hdr->payload_length);
-       return (PTL_OK);
-}
-
-/* Return a printable name for the message type in 'hdr', or "<UNKNOWN>"
- * if the type is not one of the known PTL_MSG_* values.  NB hdr->type is
- * compared as-is, so it must already be in host byte order. */
-static char *
-hdr_type_string (ptl_hdr_t *hdr)
-{
-        char *name = "<UNKNOWN>";
-
-        switch (hdr->type) {
-        case PTL_MSG_ACK:
-                name = "ACK";
-                break;
-        case PTL_MSG_PUT:
-                name = "PUT";
-                break;
-        case PTL_MSG_GET:
-                name = "GET";
-                break;
-        case PTL_MSG_REPLY:
-                name = "REPLY";
-                break;
-        case PTL_MSG_HELLO:
-                name = "HELLO";
-                break;
-        default:
-                break;
-        }
-
-        return (name);
-}
-
-/* Log the contents of 'hdr' with CWARN: the common source/destination
- * fields first, then the fields specific to its message type.  Types
- * other than PUT, GET, ACK and REPLY get only the common part.
- * 'nal' is not used. */
-void print_hdr(lib_nal_t *nal, ptl_hdr_t * hdr)
-{
-        char *type_str = hdr_type_string (hdr);
-
-        CWARN("P3 Header at %p of type %s\n", hdr, type_str);
-        /* NB each CWARN is a complete, newline-terminated line */
-        CWARN("    From nid/pid "LPX64"/%u\n", hdr->src_nid, hdr->src_pid);
-        CWARN("    To nid/pid "LPX64"/%u\n", hdr->dest_nid, hdr->dest_pid);
-
-        switch (hdr->type) {
-        default:
-                break;
-
-        case PTL_MSG_PUT:
-                CWARN("    Ptl index %d, ack md "LPX64"."LPX64", "
-                      "match bits "LPX64"\n",
-                      hdr->msg.put.ptl_index,
-                      hdr->msg.put.ack_wmd.wh_interface_cookie,
-                      hdr->msg.put.ack_wmd.wh_object_cookie,
-                      hdr->msg.put.match_bits);
-                CWARN("    Length %d, offset %d, hdr data "LPX64"\n",
-                      hdr->payload_length, hdr->msg.put.offset,
-                      hdr->msg.put.hdr_data);
-                break;
-
-        case PTL_MSG_GET:
-                CWARN("    Ptl index %d, return md "LPX64"."LPX64", "
-                      "match bits "LPX64"\n", hdr->msg.get.ptl_index,
-                      hdr->msg.get.return_wmd.wh_interface_cookie,
-                      hdr->msg.get.return_wmd.wh_object_cookie,
-                      hdr->msg.get.match_bits);
-                CWARN("    Length %d, src offset %d\n",
-                      hdr->msg.get.sink_length,
-                      hdr->msg.get.src_offset);
-                break;
-
-        case PTL_MSG_ACK:
-                CWARN("    dst md "LPX64"."LPX64", "
-                      "manipulated length %d\n",
-                      hdr->msg.ack.dst_wmd.wh_interface_cookie,
-                      hdr->msg.ack.dst_wmd.wh_object_cookie,
-                      hdr->msg.ack.mlength);
-                break;
-
-        case PTL_MSG_REPLY:
-                CWARN("    dst md "LPX64"."LPX64", "
-                      "length %d\n",
-                      hdr->msg.reply.dst_wmd.wh_interface_cookie,
-                      hdr->msg.reply.dst_wmd.wh_object_cookie,
-                      hdr->payload_length);
-        }
-
-}                               /* end of print_hdr() */
-
-
-/* Entry point for the NAL to hand an incoming message header to the
- * library.  The common fields of 'hdr' (and, via the parse_*() handlers,
- * the type-specific ones) are converted to host byte order in place.
- *
- * Returns PTL_FAIL only if the header cannot be accepted at all (bad
- * HELLO, unknown type, or a destination NID that is not ours).  Once the
- * header has been accepted PTL_OK is returned whatever happens next;
- * messages that then fail are finalized or dropped here. */
-ptl_err_t
-lib_parse(lib_nal_t *nal, ptl_hdr_t *hdr, void *private)
-{
-        unsigned long  flags;
-        ptl_err_t      rc;
-        lib_msg_t     *msg;
-
-        /* NB we return PTL_OK if we manage to parse the header and believe
-         * it looks OK.  Anything that goes wrong with receiving the
-         * message after that point is the responsibility of the NAL */
-
-        /* convert common fields to host byte order */
-        hdr->type = le32_to_cpu(hdr->type);
-        hdr->src_nid = le64_to_cpu(hdr->src_nid);
-        hdr->src_pid = le32_to_cpu(hdr->src_pid);
-        hdr->dest_pid = le32_to_cpu(hdr->dest_pid);
-        hdr->payload_length = le32_to_cpu(hdr->payload_length);
-
-        switch (hdr->type) {
-        case PTL_MSG_HELLO: {
-                /* dest_nid is really ptl_magicversion_t */
-                ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid;
-
-                mv->magic = le32_to_cpu(mv->magic);
-                mv->version_major = le16_to_cpu(mv->version_major);
-                mv->version_minor = le16_to_cpu(mv->version_minor);
-
-                if (mv->magic == PORTALS_PROTO_MAGIC &&
-                    mv->version_major == PORTALS_PROTO_VERSION_MAJOR &&
-                    mv->version_minor == PORTALS_PROTO_VERSION_MINOR) {
-                        CWARN (LPU64": Dropping unexpected HELLO message: "
-                               "magic %d, version %d.%d from "LPD64"\n",
-                               nal->libnal_ni.ni_pid.nid, mv->magic, 
-                               mv->version_major, mv->version_minor,
-                               hdr->src_nid);
-
-                        /* it's good but we don't want it */
-                        lib_drop_message(nal, private, hdr);
-                        return PTL_OK;
-                }
-
-                /* we got garbage */
-                CERROR (LPU64": Bad HELLO message: "
-                        "magic %d, version %d.%d from "LPD64"\n",
-                        nal->libnal_ni.ni_pid.nid, mv->magic, 
-                        mv->version_major, mv->version_minor,
-                        hdr->src_nid);
-                return PTL_FAIL;
-        }
-
-        case PTL_MSG_ACK:
-        case PTL_MSG_PUT:
-        case PTL_MSG_GET:
-        case PTL_MSG_REPLY:
-                /* dest_nid is only converted for these types; for HELLO
-                 * the same bytes are interpreted as magic/version above */
-                hdr->dest_nid = le64_to_cpu(hdr->dest_nid);
-                if (hdr->dest_nid != nal->libnal_ni.ni_pid.nid) {
-                        /* NB the space after "from" keeps the word and the
-                         * NID apart in the log */
-                        CERROR(LPU64": BAD dest NID in %s message from "
-                               LPU64" to "LPU64" (not me)\n", 
-                               nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr),
-                               hdr->src_nid, hdr->dest_nid);
-                        return PTL_FAIL;
-                }
-                break;
-
-        default:
-                CERROR(LPU64": Bad message type 0x%x from "LPU64"\n",
-                       nal->libnal_ni.ni_pid.nid, hdr->type, hdr->src_nid);
-                return PTL_FAIL;
-        }
-
-        /* We've decided we're not receiving garbage since we can parse the
-         * header.  We will return PTL_OK come what may... */
-
-        if (!list_empty (&nal->libnal_ni.ni_test_peers) && /* normally we don't */
-            fail_peer (nal, hdr->src_nid, 0))      /* shall we now? */
-        {
-                CERROR(LPU64": Dropping incoming %s from "LPU64
-                       ": simulated failure\n",
-                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
-                       hdr->src_nid);
-                lib_drop_message(nal, private, hdr);
-                return PTL_OK;
-        }
-
-        msg = lib_msg_alloc(nal);
-        if (msg == NULL) {
-                CERROR(LPU64": Dropping incoming %s from "LPU64
-                       ": can't allocate a lib_msg_t\n",
-                       nal->libnal_ni.ni_pid.nid, hdr_type_string (hdr), 
-                       hdr->src_nid);
-                lib_drop_message(nal, private, hdr);
-                return PTL_OK;
-        }
-
-        switch (hdr->type) {
-        case PTL_MSG_ACK:
-                rc = parse_ack(nal, hdr, private, msg);
-                break;
-        case PTL_MSG_PUT:
-                rc = parse_put(nal, hdr, private, msg);
-                break;
-        case PTL_MSG_GET:
-                rc = parse_get(nal, hdr, private, msg);
-                break;
-        case PTL_MSG_REPLY:
-                rc = parse_reply(nal, hdr, private, msg);
-                break;
-        default:
-                /* unreachable: other types returned in the switch above */
-                LASSERT(0);
-                rc = PTL_FAIL;                  /* no compiler warning please */
-                break;
-        }
-
-        if (rc != PTL_OK) {
-                if (msg->md != NULL) {
-                        /* committed... */
-                        lib_finalize(nal, private, msg, rc);
-                } else {
-                        /* never committed: free the msg and discard the
-                         * payload ourselves */
-                        LIB_LOCK(nal, flags);
-                        lib_msg_free(nal, msg); /* expects LIB_LOCK held */
-                        LIB_UNLOCK(nal, flags);
-
-                        lib_drop_message(nal, private, hdr);
-                }
-        }
-
-        return PTL_OK;
-        /* That's "OK I can parse it", not "OK I like it" :) */
-}
-
-/* Send the whole of the MD named by 'mdh' as a PUT to process 'id',
- * addressed to 'portal' with 'match_bits', remote 'offset' and
- * 'hdr_data'.  An ACK is requested iff 'ack' is PTL_ACK_REQ.
- * 'ac' is not used by this function.
- *
- * Returns PTL_PROCESS_INVALID on a simulated peer failure, PTL_NO_SPACE
- * if no lib_msg_t can be allocated, PTL_MD_INVALID if the MD handle is
- * invalid or the MD is inactive, and PTL_OK otherwise.  NB PTL_OK is
- * returned even when lib_send() fails; in that case the message is
- * finalized with the error code. */
-int 
-lib_api_put(nal_t *apinal, ptl_handle_md_t *mdh, 
-            ptl_ack_req_t ack, ptl_process_id_t *id,
-            ptl_pt_index_t portal, ptl_ac_index_t ac,
-            ptl_match_bits_t match_bits, 
-            ptl_size_t offset, ptl_hdr_data_t hdr_data)
-{
-        lib_nal_t        *nal = apinal->nal_data;
-        lib_ni_t         *ni = &nal->libnal_ni;
-        lib_msg_t        *msg;
-        ptl_hdr_t         hdr;
-        lib_md_t         *md;
-        unsigned long     flags;
-        int               rc;
-        
-        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
-            fail_peer (nal, id->nid, 1))           /* shall we now? */
-        {
-                CERROR("Dropping PUT to "LPU64": simulated failure\n",
-                       id->nid);
-                return PTL_PROCESS_INVALID;
-        }
-
-        /* NB allocated before the lock is taken */
-        msg = lib_msg_alloc(nal);
-        if (msg == NULL) {
-                CERROR(LPU64": Dropping PUT to "LPU64": ENOMEM on lib_msg_t\n",
-                       ni->ni_pid.nid, id->nid);
-                return PTL_NO_SPACE;
-        }
-
-        LIB_LOCK(nal, flags);
-
-        md = ptl_handle2md(mdh, nal);
-        if (md == NULL || md->threshold == 0) {
-                /* freed while the lock is still held */
-                lib_msg_free(nal, msg);
-                LIB_UNLOCK(nal, flags);
-        
-                return PTL_MD_INVALID;
-        }
-
-        CDEBUG(D_NET, "PtlPut -> "LPX64"\n", id->nid);
-
-        /* build the PUT header; multi-byte fields go out little-endian */
-        memset (&hdr, 0, sizeof (hdr));
-        hdr.type     = cpu_to_le32(PTL_MSG_PUT);
-        hdr.dest_nid = cpu_to_le64(id->nid);
-        hdr.dest_pid = cpu_to_le32(id->pid);
-        hdr.src_nid  = cpu_to_le64(ni->ni_pid.nid);
-        hdr.src_pid  = cpu_to_le32(ni->ni_pid.pid);
-        hdr.payload_length = cpu_to_le32(md->length);
-
-        /* NB handles only looked up by creator (no flips) */
-        if (ack == PTL_ACK_REQ) {
-                hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie;
-                hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie;
-        } else {
-                hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE;
-        }
-
-        hdr.msg.put.match_bits = cpu_to_le64(match_bits);
-        hdr.msg.put.ptl_index = cpu_to_le32(portal);
-        hdr.msg.put.offset = cpu_to_le32(offset);
-        /* NB hdr_data is copied as-is (no byte order conversion) */
-        hdr.msg.put.hdr_data = hdr_data;
-
-        lib_commit_md(nal, md, msg);
-        
-        msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator.nid = ni->ni_pid.nid;
-        msg->ev.initiator.pid = ni->ni_pid.pid;
-        msg->ev.pt_index = portal;
-        msg->ev.match_bits = match_bits;
-        msg->ev.rlength = md->length;
-        msg->ev.mlength = md->length;
-        msg->ev.offset = offset;
-        msg->ev.hdr_data = hdr_data;
-
-        lib_md_deconstruct(nal, md, &msg->ev.md);
-        ptl_md2handle(&msg->ev.md_handle, nal, md);
-
-        ni->ni_counters.send_count++;
-        ni->ni_counters.send_length += md->length;
-
-        LIB_UNLOCK(nal, flags);
-        
-        /* the payload is the whole MD, sent from local offset 0; 'offset'
-         * travels only in the header */
-        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_PUT,
-                       id->nid, id->pid, md, 0, md->length);
-        if (rc != PTL_OK) {
-                CERROR("Error sending PUT to "LPX64": %d\n",
-                       id->nid, rc);
-                lib_finalize (nal, NULL, msg, rc);
-        }
-        
-        /* completion will be signalled by an event */
-        return PTL_OK;
-}
-
-/* Create the message that stands in for a REPLY when the NAL delivers
- * GET data directly into the MD of 'getmsg'.  The MD is committed to the
- * new message, which is set up for a PTL_EVENT_REPLY_END event covering
- * the whole MD.
- *
- * Returns the new message, or NULL (after bumping the drop counters) if
- * no message could be allocated or the MD is inactive. */
-lib_msg_t * 
-lib_create_reply_msg (lib_nal_t *nal, ptl_nid_t peer_nid, lib_msg_t *getmsg)
-{
-        /* The NAL can DMA direct to the GET md (i.e. no REPLY msg).  This
-         * returns a msg for the NAL to pass to lib_finalize() when the sink
-         * data has been received.
-         *
-         * CAVEAT EMPTOR: 'getmsg' is the original GET, which is freed when
-         * lib_finalize() is called on it, so the NAL must call this first */
-
-        lib_ni_t        *ni = &nal->libnal_ni;
-        /* NB allocated before the lock is taken; may be NULL (checked
-         * below, under the lock) */
-        lib_msg_t       *msg = lib_msg_alloc(nal);
-        lib_md_t        *getmd = getmsg->md;
-        unsigned long    flags;
-
-        LIB_LOCK(nal, flags);
-
-        LASSERT (getmd->pending > 0);
-
-        if (msg == NULL) {
-                CERROR ("Dropping REPLY from "LPU64": can't allocate msg\n",
-                        peer_nid);
-                goto drop;
-        }
-
-        if (getmd->threshold == 0) {
-                CERROR ("Dropping REPLY from "LPU64" for inactive MD %p\n",
-                        peer_nid, getmd);
-                goto drop_msg;
-        }
-
-        LASSERT (getmd->offset == 0);
-
-        CDEBUG(D_NET, "Reply from "LPU64" md %p\n", peer_nid, getmd);
-
-        lib_commit_md (nal, getmd, msg);
-
-        msg->ev.type = PTL_EVENT_REPLY_END;
-        msg->ev.initiator.nid = peer_nid;
-        msg->ev.initiator.pid = 0;      /* XXX FIXME!!! */
-        msg->ev.rlength = msg->ev.mlength = getmd->length;
-        msg->ev.offset = 0;
-
-        lib_md_deconstruct(nal, getmd, &msg->ev.md);
-        ptl_md2handle(&msg->ev.md_handle, nal, getmd);
-
-        ni->ni_counters.recv_count++;
-        ni->ni_counters.recv_length += getmd->length;
-
-        LIB_UNLOCK(nal, flags);
-
-        return msg;
-
-        /* both error exits are reached with the lock held */
- drop_msg:
-        lib_msg_free(nal, msg);
- drop:
-        nal->libnal_ni.ni_counters.drop_count++;
-        nal->libnal_ni.ni_counters.drop_length += getmd->length;
-
-        LIB_UNLOCK (nal, flags);
-
-        return NULL;
-}
-
-/*
- * lib_api_get: start a GET of remote data into the local MD 'mdh'.
- *
- * Builds a PTL_MSG_GET wire header addressed to 'id' that names the remote
- * portal, match bits and source offset, and carries a wire handle for the
- * local MD so the reply can find it.  The MD is committed to a freshly
- * allocated message, a PTL_EVENT_SEND_END event is prepared, and the
- * header is passed to lib_send().
- *
- * Returns:
- *   PTL_PROCESS_INVALID  fail_peer() asked for a simulated failure
- *   PTL_NO_SPACE         no lib_msg_t could be allocated
- *   PTL_MD_INVALID       'mdh' doesn't resolve to an MD, or the MD's
- *                        threshold is 0
- *   PTL_OK               otherwise; NB this includes a failed lib_send(),
- *                        in which case the message is handed to
- *                        lib_finalize() with the error code
- *
- * NB 'ac' is part of the signature but is not used in this function.
- */
-int 
-lib_api_get(nal_t *apinal, ptl_handle_md_t *mdh, ptl_process_id_t *id,
-            ptl_pt_index_t portal, ptl_ac_index_t ac,
-            ptl_match_bits_t match_bits, ptl_size_t offset)
-{
-        lib_nal_t        *nal = apinal->nal_data;
-        lib_ni_t         *ni = &nal->libnal_ni;
-        lib_msg_t        *msg;
-        ptl_hdr_t         hdr;
-        lib_md_t         *md;
-        unsigned long     flags;
-        int               rc;
-        
-        if (!list_empty (&ni->ni_test_peers) && /* normally we don't */
-            fail_peer (nal, id->nid, 1))           /* shall we now? */
-        {
-                /* This is the GET path: say so in the log */
-                CERROR("Dropping GET to "LPX64": simulated failure\n",
-                       id->nid);
-                return PTL_PROCESS_INVALID;
-        }
-
-        /* Allocate before taking the lock */
-        msg = lib_msg_alloc(nal);
-        if (msg == NULL) {
-                CERROR("Dropping GET to "LPU64": ENOMEM on lib_msg_t\n",
-                       id->nid);
-                return PTL_NO_SPACE;
-        }
-
-        LIB_LOCK(nal, flags);
-
-        md = ptl_handle2md(mdh, nal);
-        if (md == NULL || !md->threshold) {
-                /* stale handle or inactive MD: give the message back */
-                lib_msg_free(nal, msg);
-                LIB_UNLOCK(nal, flags);
-
-                return PTL_MD_INVALID;
-        }
-
-        CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid,
-               (unsigned long)id->pid);
-
-        /* Wire header fields are converted to little-endian */
-        memset (&hdr, 0, sizeof (hdr));
-        hdr.type     = cpu_to_le32(PTL_MSG_GET);
-        hdr.dest_nid = cpu_to_le64(id->nid);
-        hdr.dest_pid = cpu_to_le32(id->pid);
-        hdr.src_nid  = cpu_to_le64(ni->ni_pid.nid);
-        hdr.src_pid  = cpu_to_le32(ni->ni_pid.pid);
-        hdr.payload_length = 0;
-
-        /* NB handles only looked up by creator (no flips) */
-        hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie;
-        hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie;
-
-        hdr.msg.get.match_bits = cpu_to_le64(match_bits);
-        hdr.msg.get.ptl_index = cpu_to_le32(portal);
-        hdr.msg.get.src_offset = cpu_to_le32(offset);
-        hdr.msg.get.sink_length = cpu_to_le32(md->length);
-
-        lib_commit_md(nal, md, msg);
-
-        /* Event to deliver when this message is finalized */
-        msg->ev.type = PTL_EVENT_SEND_END;
-        msg->ev.initiator = ni->ni_pid;
-        msg->ev.pt_index = portal;
-        msg->ev.match_bits = match_bits;
-        msg->ev.rlength = md->length;
-        msg->ev.mlength = md->length;
-        msg->ev.offset = offset;
-        msg->ev.hdr_data = 0;
-
-        lib_md_deconstruct(nal, md, &msg->ev.md);
-        ptl_md2handle(&msg->ev.md_handle, nal, md);
-
-        ni->ni_counters.send_count++;
-
-        LIB_UNLOCK(nal, flags);
-
-        /* The send itself happens outside the lock */
-        rc = lib_send (nal, NULL, msg, &hdr, PTL_MSG_GET,
-                       id->nid, id->pid, NULL, 0, 0);
-        if (rc != PTL_OK) {
-                CERROR(LPU64": error sending GET to "LPU64": %d\n",
-                       ni->ni_pid.nid, id->nid, rc);
-                lib_finalize (nal, NULL, msg, rc);
-        }
-        
-        /* completion will be signalled by an event */
-        return PTL_OK;
-}
-
-/*
- * Check the wire protocol layout at runtime: each protocol constant,
- * structure size, field offset and field size listed below is LASSERTed
- * against its recorded value.
- *
- * NB the body is generated by 'wirecheck' (see the comment inside), so it
- * should presumably be regenerated rather than edited by hand.
- */
-void lib_assert_wire_constants (void)
-{
-        /* Wire protocol assertions generated by 'wirecheck'
-         * running on Linux mdevi 2.4.21-p4smp-55chaos #1 SMP Tue Jun 8 14:38:44 PDT 2004 i686 i686 i
-         * with gcc version 3.2.3 20030502 (Red Hat Linux 3.2.3-34) */
-
-
-        /* Constants... */
-        LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded);
-        LASSERT (PORTALS_PROTO_VERSION_MAJOR == 1);
-        LASSERT (PORTALS_PROTO_VERSION_MINOR == 0);
-        LASSERT (PTL_MSG_ACK == 0);
-        LASSERT (PTL_MSG_PUT == 1);
-        LASSERT (PTL_MSG_GET == 2);
-        LASSERT (PTL_MSG_REPLY == 3);
-        LASSERT (PTL_MSG_HELLO == 4);
-
-        /* Checks for struct ptl_handle_wire_t */
-        LASSERT ((int)sizeof(ptl_handle_wire_t) == 16);
-        LASSERT ((int)offsetof(ptl_handle_wire_t, wh_interface_cookie) == 0);
-        LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8);
-        LASSERT ((int)offsetof(ptl_handle_wire_t, wh_object_cookie) == 8);
-        LASSERT ((int)sizeof(((ptl_handle_wire_t *)0)->wh_object_cookie) == 8);
-
-        /* Checks for struct ptl_magicversion_t */
-        LASSERT ((int)sizeof(ptl_magicversion_t) == 8);
-        LASSERT ((int)offsetof(ptl_magicversion_t, magic) == 0);
-        LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->magic) == 4);
-        LASSERT ((int)offsetof(ptl_magicversion_t, version_major) == 4);
-        LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_major) == 2);
-        LASSERT ((int)offsetof(ptl_magicversion_t, version_minor) == 6);
-        LASSERT ((int)sizeof(((ptl_magicversion_t *)0)->version_minor) == 2);
-
-        /* Checks for struct ptl_hdr_t */
-        LASSERT ((int)sizeof(ptl_hdr_t) == 72);
-        LASSERT ((int)offsetof(ptl_hdr_t, dest_nid) == 0);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_nid) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, src_nid) == 8);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_nid) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, dest_pid) == 16);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->dest_pid) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, src_pid) == 20);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->src_pid) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, type) == 24);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->type) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, payload_length) == 28);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->payload_length) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg) == 40);
-
-        /* Ack */
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.dst_wmd) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.match_bits) == 48);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.match_bits) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.ack.mlength) == 56);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.ack.mlength) == 4);
-
-        /* Put */
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ack_wmd) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.put.match_bits) == 48);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.match_bits) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.put.hdr_data) == 56);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.hdr_data) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.put.ptl_index) == 64);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.ptl_index) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.put.offset) == 68);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.put.offset) == 4);
-
-        /* Get */
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.get.return_wmd) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.return_wmd) == 16);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.get.match_bits) == 48);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.match_bits) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.get.ptl_index) == 56);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.ptl_index) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.get.src_offset) == 60);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.src_offset) == 4);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.get.sink_length) == 64);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.get.sink_length) == 4);
-
-        /* Reply */
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.reply.dst_wmd) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16);
-
-        /* Hello */
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.incarnation) == 32);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.incarnation) == 8);
-        LASSERT ((int)offsetof(ptl_hdr_t, msg.hello.type) == 40);
-        LASSERT ((int)sizeof(((ptl_hdr_t *)0)->msg.hello.type) == 4);
-}
diff --git a/lustre/portals/portals/lib-msg.c b/lustre/portals/portals/lib-msg.c
deleted file mode 100644 (file)
index 54e89bc..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-msg.c
- * Message decoding, parsing and finalizing routines
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __KERNEL__
-# include <stdio.h>
-#else
-# define DEBUG_SUBSYSTEM S_PORTALS
-# include <linux/kp30.h>
-#endif
-
-#include <portals/lib-p30.h>
-
-void
-lib_enq_event_locked (lib_nal_t *nal, void *private, 
-                      lib_eq_t *eq, ptl_event_t *ev)
-{
-        ptl_event_t  *eq_slot;
-
-        /* Allocate the next queue slot */
-        ev->link = ev->sequence = eq->eq_enq_seq++;
-        /* NB we don't support START events yet and we don't create a separate
-         * UNLINK event unless an explicit unlink succeeds, so the link
-         * sequence is pretty useless */
-
-        /* We don't support different uid/jids yet */
-        ev->uid = 0;
-        ev->jid = 0;
-        
-        /* size must be a power of 2 to handle sequence # overflow */
-        LASSERT (eq->eq_size != 0 &&
-                 eq->eq_size == LOWEST_BIT_SET (eq->eq_size));
-        eq_slot = eq->eq_events + (ev->sequence & (eq->eq_size - 1));
-
-        /* There is no race since both event consumers and event producers
-         * take the LIB_LOCK(), so we don't screw around with memory
-         * barriers, setting the sequence number last or wierd structure
-         * layout assertions. */
-        *eq_slot = *ev;
-
-        /* Call the callback handler (if any) */
-        if (eq->eq_callback != NULL)
-                eq->eq_callback (eq_slot);
-
-        /* Wake anyone sleeping for an event (see lib-eq.c) */
-#ifdef __KERNEL__
-        if (waitqueue_active(&nal->libnal_ni.ni_waitq))
-                wake_up_all(&nal->libnal_ni.ni_waitq);
-#else
-        pthread_cond_broadcast(&nal->libnal_ni.ni_cond);
-#endif
-}
-
-/*
- * lib_finalize: complete message 'msg' with completion status 'status'.
- *
- * If the message carries a real ACK handle and 'status' is PTL_OK, an ACK
- * is sent back to the initiator first.  Then, under LIB_LOCK, the
- * message's reference on its MD is dropped, the event (with ni_fail_type
- * and unlinked filled in) is queued if the MD has an EQ, the MD is
- * unlinked if appropriate, and the message is removed from its list and
- * freed.
- *
- * A NULL 'msg' is a no-op.  'private' is passed through to lib_send()
- * and lib_enq_event_locked().
- */
-void 
-lib_finalize (lib_nal_t *nal, void *private, lib_msg_t *msg, ptl_err_t status)
-{
-        lib_md_t     *md;
-        int           unlink;
-        unsigned long flags;
-        int           rc;
-        ptl_hdr_t     ack;
-
-        if (msg == NULL)
-                return;
-
-        /* Only send an ACK if the PUT completed successfully */
-        if (status == PTL_OK &&
-            !ptl_is_wire_handle_none(&msg->ack_wmd)) {
-
-                LASSERT(msg->ev.type == PTL_EVENT_PUT_END);
-
-                memset (&ack, 0, sizeof (ack));
-                ack.type     = cpu_to_le32(PTL_MSG_ACK);
-                ack.dest_nid = cpu_to_le64(msg->ev.initiator.nid);
-                ack.dest_pid = cpu_to_le32(msg->ev.initiator.pid);
-                ack.src_nid  = cpu_to_le64(nal->libnal_ni.ni_pid.nid);
-                ack.src_pid  = cpu_to_le32(nal->libnal_ni.ni_pid.pid);
-                ack.payload_length = 0;
-
-                /* NOTE(review): match_bits is copied without cpu_to_le64(),
-                 * unlike the other header fields here and unlike
-                 * lib_api_get(); confirm against the receive path before
-                 * touching it, since it affects the wire format */
-                ack.msg.ack.dst_wmd = msg->ack_wmd;
-                ack.msg.ack.match_bits = msg->ev.match_bits;
-                ack.msg.ack.mlength = cpu_to_le32(msg->ev.mlength);
-
-                /* NB the ACK is sent before LIB_LOCK is taken, and with
-                 * no lib_msg_t of its own (NULL) */
-                rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK,
-                               msg->ev.initiator.nid, msg->ev.initiator.pid, 
-                               NULL, 0, 0);
-                if (rc != PTL_OK) {
-                        /* send failed: there's nothing else to clean up. */
-                        CERROR("Error %d sending ACK to "LPX64"\n", 
-                               rc, msg->ev.initiator.nid);
-                }
-        }
-
-        md = msg->md;
-
-        LIB_LOCK(nal, flags);
-
-        /* Now it's safe to drop my caller's ref */
-        md->pending--;
-        LASSERT (md->pending >= 0);
-
-        /* Should I unlink this MD? */
-        if (md->pending != 0)                   /* other refs */
-                unlink = 0;
-        else if ((md->md_flags & PTL_MD_FLAG_ZOMBIE) != 0)
-                unlink = 1;                     /* zombie: always unlink */
-        else if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINK) == 0)
-                unlink = 0;                     /* auto-unlink not set */
-        else
-                unlink = lib_md_exhausted(md);
-
-        /* The event reports the completion status and whether this
-         * finalize unlinked the MD */
-        msg->ev.ni_fail_type = status;
-        msg->ev.unlinked = unlink;
-
-        /* No EQ on the MD means no event is delivered */
-        if (md->eq != NULL)
-                lib_enq_event_locked(nal, private, md->eq, &msg->ev);
-
-        /* NB the event is queued before the MD is unlinked */
-        if (unlink)
-                lib_md_unlink(nal, md);
-
-        list_del (&msg->msg_list);
-        nal->libnal_ni.ni_counters.msgs_alloc--;
-        lib_msg_free(nal, msg);
-
-        LIB_UNLOCK(nal, flags);
-}
diff --git a/lustre/portals/portals/lib-ni.c b/lustre/portals/portals/lib-ni.c
deleted file mode 100644 (file)
index 0f298a0..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-ni.c
- * Network status registers and distance functions.
- *
- *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
- *  Copyright (c) 2001-2002 Sandia National Laboratories
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PORTALS
-#include <portals/lib-p30.h>
-
-#define MAX_DIST 18446744073709551615ULL
-
-/*
- * Read status register 'sr_idx' of the interface into '*status'.
- *
- * Each known register maps onto one of the interface counters.  An
- * unknown index stores 0 and returns PTL_SR_INDEX_INVALID; otherwise
- * PTL_OK is returned.
- */
-int lib_api_ni_status (nal_t *apinal, ptl_sr_index_t sr_idx,
-                       ptl_sr_value_t *status)
-{
-        lib_nal_t      *libnal = apinal->nal_data;
-        lib_counters_t *ctrs = &libnal->libnal_ni.ni_counters;
-        int             rc = PTL_OK;
-
-        switch (sr_idx) {
-        case PTL_SR_DROP_COUNT:
-                *status = ctrs->drop_count;
-                break;
-        case PTL_SR_DROP_LENGTH:
-                *status = ctrs->drop_length;
-                break;
-        case PTL_SR_RECV_COUNT:
-                *status = ctrs->recv_count;
-                break;
-        case PTL_SR_RECV_LENGTH:
-                *status = ctrs->recv_length;
-                break;
-        case PTL_SR_SEND_COUNT:
-                *status = ctrs->send_count;
-                break;
-        case PTL_SR_SEND_LENGTH:
-                *status = ctrs->send_length;
-                break;
-        case PTL_SR_MSGS_MAX:
-                *status = ctrs->msgs_max;
-                break;
-        default:
-                /* unknown register: report 0 along with the error */
-                *status = 0;
-                rc = PTL_SR_INDEX_INVALID;
-                break;
-        }
-
-        return rc;
-}
-
-
-/*
- * Ask the NAL for the distance to the node named by 'pid'; the work is
- * delegated entirely to the NAL's libnal_dist method, whose result is
- * returned unchanged.
- */
-int lib_api_ni_dist (nal_t *apinal, ptl_process_id_t *pid, unsigned long *dist)
-{
-        lib_nal_t *libnal = apinal->nal_data;
-        int        rc;
-
-        rc = libnal->libnal_dist(libnal, pid->nid, dist);
-        return rc;
-}
diff --git a/lustre/portals/portals/lib-pid.c b/lustre/portals/portals/lib-pid.c
deleted file mode 100644 (file)
index ff2a601..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * lib/lib-pid.c
- *
- * Process identification routines
- * Copyright (C) 2001-2003 Cluster File Systems, Inc.
- * Copyright (C) 2001-2003 Cluster File Systems, Inc.
- *
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* This should be removed.  The NAL should have the PID information */
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#if defined (__KERNEL__)
-#       include <linux/kernel.h>
-extern int getpid(void);
-#else
-#       include <stdio.h>
-#       include <unistd.h>
-#endif
-#include <portals/lib-p30.h>
-
-/*
- * Copy this interface's process id (ni_pid) into '*pid'.
- * Always returns PTL_OK.
- */
-int
-lib_api_get_id(nal_t *apinal, ptl_process_id_t *pid)
-{
-        lib_nal_t *libnal = apinal->nal_data;
-        lib_ni_t  *ni = &libnal->libnal_ni;
-
-        *pid = ni->ni_pid;
-        return PTL_OK;
-}
diff --git a/lustre/portals/portals/module.c b/lustre/portals/portals/module.c
deleted file mode 100644 (file)
index c1303b7..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-#define DEBUG_SUBSYSTEM S_PORTALS
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/smp_lock.h>
-#include <linux/unistd.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <asm/uaccess.h>
-#include <asm/segment.h>
-#include <linux/miscdevice.h>
-
-#include <portals/lib-p30.h>
-#include <portals/p30.h>
-#include <portals/nal.h>
-#include <linux/kp30.h>
-#include <linux/kpr.h>
-#include <linux/portals_compat25.h>
-
-extern void (kping_client)(struct portal_ioctl_data *);
-
-/*
- * kportal_ioctl: handle the portals ioctls.
- *
- *   data  ioctl argument block; the fields read depend on 'cmd'
- *   cmd   IOC_PORTAL_PING, IOC_PORTAL_GET_NID or IOC_PORTAL_FAIL_NID
- *   arg   used as the user-space address that 'data' is copied back to
- *         (IOC_PORTAL_GET_NID only)
- *
- * Returns 0 on success, -EINVAL for an unknown command or a failed
- * portals call, and -EFAULT if the copy back to user space fails.
- * NB IOC_PORTAL_PING returns 0 even when the ping client symbol isn't
- * available; that case is only logged.
- */
-static int kportal_ioctl(struct portal_ioctl_data *data, 
-                         unsigned int cmd, unsigned long arg)
-{
-        int err;
-        char str[PTL_NALFMT_SIZE];
-        ENTRY;
-
-        switch (cmd) {
-        case IOC_PORTAL_PING: {
-                void (*ping)(struct portal_ioctl_data *);
-
-                CDEBUG(D_IOCTL, "doing %d pings to nid "LPX64" (%s)\n",
-                       data->ioc_count, data->ioc_nid,
-                       portals_nid2str(data->ioc_nal, data->ioc_nid, str));
-                /* The ping client is looked up by symbol at call time and
-                 * released again straight after use */
-                ping = PORTAL_SYMBOL_GET(kping_client);
-                if (!ping)
-                        CERROR("PORTAL_SYMBOL_GET failed\n");
-                else {
-                        ping(data);
-                        PORTAL_SYMBOL_PUT(kping_client);
-                }
-                RETURN(0);
-        }
-
-        case IOC_PORTAL_GET_NID: {
-                ptl_handle_ni_t    nih;
-                ptl_process_id_t   pid;
-
-                CDEBUG (D_IOCTL, "Getting nid for nal [%x]\n", data->ioc_nal);
-
-                /* PTL_IFACE_DUP is accepted too: the interface handle is
-                 * still used below */
-                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL,
-                                NULL, &nih);
-                if (!(err == PTL_OK || err == PTL_IFACE_DUP))
-                        RETURN (-EINVAL);
-
-                err = PtlGetId (nih, &pid);
-                LASSERT (err == PTL_OK);
-
-                PtlNIFini(nih);
-
-                data->ioc_nid = pid.nid;
-                if (copy_to_user ((char *)arg, data, sizeof (*data)))
-                        RETURN (-EFAULT);
-                RETURN(0);
-        }
-
-        case IOC_PORTAL_FAIL_NID: {
-                ptl_handle_ni_t    nih;
-
-                CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n",
-                        data->ioc_nal, data->ioc_nid, data->ioc_count);
-
-                err = PtlNIInit(data->ioc_nal, LUSTRE_SRV_PTL_PID, NULL,
-                                NULL, &nih);
-                /* Leave via RETURN like every other exit, so it pairs
-                 * with ENTRY */
-                if (!(err == PTL_OK || err == PTL_IFACE_DUP))
-                        RETURN (-EINVAL);
-
-                if (err == PTL_OK) {
-                        /* There's no point in failing an interface that
-                         * came into existence just for this */
-                        err = -EINVAL;
-                } else {
-                        err = PtlFailNid (nih, data->ioc_nid, data->ioc_count);
-                        if (err != PTL_OK)
-                                err = -EINVAL;
-                }
-
-                PtlNIFini(nih);
-                RETURN (err);
-        }
-        default:
-                RETURN(-EINVAL);
-        }
-        /* Not Reached */
-}
-
-DECLARE_IOCTL_HANDLER(kportal_ioctl_handler, kportal_ioctl);
-
-/*
- * Module entry point: bring up portals, then hook kportal_ioctl into
- * libcfs.  A PtlInit() failure is logged and returned as is; a failure
- * to register the ioctl handler trips the LASSERT.
- */
-static int init_kportals_module(void)
-{
-        int rc;
-        ENTRY;
-
-        rc = PtlInit(NULL);
-        if (rc == 0) {
-                rc = libcfs_register_ioctl(&kportal_ioctl_handler);
-                LASSERT (rc == 0);
-        } else {
-                CERROR("PtlInit: error %d\n", rc);
-        }
-
-        RETURN(rc);
-}
-
-/*
- * Module exit point: undo init_kportals_module() in reverse order -
- * drop the ioctl handler first (which must succeed), then shut
- * portals down.
- */
-static void exit_kportals_module(void)
-{
-        int rc = libcfs_deregister_ioctl(&kportal_ioctl_handler);
-
-        LASSERT (rc == 0);
-
-        PtlFini();
-}
-
-/* Symbols this module exports to other kernel modules */
-
-/* NAL registration */
-EXPORT_SYMBOL(ptl_register_nal);
-EXPORT_SYMBOL(ptl_unregister_nal);
-
-/* Portals API (Ptl*) entry points and error strings */
-EXPORT_SYMBOL(ptl_err_str);
-EXPORT_SYMBOL(PtlMEAttach);
-EXPORT_SYMBOL(PtlMEInsert);
-EXPORT_SYMBOL(PtlMEUnlink);
-EXPORT_SYMBOL(PtlEQAlloc);
-EXPORT_SYMBOL(PtlMDAttach);
-EXPORT_SYMBOL(PtlMDUnlink);
-EXPORT_SYMBOL(PtlNIInit);
-EXPORT_SYMBOL(PtlNIFini);
-EXPORT_SYMBOL(PtlInit);
-EXPORT_SYMBOL(PtlFini);
-EXPORT_SYMBOL(PtlSnprintHandle);
-EXPORT_SYMBOL(PtlPut);
-EXPORT_SYMBOL(PtlGet);
-EXPORT_SYMBOL(PtlEQWait);
-EXPORT_SYMBOL(PtlEQFree);
-EXPORT_SYMBOL(PtlEQGet);
-EXPORT_SYMBOL(PtlGetId);
-EXPORT_SYMBOL(PtlMDBind);
-/* Library-side (lib_*) helpers; presumably the ones NALs call - confirm */
-EXPORT_SYMBOL(lib_iov_nob);
-EXPORT_SYMBOL(lib_copy_iov2buf);
-EXPORT_SYMBOL(lib_copy_buf2iov);
-EXPORT_SYMBOL(lib_extract_iov);
-EXPORT_SYMBOL(lib_kiov_nob);
-EXPORT_SYMBOL(lib_copy_kiov2buf);
-EXPORT_SYMBOL(lib_copy_buf2kiov);
-EXPORT_SYMBOL(lib_extract_kiov);
-EXPORT_SYMBOL(lib_finalize);
-EXPORT_SYMBOL(lib_parse);
-EXPORT_SYMBOL(lib_create_reply_msg);
-EXPORT_SYMBOL(lib_init);
-EXPORT_SYMBOL(lib_fini);
-
-/* Module metadata and init/exit hooks */
-MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
-MODULE_DESCRIPTION("Portals v3.1");
-MODULE_LICENSE("GPL");
-module_init(init_kportals_module);
-module_exit(exit_kportals_module);
diff --git a/lustre/portals/router/.cvsignore b/lustre/portals/router/.cvsignore
deleted file mode 100644 (file)
index 5ed596b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-.deps
-Makefile
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/router/Makefile.in b/lustre/portals/router/Makefile.in
deleted file mode 100644 (file)
index 3bb6cf7..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-# The kernel module built in this directory and the objects it is made of.
-MODULES := kptlrouter
-kptlrouter-objs := router.o proc.o
-
-# Placeholder replaced when this .in file is processed; presumably it
-# expands to the shared module build rules - confirm against configure.
-@INCLUDE_RULES@
diff --git a/lustre/portals/router/Makefile.mk b/lustre/portals/router/Makefile.mk
deleted file mode 100644 (file)
index 9b02c03..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Shared settings live one directory above the source directory.
-include $(src)/../Kernelenv
-
-# Build kptlrouter.o, which is made up of router.o and proc.o.
-obj-y += kptlrouter.o
-kptlrouter-objs    := router.o proc.o
diff --git a/lustre/portals/router/autoMakefile.am b/lustre/portals/router/autoMakefile.am
deleted file mode 100644 (file)
index fa11e8c..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# The router module is only listed for module builds (MODULES) that are
-# not using Cray portals (!CRAY_PORTALS).
-if MODULES
-if !CRAY_PORTALS
-modulenet_DATA = kptlrouter$(KMODEXT)
-endif
-endif
-
-# Object and module build products to clean up, and the sources to ship:
-# one .c per object in kptlrouter-objs, plus router.h.
-# NOTE(review): kptlrouter-objs is not set in this file; presumably it
-# comes from the Makefile generated from Makefile.in - confirm.
-MOSTLYCLEANFILES = *.o *.ko *.mod.c
-DIST_SOURCES = $(kptlrouter-objs:%.o=%.c) router.h
diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c
deleted file mode 100644 (file)
index 61b6880..0000000
+++ /dev/null
@@ -1,242 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "router.h"
-
-#define KPR_PROC_ROUTER "sys/portals/router"
-#define KPR_PROC_ROUTES "sys/portals/routes"
-
-/* Used for multi-page route list book keeping: state carried from one
- * kpr_proc_routes_read() call to the next while a long route list is
- * read out page by page */
-struct proc_route_data {
-        struct list_head *curr;         /* next route to print; NULL when no
-                                         * walk is in progress */
-        unsigned int generation;        /* kpr_routes_generation sampled when
-                                         * the walk started */
-        off_t skip;                     /* bytes of the first line in the
-                                         * page already given to the reader */
-        rwlock_t proc_route_rwlock;     /* write-locked for the whole of
-                                         * kpr_proc_routes_read() */
-} kpr_read_routes_data;
-
-/* nal2name support re-used from utils/portals.c */
-struct name2num {
-        char *name;     /* printable name; NULL marks the end of a table */
-        int   num;      /* number the name stands for */
-} nalnames[] = {
-        /* NAL names by NAL number, terminated by the NULL name entry */
-        { "any",         0},
-        { "elan",        QSWNAL},
-        { "tcp",         SOCKNAL},
-        { "gm",          GMNAL},
-        { "ib",          OPENIBNAL},
-        { "iib",         IIBNAL},
-        { "lo",          LONAL},
-        { NULL,          -1}
-};
-
-/* Find the entry for 'num' in a NULL-name terminated 'table'.
- * Returns the first matching entry, or NULL if there is none. */
-static struct name2num *name2num_lookup_num(struct name2num *table, int num)
-{
-        struct name2num *entry;
-
-        for (entry = table; entry->name != NULL; entry++) {
-                if (entry->num == num)
-                        return entry;
-        }
-
-        return NULL;
-}
-
-/* Printable name for NAL number 'nal'; "???" if it isn't in nalnames */
-static char *nal2name(int nal)
-{
-        struct name2num *match = name2num_lookup_num(nalnames, nal);
-
-        if (match != NULL)
-                return match->name;
-
-        return "???";
-}
-
-
-/*
- * read_proc handler for KPR_PROC_ROUTER: print the forwarding statistics
- * as one line, "<bytes> <packets> <errors> <queue depth>\n".
- *
- * Everything fits in a single read: *eof is always set, and any read at
- * a non-zero offset returns 0 bytes.  Returns the number of bytes written
- * to 'page'.  'count' and 'data' are not used.
- */
-static int kpr_proc_router_read(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
-{
-        /* Sample the counters up front */
-        unsigned long long bytes = kpr_fwd_bytes;
-        unsigned long      packets = kpr_fwd_packets;
-        unsigned long      errors = kpr_fwd_errors;
-        unsigned int       qdepth = atomic_read (&kpr_queue_depth);
-        int                len;
-
-        *eof = 1;
-        if (off != 0)
-                return (0);
-
-        /* All four values are unsigned, so use unsigned conversions:
-         * large counters must not be printed as negative numbers */
-        len = sprintf(page, "%Lu %lu %lu %u\n", bytes, packets, errors, qdepth);
-
-        *start = page;
-        return (len);
-}
-
-/*
- * write_proc handler for KPR_PROC_ROUTER: any write resets the forwarding
- * statistics.  The data written is never looked at, and the whole write
- * is reported as consumed.
- */
-static int kpr_proc_router_write(struct file *file, const char *ubuffer,
-                                 unsigned long count, void *data)
-{
-        /* Whatever was written, just clear the counters */
-        kpr_fwd_errors = 0;
-        kpr_fwd_packets = 0;
-        kpr_fwd_bytes = 0;
-
-        return count;
-}
-
-/*
- * read_proc handler for KPR_PROC_ROUTES: print the route table, one line
- * per route: "<nal>  <gateway nid> : <lo nid> - <hi nid>, up|down".
- *
- * The table may not fit in one page, so the walk is resumable: 'data'
- * points at a struct proc_route_data that remembers where the previous
- * call stopped (curr), how much of a partially delivered line to skip
- * (skip) and the route list generation the walk started with.  If the
- * generation changes between calls the walk is abandoned with an
- * "Error: Routes Changed" message.
- *
- * Returns the number of bytes made available to the reader; *start is
- * pointed at them and *eof is cleared whenever route lines were
- * generated.
- */
-static int kpr_proc_routes_read(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
-{
-        struct proc_route_data  *prd = data;
-        kpr_route_entry_t       *re;
-        kpr_gateway_entry_t     *ge;
-        int                     chunk_len = 0;  /* bytes written to page */
-        int                     line_len = 0;   /* length of the last line */
-        int                     user_len = 0;   /* chunk_len less the skip */
-        int                     rc = 0;
-
-        *eof = 1;
-        *start = page;
-
-        /* The book keeping is held write-locked for the whole call */
-        write_lock(&(prd->proc_route_rwlock));
-
-        if (prd->curr == NULL) {
-                /* No walk in progress: only a read from offset 0 may
-                 * start one */
-                if (off != 0)
-                        goto routes_read_exit;
-
-                /* First pass, initialize our private data */
-                prd->curr = kpr_routes.next;
-                prd->generation = kpr_routes_generation;
-                prd->skip = 0;
-        } else {
-                /* Abort: the route list generation changed since the
-                 * walk started */
-                if (prd->generation != kpr_routes_generation) {
-                        prd->curr = NULL;
-                        rc = sprintf(page, "\nError: Routes Changed\n");
-                        goto routes_read_exit;
-                }
-
-                /* All the routes have been walked */
-                if (prd->curr == &kpr_routes) {
-                        prd->curr = NULL;
-                        goto routes_read_exit;
-                }
-        }
-
-        read_lock(&kpr_rwlock);
-        /* Hide the part of the first line that was already delivered */
-        *start = page + prd->skip;
-        user_len = -prd->skip;
-
-        while ((prd->curr != NULL) && (prd->curr != &kpr_routes)) {
-                re = list_entry(prd->curr, kpr_route_entry_t, kpre_list);
-                ge = re->kpre_gateway;
-
-                line_len = sprintf(page + chunk_len,
-                        "%12s  "LPX64" : "LPX64" - "LPX64", %s\n",
-                        nal2name(ge->kpge_nalid), ge->kpge_nid,
-                        re->kpre_lo_nid, re->kpre_hi_nid,
-                        ge->kpge_alive ? "up" : "down");
-                chunk_len += line_len;
-                user_len += line_len;
-
-                /* Abort: a NULL next pointer is taken to mean the route
-                 * list changed under us */
-                if (prd->curr->next == NULL) {
-                        prd->curr = NULL;
-                        read_unlock(&kpr_rwlock);
-                        rc = sprintf(page, "\nError: Routes Changed\n");
-                        goto routes_read_exit;
-                }
-
-                prd->curr = prd->curr->next;
-
-                /* The route table will exceed one page, break the while loop
-                 * so the function can be re-called with a new page.
-                 */
-                if ((chunk_len > (PAGE_SIZE - 80)) || (user_len > count))
-                        break;
-        }
-
-        *eof = 0;
-
-        /* Caller received only a portion of the last entry, the
-         * remaining will be delivered in the next page if asked for.
-         */
-        if (user_len > count) {
-                /* Step back so the next call prints that entry again, and
-                 * remember how much of it has already been delivered */
-                prd->curr = prd->curr->prev;
-                prd->skip = line_len - (user_len - count);
-                read_unlock(&kpr_rwlock);
-                rc = count;
-                goto routes_read_exit;
-        }
-
-        /* Not enough data to entirely satisfy the caller's request */
-        prd->skip = 0;
-        read_unlock(&kpr_rwlock);
-        rc = user_len;
-
-routes_read_exit:
-        write_unlock(&(prd->proc_route_rwlock));
-        return rc;
-}
-
-/*
- * write_proc handler for KPR_PROC_ROUTES.  Writes are accepted and
- * thrown away; routes are meant to be adjusted with lctl instead.
- */
-static int kpr_proc_routes_write(struct file *file, const char *ubuffer,
-                                 unsigned long count, void *data)
-{
-        /* nothing to do: claim the whole write was consumed */
-        return count;
-}
-
-/*
- * Create the two router /proc files and attach their handlers:
- * KPR_PROC_ROUTER (statistics) and KPR_PROC_ROUTES (route table).
- *
- * Failure to create either file is logged and ends setup at that point;
- * nothing is reported back to the caller.
- */
-void kpr_proc_init(void)
-{
-        struct proc_dir_entry *stats_pde;
-        struct proc_dir_entry *routes_pde;
-
-        /* Statistics file */
-        stats_pde = create_proc_entry (KPR_PROC_ROUTER,
-                                       S_IFREG | S_IRUGO | S_IWUSR, NULL);
-        if (!stats_pde) {
-                CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER);
-                return;
-        }
-
-        stats_pde->data = NULL;
-        stats_pde->read_proc = kpr_proc_router_read;
-        stats_pde->write_proc = kpr_proc_router_write;
-
-        /* Route table file */
-        routes_pde = create_proc_entry (KPR_PROC_ROUTES,
-                                        S_IFREG | S_IRUGO | S_IWUSR, NULL);
-        if (!routes_pde) {
-                CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTES);
-                return;
-        }
-
-        /* Start with no walk in progress before the handlers go live */
-        kpr_read_routes_data.curr = NULL;
-        kpr_read_routes_data.generation = 0;
-        kpr_read_routes_data.skip = 0;
-        kpr_read_routes_data.proc_route_rwlock = RW_LOCK_UNLOCKED;
-
-        routes_pde->data = &kpr_read_routes_data;
-        routes_pde->read_proc = kpr_proc_routes_read;
-        routes_pde->write_proc = kpr_proc_routes_write;
-}
-
-/* Remove both router /proc files again (see kpr_proc_init) */
-void kpr_proc_fini(void)
-{
-        remove_proc_entry(KPR_PROC_ROUTER, NULL);
-        remove_proc_entry(KPR_PROC_ROUTES, NULL);
-}
-}
diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c
deleted file mode 100644 (file)
index 7edc5f6..0000000
+++ /dev/null
@@ -1,824 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include "router.h"
-
-LIST_HEAD(kpr_routes);
-LIST_HEAD(kpr_gateways);
-LIST_HEAD(kpr_nals);
-
-unsigned int       kpr_routes_generation;
-unsigned long long kpr_fwd_bytes;
-unsigned long      kpr_fwd_packets;
-unsigned long      kpr_fwd_errors;
-atomic_t           kpr_queue_depth;
-
-/* Mostly the tables are read-only (thread and interrupt context)
- *
- * Once in a blue moon we register/deregister NALs and add/remove routing
- * entries (thread context only)... */
-rwlock_t         kpr_rwlock = RW_LOCK_UNLOCKED;
-
-kpr_router_interface_t kpr_router_interface = {
-       kprri_register:         kpr_register_nal,
-       kprri_lookup:           kpr_lookup_target,
-       kprri_fwd_start:        kpr_forward_packet,
-       kprri_fwd_done:         kpr_complete_packet,
-        kprri_notify:           kpr_nal_notify,
-       kprri_shutdown:         kpr_shutdown_nal,
-       kprri_deregister:       kpr_deregister_nal,
-};
-
-int
-kpr_register_nal (kpr_nal_interface_t *nalif, void **argp)
-{
-       unsigned long      flags;
-       struct list_head  *e;
-       kpr_nal_entry_t   *ne;
-
-        CDEBUG (D_NET, "Registering NAL %x\n", nalif->kprni_nalid);
-
-       PORTAL_ALLOC (ne, sizeof (*ne));
-       if (ne == NULL)
-               return (-ENOMEM);
-
-       memset (ne, 0, sizeof (*ne));
-        memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif));
-
-       LASSERT (!in_interrupt());
-       write_lock_irqsave (&kpr_rwlock, flags);
-
-       for (e = kpr_nals.next; e != &kpr_nals; e = e->next)
-       {
-               kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list);
-
-               if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid)
-               {
-                       write_unlock_irqrestore (&kpr_rwlock, flags);
-
-                       CERROR ("Attempt to register same NAL %x twice\n", ne->kpne_interface.kprni_nalid);
-
-                       PORTAL_FREE (ne, sizeof (*ne));
-                       return (-EEXIST);
-               }
-       }
-
-        list_add (&ne->kpne_list, &kpr_nals);
-
-       write_unlock_irqrestore (&kpr_rwlock, flags);
-
-       *argp = ne;
-       PORTAL_MODULE_USE;
-        return (0);
-}
-
-void
-kpr_do_upcall (void *arg)
-{
-        kpr_upcall_t *u = (kpr_upcall_t *)arg;
-        char          nalstr[10];
-        char          nidstr[36];
-        char          whenstr[36];
-        char         *argv[] = {
-                NULL,
-                "ROUTER_NOTIFY",
-                nalstr,
-                nidstr,
-                u->kpru_alive ? "up" : "down",
-                whenstr,
-                NULL};
-        
-        snprintf (nalstr, sizeof(nalstr), "%d", u->kpru_nal_id);
-        snprintf (nidstr, sizeof(nidstr), LPX64, u->kpru_nid);
-        snprintf (whenstr, sizeof(whenstr), "%ld", u->kpru_when);
-
-        portals_run_upcall (argv);
-
-        kfree (u);
-}
-
-void
-kpr_upcall (int gw_nalid, ptl_nid_t gw_nid, int alive, time_t when)
-{
-        char str[PTL_NALFMT_SIZE];
-        
-        /* May be in arbitrary context */
-        kpr_upcall_t  *u = kmalloc (sizeof (kpr_upcall_t), GFP_ATOMIC);
-
-        if (u == NULL) {
-                CERROR ("Upcall out of memory: nal %x nid "LPX64" (%s) %s\n",
-                        gw_nalid, gw_nid,
-                        portals_nid2str(gw_nalid, gw_nid, str),
-                        alive ? "up" : "down");
-                return;
-        }
-
-        u->kpru_nal_id     = gw_nalid;
-        u->kpru_nid        = gw_nid;
-        u->kpru_alive      = alive;
-        u->kpru_when       = when;
-
-        prepare_work (&u->kpru_tq, kpr_do_upcall, u);
-        schedule_work (&u->kpru_tq);
-}
-
-int
-kpr_do_notify (int byNal, int gateway_nalid, ptl_nid_t gateway_nid,
-               int alive, time_t when)
-{
-       unsigned long        flags;
-        int                  found;
-        kpr_nal_entry_t     *ne = NULL;
-        kpr_gateway_entry_t *ge = NULL;
-        struct timeval       now;
-       struct list_head    *e;
-       struct list_head    *n;
-        char                 str[PTL_NALFMT_SIZE];
-
-        CDEBUG (D_NET, "%s notifying [%x] "LPX64": %s\n", 
-                byNal ? "NAL" : "userspace", 
-                gateway_nalid, gateway_nid, alive ? "up" : "down");
-
-        /* can't do predictions... */
-        do_gettimeofday (&now);
-        if (when > now.tv_sec) {
-                CWARN ("Ignoring prediction from %s of [%x] "LPX64" %s "
-                       "%ld seconds in the future\n", 
-                       byNal ? "NAL" : "userspace", 
-                       gateway_nalid, gateway_nid, 
-                       alive ? "up" : "down",
-                       when - now.tv_sec);
-                return (EINVAL);
-        }
-
-        LASSERT (when <= now.tv_sec);
-
-        /* Serialise with lookups (i.e. write lock) */
-       write_lock_irqsave(&kpr_rwlock, flags);
-
-        found = 0;
-        list_for_each_safe (e, n, &kpr_gateways) {
-
-                ge = list_entry(e, kpr_gateway_entry_t, kpge_list);
-                if ((gateway_nalid != 0 &&
-                     ge->kpge_nalid != gateway_nalid) ||
-                    ge->kpge_nid != gateway_nid)
-                        continue;
-
-                found = 1;
-                break;
-        }
-
-        if (!found) {
-                /* gateway not found */
-                write_unlock_irqrestore(&kpr_rwlock, flags);
-                CDEBUG (D_NET, "Gateway not found\n");
-                return (0);
-        }
-        
-        if (when < ge->kpge_timestamp) {
-                /* out of date information */
-                write_unlock_irqrestore (&kpr_rwlock, flags);
-                CDEBUG (D_NET, "Out of date\n");
-                return (0);
-        }
-
-        /* update timestamp */
-        ge->kpge_timestamp = when;
-
-        if ((!ge->kpge_alive) == (!alive)) {
-                /* new date for old news */
-                write_unlock_irqrestore (&kpr_rwlock, flags);
-                CDEBUG (D_NET, "Old news\n");
-                return (0);
-        }
-
-        ge->kpge_alive = alive;
-        CDEBUG(D_NET, "set "LPX64" [%p] %d\n", gateway_nid, ge, alive);
-
-        if (alive) {
-                /* Reset all gateway weights so the newly-enabled gateway
-                 * doesn't have to play catch-up */
-                list_for_each_safe (e, n, &kpr_gateways) {
-                        kpr_gateway_entry_t *ge = list_entry(e, kpr_gateway_entry_t,
-                                                             kpge_list);
-                        atomic_set (&ge->kpge_weight, 0);
-                }
-        }
-
-        found = 0;
-        if (!byNal) {
-                /* userland notified me: notify NAL? */
-                ne = kpr_find_nal_entry_locked (ge->kpge_nalid);
-                if (ne != NULL) {
-                        if (!ne->kpne_shutdown &&
-                            ne->kpne_interface.kprni_notify != NULL) {
-                                /* take a ref on this NAL until notifying
-                                 * it has completed... */
-                                atomic_inc (&ne->kpne_refcount);
-                                found = 1;
-                        }
-                }
-        }
-
-        write_unlock_irqrestore(&kpr_rwlock, flags);
-
-        if (found) {
-                ne->kpne_interface.kprni_notify (ne->kpne_interface.kprni_arg,
-                                                 gateway_nid, alive);
-                /* 'ne' can disappear now... */
-                atomic_dec (&ne->kpne_refcount);
-        }
-        
-        if (byNal) {
-                /* It wasn't userland that notified me... */
-                CWARN ("Upcall: NAL %x NID "LPX64" (%s) is %s\n",
-                       gateway_nalid, gateway_nid,
-                       portals_nid2str(gateway_nalid, gateway_nid, str),
-                       alive ? "alive" : "dead");
-                kpr_upcall (gateway_nalid, gateway_nid, alive, when);
-        } else {
-                CDEBUG (D_NET, " NOT Doing upcall\n");
-        }
-        
-        return (0);
-}
-
-void
-kpr_nal_notify (void *arg, ptl_nid_t peer, int alive, time_t when)
-{
-        kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
-        
-        kpr_do_notify (1, ne->kpne_interface.kprni_nalid, peer, alive, when);
-}
-
-void
-kpr_shutdown_nal (void *arg)
-{
-       unsigned long    flags;
-       kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg;
-
-        CDEBUG (D_NET, "Shutting down NAL %x\n", ne->kpne_interface.kprni_nalid);
-
-       LASSERT (!ne->kpne_shutdown);
-       LASSERT (!in_interrupt());
-
-       write_lock_irqsave (&kpr_rwlock, flags);
-       ne->kpne_shutdown = 1;
-       write_unlock_irqrestore (&kpr_rwlock, flags);
-}
-
-void
-kpr_deregister_nal (void *arg)
-{
-       unsigned long     flags;
-       kpr_nal_entry_t  *ne = (kpr_nal_entry_t *)arg;
-
-        CDEBUG (D_NET, "Deregister NAL %x\n", ne->kpne_interface.kprni_nalid);
-
-       LASSERT (ne->kpne_shutdown);            /* caller must have issued shutdown already */
-       LASSERT (!in_interrupt());
-
-       write_lock_irqsave (&kpr_rwlock, flags);
-       list_del (&ne->kpne_list);
-       write_unlock_irqrestore (&kpr_rwlock, flags);
-
-        /* Wait until all outstanding messages/notifications have completed */
-       while (atomic_read (&ne->kpne_refcount) != 0)
-       {
-               CDEBUG (D_NET, "Waiting for refcount on NAL %x to reach zero (%d)\n",
-                       ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount));
-
-               set_current_state (TASK_UNINTERRUPTIBLE);
-               schedule_timeout (HZ);
-       }
-
-       PORTAL_FREE (ne, sizeof (*ne));
-        PORTAL_MODULE_UNUSE;
-}
-
-int
-kpr_ge_isbetter (kpr_gateway_entry_t *ge1, kpr_gateway_entry_t *ge2)
-{
-        const int significant_bits = 0x00ffffff;
-        /* We use atomic_t to record/compare route weights for
-         * load-balancing.  Here we limit ourselves to only using
-         * 'significant_bits' when we do an 'after' comparison */
-
-        int    diff = (atomic_read (&ge1->kpge_weight) -
-                       atomic_read (&ge2->kpge_weight)) & significant_bits;
-        int    rc = (diff > (significant_bits >> 1));
-
-        CDEBUG(D_NET, "[%p]"LPX64"=%d %s [%p]"LPX64"=%d\n",
-               ge1, ge1->kpge_nid, atomic_read (&ge1->kpge_weight),
-               rc ? ">" : "<",
-               ge2, ge2->kpge_nid, atomic_read (&ge2->kpge_weight));
-
-        return (rc);
-}
-
-void
-kpr_update_weight (kpr_gateway_entry_t *ge, int nob)
-{
-        int weight = 1 + (nob + sizeof (ptl_hdr_t)/2)/sizeof (ptl_hdr_t);
-
-        /* We've chosen this route entry (i.e. gateway) to forward payload
-         * of length 'nob'; update the route's weight to make it less
-         * favoured.  Note that the weight is 1 plus the payload size
-         * rounded and scaled to the portals header size, so we get better
-         * use of the significant bits in kpge_weight. */
-
-        CDEBUG(D_NET, "gateway [%p]"LPX64" += %d\n", ge,
-               ge->kpge_nid, weight);
-        
-        atomic_add (weight, &ge->kpge_weight);
-}
-
-int
-kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob,
-                   ptl_nid_t *gateway_nidp)
-{
-       kpr_nal_entry_t     *ne = (kpr_nal_entry_t *)arg;
-       struct list_head    *e;
-        kpr_route_entry_t   *re;
-        kpr_gateway_entry_t *ge = NULL;
-       int                  rc = -ENOENT;
-
-        /* Caller wants to know if 'target_nid' can be reached via a gateway
-         * ON HER OWN NETWORK */
-
-        CDEBUG (D_NET, "lookup "LPX64" from NAL %x\n", target_nid, 
-                ne->kpne_interface.kprni_nalid);
-        LASSERT (!in_interrupt());
-
-       read_lock (&kpr_rwlock);
-
-       if (ne->kpne_shutdown) {        /* caller is shutting down */
-                read_unlock (&kpr_rwlock);
-               return (-ENOENT);
-        }
-
-       /* Search routes for one that has a gateway to target_nid on the callers network */
-
-        list_for_each (e, &kpr_routes) {
-               re = list_entry (e, kpr_route_entry_t, kpre_list);
-
-               if (re->kpre_lo_nid > target_nid ||
-                    re->kpre_hi_nid < target_nid)
-                       continue;
-
-               /* found table entry */
-
-               if (re->kpre_gateway->kpge_nalid != ne->kpne_interface.kprni_nalid ||
-                    !re->kpre_gateway->kpge_alive) {
-                        /* different NAL or gateway down */
-                        rc = -EHOSTUNREACH;
-                        continue;
-                }
-                
-                if (ge == NULL ||
-                    kpr_ge_isbetter (re->kpre_gateway, ge))
-                    ge = re->kpre_gateway;
-       }
-
-        if (ge != NULL) {
-                kpr_update_weight (ge, nob);
-                *gateway_nidp = ge->kpge_nid;
-                rc = 0;
-        }
-        
-       read_unlock (&kpr_rwlock);
-
-        /* NB can't deref 're' now; it might have been removed! */
-
-        CDEBUG (D_NET, "lookup "LPX64" from NAL %x: %d ("LPX64")\n",
-                target_nid, ne->kpne_interface.kprni_nalid, rc,
-                (rc == 0) ? *gateway_nidp : (ptl_nid_t)0);
-       return (rc);
-}
-
-kpr_nal_entry_t *
-kpr_find_nal_entry_locked (int nal_id)
-{
-        struct list_head    *e;
-        
-        /* Called with kpr_rwlock held */
-
-        list_for_each (e, &kpr_nals) {
-                kpr_nal_entry_t *ne = list_entry (e, kpr_nal_entry_t, kpne_list);
-
-                if (nal_id != ne->kpne_interface.kprni_nalid) /* no match */
-                        continue;
-
-                return (ne);
-        }
-        
-        return (NULL);
-}
-
-void
-kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd)
-{
-       kpr_nal_entry_t     *src_ne = (kpr_nal_entry_t *)arg;
-       ptl_nid_t            target_nid = fwd->kprfd_target_nid;
-        int                  nob = fwd->kprfd_nob;
-        kpr_gateway_entry_t *ge = NULL;
-        kpr_nal_entry_t     *dst_ne = NULL;
-       struct list_head    *e;
-        kpr_route_entry_t   *re;
-        kpr_nal_entry_t     *tmp_ne;
-        int                  rc;
-
-        CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x\n", fwd,
-                target_nid, src_ne->kpne_interface.kprni_nalid);
-
-        LASSERT (nob == lib_kiov_nob (fwd->kprfd_niov, fwd->kprfd_kiov));
-        LASSERT (!in_interrupt());
-
-       read_lock (&kpr_rwlock);
-
-        kpr_fwd_packets++;                   /* (loose) stats accounting */
-        kpr_fwd_bytes += nob + sizeof(ptl_hdr_t);
-
-       if (src_ne->kpne_shutdown) {         /* caller is shutting down */
-                rc = -ESHUTDOWN;
-               goto out;
-        }
-
-       fwd->kprfd_router_arg = src_ne;      /* stash caller's nal entry */
-
-       /* Search routes for one that has a gateway to target_nid NOT on the caller's network */
-
-        list_for_each (e, &kpr_routes) {
-               re = list_entry (e, kpr_route_entry_t, kpre_list);
-
-               if (re->kpre_lo_nid > target_nid || /* no match */
-                    re->kpre_hi_nid < target_nid)
-                       continue;
-
-               if (re->kpre_gateway->kpge_nalid == src_ne->kpne_interface.kprni_nalid)
-                       continue;               /* don't route to same NAL */
-
-                if (!re->kpre_gateway->kpge_alive)
-                        continue;               /* gateway is dead */
-                
-                tmp_ne = kpr_find_nal_entry_locked (re->kpre_gateway->kpge_nalid);
-
-                if (tmp_ne == NULL ||
-                    tmp_ne->kpne_shutdown) {
-                        /* NAL must be registered and not shutting down */
-                        continue;
-                }
-
-                if (ge == NULL ||
-                    kpr_ge_isbetter (re->kpre_gateway, ge)) {
-                        ge = re->kpre_gateway;
-                        dst_ne = tmp_ne;
-                }
-        }
-        
-        if (ge != NULL) {
-                LASSERT (dst_ne != NULL);
-                
-                kpr_update_weight (ge, nob);
-
-                fwd->kprfd_gateway_nid = ge->kpge_nid;
-                atomic_inc (&src_ne->kpne_refcount); /* source and dest nals are */
-                atomic_inc (&dst_ne->kpne_refcount); /* busy until fwd completes */
-                atomic_inc (&kpr_queue_depth);
-
-                read_unlock (&kpr_rwlock);
-
-                CDEBUG (D_NET, "forward [%p] "LPX64" from NAL %x: "
-                        "to "LPX64" on NAL %x\n", 
-                        fwd, target_nid, src_ne->kpne_interface.kprni_nalid,
-                        fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid);
-
-                dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd);
-                return;
-       }
-
-        rc = -EHOSTUNREACH;
- out:
-        kpr_fwd_errors++;
-
-        CDEBUG (D_NET, "Failed to forward [%p] "LPX64" from NAL %x: %d\n", 
-                fwd, target_nid, src_ne->kpne_interface.kprni_nalid, rc);
-
-       (fwd->kprfd_callback)(fwd->kprfd_callback_arg, rc);
-
-        read_unlock (&kpr_rwlock);
-}
-
-void
-kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error)
-{
-       kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg;
-       kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg;
-
-        CDEBUG (D_NET, "complete(1) [%p] from NAL %x to NAL %x: %d\n", fwd,
-                src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error);
-
-       atomic_dec (&dst_ne->kpne_refcount);    /* CAVEAT EMPTOR dst_ne can disappear now!!! */
-
-       (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error);
-
-        CDEBUG (D_NET, "complete(2) [%p] from NAL %x: %d\n", fwd,
-                src_ne->kpne_interface.kprni_nalid, error);
-
-        atomic_dec (&kpr_queue_depth);
-       atomic_dec (&src_ne->kpne_refcount);    /* CAVEAT EMPTOR src_ne can disappear now!!! */
-}
-
-int
-kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, 
-               ptl_nid_t lo_nid, ptl_nid_t hi_nid)
-{
-       unsigned long        flags;
-       struct list_head    *e;
-       kpr_route_entry_t   *re;
-        kpr_gateway_entry_t *ge;
-        int                  dup = 0;
-
-        CDEBUG(D_NET, "Add route: %x "LPX64" : "LPX64" - "LPX64"\n",
-               gateway_nalid, gateway_nid, lo_nid, hi_nid);
-
-        if (gateway_nalid == PTL_NID_ANY ||
-            lo_nid == PTL_NID_ANY ||
-            hi_nid == PTL_NID_ANY ||
-            lo_nid > hi_nid)
-                return (-EINVAL);
-
-        PORTAL_ALLOC (ge, sizeof (*ge));
-        if (ge == NULL)
-                return (-ENOMEM);
-
-        ge->kpge_nalid = gateway_nalid;
-        ge->kpge_nid   = gateway_nid;
-        ge->kpge_alive = 1;
-        ge->kpge_timestamp = 0;
-        ge->kpge_refcount = 0;
-        atomic_set (&ge->kpge_weight, 0);
-
-        PORTAL_ALLOC (re, sizeof (*re));
-        if (re == NULL) {
-                PORTAL_FREE (ge, sizeof (*ge));
-                return (-ENOMEM);
-        }
-
-        re->kpre_lo_nid = lo_nid;
-        re->kpre_hi_nid = hi_nid;
-
-        LASSERT(!in_interrupt());
-       write_lock_irqsave (&kpr_rwlock, flags);
-
-        list_for_each (e, &kpr_gateways) {
-                kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
-                                                      kpge_list);
-
-                if (ge2->kpge_nalid == gateway_nalid &&
-                    ge2->kpge_nid == gateway_nid) {
-                        PORTAL_FREE (ge, sizeof (*ge));
-                        ge = ge2;
-                        dup = 1;
-                        break;
-                }
-        }
-
-        if (!dup) {
-                /* Adding a new gateway... */
-                list_add (&ge->kpge_list, &kpr_gateways);
-
-                /* ...zero all gateway weights so this one doesn't have to
-                 * play catch-up */
-
-                list_for_each (e, &kpr_gateways) {
-                        kpr_gateway_entry_t *ge2 = list_entry(e, kpr_gateway_entry_t,
-                                                              kpge_list);
-                        atomic_set (&ge2->kpge_weight, 0);
-                }
-        }
-
-        re->kpre_gateway = ge;
-        ge->kpge_refcount++;
-        list_add (&re->kpre_list, &kpr_routes);
-        kpr_routes_generation++;
-
-        write_unlock_irqrestore (&kpr_rwlock, flags);
-        return (0);
-}
-
-int
-kpr_sys_notify (int gateway_nalid, ptl_nid_t gateway_nid,
-                int alive, time_t when)
-{
-        return (kpr_do_notify (0, gateway_nalid, gateway_nid, alive, when));
-}
-
-int
-kpr_del_route (int gw_nalid, ptl_nid_t gw_nid,
-               ptl_nid_t lo, ptl_nid_t hi)
-{
-        int                specific = (lo != PTL_NID_ANY);
-        unsigned long      flags;
-        int                rc = -ENOENT;
-        struct list_head  *e;
-        struct list_head  *n;
-
-        CDEBUG(D_NET, "Del route [%x] "LPX64" : "LPX64" - "LPX64"\n",
-               gw_nalid, gw_nid, lo, hi);
-
-        LASSERT(!in_interrupt());
-
-        /* NB Caller may specify either all routes via the given gateway
-         * (lo/hi == PTL_NID_ANY) or a specific route entry (lo/hi are
-         * actual NIDs) */
-        if (specific ? (hi == PTL_NID_ANY || hi < lo) : (hi != PTL_NID_ANY))
-                return (-EINVAL);
-
-        write_lock_irqsave(&kpr_rwlock, flags);
-
-        list_for_each_safe (e, n, &kpr_routes) {
-                kpr_route_entry_t   *re = list_entry(e, kpr_route_entry_t,
-                                                   kpre_list);
-                kpr_gateway_entry_t *ge = re->kpre_gateway;
-
-                if (ge->kpge_nalid != gw_nalid ||
-                    ge->kpge_nid != gw_nid ||
-                    (specific &&
-                     (lo != re->kpre_lo_nid || hi != re->kpre_hi_nid)))
-                        continue;
-
-                rc = 0;
-
-                if (--ge->kpge_refcount == 0) {
-                        list_del (&ge->kpge_list);
-                        PORTAL_FREE (ge, sizeof (*ge));
-                }
-
-                list_del (&re->kpre_list);
-                PORTAL_FREE(re, sizeof (*re));
-
-                if (specific)
-                        break;
-        }
-
-        kpr_routes_generation++;
-        write_unlock_irqrestore(&kpr_rwlock, flags);
-
-        return (rc);
-}
-
-int
-kpr_get_route (int idx, __u32 *gateway_nalid, ptl_nid_t *gateway_nid,
-               ptl_nid_t *lo_nid, ptl_nid_t *hi_nid, __u32 *alive)
-{
-       struct list_head  *e;
-
-        LASSERT (!in_interrupt());
-       read_lock(&kpr_rwlock);
-
-        for (e = kpr_routes.next; e != &kpr_routes; e = e->next) {
-                kpr_route_entry_t   *re = list_entry(e, kpr_route_entry_t,
-                                                     kpre_list);
-                kpr_gateway_entry_t *ge = re->kpre_gateway;
-                
-                if (idx-- == 0) {
-                        *gateway_nalid = ge->kpge_nalid;
-                        *gateway_nid = ge->kpge_nid;
-                        *alive = ge->kpge_alive;
-                        *lo_nid = re->kpre_lo_nid;
-                        *hi_nid = re->kpre_hi_nid;
-
-                        read_unlock(&kpr_rwlock);
-                        return (0);
-                }
-        }
-
-        read_unlock (&kpr_rwlock);
-        return (-ENOENT);
-}
-
-static int 
-kpr_nal_cmd(struct portals_cfg *pcfg, void * private)
-{
-        int err = -EINVAL;
-        ENTRY;
-
-        switch(pcfg->pcfg_command) {
-        default:
-                CDEBUG(D_IOCTL, "Inappropriate cmd: %d\n", pcfg->pcfg_command);
-                break;
-                
-        case NAL_CMD_ADD_ROUTE:
-                CDEBUG(D_IOCTL, "Adding route: [%x] "LPU64" : "LPU64" - "LPU64"\n",
-                       pcfg->pcfg_nal, pcfg->pcfg_nid, 
-                       pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kpr_add_route(pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                    pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_DEL_ROUTE:
-                CDEBUG (D_IOCTL, "Removing routes via [%x] "LPU64" : "LPU64" - "LPU64"\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid, 
-                        pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                err = kpr_del_route (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                     pcfg->pcfg_nid2, pcfg->pcfg_nid3);
-                break;
-
-        case NAL_CMD_NOTIFY_ROUTER: {
-                CDEBUG (D_IOCTL, "Notifying peer [%x] "LPU64" %s @ %ld\n",
-                        pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                        pcfg->pcfg_flags ? "Enabling" : "Disabling",
-                        (time_t)pcfg->pcfg_nid3);
-                
-                err = kpr_sys_notify (pcfg->pcfg_gw_nal, pcfg->pcfg_nid,
-                                      pcfg->pcfg_flags, (time_t)pcfg->pcfg_nid3);
-                break;
-        }
-                
-        case NAL_CMD_GET_ROUTE:
-                CDEBUG (D_IOCTL, "Getting route [%d]\n", pcfg->pcfg_count);
-                err = kpr_get_route(pcfg->pcfg_count, &pcfg->pcfg_gw_nal,
-                                    &pcfg->pcfg_nid, 
-                                    &pcfg->pcfg_nid2, &pcfg->pcfg_nid3,
-                                    &pcfg->pcfg_flags);
-                break;
-        }
-        RETURN(err);
-}
-
-
-static void /*__exit*/
-kpr_finalise (void)
-{
-        LASSERT (list_empty (&kpr_nals));
-
-        libcfs_nal_cmd_unregister(ROUTER);
-
-        PORTAL_SYMBOL_UNREGISTER(kpr_router_interface);
-
-        kpr_proc_fini();
-
-        while (!list_empty (&kpr_routes)) {
-                kpr_route_entry_t *re = list_entry(kpr_routes.next,
-                                                   kpr_route_entry_t,
-                                                   kpre_list);
-
-                list_del(&re->kpre_list);
-                PORTAL_FREE(re, sizeof (*re));
-        }
-
-        CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n",
-               atomic_read(&portal_kmemory));
-}
-
-static int __init
-kpr_initialise (void)
-{
-        int     rc;
-        
-        CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n",
-               atomic_read(&portal_kmemory));
-
-        kpr_routes_generation = 0;
-        kpr_proc_init();
-
-        rc = libcfs_nal_cmd_register(ROUTER, kpr_nal_cmd, NULL);
-        if (rc != 0) {
-                CERROR("Can't register nal cmd handler\n");
-                return (rc);
-        }
-        
-        PORTAL_SYMBOL_REGISTER(kpr_router_interface);
-        return (0);
-}
-
-MODULE_AUTHOR("Eric Barton");
-MODULE_DESCRIPTION("Kernel Portals Router v0.01");
-MODULE_LICENSE("GPL");
-
-module_init (kpr_initialise);
-module_exit (kpr_finalise);
-
-EXPORT_SYMBOL (kpr_router_interface);
diff --git a/lustre/portals/router/router.h b/lustre/portals/router/router.h
deleted file mode 100644 (file)
index 27e4983..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals
- *   http://sourceforge.net/projects/sandiaportals/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef _KPTLROUTER_H
-#define _KPTLROUTER_H
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-
-#define DEBUG_SUBSYSTEM S_PTLROUTER
-
-#include <linux/kp30.h>
-#include <linux/kpr.h>
-#include <portals/p30.h>
-#include <portals/lib-p30.h>
-
-typedef struct
-{
-       struct list_head        kpne_list;
-       kpr_nal_interface_t     kpne_interface;
-       atomic_t                kpne_refcount;
-       int                     kpne_shutdown;
-} kpr_nal_entry_t;
-
-typedef struct
-{
-        struct list_head        kpge_list;
-        atomic_t                kpge_weight;
-        time_t                  kpge_timestamp;
-        int                     kpge_alive;
-        int                     kpge_nalid;
-        int                     kpge_refcount;
-        ptl_nid_t               kpge_nid;
-} kpr_gateway_entry_t;
-
-typedef struct
-{
-       struct list_head        kpre_list;
-        kpr_gateway_entry_t    *kpre_gateway;
-       ptl_nid_t               kpre_lo_nid;
-        ptl_nid_t               kpre_hi_nid;
-} kpr_route_entry_t;
-
-typedef struct
-{
-        work_struct_t           kpru_tq;
-        int                     kpru_nal_id;
-        ptl_nid_t               kpru_nid;
-        int                     kpru_alive;
-        time_t                  kpru_when;
-} kpr_upcall_t;
-
-extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp);
-extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, int nob, 
-                              ptl_nid_t *gateway_nidp);
-extern kpr_nal_entry_t *kpr_find_nal_entry_locked (int nal_id);
-extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd);
-extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error);
-extern void kpr_nal_notify (void *arg, ptl_nid_t peer,
-                            int alive, time_t when);
-extern void kpr_shutdown_nal (void *arg);
-extern void kpr_deregister_nal (void *arg);
-
-extern void kpr_proc_init (void);
-extern void kpr_proc_fini (void);
-
-extern unsigned int       kpr_routes_generation;
-extern unsigned long long kpr_fwd_bytes;
-extern unsigned long      kpr_fwd_packets;
-extern unsigned long      kpr_fwd_errors;
-extern atomic_t           kpr_queue_depth;
-
-extern struct list_head   kpr_routes;
-extern rwlock_t           kpr_rwlock;
-
-#endif /* _KPLROUTER_H */
diff --git a/lustre/portals/tests/.cvsignore b/lustre/portals/tests/.cvsignore
deleted file mode 100644 (file)
index e034130..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-Makefile
-.deps
-.*.cmd
-autoMakefile.in
-autoMakefile
-*.ko
-*.mod.c
-.*.flags
-.tmp_versions
-.depend
diff --git a/lustre/portals/tests/Makefile.in b/lustre/portals/tests/Makefile.in
deleted file mode 100644 (file)
index c309db0..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Kernel test modules built from this directory.
-MODULES := pingsrv pingcli spingsrv spingcli
-pingsrv-objs := ping_srv.o
-
-ifeq ($(PATCHLEVEL),6)
-# PATCHLEVEL 6: name the single object behind each remaining module.
-pingcli-objs := ping_cli.o
-spingsrv-objs := sping_srv.o
-spingcli-objs := sping_cli.o
-else
-# Otherwise: create <module>.c as a symlink to the underscore-named source.
-# Recipe lines must start with a hard TAB.
-ping%.c: ping_%.c
-	ln -sf $< $@
-
-sping%.c: sping_%.c
-	ln -sf $< $@
-endif
-
-@INCLUDE_RULES@
diff --git a/lustre/portals/tests/Makefile.mk b/lustre/portals/tests/Makefile.mk
deleted file mode 100644 (file)
index 751c0a0..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# Pull in the Kernelenv file from the parent directory.
-include $(src)/../Kernelenv
-
-# Add the ping client and ping server objects to obj-y.
-obj-y += ping_cli.o
-obj-y += ping_srv.o
diff --git a/lustre/portals/tests/autoMakefile.am b/lustre/portals/tests/autoMakefile.am
deleted file mode 100644 (file)
index 5f81b93..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-# The four ping test modules are listed only when MODULES and TESTS are
-# enabled and CRAY_PORTALS is not.
-if MODULES
-if !CRAY_PORTALS
-if TESTS
-noinst_DATA := pingsrv$(KMODEXT) pingcli$(KMODEXT)
-noinst_DATA += spingsrv$(KMODEXT) spingcli$(KMODEXT)
-endif
-endif
-endif
-
-# Build products and the pingsrv.c/pingcli.c/spingsrv.c/spingcli.c files
-# are removed by mostlyclean; the underscore-named sources and ping.h are
-# the files listed for distribution.
-MOSTLYCLEANFILES = *.o *.ko *.mod.c pingsrv.c pingcli.c spingsrv.c spingcli.c
-DIST_SOURCES = ping_srv.c ping_cli.c sping_srv.c sping_cli.c ping.h
diff --git a/lustre/portals/tests/ping.h b/lustre/portals/tests/ping.h
deleted file mode 100644 (file)
index f07444b..0000000
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef _KPING_INCLUDED
-#define _KPING_INCLUDED
-
-#include <portals/p30.h>
-
-
-#define PTL_PING_IN_SIZE               256     // n packets per buffer
-#define PTL_PING_IN_BUFFERS            2       // n fallback buffers
-
-#define PTL_PING_CLIENT                        4
-#define PTL_PING_SERVER                        5
-
-#define PING_HEADER_MAGIC              0xDEADBEEF
-#define PING_BULK_MAGIC                        0xCAFEBABE
-
-#define PING_HEAD_BITS                 0x00000001
-#define PING_BULK_BITS                 0x00000002
-#define PING_IGNORE_BITS               0xFFFFFFFC
-
-#define PTL_PING_ACK                   0x01
-#define PTL_PING_VERBOSE               0x02
-#define PTL_PING_VERIFY                        0x04
-#define PTL_PING_PREALLOC              0x08
-
-
-/* Index following `index`, wrapping to 0 once it would reach
- * PTL_PING_IN_BUFFERS.  The argument is parenthesised so that expression
- * arguments expand with the intended precedence. */
-#define NEXT_PRIMARY_BUFFER(index)             \
-       ((((index) + 1) >= PTL_PING_IN_BUFFERS) ? 0 : ((index) + 1))
-
-#define PDEBUG(str, err)                       \
-       CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err)
-
-
-/* Ping data to be passed via the ioctl to kernel space */
-
-#if __KERNEL__
-
-
-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#else
-#include <linux/tqueue.h>
-#endif
-/* State of a kernel ping server (ping_srv.c keeps one instance in its
- * file-scope `server` pointer). */
-struct pingsrv_data {
-        
-        ptl_handle_ni_t         ni;        /* interface handle from PtlNIInit() */
-        ptl_handle_me_t         me;        /* match entry on PTL_PING_SERVER */
-        ptl_handle_eq_t         eq;        /* event queue for incoming pings */
-        void                   *in_buf;    /* receive buffer; ping_srv.c also sends the reply from it */
-        ptl_process_id_t        my_id;     /* filled in by PtlGetId() */
-        ptl_process_id_t        id_local;  /* match id; ping_srv.c sets it to ANY/ANY */
-        ptl_md_t                mdin;      /* descriptor for the incoming buffer */
-        ptl_md_t                mdout;     /* descriptor for the outgoing reply */
-        ptl_handle_md_t         mdin_h;    /* handle of the attached mdin */
-        ptl_handle_md_t         mdout_h;   /* handle of the bound mdout */
-        ptl_event_t             evnt;      /* copy of the most recent event */
-        struct task_struct     *tsk;       /* server thread woken by the event callback */
-}; /* struct pingsrv_data */
-/* State of a kernel ping client (the client sources keep one instance in
- * their file-scope `client` pointer). */
-struct pingcli_data {
-        
-        struct portal_ioctl_data *args;   /* ioctl arguments of the current request */
-        ptl_handle_me_t        me;        /* match entry on PTL_PING_CLIENT */
-        ptl_handle_eq_t                eq;   /* event queue for replies */
-        char                          *inbuf;    /* buffer replies land in */
-        char                   *outbuf;   /* buffer pings are sent from */
-        ptl_process_id_t       myid; /* filled in by PtlGetId() */
-        ptl_process_id_t       id_local; /* match id, set to ANY/ANY */
-        ptl_process_id_t       id_remote; /* target: nid from the ioctl, pid 0 */
-        ptl_md_t               md_in_head;    /* descriptor over inbuf */
-        ptl_md_t               md_out_head;   /* descriptor over outbuf */
-        ptl_handle_md_t        md_in_head_h;  /* handle of the attached md_in_head */
-        ptl_handle_md_t        md_out_head_h; /* handle of the bound md_out_head */
-        ptl_event_t            ev;        /* not referenced by the client code shown in this file set */
-        struct task_struct     *tsk;      /* task woken when a reply arrives */
-}; /* struct pingcli_data */
-
-
-#endif /* __KERNEL__ */
-
-#endif /* _KPING_INCLUDED */
diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c
deleted file mode 100644 (file)
index e9a8481..0000000
+++ /dev/null
@@ -1,303 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
- *         Kedar Sovani (kedar@calsoftinc.com)
- *         Amey Inamdar (amey@calsoftinc.com)
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define DEBUG_SUBSYSTEM S_PINGER
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include "ping.h"
-/* int portal_debug = D_PING_CLI;  */
-
-
-#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-
-#define MAX_TIME 100000
-
-/* This should be enclosed in a structure */
-
-static struct pingcli_data *client = NULL;
-
-static int count = 0;
-
-/*
- * Release the client's portals resources and free `client`.
- *
- * `err` picks the entry point into the fall-through switch below:
- *   1 - unlink the outgoing MD, then everything listed for 2
- *   2 - unlink the incoming MD, free the EQ, unlink the ME, then 3
- *   3 - PtlNIFini(nih), then 4
- *   4 - free the client structure
- * Any other value releases nothing.
- *
- * NOTE(review): client->outbuf and client->inbuf are not freed here, so
- * callers that reach this function with the buffers still allocated leak
- * them.
- */
-static void
-pingcli_shutdown(ptl_handle_ni_t nih, int err)
-{
-        int rc;
-
-        /* Yes, we are intentionally allowing us to fall through each
-         * case in to the next.  This allows us to pass an error
-         * code to just clean up the right stuff.
-         */
-        switch (err) {
-                case 1:
-                        /* Unlink any memory descriptors we may have used */
-                        if ((rc = PtlMDUnlink (client->md_out_head_h)))
-                                PDEBUG ("PtlMDUnlink", rc);
-                        /* fall through */
-                case 2:
-                        if ((rc = PtlMDUnlink (client->md_in_head_h)))
-                                PDEBUG ("PtlMDUnlink", rc);
-
-                        /* Free the event queue */
-                        if ((rc = PtlEQFree (client->eq)))
-                                PDEBUG ("PtlEQFree", rc);
-
-                        if ((rc = PtlMEUnlink (client->me)))
-                                PDEBUG ("PtlMEUnlink", rc);
-                        /* fall through */
-                case 3:
-                        PtlNIFini(nih);
-
-                        /* fall through */
-                case 4:
-                        /* Free our buffers */
-
-                        if (client != NULL)
-                                PORTAL_FREE (client,
-                                                sizeof(struct pingcli_data));
-        }
-
-
-        CDEBUG (D_OTHER, "ping client released resources\n");
-} /* pingcli_shutdown() */
-
-/*
- * Event callback for the client's receive queue.  Reads the magic word and
- * the sequence number from the start of the received data, complains about
- * an unexpected magic, and wakes the pinging task when the sequence number
- * is the one currently awaited (or when `count` is still 0).
- */
-static void pingcli_callback(ptl_event_t *ev)
-{
-        int magic = *(int *)(ev->md.start + ev->offset);
-        int seq   = *(int *)(ev->md.start + ev->offset + sizeof(unsigned));
-
-        if (magic != 0xcafebabe)
-                CERROR("Unexpected response %x\n", magic);
-
-        /* A reply for some other ping than the current one is only logged. */
-        if (count != 0 && seq != count) {
-                CERROR("Received response after timeout for %d\n",seq);
-                return;
-        }
-
-        wake_up_process (client->tsk);
-}
-
-
-/*
- * Run one ping session described by `args`: allocate the send and receive
- * buffers, initialise the NAL named by args->ioc_nal, attach a receive MD
- * on portal PTL_PING_CLIENT, then send args->ioc_count pings to
- * args->ioc_nid on portal PTL_PING_SERVER, sleeping up to
- * 20 * args->ioc_timeout ticks for each reply.
- *
- * Expects the file-scope `client` to have been allocated by the caller.
- * Always returns NULL; every exit path goes through pingcli_shutdown(),
- * which frees `client`.
- *
- * NOTE(review): error paths taken after the buffers are allocated do not
- * free client->outbuf/client->inbuf, and the success path frees the
- * buffers before pingcli_shutdown() unlinks the MD that refers to
- * client->inbuf.  Both look worth a follow-up.
- */
-static struct pingcli_data *
-pingcli_start(struct portal_ioctl_data *args)
-{
-        ptl_handle_ni_t nih = PTL_INVALID_HANDLE;
-        unsigned ping_head_magic = PING_HEADER_MAGIC;
-        int rc;
-        struct timeval tv1, tv2;
-        char str[PTL_NALFMT_SIZE];
-
-        client->tsk = current;
-        client->args = args;
-        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s),  \
-                        nal %x, size %u, count: %u, timeout: %u\n",
-                        args->ioc_nid,
-                        portals_nid2str(args->ioc_nal, args->ioc_nid, str),
-                        args->ioc_nal, args->ioc_size,
-                        args->ioc_count, args->ioc_timeout);
-
-
-        PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ;
-        if (client->outbuf == NULL)
-        {
-                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n",
-                        STDSIZE + args->ioc_size);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        PORTAL_ALLOC (client->inbuf,
-                        (args->ioc_size + STDSIZE) * args->ioc_count);
-        if (client->inbuf == NULL)
-        {
-                /* Report the buffer that actually failed, with its real size. */
-                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n",
-                        (args->ioc_size + STDSIZE) * args->ioc_count);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        /* Acquire and initialize the proper nal for portals.  Both PTL_OK
-         * and PTL_IFACE_DUP count as success, so fail only when rc is
-         * neither of them (the previous `||` test was always true). */
-        rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP)
-        {
-                CERROR ("NAL %x not loaded\n", args->ioc_nal);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        /* Based on the initialization acquire our unique portal ID. */
-        if ((rc = PtlGetId (nih, &client->myid)))
-        {
-                CERROR ("PtlGetId error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-        /* Setup the local match entries */
-        client->id_local.nid = PTL_NID_ANY;
-        client->id_local.pid = PTL_PID_ANY;
-
-        /* Setup the remote match entries */
-        client->id_remote.nid = args->ioc_nid;
-        client->id_remote.pid = 0;
-
-        if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT,
-                   client->id_local, 0, ~0, PTL_RETAIN,
-                   PTL_INS_AFTER, &client->me)))
-        {
-                CERROR ("PtlMEAttach error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-        /* Allocate the event queue for this network interface */
-        if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq)))
-        {
-                CERROR ("PtlEQAlloc error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-        count = args->ioc_count;
-
-        client->md_in_head.start     = client->inbuf;
-        client->md_in_head.length    = (args->ioc_size + STDSIZE)
-                                                * count;
-        client->md_in_head.threshold = PTL_MD_THRESH_INF;
-        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        client->md_in_head.user_ptr  = NULL;
-        client->md_in_head.eq_handle = client->eq;
-        memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count);
-
-        /* Attach the incoming buffer */
-        if ((rc = PtlMDAttach (client->me, client->md_in_head,
-                              PTL_UNLINK, &client->md_in_head_h))) {
-                CERROR ("PtlMDAttach error %d\n", rc);
-                pingcli_shutdown (nih, 1);
-                return (NULL);
-        }
-        /* Setup the outgoing ping header */
-        client->md_out_head.start     = client->outbuf;
-        client->md_out_head.length    = STDSIZE + args->ioc_size;
-        client->md_out_head.threshold = args->ioc_count;
-        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        client->md_out_head.user_ptr  = NULL;
-        client->md_out_head.eq_handle = PTL_EQ_NONE;
-
-        /* Outgoing packet layout: magic, sequence number, send timestamp. */
-        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
-
-        /* From here on `count` is the sequence number of the ping in
-         * flight; pingcli_callback() compares replies against it. */
-        count = 0;
-
-        /* Bind the outgoing ping header */
-        if ((rc=PtlMDBind (nih, client->md_out_head,
-                           PTL_UNLINK, &client->md_out_head_h))) {
-                CERROR ("PtlMDBind error %d\n", rc);
-                pingcli_shutdown (nih, 1);
-                return NULL;
-        }
-        while ((args->ioc_count - count)) {
-                memcpy (client->outbuf + sizeof(unsigned),
-                       &(count), sizeof(unsigned));
-                 /* Put the ping packet */
-                do_gettimeofday (&tv1);
-
-                memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1,
-                       sizeof(struct timeval));
-
-                if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
-                          client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) {
-                         PDEBUG ("PtlPut (header)", rc);
-                         pingcli_shutdown (nih, 1);
-                         return NULL;
-                }
-                CWARN ("Lustre: sent msg no %d\n", count);
-
-                /* Sleep until the callback wakes us or the timeout expires;
-                 * a zero return from schedule_timeout() means it expired. */
-                set_current_state (TASK_INTERRUPTIBLE);
-                rc = schedule_timeout (20 * args->ioc_timeout);
-                if (rc == 0) {
-                        CERROR ("timeout .....\n");
-                } else {
-                        do_gettimeofday (&tv2);
-                        CWARN("Reply in %u usec\n",
-                              (unsigned)((tv2.tv_sec - tv1.tv_sec)
-                                         * 1000000 +  (tv2.tv_usec - tv1.tv_usec)));
-                }
-                count++;
-        }
-
-        if (client->outbuf != NULL)
-                PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size);
-
-        if (client->inbuf != NULL)
-                PORTAL_FREE (client->inbuf,
-                               (args->ioc_size + STDSIZE) * args->ioc_count);
-
-        pingcli_shutdown (nih, 2);
-
-        /* Success! */
-        return NULL;
-} /* pingcli_setup() */
-
-
-
-/* called by the portals_ioctl for ping requests */
-/*
- * Allocate and zero the file-scope client state, then run the ping session
- * for `args`.  Returns 0 whether or not the allocation or the session
- * succeeds.
- */
-int kping_client(struct portal_ioctl_data *args)
-{
-        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
-
-        if (client != NULL) {
-                memset (client, 0, sizeof(struct pingcli_data));
-                pingcli_start (args);
-        } else {
-                CERROR ("Unable to allocate client structure\n");
-        }
-
-        return 0;
-} /* kping_client() */
-
-
-/* Module init: registers kping_client through PORTAL_SYMBOL_REGISTER and
- * always reports success. */
-static int __init pingcli_init(void)
-{
-        PORTAL_SYMBOL_REGISTER(kping_client);
-        return 0;
-} /* pingcli_init() */
-
-
-/* Module exit: undoes the registration made in pingcli_init(). */
-static void /*__exit*/ pingcli_cleanup(void)
-{
-        PORTAL_SYMBOL_UNREGISTER (kping_client);
-} /* pingcli_cleanup() */
-
-
-MODULE_AUTHOR("Brian Behlendorf (LLNL)");
-MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
-MODULE_LICENSE("GPL");
-
-module_init(pingcli_init);
-module_exit(pingcli_cleanup);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-EXPORT_SYMBOL (kping_client);
-#endif
diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c
deleted file mode 100644 (file)
index 49e82af..0000000
+++ /dev/null
@@ -1,308 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
- *        Amey Inamdar     <amey@calsoftinc.com>
- *        Kedar Sovani     <kedar@calsoftinc.com>
- *
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#define DEBUG_SUBSYSTEM S_PINGER
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include "ping.h"
-
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#else
-#include <linux/tqueue.h>
-#endif
-#include <linux/wait.h>
-#include <linux/smp_lock.h>
-
-#include <asm/unistd.h>
-#include <asm/semaphore.h>
-
-#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval))
-#define MAXSIZE (16*1024)
-
-static unsigned ping_head_magic;
-static unsigned ping_bulk_magic;
-static int nal  = SOCKNAL;                            // Your NAL,
-static unsigned long packets_valid = 0;         // Valid packets 
-static int running = 1;
-atomic_t pkt;
-       
-static struct pingsrv_data *server=NULL;             // Our ping server
-
-/*
- * Release the server's portals resources and free `server`.
- *
- * `err` picks the entry point into the fall-through switch below:
- *   1    - unlink the incoming MD, then everything listed for 2
- *   2    - free the EQ and unlink the ME, then 3
- *   3    - PtlNIFini(), then 4/5
- *   4, 5 - free the receive buffer and the server structure
- * Any other value releases nothing.
- *
- * Always returns NULL so that callers can write
- * `return pingsrv_shutdown(n);` on their failure paths.
- */
-static void *pingsrv_shutdown(int err)
-{
-        int rc;
-
-        /* Yes, we are intentionally allowing us to fall through each
-         * case in to the next.  This allows us to pass an error
-         * code to just clean up the right stuff.
-         */
-        switch (err) {
-                case 1:
-                        /* Unlink any memory descriptors we may have used */
-                        if ((rc = PtlMDUnlink (server->mdin_h)))
-                                PDEBUG ("PtlMDUnlink (out head buffer)", rc);
-                        /* fall through */
-                case 2:
-                        /* Free the event queue */
-                        if ((rc = PtlEQFree (server->eq)))
-                                PDEBUG ("PtlEQFree", rc);
-
-                        /* Unlink the client portal from the ME list */
-                        if ((rc = PtlMEUnlink (server->me)))
-                                        PDEBUG ("PtlMEUnlink", rc);
-
-                        /* fall through */
-                case 3:
-                        PtlNIFini (server->ni);
-
-                        /* fall through */
-                case 4:
-
-                case 5:
-                        /* Test `server` before looking inside it; the old
-                         * code read server->in_buf first, which made the
-                         * NULL test that followed pointless. */
-                        if (server != NULL) {
-                                if (server->in_buf != NULL)
-                                        PORTAL_FREE (server->in_buf, MAXSIZE);
-
-                                PORTAL_FREE (server,
-                                             sizeof (struct pingsrv_data));
-                        }
-
-        }
-
-        CDEBUG (D_OTHER, "ping sever resources released\n");
-        return NULL;
-} /* pingsrv_shutdown() */
-
-
-/*
- * Body of the "pingsrv" kernel thread.
- *
- * Sleeps until pingsrv_packet() has counted an event in `pkt`, then, for
- * each counted packet: checks the header magic of the event saved in
- * server->evnt, overwrites the magic at the start of server->in_buf with
- * the reply magic, binds an outgoing MD over that buffer, re-attaches the
- * incoming MD, and PtlPut()s the reply to the initiator on portal
- * PTL_PING_CLIENT.
- *
- * The loop ends when `running` is cleared; the thread then releases all
- * server resources and sets `running` back to 1, which is what
- * pingsrv_cleanup() waits for.
- *
- * The magic words are handled as `unsigned`: with `unsigned long` on a
- * 64-bit build the sign-extended header never compared equal to
- * 0xdeadbeef, and the 8-byte copy of the reply magic overwrote the
- * sequence number that follows it in the packet.
- *
- * NOTE(review): the PtlMDBind failure path frees the server and returns
- * without setting `running` back to 1.
- *
- * Returns 0 on a normal stop, 1 if PtlMDBind fails.
- */
-int pingsrv_thread(void *arg)
-{
-        int rc;
-        unsigned magic;
-        unsigned ping_bulk_magic = PING_BULK_MAGIC;
-
-        kportal_daemonize ("pingsrv");
-        server->tsk =  current;
-
-        while (running) {
-                set_current_state (TASK_INTERRUPTIBLE);
-                if (atomic_read (&pkt) == 0) {
-                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
-                        continue;
-                }
-
-                /* First word of the received data is the header magic. */
-                magic = *((unsigned *)(server->evnt.md.start
-                                        + server->evnt.offset));
-
-                if (magic != PING_HEADER_MAGIC) {
-                        CERROR("Unexpected Packet to the server\n");
-                }
-
-                /* Replace only the magic word; the rest of the buffer goes
-                 * back to the client unchanged. */
-                memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic));
-
-                server->mdout.length    = server->evnt.rlength;
-                server->mdout.start     = server->in_buf;
-                server->mdout.threshold = 1;
-                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-                server->mdout.user_ptr  = NULL;
-                server->mdout.eq_handle = PTL_EQ_NONE;
-
-                /* Bind the outgoing buffer */
-                if ((rc = PtlMDBind (server->ni, server->mdout,
-                                     PTL_UNLINK, &server->mdout_h))) {
-                         PDEBUG ("PtlMDBind", rc);
-                         pingsrv_shutdown (1);
-                         return 1;
-               }
-
-
-                /* Re-arm the receive side for the next ping. */
-                server->mdin.start     = server->in_buf;
-                server->mdin.length    = MAXSIZE;
-                server->mdin.threshold = 1;
-                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-                server->mdin.user_ptr  = NULL;
-                server->mdin.eq_handle = server->eq;
-
-                if ((rc = PtlMDAttach (server->me, server->mdin,
-                        PTL_UNLINK, &server->mdin_h))) {
-                        PDEBUG ("PtlMDAttach (bulk)", rc);
-                        CDEBUG (D_OTHER, "ping server resources allocated\n");
-                }
-
-                if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
-                         server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0)))
-                         PDEBUG ("PtlPut", rc);
-
-                atomic_dec (&pkt);
-
-        }
-        pingsrv_shutdown (1);
-        running = 1;
-        return 0;
-}
-
-/* Count one received packet in `pkt` and wake the server thread. */
-static void pingsrv_packet(ptl_event_t *ev)
-{
-        atomic_inc (&pkt);
-        wake_up_process (server->tsk);
-} /* pingsrv_packet() */
-
-/*
- * Event-queue callback for the server.  Saves a copy of the event in
- * server->evnt, logs the sender and the first three ints of the received
- * data, bumps the packet counter and hands off to pingsrv_packet().
- */
-static void pingsrv_callback(ptl_event_t *ev)
-{
-        int *hdr;
-
-        if (ev == NULL) {
-                CERROR ("null in callback, ev=%p\n", ev);
-                return;
-        }
-
-        /* pingsrv_thread() works from this saved copy. */
-        server->evnt = *ev;
-
-        /* hdr[0..2] are the words logged as head/seq/size below. */
-        hdr = (int *)(ev->md.start + ev->offset);
-
-        CWARN ("received ping from nid "LPX64" "
-               "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n",
-               ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
-               hdr[0], hdr[1], hdr[2]);
-
-        packets_valid++;
-
-        pingsrv_packet(ev);
-} /* pingsrv_callback() */
-
-
-/*
- * Bring up the ping server: initialise the NAL selected by `nal`, attach a
- * match entry on portal PTL_PING_SERVER, allocate the event queue and the
- * MAXSIZE receive buffer, and attach the incoming MD.
- *
- * Expects the file-scope `server` to have been allocated by the caller.
- * Returns `server` on success.  On any failure the resources acquired so
- * far are released through pingsrv_shutdown() (which also frees `server`)
- * and NULL is returned.
- *
- * Each failure path passes the pingsrv_shutdown() level that matches what
- * exists at that point, and a PtlMDAttach failure is now reported as a
- * failure instead of being logged and then treated as success.
- */
-static struct pingsrv_data *pingsrv_setup(void)
-{
-        int rc;
-
-        server->ni = PTL_INVALID_HANDLE;
-
-        /* Acquire and initialize the proper nal for portals. */
-        rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni);
-        if (!(rc == PTL_OK || rc == PTL_IFACE_DUP)) {
-                CDEBUG (D_OTHER, "NAL %x not loaded\n", nal);
-                return pingsrv_shutdown (4);
-        }
-
-
-        /* Based on the initialization acquire our unique portal ID. */
-        if ((rc = PtlGetId (server->ni, &server->my_id))) {
-                PDEBUG ("PtlGetId", rc);
-                /* Only the interface exists so far. */
-                return pingsrv_shutdown (3);
-        }
-
-        server->id_local.nid = PTL_NID_ANY;
-        server->id_local.pid = PTL_PID_ANY;
-
-        /* Attach a match entries for header packets */
-        if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
-            server->id_local,0, ~0,
-            PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
-                PDEBUG ("PtlMEAttach", rc);
-                /* Still only the interface to release. */
-                return pingsrv_shutdown (3);
-        }
-
-
-        if ((rc = PtlEQAlloc (server->ni, 1024, &pingsrv_callback,
-                                        &server->eq))) {
-                PDEBUG ("PtlEQAlloc (callback)", rc);
-                return pingsrv_shutdown (2);
-        }
-
-        PORTAL_ALLOC (server->in_buf, MAXSIZE);
-        if(!server->in_buf){
-                CDEBUG (D_OTHER,"Allocation error\n");
-                return pingsrv_shutdown(2);
-        }
-
-        /* Setup the incoming buffer */
-        server->mdin.start     = server->in_buf;
-        server->mdin.length    = MAXSIZE;
-        server->mdin.threshold = 1;
-        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        server->mdin.user_ptr  = NULL;
-        server->mdin.eq_handle = server->eq;
-        memset (server->in_buf, 0, STDSIZE);
-
-        if ((rc = PtlMDAttach (server->me, server->mdin,
-                PTL_UNLINK, &server->mdin_h))) {
-                PDEBUG ("PtlMDAttach (bulk)", rc);
-                /* Without the incoming MD no ping can be received. */
-                return pingsrv_shutdown (2);
-        }
-
-        CDEBUG (D_OTHER, "ping server resources allocated\n");
-
-        /* Success! */
-        return server;
-} /* pingsrv_setup() */
-
-/*
- * Set up the server and start its worker thread.
- *
- * Returns 0 on success, -ENOMEM if pingsrv_setup() fails, or the negative
- * value returned by kernel_thread() if the thread cannot be created (in
- * which case everything pingsrv_setup() acquired is released again).
- */
-static int pingsrv_start(void)
-{
-        int rc;
-
-        /* Setup our server */
-        if (!pingsrv_setup()) {
-                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
-                return -ENOMEM;
-        }
-
-        rc = kernel_thread (pingsrv_thread,NULL,0);
-        if (rc < 0) {
-                CERROR ("Unable to start pingsrv thread: %d\n", rc);
-                pingsrv_shutdown (1);
-                return rc;
-        }
-
-        return 0;
-} /* pingsrv_start() */
-
-
-
-/*
- * Module init: record the magic words, allocate the server state and start
- * the server.  Returns -ENOMEM if the state cannot be allocated, otherwise
- * whatever pingsrv_start() returns.
- */
-static int __init pingsrv_init(void)
-{
-        ping_head_magic = PING_HEADER_MAGIC;
-        ping_bulk_magic = PING_BULK_MAGIC;
-        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
-        if (server == NULL) {
-                /* pingsrv_setup() dereferences `server` straight away. */
-                CERROR ("Unable to allocate server structure\n");
-                return -ENOMEM;
-        }
-        return pingsrv_start ();
-} /* pingsrv_init() */
-
-
-/*
- * Module exit: ask the server thread to stop and wait until it has.
- *
- * Clears `running`, wakes the thread, then polls once per HZ ticks until
- * the thread sets `running` back to 1 on its way out.
- *
- * NOTE(review): no matching create of "net/pingsrv" is visible in this
- * file, so the remove_proc_entry() call looks like a leftover -- confirm.
- */
-static void /*__exit*/ pingsrv_cleanup(void)
-{
-        remove_proc_entry ("net/pingsrv", NULL);
-        
-        running = 0;
-        wake_up_process (server->tsk);
-        while (running != 1) {
-                set_current_state (TASK_UNINTERRUPTIBLE);
-                schedule_timeout (HZ);
-        }
-        
-} /* pingsrv_cleanup() */
-
-
-MODULE_PARM(nal, "i");
-MODULE_PARM_DESC(nal, "Use the specified NAL "
-                "(2-ksocknal, 1-kqswnal)");
-MODULE_AUTHOR("Brian Behlendorf (LLNL)");
-MODULE_DESCRIPTION("A kernel space ping server for portals testing");
-MODULE_LICENSE("GPL");
-
-module_init(pingsrv_init);
-module_exit(pingsrv_cleanup);
diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c
deleted file mode 100644 (file)
index d9970e7..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
- *         Kedar Sovani (kedar@calsoftinc.com)
- *         Amey Inamdar (amey@calsoftinc.com)
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-/* This is a stripped-down version of pinger. It follows a single
- * request-response protocol. Doesn't do Bulk data pinging. Also doesn't
- * send multiple packets in a single ioctl.
- */
-
-
-#define DEBUG_SUBSYSTEM S_PINGER
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include "ping.h"
-/* int portal_debug = D_PING_CLI;  */
-
-
-#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes
-                                                   assumed */
-
-/* This should be enclosed in a structure */
-
-static struct pingcli_data *client = NULL;
-
-static int count = 0;
-
-/*
- * Release the client's portals resources, its buffers and `client` itself.
- *
- * `err` picks the entry point into the fall-through switch below:
- *   1 - unlink the outgoing MD, then everything listed for 2
- *   2 - free the EQ and unlink the ME, then 3
- *   3 - PtlNIFini(nih), then 4
- *   4 - free outbuf, inbuf and the client structure
- * Any other value releases nothing.
- */
-static void
-pingcli_shutdown(ptl_handle_ni_t nih, int err)
-{
-        int rc;
-
-        /* Yes, we are intentionally allowing us to fall through each
-         * case in to the next.  This allows us to pass an error
-         * code to just clean up the right stuff.
-         */
-        switch (err) {
-                case 1:
-                        /* Unlink any memory descriptors we may have used */
-                        if ((rc = PtlMDUnlink (client->md_out_head_h)))
-                                PDEBUG ("PtlMDUnlink", rc);
-                        /* fall through */
-                case 2:
-                        /* Free the event queue */
-                        if ((rc = PtlEQFree (client->eq)))
-                                PDEBUG ("PtlEQFree", rc);
-
-                        if ((rc = PtlMEUnlink (client->me)))
-                                PDEBUG ("PtlMEUnlink", rc);
-                        /* fall through */
-                case 3:
-                        PtlNIFini (nih);
-
-                        /* fall through */
-                case 4:
-                        /* Free our buffers.  Test `client` before looking
-                         * inside it; the old code read client->outbuf
-                         * first and only then checked for NULL. */
-                        if (client != NULL) {
-                                if (client->outbuf != NULL)
-                                        PORTAL_FREE (client->outbuf, STDSIZE);
-
-                                if (client->inbuf != NULL)
-                                        PORTAL_FREE (client->inbuf, STDSIZE);
-
-                                PORTAL_FREE (client,
-                                                sizeof(struct pingcli_data));
-                        }
-        }
-
-
-        CDEBUG (D_OTHER, "ping client released resources\n");
-} /* pingcli_shutdown() */
-
-/* Event-queue callback: any event wakes the task recorded in client->tsk. */
-static void pingcli_callback(ptl_event_t *ev)
-{
-        wake_up_process (client->tsk);
-}
-
-
-/*
- * Send a single ping to args->ioc_nid and wait for the reply.
- *
- * Allocates the STDSIZE send and receive buffers, initialises the NAL
- * named by args->ioc_nal, attaches a receive MD on portal
- * PTL_PING_CLIENT, PtlPut()s one header to portal PTL_PING_SERVER and
- * sleeps up to 20 * args->ioc_timeout ticks for the callback to wake us.
- *
- * Expects the file-scope `client` to have been allocated by the caller.
- * Always returns NULL; every exit path goes through pingcli_shutdown(),
- * which frees the buffers and `client`.
- */
-static struct pingcli_data *
-pingcli_start(struct portal_ioctl_data *args)
-{
-        ptl_handle_ni_t nih = PTL_INVALID_HANDLE;
-        unsigned ping_head_magic = PING_HEADER_MAGIC;
-        char str[PTL_NALFMT_SIZE];
-        int rc;
-
-        client->tsk = current;
-        client->args = args;
-
-        /* portals_nid2str() takes (nal, nid, buffer), as in ping_cli.c;
-         * the two leading arguments used to be swapped here. */
-        CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64" (%s),  \
-                        nal %x, size %u, count: %u, timeout: %u\n",
-                        args->ioc_nid,
-                        portals_nid2str(args->ioc_nal, args->ioc_nid, str),
-                        args->ioc_nal, args->ioc_size,
-                        args->ioc_count, args->ioc_timeout);
-
-
-        PORTAL_ALLOC (client->outbuf, STDSIZE) ;
-        if (client->outbuf == NULL)
-        {
-                CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        PORTAL_ALLOC (client->inbuf,  STDSIZE);
-
-        if (client->inbuf == NULL)
-        {
-                /* Name the buffer that actually failed. */
-                CERROR ("Unable to allocate in_buf ("LPSZ" bytes)\n", STDSIZE);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        /* Acquire and initialize the proper nal for portals. */
-        rc = PtlNIInit(args->ioc_nal, 0, NULL, NULL, &nih);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP)
-        {
-                CERROR ("NAL %x not loaded.\n", args->ioc_nal);
-                pingcli_shutdown (nih, 4);
-                return (NULL);
-        }
-
-        /* Based on the initialization acquire our unique portal ID. */
-        if ((rc = PtlGetId (nih, &client->myid)))
-        {
-                CERROR ("PtlGetId error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-        /* Setup the local match entries */
-        client->id_local.nid = PTL_NID_ANY;
-        client->id_local.pid = PTL_PID_ANY;
-
-        /* Setup the remote match entries */
-        client->id_remote.nid = args->ioc_nid;
-        client->id_remote.pid = 0;
-
-        if ((rc = PtlMEAttach (nih, PTL_PING_CLIENT,
-                   client->id_local, 0, ~0, PTL_RETAIN,
-                   PTL_INS_AFTER, &client->me)))
-        {
-                CERROR ("PtlMEAttach error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-        /* Allocate the event queue for this network interface */
-        if ((rc = PtlEQAlloc (nih, 64, pingcli_callback, &client->eq)))
-        {
-                CERROR ("PtlEQAlloc error %d\n", rc);
-                pingcli_shutdown (nih, 2);
-                return (NULL);
-        }
-
-
-        client->md_in_head.start     = client->inbuf;
-        client->md_in_head.length    = STDSIZE;
-        client->md_in_head.threshold = 1;
-        client->md_in_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        client->md_in_head.user_ptr  = NULL;
-        client->md_in_head.eq_handle = client->eq;
-        memset (client->inbuf, 0, STDSIZE);
-
-        /* Attach the incoming buffer */
-        if ((rc = PtlMDAttach (client->me, client->md_in_head,
-                              PTL_UNLINK, &client->md_in_head_h))) {
-                CERROR ("PtlMDAttach error %d\n", rc);
-                pingcli_shutdown (nih, 1);
-                return (NULL);
-        }
-
-        /* Setup the outgoing ping header */
-        client->md_out_head.start     = client->outbuf;
-        client->md_out_head.length    = STDSIZE;
-        client->md_out_head.threshold = 1;
-        client->md_out_head.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        client->md_out_head.user_ptr  = NULL;
-        client->md_out_head.eq_handle = PTL_EQ_NONE;
-
-        memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic));
-
-        /* Bind the outgoing ping header */
-        if ((rc=PtlMDBind (nih, client->md_out_head,
-                           PTL_UNLINK, &client->md_out_head_h))) {
-                CERROR ("PtlMDBind error %d\n", rc);
-                pingcli_shutdown (nih, 1);
-                return (NULL);
-        }
-        /* Put the ping packet */
-        if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ,
-                         client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) {
-                PDEBUG ("PtlPut (header)", rc);
-                pingcli_shutdown (nih, 1);
-                return NULL;
-        }
-
-        count = 0;
-        /* Sleep until the callback wakes us or the timeout expires; a zero
-         * return from schedule_timeout() means it expired. */
-        set_current_state (TASK_INTERRUPTIBLE);
-        rc = schedule_timeout (20 * args->ioc_timeout);
-        if (rc == 0) {
-                CERROR ("Time out on the server\n");
-                pingcli_shutdown (nih, 2);
-                return NULL;
-        } else {
-                CWARN("Received respose from the server \n");
-        }
-
-        pingcli_shutdown (nih, 2);
-
-        /* Success! */
-        return NULL;
-} /* pingcli_setup() */
-
-
-
-/* called by the portals_ioctl for ping requests */
-/*
- * Allocate and zero the file-scope client state, then run the single-ping
- * session for `args`.  Returns 0 whether or not the allocation or the
- * session succeeds.
- */
-int kping_client(struct portal_ioctl_data *args)
-{
-
-        PORTAL_ALLOC (client, sizeof(struct pingcli_data));
-        /* Check the allocation before touching the memory; the memset used
-         * to run ahead of this test. */
-        if (client == NULL)
-        {
-                CERROR ("Unable to allocate client structure\n");
-                return (0);
-        }
-        memset (client, 0, sizeof(struct pingcli_data));
-        pingcli_start (args);
-
-        return 0;
-} /* kping_client() */
-
-
-/* Module init: registers kping_client through PORTAL_SYMBOL_REGISTER and
- * always reports success. */
-static int __init pingcli_init(void)
-{
-        PORTAL_SYMBOL_REGISTER(kping_client);
-        return 0;
-} /* pingcli_init() */
-
-
-/* Module exit: undoes the registration made in pingcli_init(). */
-static void /*__exit*/ pingcli_cleanup(void)
-{
-        PORTAL_SYMBOL_UNREGISTER (kping_client);
-} /* pingcli_cleanup() */
-
-
-MODULE_AUTHOR("Brian Behlendorf (LLNL)");
-MODULE_DESCRIPTION("A simple kernel space ping client for portals testing");
-MODULE_LICENSE("GPL");
-
-module_init(pingcli_init);
-module_exit(pingcli_cleanup);
-
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-EXPORT_SYMBOL (kping_client);
-#endif
diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c
deleted file mode 100644 (file)
index 069423d..0000000
+++ /dev/null
@@ -1,294 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL)
- * Author: Brian Behlendorf <behlendorf1@llnl.gov>
- *        Amey Inamdar     <amey@calsoftinc.com>
- *        Kedar Sovani     <kedar@calsoftinc.com>
- *
- *
- * This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- * Portals is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * Portals is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Portals; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* This is a stripped-down version of pinger. It follows a single
- * request-response protocol. It doesn't do bulk data pinging, and it
- * doesn't send multiple packets in a single ioctl.
- */
-
-#define DEBUG_SUBSYSTEM S_PINGER
-
-#include <linux/kp30.h>
-#include <portals/p30.h>
-#include "ping.h"
-
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/version.h>
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#include <linux/workqueue.h>
-#else
-#include <linux/tqueue.h>
-#endif
-#include <linux/wait.h>
-#include <linux/smp_lock.h>
-
-#include <asm/unistd.h>
-#include <asm/semaphore.h>
-
-/* Size in bytes of the single ping buffer (server->in_buf); also used as
- * the length of both the incoming and the outgoing memory descriptor. */
-#define STDSIZE (sizeof(int) + sizeof(int) + 4)
-
-static int nal  = PTL_IFACE_DEFAULT;            // Your NAL,
-static unsigned long packets_valid = 0;         // Valid packets 
-/* Thread run flag: pingsrv_cleanup() clears it to ask pingsrv_thread() to
- * stop, and the thread sets it back to 1 once it has shut down. */
-static int running = 1;
-/* Pings received but not yet answered: incremented in pingsrv_packet(),
- * decremented in pingsrv_thread(). */
-atomic_t pkt;
-       
-static struct pingsrv_data *server=NULL;             // Our ping server
-
-/* NOTE(review): this version conditional has no body -- presumably a
- * leftover that can be dropped. */
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-#endif
-
-/* Release the ping server's resources.
- *
- * `err' selects the entry point into the teardown sequence; every case
- * deliberately falls through to the next, so a lower number releases
- * more:
- *   1: unlink the incoming memory descriptor, then everything below
- *   2: free the event queue and unlink the match entry, then below
- *   3: shut down the network interface, then below
- *   4: free the receive buffer and the server structure
- * Any other value releases nothing.
- *
- * Always returns NULL, which lets pingsrv_setup() write
- * `return pingsrv_shutdown (n);' on its error paths.
- */
-static void *pingsrv_shutdown(int err)
-{
-        int rc;
-
-        switch (err) {
-                case 1:
-                        /* Unlink any memory descriptors we may have used */
-                        if ((rc = PtlMDUnlink (server->mdin_h)))
-                                PDEBUG ("PtlMDUnlink (out head buffer)", rc);
-                        /* fall through */
-                case 2:
-                        /* Free the event queue */
-                        if ((rc = PtlEQFree (server->eq)))
-                                PDEBUG ("PtlEQFree", rc);
-
-                        /* Unlink the client portal from the ME list */
-                        if ((rc = PtlMEUnlink (server->me)))
-                                PDEBUG ("PtlMEUnlink", rc);
-                        /* fall through */
-                case 3:
-                        PtlNIFini(server->ni);
-                        /* fall through */
-                case 4:
-                        /* Test `server' before looking inside it; reading
-                         * server->in_buf first would make the NULL test
-                         * pointless. */
-                        if (server != NULL) {
-                                if (server->in_buf != NULL)
-                                        PORTAL_FREE (server->in_buf, STDSIZE);
-
-                                PORTAL_FREE (server,
-                                             sizeof (struct pingsrv_data));
-                        }
-        }
-
-        CDEBUG (D_OTHER, "ping sever resources released\n");
-        return NULL;
-} /* pingsrv_shutdown() */
-
-
-/* Body of the ping server kernel thread (started from pingsrv_start()).
- *
- * Sleeps until pingsrv_packet() reports a pending ping (pkt != 0), then
- * for each one: binds an outgoing MD over server->in_buf, attaches a
- * fresh incoming MD to the match entry, sends the buffer back to the
- * initiator recorded in server->evnt on portal PTL_PING_CLIENT and
- * decrements pkt.
- *
- * When `running' is cleared the loop ends, resources are released with
- * pingsrv_shutdown (1) and `running' is set back to 1, which is the
- * condition pingsrv_cleanup() polls for.
- *
- * `arg' is unused.  Returns 0 on a normal stop, 1 if PtlMDBind fails.
- */
-int pingsrv_thread(void *arg)
-{
-        int rc;
-        
-        kportal_daemonize ("pingsrv");
-        server->tsk = current;
-        
-        while (running) {
-                /* mark ourselves sleeping before testing pkt, then sleep
-                 * until pingsrv_packet() or pingsrv_cleanup() wakes us */
-                set_current_state (TASK_INTERRUPTIBLE);
-                if (atomic_read (&pkt) == 0) {
-                        schedule_timeout (MAX_SCHEDULE_TIMEOUT);
-                        continue;
-                }
-                               
-                server->mdout.start     = server->in_buf;
-                server->mdout.length    = STDSIZE;
-                server->mdout.threshold = 1; 
-                server->mdout.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-                server->mdout.user_ptr  = NULL;
-                server->mdout.eq_handle = PTL_EQ_NONE;
-       
-                /* Bind the outgoing buffer */
-                /* NOTE(review): this failure path frees `server' and
-                 * returns without setting running back to 1, so a later
-                 * pingsrv_cleanup() looks like it would use freed memory
-                 * and then wait forever -- confirm. */
-                if ((rc = PtlMDBind (server->ni, server->mdout, 
-                                     PTL_UNLINK, &server->mdout_h))) {
-                         PDEBUG ("PtlMDBind", rc);
-                         pingsrv_shutdown (1);
-                         return 1;
-               }
-         
-                
-                /* Attach a new incoming MD for the next ping; the settings
-                 * are the same as the one attached in pingsrv_setup() */
-                server->mdin.start     = server->in_buf;
-                server->mdin.length    = STDSIZE;
-                server->mdin.threshold = 1; 
-                server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-                server->mdin.user_ptr  = NULL;
-                server->mdin.eq_handle = server->eq;
-        
-                /* NOTE(review): the "resources allocated" message below is
-                 * only emitted when PtlMDAttach fails, and the failure is
-                 * otherwise ignored -- presumably unintended. */
-                if ((rc = PtlMDAttach (server->me, server->mdin,
-                        PTL_UNLINK, &server->mdin_h))) {
-                        PDEBUG ("PtlMDAttach (bulk)", rc);
-                        CDEBUG (D_OTHER, "ping server resources allocated\n");
-                }
-                
-                /* Send the reply to whoever sent the last recorded ping */
-                if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ,
-                         server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0)))
-                         PDEBUG ("PtlPut", rc);
-                
-                atomic_dec (&pkt);
-                
-        }
-        pingsrv_shutdown (1);
-        /* tell pingsrv_cleanup() that the thread has finished */
-        running = 1;
-        return 0;    
-}
-
-/* Count one more pending ping and wake the server thread so that it can
- * answer it.  `ev' is not used here. */
-static void pingsrv_packet(ptl_event_t *ev)
-{
-        atomic_inc (&pkt);
-        wake_up_process (server->tsk);
-} /* pingsrv_packet() */
-
-/* Event queue callback (registered with PtlEQAlloc in pingsrv_setup()).
- *
- * Saves a copy of the event in server->evnt -- pingsrv_thread() later
- * replies to evnt.initiator -- logs the ping, bumps packets_valid and
- * schedules the reply through pingsrv_packet().  A NULL event is logged
- * and ignored.
- */
-static void pingsrv_callback(ptl_event_t *ev)
-{
-        
-        if (ev == NULL) {
-                CERROR ("null in callback, ev=%p\n", ev);
-                return;
-        }
-        server->evnt = *ev;
-        
-        /* `head' is the first int of the received data, read at
-         * ev->offset within the memory descriptor */
-        CWARN("Lustre: received ping from nid "LPX64" "
-              "(off=%u rlen=%u mlen=%u head=%x)\n",
-              ev->initiator.nid, ev->offset, ev->rlength, ev->mlength,
-              *((int *)(ev->md.start + ev->offset)));
-        
-        packets_valid++;
-
-        pingsrv_packet(ev);
-        
-} /* pingsrv_callback() */
-
-
-/* Bring up the ping server on the NAL chosen by the `nal' module
- * parameter.
- *
- * In order: initialise the network interface, fetch our portal id,
- * attach a match entry on portal PTL_PING_SERVER that accepts any
- * nid/pid, allocate an event queue that delivers to pingsrv_callback(),
- * allocate the STDSIZE receive buffer and attach it as the incoming
- * memory descriptor.
- *
- * Expects `server' to have been allocated by the caller.
- *
- * Returns `server' on success.  On any failure everything created so far
- * is released through pingsrv_shutdown() -- which also frees `server' --
- * and NULL is returned.
- */
-static struct pingsrv_data *pingsrv_setup(void)
-{
-        int rc;
-
-        /* Acquire and initialize the proper nal for portals. */
-        server->ni = PTL_INVALID_HANDLE;
-
-        rc = PtlNIInit(nal, 0, NULL, NULL, &server->ni);
-        if (rc != PTL_OK && rc != PTL_IFACE_DUP) {
-                CDEBUG (D_OTHER, "Nal %x not loaded.\n", nal);
-                return pingsrv_shutdown (4);
-        }
-
-        /* Based on the initialization acquire our unique portal ID.
-         * Only the NI exists at this point, so unwind from stage 3
-         * rather than 2: there is no EQ or ME to release yet. */
-        if ((rc = PtlGetId (server->ni, &server->my_id))) {
-                PDEBUG ("PtlGetId", rc);
-                return pingsrv_shutdown (3);
-        }
-
-        server->id_local.nid = PTL_NID_ANY;
-        server->id_local.pid = PTL_PID_ANY;
-
-        /* Attach a match entry for header packets */
-        if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER,
-            server->id_local,0, ~0,
-            PTL_RETAIN, PTL_INS_AFTER, &server->me))) {
-                PDEBUG ("PtlMEAttach", rc);
-                /* still nothing but the NI to undo */
-                return pingsrv_shutdown (3);
-        }
-
-
-        if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback,
-                                        &server->eq))) {
-                PDEBUG ("PtlEQAlloc (callback)", rc);
-                /* The ME exists but the EQ does not: unlink the ME here
-                 * and let stage 3 release the rest. */
-                if ((rc = PtlMEUnlink (server->me)))
-                        PDEBUG ("PtlMEUnlink", rc);
-                return pingsrv_shutdown (3);
-        }
-
-        PORTAL_ALLOC (server->in_buf, STDSIZE);
-        if(!server->in_buf){
-                CDEBUG (D_OTHER,"Allocation error\n");
-                return pingsrv_shutdown(2);
-        }
-
-        /* Setup the incoming buffer */
-        server->mdin.start     = server->in_buf;
-        server->mdin.length    = STDSIZE;
-        server->mdin.threshold = 1;
-        server->mdin.options   = PTL_MD_EVENT_START_DISABLE | PTL_MD_OP_PUT;
-        server->mdin.user_ptr  = NULL;
-        server->mdin.eq_handle = server->eq;
-        memset (server->in_buf, 0, STDSIZE);
-
-        /* Without an incoming MD no ping can ever be received, so treat
-         * a failed attach as a setup failure instead of carrying on. */
-        if ((rc = PtlMDAttach (server->me, server->mdin,
-                PTL_UNLINK, &server->mdin_h))) {
-                PDEBUG ("PtlMDAttach (bulk)", rc);
-                return pingsrv_shutdown (2);
-        }
-        CDEBUG (D_OTHER, "ping server resources allocated\n");
-
-        /* Success! */
-        return server;
-} /* pingsrv_setup() */
-
-/* Set the server up and start the thread that answers pings.
- *
- * Returns 0 on success, -ENOMEM if pingsrv_setup() fails, or the
- * negative value returned by kernel_thread() if the thread cannot be
- * created (the server resources are released again in that case).
- */
-static int pingsrv_start(void)
-{
-        int rc;
-
-        /* Setup our server */
-        if (!pingsrv_setup()) {
-                CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n");
-                return -ENOMEM;
-        }
-
-        /* Without the thread nothing would ever set server->tsk or reply
-         * to a ping, so do not report success if it did not start. */
-        rc = kernel_thread (pingsrv_thread,NULL,0);
-        if (rc < 0) {
-                CERROR ("Can't start pingsrv thread: %d\n", rc);
-                pingsrv_shutdown (1);
-                return rc;
-        }
-        return 0;
-} /* pingsrv_start() */
-
-
-
-/* Module load hook: allocate the server structure and start the server.
- *
- * Returns 0 on success, -ENOMEM if the structure cannot be allocated,
- * otherwise whatever pingsrv_start() returns.
- */
-static int __init pingsrv_init(void)
-{
-        PORTAL_ALLOC (server, sizeof(struct pingsrv_data));
-        /* pingsrv_setup() writes through `server' straight away, so a
-         * failed allocation has to be caught here. */
-        if (server == NULL) {
-                CERROR ("Unable to allocate server structure\n");
-                return -ENOMEM;
-        }
-        return pingsrv_start ();
-} /* pingsrv_init() */
-
-
-/* Module unload hook: ask the server thread to stop and wait for it.
- *
- * Clears `running', wakes the thread, then polls once per second (HZ
- * ticks) until pingsrv_thread() has released its resources and set
- * `running' back to 1.
- */
-static void /*__exit*/ pingsrv_cleanup(void)
-{
-        /* NOTE(review): nothing visible in this file creates a
-         * "net/pingsrv" proc entry -- confirm this call is still needed. */
-        remove_proc_entry ("net/pingsrv", NULL);
-        
-        running = 0;
-        wake_up_process (server->tsk);
-        while (running != 1) {
-                set_current_state (TASK_UNINTERRUPTIBLE);
-                schedule_timeout (HZ);
-        }
-        
-} /* pingsrv_cleanup() */
-
-
-/* `nal' is an integer module parameter selecting the NAL to use.
- * NOTE(review): the description below lists 2-ksocknal and 1-kqswnal,
- * while startserver.sh loads this module with nal=2 (tcp), nal=4 (elan)
- * and nal=3 (gm) -- confirm which numbering is current. */
-MODULE_PARM(nal, "i");
-MODULE_PARM_DESC(nal, "Use the specified NAL "
-                "(2-ksocknal, 1-kqswnal)");
-MODULE_AUTHOR("Brian Behlendorf (LLNL)");
-MODULE_DESCRIPTION("A kernel space ping server for portals testing");
-MODULE_LICENSE("GPL");
-
-module_init(pingsrv_init);
-module_exit(pingsrv_cleanup);
diff --git a/lustre/portals/tests/startclient.sh b/lustre/portals/tests/startclient.sh
deleted file mode 100755 (executable)
index be60509..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-# Load the portals core, the NAL named by $1 and the ping client module,
-# then record the NAL's module name in /tmp/nal (stopclient.sh reads it).
-
-SIMPLE=${SIMPLE:-0}
-
-# SIMPLE=0 (the default) selects pingcli.o, anything else spingcli.o
-PING=spingcli.o
-if [ $SIMPLE -eq 0 ]; then
-       PING=pingcli.o
-fi
-
-# Per-NAL settings; the modules themselves are loaded after the case
-case "$1" in
-       tcp)
-               portals_mod=../oslib/portals.o
-               nal_mod=../socknal/ksocknal.o
-               nal_name=ksocknal
-       ;;
-
-       elan)
-               portals_mod=../oslib/portals.o
-               nal_mod=../qswnal/kqswnal.o
-               nal_name=kqswnal
-       ;;
-
-       gm)
-               portals_mod=portals
-               nal_mod=kgmnal
-               nal_name=kgmnal
-       ;;
-
-       *)
-               echo "Usage : ${0} < tcp | elan | gm>"
-               exit 1
-       ;;
-esac
-
-/sbin/insmod $portals_mod
-/sbin/insmod $nal_mod
-/sbin/insmod ./$PING
-echo $nal_name > /tmp/nal
-exit 0
diff --git a/lustre/portals/tests/startserver.sh b/lustre/portals/tests/startserver.sh
deleted file mode 100755 (executable)
index 9b5ccf6..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-# Load the portals core, the NAL named by $1 and the ping server module,
-# record the NAL's module name in /tmp/nal (stopserver.sh reads it), then
-# start ../utils/acceptor with argument 9999 in the background.
-
-SIMPLE=${SIMPLE:-0}
-
-# SIMPLE=0 (the default) selects pingsrv.o, anything else spingsrv.o
-PING=spingsrv.o
-if [ $SIMPLE -eq 0 ]; then
-       PING=pingsrv.o
-fi
-
-# Per-NAL settings; nal_id is handed to the ping server as nal=<id>
-case "$1" in
-       tcp)
-               portals_mod=../oslib/portals.o
-               nal_mod=../socknal/ksocknal.o
-               nal_name=ksocknal
-               nal_id=2
-       ;;
-
-       elan)
-               portals_mod=../oslib/portals.o
-               nal_mod=../qswnal/kqswnal.o
-               nal_name=kqswnal
-               nal_id=4
-       ;;
-
-       gm)
-               portals_mod=portals
-               nal_mod=kgmnal
-               nal_name=kgmnal
-               nal_id=3
-       ;;
-
-       *)
-               echo "Usage : ${0} < tcp | elan | gm>"
-               exit 1
-       ;;
-esac
-
-/sbin/insmod $portals_mod
-/sbin/insmod $nal_mod
-/sbin/insmod ./$PING nal=$nal_id
-echo $nal_name > /tmp/nal
-../utils/acceptor 9999&
-exit 0
diff --git a/lustre/portals/tests/stopclient.sh b/lustre/portals/tests/stopclient.sh
deleted file mode 100755 (executable)
index f7e3aa1..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#!/bin/sh
-# Unload what startclient.sh loaded: the ping client, the NAL whose name
-# was saved in /tmp/nal, and finally the portals core.
-
-# Use the same default and the same mapping as startclient.sh
-# (SIMPLE=0 -> pingcli, otherwise spingcli).  The mapping used to be
-# inverted here, so an explicit SIMPLE=0 or SIMPLE=1 tried to remove the
-# module that had not been loaded.  With SIMPLE unset the result is
-# pingcli, as before.
-SIMPLE=${SIMPLE:-0}
-
-if [ $SIMPLE -eq 0 ]; then
-       PING=pingcli
-else
-       PING=spingcli
-fi
-
-rmmod $PING
-NAL=`cat /tmp/nal`;
-rmmod $NAL
-rmmod portals
diff --git a/lustre/portals/tests/stopserver.sh b/lustre/portals/tests/stopserver.sh
deleted file mode 100644 (file)
index 3e81831..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-# Unload what startserver.sh loaded: the ping server and the NAL whose
-# name was saved in /tmp/nal; then kill the acceptor, remove its pid file
-# and unload the portals core.
-
-# Use the same default and the same mapping as startserver.sh
-# (SIMPLE=0 -> pingsrv, otherwise spingsrv).  The mapping used to be
-# inverted here, so an explicit SIMPLE=0 or SIMPLE=1 tried to remove the
-# module that had not been loaded.  With SIMPLE unset the result is
-# pingsrv, as before.
-SIMPLE=${SIMPLE:-0}
-
-if [ $SIMPLE -eq 0 ]; then
-       PING=pingsrv
-else
-       PING=spingsrv
-fi
-
-rmmod $PING
-NAL=`cat /tmp/nal`;
-rmmod $NAL
-killall -9 acceptor
-rm -f /var/run/acceptor-9999.pid
-rmmod portals
diff --git a/lustre/portals/unals/.cvsignore b/lustre/portals/unals/.cvsignore
deleted file mode 100644 (file)
index e995588..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-.deps
-Makefile
-Makefile.in
diff --git a/lustre/portals/unals/Makefile.am b/lustre/portals/unals/Makefile.am
deleted file mode 100644 (file)
index 3437d39..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# libtcpnal.a is built (and not installed) only when LIBLUSTRE is enabled
-# and CRAY_PORTALS is not.
-if LIBLUSTRE
-if !CRAY_PORTALS
-noinst_LIBRARIES = libtcpnal.a
-endif
-endif
-
-# Headers that are distributed but never installed.
-noinst_HEADERS = \
-	pqtimer.h dispatch.h table.h timer.h \
-	connection.h ipmap.h bridge.h procbridge.h
-
-libtcpnal_a_SOURCES = \
-	debug.c pqtimer.c select.c table.c \
-	pqtimer.h dispatch.h table.h timer.h \
-	address.c procapi.c proclib.c connection.c tcpnal.c \
-	connection.h
-libtcpnal_a_CPPFLAGS = $(LLCPPFLAGS)
-libtcpnal_a_CFLAGS = $(LLCFLAGS)
diff --git a/lustre/portals/unals/README b/lustre/portals/unals/README
deleted file mode 100644 (file)
index 6cb93d9..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-This library implements two NAL interfaces, both running over IP.
-The first, tcpnal, creates TCP connections between participating
-processes in order to transport the portals requests. The second,
-ernal, provides a simple transport protocol which runs over
-UDP datagrams.
-
-The interface functions return both of these values in host order for
-convenience and readability. However this means that addresses
-exchanged in messages between hosts of different orderings will not
-function properly.
-
-Both NALs use the same support functions in order to schedule events
-and communicate with the generic portals implementation.
-
-            -------------------------
-            |         api           |
-            |_______________________|
-            |         lib           |
-            |_______________________|
-            | ernal  |   |tcpnal    |
-            |--------|   |----------|
-            | udpsock|   |connection|
-            |-----------------------|
-            |     timer/select      |
-            -------------------------
-
-
-  These NALs use the framework from fdnal of a pipe between the api
-and library sides. This is wrapped up in the select on the library
-side, and blocks on the api side. Performance could be significantly
-improved by collapsing this artificial barrier, by using shared
-memory queues, or by wiring the api layer directly to the library.
-
-
-nid is defined as the low order 24-bits of the IP address of the
-physical node left shifted by 8 plus a virtual node number of 0
-through 255 (really only 239).  The virtual node number of a tcpnal
-application should be specified using the environment variable
-PTL_VIRTNODE.  pid is now a completely arbitrary number in the
-range of 0 to 255.  The IP interface used can be overridden by
-specifying the appropriate hostid by setting the PTL_HOSTID
-environment variable.  The value can be either dotted decimal
-(n.n.n.n) or hex starting with "0x".
-TCPNAL:
-  As the NAL needs to try to send to a particular nid/pid pair, it
-  will open up connections on demand. Because the port associated with
-  the connecting socket is different from the bound port, two
-  connections will normally be established between a pair of peers, with
-  data flowing from the anonymous connect (active) port to the advertised
-  or well-known bound (passive) port of each peer.
-
-  Should the connection fail to open, an error is reported to the
-  library component, which causes the api request to fail.
diff --git a/lustre/portals/unals/address.c b/lustre/portals/unals/address.c
deleted file mode 100644 (file)
index f329e2a..0000000
+++ /dev/null
@@ -1,145 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* address.c:
- * this file provides functions to acquire the IP address of the node
- * and translate it into a NID/PID pair which supports a static
- * mapping of virtual nodes into the port range of an IP socket.
-*/
-
-#include <stdlib.h>
-#include <netdb.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <portals/p30.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Function:  get_node_id
- * Returns: a 32 bit id for this node: its IPv4 address in host byte
- *          order, or 0 if no address could be determined
- *
- * If the PTL_HOSTID environment variable is set it is used directly. A
- *  value whose second character is 'x' is parsed as a number by
- *  strtoll(); anything else is parsed as dotted decimal (n.n.n.n).
- *  Otherwise get_node_id() determines the host name and uses the
- *  resolver to find out its ip address. This is fairly fragile and
- *  inflexible, but explicitly asking about interfaces and their
- *  addresses is very complicated and nonportable.
- */
-static unsigned int get_node_id(void)
-{
-    char buffer[255];
-    unsigned int x = 0;
-    struct hostent *he;
-    char * host_envp;
-
-    if (!(host_envp = getenv("PTL_HOSTID")))
-        {
-            /* don't hand an uninitialised buffer to the resolver */
-            if (gethostname(buffer,sizeof(buffer)) != 0)
-                    return(0);
-            /* a truncated name is not guaranteed to be terminated */
-            buffer[sizeof(buffer) - 1] = '\0';
-            he=gethostbyname(buffer);
-            if (he && he->h_addr_list[0])
-                    x=*(unsigned int *)he->h_addr_list[0];
-            /* the resolver hands back network byte order */
-            return(ntohl(x));
-        }
-    else
-        {
-            /* look at [0] first so that an empty value is never indexed
-             * past its terminator */
-            if (host_envp[0] == '\0' || host_envp[1] != 'x')
-                {
-                    /* unsigned, so that a first octet >= 128 can be
-                     * shifted into the top byte without overflow */
-                    unsigned int a, b, c, d;
-                    if (sscanf(host_envp, "%u.%u.%u.%u", &a, &b, &c, &d) != 4)
-                        {
-                            fprintf(stderr, "can't parse PTL_HOSTID \"%s\"\n",
-                                    host_envp);
-                            return(0);
-                        }
-                    return ((a<<24) | (b<<16) | (c<<8) | d);
-                }
-            else
-                {
-                    long long hostid = strtoll(host_envp, 0, 0);
-                    return((unsigned int) hostid);
-                }
-        }
-}
-
-
-/* Function:  set_address
- * Arguments: t: a procnal structure to populate with the request
- *            pidrequest: the portals pid the caller asks for
- *
- * set_address performs the bit manipulations to set the nid, pid, and
- *    iptop8 fields of the procnal structures.
- *
- * In the default (non DIRECT_IP_MODE) build the nid is the host id from
- *    get_node_id(), masked with PNAL_HOSTID_MASK and shifted left by
- *    PNAL_VNODE_SHIFT, plus the virtual node number taken from the
- *    PTL_VIRTNODE environment variable (0 when unset).  On an invalid
- *    virtual node or pid a message is printed to stderr and the function
- *    returns without setting the pid.
- *
- * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY
- */
-
-#ifdef DIRECT_IP_MODE
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int port;
-    if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0;
-    else port=pidrequest;
-    t->lib_nal->libnal_ni.ni_pid.nid=get_node_id();
-    t->lib_nal->libnal_ni.ni_pid.pid=port;
-}
-#else
-
-void set_address(bridge t,ptl_pid_t pidrequest)
-{
-    int virtnode, in_addr, port;
-    ptl_pid_t pid;
-    /* read the environment once instead of twice */
-    char *vnode_envp = getenv("PTL_VIRTNODE");
-
-    /* get and remember my node id*/
-    if (!vnode_envp)
-        virtnode = 0;
-    else
-        {
-            int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT
-                                              >> PNAL_VNODE_SHIFT);
-            virtnode = atoi(vnode_envp);
-            /* a negative value would be added into the nid below */
-            if (virtnode < 0)
-                {
-                    fprintf(stderr, "PTL_VIRTNODE of %d is negative\n",
-                            virtnode);
-                    return;
-                }
-            if (virtnode > maxvnode)
-                {
-                    fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n",
-                            virtnode, maxvnode);
-                    return;
-                }
-        }
-
-    in_addr = get_node_id();
-
-    t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */
-    t->lib_nal->libnal_ni.ni_pid.nid = ((in_addr & PNAL_HOSTID_MASK)
-                                        << PNAL_VNODE_SHIFT)
-                                       + virtnode;
-    pid=pidrequest;
-    /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */
-#ifdef notyet
-    if (pid==(unsigned short)PTL_PID_ANY) port = 0;
-#endif
-    if (pid==(unsigned short)PTL_PID_ANY)
-        {
-            fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n");
-            return;
-        }
-    else if (pid > PNAL_PID_MASK)
-        {
-            fprintf(stderr, "portal pid of %d is too large - max %d\n",
-                    pid, PNAL_PID_MASK);
-            return;
-        }
-    /* NOTE(review): `port' is computed but not used afterwards in this
-     * function -- presumably kept for the `notyet' code above. */
-    else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT;
-    t->lib_nal->libnal_ni.ni_pid.pid=pid;
-}
-#endif
diff --git a/lustre/portals/unals/bridge.h b/lustre/portals/unals/bridge.h
deleted file mode 100644 (file)
index d2f0f2c..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#ifndef TCPNAL_PROCBRIDGE_H
-#define TCPNAL_PROCBRIDGE_H
-
-#include <portals/lib-p30.h>
-#include <portals/nal.h>
-
-#define PTL_IFACE_TCP 1
-#define PTL_IFACE_ER 2
-#define PTL_IFACE_SS 3
-#define PTL_IFACE_MAX 4
-
-/* State shared between a NAL and the portals library; note that the
- * typedef `bridge' is a POINTER to struct bridge. */
-typedef struct bridge {
-    int alive;
-    /* library-side NAL state; set_address() stores the node's nid and
-     * pid in lib_nal->libnal_ni.ni_pid */
-    lib_nal_t *lib_nal;
-    /* NOTE(review): `lower' and `local' are opaque here -- presumably
-     * per-NAL private data; confirm against the NAL implementations */
-    void *lower;
-    void *local;
-    /* shutdown hook, called with this bridge */
-    void (*shutdown)(struct bridge *);
-    /* this doesn't really belong here */
-    /* set by set_address() to the host id shifted right by
-     * PNAL_HOSTID_SHIFT, "for making new connections" */
-    unsigned char iptop8;
-} *bridge;
-
-
-typedef int (*nal_initialize)(bridge);
-extern nal_initialize nal_table[PTL_IFACE_MAX];
-
-#endif
diff --git a/lustre/portals/unals/connection.c b/lustre/portals/unals/connection.c
deleted file mode 100644 (file)
index b399fcf..0000000
+++ /dev/null
@@ -1,508 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* connection.c:
-   This file provides a simple stateful connection manager which
-   builds tcp connections on demand and leaves them open for
-   future use. It also provides the machinery to allow peers
-   to connect to it
-*/
-
-#include <stdlib.h>
-#include <pqtimer.h>
-#include <dispatch.h>
-#include <table.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
-#include <portals/types.h>
-#include <portals/list.h>
-#include <portals/lib-types.h>
-#include <portals/socknal.h>
-#include <linux/kp30.h>
-#include <connection.h>
-#include <pthread.h>
-#include <errno.h>
-#ifndef __CYGWIN__
-#include <syscall.h>
-#endif
-
-/* global variable: acceptor port */
-unsigned short tcpnal_acceptor_port = 988;
-
-
-/* Function:  compare_connection
- * Arguments: arg1: a connection stored in the hash table
- *            arg2: the key being looked up, an unsigned int[2]
- *                  holding { ip, port }
- * Returns: 1 if the connection is the one requested, 0 otherwise
- *
- *    compare_connection() resolves collisions in the hash table.
- *    Only the ip half of the key takes part in the comparison.
- */
-static int compare_connection(void *arg1, void *arg2)
-{
-    connection candidate = arg1;
-    unsigned int *wanted = arg2;
-    int same;
-
-#if 0
-    same = (candidate->ip == wanted[0]) && (candidate->port == wanted[1]);
-#else
-    /* CFS specific hacking */
-    same = (candidate->ip == wanted[0]);
-#endif
-    return same;
-}
-
-
-/* Function:  connection_key
- * Arguments: id: the key to hash, an unsigned int[2] holding
- *                { ip, port }
- * Returns: a not particularly well distributed hash of the key;
- *          currently just the ip half
- */
-static unsigned int connection_key(unsigned int *id)
-{
-    unsigned int hash;
-
-#if 0
-    hash = id[0] ^ id[1];
-#else
-    /* CFS specific hacking */
-    hash = (unsigned int) id[0];
-#endif
-    return hash;
-}
-
-
-/* Function:  remove_connection
- * Arguments: arg: the connection to remove
- *
- *   Takes the connection out of its manager's hash table, closes its
- *   socket and frees it; the pointer must not be used afterwards.
- */
-void remove_connection(void *arg)
-{
-        connection doomed = arg;
-        manager owner = doomed->m;
-        unsigned int key[2];
-
-        /* rebuild the { ip, port } key it was inserted under */
-        key[0] = doomed->ip;
-        key[1] = doomed->port;
-
-        hash_table_remove(owner->connections, key);
-        close(doomed->fd);
-        free(doomed);
-}
-
-
-/* Function:  read_connection:
- * Arguments: c:    the connection to read from
- *            dest: the buffer to read into
- *            len:  the number of bytes to read
- * Returns: success as 1, or failure as 0
- *
- *   read_connection() reads data from the connection, continuing
- *   to read partial results until the request is satisfied or
- *   it errors. A read interrupted by a signal is retried. On end of
- *   file or any other error the connection is removed (and freed) and
- *   0 is returned. TODO: this read should be covered by signal protection.
- */
-int read_connection(connection c,
-                    unsigned char *dest,
-                    int len)
-{
-    int offset = 0,rc;
-
-    if (len) {
-        do {
-#ifndef __CYGWIN__
-            rc = syscall(SYS_read, c->fd, dest+offset, len-offset);
-#else
-            rc = recv(c->fd, dest+offset, len-offset, 0);
-#endif
-            if (rc < 0 && errno == EINTR) {
-                /* interrupted before any data arrived: just retry */
-                rc = 0;
-            } else if (rc <= 0) {
-                /* rc == 0 is end of file: the peer has gone.  errno is
-                 * only meaningful when rc < 0, so it must not be
-                 * consulted here, or a stale EINTR would make this
-                 * loop spin forever on a closed socket. */
-                remove_connection(c);
-                return (0);
-            }
-            offset += rc;
-        } while (offset < len);
-    }
-    return (1);
-}
-
-/* Read handler registered for every connection's socket: passes the
- * connection on to its manager's handler and returns that handler's
- * result. */
-static int connection_input(void *d)
-{
-        connection conn = d;
-        manager mgr = conn->m;
-
-        return mgr->handler(mgr->handler_arg, conn);
-}
-
-
-/* Function:  allocate_connection
- * Arguments: m:    manager the allocation is occurring in the context of
- *            ip:   address of the peer for this connection
- *            port: port of the peer for this connection
- *            fd:   open file descriptor for the socket
- * Returns: an allocated connection structure, or NULL if memory could
- *          not be allocated (fd is left open and stays with the caller)
- *
- * just encompasses the action common to active and passive
- *  connections of allocation and placement in the global table;
- *  it also registers connection_input() as the read handler for fd
- */
-static connection allocate_connection(manager m,
-                               unsigned int ip,
-                               unsigned short port,
-                               int fd)
-{
-    connection c=malloc(sizeof(struct connection));
-    unsigned int id[2];
-
-    /* don't write through a failed allocation */
-    if (c == NULL)
-        return(NULL);
-
-    c->m=m;
-    c->fd=fd;
-    c->ip=ip;
-    c->port=port;
-    id[0]=ip;
-    id[1]=port;
-    register_io_handler(fd,READ_HANDLER,connection_input,c);
-    hash_table_insert(m->connections,c,id);
-    return(c);
-}
-
-
-/* Function:  new_connection
- * Arguments: z: opaque argument holding the manager
- * Returns: 1 in order to reregister for new connection requests
- *
- *  called when the bound service socket receives
- *     a new connection request; it accepts the request and
- *     installs a new connection keyed by the peer's address
- *     (the peer's port is deliberately recorded as 0).
- *     If accept() fails nothing is installed.
- */
-static int new_connection(void *z)
-{
-    manager m=z;
-    struct sockaddr_in s;
-    /* accept() takes a socklen_t *, which need not be an int */
-    socklen_t len=sizeof(struct sockaddr_in);
-    int fd=accept(m->bound,(struct sockaddr *)&s,&len);
-    unsigned int nid;
-
-    if (fd < 0) {
-        /* `s' was not filled in: don't install a connection around an
-         * invalid descriptor, just keep listening */
-        perror("tcpnal accept failed");
-        return(1);
-    }
-
-    nid=*((unsigned int *)&s.sin_addr);
-    /* cfs specific hack */
-    //unsigned short pid=s.sin_port;
-    pthread_mutex_lock(&m->conn_lock);
-    /* sin_addr is in network order, connections are keyed in host order */
-    if (allocate_connection(m,ntohl(nid),0/*pid*/,fd) == NULL)
-        close(fd);
-    pthread_mutex_unlock(&m->conn_lock);
-    return(1);
-}
-
-extern ptl_nid_t tcpnal_mynid;
-
-/* Exchange HELLO messages with the peer on `sockfd'.
- *
- * Sends our HELLO header (magic and version in the dest_nid slot, our nid
- * from tcpnal_mynid, `type' and `incarnation'), then reads and validates
- * the peer's HELLO: magic, protocol version, message type and source nid.
- * Any payload that follows the peer's header is read and discarded.
- *
- * nid: in/out.  If *nid is PTL_NID_ANY it is set to the peer's nid;
- *      otherwise the peer's nid must match it.
- *
- * Returns 0 on success.  On failure returns a negative value: -EPROTO
- * for a malformed or unexpected reply or a connection closed in the
- * middle of the exchange, -ENOMEM if the scratch buffer cannot be
- * allocated, or the (negative) result of the failed read/write.
- *
- * NOTE(review): the version fields are written with cpu_to_le32 but
- * compared with cpu_to_le16 -- confirm the field width in
- * ptl_magicversion_t.
- */
-int
-tcpnal_hello (int sockfd, ptl_nid_t *nid, int type, __u64 incarnation)
-{
-        int                 rc;
-        int                 nob;
-        ptl_hdr_t           hdr;
-        ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid;
-
-        LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid));
-
-        memset (&hdr, 0, sizeof (hdr));
-        hmv->magic         = cpu_to_le32(PORTALS_PROTO_MAGIC);
-        hmv->version_major = cpu_to_le32(PORTALS_PROTO_VERSION_MAJOR);
-        hmv->version_minor = cpu_to_le32(PORTALS_PROTO_VERSION_MINOR);
-
-        hdr.src_nid = cpu_to_le64(tcpnal_mynid);
-        hdr.type    = cpu_to_le32(PTL_MSG_HELLO);
-
-        hdr.msg.hello.type = cpu_to_le32(type);
-        hdr.msg.hello.incarnation = cpu_to_le64(incarnation);
-
-        /* I don't send any interface info */
-
-        /* Assume sufficient socket buffering for this message */
-        /* In every I/O check below a result of 0 is turned into -EPROTO:
-         * this function's own 0 means success, so passing a 0 through
-         * would report a closed connection as a completed handshake. */
-        rc = syscall(SYS_write, sockfd, &hdr, sizeof(hdr));
-        if (rc <= 0) {
-                CERROR ("Error %d sending HELLO to "LPX64"\n", rc, *nid);
-                return (rc == 0 ? -EPROTO : rc);
-        }
-
-        rc = syscall(SYS_read, sockfd, hmv, sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading HELLO from "LPX64"\n", rc, *nid);
-                return (rc == 0 ? -EPROTO : rc);
-        }
-
-        if (hmv->magic != le32_to_cpu(PORTALS_PROTO_MAGIC)) {
-                CERROR ("Bad magic %#08x (%#08x expected) from "LPX64"\n",
-                        cpu_to_le32(hmv->magic), PORTALS_PROTO_MAGIC, *nid);
-                return (-EPROTO);
-        }
-
-        if (hmv->version_major != cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR) ||
-            hmv->version_minor != cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR)) {
-                CERROR ("Incompatible protocol version %d.%d (%d.%d expected)"
-                        " from "LPX64"\n",
-                        le16_to_cpu (hmv->version_major),
-                        le16_to_cpu (hmv->version_minor),
-                        PORTALS_PROTO_VERSION_MAJOR,
-                        PORTALS_PROTO_VERSION_MINOR,
-                        *nid);
-                return (-EPROTO);
-        }
-
-#if (PORTALS_PROTO_VERSION_MAJOR != 1)
-# error "This code only understands protocol version 1.x"
-#endif
-        /* version 1 sends magic/version as the dest_nid of a 'hello' header,
-         * so read the rest of it in now... */
-
-        rc = syscall(SYS_read, sockfd, hmv + 1, sizeof(hdr) - sizeof(*hmv));
-        if (rc <= 0) {
-                CERROR ("Error %d reading rest of HELLO hdr from "LPX64"\n",
-                        rc, *nid);
-                return (rc == 0 ? -EPROTO : rc);
-        }
-
-        /* ...and check we got what we expected */
-        if (hdr.type != cpu_to_le32 (PTL_MSG_HELLO)) {
-                CERROR ("Expecting a HELLO hdr "
-                        " but got type %d with %d payload from "LPX64"\n",
-                        le32_to_cpu (hdr.type),
-                        le32_to_cpu (hdr.payload_length), *nid);
-                return (-EPROTO);
-        }
-
-        if (le64_to_cpu(hdr.src_nid) == PTL_NID_ANY) {
-                CERROR("Expecting a HELLO hdr with a NID, but got PTL_NID_ANY\n");
-                return (-EPROTO);
-        }
-
-        if (*nid == PTL_NID_ANY) {              /* don't know peer's nid yet */
-                *nid = le64_to_cpu(hdr.src_nid);
-        } else if (*nid != le64_to_cpu (hdr.src_nid)) {
-                CERROR ("Connected to nid "LPX64", but expecting "LPX64"\n",
-                        le64_to_cpu (hdr.src_nid), *nid);
-                return (-EPROTO);
-        }
-
-        /* Ignore any interface info in the payload */
-        nob = le32_to_cpu(hdr.payload_length);
-        if (nob > getpagesize()) {
-                CERROR("Unexpected HELLO payload %d from "LPX64"\n",
-                       nob, *nid);
-                return (-EPROTO);
-        }
-        if (nob > 0) {
-                char *space = (char *)malloc(nob);
-
-                if (space == NULL) {
-                        CERROR("Can't allocate scratch buffer %d\n", nob);
-                        return (-ENOMEM);
-                }
-
-                rc = syscall(SYS_read, sockfd, space, nob);
-                /* the payload is only skipped, never used: release the
-                 * scratch buffer on every path (it used to be leaked) */
-                free(space);
-                if (rc <= 0) {
-                        CERROR("Error %d skipping HELLO payload from "
-                               LPX64"\n", rc, *nid);
-                        return (rc == 0 ? -EPROTO : rc);
-                }
-        }
-
-        return (0);
-}
-
-/* Function:  force_tcp_connection
- * Arguments: m: connection manager owning the connection table
- *            ip: peer IPv4 address; passed through htonl() before use,
- *                so it is expected in host byte order
- *            port: ignored - immediately overwritten with
- *                  tcpnal_acceptor_port
- *            pb: procbridge whose NAL thread is woken once a new
- *                connection has been allocated
- * Returns: an allocated connection structure, either
- *          a pre-existing one, or a new connection; a null connection
- *          if the socket could not be created, bound or connected
- *
- * The table lookup and the whole connect sequence run with
- * m->conn_lock held.  A failed HELLO exchange terminates the process.
- */
-connection force_tcp_connection(manager m,
-                                unsigned int ip,
-                                unsigned short port,
-                                procbridge pb)
-{
-    connection conn;
-    struct sockaddr_in addr;
-    struct sockaddr_in locaddr; 
-    unsigned int id[2];
-    struct timeval tv;
-    __u64 incarnation;
-
-    int fd;
-    int option;
-    int rc;
-    int rport;
-    ptl_nid_t peernid = PTL_NID_ANY;
-
-    port = tcpnal_acceptor_port;      /* caller-supplied port is discarded */
-
-    id[0] = ip;                       /* table key is the pair {ip, port} */
-    id[1] = port;
-
-    pthread_mutex_lock(&m->conn_lock);
-
-    conn = hash_table_find(m->connections, id);
-    if (conn)
-            goto out;                 /* reuse the existing connection */
-
-    memset(&addr, 0, sizeof(addr));
-    addr.sin_family      = AF_INET;
-    addr.sin_addr.s_addr = htonl(ip);
-    addr.sin_port        = htons(port);
-
-    memset(&locaddr, 0, sizeof(locaddr)); 
-    locaddr.sin_family = AF_INET; 
-    locaddr.sin_addr.s_addr = INADDR_ANY;
-
-    /* Walk the reserved source ports downwards, using a fresh socket for
-     * every attempt, until one of them both binds and connects. */
-    for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
-            fd = socket(AF_INET, SOCK_STREAM, 0);
-            if (fd < 0) {
-                    perror("tcpnal socket failed");
-                    goto out;
-            } 
-            
-            option = 1;
-            rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, 
-                            &option, sizeof(option));
-            if (rc != 0) {
-                    perror ("Can't set SO_REUSEADDR for socket"); 
-                    close(fd);
-                    goto out;
-            } 
-
-            locaddr.sin_port = htons(rport);
-            rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
-            /* EACCES: binding was refused, but the connect is still
-             * attempted on the unbound socket. */
-            if (rc == 0 || errno == EACCES) {
-                    rc = connect(fd, (struct sockaddr *)&addr,
-                                 sizeof(struct sockaddr_in));
-                    if (rc == 0) {
-                            break;
-                    } else if (errno != EADDRINUSE && errno != EADDRNOTAVAIL) {
-                            perror("Error connecting to remote host");
-                            close(fd);
-                            goto out;
-                    }
-            } else if (errno != EADDRINUSE) {
-                    perror("Error binding to privileged port");
-                    close(fd);
-                    goto out;
-            }
-            close(fd);                /* address in use: retry with the next port */
-    }
-    
-    if (rport == IPPORT_RESERVED / 2) {   /* loop ran to completion without a connect */
-            fprintf(stderr, "Out of ports trying to bind to a reserved port\n");
-            goto out;
-    }
-    
-#if 1
-    /* return values are deliberately not checked here */
-    option = 1;
-    setsockopt(fd, SOL_TCP, TCP_NODELAY, &option, sizeof(option));
-    option = 1<<20;
-    setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &option, sizeof(option));
-    option = 1<<20;
-    setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &option, sizeof(option));
-#endif
-   
-    gettimeofday(&tv, NULL);
-    incarnation = (((__u64)tv.tv_sec) * 1000000) + tv.tv_usec;   /* microseconds since the epoch */
-
-    /* say hello */
-    /* NOTE(review): exit() is reached with conn_lock held and fd open */
-    if (tcpnal_hello(fd, &peernid, SOCKNAL_CONN_ANY, incarnation))
-            exit(-1);
-    
-    /* NOTE(review): fd is not closed here if allocate_connection()
-     * returns null - confirm who owns the descriptor in that case */
-    conn = allocate_connection(m, ip, port, fd);
-    
-    /* let nal thread know this event right away */
-    if (conn)
-            procbridge_wakeup_nal(pb);
-
-out:
-    pthread_mutex_unlock(&m->conn_lock);
-    return (conn);
-}
-
-
-/* Function:  bind_socket
- * Arguments: m: the connection manager that will own the listener
- *            port: the port to attempt to bind to (host byte order)
- * Returns: 1 on success, or 0 on error
- *
- * bind_socket() attempts to allocate and bind a socket to the requested
- *  port, or dynamically assign one from the kernel should the port be
- *  zero. Sets the bound, bound_handler and port elements of m; m->port
- *  is stored exactly as getsockname() reports it (network byte order).
- *
- *  On any failure after the socket was created the socket is closed
- *  again and m->bound is reset to -1, so no descriptor is leaked.
- *
- *  TODO: The port should be an explicitly sized type.
- */
-static int bind_socket(manager m,unsigned short port)
-{
-    struct sockaddr_in addr;
-    socklen_t alen=sizeof(addr);      /* getsockname() requires a socklen_t */
-
-    if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0)
-        return(0);
-
-    memset(&addr, 0, sizeof(addr));
-    addr.sin_family      = AF_INET;
-    addr.sin_addr.s_addr = 0;
-    addr.sin_port        = htons(port);
-
-    if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){
-        perror ("tcpnal bind");
-        goto fail;
-    }
-
-    /* find out which port was really assigned (matters when port == 0) */
-    if (getsockname(m->bound,(struct sockaddr *)&addr, &alen)<0){
-        perror ("tcpnal getsockname");
-        goto fail;
-    }
-
-    /* listen before the handler is registered so that the handler can
-     * never be dispatched on a socket that is not yet accepting */
-    if (listen(m->bound,5)<0){
-        perror ("tcpnal listen");
-        goto fail;
-    }
-
-    m->bound_handler=register_io_handler(m->bound,READ_HANDLER,
-                                         new_connection,m);
-    if (!m->bound_handler)
-        goto fail;
-
-    m->port=addr.sin_port;
-    return(1);
-
-fail:
-    close(m->bound);
-    m->bound = -1;
-    return(0);
-}
-
-
-/* Function:  shutdown_connections
- * Arguments: m: the manager structure
- *
- * close all connections and reclaim resources: closes the listening
- * socket, removes its io handler, destroys the connection table
- * (remove_connection is handed to hash_destroy_table, presumably as the
- * per-entry destructor) and finally frees m, which must not be used
- * afterwards.
- *
- * NOTE(review): m->conn_lock is never destroyed here - confirm whether
- * that is intentional.
- */
-void shutdown_connections(manager m)
-{
-    close(m->bound);
-    remove_io_handler(m->bound_handler);
-    hash_destroy_table(m->connections,remove_connection);
-    free(m);
-}
-
-
-/* Function:  init_connections
- * Arguments: pid: handed to bind_socket() as the port to listen on
- *            input: callback stored in the manager's handler field
- *            a: opaque argument stored in the manager's handler_arg field
- * Returns: a newly allocated manager structure, or
- *          zero if memory could not be allocated or the port could
- *          not be bound
- */
-manager init_connections(unsigned short pid,
-                         int (*input)(void *, void *),
-                         void *a)
-{
-    manager m = (manager)malloc(sizeof(struct manager));
-
-    if (m == NULL)
-        return(0);
-
-    m->connections = hash_create_table(compare_connection,connection_key);
-    m->handler = input;
-    m->handler_arg = a;
-    pthread_mutex_init(&m->conn_lock, 0);
-
-    if (bind_socket(m,pid))
-        return(m);
-
-    /* NOTE(review): the table from hash_create_table() is not released on
-     * this path - confirm whether hash_destroy_table() is safe to call on
-     * an empty table and add it here if so */
-    pthread_mutex_destroy(&m->conn_lock);
-    free(m);
-    return(0);
-}
diff --git a/lustre/portals/unals/connection.h b/lustre/portals/unals/connection.h
deleted file mode 100644 (file)
index 343ffa6..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#include <table.h>
-#include <procbridge.h>
-
-/* Connection manager: the listening socket plus the table of
- * connections created through it. */
-typedef struct manager {
-    table connections;         /* connection table */
-    pthread_mutex_t conn_lock; /* protect connections table */
-    int bound;                 /* listening socket descriptor */
-    io_handler bound_handler;  /* io handler registered for 'bound' */
-    int (*handler)(void *, void *); /* callback supplied to init_connections() */
-    void *handler_arg;         /* opaque argument supplied with 'handler' */
-    unsigned short port;       /* port the listening socket is bound to */
-} *manager;
-
-
-/* One TCP connection to a peer. */
-typedef struct connection {
-    unsigned int ip;           /* peer address - presumably host byte order; verify against callers */
-    unsigned short port;       /* peer port */
-    int fd;                    /* socket descriptor of the connection */
-    manager m;                 /* presumably the owning manager - TODO confirm */
-} *connection;
-
-/* Find or create the connection to ip; the third argument is a port
- * ('unsigned int short' is just another spelling of unsigned short). */
-connection force_tcp_connection(manager m, unsigned int ip, unsigned int short,
-                                procbridge pb);
-/* Allocate a manager listening on the given port; zero on failure. */
-manager init_connections(unsigned short, int (*f)(void *, void *), void *);
-void remove_connection(void *arg);
-/* Tear down the listener and connection table, then free m. */
-void shutdown_connections(manager m);
-int read_connection(connection c, unsigned char *dest, int len);
diff --git a/lustre/portals/unals/debug.c b/lustre/portals/unals/debug.c
deleted file mode 100644 (file)
index b82bb2f..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2002 Cluster File Systems, Inc.
- *   Author: Phil Schwan <phil@clusterfs.com>
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <sys/time.h>
-
-int smp_processor_id = 1;                       /* fixed value printed as the CPU field of each debug line */
-char debug_file_path[1024] = "/tmp/lustre-log";
-char debug_file_name[1024];                     /* only printed by the dumplog stubs; never filled in here */
-FILE *debug_file_fd;                            /* debug output stream; set to stdout by portals_debug_init() */
-
-/* Stub: only tells the user where to look; arg is unused. Returns 0. */
-int portals_do_debug_dumplog(void *arg)
-{
-        printf("Look in %s\n", debug_file_name);
-        return 0;
-}
-
-
-/* Stub: does nothing. */
-void portals_debug_print(void)
-{
-        return;
-}
-
-
-/* Stub: only tells the user where to look. */
-void portals_debug_dumplog(void)
-{
-        printf("Look in %s\n", debug_file_name);
-        return;
-}
-
-
-/* Route debug output to stdout; bufsize is ignored. Returns 0. */
-int portals_debug_init(unsigned long bufsize)
-{ 
-        debug_file_fd = stdout;
-        return 0;
-}
-
-/* Stub: nothing to release, since the debug stream is stdout. Returns 0. */
-int portals_debug_cleanup(void)
-{
-        return 0; //close(portals_debug_fd);
-}
-
-/* Stub: there is no buffer to clear. Returns 0. */
-int portals_debug_clear_buffer(void)
-{
-        return 0;
-}
-
-/* Write a banner containing 'text' to the debug stream. Returns 0.
- * debug_file_fd must already have been set by portals_debug_init(). */
-int portals_debug_mark_buffer(char *text)
-{
-
-        fprintf(debug_file_fd, "*******************************************************************************\n");
-        fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text);
-        fprintf(debug_file_fd, "*******************************************************************************\n");
-
-        return 0;
-}
-
-/* Stub: copies nothing; both arguments are ignored. Returns 0. */
-int portals_debug_copy_to_user(char *buf, unsigned long len)
-{
-        return 0;
-}
-
-/* Write one debug record to debug_file_fd.
- *
- * The record consists of a "subsys:mask:cpu:sec.usec" prefix, a
- * "(file:line:fn() 0+offset)" location and finally the caller's
- * printf-style message built from 'format' and the variadic arguments.
- *
- * subsys: subsystem id; only bits 24 and up are printed
- * mask:   debug mask, printed in hex
- * file, fn, line: source location of the caller
- * format: printf-style format string for the remaining arguments
- *
- * debug_file_fd must already have been set by portals_debug_init().
- */
-void
-portals_debug_msg (int subsys, int mask, char *file, const char *fn, 
-                   const int line, const char *format, ...)
-{
-        va_list       ap;
-        unsigned long flags;    /* never assigned: only its address is used below */
-        struct timeval tv;
-
-        gettimeofday(&tv, NULL);
-
-        /* tv_sec/tv_usec have platform-dependent types; cast them to
-         * match the conversion specifiers */
-        fprintf(debug_file_fd,
-                "%02x:%06x:%d:%lu.%06lu ",
-                subsys >> 24, mask, smp_processor_id,
-                (unsigned long)tv.tv_sec, (unsigned long)tv.tv_usec);
-
-        /* the last field is derived from the address of a local variable
-         * within an 8192-byte window */
-        fprintf(debug_file_fd,
-                "(%s:%d:%s() %d+%ld): ",
-                file, line, fn, 0,
-                (long)(8192 - ((unsigned long)&flags & 8191UL)));
-
-        /* a va_list must be consumed by the v*printf family; handing it to
-         * fprintf() is undefined behaviour */
-        va_start (ap, format);
-        vfprintf(debug_file_fd, format, ap);
-        va_end (ap);
-}
-
diff --git a/lustre/portals/unals/dispatch.h b/lustre/portals/unals/dispatch.h
deleted file mode 100644 (file)
index a8f916d..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-/* this file is only called dispatch.h to prevent it
-   from colliding with /usr/include/sys/select.h */
-
-/* An io_handler is a pointer to one registered file-descriptor callback. */
-typedef struct io_handler *io_handler;
-
-struct io_handler{
-  io_handler *last;        /* presumably the address of the link pointing at this node - TODO confirm */
-  io_handler next;         /* next handler in the list */
-  int fd;                  /* file descriptor being watched */
-  int type;                /* mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER */
-  int (*function)(void *); /* callback invoked with 'argument' */
-  void *argument;
-  int disabled;            /* presumably non-zero once the handler was removed - verify in select.c */
-};
-
-
-/* Event type bits for io_handler.type; they may be OR-ed together. */
-#define READ_HANDLER 1
-#define WRITE_HANDLER 2
-#define EXCEPTION_HANDLER 4
-#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER)
-
-/* Register 'function' to be called with 'arg' for events of 'type' on fd.
- * Callers treat a null result as failure. */
-io_handler register_io_handler(int fd,
-                               int type,
-                               int (*function)(void *),
-                               void *arg);
-
-void remove_io_handler (io_handler i);
-void init_unix_timer(void);
-/* NOTE(review): 'when' is declared in pqtimer.h, which this header does
- * not include itself - it has to be included first. */
-void select_timer_block(when until);
-when now(void);
-
-/*
- * hacking for CFS internal MPI testing
- */ 
-#if !CRAY_PORTALS
-#define ENABLE_SELECT_DISPATCH
-#endif
diff --git a/lustre/portals/unals/ipmap.h b/lustre/portals/unals/ipmap.h
deleted file mode 100644 (file)
index 85b1e18..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-/* DIRECT_IP_MODE is defined unconditionally, so only the first set of
- * mappings below is ever compiled: the nid is the IP address and the
- * pid is the port, both passed through unchanged. */
-#define DIRECT_IP_MODE
-#ifdef DIRECT_IP_MODE
-#define PNAL_NID(in_addr, port) (in_addr)
-#define PNAL_PID(pid) (pid)
-#define PNAL_IP(in_addr, port) (in_addr)
-#define PNAL_PORT(nid, pid) (pid)
-#else
-
-/* Packed mapping (currently dead code): host id, virtual node and pid are
- * folded into the nid and the TCP port. */
-#define PNAL_BASE_PORT 4096
-#define PNAL_HOSTID_SHIFT 24
-#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1)
-#define PNAL_VNODE_SHIFT 8
-#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1)
-#define PNAL_PID_SHIFT 8
-#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1)
-
-#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \
-                                    << PNAL_VNODE_SHIFT) \
-                                   | (((ntohs(port)-PNAL_BASE_PORT) >>\
-                                       PNAL_PID_SHIFT)))
-#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT)  & PNAL_PID_MASK)
-
-/* NOTE(review): 't' is expanded without parentheses, so only a plain
- * pointer expression is safe as the second argument. */
-#define PNAL_IP(nid,t)  (htonl((((unsigned)(nid))\
-                                >> PNAL_VNODE_SHIFT)\
-                               | (t->iptop8 << PNAL_HOSTID_SHIFT)))
-#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \
-                                 << PNAL_VNODE_SHIFT) \
-                                | ((pid) & PNAL_PID_MASK)) \
-                               + PNAL_BASE_PORT))
-#endif
diff --git a/lustre/portals/unals/pqtimer.c b/lustre/portals/unals/pqtimer.c
deleted file mode 100644 (file)
index 98c48eb..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* timer.c:
- *   this file implements a simple priority-queue based timer system. when
- * combined with a file which implements now() and block(), it can
- * be used to provide course-grained time-based callbacks.
- */
-
-#include <pqtimer.h>
-#include <stdlib.h>
-#include <string.h>
-
-/* One pending timer; the heap in 'timers' is ordered by w. */
-struct timer {
-  void (*function)(void *);  /* callback run when the timer fires */
-  void *arg;                 /* argument passed to the callback */
-  when w;                    /* absolute expiry time: now() + interval at registration */
-  int interval;              /* NOTE(review): receives a 'when' (unsigned long long), so large intervals are truncated */
-  int disable;               /* set by remove_timer(); a disabled timer is freed without being called */
-};
-
-/* Background task called on every pass of timer_loop(). */
-typedef struct thunk *thunk;
-struct thunk {
-    void (*f)(void *);
-    void *a;
-    thunk next;
-};
-
-extern when now(void);
-
-static thunk thunks;                  /* singly linked list, newest first */
-static int internal;                  /* never assigned in this file, so it stays 0 */
-static void (*block_function)(when);  /* installed by initialize_timer() */
-static int number_of_timers;          /* live entries; they occupy timers[1..number_of_timers] */
-static int size_of_pqueue;            /* allocated slots in timers[] */
-static timer *timers;                 /* binary min-heap, 1-based (slot 0 is unused) */
-
-
-/* Restore the heap property below slot 'where' by sifting the entry down:
- * while one of its children expires earlier, swap it with the earliest
- * child and continue from that child's slot. */
-static void heal(int where)
-{
-    for (;;) {
-        int earliest=where;
-        int child=where<<1;              /* left child */
-        timer swap;
-
-        if ((child <= number_of_timers) &&
-            (timers[child]->w < timers[earliest]->w))
-            earliest=child;
-
-        child++;                         /* right child */
-        if ((child <= number_of_timers) &&
-            (timers[child]->w < timers[earliest]->w))
-            earliest=child;
-
-        if (earliest == where)
-            return;
-
-        swap=timers[where];
-        timers[where]=timers[earliest];
-        timers[earliest]=swap;
-        where=earliest;
-    }
-}
-
-/* Sift the entry in slot i up towards the root (slot 1) for as long as it
- * expires earlier than its parent. */
-static void add_pqueue(int i)
-{
-    while (i > 1) {
-        int up=(i>>1);
-        timer swap;
-
-        if (!(timers[i]->w < timers[up]->w))
-            break;
-
-        swap=timers[i];
-        timers[i]=timers[up];
-        timers[up]=swap;
-        i=up;
-    }
-}
-
-/* Append t to the heap, growing the slot array by ten entries when it is
- * full, then sift t up into position.
- *
- * The array holds 'timer' pointers, so it is sized with sizeof(timer);
- * realloc() carries the old contents over and releases the old array.
- * There is no way to report failure to the caller, so running out of
- * memory aborts the process rather than writing through a null pointer.
- */
-static void add_timer(timer t)
-{
-    /* slot 0 is unused, so number_of_timers+1 entries need +2 slots */
-    if (size_of_pqueue<(number_of_timers+2)){
-       int newsize=size_of_pqueue+10;
-       timer *grown=(timer *)realloc(timers,sizeof(timer)*newsize);
-
-       if (grown == NULL)
-           abort();
-       timers=grown;
-       size_of_pqueue=newsize;
-    }
-    timers[++number_of_timers]=t;
-    add_pqueue(number_of_timers);
-}
-
-/* Function: register_timer
- * Arguments: interval: the time interval from the current time when
- *                      the timer function should be called
- *            function: the function to call when the time has expired
- *            argument: the argument to call it with.
- * Returns: a pointer to a timer structure, or a null pointer if the
- *          timer could not be allocated
- *
- * The timer fires once; timer_fire() frees it afterwards.
- */
-timer register_timer(when interval,
-                    void (*function)(void *),
-                    void *argument)
-{
-    timer t=(timer)malloc(sizeof(struct timer));
-
-    if (t == NULL)
-        return(NULL);
-
-    t->arg=argument;
-    t->function=function;
-    t->interval=interval;
-    t->disable=0;
-    t->w=now()+interval;
-    add_timer(t);
-    /* first timer in an empty queue: hand the new deadline to the
-     * block function */
-    if (!internal && (number_of_timers==1))
-        block_function(t->w);
-    return(t);
-}
-
-/* Function: remove_timer
- * Arguments: t: the timer to cancel, as returned by register_timer()
- * Returns: nothing
- *
- * remove_timer removes a timer from the system, ensuring
- * that it will never be called. It does not actually
- * free the timer due to reentrancy issues; the timer stays in the
- * queue and is freed by timer_fire() when its time comes.
- */
-
-void remove_timer(timer t)
-{
-    t->disable=1;
-}
-
-
-
-/* Pop the earliest timer off the heap, run its callback unless it has
- * been disabled, and free it.  The heap is repaired before the callback
- * runs, so the queue is already consistent when the callback executes.
- * The caller must make sure that at least one timer is queued. */
-void timer_fire()
-{
-    timer current;
-
-    current=timers[1];
-    timers[1]=timers[number_of_timers--];   /* move the last entry to the root */
-    heal(1);
-    if (!current->disable) {
-        (*current->function)(current->arg);
-    }
-    free(current);
-}
-
-/* Fire every timer whose expiry time is not later than the time sampled
- * on entry, then return the expiry time of the earliest timer still
- * queued, or 0 when the queue is empty. */
-when next_timer(void)
-{
-    const when deadline=now();
-
-    for (;;) {
-        if (number_of_timers == 0)
-            return(0);
-        if (timers[1]->w > deadline)
-            return(timers[1]->w);
-        timer_fire();
-    }
-}
-
-/* Function: timer_loop
- * Arguments: none
- * Returns: never
- * 
- * timer_loop() is the blocking dispatch function for the timer.
- * It calls the block() function registered with initialize_timer,
- * and fires the timers that have been registered.
- *
- * Every pass first runs all thunks, then either blocks until the
- * earliest timer is due, fires that timer, or - when no timer is
- * queued - blocks with a deadline of 0.
- */
-void timer_loop()
-{
-    when here;
-
-    while (1){
-       thunk z;
-       here=now();   /* sampled before the thunks run */
-
-       for (z=thunks;z;z=z->next) (*z->f)(z->a);
-
-       if (number_of_timers){
-           if (timers[1]->w > here){
-               (*block_function)(timers[1]->w);
-           } else {
-                timer_fire();
-           }
-       } else {
-           /* NOTE(review): the thunks already ran a few lines above, so
-            * with an empty queue they run twice per pass - confirm that
-            * this is intended */
-           thunk z;
-           for (z=thunks;z;z=z->next) (*z->f)(z->a);
-           (*block_function)(0);
-       }
-    }
-}
-
-
-/* Function: register_thunk
- * Arguments: f: the function to call
- *            a: the single argument to call it with
- *
- * Thunk functions get called at irregular intervals; they
- * should not assume when they run, nor take a particularly long
- * amount of time. Thunks are for background cleanup tasks.
- *
- * The new thunk is pushed onto the front of the list and is never
- * removed again.
- */
-void register_thunk(void (*f)(void *),void *a)
-{
-    /* NOTE(review): the malloc() result is used unchecked */
-    thunk t=(void *)malloc(sizeof(struct thunk));
-    t->f=f;
-    t->a=a;
-    t->next=thunks;
-    thunks=t;
-}
-
-/* Function: initialize_timer
- * Arguments: block: the function timer_loop() and register_timer() call
- *                   to block until a given time
- *
- * Resets the timer module to an empty queue of ten slots and an empty
- * thunk list.  It has to run before any other timer function,
- * timer_loop() included.
- */
-void initialize_timer(void (*block)(when))
-{
-    const int initial_slots=10;
-
-    thunks=0;
-    number_of_timers=0;
-    block_function=block;
-    size_of_pqueue=initial_slots;
-    timers=(timer *)malloc(sizeof(timer)*size_of_pqueue);
-}
diff --git a/lustre/portals/unals/pqtimer.h b/lustre/portals/unals/pqtimer.h
deleted file mode 100644 (file)
index 11efb0e..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-/* Point in time or interval, in units of 1/HZ of a second (see HZ below). */
-typedef unsigned long long when;
-when now(void);
-typedef struct timer *timer;
-/* Schedule function(argument) to run once, 'interval' from now. */
-timer register_timer(when interval,
-                    void (*function)(void *),
-                    void *argument);
-timer register_timer_wait(void);
-/* Cancel a timer; it is freed later by timer_fire(). */
-void remove_timer(timer);
-/* Dispatch loop; does not return. */
-void timer_loop(void);
-/* Must be called before any other timer function. */
-void initialize_timer(void (*block)(when));
-void timer_fire(void);
-
-
-/* Ticks per second: 2^32, i.e. whole seconds occupy the upper 32 bits. */
-#define HZ 0x100000000ull
-
-
diff --git a/lustre/portals/unals/procapi.c b/lustre/portals/unals/procapi.c
deleted file mode 100644 (file)
index 6b471c0..0000000
+++ /dev/null
@@ -1,196 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2003 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* api.c:
- *  This file provides the 'api' side for the process-based nals.
- *  it is responsible for creating the 'library' side thread,
- *  and passing wrapped portals transactions to it.
- *
- *  Along with initialization, shutdown, and transport to the library
- *  side, this file contains some stubs to satisfy the nal definition.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#ifndef __CYGWIN__
-#include <syscall.h>
-#endif
-#include <sys/socket.h>
-#include <procbridge.h>
-#include <pqtimer.h>
-#include <dispatch.h>
-#include <errno.h>
-
-
-/* XXX CFS workaround, to give a chance to let nal thread wake up
- * from waiting in select
- *
- * Read handler for notifier[1]: consumes up to 8 bytes written by
- * procbridge_wakeup_nal().  The data itself carries no meaning.
- * arg is the procbridge; always returns 1.
- */
-static int procbridge_notifier_handler(void *arg)
-{
-    static char buf[8];
-    procbridge p = (procbridge) arg;
-
-    syscall(SYS_read, p->notifier[1], buf, sizeof(buf));   /* result deliberately ignored */
-    return 1;
-}
-
-/* Wake the NAL thread by writing 8 bytes to notifier[0]; they arrive on
- * notifier[1], where procbridge_notifier_handler() is registered.  The
- * buffer is never written to, so the bytes sent are all zero. */
-void procbridge_wakeup_nal(procbridge p)
-{
-    static char buf[8];
-    syscall(SYS_write, p->notifier[0], buf, sizeof(buf));   /* result deliberately ignored */
-}
-
-/* Function: shutdown
- * Arguments: n: a pointer to my top side nal structure
- *
- * Asks the lower side thread to stop by raising NAL_FLAG_STOPPING and
- * waking it up, blocks until that thread has raised NAL_FLAG_STOPPED,
- * and then frees the procbridge.
- */
-static void procbridge_shutdown(nal_t *n)
-{
-    lib_nal_t *libnal = n->nal_data;
-    bridge br = (bridge)libnal->libnal_data;
-    procbridge pb = (procbridge)br->local;
-
-    pb->nal_flags |= NAL_FLAG_STOPPING;
-    procbridge_wakeup_nal(pb);
-
-    /* standard condition wait: re-check the flag after every wakeup */
-    pthread_mutex_lock(&pb->mutex);
-    while ((pb->nal_flags & NAL_FLAG_STOPPED) == 0)
-        pthread_cond_wait(&pb->cond, &pb->mutex);
-    pthread_mutex_unlock(&pb->mutex);
-
-    free(pb);
-}
-
-
-/* forward decl */
-extern int procbridge_startup (nal_t *, ptl_pid_t,
-                               ptl_ni_limits_t *, ptl_ni_limits_t *);
-
-/* api_nal
- *  the interface vector to allow the generic code to access
- *  this nal. this is separate from the library side lib_nal.
- *  TODO: should be dynamically allocated
- */
-nal_t procapi_nal = {
-    nal_data: NULL,
-    nal_ni_init: procbridge_startup,
-    nal_ni_fini: procbridge_shutdown,
-};
-
-/* copied into the library NI's nid at the end of procbridge_startup() */
-ptl_nid_t tcpnal_mynid;
-
-#ifdef ENABLE_SELECT_DISPATCH
-/* set to the procbridge created by procbridge_startup() */
-procbridge __global_procbridge = NULL;
-#endif
-
-/* Function: procbridge_startup
- *
- * Arguments:  pid: requested process id (port offset)
- *                  PTL_ID_ANY not supported.
- *             desired: limits passed from the application
- *                      and effectively ignored
- *             actual:  limits actually allocated and returned
- *
- * Returns: portals rc
- *
- * initializes the tcp nal. we define unix_failure as an
- * error wrapper to cut down clutter.
- */
-int procbridge_startup (nal_t *nal, ptl_pid_t requested_pid,
-                        ptl_ni_limits_t *requested_limits,
-                        ptl_ni_limits_t *actual_limits)
-{
-    nal_init_args_t args;
-
-    procbridge p;
-    bridge b;
-    /* XXX nal_type is purely private to tcpnal here */
-    int nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */
-
-    LASSERT(nal == &procapi_nal);
-
-    init_unix_timer();
-
-    b=(bridge)malloc(sizeof(struct bridge));
-    p=(procbridge)malloc(sizeof(struct procbridge));
-    b->local=p;
-
-    args.nia_requested_pid = requested_pid;
-    args.nia_requested_limits = requested_limits;
-    args.nia_actual_limits = actual_limits;
-    args.nia_nal_type = nal_type;
-    args.nia_bridge = b;
-    args.nia_apinal = nal;
-
-    /* init procbridge */
-    pthread_mutex_init(&p->mutex,0);
-    pthread_cond_init(&p->cond, 0);
-    p->nal_flags = 0;
-
-    /* initialize notifier */
-    if (socketpair(AF_UNIX, SOCK_STREAM, 0, p->notifier)) {
-        perror("socketpair failed");
-        return PTL_FAIL;
-    }
-
-    if (!register_io_handler(p->notifier[1], READ_HANDLER,
-                procbridge_notifier_handler, p)) {
-        perror("fail to register notifier handler");
-        return PTL_FAIL;
-    }
-
-#ifdef ENABLE_SELECT_DISPATCH
-    __global_procbridge = p;
-#endif
-
-    /* create nal thread */
-    if (pthread_create(&p->t, NULL, nal_thread, &args)) {
-        perror("nal_init: pthread_create");
-        return PTL_FAIL;
-    }
-
-    do {
-        pthread_mutex_lock(&p->mutex);
-        if (p->nal_flags & (NAL_FLAG_RUNNING | NAL_FLAG_STOPPED)) {
-                pthread_mutex_unlock(&p->mutex);
-                break;
-        }
-        pthread_cond_wait(&p->cond, &p->mutex);
-        pthread_mutex_unlock(&p->mutex);
-    } while (1);
-
-    if (p->nal_flags & NAL_FLAG_STOPPED)
-        return PTL_FAIL;
-
-    b->lib_nal->libnal_ni.ni_pid.nid = tcpnal_mynid;
-
-    return PTL_OK;
-}
diff --git a/lustre/portals/unals/procbridge.h b/lustre/portals/unals/procbridge.h
deleted file mode 100644 (file)
index 1f91ced..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2003 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#ifndef _PROCBRIDGE_H_
-#define _PROCBRIDGE_H_
-
-#include <pthread.h>
-#include <bridge.h>
-#include <ipmap.h>
-
-
-/* Bits of procbridge.nal_flags */
-#define NAL_FLAG_RUNNING        1   /* raised by the nal thread after successful initialization */
-#define NAL_FLAG_STOPPING       2   /* raised by the api side to request shutdown */
-#define NAL_FLAG_STOPPED        4   /* raised by the nal thread when it failed to start or has stopped */
-
-/* State shared between the api side and the nal thread. */
-typedef struct procbridge {
-    /* sync between user threads and nal thread */
-    pthread_t t;
-    pthread_cond_t cond;
-    pthread_mutex_t mutex;
-
-    /* socket pair used to notify nal thread */
-    int notifier[2];   /* [0] is written by the api side, [1] is read by the nal thread */
-
-    int nal_flags;     /* mask of NAL_FLAG_* */
-
-} *procbridge;
-
-/* Arguments handed from procbridge_startup() to nal_thread(). */
-typedef struct nal_init_args {
-    ptl_pid_t        nia_requested_pid;
-    ptl_ni_limits_t *nia_requested_limits;
-    ptl_ni_limits_t *nia_actual_limits;
-    int              nia_nal_type;      /* index into nal_table */
-    bridge           nia_bridge;
-    nal_t           *nia_apinal;
-} nal_init_args_t;
-
-/* Entry point of the nal thread; takes a nal_init_args_t pointer. */
-extern void *nal_thread(void *);
-
-
-#define PTL_INIT        (LIB_MAX_DISPATCH+1)
-#define PTL_FINI        (LIB_MAX_DISPATCH+2)
-
-#define MAX_ACLS        1
-#define MAX_PTLS        128
-
-extern void set_address(bridge t,ptl_pid_t pidrequest);
-/* Wake the nal thread through the notifier socket pair. */
-extern void procbridge_wakeup_nal(procbridge p);
-
-#endif
diff --git a/lustre/portals/unals/proclib.c b/lustre/portals/unals/proclib.c
deleted file mode 100644 (file)
index 7ee7c71..0000000
+++ /dev/null
@@ -1,137 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2003 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* lib.c:
- *  This file provides the 'library' side for the process-based nals.
- *  it is responsible for communication with the 'api' side and
- *  providing service to the generic portals 'library'
- *  implementation. 'library' might be better termed 'communication'
- *  or 'kernel'.
- */
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <procbridge.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netdb.h>
-#include <errno.h>
-#include <timer.h>
-#include <dispatch.h>
-
-/* the following functions are stubs to satisfy the nal definition
-   without doing anything particularly useful*/
-
-/* Stub distance callback: ignores all of its arguments and returns 0;
- * *dist is left untouched. */
-static int nal_dist(lib_nal_t *nal,
-                    ptl_nid_t nid,
-                    unsigned long *dist)
-{
-    return 0;
-}
-
-/* Thunk run from the timer loop of the nal thread.  z is the bridge.
- * Once the api side has raised NAL_FLAG_STOPPING this raises
- * NAL_FLAG_STOPPED, wakes every waiter and ends the calling thread;
- * otherwise it returns without doing anything. */
-static void check_stopping(void *z)
-{
-    procbridge pb = ((bridge)z)->local;
-
-    if (pb->nal_flags & NAL_FLAG_STOPPING) {
-            pthread_mutex_lock(&pb->mutex);
-            pb->nal_flags |= NAL_FLAG_STOPPED;
-            pthread_cond_broadcast(&pb->cond);
-            pthread_mutex_unlock(&pb->mutex);
-
-            pthread_exit(0);
-    }
-}
-
-
-/* Function:  nal_thread
- * Arguments: z: a nal_init_args_t allocated and populated by the
- *               api level code
- * Returns: a null pointer, and only on error; after a successful
- *          initialization the thread stays in timer_loop() until
- *          check_stopping() ends it
- *
- *  This function is the entry point of the pthread initiated on 
- *  the api side of the interface. This thread is used to handle
- *  asynchronous delivery to the application.
- *
- *  The outcome of the initialization is always reported through
- *  nal_flags (NAL_FLAG_RUNNING or NAL_FLAG_STOPPED), including when the
- *  library nal cannot be allocated or the nal type is out of range, so
- *  that the api side waiting in procbridge_startup() is always released.
- */
-extern int tcpnal_init(bridge);
-
-/* per nal type initializer, indexed by nia_nal_type; empty slots are 0 */
-nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0};
-
-void *nal_thread(void *z)
-{
-    nal_init_args_t *args = (nal_init_args_t *) z;
-    bridge b = args->nia_bridge;
-    procbridge p=b->local;
-    int rc = PTL_FAIL;
-    ptl_process_id_t process_id;
-    int nal_type;
-    
-    b->lib_nal=(lib_nal_t *)malloc(sizeof(lib_nal_t));
-    if (b->lib_nal == NULL)
-        goto report;
-
-    b->lib_nal->libnal_data=b;
-    b->lib_nal->libnal_map=NULL;
-    b->lib_nal->libnal_unmap=NULL;
-    b->lib_nal->libnal_dist=nal_dist;
-
-    nal_type = args->nia_nal_type;
-    if (nal_type < 0 || nal_type >= PTL_IFACE_MAX)
-        goto report;            /* would index outside nal_table */
-
-    /* Weird, but this sets b->lib_nal->libnal_ni.ni_pid.{nid,pid}, which
-     * lib_init() is about to do from the process_id passed to it...*/
-    set_address(b,args->nia_requested_pid);
-
-    process_id = b->lib_nal->libnal_ni.ni_pid;
-    
-    /* NOTE(review): the result of the nal specific initializer is
-     * overwritten by lib_init() below and therefore never acted on -
-     * confirm its return convention before changing that */
-    if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b);
-    /* initialize the generic 'library' level code */
-
-    rc = lib_init(b->lib_nal, args->nia_apinal, 
-                  process_id, 
-                  args->nia_requested_limits, 
-                  args->nia_actual_limits);
-
-report:
-    /*
-     * Whatever the initialization returned is passed back to the
-     * user level code for further interpretation.  We just exit if
-     * it is non-zero since something went wrong.
-     */
-    pthread_mutex_lock(&p->mutex);
-    p->nal_flags |= (rc != PTL_OK) ? NAL_FLAG_STOPPED : NAL_FLAG_RUNNING;
-    pthread_cond_broadcast(&p->cond);
-    pthread_mutex_unlock(&p->mutex);
-
-    if (rc == PTL_OK) {
-        /* the thunk function is called each time the timer loop
-           performs an operation and returns to blocking mode. we
-           overload this function to inform the api side that
-           it may be interested in looking at the event queue */
-        register_thunk(check_stopping,b);
-        timer_loop();
-    }
-    return(0);
-}
diff --git a/lustre/portals/unals/select.c b/lustre/portals/unals/select.c
deleted file mode 100644 (file)
index 09e1542..0000000
+++ /dev/null
@@ -1,419 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* select.c:
- *  Provides a general mechanism for registering and dispatching
- *  io events through the select system call.
- */
-
-#ifdef sun
-#include <sys/filio.h>
-#else
-#include <sys/ioctl.h>
-#endif
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <stdlib.h>
-#include <syscall.h>
-#include <pthread.h>
-#include <errno.h>
-#include <pqtimer.h>
-#include <dispatch.h>
-#include <procbridge.h>
-
-
-static struct timeval beginning_of_epoch;
-static io_handler io_handlers;
-
-/* Function: now
- *
- * Return: the current time in canonical units: a 64 bit number
- *   where the most significant 32 bits contains the number
- *   of seconds, and the least signficant a count of (1/(2^32))ths
- *   of a second.
- */
-when now()
-{
-    struct timeval result;
-  
-    gettimeofday(&result,0);
-    return((((unsigned long long)result.tv_sec)<<32)|
-           (((unsigned long long)result.tv_usec)<<32)/1000000);
-}
-
-
-/* Function: register_io_handler
- * Arguments: fd: the file descriptor of interest
- *            type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER
- *            function: a function to call when io is available on fd
- *            arg: an opaque correlator to return to the handler
- * Returns: a pointer to the io_handler structure
- */
-io_handler register_io_handler(int fd,
-                               int type,
-                               int (*function)(void *),
-                               void *arg)
-{
-    io_handler i=(io_handler)malloc(sizeof(struct io_handler));
-    if ((i->fd=fd)>=0){
-        i->type=type;
-        i->function=function;
-        i->argument=arg;
-        i->disabled=0;
-        i->last=&io_handlers;
-        if ((i->next=io_handlers)) i->next->last=&i->next;
-        io_handlers=i;
-    }
-    return(i);
-}
-
-/* Function: remove_io_handler
- * Arguments: i: a pointer to the handler to stop servicing
- *
- * remove_io_handler() doesn't actually free the handler, due
- * to reentrancy problems. it just marks the handler for 
- * later cleanup by the blocking function.
- */
-void remove_io_handler (io_handler i)
-{
-    i->disabled=1;
-}
-
-static void set_flag(io_handler n,fd_set *r, fd_set *w, fd_set *e)
-{
-    if (n->type & READ_HANDLER) FD_SET(n->fd, r);
-    if (n->type & WRITE_HANDLER) FD_SET(n->fd, w);
-    if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd, e);
-}
-
-static int prepare_fd_sets(fd_set *r, fd_set *w, fd_set *e)
-{
-    io_handler j;
-    io_handler *k;
-    int max = 0;
-
-    FD_ZERO(r);
-    FD_ZERO(w);
-    FD_ZERO(e);
-    for (k=&io_handlers;*k;){
-        if ((*k)->disabled){
-            j=*k;
-            *k=(*k)->next;
-            free(j);
-        }
-        if (*k) {
-           set_flag(*k,r,w,e);
-            if ((*k)->fd > max)
-                max = (*k)->fd;
-           k=&(*k)->next;
-       }
-    }
-    return max + 1;
-}
-
-static int execute_callbacks(fd_set *r, fd_set *w, fd_set *e)
-{
-    io_handler j;
-    int n = 0, t;
-
-    for (j = io_handlers; j; j = j->next) {
-        if (j->disabled)
-            continue;
-
-        t = 0;
-        if (FD_ISSET(j->fd, r) && (j->type & READ_HANDLER)) {
-            FD_CLR(j->fd, r);
-            t++;
-        }
-        if (FD_ISSET(j->fd, w) && (j->type & WRITE_HANDLER)) {
-            FD_CLR(j->fd, w);
-            t++;
-        }
-        if (FD_ISSET(j->fd, e) && (j->type & EXCEPTION_HANDLER)) {
-            FD_CLR(j->fd, e);
-            t++;
-        }
-        if (t == 0)
-            continue;
-
-        if (!(*j->function)(j->argument))
-            j->disabled = 1;
-
-        n += t;
-    }
-
-    return n;
-}
-
-#ifdef ENABLE_SELECT_DISPATCH
-
-static struct {
-    pthread_mutex_t mutex;
-    pthread_cond_t  cond;
-    int             submitted;
-    int             nready;
-    int             maxfd;
-    fd_set         *rset;
-    fd_set         *wset;
-    fd_set         *eset;
-    struct timeval *timeout;
-    struct timeval  submit_time;
-} fd_extra = {
-    PTHREAD_MUTEX_INITIALIZER,
-    PTHREAD_COND_INITIALIZER,
-    0, 0, 0,
-    NULL, NULL, NULL, NULL,
-};
-
-extern int liblustre_wait_event(int timeout);
-extern procbridge __global_procbridge;
-
-/*
- * this will intercept syscall select() of user apps
- * such as MPI libs.
- */
-int select(int n, fd_set *rset, fd_set *wset, fd_set *eset,
-           struct timeval *timeout)
-{
-    LASSERT(fd_extra.submitted == 0);
-
-    fd_extra.nready = 0;
-    fd_extra.maxfd = n;
-    fd_extra.rset = rset;
-    fd_extra.wset = wset;
-    fd_extra.eset = eset;
-    fd_extra.timeout = timeout;
-
-    liblustre_wait_event(0);
-    pthread_mutex_lock(&fd_extra.mutex);
-    gettimeofday(&fd_extra.submit_time, NULL);
-    fd_extra.submitted = 1;
-    LASSERT(__global_procbridge);
-    procbridge_wakeup_nal(__global_procbridge);
-
-again:
-    if (fd_extra.submitted)
-        pthread_cond_wait(&fd_extra.cond, &fd_extra.mutex);
-    pthread_mutex_unlock(&fd_extra.mutex);
-
-    liblustre_wait_event(0);
-
-    pthread_mutex_lock(&fd_extra.mutex);
-    if (fd_extra.submitted)
-        goto again;
-    pthread_mutex_unlock(&fd_extra.mutex);
-
-    LASSERT(fd_extra.nready >= 0);
-    LASSERT(fd_extra.submitted == 0);
-    return fd_extra.nready;
-}
-
-static int merge_fds(int max, fd_set *rset, fd_set *wset, fd_set *eset)
-{
-    int i;
-
-    LASSERT(rset);
-    LASSERT(wset);
-    LASSERT(eset);
-
-    for (i = 0; i < __FD_SETSIZE/__NFDBITS; i++) {
-        LASSERT(!fd_extra.rset ||
-                !(__FDS_BITS(rset)[i] & __FDS_BITS(fd_extra.rset)[i]));
-        LASSERT(!fd_extra.wset ||
-                !(__FDS_BITS(wset)[i] & __FDS_BITS(fd_extra.wset)[i]));
-        LASSERT(!fd_extra.eset ||
-                !(__FDS_BITS(eset)[i] & __FDS_BITS(fd_extra.eset)[i]));
-
-        if (fd_extra.rset && __FDS_BITS(fd_extra.rset)[i])
-            __FDS_BITS(rset)[i] |= __FDS_BITS(fd_extra.rset)[i];
-        if (fd_extra.wset && __FDS_BITS(fd_extra.wset)[i])
-            __FDS_BITS(wset)[i] |= __FDS_BITS(fd_extra.wset)[i];
-        if (fd_extra.eset && __FDS_BITS(fd_extra.eset)[i])
-            __FDS_BITS(eset)[i] |= __FDS_BITS(fd_extra.eset)[i];
-    }
-
-    return (fd_extra.maxfd > max ? fd_extra.maxfd : max);
-}
-
-static inline
-int timeval_ge(struct timeval *tv1, struct timeval *tv2)
-{
-    LASSERT(tv1 && tv2);
-    return ((tv1->tv_sec - tv2->tv_sec) * 1000000 +
-            (tv1->tv_usec - tv2->tv_usec) >= 0);
-}
-
-/*
- * choose the most recent timeout value
- */
-static struct timeval *choose_timeout(struct timeval *tv1,
-                                      struct timeval *tv2)
-{
-    if (!tv1)
-        return tv2;
-    else if (!tv2)
-        return tv1;
-
-    if (timeval_ge(tv1, tv2))
-        return tv2;
-    else
-        return tv1;
-}
-
-/* Function: select_timer_block
- * Arguments: until: an absolute time when the select should return
- * 
- *   This function dispatches the various file descriptors' handler
- *   functions, if the kernel indicates there is io available.
- */
-void select_timer_block(when until)
-{
-    fd_set fds[3];
-    struct timeval timeout;
-    struct timeval *timeout_pointer, *select_timeout;
-    int max, nready, nexec;
-    int fd_handling;
-
-again:
-    if (until) {
-        when interval;
-
-        interval = until - now();
-        timeout.tv_sec = (interval >> 32);
-        timeout.tv_usec = ((interval << 32) / 1000000) >> 32;
-        timeout_pointer = &timeout;
-    } else
-        timeout_pointer = NULL;
-
-    fd_handling = 0;
-    max = prepare_fd_sets(&fds[0], &fds[1], &fds[2]);
-    select_timeout = timeout_pointer;
-
-    pthread_mutex_lock(&fd_extra.mutex);
-    fd_handling = fd_extra.submitted;
-    pthread_mutex_unlock(&fd_extra.mutex);
-    if (fd_handling) {
-        max = merge_fds(max, &fds[0], &fds[1], &fds[2]);
-        select_timeout = choose_timeout(timeout_pointer, fd_extra.timeout);
-    }
-
-    /* XXX only compile for linux */
-#if __WORDSIZE == 64
-    nready = syscall(SYS_select, max, &fds[0], &fds[1], &fds[2],
-                     select_timeout);
-#else
-    nready = syscall(SYS__newselect, max, &fds[0], &fds[1], &fds[2],
-                     select_timeout);
-#endif
-    if (nready < 0) {
-        CERROR("select return err %d, errno %d\n", nready, errno);
-        return;
-    }
-
-    if (nready) {
-        nexec = execute_callbacks(&fds[0], &fds[1], &fds[2]);
-        nready -= nexec;
-    } else
-        nexec = 0;
-
-    /* even both nready & nexec are 0, we still need try to wakeup
-     * upper thread since it may have timed out
-     */
-    if (fd_handling) {
-        LASSERT(nready >= 0);
-
-        pthread_mutex_lock(&fd_extra.mutex);
-        if (nready) {
-            if (fd_extra.rset)
-                *fd_extra.rset = fds[0];
-            if (fd_extra.wset)
-                *fd_extra.wset = fds[1];
-            if (fd_extra.eset)
-                *fd_extra.eset = fds[2];
-            fd_extra.nready = nready;
-            fd_extra.submitted = 0;
-        } else {
-            struct timeval t;
-
-            fd_extra.nready = 0;
-            if (fd_extra.timeout) {
-                gettimeofday(&t, NULL);
-                if (timeval_ge(&t, &fd_extra.submit_time))
-                    fd_extra.submitted = 0;
-            }
-        }
-
-        pthread_cond_signal(&fd_extra.cond);
-        pthread_mutex_unlock(&fd_extra.mutex);
-    }
-
-    /* haven't found portals event, go back to loop if time
-     * is not expired */
-    if (!nexec) {
-        if (timeout_pointer == NULL || now() >= until)
-            goto again;
-    }
-}
-
-#else /* !ENABLE_SELECT_DISPATCH */
-
-/* Function: select_timer_block
- * Arguments: until: an absolute time when the select should return
- * 
- *   This function dispatches the various file descriptors' handler
- *   functions, if the kernel indicates there is io available.
- */
-void select_timer_block(when until)
-{
-    fd_set fds[3];
-    struct timeval timeout;
-    struct timeval *timeout_pointer;
-    int max, nready;
-
-again:
-    if (until) {
-        when interval;
-        interval = until - now();
-        timeout.tv_sec = (interval >> 32);
-        timeout.tv_usec = ((interval << 32) / 1000000) >> 32;
-        timeout_pointer = &timeout;
-    } else
-        timeout_pointer = NULL;
-
-    max = prepare_fd_sets(&fds[0], &fds[1], &fds[2]);
-
-    nready = select(max, &fds[0], &fds[1], &fds[2], timeout_pointer);
-    if (nready > 0)
-        execute_callbacks(&fds[0], &fds[1], &fds[2]);
-}
-#endif /* ENABLE_SELECT_DISPATCH */
-
-/* Function: init_unix_timer()
- *   is called to initialize the library 
- */
-void init_unix_timer()
-{
-    io_handlers=0;
-    gettimeofday(&beginning_of_epoch, 0);
-    initialize_timer(select_timer_block);
-}
diff --git a/lustre/portals/unals/table.c b/lustre/portals/unals/table.c
deleted file mode 100644 (file)
index 662775a..0000000
+++ /dev/null
@@ -1,264 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <table.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-/* table.c:
- * a very simple hash table implementation with paramerterizable 
- * comparison and key generation functions. it does resize
- * in order to accomidate more entries, but never collapses 
- * the table 
- */
-
-static table_entry *table_lookup (table t,void *comparator,
-                                  unsigned int k,
-                                  int (*compare_function)(void *, void *),
-                                  int *success)
-{
-    unsigned int key=k%t->size;
-    table_entry *i;
-
-    for (i=&(t->entries[key]);*i;i=&((*i)->next)){
-        if (compare_function && ((*i)->key==k))
-            if ((*t->compare_function)((*i)->value,comparator)){
-                *success=1;
-                return(i);
-            }
-    }
-    *success=0;
-    return(&(t->entries[key]));
-}
-
-
-static void resize_table(table t, int size)
-{
-    int old_size=t->size;
-    table_entry *old_entries=t->entries;
-    int i; 
-    table_entry j,n;
-    table_entry *position;
-    int success;
-  
-    t->size=size;
-    t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size);
-    memset(t->entries,0,sizeof(table_entry)*t->size);
-
-    for (i=0;i<old_size;i++)
-        for (j=old_entries[i];j;j=n){
-            n=j->next;
-            position=table_lookup(t,0,j->key,0,&success);
-            j->next= *position;
-            *position=j;
-        }
-    free(old_entries);
-}
-
-
-/* Function: key_from_int
- * Arguments: int i: value to compute the key of
- * Returns: the key 
- */
-unsigned int key_from_int(int i)
-{
-    return(i);
-}
-
-
-/* Function: key_from_string
- * Arguments: char *s: the null terminated string
- *                     to compute the key of
- * Returns: the key 
- */
-unsigned int key_from_string(char *s)
-{
-    unsigned int result=0;
-    unsigned char *n;
-    int i;
-    if (!s) return(1);
-    for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i;
-    return(result);
-}
-
-
-/* Function: hash_create_table
- * Arguments: compare_function: a function to compare
- *                              a table instance with a correlator
- *            key_function: a function to generate a 32 bit 
- *                          hash key from a correlator
- * Returns: a pointer to the new table
- */
-table hash_create_table (int (*compare_function)(void *, void *),
-                    unsigned int (*key_function)(unsigned int *))
-{
-    table new=(table)malloc(sizeof(struct table));
-    memset(new, 0, sizeof(struct table));
-
-    new->compare_function=compare_function;
-    new->key_function=key_function;
-    new->number_of_entries=0;
-    new->size=4;
-    new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size);
-    memset(new->entries,0,sizeof(table_entry)*new->size);
-    return(new);
-}
-
-
-/* Function: hash_table_find
- * Arguments: t: a table to look in
- *            comparator: a value to access the table entry
- * Returns: the element references to by comparator, or null
- */
-void *hash_table_find (table t, void *comparator)
-{
-    int success;
-    table_entry* entry=table_lookup(t,comparator,
-                                    (*t->key_function)(comparator),
-                                    t->compare_function,
-                                    &success);
-    if (success)  return((*entry)->value);
-    return(0);
-}
-
-
-/* Function: hash_table_insert
- * Arguments: t: a table to insert the object
- *            value: the object to put in the table
- *            comparator: the value by which the object 
- *                        will be addressed
- * Returns: nothing
- */
-void hash_table_insert (table t, void *value, void *comparator)
-{
-    int success;
-    unsigned int k=(*t->key_function)(comparator);
-    table_entry *position=table_lookup(t,comparator,k,
-                                       t->compare_function,&success);
-    table_entry entry;
-
-    if (success) {
-        entry = *position;
-    } else {
-        entry = (table_entry)malloc(sizeof(struct table_entry));
-        memset(entry, 0, sizeof(struct table_entry));
-        entry->next= *position;
-        *position=entry;
-        t->number_of_entries++;
-    }
-    entry->value=value;
-    entry->key=k;
-    if (t->number_of_entries > t->size) resize_table(t,t->size*2);
-}
-
-/* Function: hash_table_remove
- * Arguments: t: the table to remove the object from
- *            comparator: the index value of the object to remove
- * Returns: 
- */
-void hash_table_remove (table t, void *comparator)
-{
-    int success;
-    table_entry temp;
-    table_entry *position=table_lookup(t,comparator,
-                                       (*t->key_function)(comparator),
-                                       t->compare_function,&success);
-    if(success) {
-        temp=*position;
-        *position=(*position)->next;
-        free(temp); /* the value? */
-        t->number_of_entries--;
-    }
-}
-
-/* Function: hash_iterate_table_entries
- * Arguments: t: the table to iterate over
- *            handler: a function to call with each element
- *                     of the table, along with arg
- *            arg: the opaque object to pass to handler
- * Returns: nothing
- */
-void hash_iterate_table_entries(table t,
-                           void (*handler)(void *,void *), 
-                           void *arg)
-{
-    int i;
-    table_entry *j,*next;
-  
-    for (i=0;i<t->size;i++)
-        for (j=t->entries+i;*j;j=next){
-            next=&((*j)->next);
-            (*handler)(arg,(*j)->value);
-        }
-}
-
-/* Function: hash_filter_table_entries
- * Arguments: t: the table to iterate over
- *            handler: a function to call with each element
- *                     of the table, along with arg
- *            arg: the opaque object to pass to handler
- * Returns: nothing
- * Notes: operations on the table inside handler are not safe
- *
- * filter_table_entires() calls the handler function for each
- *   item in the table, passing it and arg. The handler function
- *   returns 1 if it is to be retained in the table, and 0
- *   if it is to be removed.
- */
-void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg)
-{
-    int i;
-    table_entry *j,*next,v;
-  
-    for (i=0;i<t->size;i++)
-        for (j=t->entries+i;*j;j=next){
-            next=&((*j)->next);
-            if (!(*handler)(arg,(*j)->value)){
-                next=j;
-                v=*j;
-                *j=(*j)->next;
-                free(v);
-                t->number_of_entries--;
-            }
-        }
-}
-
-/* Function: destroy_table
- * Arguments: t: the table to free
- *            thunk: a function to call with each element,
- *                   most likely free()
- * Returns: nothing
- */
-void hash_destroy_table(table t,void (*thunk)(void *))
-{
-    table_entry j,next;
-    int i;
-    for (i=0;i<t->size;i++)
-        for (j=t->entries[i];j;j=next){
-            next=j->next;
-            if (thunk) (*thunk)(j->value);
-            free(j);
-        }
-    free(t->entries);
-    free(t);
-}
diff --git a/lustre/portals/unals/table.h b/lustre/portals/unals/table.h
deleted file mode 100644 (file)
index 7fab586..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-#ifndef E_TABLE
-#define E_TABLE
-
-typedef struct table_entry {
-  unsigned int key;
-  void *value;
-  struct table_entry *next;
-} *table_entry;
-
-
-typedef struct table {
-  unsigned int size;
-  int number_of_entries;
-  table_entry *entries;
-  int (*compare_function)(void *, void *);
-  unsigned int (*key_function)(unsigned int *);
-} *table;
-
-/* table.c */
-unsigned int key_from_int(int i);
-unsigned int key_from_string(char *s);
-table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *));
-void *hash_table_find(table t, void *comparator);
-void hash_table_insert(table t, void *value, void *comparator);
-void hash_table_remove(table t, void *comparator);
-void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg);
-void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg);
-void hash_destroy_table(table t, void (*thunk)(void *));
-
-#endif
diff --git a/lustre/portals/unals/tcpnal.c b/lustre/portals/unals/tcpnal.c
deleted file mode 100644 (file)
index abb6d01..0000000
+++ /dev/null
@@ -1,256 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2003 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.lustre.org.
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-/* tcpnal.c:
-   This file implements the TCP-based nal by providing glue
-   between the connection service and the generic NAL implementation */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <pqtimer.h>
-#include <dispatch.h>
-#include <bridge.h>
-#include <ipmap.h>
-#include <connection.h>
-#include <pthread.h>
-#include <errno.h>
-#ifndef __CYGWIN__
-#include <syscall.h>
-#endif
-
-/* Function:  tcpnal_send
- * Arguments: nal:     pointer to my nal control block
- *            private: unused
- *            cookie:  passed back to the portals library
- *            hdr:     pointer to the portals header
- *            nid:     destination node
- *            pid:     destination process
- *            data:    body of the message
- *            len:     length of the body
- * Returns: zero on success
- *
- * sends a packet to the peer, after insuring that a connection exists
- */
-ptl_err_t tcpnal_send(lib_nal_t *n,
-                      void *private,
-                      lib_msg_t *cookie,
-                      ptl_hdr_t *hdr,
-                      int type,
-                      ptl_nid_t nid,
-                      ptl_pid_t pid,
-                      unsigned int niov,
-                      struct iovec *iov,
-                      size_t offset,
-                      size_t len)
-{
-    connection c;
-    bridge b=(bridge)n->libnal_data;
-    struct iovec tiov[257];
-    static pthread_mutex_t send_lock = PTHREAD_MUTEX_INITIALIZER;
-    ptl_err_t rc = PTL_OK;
-    int   sysrc;
-    int   total;
-    int   ntiov;
-    int i;
-
-    if (!(c=force_tcp_connection((manager)b->lower,
-                                 PNAL_IP(nid,b),
-                                 PNAL_PORT(nid,pid),
-                                 b->local)))
-        return(PTL_FAIL);
-
-    /* TODO: these results should be checked. furthermore, provision
-       must be made for the SIGPIPE which is delivered when
-       writing on a tcp socket which has closed underneath
-       the application. there is a linux flag in the sendmsg
-       call which turns off the signally behaviour, but its
-       nonstandard */
-
-    LASSERT (niov <= 256);
-
-    tiov[0].iov_base = hdr;
-    tiov[0].iov_len = sizeof(ptl_hdr_t);
-    ntiov = 1 + lib_extract_iov(256, &tiov[1], niov, iov, offset, len);
-
-    pthread_mutex_lock(&send_lock);
-#if 1
-    for (i = total = 0; i < ntiov; i++)
-            total += tiov[i].iov_len;
-    
-    sysrc = syscall(SYS_writev, c->fd, tiov, ntiov);
-    if (sysrc != total) {
-            fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
-                     rc, total, errno);
-            rc = PTL_FAIL;
-    }
-#else
-    for (i = total = 0; i <= ntiov; i++) {
-            rc = send(c->fd, tiov[i].iov_base, tiov[i].iov_len, 0);
-            
-            if (rc != tiov[i].iov_len) {
-                    fprintf (stderr, "BAD SEND rc %d != %d, errno %d\n",
-                             rc, tiov[i].iov_len, errno);
-                    rc = PTL_FAIL;
-                    break;
-            }
-            total += rc;
-    }
-#endif
-#if 0
-    fprintf (stderr, "sent %s total %d in %d frags\n", 
-             hdr->type == PTL_MSG_ACK ? "ACK" :
-             hdr->type == PTL_MSG_PUT ? "PUT" :
-             hdr->type == PTL_MSG_GET ? "GET" :
-             hdr->type == PTL_MSG_REPLY ? "REPLY" :
-             hdr->type == PTL_MSG_HELLO ? "HELLO" : "UNKNOWN",
-             total, niov + 1);
-#endif
-    pthread_mutex_unlock(&send_lock);
-
-    if (rc == PTL_OK) {
-            /* NB the NAL only calls lib_finalize() if it returns PTL_OK
-             * from cb_send() */
-            lib_finalize(n, private, cookie, PTL_OK);
-    }
-
-    return(rc);
-}
-
-
-/* Function:  tcpnal_recv
- * Arguments: lib_nal_t *nal:    pointer to my nal control block
- *            void *private:     connection pointer passed through
- *                               lib_parse()
- *            lib_msg_t *cookie: passed back to portals library
- *            user_ptr data:     pointer to the destination buffer
- *            size_t mlen:       length of the body
- *            size_t rlen:       length of data in the network
- * Returns: zero on success
- *
- * blocking read of the requested data. must drain out the
- * difference of mainpulated and requested lengths from the network
- */
-ptl_err_t tcpnal_recv(lib_nal_t *n,
-                      void *private,
-                      lib_msg_t *cookie,
-                      unsigned int niov,
-                      struct iovec *iov,
-                      size_t offset,
-                      size_t mlen,
-                      size_t rlen)
-
-{
-    struct iovec tiov[256];
-    int ntiov;
-    int i;
-
-    if (!niov)
-            goto finalize;
-
-    LASSERT(mlen);
-    LASSERT(rlen);
-    LASSERT(rlen >= mlen);
-
-    ntiov = lib_extract_iov(256, tiov, niov, iov, offset, mlen);
-    
-    /* FIXME
-     * 1. Is this effecient enough? change to use readv() directly?
-     * 2. need check return from read_connection()
-     * - MeiJia
-     */
-    for (i = 0; i < ntiov; i++)
-        read_connection(private, tiov[i].iov_base, tiov[i].iov_len);
-
-finalize:
-    /* FIXME; we always assume success here... */
-    lib_finalize(n, private, cookie, PTL_OK);
-
-    if (mlen!=rlen){
-        char *trash=malloc(rlen-mlen);
-        
-        /*TODO: check error status*/
-        read_connection(private,trash,rlen-mlen);
-        free(trash);
-    }
-
-    return(PTL_OK);
-}
-
-
-/* Function:  from_connection: 
- * Arguments: c: the connection to read from 
- * Returns: whether or not to continue reading from this connection,
- *          expressed as a 1 to continue, and a 0 to not
- *
- *  from_connection() is called from the select loop when i/o is 
- *  available. It attempts to read the portals header and 
- *  pass it to the generic library for processing.
- */
-static int from_connection(void *a, void *d)
-{
-    connection c = d;
-    bridge b = a;
-    ptl_hdr_t hdr;
-
-    if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){
-        lib_parse(b->lib_nal, &hdr, c);
-        /*TODO: check error status*/
-        return(1);
-    }
-    return(0);
-}
-
-
-static void tcpnal_shutdown(bridge b)
-{
-    shutdown_connections(b->lower);
-}
-
-/* Function:  PTL_IFACE_TCP
- * Arguments: pid_request: desired port number to bind to
- *            desired: passed NAL limits structure
- *            actual: returned NAL limits structure
- * Returns: a nal structure on success, or null on failure
- */
-int tcpnal_init(bridge b)
-{
-    manager m;
-        
-    b->lib_nal->libnal_send=tcpnal_send;
-    b->lib_nal->libnal_recv=tcpnal_recv;
-    b->shutdown=tcpnal_shutdown;
-    
-    if (!(m=init_connections(PNAL_PORT(b->lib_nal->libnal_ni.ni_pid.nid,
-                                       b->lib_nal->libnal_ni.ni_pid.pid),
-                             from_connection,b))){
-        /* TODO: this needs to shut down the
-           newly created junk */
-        return(PTL_NAL_FAILED);
-    }
-    b->lower=m;
-    return(PTL_OK);
-}
diff --git a/lustre/portals/unals/timer.h b/lustre/portals/unals/timer.h
deleted file mode 100644 (file)
index aaf39d2..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *  Copyright (c) 2002 Eric Hoffman
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-/* TODO: make this an explicit type when they become available */
-typedef unsigned long long when;
-
-typedef struct timer {
-  void (*function)(void *);
-  void *arg;
-  when w;
-  int interval;
-  int disable;
-} *timer;
-
-timer register_timer(when, void (*f)(void *), void *a);
-void remove_timer(timer t);
-void timer_loop(void);
-void initialize_timer(void);
-void register_thunk(void (*f)(void *),void *a);
-
-
-#define HZ 0x100000000ull
-
-
diff --git a/lustre/portals/unals/utypes.h b/lustre/portals/unals/utypes.h
deleted file mode 100644 (file)
index 7eca959..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- *  Copyright (c) 2002 Cray Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/sandiaportals/
- */
-
-typedef unsigned short uint16;
-typedef unsigned long uint32;
-typedef unsigned long long uint64;
-typedef unsigned char uint8;
diff --git a/lustre/portals/utils/.cvsignore b/lustre/portals/utils/.cvsignore
deleted file mode 100644 (file)
index e2a0d44..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-Makefile
-Makefile.in
-acceptor
-debugctl
-ptlctl
-.deps
-routerstat
-wirecheck
-gmnalnid
-.*.cmd
diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am
deleted file mode 100644 (file)
index 1d9f905..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (C) 2001  Cluster File Systems, Inc.
-#
-# This code is issued under the GNU General Public License.
-# See the file COPYING in this distribution
-
-## $(srcdir)/../ for <portals/*.h>, ../../ for generated <config.h>
-#COMPILE = $(CC) -Wall -g -I$(srcdir)/../include -I../../include
-#LINK = $(CC) -o $@
-
-# libuptlctl.a is a build-only (noinst) library that is produced only when
-# the LIBLUSTRE conditional is enabled.
-if LIBLUSTRE
-noinst_LIBRARIES = libuptlctl.a
-endif
-
-# Same sources as libptlctl.a below, minus the command parser; compiled with
-# the LLCPPFLAGS/LLCFLAGS flag sets.
-libuptlctl_a_SOURCES = portals.c debug.c l_ioctl.c
-libuptlctl_a_CPPFLAGS = $(LLCPPFLAGS)
-libuptlctl_a_CFLAGS = $(LLCFLAGS)
-
-# debugctl is always built; the remaining tools are appended further down.
-sbin_PROGRAMS = debugctl
-
-lib_LIBRARIES = libptlctl.a
-
-libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h
-
-# The remaining utilities are built only when UTILS is enabled and
-# CRAY_PORTALS is not.
-if UTILS
-if !CRAY_PORTALS
-sbin_PROGRAMS += acceptor ptlctl routerstat wirecheck gmnalnid
-endif
-endif
-
-acceptor_SOURCES = acceptor.c
-acceptor_LDADD = $(LIBWRAP)
-
-wirecheck_SOURCES = wirecheck.c
-
-gmnalnid_SOURCES = gmnalnid.c
-
-# libptlctl.a is linked through -L. -lptlctl, which automake leaves out of
-# the dependencies it computes from LDADD; hence the explicit _DEPENDENCIES
-# line so the library is built before the program is linked.
-ptlctl_SOURCES = ptlctl.c
-ptlctl_LDADD =  -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
-ptlctl_DEPENDENCIES = libptlctl.a
-
-routerstat_SOURCES = routerstat.c
-
-# Linked against libptlctl.a the same way as ptlctl above.
-debugctl_SOURCES = debugctl.c
-debugctl_LDADD = -L. -lptlctl $(LIBREADLINE) $(LIBEFENCE)
-debugctl_DEPENDENCIES = libptlctl.a
-
diff --git a/lustre/portals/utils/Makefile.mk b/lustre/portals/utils/Makefile.mk
deleted file mode 100644 (file)
index cbbe6d5..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-# Shared settings live one directory above $(src).
-include $(src)/../Kernelenv
-
-# Programs listed in host-progs are also listed in `always`.
-# NOTE(review): presumably kbuild host-program semantics (built
-# unconditionally for the build host) -- confirm against the kernel's kbuild
-# documentation.
-host-progs := acceptor ptlctl
-always := $(host-progs)
-
-# PTLCTLOBJS is not defined in this file; presumably it comes from Kernelenv
-# -- verify.
-ptlctl-objs := ptlctl.o $(PTLCTLOBJS)
diff --git a/lustre/portals/utils/acceptor.c b/lustre/portals/utils/acceptor.c
deleted file mode 100644 (file)
index 524d128..0000000
+++ /dev/null
@@ -1,249 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <errno.h>
-#ifdef HAVE_LIBWRAP
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <tcpd.h>
-#endif
-
-#include <portals/api-support.h>
-#include <portals/list.h>
-#include <portals/lib-types.h>
-#include <portals/socknal.h>
-
-/* should get this from autoconf somehow */
-#ifndef PIDFILE_DIR
-#define PIDFILE_DIR "/var/run"
-#endif 
-
-#define PROGNAME "acceptor"
-
-#ifdef HAVE_LIBWRAP
-/* needed because libwrap declares these as externs */
-int     allow_severity = LOG_INFO;
-int     deny_severity = LOG_WARNING;
-#endif
-
-/* Write this process's pid to PIDFILE_DIR/<name>-<port>.pid.  If the file
- * cannot be opened the error is reported through syslog and nothing else
- * happens. */
-void create_pidfile(char *name, int port)
-{
-        char path[1024];
-        FILE *out;
-
-        snprintf(path, sizeof(path), "%s/%s-%d.pid", PIDFILE_DIR, name, port);
-
-        out = fopen(path, "w");
-        if (out == NULL) {
-                syslog(LOG_ERR, "%s: %s\n", path, strerror(errno));
-                return;
-        }
-
-        fprintf(out, "%d\n", getpid());
-        fclose(out);
-}
-
-/* Return 1 (after complaining on stderr) when PIDFILE_DIR/<name>-<port>.pid
- * already exists, 0 otherwise. */
-int pidfile_exists(char *name, int port)
-{
-        char path[1024];
-
-        snprintf(path, sizeof(path), "%s/%s-%d.pid", PIDFILE_DIR, name, port);
-
-        if (access(path, F_OK) != 0)
-                return (0);
-
-        fprintf(stderr, "%s: exists, acceptor already running.\n", path);
-        return (1);
-}
-
-/* Log the peer of an accepted connection through syslog.
- *
- * net_ip is the peer's IPv4 address in network byte order.  The address is
- * reverse-resolved with gethostbyaddr(); when that fails the dotted-quad
- * form is logged instead.  fd is accepted for interface compatibility but
- * is not used. */
-void
-show_connection (int fd, __u32 net_ip)
-{
-        struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET);
-        __u32 host_ip = ntohl (net_ip);
-        char host[1024];
-
-        (void)fd;
-
-        if (h == NULL)
-                /* octets are unsigned, so print them with %u */
-                snprintf (host, sizeof(host), "%u.%u.%u.%u",
-                          (unsigned int)((host_ip >> 24) & 0xff),
-                          (unsigned int)((host_ip >> 16) & 0xff),
-                          (unsigned int)((host_ip >> 8) & 0xff),
-                          (unsigned int)(host_ip & 0xff));
-        else
-                snprintf (host, sizeof(host), "%s", h->h_name);
-
-        syslog (LOG_INFO, "Accepted host: %s\n", host);
-}
-
-/* Print the command-line synopsis to stderr and terminate with status 1. */
-void
-usage (char *myname)
-{
-        static const char synopsis[] =
-                "Usage: %s [-N nal_id] [-p] [-l] port\n\n"
-                " -l\tKeep stdin/stdout open\n"
-                " -p\tAllow connections from non-privileged ports\n";
-
-        fprintf (stderr, synopsis, myname);
-        exit (1);
-}
-
-/*
- * acceptor: listen on a TCP port and register every accepted connection
- * with the portals device through the NAL_CMD_REGISTER_PEER_FD command.
- *
- * Options:
- *   -N nal_id  NAL to register the connection with (default SOCKNAL)
- *   -l         passed to daemon() as `noclose` (keep stdio open)
- *   -p         also accept connections from non-privileged source ports
- * The single positional argument is the TCP port to listen on.
- *
- * Exits with status 1 on any setup failure or on a fatal accept() error.
- */
-int main(int argc, char **argv)
-{
-        int o, fd, rc, port, pfd;
-        struct sockaddr_in srvaddr;
-        int c;
-        int noclose = 0;
-        int nal = SOCKNAL;
-        int require_privports = 1;
-        long portarg;
-        char *end;
-
-        while ((c = getopt (argc, argv, "N:lp")) != -1) {
-                switch (c) {
-                case 'N':
-                        if (sscanf(optarg, "%d", &nal) != 1 ||
-                            nal < 0 || nal > NAL_MAX_NR)
-                                usage(argv[0]);
-                        break;
-                case 'l':
-                        noclose = 1;
-                        break;
-                case 'p':
-                        require_privports = 0;
-                        break;
-                default:
-                        usage (argv[0]);
-                        break;
-                }
-        }
-
-        if (optind >= argc)
-                usage (argv[0]);
-
-        /* Parse strictly: atol() would turn garbage into port 0, which
-         * bind() treats as "pick any port". */
-        errno = 0;
-        portarg = strtol(argv[optind++], &end, 10);
-        if (errno != 0 || *end != '\0' || portarg < 1 || portarg > 65535)
-                usage (argv[0]);
-        port = (int)portarg;
-
-        if (pidfile_exists(PROGNAME, port))
-                exit(1);
-
-        memset(&srvaddr, 0, sizeof(srvaddr));
-        srvaddr.sin_family = AF_INET;
-        srvaddr.sin_port = htons(port);
-        srvaddr.sin_addr.s_addr = INADDR_ANY;
-
-        fd = socket(PF_INET, SOCK_STREAM, 0);
-        if (fd < 0) {
-                perror("opening socket");
-                exit(1);
-        }
-
-        o = 1;
-        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) {
-                perror("Cannot set REUSEADDR socket opt");
-                exit(1);
-        }
-
-        rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
-        if ( rc == -1 ) {
-                perror("bind: ");
-                exit(1);
-        }
-
-        if (listen(fd, 127)) {
-                perror("listen: ");
-                exit(1);
-        }
-        fprintf(stderr, "listening on port %d\n", port);
-
-        pfd = open("/dev/portals", O_RDWR);
-        if ( pfd < 0 ) {
-                perror("opening portals device");
-                exit(1);
-        }
-
-        rc = daemon(0, noclose);
-        if (rc < 0) {
-                perror("daemon(): ");
-                exit(1);
-        }
-
-        openlog(PROGNAME, LOG_PID, LOG_DAEMON);
-        syslog(LOG_INFO, "started, listening on port %d\n", port);
-        create_pidfile(PROGNAME, port);
-
-        while (1) {
-                struct sockaddr_in clntaddr;
-                /* accept() takes a socklen_t *, not an int * */
-                socklen_t len = sizeof(clntaddr);
-                int cfd;
-                struct portal_ioctl_data data;
-                struct portals_cfg pcfg;
-#ifdef HAVE_LIBWRAP
-                struct request_info request;
-#endif
-                char addrstr[INET_ADDRSTRLEN];
-
-                cfd = accept(fd, (struct sockaddr *)&clntaddr, &len);
-                if ( cfd < 0 ) {
-                        int err = errno;
-
-                        /* transient conditions: just wait for the next peer */
-                        if (err == EINTR || err == ECONNABORTED)
-                                continue;
-
-                        /* stderr may have been closed by daemon(), so the
-                         * failure is reported through syslog as well */
-                        syslog(LOG_ERR, "accept: %s\n", strerror(err));
-                        errno = err;
-                        perror("accept");
-                        exit(1);
-                }
-
-#ifdef HAVE_LIBWRAP
-                /* libwrap access control */
-                request_init(&request, RQ_DAEMON, "lustre", RQ_FILE, cfd, 0);
-                sock_host(&request);
-                if (!hosts_access(&request)) {
-                        inet_ntop(AF_INET, &clntaddr.sin_addr,
-                                  addrstr, INET_ADDRSTRLEN);
-                        /* %d, not %hd: ports above 32767 must not print
-                         * as negative numbers */
-                        syslog(LOG_WARNING, "Unauthorized access from %s:%d\n",
-                               addrstr, ntohs(clntaddr.sin_port));
-                        close (cfd);
-                        continue;
-                }
-#endif
-
-                if (require_privports && ntohs(clntaddr.sin_port) >= IPPORT_RESERVED) {
-                        inet_ntop(AF_INET, &clntaddr.sin_addr,
-                                  addrstr, INET_ADDRSTRLEN);
-                        syslog(LOG_ERR, "Closing non-privileged connection from %s:%d\n",
-                               addrstr, ntohs(clntaddr.sin_port));
-                        rc = close(cfd);
-                        if (rc)
-                                perror ("close un-privileged client failed");
-                        continue;
-                }
-
-                show_connection (cfd, clntaddr.sin_addr.s_addr);
-
-                PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
-                pcfg.pcfg_nal = nal;
-                pcfg.pcfg_fd = cfd;
-                pcfg.pcfg_misc = SOCKNAL_CONN_NONE; /* == incoming connection */
-
-                PORTAL_IOC_INIT(data);
-                data.ioc_pbuf1 = (char*)&pcfg;
-                data.ioc_plen1 = sizeof(pcfg);
-
-                if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) {
-                        perror("ioctl failed");
-                } else {
-                        printf("client registered\n");
-                }
-                /* this process's copy of the descriptor is closed whether
-                 * or not the ioctl succeeded */
-                rc = close(cfd);
-                if (rc)
-                        perror ("close failed");
-        }
-
-        /* not reached: the loop above only leaves through exit() */
-        closelog();
-        exit(0);
-
-}
diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c
deleted file mode 100644 (file)
index 9886a5c..0000000
+++ /dev/null
@@ -1,833 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Some day I'll split all of this functionality into a cfs_debug module
- * of its own.  That day is not today.
- *
- */
-
-#define __USE_FILE_OFFSET64
-#define  _GNU_SOURCE
-
-#include <portals/list.h>
-
-#include <stdio.h>
-#ifdef HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include "ioctl.h"
-#include <fcntl.h>
-#include <errno.h>
-#include <unistd.h>
-#ifndef __CYGWIN__
-# include <syscall.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-
-#ifdef HAVE_LINUX_VERSION_H
-#include <linux/version.h>
-
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-#define BUG()                            /* workaround for module.h includes */
-#include <linux/module.h>
-#endif
-#endif /* !HAVE_LINUX_VERSION_H */
-
-#include <sys/utsname.h>
-
-#include <portals/api-support.h>
-#include <portals/ptlctl.h>
-#include "parser.h"
-
-#include <time.h>
-
-static char rawbuf[8192];
-static char *buf = rawbuf;
-static int max = 8192;
-/*static int g_pfd = -1;*/
-static int subsystem_mask = ~0;
-static int debug_mask = ~0;
-
-#define MAX_MARK_SIZE 100
-
-/* Subsystem names accepted on the command line.  The array index of a name
- * is the bit number do_debug_mask() sets or clears in subsystem_mask.
- * NULL-terminated. */
-static const char *portal_debug_subsystems[] =
-        {"undefined", "mdc", "mds", "osc", 
-         "ost", "class", "log", "llite",
-         "rpc", "mgmt", "portals", "nal", 
-         "pinger", "filter", "ptlbd", "echo", 
-         "ldlm", "lov", "router", "cobd", 
-         "sm", "asobd", "confobd", "lmv", 
-         "cmobd", NULL};
-/* Debug message type names; the array index of a name is the bit number
- * do_debug_mask() sets or clears in debug_mask.  NULL-terminated. */
-static const char *portal_debug_masks[] =
-        {"trace", "inode", "super", "ext2", 
-         "malloc", "cache", "info", "ioctl",
-         "blocks", "net", "warning", "buffs", 
-         "other", "dentry", "portals", "page", 
-         "dlmtrace", "error", "emerg", "ha", 
-         "rpctrace", "vfstrace", "reada", "mmap",
-         "config", NULL};
-
-struct debug_daemon_cmd {
-        char *cmd;
-        unsigned int cmdv;
-};
-
-static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = {
-        {"start", DEBUG_DAEMON_START},
-        {"stop", DEBUG_DAEMON_STOP},
-        {0, 0}
-};
-
-/* Enable (enable != 0) or disable the subsystem and/or debug type called
- * `name` in the file-level subsystem_mask / debug_mask.
- *
- * The comparison is case-insensitive.  "all_subs" matches every subsystem
- * and "all_types" every debug type.  One line is printed per affected
- * entry.  Returns 1 when at least one entry matched, 0 otherwise. */
-static int do_debug_mask(char *name, int enable)
-{
-        const char *verb = enable ? "Enabling" : "Disabling";
-        int every_sub = (strcasecmp(name, "all_subs") == 0);
-        int every_type = (strcasecmp(name, "all_types") == 0);
-        int matched = 0;
-        int bit;
-
-        for (bit = 0; portal_debug_subsystems[bit] != NULL; bit++) {
-                if (!every_sub &&
-                    strcasecmp(name, portal_debug_subsystems[bit]) != 0)
-                        continue;
-
-                printf("%s output from subsystem \"%s\"\n",
-                        verb, portal_debug_subsystems[bit]);
-                if (enable)
-                        subsystem_mask |= (1 << bit);
-                else
-                        subsystem_mask &= ~(1 << bit);
-                matched = 1;
-        }
-
-        for (bit = 0; portal_debug_masks[bit] != NULL; bit++) {
-                if (!every_type &&
-                    strcasecmp(name, portal_debug_masks[bit]) != 0)
-                        continue;
-
-                printf("%s output of type \"%s\"\n",
-                        verb, portal_debug_masks[bit]);
-                if (enable)
-                        debug_mask |= (1 << bit);
-                else
-                        debug_mask &= ~(1 << bit);
-                matched = 1;
-        }
-
-        return matched;
-}
-
-/* Initialisation hook; currently does nothing, ignores its arguments and
- * always returns 0. */
-int dbg_initialize(int argc, char **argv)
-{
-        return 0;
-}
-
-/* Command handler: switch OFF every subsystem or debug type named in
- * argv[1..].  Names that match nothing are reported on stderr.  Always
- * returns 0, including when only the usage line is printed. */
-int jt_dbg_filter(int argc, char **argv)
-{
-        int   idx = 1;
-
-        if (argc <= 1) {
-                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
-                        argv[0]);
-                return 0;
-        }
-
-        while (idx < argc) {
-                char *word = argv[idx++];
-
-                if (do_debug_mask(word, 0) == 0)
-                        fprintf(stderr, "Unknown subsystem or debug type: %s\n",
-                                word);
-        }
-        return 0;
-}
-
-/* Command handler: switch ON every subsystem or debug type named in
- * argv[1..].  Names that match nothing are reported on stderr.  Always
- * returns 0, including when only the usage line is printed. */
-int jt_dbg_show(int argc, char **argv)
-{
-        int    idx = 1;
-
-        if (argc <= 1) {
-                fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n",
-                        argv[0]);
-                return 0;
-        }
-
-        while (idx < argc) {
-                char *word = argv[idx++];
-
-                if (do_debug_mask(word, 1) == 0)
-                        fprintf(stderr, "Unknown subsystem or debug type: %s\n",
-                                word);
-        }
-
-        return 0;
-}
-
-/* Write `value`, formatted as a decimal string, to the file `procpath`.
- *
- * The terminating NUL is written together with the digits (len + 1 bytes),
- * exactly as before.
- *
- * Returns 0 on success, -1 when the file cannot be opened, or the negative
- * result of write() when writing fails.  The descriptor is closed on every
- * path; it used to leak when write() failed. */
-static int applymask(char* procpath, int value)
-{
-        int rc;
-        char buf[64];
-        int len = snprintf(buf, 64, "%d", value);
-
-        int fd = open(procpath, O_WRONLY);
-        if (fd == -1) {
-                fprintf(stderr, "Unable to open %s: %s\n",
-                        procpath, strerror(errno));
-                return fd;
-        }
-        rc = write(fd, buf, len+1);
-        if (rc<0) {
-                /* report first: close() may overwrite errno */
-                fprintf(stderr, "Write to %s failed: %s\n",
-                        procpath, strerror(errno));
-                close(fd);
-                return rc;
-        }
-        close(fd);
-        return 0;
-}
-
-/* Push both masks out.  When dump_filename is set they are packed into a
- * portals_debug_ioctl_data and handed to dump(); otherwise each mask is
- * written to its own /proc/sys/portals file.  A summary line is printed in
- * either case. */
-static void applymask_all(unsigned int subs_mask, unsigned int debug_mask)
-{
-        if (dump_filename) {
-                struct portals_debug_ioctl_data data;
-
-                data.hdr.ioc_len = sizeof(data);
-                data.hdr.ioc_version = 0;
-                data.subs = subs_mask;
-                data.debug = debug_mask;
-
-                dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data);
-        } else {
-                applymask("/proc/sys/portals/subsystem_debug", subs_mask);
-                applymask("/proc/sys/portals/debug", debug_mask);
-        }
-
-        printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n",
-               subs_mask, debug_mask);
-}
-
-/* Command handler taking exactly one argument (case-insensitive):
- *   "subs"       - list the known subsystem names
- *   "types"      - list the known debug type names
- *   "applymasks" - apply the current masks via applymask_all()
- * Any other argument is silently ignored.  Always returns 0. */
-int jt_dbg_list(int argc, char **argv)
-{
-        const char **entry;
-        const char *what;
-
-        if (argc != 2) {
-                fprintf(stderr, "usage: %s <subs || types>\n", argv[0]);
-                return 0;
-        }
-        what = argv[1];
-
-        if (strcasecmp(what, "subs") == 0) {
-                printf("Subsystems: all_subs");
-                for (entry = portal_debug_subsystems; *entry != NULL; entry++)
-                        printf(", %s", *entry);
-                printf("\n");
-                return 0;
-        }
-
-        if (strcasecmp(what, "types") == 0) {
-                printf("Types: all_types");
-                for (entry = portal_debug_masks; *entry != NULL; entry++)
-                        printf(", %s", *entry);
-                printf("\n");
-                return 0;
-        }
-
-        if (strcasecmp(what, "applymasks") == 0)
-                applymask_all(subsystem_mask, debug_mask);
-
-        return 0;
-}
-
-/* One parsed debug-log record, kept on a list until it is printed.
- *
- * all strings nul-terminated; only the struct and hdr need to be freed:
- * parse_buffer() makes file, fn and text point INTO the buffer allocated
- * for hdr, so they must not be freed separately. */
-struct dbg_line {
-        struct ptldebug_header *hdr;   /* malloc'ed copy of the whole record */
-        char *file;                    /* first string after the header */
-        char *fn;                      /* second string, right after file */
-        char *text;                    /* third string, right after fn */
-        struct list_head chain;        /* linkage used by list_add_ordered() */
-};
-
-/* Insert `new` into `head`, which is kept sorted by (ph_sec, ph_usec).
- * The record goes in front of the first entry that is not strictly earlier
- * than it, or at the tail when every entry is earlier. */
-static void list_add_ordered(struct dbg_line *new, struct list_head *head)
-{
-        struct list_head *pos;
-
-        list_for_each(pos, head) {
-                struct dbg_line *seen = list_entry(pos, struct dbg_line, chain);
-                int seen_is_earlier =
-                        seen->hdr->ph_sec < new->hdr->ph_sec ||
-                        (seen->hdr->ph_sec == new->hdr->ph_sec &&
-                         seen->hdr->ph_usec < new->hdr->ph_usec);
-
-                if (!seen_is_earlier) {
-                        list_add(&new->chain, pos->prev);
-                        return;
-                }
-        }
-        list_add_tail(&new->chain, head);
-}
-
-/* Print every record on `list` to `out`, one formatted line each, in list
- * order, and release it: each entry is unlinked, then its header buffer and
- * the entry itself are freed.  The list is empty on return. */
-static void print_saved_records(struct list_head *list, FILE *out)
-{
-        struct list_head *pos, *tmp;
-
-        /* the _safe iterator is required: entries are deleted while walking */
-        list_for_each_safe(pos, tmp, list) {
-                struct dbg_line *line;
-                struct ptldebug_header *hdr;
-
-                line = list_entry(pos, struct dbg_line, chain);
-                list_del(&line->chain);
-
-                hdr = line->hdr;
-                /* %llu matches the unsigned long long argument; the `L`
-                 * length modifier is only defined for floating conversions */
-                fprintf(out, "%06x:%06x:%u:%u.%06llu:%u:%u:%u:(%s:%u:%s()) %s",
-                        hdr->ph_subsys, hdr->ph_mask, hdr->ph_cpu_id,
-                        hdr->ph_sec, (unsigned long long)hdr->ph_usec,
-                        hdr->ph_stack, hdr->ph_pid, hdr->ph_extern_pid,
-                        line->file, hdr->ph_line_num, line->fn, line->text);
-                free(line->hdr);
-                free(line);
-        }
-}
-
-/* Convert a binary debug log read from `in` into text written to `out`.
- *
- * Each record starts with its total length (ph_len) followed by the rest of
- * the ptldebug_header and three NUL-terminated strings (file, function,
- * text).  Records rejected by subsystem_mask / debug_mask are dropped.  Kept
- * records are collected in timestamp order and flushed whenever a record
- * flagged PH_FLAG_FIRST_RECORD arrives, and once more at the end.
- *
- * Parsing stops at end of input, at a zero-length record, at a record whose
- * length is implausible, at a truncated record, or when memory runs out;
- * whatever was collected up to that point is still printed.  A summary line
- * goes to stdout.  Always returns 0.
- */
-static int parse_buffer(FILE *in, FILE *out)
-{
-        struct dbg_line *line;
-        struct ptldebug_header *hdr;
-        char buf[4097], *p, *end;
-        long reclen;
-        size_t rest;
-        unsigned long dropped = 0, kept = 0;
-        struct list_head chunk_list;
-
-        INIT_LIST_HEAD(&chunk_list);
-
-        while (1) {
-                if (fread(buf, sizeof(hdr->ph_len), 1, in) != 1)
-                        break;
-
-                hdr = (void *)buf;
-                reclen = (long)hdr->ph_len;
-                if (reclen == 0)
-                        break;
-                if (reclen > 4094) {
-                        fprintf(stderr, "unexpected large record: %d bytes.  "
-                                "aborting.\n",
-                                hdr->ph_len);
-                        break;
-                }
-                /* anything shorter than a header cannot be a record, and
-                 * would make the string parsing below run off the buffer */
-                if (reclen < (long)sizeof(*hdr)) {
-                        fprintf(stderr, "unexpected short record: %ld bytes.  "
-                                "aborting.\n", reclen);
-                        break;
-                }
-
-                /* a partially read record is as useless as none at all */
-                rest = (size_t)reclen - sizeof(hdr->ph_len);
-                if (fread(buf + sizeof(hdr->ph_len), 1, rest, in) != rest)
-                        break;
-
-                /* ph_flags is only valid now that the whole header has been
-                 * read; testing it earlier looked at the previous record
-                 * (or at uninitialised memory for the first one) */
-                if (hdr->ph_flags & PH_FLAG_FIRST_RECORD) {
-                        print_saved_records(&chunk_list, out);
-                        assert(list_empty(&chunk_list));
-                }
-
-                if (hdr->ph_mask &&
-                    (!(subsystem_mask & hdr->ph_subsys) ||
-                     (!(debug_mask & hdr->ph_mask)))) {
-                        dropped++;
-                        continue;
-                }
-
-                line = malloc(sizeof(*line));
-                if (line == NULL) {
-                        fprintf(stderr, "malloc failed; printing accumulated "
-                                "records and exiting.\n");
-                        break;
-                }
-
-                line->hdr = malloc(reclen + 1);
-                if (line->hdr == NULL) {
-                        fprintf(stderr, "malloc failed; printing accumulated "
-                                "records and exiting.\n");
-                        free(line);     /* not on the list yet */
-                        break;
-                }
-
-                p = (void *)line->hdr;
-                memcpy(line->hdr, buf, reclen);
-                end = p + reclen;
-                *end = '\0';            /* guarantees every strlen() stops */
-
-                /* If a string is missing its NUL the next pointer is clamped
-                 * to the terminator above, yielding an empty string instead
-                 * of a pointer past the allocation. */
-                p += sizeof(*hdr);
-                line->file = p;
-                p += strlen(line->file) + 1;
-                if (p > end)
-                        p = end;
-                line->fn = p;
-                p += strlen(line->fn) + 1;
-                if (p > end)
-                        p = end;
-                line->text = p;
-
-                list_add_ordered(line, &chunk_list);
-                kept++;
-        }
-
-        print_saved_records(&chunk_list, out);
-
-        printf("Debug log: %lu lines, %lu kept, %lu dropped.\n",
-                dropped + kept, kept, dropped);
-        return 0;
-}
-
-/* Command handler: dump_kernel [file] [raw]
- *
- * Asks the kernel to dump its debug buffer by writing a file name to
- * /proc/sys/portals/dump_kernel.  With raw != 0 the dump goes straight to
- * `file` and nothing else is done.  Otherwise the dump goes to a temporary
- * file under /tmp, is converted to text with parse_buffer() into `file`
- * (stdout when no file is given), and the temporary file is removed.
- *
- * A single argument that starts with '0' or '1' is taken as the raw flag,
- * not as a file name.
- *
- * Returns 0 on success (and after printing usage), 1 on failure.
- */
-int jt_dbg_debug_kernel(int argc, char **argv)
-{
-        char filename[4096];
-        struct stat st;
-        int rc, raw = 0, fd;
-        size_t namelen;
-        FILE *in, *out = stdout;
-
-        if (argc > 3) {
-                fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]);
-                return 0;
-        }
-
-        if (argc > 2) {
-                raw = atoi(argv[2]);
-        } else if (argc > 1 && (argv[1][0] == '0' || argv[1][0] == '1')) {
-                raw = atoi(argv[1]);
-                argc--;
-        }
-
-        /* If we are dumping raw (which means no conversion step to ASCII)
-         * then dump directly to any supplied filename, otherwise this is
-         * just a temp file and we dump to the real file at convert time. */
-        if (argc > 1 && raw) {
-                /* the name comes from the command line: never copy it
-                 * blindly into a fixed-size buffer */
-                if (strlen(argv[1]) >= sizeof(filename)) {
-                        fprintf(stderr, "file name too long: %s\n", argv[1]);
-                        return 1;
-                }
-                strcpy(filename, argv[1]);
-        } else {
-                /* casts make the arguments match %lu / %u on every ABI */
-                snprintf(filename, sizeof(filename), "/tmp/lustre-log.%lu.%u",
-                         (unsigned long)time(NULL), (unsigned int)getpid());
-        }
-
-        if (stat(filename, &st) == 0 && S_ISREG(st.st_mode))
-                unlink(filename);
-
-        fd = open("/proc/sys/portals/dump_kernel", O_WRONLY);
-        if (fd < 0) {
-                fprintf(stderr, "open(dump_kernel) failed: %s\n",
-                        strerror(errno));
-                return 1;
-        }
-
-        namelen = strlen(filename);
-        rc = write(fd, filename, namelen);
-        if (rc < 0 || (size_t)rc != namelen) {
-                fprintf(stderr, "write(%s) failed: %s\n", filename,
-                        strerror(errno));
-                close(fd);
-                return 1;
-        }
-        close(fd);
-
-        if (raw)
-                return 0;
-
-        in = fopen(filename, "r");
-        if (in == NULL) {
-                if (errno == ENOENT) /* no dump file created */
-                        return 0;
-
-                fprintf(stderr, "fopen(%s) failed: %s\n", filename,
-                        strerror(errno));
-                return 1;
-        }
-        if (argc > 1) {
-                out = fopen(argv[1], "w");
-                if (out == NULL) {
-                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
-                                strerror(errno));
-                        fclose(in);
-                        return 1;
-                }
-        }
-
-        rc = parse_buffer(in, out);
-        fclose(in);
-        if (argc > 1)
-                fclose(out);
-        if (rc) {
-                fprintf(stderr, "parse_buffer failed; leaving tmp file %s "
-                        "behind.\n", filename);
-        } else {
-                rc = unlink(filename);
-                if (rc)
-                        fprintf(stderr, "dumped successfully, but couldn't "
-                                "unlink tmp file %s: %s\n", filename,
-                                strerror(errno));
-        }
-        return rc;
-}
-
-/* Command handler: debug_file <input> [output]
- *
- * Converts the binary debug log <input> to text with parse_buffer().  The
- * text goes to [output] when given (created if necessary and truncated),
- * otherwise to stdout.
- *
- * Returns 0 after printing usage, 1 when a file cannot be opened, and the
- * result of parse_buffer() otherwise.
- */
-int jt_dbg_debug_file(int argc, char **argv)
-{
-        int fdin, fdout, rc;
-        FILE *in, *out = stdout;
-
-        if (argc > 3 || argc < 2) {
-                fprintf(stderr, "usage: %s <input> [output]\n", argv[0]);
-                return 0;
-        }
-
-        fdin = open(argv[1], O_RDONLY | O_LARGEFILE);
-        if (fdin == -1) {
-                fprintf(stderr, "open(%s) failed: %s\n", argv[1],
-                        strerror(errno));
-                return 1;
-        }
-        in = fdopen(fdin, "r");
-        if (in == NULL) {
-                fprintf(stderr, "fopen(%s) failed: %s\n", argv[1],
-                        strerror(errno));
-                close(fdin);
-                return 1;
-        }
-        if (argc > 2) {
-                /* O_CREAT requires the mode argument; without it the new
-                 * file gets whatever permission bits happen to be on the
-                 * stack.  O_TRUNC stops a shorter log from leaving the tail
-                 * of an older file behind. */
-                fdout = open(argv[2],
-                             O_CREAT | O_WRONLY | O_TRUNC | O_LARGEFILE,
-                             0600);
-                if (fdout == -1) {
-                        fprintf(stderr, "open(%s) failed: %s\n", argv[2],
-                                strerror(errno));
-                        fclose(in);
-                        return 1;
-                }
-                out = fdopen(fdout, "w");
-                if (out == NULL) {
-                        fprintf(stderr, "fopen(%s) failed: %s\n", argv[2],
-                                strerror(errno));
-                        fclose(in);
-                        close(fdout);
-                        return 1;
-                }
-        }
-
-        rc = parse_buffer(in, out);
-
-        /* flush and release both streams; stdout is left open */
-        fclose(in);
-        if (out != stdout)
-                fclose(out);
-        return rc;
-}
-
-/* Write the string `str` (without its terminating NUL) to `fd` with a
- * single write().  Returns 0 when every byte was written, 1 otherwise. */
-static int
-dbg_write_cmd(int fd, char *str)
-{
-        int    expected = strlen(str);
-        int    written  = write(fd, str, expected);
-
-        if (written != expected)
-                return 1;
-        return 0;
-}
-
-/* Usage line for jt_dbg_debug_daemon(); %s is the command name. */
-const char debug_daemon_usage[] = "usage: %s {start file [MB]|stop}\n";
-/* File the daemon commands are written to. */
-#define DAEMON_FILE "/proc/sys/portals/daemon_file"
-/* Command handler: debug_daemon {start file [MB] | stop}
- *
- * Controls the debug daemon by writing command strings to DAEMON_FILE:
- * "size=<MB>" (only when a size is given) followed by "start", or "stop".
- *
- * Returns 0 on success, 1 when no sub-command is given or when a write to
- * DAEMON_FILE fails (the result of dbg_write_cmd), and -1 when DAEMON_FILE
- * cannot be opened or the arguments are invalid.
- */
-int jt_dbg_debug_daemon(int argc, char **argv)
-{
-        int  rc;
-        int  fd;
-
-        if (argc <= 1) {
-                fprintf(stderr, debug_daemon_usage, argv[0]);
-                return 1;
-        }
-
-        /* the control file is opened before the sub-command is examined */
-        fd = open(DAEMON_FILE, O_WRONLY);
-        if (fd < 0) {
-                fprintf(stderr, "open %s failed: %s\n", DAEMON_FILE,
-                        strerror(errno));
-                return -1;
-        }
-        
-        rc = -1;
-        if (strcasecmp(argv[1], "start") == 0) {
-                /* "start" needs a file and takes an optional size of at
-                 * most 5 characters */
-                if (argc < 3 || argc > 4 ||
-                    (argc == 4 && strlen(argv[3]) > 5)) {
-                        fprintf(stderr, debug_daemon_usage, argv[0]);
-                        goto out;
-                }
-
-                if (argc == 4) {
-                        char       buf[12];
-                        const long min_size = 10;
-                        const long max_size = 20480;
-                        long       size;
-                        char      *end;
-
-                        /* base 0: decimal, octal (0...) and hex (0x...)
-                         * are all accepted */
-                        size = strtoul(argv[3], &end, 0);
-                        if (size < min_size || 
-                            size > max_size ||
-                            *end != 0) {
-                                fprintf(stderr, "size %s invalid, must be in "
-                                        "the range %ld-%ld MB\n", argv[3],
-                                        min_size, max_size);
-                                goto out;
-                        }
-
-                        snprintf(buf, sizeof(buf), "size=%ld", size);
-                        rc = dbg_write_cmd(fd, buf);
-                        if (rc != 0) {
-                                fprintf(stderr, "set %s failed: %s\n",
-                                        buf, strerror(errno));
-                                goto out;
-                        }
-                }
-
-                /* NOTE(review): argv[2], the file name, only appears in the
-                 * error message below; it is never written to DAEMON_FILE
-                 * here -- confirm that this is what the kernel expects. */
-                rc = dbg_write_cmd(fd, "start");
-                if (rc != 0) {
-                        fprintf(stderr, "start debug_daemon on %s failed: %s\n",
-                                argv[2], strerror(errno));
-                        goto out;
-                }
-
-                rc = 0;
-                goto out;
-        }
-        
-        if (strcasecmp(argv[1], "stop") == 0) {
-                rc = dbg_write_cmd(fd, "stop");
-                if (rc != 0) {
-                        fprintf(stderr, "stopping debug_daemon failed: %s\n",
-                                strerror(errno));
-                        goto out;
-                }
-
-                rc = 0;
-                goto out;
-        }
-
-        /* neither "start" nor "stop" */
-        fprintf(stderr, debug_daemon_usage, argv[0]);
-        rc = -1;
-out:
-        /* single exit point so the descriptor is always closed */
-        close(fd);
-        return rc;
-}
-
-/* Command handler (takes no arguments): issue IOC_PORTAL_CLEAR_DEBUG on
- * the portals device.  Returns 0 on success and after printing usage, -1
- * when packing the request or the ioctl fails. */
-int jt_dbg_clear_debug_buf(int argc, char **argv)
-{
-        struct portal_ioctl_data data;
-
-        if (argc != 1) {
-                fprintf(stderr, "usage: %s\n", argv[0]);
-                return 0;
-        }
-
-        memset(&data, 0, sizeof(data));
-        if (portal_ioctl_pack(&data, &buf, max) != 0) {
-                fprintf(stderr, "portal_ioctl_pack failed.\n");
-                return -1;
-        }
-
-        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf) != 0) {
-                fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/* Command handler: mark [text ...]
- *
- * Sends a marker string with IOC_PORTAL_MARK_DEBUG.  The marker is the
- * arguments joined by single spaces, truncated to MAX_MARK_SIZE - 1
- * characters; without arguments it is the current time as formatted by
- * ctime(), minus the trailing newline.
- *
- * The text is assembled in a fixed buffer on the stack: nothing is
- * allocated, nothing can leak, and the result is always NUL-terminated.
- * The old strncpy()/strncat() sequence could leave it unterminated when
- * the first argument alone filled the buffer.
- *
- * Returns 0 on success, -1 on failure.
- */
-int jt_dbg_mark_debug_buf(int argc, char **argv)
-{
-        int rc;
-        struct portal_ioctl_data data;
-        char mark[MAX_MARK_SIZE];
-        char *text;
-        time_t now = time(NULL);
-
-        if (argc > 1) {
-                size_t used = 0;
-                int counter;
-
-                mark[0] = '\0';
-                for (counter = 1;
-                     counter < argc && used < sizeof(mark) - 1; counter++) {
-                        /* snprintf truncates and always terminates */
-                        rc = snprintf(mark + used, sizeof(mark) - used,
-                                      "%s%s", counter > 1 ? " " : "",
-                                      argv[counter]);
-                        if (rc < 0)
-                                break;
-                        used += rc;
-                        if (used > sizeof(mark) - 1)
-                                used = sizeof(mark) - 1;
-                }
-                text = mark;
-        } else {
-                text = ctime(&now);
-                if (text == NULL) {
-                        fprintf(stderr, "ctime failed.\n");
-                        return -1;
-                }
-                text[strlen(text) - 1] = '\0'; /* stupid \n */
-        }
-
-        memset(&data, 0, sizeof(data));
-        data.ioc_inllen1 = strlen(text) + 1;
-        data.ioc_inlbuf1 = text;
-        if (portal_ioctl_pack(&data, &buf, max) != 0) {
-                fprintf(stderr, "portal_ioctl_pack failed.\n");
-                return -1;
-        }
-
-        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf);
-        if (rc) {
-                fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-        return 0;
-}
-
-/* Kernel module name -> directory, relative to the top of the source tree,
- * holding that module's object file.  jt_dbg_modules_2_4() combines the two
- * into "<path>/<mp->path>/<mp->name>.o" for its add-symbol-file lines.
- * Terminated by a {NULL, NULL} entry. */
-static struct mod_paths {
-        char *name, *path;
-} mod_paths[] = {
-        {"libcfs", "lustre/portals/libcfs"},
-        {"portals", "lustre/portals/portals"},
-        {"ksocknal", "lustre/portals/knals/socknal"},
-        {"kptlrouter", "lustre/portals/router"},
-        {"lvfs", "lustre/lvfs"},
-        {"obdclass", "lustre/obdclass"},
-        {"llog_test", "lustre/obdclass"},
-        {"ptlrpc", "lustre/ptlrpc"},
-        {"obdext2", "lustre/obdext2"},
-        {"ost", "lustre/ost"},
-        {"osc", "lustre/osc"},
-        {"mds", "lustre/mds"},
-        {"mdc", "lustre/mdc"},
-        {"llite", "lustre/llite"},
-        {"ldiskfs", "lustre/ldiskfs"},
-        {"smfs", "lustre/smfs"},
-        {"obdecho", "lustre/obdecho"},
-        {"ldlm", "lustre/ldlm"},
-        {"obdfilter", "lustre/obdfilter"},
-        {"extN", "lustre/extN"},
-        {"lov", "lustre/lov"},
-        {"lmv", "lustre/lmv"},
-        {"fsfilt_ext3", "lustre/lvfs"},
-        {"fsfilt_extN", "lustre/lvfs"},
-        {"fsfilt_reiserfs", "lustre/lvfs"},
-        {"fsfilt_smfs", "lustre/lvfs"},
-        {"fsfilt_ldiskfs", "lustre/lvfs"},
-        {"mds_ext2", "lustre/mds"},
-        {"mds_ext3", "lustre/mds"},
-        {"mds_extN", "lustre/mds"},
-        {"ptlbd", "lustre/ptlbd"},
-        {"mgmt_svc", "lustre/mgmt"},
-        {"mgmt_cli", "lustre/mgmt"},
-        {"cobd", "lustre/cobd"},
-        {"cmobd", "lustre/cmobd"},
-        {"conf_obd", "lustre/obdclass"},
-        {NULL, NULL}
-};
-
-/*
- * Print gdb "add-symbol-file" lines for every module in mod_paths that
- * query_module(QM_INFO) reports as loaded.
- *
- * argv[1] (optional) replaces the default ".." path prefix; argv[2]
- * (optional) is stored in 'kernel', which is not used afterwards.
- * More than two arguments prints a usage line and returns 0.
- *
- * The working body is compiled only when HAVE_LINUX_VERSION_H is defined
- * and LINUX_VERSION_CODE is below 2.5.0; otherwise the function just
- * returns -EINVAL.
- */
-static int jt_dbg_modules_2_4(int argc, char **argv)
-{
-#ifdef HAVE_LINUX_VERSION_H
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-        struct mod_paths *mp;
-        char *path = "..";
-        char *kernel = "linux";
-
-        if (argc >= 2)
-                path = argv[1];
-        if (argc == 3)
-                kernel = argv[2];
-        if (argc > 3) {
-                printf("%s [path] [kernel]\n", argv[0]);
-                return 0;
-        }
-
-        for (mp = mod_paths; mp->name != NULL; mp++) {
-                struct module_info info;
-                int rc;
-                size_t crap;
-                /* local prototype for the query_module call below */
-                int query_module(const char *name, int which, void *buf,
-                                 size_t bufsize, size_t *ret);
-
-                rc = query_module(mp->name, QM_INFO, &info, sizeof(info),
-                                  &crap);
-                if (rc < 0) {
-                        /* ENOENT is silently skipped; other errors are reported */
-                        if (errno != ENOENT)
-                                printf("query_module(%s) failed: %s\n",
-                                       mp->name, strerror(errno));
-                } else {
-                        /* printed address is info.addr offset by sizeof(struct module) */
-                        printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
-                               mp->path, mp->name,
-                               info.addr + sizeof(struct module));
-                }
-        }
-
-        return 0;
-#endif /* Headers are 2.6-only */
-#endif /* !HAVE_LINUX_VERSION_H */
-        return -EINVAL;
-}
-
-/*
- * Print gdb "add-symbol-file" lines for loaded modules by parsing
- * /proc/modules (six whitespace-separated fields per line, the last one
- * being the load address).
- *
- * argv[1] (optional) replaces the default ".." path prefix.  argv[2] is
- * accepted for command-line compatibility but is not used.  More than two
- * arguments prints a usage line.  Always returns 0.
- */
-static int jt_dbg_modules_2_5(int argc, char **argv)
-{
-        struct mod_paths *mp;
-        char *path = "..";
-        const char *proc = "/proc/modules";
-        char modname[128];
-        unsigned long modaddr;
-        FILE *file;
-
-        if (argc > 3) {
-                printf("%s [path] [kernel]\n", argv[0]);
-                return 0;
-        }
-        if (argc >= 2)
-                path = argv[1];
-
-        file = fopen(proc, "r");
-        if (!file) {
-                printf("failed open %s: %s\n", proc, strerror(errno));
-                return 0;
-        }
-
-        /* The name is width-limited to fit modname; the four middle fields
-         * are skipped with %*s so their length can never overflow a buffer.
-         * Only the two stored conversions count towards the return value. */
-        while (fscanf(file, "%127s %*s %*s %*s %*s %lx\n",
-                      modname, &modaddr) == 2) {
-                for (mp = mod_paths; mp->name != NULL; mp++) {
-                        if (!strcmp(mp->name, modname))
-                                break;
-                }
-                if (mp->name) {
-                        printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
-                               mp->path, mp->name, modaddr);
-                }
-        }
-
-        fclose(file);
-        return 0;
-}
-
-/*
- * Entry point for the "modules" command: pick the module-listing method
- * that matches the running kernel.
- *
- * The release string from uname() is parsed as "<major>.<minor>".
- * Kernels newer than 2.4 use the /proc/modules parser; 2.4 and older use
- * query_module().  Returns 0 if uname() fails or the release cannot be
- * parsed, otherwise whatever the selected helper returns.
- */
-int jt_dbg_modules(int argc, char **argv)
-{
-        struct utsname sysinfo;
-        int major = 0, minor = 0;
-
-        if (uname(&sysinfo) != 0) {
-                printf("uname() failed: %s\n", strerror(errno));
-                return 0;
-        }
-
-        /* Compare numerically: testing only the character at release[2]
-         * sends e.g. "3.0" down the 2.4 path. */
-        if (sscanf(sysinfo.release, "%d.%d", &major, &minor) != 2) {
-                printf("cannot parse kernel release \"%s\"\n",
-                       sysinfo.release);
-                return 0;
-        }
-
-        if (major > 2 || (major == 2 && minor > 4))
-                return jt_dbg_modules_2_5(argc, argv);
-
-        return jt_dbg_modules_2_4(argc, argv);
-}
-
-/*
- * "panic" command: pack an empty ioctl request and send IOC_PORTAL_PANIC
- * to the portals device.  Takes no arguments; extra arguments print a
- * usage line and return 0.  Returns -1 if packing or the ioctl fails.
- */
-int jt_dbg_panic(int argc, char **argv)
-{
-        struct portal_ioctl_data data;
-
-        if (argc != 1) {
-                fprintf(stderr, "usage: %s\n", argv[0]);
-                return 0;
-        }
-
-        memset(&data, 0, sizeof(data));
-        if (portal_ioctl_pack(&data, &buf, max) != 0) {
-                fprintf(stderr, "portal_ioctl_pack failed.\n");
-                return -1;
-        }
-
-        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PANIC, buf) == 0)
-                return 0;
-
-        fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n", strerror(errno));
-        return -1;
-}
diff --git a/lustre/portals/utils/debugctl.c b/lustre/portals/utils/debugctl.c
deleted file mode 100644 (file)
index 1b6cd96..0000000
+++ /dev/null
@@ -1,66 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Some day I'll split all of this functionality into a cfs_debug module
- * of its own.  That day is not today.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <portals/api-support.h>
-#include <portals/ptlctl.h>
-#include "parser.h"
-
-
-/*
- * Command table for the debugctl tool, passed to Parser_init() and
- * Parser_execarg() in main().  Each entry is {name, handler, 0, help text};
- * the all-zero entry terminates the table.
- */
-command_t list[] = {
-        {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"},
-        {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file|stop], control debug daemon to dump debug buffer to a file"}, 
-        {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"},
-        {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"},
-        {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"},
-        {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"},
-        {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"},
-        {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"},
-        {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"},
-        {"panic", jt_dbg_panic, 0, "cause the kernel to panic"},
-        {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
-        {"help", Parser_help, 0, "help"},
-        {"exit", Parser_quit, 0, "quit"},
-        {"quit", Parser_quit, 0, "quit"},
-        { 0, 0, 0, NULL }
-};
-
-/*
- * debugctl entry point.  With arguments, runs a single command from the
- * table and returns its status; without arguments, runs the interactive
- * command loop and returns 0.  Exits with status 2 if dbg_initialize()
- * fails.  The portals device is unregistered on both paths.
- */
-int main(int argc, char **argv)
-{
-        int rc = 0;
-
-        if (dbg_initialize(argc, argv) < 0)
-                exit(2);
-
-        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
-
-        Parser_init("debugctl > ", list);
-        if (argc > 1)
-                rc = Parser_execarg(argc - 1, &argv[1], list);
-        else
-                Parser_commands();
-
-        unregister_ioc_dev(PORTALS_DEV_ID);
-        return rc;
-}
diff --git a/lustre/portals/utils/gmnalnid.c b/lustre/portals/utils/gmnalnid.c
deleted file mode 100644 (file)
index e45fae4..0000000
+++ /dev/null
@@ -1,117 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-  * vim:expandtab:shiftwidth=8:tabstop=8:
-  *
-  *  Copyright (c) 2003 Los Alamos National Laboratory (LANL)
-  *
-  *   This file is part of Lustre, http://www.lustre.org/
-  *
-  *   This file is free software; you can redistribute it and/or
-  *   modify it under the terms of version 2.1 of the GNU Lesser General
-  *   Public License as published by the Free Software Foundation.
-  *
-  *   Lustre is distributed in the hope that it will be useful,
-  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
-  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  *   GNU Lesser General Public License for more details.
-  *
-  *   You should have received a copy of the GNU Lesser General Public
-  *   License along with Portals; if not, write to the Free Software
-  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-  */
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <unistd.h>
-#include <syslog.h>
-#include <errno.h>
-
-#include <portals/api-support.h>
-#include <portals/list.h>
-#include <portals/lib-types.h>
-
-#define GMNAL_IOC_GET_GNID 1
-
-/* Round len up to the next multiple of 8. */
-int
-roundup(int len)
-{
-        const int mask = 0x7;
-
-        return (len + mask) & ~mask;
-}
-
-/*
- * gmnalnid: ask the GM NAL, through an IOC_PORTAL_NAL_CMD ioctl on
- * /dev/portals, for the id that corresponds to a node name and print it.
- *
- * Options:
- *   -n nodename   node to look up (required)
- *   -l            local id lookup; not implemented, exits with an error
- *
- * Prints the id on stdout and exits 0 on success.  Exits non-zero on a
- * usage error, allocation failure, device open failure or ioctl failure.
- */
-int main(int argc, char **argv)
-{
-        int rc, pfd;
-        struct portal_ioctl_data data;
-        struct portals_cfg pcfg;
-        unsigned int nid = 0, len;
-        char *name = NULL;
-        int c;
-
-        while ((c = getopt(argc, argv, "n:l")) != -1) {
-                switch (c) {
-                case 'n':
-                        name = optarg;
-                        break;
-                case 'l':
-                        printf("Get local id not implemented yet!\n");
-                        exit(-1);
-                default:
-                        /* unknown option: report it and stop */
-                        printf("usage %s -n nodename [-l]\n", argv[0]);
-                        exit(-1);
-                }
-        }
-
-        if (!name) {
-                printf("usage %s -n nodename [-l]\n", argv[0]);
-                exit(-1);
-        }
-
-        PCFG_INIT(pcfg, GMNAL_IOC_GET_GNID);
-        pcfg.pcfg_nal = GMNAL;
-
-        /*
-         *      set up the inputs
-         */
-        len = strlen(name) + 1;
-        pcfg.pcfg_pbuf1 = malloc(len);
-        if (pcfg.pcfg_pbuf1 == NULL) {
-                perror("allocating node name buffer");
-                exit(-1);
-        }
-        strcpy(pcfg.pcfg_pbuf1, name);
-        pcfg.pcfg_plen1 = len;
-
-        /*
-         *      set up the outputs
-         */
-        pcfg.pcfg_pbuf2 = (void*)&nid;
-        /* size of the output object itself, not of a pointer to it */
-        pcfg.pcfg_plen2 = sizeof(nid);
-
-        pfd = open("/dev/portals", O_RDWR);
-        if (pfd < 0) {
-                perror("opening portals device");
-                free(pcfg.pcfg_pbuf1);
-                exit(-1);
-        }
-
-        PORTAL_IOC_INIT(data);
-        data.ioc_pbuf1 = (char*)&pcfg;
-        data.ioc_plen1 = sizeof(pcfg);
-
-        rc = ioctl(pfd, IOC_PORTAL_NAL_CMD, &data);
-
-        free(pcfg.pcfg_pbuf1);
-        close(pfd);
-
-        if (rc < 0) {
-                /* nid was never filled in: do not print it as a result */
-                perror("Can't get my NID");
-                exit(-1);
-        }
-
-        printf("%u\n", nid);
-        exit(0);
-}
diff --git a/lustre/portals/utils/l_ioctl.c b/lustre/portals/utils/l_ioctl.c
deleted file mode 100644 (file)
index 0671c24..0000000
+++ /dev/null
@@ -1,339 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#define __USE_FILE_OFFSET64
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/mman.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include <portals/api-support.h>
-#include <portals/ptlctl.h>
-
-#ifndef __CYGWIN__
- #include <syscall.h>
-#else
- #include <windows.h>
- #include <windef.h>
-#endif
-
-static ioc_handler_t  do_ioctl;                 /* forward ref */
-/* Handler that l_ioctl() dispatches to; changed through set_ioc_handler(). */
-static ioc_handler_t *current_ioc_handler = &do_ioctl;
-
-/* One registered ioctl device: its path and its open descriptor
- * (register_ioc_dev() stores -1 until open_ioc_dev() opens it). */
-struct ioc_dev {
-       const char * dev_name;
-       int dev_fd;
-};
-
-/* Device table indexed by dev_id; it has 10 slots. */
-static struct ioc_dev ioc_dev_list[10];
-
-/* Header that dump() writes before each ioctl buffer and that
- * parse_dump() reads back. */
-struct dump_hdr {
-       int magic;
-       int dev_id;
-        unsigned int opc;
-};
-
-/* Dump file path; set by set_ioctl_dump(), opened by get_dump_file(). */
-char *dump_filename;
-
-/* Install the handler used by l_ioctl(); NULL restores do_ioctl. */
-void
-set_ioc_handler (ioc_handler_t *handler)
-{
-        current_ioc_handler = (handler != NULL) ? handler : do_ioctl;
-}
-
-/*
- * Return an open descriptor for a registered device, opening it on first
- * use and caching the descriptor in ioc_dev_list.
- *
- * Returns -EINVAL if dev_id is outside the table or has no registered
- * name, or the negative result of open() if the device cannot be opened.
- */
-static int
-open_ioc_dev(int dev_id)
-{
-        const char * dev_name;
-
-        /* Bound by the number of slots, not the byte size of the array. */
-        if (dev_id < 0 ||
-            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
-                return -EINVAL;
-
-        dev_name = ioc_dev_list[dev_id].dev_name;
-        if (dev_name == NULL) {
-                fprintf(stderr, "unknown device id: %d\n", dev_id);
-                return -EINVAL;
-        }
-
-        if (ioc_dev_list[dev_id].dev_fd < 0) {
-                int fd = open(dev_name, O_RDWR);
-
-                if (fd < 0) {
-                        fprintf(stderr, "opening %s failed: %s\n"
-                                "hint: the kernel modules may not be loaded\n",
-                                dev_name, strerror(errno));
-                        return fd;
-                }
-                ioc_dev_list[dev_id].dev_fd = fd;
-        }
-
-        return ioc_dev_list[dev_id].dev_fd;
-}
-
-
-/*
- * Default ioctl handler: issue the ioctl on the descriptor belonging to
- * dev_id.  Returns the open_ioc_dev() error if the device cannot be
- * opened, otherwise the ioctl() result.
- */
-static int
-do_ioctl(int dev_id, unsigned int opc, void *buf)
-{
-        int fd = open_ioc_dev(dev_id);
-
-        if (fd < 0)
-                return fd;
-
-        return ioctl(fd, opc, buf);
-}
-
-/*
- * Open the dump file in append mode.  Returns NULL, after printing a
- * message, when no dump filename is set; otherwise the fopen() result.
- */
-static FILE *
-get_dump_file()
-{
-        if (dump_filename)
-                return fopen(dump_filename, "a");
-
-        fprintf(stderr, "no dump filename\n");
-        return NULL;
-}
-
-/*
- * The dump file should start with a description of which devices are
- * used, but for now it is assumed that whatever app reads the file will
- * know what to do. */
-/*
- * ioctl handler that appends the request to the dump file instead of
- * issuing it: a struct dump_hdr followed by ioc_len bytes of the buffer.
- * Returns 0 on success, -EINVAL if the file cannot be opened or written.
- */
-int
-dump(int dev_id, unsigned int opc, void *buf)
-{
-        struct portal_ioctl_hdr *ioc_hdr = (struct portal_ioctl_hdr *) buf;
-        struct dump_hdr hdr;
-        FILE *out;
-        int nwritten;
-
-        printf("dumping opc %x to %s\n", opc, dump_filename);
-
-        hdr.magic = 0xdeadbeef;
-        hdr.dev_id = dev_id;
-        hdr.opc = opc;
-
-        out = get_dump_file();
-        if (out == NULL) {
-                fprintf(stderr, "%s: %s\n", dump_filename,
-                        strerror(errno));
-                return -EINVAL;
-        }
-
-        /* header first; the payload is written only if the header went out */
-        nwritten = fwrite(&hdr, sizeof(hdr), 1, out);
-        if (nwritten == 1)
-                nwritten = fwrite(buf, ioc_hdr->ioc_len, 1, out);
-        fclose(out);
-
-        if (nwritten == 1)
-                return 0;
-
-        fprintf(stderr, "%s: %s\n", dump_filename,
-                strerror(errno));
-        return -EINVAL;
-}
-
-/*
- * Register a device to send ioctls to.  Any device already registered
- * under dev_id is unregistered first.  The name pointer is stored, not
- * copied.  Returns dev_id, or -EINVAL if dev_id is outside the table.
- */
-int
-register_ioc_dev(int dev_id, const char * dev_name)
-{
-        /* Bound by the number of slots, not the byte size of the array. */
-        if (dev_id < 0 ||
-            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
-                return -EINVAL;
-
-        unregister_ioc_dev(dev_id);
-
-        ioc_dev_list[dev_id].dev_name = dev_name;
-        ioc_dev_list[dev_id].dev_fd = -1;
-
-        return dev_id;
-}
-
-/*
- * Forget the device registered under dev_id, closing its descriptor if
- * one is open.  Out-of-range ids are ignored.
- */
-void
-unregister_ioc_dev(int dev_id)
-{
-        /* Bound by the number of slots, not the byte size of the array. */
-        if (dev_id < 0 ||
-            (size_t)dev_id >= sizeof(ioc_dev_list) / sizeof(ioc_dev_list[0]))
-                return;
-
-        if (ioc_dev_list[dev_id].dev_name != NULL &&
-            ioc_dev_list[dev_id].dev_fd >= 0)
-                close(ioc_dev_list[dev_id].dev_fd);
-
-        ioc_dev_list[dev_id].dev_name = NULL;
-        ioc_dev_list[dev_id].dev_fd = -1;
-}
-
-/*
- * Select a dump file: from now on every ioctl buffer passed to l_ioctl()
- * is appended to it by dump().  The name is copied; a previous name is
- * freed.  Aborts if the copy cannot be allocated.  Always returns 0.
- */
-int
-set_ioctl_dump(char * file)
-{
-        /* free(NULL) is a no-op, so no guard is needed */
-        free(dump_filename);
-
-        dump_filename = strdup(file);
-        if (!dump_filename)
-                abort();
-
-        set_ioc_handler(&dump);
-        return 0;
-}
-
-/* Dispatch an ioctl request through the currently installed handler. */
-int
-l_ioctl(int dev_id, unsigned int opc, void *buf)
-{
-        int rc = current_ioc_handler(dev_id, opc, buf);
-
-        return rc;
-}
-
-/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer
- * in the file.  For example:
- *
- * parse_dump("lctl.dump", l_ioctl);
- *
- * Note: if using l_ioctl, then you also need to register_ioc_dev() for
- * each device used in the dump.
- *
- * The file is mapped read-only and walked record by record; each record
- * is a struct dump_hdr followed by an ioctl buffer whose length is taken
- * from its own ioc_len field.  Every record is checked against the end of
- * the mapping and against the size of the scratch buffer before it is
- * copied.
- *
- * Returns 0 when the whole file was replayed, -1 if a record is truncated
- * or too large.  Exits the process if the file cannot be opened or mapped
- * or if ioc_func reports a failure.
- */
-int
-parse_dump(char * dump_file, ioc_handler_t ioc_func)
-{
-        int line = 0;
-        int result = 0;
-        struct stat st;
-        char *start, *buf, *end;
-#ifndef __CYGWIN__
-        int fd;
-#else
-        HANDLE fd, hmap;
-        DWORD size;
-#endif
-
-#ifndef __CYGWIN__
-        fd = syscall(SYS_open, dump_file, O_RDONLY);
-        if (fd < 0) {
-                fprintf(stderr, "couldn't open %s: %s\n", dump_file,
-                        strerror(errno));
-                exit(1);
-        }
-
-        if (fstat(fd, &st)) {
-                perror("stat fails");
-                exit(1);
-        }
-
-        if (st.st_size < 1) {
-                fprintf(stderr, "KML is empty\n");
-                exit(1);
-        }
-
-        start = buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0);
-        close(fd);
-        /* validate the mapping before deriving any pointer from it */
-        if (start == MAP_FAILED) {
-                fprintf(stderr, "can't create file mapping\n");
-                exit(1);
-        }
-        end = start + st.st_size;
-#else
-        fd = CreateFile(dump_file, GENERIC_READ, FILE_SHARE_READ, NULL,
-                        OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
-        if (fd == INVALID_HANDLE_VALUE) {
-                fprintf(stderr, "couldn't open %s\n", dump_file);
-                exit(1);
-        }
-        size = GetFileSize(fd, NULL);
-        if (size < 1) {
-                fprintf(stderr, "KML is empty\n");
-                exit(1);
-        }
-
-        hmap = CreateFileMapping(fd, NULL, PAGE_READONLY, 0,0, NULL);
-        start = buf = MapViewOfFile(hmap, FILE_MAP_READ, 0, 0, 0);
-        CloseHandle(fd);
-        if (start == NULL) {
-                fprintf(stderr, "can't create file mapping\n");
-                exit(1);
-        }
-        end = buf + size;
-#endif /* __CYGWIN__ */
-
-        while (buf < end) {
-                struct dump_hdr *dump_hdr = (struct dump_hdr *) buf;
-                struct portal_ioctl_hdr * data;
-                char tmp[8096];
-                size_t avail = (size_t)(end - buf);
-                size_t len;
-                int rc;
-
-                line++;
-
-                /* both headers must be inside the mapping before ioc_len
-                 * can be read */
-                if (avail < sizeof(*dump_hdr) + sizeof(*data)) {
-                        fprintf(stderr, "dump file truncated at record %d\n",
-                                line);
-                        result = -1;
-                        break;
-                }
-
-                data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr));
-                len = (size_t)data->ioc_len;
-
-                /* the payload starts after the dump header, so that header
-                 * counts against the space that is left */
-                if (len > avail - sizeof(*dump_hdr)) {
-                        fprintf(stderr, "dump file overflow, %p + %d > %p\n",
-                                (void *)buf, data->ioc_len, (void *)end);
-                        result = -1;
-                        break;
-                }
-
-                /* never copy more than the scratch buffer holds */
-                if (len > sizeof(tmp)) {
-                        fprintf(stderr, "record %d: ioctl buffer too large\n",
-                                line);
-                        result = -1;
-                        break;
-                }
-#if 0
-                printf ("dump_hdr: %lx data: %lx\n",
-                        (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf);
-
-                printf("%d: opcode %x len: %d  ver: %x ", line, dump_hdr->opc,
-                       data->ioc_len, data->ioc_version);
-#endif
-
-                memcpy(tmp, data, len);
-
-                rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp);
-                if (rc) {
-                        printf("failed: %d\n", rc);
-                        exit(1);
-                }
-
-                buf += len + sizeof(*dump_hdr);
-        }
-
-        /* release the mapping on the error paths as well */
-#ifndef __CYGWIN__
-        munmap(start, end - start);
-#else
-        UnmapViewOfFile(start);
-        CloseHandle(hmap);
-#endif
-
-        return result;
-}
-
-/*
- * "dump" command: send all subsequent ioctl buffers to the named file
- * instead of issuing them.  Requires exactly one argument, the file name;
- * anything else prints a usage line.  Always returns 0.
- */
-int
-jt_ioc_dump(int argc, char **argv)
-{
-        /* Without the file argument argv[1] is NULL and must not be used. */
-        if (argc != 2) {
-                fprintf(stderr, "usage: %s <file>\n", argv[0]);
-                return 0;
-        }
-        printf("setting dumpfile to: %s\n", argv[1]);
-
-        set_ioctl_dump(argv[1]);
-        return 0;
-}
diff --git a/lustre/portals/utils/parser.c b/lustre/portals/utils/parser.c
deleted file mode 100644 (file)
index b91295b..0000000
+++ /dev/null
@@ -1,651 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001 Cluster File Systems, Inc.
- *
- *   This file is part of Lustre, http://www.sf.net/projects/lustre/
- *
- *   Lustre is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Lustre is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Lustre; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <stddef.h>
-#include <unistd.h>
-#include <sys/param.h>
-#include <assert.h>
-
-#ifdef HAVE_LIBREADLINE
-#define        READLINE_LIBRARY
-#include <readline/readline.h>
-
-/* completion_matches() is #if 0-ed out in modern glibc */
-#ifndef completion_matches
-#  define completion_matches rl_completion_matches
-#endif
-#endif
-
-extern void using_history(void);
-extern void stifle_history(int);
-extern void add_history(char *);
-
-#include "parser.h"
-
-static command_t * top_level;      /* Top level of commands, initialized by
-                                    * Parser_init                           */
-static char * parser_prompt = NULL;/* Parser prompt, set by Parser_init     */
-static int done;                  /* Nonzero stops the Parser_commands loop */
-
-
-/* static functions */
-static char *skipwhitespace(char *s);
-static char *skiptowhitespace(char *s);
-static command_t *find_cmd(char *name, command_t cmds[], char **next);
-static int process(char *s, char **next, command_t *lookup, command_t **result,
-                   char **prev);
-static void print_commands(char *str, command_t *table);
-
-/*
- * Return a pointer to the first non-whitespace character of s, or to its
- * terminating NUL if s holds only whitespace.
- */
-static char * skipwhitespace(char * s)
-{
-    char * t = s;
-
-    /* isspace() takes an unsigned char value; the NUL terminator is not
-     * whitespace, so it also ends the scan. */
-    while (isspace((unsigned char)*t))
-        t++;
-    return(t);
-}
-
-
-/*
- * Return a pointer to the first whitespace character of s, or to its
- * terminating NUL if s contains none.
- */
-static char * skiptowhitespace(char * s)
-{
-    char * t = s;
-
-    /* isspace() takes an unsigned char value */
-    while (*t && !isspace((unsigned char)*t))
-        t++;
-    return(t);
-}
-
-/*
- * Split line in place into blank/tab separated tokens.
- *
- * Up to maxargs token pointers are stored in argv; further tokens are
- * ignored.  line is modified by strtok().  Returns the number of tokens
- * stored, 0 if the line holds none.
- */
-static int line2args(char *line, char **argv, int maxargs)
-{
-    char *arg;
-    int i = 0;
-
-    /* i < maxargs keeps every store inside argv[0 .. maxargs-1] */
-    for (arg = strtok(line, " \t");
-         arg != NULL && i < maxargs;
-         arg = strtok(NULL, " \t")) {
-        argv[i] = arg;
-        i++;
-    }
-    return i;
-}
-
-/* Look up name in cmds by exact, case-sensitive comparison.  Returns the
- * matching entry, or NULL if no entry has that name. */
-static command_t *Parser_findargcmd(char *name, command_t cmds[])
-{
-        int idx;
-
-        for (idx = 0; cmds[idx].pc_name; idx++) {
-                if (!strcmp(name, cmds[idx].pc_name))
-                        return &cmds[idx];
-        }
-        return NULL;
-}
-
-/*
- * Run the command named by argv[0] with the given argument vector.
- * Returns the handler's result, printing its help text when that result
- * is CMD_HELP.  If no command has that name, lists the available command
- * names and returns -1.
- */
-int Parser_execarg(int argc, char **argv, command_t cmds[])
-{
-        command_t *cmd = Parser_findargcmd(argv[0], cmds);
-        int rc;
-
-        if (cmd == NULL) {
-                printf("Try interactive use without arguments or use one of:\n");
-                for (cmd = cmds; cmd->pc_name; cmd++)
-                        printf("\"%s\" ", cmd->pc_name);
-                printf("\nas argument.\n");
-                return -1;
-        }
-
-        rc = (cmd->pc_func)(argc, argv);
-        if (rc == CMD_HELP)
-                fprintf(stderr, "%s\n", cmd->pc_help);
-        return rc;
-}
-
-/* Find the first entry of cmds whose name starts (case-insensitively)
-   with the first token of name.  Returns that entry, or NULL when cmds
-   or name is NULL, the token is empty, or nothing matches.
-
-   *next is set to the whitespace that ends the token; on a match it is
-   advanced further, to the start of the following token.  The string
-   itself is not modified. */
-static command_t * find_cmd(char * name, command_t cmds[], char ** next)
-{
-        command_t *entry;
-        int        toklen;
-
-        if (cmds == NULL || name == NULL)
-                return NULL;
-
-        /* isolate the first token: [name, *next) */
-        name = skipwhitespace(name);
-        *next = skiptowhitespace(name);
-        toklen = *next - name;
-        if (toklen == 0)
-                return NULL;
-
-        for (entry = cmds; entry->pc_name; entry++) {
-                if (strncasecmp(name, entry->pc_name, toklen) != 0)
-                        continue;
-                *next = skipwhitespace(*next);
-                return entry;
-        }
-        return NULL;
-}
-
-/* Recursively process a command line string s and find the command
-   corresponding to it.
-
-   Returns CMD_NONE if nothing matches, CMD_AMBIG if the token is a
-   non-exact prefix of more than one command, CMD_COMPLETE if the match
-   has a handler function, and CMD_INCOMPLETE if the match has no handler
-   and the line ends before a sub-command is given.  Otherwise the rest of
-   the line is processed against the sub-command table of the match.
-
-   *result receives the matched entry (NULL for CMD_NONE), *next the
-   position after the matched token and *prev the start of the string
-   that was looked up last. */
-static int process(char *s, char ** next, command_t *lookup,
-                   command_t **result, char **prev)
-{
-        *result = find_cmd(s, lookup, next);
-        *prev = s;
-
-        /* non existent */
-        if ( ! *result )
-                return CMD_NONE;
-
-        /* found entry: is it ambiguous, i.e. not exact command name and
-           more than one command in the list matches.  Note that find_cmd
-           points to the first ambiguous entry */
-        if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) &&
-             find_cmd(s, (*result) + 1, next))
-                return CMD_AMBIG;
-
-        /* found a unique command: component or full? */
-        if ( (*result)->pc_func )
-                return CMD_COMPLETE;
-
-        /* Test the character *next points at: the pointer itself is never
-           NULL here, so comparing it would never detect the end of the
-           line. */
-        if ( **next == '\0' )
-                return CMD_INCOMPLETE;
-
-        return process(*next, next, (*result)->pc_sub_cmd, result, prev);
-}
-
-#ifdef HAVE_LIBREADLINE
-static command_t * match_tbl;   /* Command completion against this table */
-
-/*
- * Completion generator: each call returns a newly allocated copy of the
- * next name in match_tbl that starts (case-insensitively) with text, or
- * NULL when there are no more.  state == 0 restarts the scan; the scan
- * position and the prefix length are kept between calls.
- */
-static char * command_generator(const char * text, int state)
-{
-        static int cursor,
-                prefix_len;
-        char       *candidate;
-
-        /* nothing to complete against */
-        if (match_tbl == NULL)
-                return NULL;
-
-        /* first call for this word: restart */
-        if (state == 0) {
-                cursor = 0;
-                prefix_len = (int)strlen(text);
-        }
-
-        /* resume after the previously returned entry */
-        for (;;) {
-                candidate = match_tbl[cursor].pc_name;
-                if (!candidate)
-                        break;
-                cursor++;
-
-                if (strncasecmp(candidate, text, prefix_len) == 0)
-                        return(strdup(candidate));
-        }
-
-        /* No more matches */
-        return NULL;
-}
-
-/*
- * Completion entry: walk the commands already present in rl_line_buffer
- * to choose the table that command_generator() should match against,
- * then return the matches for text.
- */
-static char **command_completion(char * text, int start, int end)
-{
-        command_t *table;
-        char      *pos;
-
-        match_tbl = top_level;
-
-        table = find_cmd(rl_line_buffer, match_tbl, &pos);
-        while (table != NULL) {
-                /* descend only when the matched word is followed by a blank */
-                if (pos[-1] == ' ')
-                        match_tbl = table->pc_sub_cmd;
-                table = find_cmd(pos, match_tbl, &pos);
-        }
-
-        return completion_matches(text, command_generator);
-}
-#endif
-
-/*
- * Resolve one input line against the top-level command table and act on
- * the outcome: run the handler of a complete command (printing its help
- * text if it returns CMD_HELP), or report an ambiguous, unknown or
- * incomplete command on stderr.  Returns the handler's result, 0 when no
- * handler was run.
- */
-int execute_line(char * line)
-{
-        command_t *cmd, *candidate;
-        char      *prev, *next, *scratch;
-        char      *argv[MAXARGS];
-        int        n;
-        int        rc = 0;
-
-        switch (process(line, &next, top_level, &cmd, &prev)) {
-        case CMD_COMPLETE:
-                n = line2args(line, argv, MAXARGS);
-                rc = (cmd->pc_func)(n, argv);
-
-                if (rc == CMD_HELP)
-                        fprintf(stderr, "%s\n", cmd->pc_help);
-                break;
-
-        case CMD_INCOMPLETE:
-                fprintf(stderr,
-                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
-                        line, line);
-                fprintf(stderr, "\t");
-                for (n = 0; cmd->pc_sub_cmd[n].pc_name; n++)
-                        fprintf(stderr, "%s ", cmd->pc_sub_cmd[n].pc_name);
-                fprintf(stderr, "\n");
-                break;
-
-        case CMD_NONE:
-                fprintf(stderr, "No such command, type help\n");
-                break;
-
-        case CMD_AMBIG:
-                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
-                /* list every entry, from the first match on, that the
-                 * token is a prefix of */
-                for (candidate = find_cmd(prev, cmd, &scratch);
-                     candidate != NULL;
-                     candidate = find_cmd(prev, cmd, &scratch)) {
-                        fprintf(stderr, "%s ", candidate->pc_name);
-                        cmd = candidate + 1;
-                }
-                fprintf(stderr, "\n");
-                break;
-        }
-
-        return rc;
-}
-
-/* Do-nothing callback; always returns 0. */
-int
-noop_fn ()
-{
-        return 0;
-}
-
-/*
- * Prepare line input and report whether stdin is a terminal.
- *
- * With readline: enable and cap the history, replace the terminal
- * prep/deprep hooks with no-ops when stdin is not a terminal, and install
- * the completion callbacks.  Without readline only the terminal test is
- * done.  Returns nonzero if stdin is a terminal.
- */
-int init_input()
-{
-        int on_tty = isatty(fileno(stdin));
-
-#ifdef HAVE_LIBREADLINE
-        using_history();
-        stifle_history(HISTORY);
-
-        if (on_tty == 0) {
-                rl_prep_term_function = (rl_vintfunc_t *)noop_fn;
-                rl_deprep_term_function = (rl_voidfunc_t *)noop_fn;
-        }
-
-        rl_attempted_completion_function = (CPPFunction *)command_completion;
-        rl_completion_entry_function = (void *)command_generator;
-#endif
-        return on_tty;
-}
-
-#ifndef HAVE_LIBREADLINE
-#define add_history(s)
-/*
- * Minimal stand-in for readline(): print the prompt if one is given, read
- * one line from stdin, drop a trailing newline and return a malloc'ed
- * copy.  Returns NULL when fgets() returns NULL.
- */
-char * readline(char * prompt)
-{
-        char text[2048];
-        int len;
-
-        if (prompt != NULL)
-                printf ("%s", prompt);
-
-        if (!fgets(text, sizeof(text), stdin))
-                return NULL;
-
-        len = strlen(text);
-        if (len > 0 && text[len - 1] == '\n')
-                text[len - 1] = '\0';
-
-        return strdup(text);
-}
-#endif
-
-/*
- * Interactive command loop: read lines until input ends or 'done' is set,
- * executing and recording in history every line that is not blank.  The
- * prompt is shown only when stdin is a terminal.  Returns the result of
- * the last executed line, 0 if none was executed.
- */
-int Parser_commands(void)
-{
-        int rc = 0;
-        int interactive = init_input();
-
-        while (!done) {
-                char *input = readline(interactive ? parser_prompt : NULL);
-                char *cmdline;
-
-                if (input == NULL)
-                        break;
-
-                cmdline = skipwhitespace(input);
-                if (*cmdline != '\0') {
-                        add_history(cmdline);
-                        rc = execute_line(cmdline);
-                }
-
-                free(input);
-        }
-        return rc;
-}
-
-
-/* Reset the parser: clear the 'done' flag, install cmds as the top-level
-   command table and store a private copy of prompt, releasing any
-   previous copy. */
-void Parser_init(char * prompt, command_t * cmds)
-{
-    /* free(NULL) is a no-op, so no guard is needed */
-    free(parser_prompt);
-    parser_prompt = strdup(prompt);
-
-    top_level = cmds;
-    done = 0;
-}
-
-/* Release the parser prompt and set 'done' so that the Parser_commands()
-   loop stops.  Both arguments are ignored. */
-void Parser_exit(int argc, char *argv[])
-{
-    free(parser_prompt);
-    parser_prompt = NULL;
-    done = 1;
-}
-
-/*
- * Convert a string to an integer.
- *
- * The base follows the C literal prefixes: "0x" or "0X" is hexadecimal,
- * a leading "0" is octal, anything else is decimal.  A leading sign is
- * accepted.  Returns the sscanf() result: 1 if *val was set, otherwise 0
- * or EOF.
- */
-int Parser_int(char *s, int *val)
-{
-    /* %i applies the prefix rules itself, including the upper-case "0X"
-     * form, and stores into an int as val requires. */
-    return(sscanf(s, "%i", val));
-}
-
-
-/* Print the list of available commands followed by a hint on how to get
-   help for a single command.  Both arguments are ignored. */
-void Parser_qhelp(int argc, char *argv[])
-{
-    printf("Available commands are:\n");
-    print_commands(NULL, top_level);
-
-    printf("For more help type: help command-name\n");
-}
-
-/*
- * "help" command.  Without arguments, prints the command overview.  With
- * arguments, joins them with single blanks into a command line, resolves
- * it against the top-level table and prints the help text of the command,
- * or a message saying it is unknown, incomplete or ambiguous.  Always
- * returns 0.
- */
-int Parser_help(int argc, char **argv)
-{
-        char line[1024];
-        size_t used = 0;
-        char *next, *prev, *tmp;
-        command_t *result, *ambig;
-        int i;
-
-        if ( argc == 1 ) {
-                Parser_qhelp(argc, argv);
-                return 0;
-        }
-
-        /* Rebuild the command line.  The words need a separator so that
-           process() can tell a command from its sub-command, and the
-           total must fit in line[]. */
-        line[0] = '\0';
-        for ( i = 1 ; i < argc ; i++ ) {
-                int n = snprintf(line + used, sizeof(line) - used, "%s%s",
-                                 i > 1 ? " " : "", argv[i]);
-
-                if (n < 0 || (size_t)n >= sizeof(line) - used) {
-                        fprintf(stderr, "help: command too long\n");
-                        return 0;
-                }
-                used += (size_t)n;
-        }
-
-        switch ( process(line, &next, top_level, &result, &prev) ) {
-        case CMD_COMPLETE:
-                fprintf(stderr, "%s: %s\n",line, result->pc_help);
-                break;
-        case CMD_NONE:
-                fprintf(stderr, "%s: Unknown command.\n", line);
-                break;
-        case CMD_INCOMPLETE:
-                fprintf(stderr,
-                        "'%s' incomplete command.  Use '%s x' where x is one of:\n",
-                        line, line);
-                fprintf(stderr, "\t");
-                for (i = 0; result->pc_sub_cmd[i].pc_name; i++) {
-                        fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name);
-                }
-                fprintf(stderr, "\n");
-                break;
-        case CMD_AMBIG:
-                fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line);
-                while( (ambig = find_cmd(prev, result, &tmp)) ) {
-                        fprintf(stderr, "%s ", ambig->pc_name);
-                        result = ambig + 1;
-                }
-                fprintf(stderr, "\n");
-                break;
-        }
-        return 0;
-}
-
-
-/* convenience wrapper: show the detailed help for one command name */
-void Parser_printhelp(char *cmd)
-{
-        char *help_argv[2];
-
-        help_argv[0] = "help";
-        help_argv[1] = cmd;
-
-        Parser_help(2, help_argv);
-}
-
-/*************************************************************************
- * COMMANDS                                                             *
- *************************************************************************/
-
-
-/* Recursively list the commands in `table` (terminated by an entry whose
- * pc_name is NULL).  `str` is the prefix built from the parent command
- * names, or NULL at the top level.  Only entries with a handler are
- * printed; entries with a sub-table are descended into. */
-static void print_commands(char * str, command_t * table) {
-    command_t * cmds;
-    char       buf[80];
-
-    for (cmds = table; cmds->pc_name; cmds++) {
-       if (cmds->pc_func) {
-           if (str) printf("\t%s %s\n", str, cmds->pc_name);
-           else printf("\t%s\n", cmds->pc_name);
-       }
-       if (cmds->pc_sub_cmd) {
-           if (str) {
-               /* bounded: an over-long command path is truncated instead of
-                * overrunning buf */
-               snprintf(buf, sizeof(buf), "%s %s", str, cmds->pc_name);
-               print_commands(buf, cmds->pc_sub_cmd);
-           } else {
-               print_commands(cmds->pc_name, cmds->pc_sub_cmd);
-           }
-       }
-    }
-}
-
-/* Prompt with "<prompt> [<deft>]: " and copy the answer into res[0..len-1];
- * an empty answer or end of input selects `deft`.  The result is always
- * NUL-terminated (and truncated if it does not fit).  Returns res, or NULL
- * when readline() returned NULL (res then holds the default). */
-char *Parser_getstr(const char *prompt, const char *deft, char *res,
-                   size_t len)
-{
-    char *line = NULL;
-    int size = strlen(prompt) + strlen(deft) + 8;
-    char *theprompt;
-    theprompt = malloc(size);
-    assert(theprompt);
-
-    sprintf(theprompt, "%s [%s]: ", prompt, deft);
-
-    line  = readline(theprompt);
-    free(theprompt);
-
-    if ( line == NULL || *line == '\0' ) {
-       strncpy(res, deft, len);
-    } else {
-       strncpy(res, line, len);
-    }
-    /* strncpy() does not terminate res when the source fills it */
-    if ( len > 0 )
-       res[len - 1] = '\0';
-
-    if ( line ) {
-       free(line);
-       return res;
-    } else {
-       return NULL;
-    }
-}
-
-/* Prompt for an integer in [min, max].  An empty answer selects `deft`;
- * an unparsable or out-of-range answer re-prompts.  End of input also
- * selects `deft`, because re-prompting on a closed stream would spin
- * forever. */
-int Parser_getint(const char *prompt, long min, long max, long deft, int base)
-{
-    int rc;
-    long result;
-    char *line;
-    /* " [" + %ld (up to 20 chars) + ", (0x" + %lx (up to 16) + ")]: " + NUL
-     * can need 48 bytes beyond the prompt when long is 64 bits wide */
-    size_t size = strlen(prompt) + 64;
-    char *theprompt = malloc(size);
-    assert(theprompt);
-    snprintf(theprompt, size, "%s [%ld, (0x%lx)]: ", prompt, deft,
-             (unsigned long)deft);
-
-    fflush(stdout);
-
-    do {
-       line = readline(theprompt);
-       if ( !line ) {
-           /* EOF: nothing more will ever arrive on this stream */
-           result = deft;
-           break;
-       }
-       if ( *line == '\0' ) {
-           free(line);
-           result =  deft;
-           break;
-       }
-       rc = Parser_arg2int(line, &result, base);
-       free(line);
-       if ( rc != 0 ) {
-           fprintf(stdout, "Invalid string.\n");
-           fflush(stdout);
-       } else if ( result > max || result < min ) {
-           fprintf(stdout, "Error: response must lie between %ld and %ld.\n",
-                   min, max);
-           fflush(stdout);
-       } else {
-           break;
-       }
-    } while ( 1 ) ;
-
-    free(theprompt);
-    return result;
-
-}
-
-/* Prompt for a yes/no answer.  Answers starting with y/Y give 1, n/N give
- * 0; an empty answer or end of input gives `deft`; anything else
- * re-prompts. */
-int Parser_getbool(const char *prompt, int deft)
-{
-    char *theprompt = malloc(strlen(prompt) + 8);
-    char *answer;
-    int verdict = 0;
-
-    assert(theprompt);
-
-    fflush(stdout);
-
-    if ( !(deft == 0 || deft == 1) ) {
-       fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n",
-               deft);
-       assert ( 0 );
-    }
-    sprintf(theprompt, "%s [%s]: ", prompt, deft ? "Y" : "N");
-
-    for (;;) {
-       answer = readline(theprompt);
-
-       if ( answer == NULL || answer[0] == '\0' ) {
-           verdict = deft;
-           break;
-       }
-       if ( answer[0] == 'y' || answer[0] == 'Y' ) {
-           verdict = 1;
-           break;
-       }
-       if ( answer[0] == 'n' || answer[0] == 'N' ) {
-           verdict = 0;
-           break;
-       }
-
-       free(answer);
-       fprintf(stdout, "Invalid string. Must start with yY or nN\n");
-       fflush(stdout);
-    }
-
-    /* free(NULL) is a no-op, which covers the end-of-input case */
-    free(answer);
-    free(theprompt);
-    return verdict;
-}
-
-/* Parse an integer out of `inp`; when that is not possible (NULL, empty or
- * malformed input) prompt for one in [min, max] with default `deft`. */
-long Parser_intarg(const char *inp, const char *prompt, int deft,
-                 int min, int max, int base)
-{
-    long result;
-
-    /* strtol() must not be handed a NULL string */
-    if ( inp != NULL && Parser_arg2int(inp, &result, base) == 0 )
-       return result;
-
-    /* Parser_getint() takes (prompt, min, max, deft, base), in that order */
-    return Parser_getint(prompt, min, max, deft, base);
-}
-
-/* return `inp` when it holds a non-empty string, otherwise prompt for one */
-char *Parser_strarg(char *inp, const char *prompt, const char *deft,
-                   char *answer, int len)
-{
-    if ( inp != NULL && *inp != '\0' )
-       return inp;
-
-    return Parser_getstr(prompt, deft, answer, len);
-}
-
-/* Convert `inp` to a long in the given base (0, or 2..36, as for strtol(3)).
-   Returns 0 when the whole, non-empty string was consumed and 1 otherwise;
-   *result is written whenever the base is acceptable. */
-int Parser_arg2int(const char *inp, long *result, int base)
-{
-    char *stop;
-    int base_ok = (base == 0) || (base >= 2 && base <= 36);
-
-    if ( !base_ok )
-       return 1;
-
-    *result = strtol(inp, &stop, base);
-
-    return ( *inp == '\0' || *stop != '\0' ) ? 1 : 0;
-}
-
-/* "quit" command handler: ask the command loop to stop */
-int Parser_quit(int argc, char **argv)
-{
-        /* arguments are unused */
-        (void)argc;
-        (void)argv;
-
-        done = 1;
-        return 0;
-}
diff --git a/lustre/portals/utils/parser.h b/lustre/portals/utils/parser.h
deleted file mode 100644 (file)
index 9e7e95a..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef _PARSER_H_
-#define _PARSER_H_
-
-#define HISTORY        100             /* Don't let history grow unbounded    */
-#define MAXARGS 512
-
-#define CMD_COMPLETE   0
-#define CMD_INCOMPLETE 1
-#define CMD_NONE       2
-#define CMD_AMBIG      3
-#define CMD_HELP       4
-
-/* One entry of a command table.  Tables are arrays terminated by an entry
- * whose pc_name is NULL. */
-typedef struct parser_cmd {
-       char    *pc_name;                       /* command name */
-       int     (* pc_func)(int, char **);      /* handler, called with argc/argv; may be NULL */
-       struct parser_cmd * pc_sub_cmd;         /* table of sub-commands, or NULL */
-       char *pc_help;                          /* help text printed by Parser_help() */
-} command_t;
-
-/* Name / handler / help triple (not referenced in the code visible here). */
-typedef struct argcmd {
-       char    *ac_name;
-       int      (*ac_func)(int, char **);
-       char     *ac_help;
-} argcmd_t;
-
-/* Network endpoint description (not referenced in the code visible here). */
-typedef struct network {
-       char    *type;
-       char    *server;
-       int     port;
-} network_t;
-
-int  Parser_quit(int argc, char **argv);
-void Parser_init(char *, command_t *); /* Set prompt and load command list */
-int Parser_commands(void);                     /* Start the command parser */
-void Parser_qhelp(int, char **);       /* Quick help routine */
-int Parser_help(int, char **);         /* Detailed help routine */
-void Parser_printhelp(char *);         /* Detailed help routine */
-void Parser_exit(int, char **);                /* Shuts down command parser */
-int Parser_execarg(int argc, char **argv, command_t cmds[]);
-int execute_line(char * line);
-
-/* Converts a string to an integer */
-int Parser_int(char *, int *);
-
-/* Prompts for a string, with default values and a maximum length */
-char *Parser_getstr(const char *prompt, const char *deft, char *res, 
-                   size_t len);
-
-/* Prompts for an integer, with minimum, maximum and default values and base */
-int Parser_getint(const char *prompt, long min, long max, long deft,
-                 int base);
-
-/* Prompts for a yes/no, with default */
-int Parser_getbool(const char *prompt, int deft);
-
-/* Extracts an integer from a string, or prompts if it cannot get one */
-long Parser_intarg(const char *inp, const char *prompt, int deft,
-                  int min, int max, int base);
-
-/* Extracts a word from the input, or prompts if it cannot get one */
-char *Parser_strarg(char *inp, const char *prompt, const char *deft,
-                   char *answer, int len);
-
-/* Extracts an integer from a string  with a base */
-int Parser_arg2int(const char *inp, long *result, int base);
-
-#endif
diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c
deleted file mode 100644 (file)
index 9c1537b..0000000
+++ /dev/null
@@ -1,1935 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <stdio.h>
-#include <sys/types.h>
-#ifdef HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#include <sys/socket.h>
-#ifdef HAVE_NETINET_TCP_H
-#include <netinet/tcp.h>
-#endif
-#include <stdlib.h>
-#include <string.h>
-#include <fcntl.h>
-#include "ioctl.h"
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <unistd.h>
-#include <time.h>
-#include <stdarg.h>
-#include <endian.h>
-#if CRAY_PORTALS
-#ifdef REDSTORM
-#define __QK__
-#endif
-#include <portals/ipmap.h>
-#endif
-
-#ifdef __CYGWIN__
-
-#include <netinet/in.h>
-
-#endif /* __CYGWIN__ */
-#include <portals/api-support.h>
-#include <portals/ptlctl.h>
-#include <portals/list.h>
-#include <portals/lib-types.h>
-#include <portals/socknal.h>
-#include "parser.h"
-
-unsigned int portal_debug;
-unsigned int portal_printk;
-
-static unsigned int g_nal = 0;
-
-/* Entry of a name <-> number lookup table. */
-typedef struct
-{
-        char *name;
-        int   num;
-} name2num_t;
-
-/* NAL names accepted by the 'network' command and their numeric IDs.
- * The table is terminated by the entry whose name is NULL; the lookup
- * helpers stop there.  "any" maps to 0, the same value g_nal holds before
- * a network has been selected. */
-static name2num_t nalnames[] = {
-        {"any",         0},
-#if !CRAY_PORTALS
-        {"tcp",                SOCKNAL},
-        {"elan",       QSWNAL},
-        {"gm",         GMNAL},
-        {"openib",      OPENIBNAL},
-        {"iib",         IIBNAL},
-        {"lo",          LONAL},
-        {"ra",          RANAL},
-#else
-        {"cray_kern_nal", CRAY_KERN_NAL},
-        {"cray_user_nal", CRAY_USER_NAL},
-        {"cray_qk_nal",   CRAY_QK_NAL},
-#endif
-        {NULL,         -1}
-};
-
-static cfg_record_cb_t g_record_cb;
-
-/* Convert a boolean word to an int, case-insensitively: "enable" -> 1,
- * "disable" -> 0.  Returns 0 on success, -1 if the word is not recognised
- * (*b is then left untouched). */
-int ptl_parse_bool (int *b, char *str) {
-        static const char *const off_words[] = {
-                "no", "n", "off", "down", "disable"
-        };
-        static const char *const on_words[] = {
-                "yes", "y", "on", "up", "enable"
-        };
-        unsigned int i;
-
-        for (i = 0; i < sizeof (off_words) / sizeof (off_words[0]); i++) {
-                if (strcasecmp (str, off_words[i]) == 0) {
-                        *b = 0;
-                        return (0);
-                }
-        }
-
-        for (i = 0; i < sizeof (on_words) / sizeof (on_words[0]); i++) {
-                if (strcasecmp (str, on_words[i]) == 0) {
-                        *b = 1;
-                        return (0);
-                }
-        }
-
-        return (-1);
-}
-
-/* Convert a human readable size string to an int.  An optional k/m/g
- * suffix (either case) scales by 2^10 / 2^20 / 2^30: "1k" -> 1024.
- * Returns 0 on success, -1 when no leading number could be read. */
-int ptl_parse_size (int *sizep, char *str) {
-        char suffix[32];
-        int  value;
-        int  shift = 0;
-        int  nfields = sscanf (str, "%d%1[gGmMkK]", &value, suffix);
-
-        if (nfields != 1 && nfields != 2)
-                return (-1);
-
-        if (nfields == 2) {
-                char unit = suffix[0];
-
-                if (unit == 'g' || unit == 'G')
-                        shift = 30;
-                else if (unit == 'm' || unit == 'M')
-                        shift = 20;
-                else if (unit == 'k' || unit == 'K')
-                        shift = 10;
-        }
-
-        *sizep = value << shift;
-        return (0);
-}
-
-/* Install `cb` as the configuration-record callback consulted by
- * pcfg_ioctl(); always returns 0. */
-int 
-ptl_set_cfg_record_cb(cfg_record_cb_t cb)
-{
-        g_record_cb = cb;
-
-        return (0);
-}
-
-/* Deliver a NAL configuration command: hand it to the record callback when
- * one is installed, otherwise issue it as an ioctl on the portals device.
- * A zero pcfg_nal is replaced by the currently selected NAL first. */
-int 
-pcfg_ioctl(struct portals_cfg *pcfg) 
-{
-        struct portal_ioctl_data data;
-
-        if (pcfg->pcfg_nal == 0)
-                pcfg->pcfg_nal = g_nal;
-
-        if (g_record_cb)
-                return (g_record_cb(PORTALS_CFG_TYPE, sizeof(*pcfg), pcfg));
-
-        PORTAL_IOC_INIT (data);
-        data.ioc_pbuf1   = (char*)pcfg;
-        data.ioc_plen1   = sizeof(*pcfg);
-        /* XXX liblustre hack XXX */
-        data.ioc_nal_cmd = pcfg->pcfg_command;
-        data.ioc_nid = pcfg->pcfg_nid;
-
-        return (l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data));
-}
-
-
-
-/* Find the entry of `table` whose name equals `str` (case-sensitive);
- * returns NULL when the NULL-name terminator is reached first. */
-static name2num_t *
-name2num_lookup_name (name2num_t *table, char *str)
-{
-        name2num_t *entry;
-
-        for (entry = table; entry->name != NULL; entry++) {
-                if (strcmp (str, entry->name) == 0)
-                        return (entry);
-        }
-
-        return (NULL);
-}
-
-/* Find the entry of `table` whose number equals `num`; returns NULL when
- * the NULL-name terminator is reached first. */
-static name2num_t *
-name2num_lookup_num (name2num_t *table, int num)
-{
-        name2num_t *entry;
-
-        for (entry = table; entry->name != NULL; entry++) {
-                if (entry->num == num)
-                        return (entry);
-        }
-
-        return (NULL);
-}
-
-/* Map a NAL name to its number; -1 if the name is unknown. */
-int
-ptl_name2nal (char *str)
-{
-        name2num_t *entry = name2num_lookup_name (nalnames, str);
-
-        if (entry == NULL)
-                return (-1);
-
-        return (entry->num);
-}
-
-/* Map a NAL number to its name; "???" if the number is unknown. */
-static char *
-nal2name (int nal)
-{
-        name2num_t *entry = name2num_lookup_num (nalnames, nal);
-
-        if (entry == NULL)
-                return ("???");
-
-        return (entry->name);
-}
-
-#ifdef HAVE_GETHOSTBYNAME
-/* gethostbyname() wrapper that reports a failed lookup on stderr.
- * Returns the hostent, or NULL on failure. */
-static struct hostent *
-ptl_gethostbyname(char * hname) {
-        struct hostent *he;
-        he = gethostbyname(hname);
-        if (!he) {
-                switch(h_errno) {
-                case HOST_NOT_FOUND:
-                case NO_ADDRESS:
-                        fprintf(stderr, "Unable to resolve hostname: %s\n",
-                                hname);
-                        break;
-                default:
-                        /* gethostbyname() reports failures through h_errno;
-                         * errno is not meaningful here */
-                        fprintf(stderr, "gethostbyname error: %s\n",
-                                hstrerror(h_errno));
-                        break;
-                }
-                return NULL;
-        }
-        return he;
-}
-#endif
-
-/* Parse a TCP port number (any base strtol() accepts with base 0).
- * Returns 0 and stores the port when the whole string is a number in
- * 1..65535; returns -1 otherwise and leaves *port untouched. */
-int
-ptl_parse_port (int *port, char *str)
-{
-        char      *end;
-        long       val;
-
-        /* range-check the long before narrowing: values such as 2^32 + 1
-         * must not wrap round to a plausible port */
-        val = strtol (str, &end, 0);
-
-        if (*end == 0 &&                        /* parsed whole string */
-            val > 0 && val < 65536) {           /* minimal sanity check */
-                *port = (int)val;
-                return (0);
-        }
-
-        return (-1);
-}
-
-/* Parse a time given either as a plain number (taken as a time_t value)
- * or as "YYYY-MM-DD-HH:MM:SS", which is converted with mktime().
- * Returns 0 on success, -1 on a malformed string or mktime() failure. */
-int
-ptl_parse_time (time_t *t, char *str) 
-{
-        char          *rest;
-        struct tm      when;
-        int            nfields;
-
-        *t = strtol (str, &rest, 0);
-        if (*rest == '\0') /* parsed whole string */
-                return (0);
-
-        memset (&when, 0, sizeof (when));
-        nfields = sscanf (str, "%d-%d-%d-%d:%d:%d",
-                          &when.tm_year, &when.tm_mon, &when.tm_mday,
-                          &when.tm_hour, &when.tm_min, &when.tm_sec);
-        if (nfields != 6)
-                return (-1);
-
-        when.tm_year -= 1900;           /* struct tm counts years from 1900 */
-        when.tm_mon  -= 1;              /* struct tm months start at 0 == Jan */
-        when.tm_isdst = -1;             /* let mktime() decide about DST */
-
-        *t = mktime (&when);
-
-        return ((*t == (time_t)-1) ? -1 : 0);
-}
-
-/* Parse a dotted quad "a.b.c.d" into a host-byte-order address.
- * Returns 0 on success; -1 (leaving *ipaddrp untouched) when the string is
- * not exactly four 0..255 fields. */
-int
-ptl_parse_ipquad (__u32 *ipaddrp, char *str)
-{
-        int             a;
-        int             b;
-        int             c;
-        int             d;
-        char            junk;
-
-        /* the trailing %c only converts when text follows the quad, so a
-         * count of exactly 4 rejects strings such as "1.2.3.4xyz" */
-        if (sscanf (str, "%d.%d.%d.%d%c", &a, &b, &c, &d, &junk) == 4 &&
-            (a & ~0xff) == 0 && (b & ~0xff) == 0 &&
-            (c & ~0xff) == 0 && (d & ~0xff) == 0)
-        {
-                /* shift as unsigned: a >= 128 would overflow a signed int */
-                *ipaddrp = ((__u32)a << 24) | ((__u32)b << 16) |
-                           ((__u32)c << 8) | (__u32)d;
-                return (0);
-        }
-
-        return (-1);
-}
-
-/* Parse an IP address given as "_all_" (-> 0), a dotted quad, or - when
- * gethostbyname() is available - a host name starting with a letter.
- * The address is stored in host byte order.  Returns 0 on success, -1 on
- * failure. */
-int
-ptl_parse_ipaddr (__u32 *ipaddrp, char *str)
-{
-#ifdef HAVE_GETHOSTBYNAME
-        struct hostent *he;
-#endif
-
-        if (!strcmp (str, "_all_")) 
-        {
-                *ipaddrp = 0;
-                return (0);
-        }
-
-        if (ptl_parse_ipquad(ipaddrp, str) == 0)
-                return (0);
-
-        /* same guard as the declaration of `he` above: #ifdef, not #if */
-#ifdef HAVE_GETHOSTBYNAME
-        if ((('a' <= str[0] && str[0] <= 'z') ||
-             ('A' <= str[0] && str[0] <= 'Z')) &&
-             (he = ptl_gethostbyname (str)) != NULL)
-        {
-                __u32 addr;
-
-                /* h_addr is a char buffer; copy it out rather than reading
-                 * it through a cast pointer */
-                memcpy (&addr, he->h_addr, sizeof (addr));
-
-                *ipaddrp = ntohl(addr);         /* HOST byte order */
-                return (0);
-        }
-#endif
-
-        return (-1);
-}
-
-/* Format a host-byte-order IP address into `str` and return `str`.  When
- * `lookup` is non-zero and a reverse lookup succeeds, the host name is
- * used; otherwise the dotted quad is written.
- * NOTE(review): nothing bounds the copy of the host name - `str` must be
- * large enough for whatever name the resolver returns. */
-char *
-ptl_ipaddr_2_str (__u32 ipaddr, char *str, int lookup)
-{
-#ifdef HAVE_GETHOSTBYNAME
-        if (lookup != 0) {
-                __u32           net_ip = htonl (ipaddr);
-                struct hostent *he;
-
-                he = gethostbyaddr (&net_ip, sizeof (net_ip), AF_INET);
-                if (he != NULL)
-                        return (strcpy(str, he->h_name));
-        }
-#endif
-
-        sprintf (str, "%d.%d.%d.%d",
-                 (ipaddr >> 24) & 0xff, (ipaddr >> 16) & 0xff,
-                 (ipaddr >> 8) & 0xff, ipaddr & 0xff);
-
-        return (str);
-}
-
-int
-ptl_parse_nid (ptl_nid_t *nidp, char *str)
-{
-        __u32               ipaddr;
-        char               *end;
-        unsigned long long  ullval;
-        
-        if (!strcmp (str, "_all_")) {
-                *nidp = PTL_NID_ANY;
-                return (0);
-        }
-
-        if (ptl_parse_ipaddr (&ipaddr, str) == 0) {
-#if !CRAY_PORTALS
-                *nidp = (ptl_nid_t)ipaddr;
-#else
-                *nidp = (((ptl_nid_t)ipaddr & PNAL_HOSTID_MASK) << PNAL_VNODE_SHIFT);
-#endif
-                return (0);
-        }
-
-        ullval = strtoull(str, &end, 0);
-        if (*end == 0) {
-                /* parsed whole string */
-                *nidp = (ptl_nid_t)ullval;
-                return (0);
-        }
-
-        return (-1);
-}
-
-/* Widen a NID to 64 bits without sign extension.  Aborts if ptl_nid_t is
- * neither 4 nor 8 bytes wide. */
-__u64 ptl_nid2u64(ptl_nid_t nid)
-{
-        switch (sizeof (nid)) {
-        case 8:
-                return (nid);
-        case 4:
-                return ((__u32)nid);
-        default:
-                /* sizeof yields a size_t; cast it to match the %u conversion */
-                fprintf(stderr, "Unexpected sizeof(ptl_nid_t) == %u\n",
-                        (unsigned int)sizeof(nid));
-                abort();
-                /* notreached */
-                return (-1);
-        }
-}
-
-/* Format `nid` into `buffer` and return `buffer`.  When a reverse lookup
- * of the low 32 bits succeeds the result is "<high 32 bits in hex>:<host
- * name>"; otherwise the NID is printed with the LPX64 format.
- * NOTE(review): `buffer` has no length parameter; the caller must size it
- * for the longest host name the resolver may return. */
-char *
-ptl_nid2str (char *buffer, ptl_nid_t nid)
-{
-        __u64           nid64 = ptl_nid2u64(nid);
-#ifdef HAVE_GETHOSTBYNAME
-        struct hostent *he = 0;
-
-        /* Don't try to resolve NIDs that are e.g. Elan host IDs.  Assume
-         * TCP addresses in the 0.x.x.x subnet are not in use.  This can
-         * happen on routers and slows things down a _lot_.  Bug 3442. */
-        if (nid & 0xff000000) {
-                __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */
-
-                he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET);
-        }
-
-        if (he != NULL)
-                sprintf(buffer, "%#x:%s", (int)(nid64 >> 32), he->h_name);
-        else
-#endif /* HAVE_GETHOSTBYNAME */
-                /* reached either as the `else` above or, without
-                 * gethostbyname support, unconditionally */
-                sprintf(buffer, LPX64, nid64);
-
-        return (buffer);
-}
-
-/* Return 1 when a NAL has been selected; otherwise complain on stderr and
- * return 0. */
-int g_nal_is_set () 
-{
-        if (g_nal != 0)
-                return (1);
-
-        fprintf (stderr, "Error: you must run the 'network' command first.\n");
-        return (0);
-}
-
-/* Return 1 when the selected NAL is one of the NAL numbers passed after
- * `cmd` (the list is terminated by 0), 0 otherwise.  When `cmd` is not
- * NULL a mismatch is reported on stderr under that command name. */
-int g_nal_is_compatible (char *cmd, ...)
-{
-        va_list       args;
-        int           candidate;
-
-        if (!g_nal_is_set ())
-                return (0);
-
-        va_start (args, cmd);
-        for (;;) {
-                candidate = va_arg (args, int);
-                if (candidate == 0 || candidate == g_nal)
-                        break;
-        }
-        va_end (args);
-
-        if (candidate == g_nal)
-                return (1);
-
-        /* Stay quiet when no command name was supplied: the caller is only
-         * probing. */
-        if (cmd != NULL)
-                fprintf (stderr, "Command %s not compatible with nal %s\n",
-                         cmd, nal2name (g_nal));
-
-        return (0);
-}
-
-/* Write exactly `nob` bytes from `buffer` to `cfd`, retrying after EINTR
- * and continuing after short writes.  Returns 0 on success or the negative
- * write() result on error; aborts if write() returns 0. */
-int
-sock_write (int cfd, void *buffer, int nob)
-{
-        while (nob > 0)
-        {
-                int rc = write (cfd, buffer, nob);
-
-                if (rc < 0)
-                {
-                        if (errno == EINTR)
-                                continue;
-                        
-                        return (rc);
-                }
-
-                if (rc == 0)
-                {
-                        fprintf (stderr, "Unexpected zero sock_write\n");
-                        abort();
-                }
-
-                /* advance past the bytes just written, not by the number
-                 * of bytes still outstanding */
-                nob -= rc;
-                buffer = (char *)buffer + rc;
-        }
-        
-        return (0);
-}
-
-/* Read exactly `nob` bytes from `cfd` into `buffer`, retrying after EINTR
- * and continuing after short reads.  Returns 0 on success, the negative
- * read() result on error, or -1 with errno = ECONNABORTED when the peer
- * closes before `nob` bytes have arrived. */
-int
-sock_read (int cfd, void *buffer, int nob)
-{
-        while (nob > 0)
-        {
-                int rc = read (cfd, buffer, nob);
-                
-                if (rc < 0)
-                {
-                        if (errno == EINTR)
-                                continue;
-                        
-                        return (rc);
-                }
-                
-                if (rc == 0)                    /* EOF */
-                {
-                        errno = ECONNABORTED;
-                        return (-1);
-                }
-                
-                /* advance past the bytes just read, not by the number of
-                 * bytes still outstanding */
-                nob -= rc;
-                buffer = (char *)buffer + rc;
-        }
-        
-        return (0);
-}
-
-/* Register the portals ioctl device; always returns 0. */
-int ptl_initialize(int argc, char **argv) 
-{
-        /* arguments are unused */
-        (void)argc;
-        (void)argv;
-
-        register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH);
-
-        return (0);
-}
-
-
-/* 'network <nal>' command: select the NAL used by later commands.  On a
- * missing or unknown name, print the list of known NAL names and return
- * -1. */
-int jt_ptl_network(int argc, char **argv)
-{
-        name2num_t *entry;
-
-        if (argc == 2) {
-                int nal = ptl_name2nal (argv[1]);
-
-                if (nal >= 0) {
-                        g_nal = nal;
-                        return (0);
-                }
-        }
-
-        fprintf(stderr, "usage: %s \n", argv[0]);
-        for (entry = nalnames; entry->name != NULL; entry++) {
-                /* the first name opens the list, the rest are alternatives */
-                const char *sep = (entry == nalnames) ? "<" : "|";
-
-                fprintf (stderr, "%s%s", sep, entry->name);
-        }
-        fprintf(stderr, ">\n");
-
-        return (-1);
-}
-
-/* List the interfaces known to the socket NAL, one line per interface.
- * Interfaces are fetched by index with NAL_CMD_GET_INTERFACE until the
- * first failing ioctl.  Returns -1 if the selected NAL is not SOCKNAL,
- * 0 otherwise. */
-int
-jt_ptl_print_interfaces (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        char                     buffer[3][64];
-        int                      index;
-        int                      rc;
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-                return -1;
-
-        for (index = 0;;index++) {
-                PCFG_INIT (pcfg, NAL_CMD_GET_INTERFACE);
-                /* on input pcfg_count carries the index to fetch */
-                pcfg.pcfg_count = index;
-
-                rc = pcfg_ioctl (&pcfg);
-                if (rc != 0)
-                        break;
-
-                /* pcfg_id is printed as a resolved name and as a dotted
-                 * quad, pcfg_misc as a dotted quad after the '/', pcfg_fd
-                 * as the peer count and pcfg_count as the route count */
-                printf ("%s: (%s/%s) npeer %d nroute %d\n",
-                        ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[2], 1),
-                        ptl_ipaddr_2_str(pcfg.pcfg_id, buffer[0], 0),
-                        ptl_ipaddr_2_str(pcfg.pcfg_misc, buffer[1], 0),
-                        pcfg.pcfg_fd, pcfg.pcfg_count);
-        }
-
-        if (index == 0)
-                printf ("<no interfaces>\n");
-        return 0;
-}
-
-/* 'add_interface ipaddr [netmask]' for the socket NAL.  The netmask may be
- * a prefix length (1..31) or a dotted quad and defaults to 255.255.255.0.
- * Returns 0 on success or after printing usage, -1 on any error. */
-int
-jt_ptl_add_interface (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        __u32                    ipaddr;
-        int                      rc;
-        __u32                    netmask = 0xffffff00;
-        int                      count;
-        char                    *end;
-
-        if (argc < 2 || argc > 3) {
-                fprintf (stderr, "usage: %s ipaddr [netmask]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_compatible(argv[0], SOCKNAL, 0))
-                return -1;
-
-        if (ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
-                return -1;
-        }
-
-        if (argc > 2 ) {
-                count = strtol(argv[2], &end, 0);
-                if (count > 0 && count < 32 && *end == 0) {
-                        /* prefix length: set the top `count` bits.  Done in
-                         * unsigned arithmetic so bit 31 is never reached by
-                         * shifting a signed 1. */
-                        netmask = 0xffffffffU << (32 - count);
-                } else if (ptl_parse_ipquad(&netmask, argv[2]) != 0) {
-                        fprintf (stderr, "Can't parse netmask: %s\n", argv[2]);
-                        return -1;
-                }
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_ADD_INTERFACE);
-        pcfg.pcfg_id     = ipaddr;
-        pcfg.pcfg_misc   = netmask;
-
-        rc = pcfg_ioctl (&pcfg);
-        if (rc != 0) {
-                fprintf (stderr, "failed to add interface: %s\n",
-                         strerror (errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/* 'del_interface [ipaddr]' for the socket NAL.  Without an address the
- * request is issued with address 0.  Returns 0 on success or after
- * printing usage, -1 on any error. */
-int
-jt_ptl_del_interface (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        __u32                    ipaddr = 0;
-
-        if (argc > 2) {
-                fprintf (stderr, "usage: %s [ipaddr]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_compatible(argv[0], SOCKNAL, 0))
-                return -1;
-
-        if (argc == 2) {
-                if (ptl_parse_ipaddr(&ipaddr, argv[1]) != 0) {
-                        fprintf (stderr, "Can't parse ip: %s\n", argv[1]);
-                        return -1;
-                }
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_DEL_INTERFACE);
-        pcfg.pcfg_id = ipaddr;
-
-        if (pcfg_ioctl (&pcfg) != 0) {
-                fprintf (stderr, "failed to delete interface: %s\n",
-                         strerror (errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/* List the peers known to the selected NAL, one line per peer.  Peers are
- * fetched by index with NAL_CMD_GET_PEER until the first failing ioctl.
- * Returns -1 if the selected NAL is not one of SOCKNAL, OPENIBNAL, IIBNAL
- * or RANAL, 0 otherwise. */
-int
-jt_ptl_print_peers (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        char                     buffer[2][64];
-        int                      index;
-        int                      rc;
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, IIBNAL, RANAL, 0))
-                return -1;
-
-        for (index = 0;;index++) {
-                PCFG_INIT (pcfg, NAL_CMD_GET_PEER);
-                /* on input pcfg_count carries the index to fetch */
-                pcfg.pcfg_count   = index;
-
-                rc = pcfg_ioctl (&pcfg);
-                if (rc != 0)
-                        break;
-
-                /* SOCKNAL output also shows two addresses (pcfg_size and
-                 * pcfg_id, both resolved to names where possible), then
-                 * pcfg_misc and pcfg_count; other NALs print only the NID
-                 * and pcfg_wait */
-                if (g_nal_is_compatible(NULL, SOCKNAL, 0))
-                        printf (LPX64"[%d]%s@%s:%d #%d\n",
-                                pcfg.pcfg_nid, pcfg.pcfg_wait,
-                                ptl_ipaddr_2_str (pcfg.pcfg_size, buffer[0], 1),
-                                ptl_ipaddr_2_str (pcfg.pcfg_id, buffer[1], 1),
-                                pcfg.pcfg_misc, pcfg.pcfg_count);
-                else
-                        printf (LPX64"[%d]\n",
-                                pcfg.pcfg_nid, pcfg.pcfg_wait);
-        }
-
-        if (index == 0)
-                printf ("<no peers>\n");
-        return 0;
-}
-
-/* 'add_peer nid [ipaddr port]'.  SOCKNAL and RANAL need the address and
- * port; the other supported NALs take the NID only.  Returns 0 on success
- * or after printing usage, -1 on any error. */
-int 
-jt_ptl_add_peer (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        ptl_nid_t                nid;
-        __u32                    ip = 0;
-        int                      port = 0;
-        int                      wants_addr;
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, IIBNAL, RANAL, 0))
-                return -1;
-
-        /* the selected NAL cannot change while this command runs, so one
-         * probe serves both the usage check and the parsing below */
-        wants_addr = g_nal_is_compatible (NULL, SOCKNAL, RANAL, 0);
-
-        if (wants_addr && argc != 4) {
-                fprintf (stderr, "usage(tcp): %s nid ipaddr port\n", 
-                         argv[0]);
-                return 0;
-        }
-        if (!wants_addr && argc != 2) {
-                fprintf (stderr, "usage(openib,iib): %s nid\n", argv[0]);
-                return 0;
-        }
-
-        if (ptl_parse_nid (&nid, argv[1]) != 0 ||
-            nid == PTL_NID_ANY) {
-                fprintf (stderr, "Can't parse NID: %s\n", argv[1]);
-                return -1;
-        }
-
-        if (wants_addr) {
-                if (ptl_parse_ipaddr (&ip, argv[2]) != 0) {
-                        fprintf (stderr, "Can't parse ip addr: %s\n", argv[2]);
-                        return -1;
-                }
-
-                if (ptl_parse_port (&port, argv[3]) != 0) {
-                        fprintf (stderr, "Can't parse port: %s\n", argv[3]);
-                        return -1;
-                }
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_ADD_PEER);
-        pcfg.pcfg_nid     = nid;
-        pcfg.pcfg_id      = ip;
-        pcfg.pcfg_misc    = port;
-
-        if (pcfg_ioctl (&pcfg) != 0) {
-                fprintf (stderr, "failed to add peer: %s\n",
-                         strerror (errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/* 'del_peer [nid] [ipaddr] [single_share]'.  The ipaddr argument exists
- * for SOCKNAL only.  A missing NID means PTL_NID_ANY and a missing address
- * means 0.  Returns 0 on success or after printing usage, -1 on any
- * error. */
-int 
-jt_ptl_del_peer (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        ptl_nid_t                nid = PTL_NID_ANY;
-        __u32                    ip = 0;
-        int                      single_share = 0;
-        int                      argidx;
-        int                      rc;
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, IIBNAL, RANAL, 0))
-                return -1;
-
-        if (g_nal_is_compatible(NULL, SOCKNAL, 0)) {
-                if (argc > 4) {
-                        fprintf (stderr, "usage: %s [nid] [ipaddr] [single_share]\n",
-                                 argv[0]);
-                        return 0;
-                }
-        } else if (argc > 3) {
-                fprintf (stderr, "usage: %s [nid] [single_share]\n", argv[0]);
-                return 0;
-        }
-                
-        if (argc > 1 &&
-            ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse nid: %s\n", argv[1]);
-                return -1;
-        }
-
-        /* argidx tracks where the optional trailing arguments start: 2 for
-         * most NALs, 3 for SOCKNAL, which has the ipaddr slot first */
-        argidx = 2;
-        if (g_nal_is_compatible(NULL, SOCKNAL, 0)) {
-                if (argc > argidx &&
-                    ptl_parse_ipaddr (&ip, argv[argidx]) != 0) {
-                        fprintf (stderr, "Can't parse ip addr: %s\n",
-                                 argv[argidx]);
-                        return -1;
-                }
-                argidx++;
-        }
-        
-        if (argc > argidx) {
-                if (!strcmp (argv[argidx], "single_share")) {
-                        single_share = 1;
-                } else {
-                        /* report the argument actually examined; argv[3]
-                         * is argv[argc] (NULL) for the non-SOCKNAL form */
-                        fprintf (stderr, "Unrecognised arg '%s'\n",
-                                 argv[argidx]);
-                        return -1;
-                }
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_DEL_PEER);
-        pcfg.pcfg_nid = nid;
-        pcfg.pcfg_id = ip;
-        pcfg.pcfg_flags = single_share;
-
-        rc = pcfg_ioctl (&pcfg);
-        if (rc != 0) {
-                fprintf (stderr, "failed to remove peer: %s\n",
-                         strerror (errno));
-                return -1;
-        }
-        
-        return 0;
-}
-
-/*
- * List the connections of the current NAL by issuing NAL_CMD_GET_CONN with
- * increasing indices until the ioctl fails.  For SOCKNAL a full line per
- * connection is printed; the other accepted NALs print only the peer NID.
- *
- * Returns -1 if the NAL is not compatible, 0 otherwise.
- */
-int 
-jt_ptl_print_connections (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        char                     addr[2][64];
-        const char              *kind;
-        int                      n = 0;
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, OPENIBNAL, IIBNAL, RANAL, 0))
-                return -1;
-
-        while (1) {
-                PCFG_INIT (pcfg,  NAL_CMD_GET_CONN);
-                pcfg.pcfg_count = n;
-
-                if (pcfg_ioctl (&pcfg) != 0)
-                        break;
-
-                if (!g_nal_is_compatible (NULL, SOCKNAL, 0)) {
-                        printf (LPX64"\n", pcfg.pcfg_nid);
-                        n++;
-                        continue;
-                }
-
-                /* one-letter tag for the connection type */
-                if (pcfg.pcfg_flags == SOCKNAL_CONN_ANY)
-                        kind = "A";
-                else if (pcfg.pcfg_flags == SOCKNAL_CONN_CONTROL)
-                        kind = "C";
-                else if (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_IN)
-                        kind = "I";
-                else if (pcfg.pcfg_flags == SOCKNAL_CONN_BULK_OUT)
-                        kind = "O";
-                else
-                        kind = "?";
-
-                printf ("[%d]%s:"LPX64"@%s:%d:%s %d/%d %s\n",
-                        pcfg.pcfg_gw_nal,                            /* scheduler */
-                        ptl_ipaddr_2_str (pcfg.pcfg_fd, addr[0], 1), /* local IP addr */
-                        pcfg.pcfg_nid,
-                        ptl_ipaddr_2_str (pcfg.pcfg_id, addr[1], 1), /* remote IP addr */
-                        pcfg.pcfg_misc,                              /* remote port */
-                        kind,
-                        pcfg.pcfg_count,                             /* tx buffer size */
-                        pcfg.pcfg_size,                              /* rx buffer size */
-                        pcfg.pcfg_wait ? "nagle" : "nonagle");
-                n++;
-        }
-
-        if (n == 0)
-                printf ("<no connections>\n");
-
-        return 0;
-}
-
-/*
- * Open a TCP connection to a peer from a privileged local port and register
- * the connected socket with the socknal (NAL_CMD_REGISTER_PEER_FD).
- *
- * usage: <cmd> ip port [type]
- *
- * 'type' is a string of flag characters; at most one of 'I'
- * (SOCKNAL_CONN_BULK_IN), 'O' (SOCKNAL_CONN_BULK_OUT) or 'C'
- * (SOCKNAL_CONN_CONTROL) may be given.  Without it the connection type is
- * SOCKNAL_CONN_ANY.
- *
- * Local ports are tried from IPPORT_RESERVED - 1 downwards, stopping above
- * IPPORT_RESERVED / 2; a port that is already in use is skipped.
- *
- * Returns 0 on success or after printing usage, -1 on failure.  When built
- * without HAVE_CONNECT the command always returns -1.
- */
-int jt_ptl_connect(int argc, char **argv)
-{
-#ifndef HAVE_CONNECT
-        /* no connect() support */
-        return -1;
-#else /* HAVE_CONNECT */
-        struct portals_cfg pcfg;
-        struct sockaddr_in srvaddr;
-        struct sockaddr_in locaddr;
-        __u32 ipaddr;
-        char *flag;
-        int fd, rc;
-        int type = SOCKNAL_CONN_ANY;
-        int port, rport;
-        int o;
-
-        if (argc < 3) {
-                fprintf(stderr, "usage: %s ip port [type]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-                return -1;
-
-        rc = ptl_parse_ipaddr (&ipaddr, argv[1]);
-        if (rc != 0) {
-                fprintf(stderr, "Can't parse hostname: %s\n", argv[1]);
-                return -1;
-        }
-
-        if (ptl_parse_port (&port, argv[2]) != 0) {
-                fprintf (stderr, "Can't parse port: %s\n", argv[2]);
-                return -1;
-        }
-
-        /* each flag character selects a connection type; only one is allowed */
-        if (argc > 3)
-                for (flag = argv[3]; *flag != 0; flag++)
-                        switch (*flag)
-                        {
-                        case 'I':
-                                if (type != SOCKNAL_CONN_ANY) {
-                                        fprintf(stderr, "Can't flag type twice\n");
-                                        return -1;
-                                }
-                                type = SOCKNAL_CONN_BULK_IN;
-                                break;
-
-                        case 'O':
-                                if (type != SOCKNAL_CONN_ANY) {
-                                        fprintf(stderr, "Can't flag type twice\n");
-                                        return -1;
-                                }
-                                type = SOCKNAL_CONN_BULK_OUT;
-                                break;
-
-                        case 'C':
-                                if (type != SOCKNAL_CONN_ANY) {
-                                        fprintf(stderr, "Can't flag type twice\n");
-                                        return -1;
-                                }
-                                type = SOCKNAL_CONN_CONTROL;
-                                break;
-
-                        default:
-                                fprintf (stderr, "unrecognised flag '%c'\n",
-                                         *flag);
-                                return (-1);
-                        }
-
-        memset(&locaddr, 0, sizeof(locaddr));
-        locaddr.sin_family = AF_INET;
-        locaddr.sin_addr.s_addr = INADDR_ANY;
-
-        memset(&srvaddr, 0, sizeof(srvaddr));
-        srvaddr.sin_family = AF_INET;
-        srvaddr.sin_port = htons(port);
-        srvaddr.sin_addr.s_addr = htonl(ipaddr);
-
-        for (rport = IPPORT_RESERVED - 1; rport > IPPORT_RESERVED / 2; --rport) {
-                fd = socket(PF_INET, SOCK_STREAM, 0);
-                if (fd < 0) {
-                        fprintf(stderr, "socket() failed: %s\n", strerror(errno));
-                        return -1;
-                }
-
-                /* best effort: the result was never checked, so a failure
-                 * here is deliberately not fatal */
-                o = 1;
-                (void)setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
-                                 &o, sizeof(o));
-
-                locaddr.sin_port = htons(rport);
-                rc = bind(fd, (struct sockaddr *)&locaddr, sizeof(locaddr));
-                if (rc == 0 || errno == EACCES) {
-                        /* EACCES: still attempt the connect with the
-                         * socket left unbound */
-                        rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr));
-                        if (rc == 0)
-                                break;
-
-                        if (errno != EADDRINUSE) {
-                                fprintf(stderr, "Error connecting to host: %s\n", strerror(errno));
-                                close(fd);
-                                return -1;
-                        }
-                } else if (errno != EADDRINUSE) {
-                        /* report the local port being bound, not the
-                         * remote port we are connecting to */
-                        fprintf(stderr, "Error binding to port %d: %d: %s\n", rport, errno, strerror(errno));
-                        close(fd);
-                        return -1;
-                }
-
-                /* this local port is taken: release the socket before
-                 * trying the next one, otherwise every retry leaks an fd */
-                close(fd);
-        }
-
-        if (rport == IPPORT_RESERVED / 2) {
-                fprintf(stderr,
-                        "Warning: all privileged ports are in use.\n");
-                return -1;
-        }
-
-        printf("Connected host: %s type: %s\n",
-               argv[1],
-               (type == SOCKNAL_CONN_ANY) ? "A" :
-               (type == SOCKNAL_CONN_CONTROL) ? "C" :
-               (type == SOCKNAL_CONN_BULK_IN) ? "I" :
-               (type == SOCKNAL_CONN_BULK_OUT) ? "O" : "?");
-
-        PCFG_INIT(pcfg, NAL_CMD_REGISTER_PEER_FD);
-        pcfg.pcfg_nal = g_nal;
-        pcfg.pcfg_fd = fd;
-        pcfg.pcfg_misc = type;
-
-        rc = pcfg_ioctl(&pcfg);
-        if (rc) {
-                fprintf(stderr, "failed to register fd with portals: %s\n",
-                        strerror(errno));
-                close (fd);
-                return -1;
-        }
-
-        printf("Connection to %s registered with socknal\n", argv[1]);
-
-        /* our descriptor is closed once the registration ioctl succeeded */
-        rc = close(fd);
-        if (rc)
-                fprintf(stderr, "close failed: %d\n", rc);
-
-        return 0;
-#endif /* HAVE_CONNECT */
-}
-
-/*
- * Close connections on the current NAL (NAL_CMD_CLOSE_CONNECTION).
- *
- * usage: <cmd> [nid] [ipaddr]
- *
- * nid stays PTL_NID_ANY and ipaddr stays 0 when not supplied; the ipaddr
- * argument is only parsed when the NAL is SOCKNAL.
- *
- * Returns 0 on success, after printing usage, or when the NAL is not one
- * of SOCKNAL/OPENIBNAL/IIBNAL/RANAL; -1 on a parse or ioctl failure.
- */
-int jt_ptl_disconnect(int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        ptl_nid_t                nid = PTL_NID_ANY;
-        __u32                    ipaddr = 0;
-
-        if (argc > 3) {
-                fprintf(stderr, "usage: %s [nid] [ipaddr]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_compatible (NULL, SOCKNAL, OPENIBNAL, IIBNAL, RANAL, 0))
-                return 0;
-
-        if (argc >= 2) {
-                if (ptl_parse_nid (&nid, argv[1]) != 0) {
-                        fprintf (stderr, "Can't parse nid %s\n", argv[1]);
-                        return -1;
-                }
-        }
-
-        if (g_nal_is_compatible (NULL, SOCKNAL, 0)) {
-                if (argc >= 3) {
-                        if (ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) {
-                                fprintf (stderr, "Can't parse ip addr %s\n", argv[2]);
-                                return -1;
-                        }
-                }
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_CLOSE_CONNECTION);
-        pcfg.pcfg_id      = ipaddr;
-        pcfg.pcfg_nid     = nid;
-
-        if (pcfg_ioctl(&pcfg) != 0) {
-                fprintf(stderr, "failed to remove connection: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/*
- * Issue NAL_CMD_PUSH_CONNECTION on the socket NAL.
- *
- * usage: <cmd> [nid] [ip]
- *
- * nid stays PTL_NID_ANY and ipaddr stays 0 when not supplied.
- *
- * Returns 0 on success or after printing usage, -1 when the NAL is not
- * SOCKNAL, an argument cannot be parsed or the ioctl fails.
- */
-int jt_ptl_push_connection (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        int                      rc;
-        ptl_nid_t                nid = PTL_NID_ANY;
-        __u32                    ipaddr = 0;
-
-        if (argc > 3) {
-                fprintf(stderr, "usage: %s [nid] [ip]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_compatible (argv[0], SOCKNAL, 0))
-                return -1;
-
-        if (argc > 1 &&
-            ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf(stderr, "Can't parse nid: %s\n", argv[1]);
-                return -1;
-        }
-
-        if (argc > 2 &&
-            ptl_parse_ipaddr (&ipaddr, argv[2]) != 0) {
-                fprintf(stderr, "Can't parse ipaddr: %s\n", argv[2]);
-                /* bail out like every other parse failure does, rather
-                 * than issuing the ioctl with an address the user did
-                 * not ask for */
-                return -1;
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_PUSH_CONNECTION);
-        pcfg.pcfg_nid     = nid;
-        pcfg.pcfg_id      = ipaddr;
-
-        rc = pcfg_ioctl(&pcfg);
-        if (rc) {
-                fprintf(stderr, "failed to push connection: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/*
- * Print the transmit descriptors reported by the QSWNAL, issuing
- * NAL_CMD_GET_TXDESC with increasing indices until the ioctl fails.
- *
- * pcfg_count carries the index into the ioctl; after the ioctl it is
- * compared against the PTL_MSG_* values to name the message type.
- *
- * Returns -1 if the NAL is not QSWNAL, 0 otherwise.
- */
-int 
-jt_ptl_print_active_txs (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        const char              *msgtype;
-        int                      n;
-
-        if (!g_nal_is_compatible (argv[0], QSWNAL, 0))
-                return -1;
-
-        n = 0;
-        for (;;) {
-                PCFG_INIT(pcfg, NAL_CMD_GET_TXDESC);
-                pcfg.pcfg_count = n;
-
-                if (pcfg_ioctl(&pcfg) != 0)
-                        break;
-
-                if (pcfg.pcfg_count == PTL_MSG_ACK)
-                        msgtype = "ACK";
-                else if (pcfg.pcfg_count == PTL_MSG_PUT)
-                        msgtype = "PUT";
-                else if (pcfg.pcfg_count == PTL_MSG_GET)
-                        msgtype = "GET";
-                else if (pcfg.pcfg_count == PTL_MSG_REPLY)
-                        msgtype = "REPLY";
-                else
-                        msgtype = "<wierd message>";
-
-                /* bit 0 and bit 1 of pcfg_flags are printed as words, the
-                 * remaining bits as the numeric state */
-                printf ("%p: %5s payload %6d bytes to "LPX64" via "LPX64" by pid %6d: %s, %s, state %d\n",
-                        pcfg.pcfg_pbuf1,
-                        msgtype,
-                        pcfg.pcfg_size,
-                        pcfg.pcfg_nid,
-                        pcfg.pcfg_nid2,
-                        pcfg.pcfg_misc,
-                        (pcfg.pcfg_flags & 1) ? "delayed" : "immediate",
-                        (pcfg.pcfg_flags & 2) ? "nblk"    : "normal",
-                        pcfg.pcfg_flags >> 2);
-
-                n++;
-        }
-
-        if (n == 0)
-                printf ("<no active descs>\n");
-
-        return 0;
-}
-
-/*
- * Start the pinger against a NID through IOC_PORTAL_PING.
- *
- * usage: <cmd> nid [count] [size] [timeout (secs)]
- *
- * Defaults are count 1, size 4 and timeout 1.  A count outside 0..20000
- * is rejected.
- *
- * Returns 0 on success or after printing usage, -1 if no NAL is set, an
- * argument is rejected or the ioctl fails.
- */
-int jt_ptl_ping(int argc, char **argv)
-{
-        struct portal_ioctl_data data;
-        ptl_nid_t nid;
-        long      timeout = 1;
-        long      size    = 4;
-        long      count   = 1;
-
-        if (argc < 2) {
-                fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_set())
-                return -1;
-
-        if (ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
-                return (-1);
-        }
-
-        if (argc > 2) {
-                count = atol(argv[2]);
-                if (!(count >= 0 && count <= 20000)) {
-                        fprintf(stderr, "are you insane?  %ld is a crazy count.\n", count);
-                        return -1;
-                }
-        }
-
-        if (argc > 3)
-                size = atol(argv[3]);
-
-        if (argc > 4)
-                timeout = atol (argv[4]);
-
-        PORTAL_IOC_INIT (data);
-        data.ioc_nal     = g_nal;
-        data.ioc_nid     = nid;
-        data.ioc_count   = count;
-        data.ioc_size    = size;
-        data.ioc_timeout = timeout;
-
-        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data) != 0) {
-                fprintf(stderr, "failed to start pinger: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-
-        return 0;
-}
-
-/*
- * Print the NID returned by IOC_PORTAL_GET_NID for the current NAL.
- *
- * Takes no arguments.  Returns -1 only when no NAL is set; a failed ioctl
- * is reported on stderr but the command still returns 0.
- */
-int jt_ptl_shownid(int argc, char **argv)
-{
-        struct portal_ioctl_data data;
-
-        if (argc > 1) {
-                fprintf(stderr, "usage: %s\n", argv[0]);
-                return 0;
-        }
-
-        if (!g_nal_is_set())
-                return -1;
-
-        PORTAL_IOC_INIT (data);
-        data.ioc_nal = g_nal;
-
-        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data) >= 0) {
-                printf(LPX64"\n", data.ioc_nid);
-        } else {
-                fprintf(stderr, "getting my NID failed: %s\n",
-                        strerror (errno));
-        }
-
-        return 0;
-}
-
-/*
- * Register this node's NID with the current NAL (NAL_CMD_REGISTER_MYNID).
- *
- * usage: <cmd> [NID]
- *
- * When no NID is given, the string returned by gethostname() is handed to
- * ptl_parse_nid() instead.
- *
- * Returns -1 if no NAL is set, gethostname() fails or the NID string cannot
- * be parsed; 0 otherwise (a failed ioctl is reported on stderr only).
- */
-int jt_ptl_mynid(int argc, char **argv)
-{
-        int rc;
-        char hostname[1024];
-        char *nidstr;
-        struct portals_cfg pcfg;
-        ptl_nid_t mynid;
-
-        if (argc > 2) {
-                fprintf(stderr, "usage: %s [NID]\n", argv[0]);
-                fprintf(stderr, "NID defaults to the primary IP address of the machine.\n");
-                return 0;
-        }
-
-        if (!g_nal_is_set())
-                return -1;
-
-        if (argc >= 2)
-                nidstr = argv[1];
-        else if (gethostname(hostname, sizeof(hostname)) != 0) {
-                fprintf(stderr, "gethostname failed: %s\n",
-                        strerror(errno));
-                return -1;
-        }
-        else
-                nidstr = hostname;
-
-        rc = ptl_parse_nid (&mynid, nidstr);
-        if (rc != 0) {
-                fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr);
-                return -1;
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_REGISTER_MYNID);
-        pcfg.pcfg_nid = mynid;
-
-        rc = pcfg_ioctl(&pcfg);
-        if (rc < 0)
-                fprintf(stderr, "setting my NID failed: %s\n",
-                       strerror(errno));
-        else
-                /* print the string the NID was parsed from: hostname[] is
-                 * never filled in when the NID came from the command line */
-                printf("registered my nid "LPX64" (%s)\n",
-                       ptl_nid2u64(mynid), nidstr);
-        return 0;
-}
-
-/*
- * Issue IOC_PORTAL_FAIL_NID for a NID on the current NAL.
- *
- * usage: <cmd> nid|"_all_" [count (0 == mend)]
- *
- * "_all_" selects PTL_NID_ANY; a missing count selects PTL_MD_THRESH_INF.
- *
- * Returns 0 after printing usage and after the ioctl (whether or not it
- * succeeded), -1 if no NAL is set or an argument cannot be parsed.
- */
-int
-jt_ptl_fail_nid (int argc, char **argv)
-{
-        struct portal_ioctl_data data;
-        ptl_nid_t                nid;
-        unsigned int             threshold;
-
-        if (argc != 2 && argc != 3) {
-                fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]);
-                return (0);
-        }
-
-        if (!g_nal_is_set())
-                return (-1);
-
-        if (strcmp (argv[1], "_all_") == 0) {
-                nid = PTL_NID_ANY;
-        } else if (ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]);
-                return (-1);
-        }
-
-        if (argc >= 3) {
-                if (sscanf (argv[2], "%i", &threshold) != 1) {
-                        fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]);
-                        return (-1);
-                }
-        } else {
-                threshold = PTL_MD_THRESH_INF;
-        }
-
-        PORTAL_IOC_INIT (data);
-        data.ioc_count = threshold;
-        data.ioc_nid = nid;
-        data.ioc_nal = g_nal;
-
-        if (l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data) < 0) {
-                fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n",
-                         strerror (errno));
-        } else {
-                printf ("%s %s\n", threshold == 0 ? "Unfailing" : "Failing", argv[1]);
-        }
-
-        return (0);
-}
-
-/*
- * Add a route through a gateway (NAL_CMD_ADD_ROUTE on the ROUTER NAL).
- *
- * usage: <cmd> gateway target [target]
- *
- * With one target the route covers that single NID; with two, the pair is
- * passed to the ioctl ordered as (MIN, MAX) in pcfg_nid2/pcfg_nid3.
- *
- * Returns 0 on success or after printing usage, -1 if no NAL is set, a NID
- * cannot be parsed or the ioctl fails.
- */
-int
-jt_ptl_add_route (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        ptl_nid_t                nid1;
-        ptl_nid_t                nid2;
-        ptl_nid_t                gateway_nid;
-        int                      rc;
-
-        if (argc < 3)
-        {
-                fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]);
-                return (0);
-        }
-
-        if (!g_nal_is_set())
-                return (-1);
-
-        if (ptl_parse_nid (&gateway_nid, argv[1]) != 0)
-        {
-                fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
-                return (-1);
-        }
-
-        if (ptl_parse_nid (&nid1, argv[2]) != 0)
-        {
-                fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]);
-                return (-1);
-        }
-
-        if (argc < 4)
-                nid2 = nid1;
-        else if (ptl_parse_nid (&nid2, argv[3]) != 0)
-        {
-                /* the second target is argv[3]; argv[4] is past the
-                 * argument that failed to parse */
-                fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[3]);
-                return (-1);
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_ADD_ROUTE);
-        pcfg.pcfg_nid = gateway_nid;
-        pcfg.pcfg_nal = ROUTER;
-        pcfg.pcfg_gw_nal = g_nal;
-        pcfg.pcfg_nid2 = MIN (nid1, nid2);
-        pcfg.pcfg_nid3 = MAX (nid1, nid2);
-
-        rc = pcfg_ioctl(&pcfg);
-        if (rc != 0)
-        {
-                fprintf (stderr, "NAL_CMD_ADD_ROUTE failed: %s\n", strerror (errno));
-                return (-1);
-        }
-
-        return (0);
-}
-
-/*
- * Delete a route (NAL_CMD_DEL_ROUTE on the ROUTER NAL).
- *
- * argv[1] is parsed into pcfg_nid; up to two further NIDs (argv[2],
- * argv[3]) become pcfg_nid2/pcfg_nid3, lower value first.  Both default to
- * PTL_NID_ANY when absent; with a single extra NID both take its value.
- *
- * Returns 0 on success or after printing usage, -1 if no NAL is set, a NID
- * cannot be parsed or the ioctl fails.
- */
-int
-jt_ptl_del_route (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        ptl_nid_t                nid;
-        ptl_nid_t                lo = PTL_NID_ANY;
-        ptl_nid_t                hi = PTL_NID_ANY;
-
-        if (argc < 2) {
-                fprintf (stderr, "usage: %s targetNID\n", argv[0]);
-                return (0);
-        }
-
-        if (!g_nal_is_set())
-                return (-1);
-
-        if (ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]);
-                return (-1);
-        }
-
-        if (argc >= 3) {
-                if (ptl_parse_nid (&lo, argv[2]) != 0) {
-                        fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[2]);
-                        return (-1);
-                }
-        }
-
-        if (argc >= 4) {
-                if (ptl_parse_nid (&hi, argv[3]) != 0) {
-                        fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[3]);
-                        return (-1);
-                }
-
-                /* keep the pair ordered low..high */
-                if (lo > hi) {
-                        ptl_nid_t swap = lo;
-
-                        lo = hi;
-                        hi = swap;
-                }
-        } else {
-                hi = lo;
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_DEL_ROUTE);
-        pcfg.pcfg_nid = nid;
-        pcfg.pcfg_nid2 = lo;
-        pcfg.pcfg_nid3 = hi;
-        pcfg.pcfg_nal = ROUTER;
-        pcfg.pcfg_gw_nal = g_nal;
-
-        if (pcfg_ioctl(&pcfg) != 0) {
-                fprintf (stderr, "NAL_CMD_DEL_ROUTE ("LPX64") failed: %s\n", 
-                         ptl_nid2u64(nid), strerror (errno));
-                return (-1);
-        }
-
-        return (0);
-}
-
-/*
- * Tell the router that a NID went up or down (NAL_CMD_NOTIFY_ROUTER).
- *
- * usage: <cmd> targetNID <up/down> [<time>]
- *
- * The time defaults to the current second from gettimeofday(); an explicit
- * time later than that is rejected.
- *
- * Returns 0 on success or after printing usage, -1 if an argument cannot
- * be parsed, the time lies in the future or the ioctl fails.
- */
-int
-jt_ptl_notify_router (int argc, char **argv)
-{
-        struct portals_cfg       pcfg;
-        struct timeval           now;
-        time_t                   when;
-        ptl_nid_t                nid;
-        int                      enable;
-
-        if (argc < 3) {
-                fprintf (stderr, "usage: %s targetNID <up/down> [<time>]\n", 
-                         argv[0]);
-                return (0);
-        }
-
-        if (ptl_parse_nid (&nid, argv[1]) != 0) {
-                fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]);
-                return (-1);
-        }
-
-        if (ptl_parse_bool (&enable, argv[2]) != 0) {
-                fprintf (stderr, "Can't parse boolean %s\n", argv[2]);
-                return (-1);
-        }
-
-        gettimeofday(&now, NULL);
-
-        if (argc >= 4) {
-                if (ptl_parse_time (&when, argv[3]) != 0) {
-                        fprintf(stderr, "Can't parse time %s\n"
-                                "Please specify either 'YYYY-MM-DD-HH:MM:SS'\n"
-                                "or an absolute unix time in seconds\n", argv[3]);
-                        return (-1);
-                }
-
-                if (when > now.tv_sec) {
-                        fprintf (stderr, "%s specifies a time in the future\n",
-                                 argv[3]);
-                        return (-1);
-                }
-        } else {
-                when = now.tv_sec;
-        }
-
-        PCFG_INIT(pcfg, NAL_CMD_NOTIFY_ROUTER);
-        pcfg.pcfg_nid = nid;
-        pcfg.pcfg_flags = enable;
-        pcfg.pcfg_nal = ROUTER;
-        pcfg.pcfg_gw_nal = g_nal;
-        /* the timestamp travels in pcfg_nid3, widened to __u64 */
-        pcfg.pcfg_nid3 = (__u64)when;
-
-        if (pcfg_ioctl(&pcfg) != 0) {
-                fprintf (stderr, "NAL_CMD_NOTIFY_ROUTER ("LPX64") failed: %s\n",
-                         ptl_nid2u64(nid), strerror (errno));
-                return (-1);
-        }
-
-        return (0);
-}
-
-/*
- * Print every route known to the ROUTER NAL, issuing NAL_CMD_GET_ROUTE
- * with increasing indices until the ioctl fails.  Each line shows the
- * gateway NAL name, the gateway NID, the two NIDs from pcfg_nid2/pcfg_nid3
- * and "up" or "down" according to pcfg_flags.  Always returns 0.
- */
-int
-jt_ptl_print_routes (int argc, char **argv)
-{
-        struct portals_cfg        pcfg;
-        char                      nidstr[3][128];
-        int                       n = 0;
-
-        while (1) {
-                PCFG_INIT(pcfg, NAL_CMD_GET_ROUTE);
-                pcfg.pcfg_count = n;
-                pcfg.pcfg_nal = ROUTER;
-
-                if (pcfg_ioctl(&pcfg) != 0)
-                        break;
-
-                printf ("%8s %18s : %s - %s, %s\n", 
-                        nal2name (pcfg.pcfg_gw_nal), 
-                        ptl_nid2str (nidstr[0], pcfg.pcfg_nid),
-                        ptl_nid2str (nidstr[1], pcfg.pcfg_nid2),
-                        ptl_nid2str (nidstr[2], pcfg.pcfg_nid3),
-                        pcfg.pcfg_flags ? "up" : "down");
-
-                n++;
-        }
-
-        return (0);
-}
-
-/*
- * Issue IOC_PORTAL_LWT_CONTROL with 'enable' in ioc_flags and 'clear' in
- * ioc_misc.  Returns 0 on success; on failure reports the error on stderr
- * and returns -1.
- */
-static int
-lwt_control(int enable, int clear)
-{
-        struct portal_ioctl_data data;
-
-        PORTAL_IOC_INIT(data);
-        data.ioc_misc = clear;
-        data.ioc_flags = enable;
-
-        if (l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_CONTROL, &data) != 0) {
-                fprintf(stderr, "IOC_PORTAL_LWT_CONTROL failed: %s\n",
-                        strerror(errno));
-                return (-1);
-        }
-
-        return (0);
-}
-
-/*
- * Take a snapshot of the LWT event buffers via IOC_PORTAL_LWT_SNAPSHOT.
- *
- * 'events'/'size' describe the user buffer handed to the ioctl.  Each of
- * 'now', 'ncpu' and 'totalsize' may be NULL; when non-NULL it receives
- * ioc_nid, ioc_count and ioc_misc respectively from the ioctl reply.
- *
- * Returns 0 on success, -1 if the ioctl fails or the event layout
- * reported back does not match this program's lwt_event_t.
- */
-static int
-lwt_snapshot(cycles_t *now, int *ncpu, int *totalsize, 
-             lwt_event_t *events, int size)
-{
-        struct portal_ioctl_data data;
-        int                      rc;
-
-        PORTAL_IOC_INIT(data);
-        data.ioc_pbuf1 = (char *)events;
-        data.ioc_plen1 = size;
-
-        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_SNAPSHOT, &data);
-        if (rc != 0) {
-                fprintf(stderr, "IOC_PORTAL_LWT_SNAPSHOT failed: %s\n",
-                        strerror(errno));
-                return (-1);
-        }
-
-        /* crappy overloads: ioc_nid2/ioc_nid3 are compared against the
-         * event size and the offset of lwte_where as seen by this build */
-        if (data.ioc_nid2 != sizeof(lwt_event_t) ||
-            data.ioc_nid3 != offsetof(lwt_event_t, lwte_where)) {
-                /* every argument is cast to int: sizeof yields a size_t,
-                 * which must not be passed to a %d conversion as is */
-                fprintf(stderr,"kernel/user LWT event mismatch %d(%d),%d(%d)\n",
-                        (int)data.ioc_nid2, (int)sizeof(lwt_event_t),
-                        (int)data.ioc_nid3,
-                        (int)offsetof(lwt_event_t, lwte_where));
-                return (-1);
-        }
-
-        LASSERT (data.ioc_count != 0);
-        LASSERT (data.ioc_misc != 0);
-
-        if (now != NULL)
-                *now = data.ioc_nid;
-
-        if (ncpu != NULL)
-                *ncpu = data.ioc_count;
-
-        if (totalsize != NULL)
-                *totalsize = data.ioc_misc;
-
-        return (0);
-}
-
-/*
- * Look up the string behind 'kstr' with IOC_PORTAL_LWT_LOOKUP_STRING and
- * return a malloc()ed copy of it.
- *
- * The ioctl is issued twice: first without an output buffer, to read the
- * required size from ioc_count, then again with a buffer of that size.
- *
- * Returns the new string, to be released with lwt_put_string(), or NULL
- * if either ioctl or the allocation fails.
- */
-static char *
-lwt_get_string(char *kstr)
-{
-        char                     *ustr;
-        struct portal_ioctl_data  data;
-        int                       size;
-        int                       rc;
-
-        /* FIXME: this could maintain a symbol table since we expect to be
-         * looking up the same strings all the time... */
-
-        PORTAL_IOC_INIT(data);
-        data.ioc_pbuf1 = kstr;
-        data.ioc_plen1 = 1;        /* non-zero just to fool portal_ioctl_is_invalid() */
-        data.ioc_pbuf2 = NULL;
-        data.ioc_plen2 = 0;
-
-        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_LOOKUP_STRING, &data);
-        if (rc != 0) {
-                fprintf(stderr, "IOC_PORTAL_LWT_LOOKUP_STRING failed: %s\n",
-                        strerror(errno));
-                return (NULL);
-        }
-
-        size = data.ioc_count;
-        ustr = (char *)malloc(size);
-        if (ustr == NULL) {
-                fprintf(stderr, "Can't allocate string storage of size %d\n",
-                        size);
-                return (NULL);
-        }
-
-        PORTAL_IOC_INIT(data);
-        data.ioc_pbuf1 = kstr;
-        data.ioc_plen1 = 1;        /* non-zero just to fool portal_ioctl_is_invalid() */
-        data.ioc_pbuf2 = ustr;
-        data.ioc_plen2 = size;
-
-        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_LWT_LOOKUP_STRING, &data);
-        if (rc != 0) {
-                fprintf(stderr, "IOC_PORTAL_LWT_LOOKUP_STRING failed: %s\n",
-                        strerror(errno));
-                /* the caller only sees NULL, so the buffer has to be
-                 * released here or it is lost */
-                free(ustr);
-                return (NULL);
-        }
-
-        LASSERT(strlen(ustr) == size - 1);
-        return (ustr);
-}
-
-/* Release a string with free(); counterpart of lwt_get_string(). */
-static void
-lwt_put_string(char *ustr)
-{
-        free (ustr);
-}
-
-/*
- * Write one LWT event to 'f' as a single line: the four lwte_p* values and
- * the task in hex, the cpu number, the time since 't0', the time since
- * 'tlast' (0.0 when the event is the one at t0) and the location string
- * fetched with lwt_get_string().  'mhz' is the divisor used to convert
- * cycle counts.
- *
- * Returns 0 on success, -1 if the location string cannot be fetched.
- */
-static int
-lwt_print(FILE *f, cycles_t t0, cycles_t tlast, double mhz, int cpu, lwt_event_t *e)
-{
-        /* hex field width follows the word size of the build */
-#ifndef __WORDSIZE
-# error "__WORDSIZE not defined"
-#elif __WORDSIZE == 32
-# define XFMT "%#010lx"
-#elif __WORDSIZE== 64
-# define XFMT "%#018lx"
-#else
-# error "Unexpected __WORDSIZE"
-#endif
-        char           *where;
-        double          since_start;
-        double          since_last;
-
-        where = lwt_get_string(e->lwte_where);
-        if (where == NULL)
-                return (-1);
-
-        since_start = (e->lwte_when - t0) / (mhz * 1000000.0);
-
-        if (t0 == e->lwte_when)
-                since_last = 0.0;
-        else
-                since_last = (e->lwte_when - tlast) / mhz;
-
-        fprintf(f, XFMT" "XFMT" "XFMT" "XFMT": "XFMT" %2d %10.6f %10.2f %s\n",
-                e->lwte_p1, e->lwte_p2, e->lwte_p3, e->lwte_p4,
-                (long)e->lwte_task, cpu, since_start, since_last,
-                where);
-
-        lwt_put_string(where);
-
-        return (0);
-#undef XFMT
-}
-
-/*
- * Return the value of the first "cpu MHz : <number>" line found in
- * /proc/cpuinfo.  If the file cannot be opened or no such line parses, a
- * message is written to stderr and 1000.0 is returned.
- */
-double
-get_cycles_per_usec ()
-{
-        FILE      *cpuinfo;
-        char       buf[64];
-        double     mhz;
-        int        found = 0;
-
-        cpuinfo = fopen ("/proc/cpuinfo", "r");
-        if (cpuinfo != NULL) {
-                /* stop at the first line that yields a number */
-                while (!found &&
-                       fgets (buf, sizeof (buf), cpuinfo) != NULL)
-                        found = (sscanf (buf, "cpu MHz : %lf", &mhz) == 1);
-
-                fclose (cpuinfo);
-
-                if (found)
-                        return (mhz);
-        }
-
-        fprintf (stderr, "Can't read/parse /proc/cpuinfo\n");
-        return (1000.0);
-}
-
-int
-jt_ptl_lwt(int argc, char **argv)
-{
-        const int       lwt_max_cpus = 32;
-        int             ncpus;
-        int             totalspace;
-        int             nevents_per_cpu;
-        lwt_event_t    *events;
-        lwt_event_t    *cpu_event[lwt_max_cpus + 1];
-        lwt_event_t    *next_event[lwt_max_cpus];
-        lwt_event_t    *first_event[lwt_max_cpus];
-        int             cpu;
-        lwt_event_t    *e;
-        int             rc;
-        int             i;
-        double          mhz;
-        cycles_t        t0;
-        cycles_t        tlast;
-        cycles_t        tnow;
-        struct timeval  tvnow;
-        int             printed_date = 0;
-        int             nlines = 0;
-        FILE           *f = stdout;
-
-        if (argc < 2 ||
-            (strcmp(argv[1], "start") &&
-             strcmp(argv[1], "stop"))) {
-                fprintf(stderr, 
-                        "usage:  %s start\n"
-                        "        %s stop [fname]\n", argv[0], argv[0]);
-                return (-1);
-        }
-        
-        if (!strcmp(argv[1], "start")) {
-                /* disable */
-                if (lwt_control(0, 0) != 0)
-                        return (-1);
-
-                /* clear */
-                if (lwt_control(0, 1) != 0)
-                        return (-1);
-
-                /* enable */
-                if (lwt_control(1, 0) != 0)
-                        return (-1);
-
-                return (0);
-        }
-                
-        if (lwt_snapshot(NULL, &ncpus, &totalspace, NULL, 0) != 0)
-                return (-1);
-
-        if (ncpus > lwt_max_cpus) {
-                fprintf(stderr, "Too many cpus: %d (%d)\n", 
-                        ncpus, lwt_max_cpus);
-                return (-1);
-        }
-
-        events = (lwt_event_t *)malloc(totalspace);
-        if (events == NULL) {
-                fprintf(stderr, "Can't allocate %d\n", totalspace);
-                return (-1);
-        }
-
-        if (lwt_control(0, 0) != 0) {           /* disable */
-                free(events);
-                return (-1);
-        }
-
-        if (lwt_snapshot(&tnow, NULL, NULL, events, totalspace)) {
-                free(events);
-                return (-1);
-        }
-
-        /* we want this time to be sampled at snapshot time */
-        gettimeofday(&tvnow, NULL);
-
-        if (argc > 2) {
-                f = fopen (argv[2], "w");
-                if (f == NULL) {
-                        fprintf(stderr, "Can't open %s for writing: %s\n", argv[2], strerror (errno));
-                        free(events);
-                        return (-1);
-                }
-        }
-
-        mhz = get_cycles_per_usec();
-        
-        /* carve events into per-cpu slices */
-        nevents_per_cpu = totalspace / (ncpus * sizeof(lwt_event_t));
-        for (cpu = 0; cpu <= ncpus; cpu++)
-                cpu_event[cpu] = &events[cpu * nevents_per_cpu];
-
-        /* find the earliest event on each cpu */
-        for (cpu = 0; cpu < ncpus; cpu++) {
-                first_event[cpu] = NULL;
-
-                for (e = cpu_event[cpu]; e < cpu_event[cpu + 1]; e++) {
-
-                        if (e->lwte_where == NULL) /* not an event */
-                                continue;
-
-                        if (first_event[cpu] == NULL ||
-                            first_event[cpu]->lwte_when > e->lwte_when)
-                                first_event[cpu] = e;
-                }
-
-                next_event[cpu] = first_event[cpu];
-        }
-
-        t0 = tlast = 0;
-        for (cpu = 0; cpu < ncpus; cpu++) {
-                e = first_event[cpu];
-                if (e == NULL)                  /* no events this cpu */
-                        continue;
-                
-                if (e == cpu_event[cpu])
-                        e = cpu_event[cpu + 1] - 1;
-                else 
-                        e = e - 1;
-                
-                /* If there's an event immediately before the first one, this
-                 * cpu wrapped its event buffer */
-                if (e->lwte_where == NULL)
-                        continue;
-         
-                /* We should only start outputting events from the most recent
-                 * first event in any wrapped cpu.  Events before this time on
-                 * other cpus won't have any events from this CPU to interleave
-                 * with. */
-                if (t0 < first_event[cpu]->lwte_when)
-                        t0 = first_event[cpu]->lwte_when;
-        }
-
-        for (;;) {
-                /* find which cpu has the next event */
-                cpu = -1;
-                for (i = 0; i < ncpus; i++) {
-
-                        if (next_event[i] == NULL) /* this cpu exhausted */
-                                continue;
-
-                        if (cpu < 0 ||
-                            next_event[i]->lwte_when < next_event[cpu]->lwte_when)
-                                cpu = i;
-                }
-
-                if (cpu < 0)                    /* all cpus exhausted */
-                        break;
-
-                if (t0 == 0) {
-                        /* no wrapped cpus and this is he first ever event */
-                        t0 = next_event[cpu]->lwte_when;
-                }
-                
-                if (t0 <= next_event[cpu]->lwte_when) {
-                        /* on or after the first event */
-                        if (!printed_date) {
-                                cycles_t du = (tnow - t0) / mhz;
-                                time_t   then = tvnow.tv_sec - du/1000000;
-                                
-                                if (du % 1000000 > tvnow.tv_usec)
-                                        then--;
-
-                                fprintf(f, "%s", ctime(&then));
-                                printed_date = 1;
-                        }
-                        
-                        rc = lwt_print(f, t0, tlast, mhz, cpu, next_event[cpu]);
-                        if (rc != 0)
-                                break;
-
-                        if (++nlines % 10000 == 0 && f != stdout) {
-                                /* show some activity... */
-                                printf(".");
-                                fflush (stdout);
-                        }
-                }
-
-                tlast = next_event[cpu]->lwte_when;
-                
-                next_event[cpu]++;
-                if (next_event[cpu] == cpu_event[cpu + 1])
-                        next_event[cpu] = cpu_event[cpu];
-
-                if (next_event[cpu]->lwte_where == NULL ||
-                    next_event[cpu] == first_event[cpu])
-                        next_event[cpu] = NULL;
-        }
-
-        if (f != stdout) {
-                printf("\n");
-                fclose(f);
-        }
-
-        free(events);
-        return (0);
-}
-
-int jt_ptl_memhog(int argc, char **argv)
-{
-        static int                gfp = 0;        /* sticky! */
-
-        struct portal_ioctl_data  data;
-        int                       rc;
-        int                       count;
-        char                     *end;
-        
-        if (argc < 2)  {
-                fprintf(stderr, "usage: %s <npages> [<GFP flags>]\n", argv[0]);
-                return 0;
-        }
-
-        count = strtol(argv[1], &end, 0);
-        if (count < 0 || *end != 0) {
-                fprintf(stderr, "Can't parse page count '%s'\n", argv[1]);
-                return -1;
-        }
-
-        if (argc >= 3) {
-                rc = strtol(argv[2], &end, 0);
-                if (*end != 0) {
-                        fprintf(stderr, "Can't parse gfp flags '%s'\n", argv[2]);
-                        return -1;
-                }
-                gfp = rc;
-        }
-        
-        PORTAL_IOC_INIT(data);
-        data.ioc_count = count;
-        data.ioc_flags = gfp;
-        rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MEMHOG, &data);
-
-        if (rc != 0) {
-                fprintf(stderr, "memhog %d failed: %s\n", count, strerror(errno));
-                return -1;
-        }
-        
-        printf("memhog %d OK\n", count);
-        return 0;
-}
-
diff --git a/lustre/portals/utils/ptlctl.c b/lustre/portals/utils/ptlctl.c
deleted file mode 100644 (file)
index 03cfe77..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
- * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
- *
- *   This file is part of Portals, http://www.sf.net/projects/lustre/
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <portals/api-support.h>
-#include <portals/ptlctl.h>
-
-#include "parser.h"
-
-
-command_t list[] = {
-        {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"},
-        {"print_interfaces", jt_ptl_print_interfaces, 0, "print interface entries (no args)"},
-        {"add_interface", jt_ptl_add_interface, 0, "add interface entry (args: ip [netmask])"},
-        {"del_interface", jt_ptl_del_interface, 0, "delete interface entries (args: [ip])"},
-        {"print_peers", jt_ptl_print_peers, 0, "print peer entries (no args)"},
-        {"add_peer", jt_ptl_add_peer, 0, "add peer entry (args: nid host port)"},
-        {"del_peer", jt_ptl_del_peer, 0, "delete peer entry (args: [nid] [host])"},
-        {"print_conns", jt_ptl_print_connections, 0, "print connections (no args)"},
-        {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: host port [iIOC])"},
-        {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [nid] [host]"},
-        {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [nid]"},
-        {"active_tx", jt_ptl_print_active_txs, 0, "print active transmits (no args)"},
-        {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"},
-        {"shownid", jt_ptl_shownid, 0, "print the local NID"},
-        {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"},
-        {"add_route", jt_ptl_add_route, 0, 
-         "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"},
-        {"del_route", jt_ptl_del_route, 0, 
-         "delete all routes via a gateway from the routing table (args: gatewayNID"},
-        {"set_route", jt_ptl_notify_router, 0, 
-         "enable/disable a route in the routing table (args: gatewayNID up/down [time]"},
-        {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"},
-        {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"},
-        {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"},
-        {"help", Parser_help, 0, "help"},
-        {"exit", Parser_quit, 0, "quit"},
-        {"quit", Parser_quit, 0, "quit"},
-        { 0, 0, 0, NULL }
-};
-
-int main(int argc, char **argv)
-{
-        if (ptl_initialize(argc, argv) < 0)
-                exit(1);
-
-        Parser_init("ptlctl > ", list);
-        if (argc > 1)
-                return Parser_execarg(argc - 1, &argv[1], list);
-
-        Parser_commands();
-
-        return 0;
-}
diff --git a/lustre/portals/utils/routerstat.c b/lustre/portals/utils/routerstat.c
deleted file mode 100644 (file)
index 99bc59b..0000000
+++ /dev/null
@@ -1,120 +0,0 @@
-#include <stdio.h>
-#include <errno.h>
-#include <string.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/time.h>
-
-double
-timenow ()
-{
-   struct timeval tv;
-   
-   gettimeofday (&tv, NULL);
-   return (tv.tv_sec + tv.tv_usec / 1000000.0);
-}
-
-void
-do_stat (int fd)
-{
-   static char  buffer[1024];
-   static double last = 0.0;
-   static unsigned long long old_bytes;
-   static unsigned long      old_packets;
-   static unsigned long      old_errors;
-   double now;
-   double t;
-   unsigned long long new_bytes, bytes;
-   unsigned long      new_packets, packets;
-   unsigned long      new_errors, errors;
-   unsigned long      depth;
-   int    n;
-   
-   lseek (fd, 0, SEEK_SET);
-   now = timenow();
-   n = read (fd, buffer, sizeof (buffer));
-   if (n < 0)
-   {
-      fprintf (stderr, "Can't read statfile\n");
-      exit (1);
-   }    
-   buffer[n] = 0;
-   
-   n = sscanf (buffer, "%Lu %lu %lu %lu",
-              &new_bytes, &new_packets, &new_errors, &depth);
-   
-   if (n < 3)
-   {
-      fprintf (stderr, "Can't parse statfile\n");
-      exit (1);
-   }
-   
-   if (last == 0.0)
-      printf ("%llu bytes, %lu packets (sz %lld), %lu errors", 
-             new_bytes, new_packets,
-             (long long)((new_packets == 0) ? 0LL : new_bytes/new_packets),
-             new_errors);
-   else
-   {
-      t = now - last;
-
-      if (new_bytes < old_bytes)
-         bytes = -1ULL - old_bytes + new_bytes + 1;
-      else
-         bytes = new_bytes - old_bytes;
-      if (new_packets < old_packets)
-         packets = -1UL - old_packets + new_packets + 1;
-      else
-         packets = new_packets - old_packets;
-      if (new_errors < old_errors)
-         errors = -1UL - old_errors + new_errors + 1;
-      else
-         errors = new_errors - old_errors;
-      
-      printf ("%9llu bytes (%7.2fMb/s), %7lu packets (sz %5lld, %5ld/s), %lu errors (%ld/s)", 
-             bytes, ((double)bytes)/((1<<20) * t),
-             packets, (long long)((packets == 0) ? 0LL : bytes/packets), (long)(packets/t),
-             errors, (long)(errors/t));
-   }
-   old_bytes = new_bytes;
-   old_packets = new_packets;
-   old_errors = new_errors;
-
-   if (n == 4)
-      printf (", depth (%ld)\n", depth);
-   else
-      printf ("\n");
-
-   fflush (stdout);
-   
-   lseek (fd, 0, SEEK_SET);
-   last = timenow();
-}
-
-int main (int argc, char **argv)
-{
-   int  interval = 0;
-   int  fd;
-   
-   if (argc > 1)
-      interval = atoi (argv[1]);
-
-   fd = open ("/proc/sys/portals/router", O_RDONLY);
-   if (fd < 0)
-   {
-      fprintf (stderr, "Can't open stat: %s\n", strerror (errno));
-      return (1);
-   }
-   
-   do_stat (fd);
-   if (interval == 0)
-      return (0);
-   
-   for (;;)
-   {
-      sleep (interval);
-      do_stat (fd);
-   }
-}
diff --git a/lustre/portals/utils/wirecheck.c b/lustre/portals/utils/wirecheck.c
deleted file mode 100644 (file)
index 6316290..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- */
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <portals/api-support.h>
-#include <portals/list.h>
-#include <portals/lib-types.h>
-
-extern size_t strnlen(const char *, size_t);
-
-#define BLANK_LINE()                            \
-do {                                            \
-        printf ("\n");                          \
-} while (0)
-
-#define COMMENT(c)                              \
-do {                                            \
-        printf ("        /* "c" */\n");         \
-} while (0)
-
-#define STRINGIFY(a) #a
-
-#define CHECK_DEFINE(a)                                         \
-do {                                                            \
-        printf ("        LASSERT ("#a" == "STRINGIFY(a)");\n"); \
-} while (0)
-
-#define CHECK_VALUE(a)                                  \
-do {                                                    \
-        printf ("        LASSERT ("#a" == %d);\n", a);  \
-} while (0)
-
-#define CHECK_MEMBER_OFFSET(s,m)                \
-do {                                            \
-        CHECK_VALUE((int)offsetof(s, m));       \
-} while (0)
-
-#define CHECK_MEMBER_SIZEOF(s,m)                \
-do {                                            \
-        CHECK_VALUE((int)sizeof(((s *)0)->m));  \
-} while (0)
-
-#define CHECK_MEMBER(s,m)                       \
-do {                                            \
-        CHECK_MEMBER_OFFSET(s, m);              \
-        CHECK_MEMBER_SIZEOF(s, m);              \
-} while (0)
-
-#define CHECK_STRUCT(s)                         \
-do {                                            \
-        BLANK_LINE ();                          \
-        COMMENT ("Checks for struct "#s);       \
-        CHECK_VALUE((int)sizeof(s));            \
-} while (0)
-
-void
-check_ptl_handle_wire (void)
-{
-        CHECK_STRUCT (ptl_handle_wire_t);
-        CHECK_MEMBER (ptl_handle_wire_t, wh_interface_cookie);
-        CHECK_MEMBER (ptl_handle_wire_t, wh_object_cookie);
-}
-
-void
-check_ptl_magicversion (void)
-{
-        CHECK_STRUCT (ptl_magicversion_t);
-        CHECK_MEMBER (ptl_magicversion_t, magic);
-        CHECK_MEMBER (ptl_magicversion_t, version_major);
-        CHECK_MEMBER (ptl_magicversion_t, version_minor);
-}
-
-void
-check_ptl_hdr (void)
-{
-        CHECK_STRUCT (ptl_hdr_t);
-        CHECK_MEMBER (ptl_hdr_t, dest_nid);
-        CHECK_MEMBER (ptl_hdr_t, src_nid);
-        CHECK_MEMBER (ptl_hdr_t, dest_pid);
-        CHECK_MEMBER (ptl_hdr_t, src_pid);
-        CHECK_MEMBER (ptl_hdr_t, type);
-        CHECK_MEMBER (ptl_hdr_t, payload_length);
-        CHECK_MEMBER (ptl_hdr_t, msg);
-
-        BLANK_LINE ();
-        COMMENT ("Ack");
-        CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd);
-        CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits);
-        CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength);
-
-        BLANK_LINE ();
-        COMMENT ("Put");
-        CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd);
-        CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits);
-        CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data);
-        CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index);
-        CHECK_MEMBER (ptl_hdr_t, msg.put.offset);
-
-        BLANK_LINE ();
-        COMMENT ("Get");
-        CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd);
-        CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits);
-        CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index);
-        CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset);
-        CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length);
-
-        BLANK_LINE ();
-        COMMENT ("Reply");
-        CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd);
-
-        BLANK_LINE ();
-        COMMENT ("Hello");
-        CHECK_MEMBER (ptl_hdr_t, msg.hello.incarnation);
-        CHECK_MEMBER (ptl_hdr_t, msg.hello.type);
-}
-
-void
-system_string (char *cmdline, char *str, int len)
-{
-        int   fds[2];
-        int   rc;
-        pid_t pid;
-
-        rc = pipe (fds);
-        if (rc != 0)
-                abort ();
-
-        pid = fork ();
-        if (pid == 0) {
-                /* child */
-                int   fd = fileno(stdout);
-
-                rc = dup2(fds[1], fd);
-                if (rc != fd)
-                        abort();
-
-                exit(system(cmdline));
-                /* notreached */
-        } else if ((int)pid < 0) {
-                abort();
-        } else {
-                FILE *f = fdopen (fds[0], "r");
-
-                if (f == NULL)
-                        abort();
-
-                close(fds[1]);
-
-                if (fgets(str, len, f) == NULL)
-                        abort();
-
-                if (waitpid(pid, &rc, 0) != pid)
-                        abort();
-
-                if (!WIFEXITED(rc) ||
-                    WEXITSTATUS(rc) != 0)
-                        abort();
-
-                if (strnlen(str, len) == len)
-                        str[len - 1] = 0;
-
-                if (str[strlen(str) - 1] == '\n')
-                        str[strlen(str) - 1] = 0;
-
-                fclose(f);
-        }
-}
-
-int
-main (int argc, char **argv)
-{
-        char unameinfo[80];
-        char gccinfo[80];
-
-        system_string("uname -a", unameinfo, sizeof(unameinfo));
-        system_string("gcc -v 2>&1 | tail -1", gccinfo, sizeof(gccinfo));
-
-        printf ("void lib_assert_wire_constants (void)\n"
-                "{\n"
-                "        /* Wire protocol assertions generated by 'wirecheck'\n"
-                "         * running on %s\n"
-                "         * with %s */\n"
-                "\n", unameinfo, gccinfo);
-
-        BLANK_LINE ();
-
-        COMMENT ("Constants...");
-        CHECK_DEFINE (PORTALS_PROTO_MAGIC);
-        CHECK_DEFINE (PORTALS_PROTO_VERSION_MAJOR);
-        CHECK_DEFINE (PORTALS_PROTO_VERSION_MINOR);
-
-        CHECK_VALUE (PTL_MSG_ACK);
-        CHECK_VALUE (PTL_MSG_PUT);
-        CHECK_VALUE (PTL_MSG_GET);
-        CHECK_VALUE (PTL_MSG_REPLY);
-        CHECK_VALUE (PTL_MSG_HELLO);
-
-        check_ptl_handle_wire ();
-        check_ptl_magicversion ();
-        check_ptl_hdr ();
-
-        printf ("}\n\n");
-
-        return (0);
-}
index 2141a43..bbbb5f0 100644 (file)
@@ -1,6 +1,6 @@
 MODULES := ptlrpc
 ifeq ($(PATCHLEVEL),6)
-LDLM := @top_srcdir@/ldlm/
+LDLM := @top_srcdir@/lustre/ldlm/
 else
 LDLM :=
 endif
index 6e354df..dfa578f 100644 (file)
@@ -3,11 +3,16 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-LDLM_COMM_SOURCES= $(top_srcdir)/ldlm/l_lock.c $(top_srcdir)/ldlm/ldlm_lock.c \
-    $(top_srcdir)/ldlm/ldlm_resource.c $(top_srcdir)/ldlm/ldlm_lib.c          \
-    $(top_srcdir)/ldlm/ldlm_plain.c $(top_srcdir)/ldlm/ldlm_extent.c          \
-    $(top_srcdir)/ldlm/ldlm_request.c $(top_srcdir)/ldlm/ldlm_lockd.c         \
-    $(top_srcdir)/ldlm/ldlm_internal.h $(top_srcdir)/ldlm/ldlm_inodebits.c
+LDLM_COMM_SOURCES= $(top_srcdir)/lustre/ldlm/l_lock.c          \
+       $(top_srcdir)/lustre/ldlm/ldlm_lock.c                   \
+       $(top_srcdir)/lustre/ldlm/ldlm_resource.c               \
+       $(top_srcdir)/lustre/ldlm/ldlm_lib.c                    \
+       $(top_srcdir)/lustre/ldlm/ldlm_plain.c                  \
+       $(top_srcdir)/lustre/ldlm/ldlm_extent.c                 \
+       $(top_srcdir)/lustre/ldlm/ldlm_request.c                \
+       $(top_srcdir)/lustre/ldlm/ldlm_lockd.c                  \
+       $(top_srcdir)/lustre/ldlm/ldlm_internal.h               \
+       $(top_srcdir)/lustre/ldlm/ldlm_inodebits.c
 
 COMMON_SOURCES =  client.c recover.c connection.c niobuf.c pack_generic.c   \
     events.c ptlrpc_module.c service.c pinger.c recov_thread.c llog_net.c   \
index 5a6ea9c..b07c412 100644 (file)
@@ -3,14 +3,12 @@
 # This code is issued under the GNU General Public License.
 # See the file COPYING in this distribution
 
-EXTRA_DIST = license-status maketags.sh lustre.spec version_tag.pl.in  \
-       lustre lustrefs lustre.spec.in lustre-kernel-2.4.spec.in       \
-       lmake linux-merge-config.awk linux-merge-modules.awk            \
-       linux-rhconfig.h
+EXTRA_DIST = license-status maketags.sh version_tag.pl.in      \
+       lustre lustrefs
 
 initddir = $(sysconfdir)/init.d
 if UTILS
+if INIT_SCRIPTS
 initd_SCRIPTS = lustre lustrefs
 endif
-
-
+endif